mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-10 16:07:59 -05:00
Compare commits
26 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
621676bd41 | ||
|
|
badb8c5d68 | ||
|
|
1300434bbe | ||
|
|
6a67893773 | ||
|
|
0cb0b49be9 | ||
|
|
8411ed1451 | ||
|
|
53f34aade5 | ||
|
|
aacec3f72f | ||
|
|
a8fa05d0e3 | ||
|
|
877018c84c | ||
|
|
91ac666e06 | ||
|
|
46e6c20440 | ||
|
|
ea71faf1fa | ||
|
|
7fd9ed1b49 | ||
|
|
2d4059c61f | ||
|
|
e4eda8938d | ||
|
|
fb707d5350 | ||
|
|
73cd4c0a99 | ||
|
|
5516320ad7 | ||
|
|
6336e74d5a | ||
|
|
a4b1eb3de9 | ||
|
|
279cdc66e0 | ||
|
|
81644fc28c | ||
|
|
17732ea013 | ||
|
|
9e057c835d | ||
|
|
f08b5bb49d |
@@ -1,6 +1,6 @@
|
||||
# Contributor's Guide
|
||||
|
||||
We welcome all contributions with open arms. At Ingonyama we take a village approach, believing it takes many hands and minds to build a ecosystem.
|
||||
We welcome all contributions with open arms. At Ingonyama we take a village approach, believing it takes many hands and minds to build an ecosystem.
|
||||
|
||||
## Contributing to ICICLE
|
||||
|
||||
@@ -14,9 +14,9 @@ We welcome all contributions with open arms. At Ingonyama we take a village appr
|
||||
When opening a [pull request](https://github.com/ingonyama-zk/icicle/pulls) please keep the following in mind.
|
||||
|
||||
- `Clear Purpose` - The pull request should solve a single issue and be clean of any unrelated changes.
|
||||
- `Clear description` - If the pull request is for a new feature describe what you built, why you added it and how its best that we test it. For bug fixes please describe the issue and the solution.
|
||||
- `Clear description` - If the pull request is for a new feature describe what you built, why you added it and how it's best that we test it. For bug fixes please describe the issue and the solution.
|
||||
- `Consistent style` - Rust and Golang code should be linted by the official linters (golang fmt and rust fmt) and maintain a proper style. For CUDA and C++ code we use [`clang-format`](https://github.com/ingonyama-zk/icicle/blob/main/.clang-format), [here](https://github.com/ingonyama-zk/icicle/blob/605c25f9d22135c54ac49683b710fe2ce06e2300/.github/workflows/main-format.yml#L46) you can see how we run it.
|
||||
- `Minimal Tests` - please add test which cover basic usage of your changes .
|
||||
- `Minimal Tests` - please add tests which cover the basic usage of your changes.
|
||||
|
||||
## Questions?
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ type NTTConfig[T any] struct {
|
||||
- **`areInputsOnDevice`**: Indicates if input scalars are located on the device.
|
||||
- **`areOutputsOnDevice`**: Indicates if results are stored on the device.
|
||||
- **`IsAsync`**: Controls whether the NTT operation runs asynchronously.
|
||||
- **`NttAlgorithm`**: Explicitly select the NTT algorithm. ECNTT supports running on `Radix2` algoruithm.
|
||||
- **`NttAlgorithm`**: Explicitly select the NTT algorithm. ECNTT supports running on `Radix2` algorithm.
|
||||
|
||||
### Default Configuration
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ func main() {
|
||||
input := createHostSliceFromHexString("1725b6")
|
||||
outHost256 := make(core.HostSlice[uint8], 32)
|
||||
|
||||
cfg := keccak.GetDefaultKeccakConfig()
|
||||
cfg := keccak.GetDefaultHashConfig()
|
||||
e := keccak.Keccak256(input, int32(input.Len()), 1, outHost256, &cfg)
|
||||
if e.CudaErrorCode != cr.CudaSuccess {
|
||||
panic("Keccak256 hashing failed")
|
||||
@@ -49,8 +49,8 @@ func main() {
|
||||
## Keccak Methods
|
||||
|
||||
```go
|
||||
func Keccak256(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *KeccakConfig) core.IcicleError
|
||||
func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *KeccakConfig) core.IcicleError
|
||||
func Keccak256(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *HashConfig) core.IcicleError
|
||||
func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int32, output core.HostOrDeviceSlice, config *HashConfig) core.IcicleError
|
||||
```
|
||||
|
||||
### Parameters
|
||||
@@ -59,18 +59,18 @@ func Keccak512(input core.HostOrDeviceSlice, inputBlockSize, numberOfBlocks int3
|
||||
- **`inputBlockSize`**: An integer specifying the size of the input data for a single hash.
|
||||
- **`numberOfBlocks`**: An integer specifying the number of results in the hash batch.
|
||||
- **`output`**: A slice where the resulting hash will be stored. This slice can be in host or device memory.
|
||||
- **`config`**: A pointer to a `KeccakConfig` object, which contains various configuration options for the Keccak256 operation.
|
||||
- **`config`**: A pointer to a `HashConfig` object, which contains various configuration options for the Keccak256 operation.
|
||||
|
||||
### Return Value
|
||||
|
||||
- **`CudaError`**: Returns a CUDA error code indicating the success or failure of the Keccak256/Keccak512 operation.
|
||||
|
||||
## KeccakConfig
|
||||
## HashConfig
|
||||
|
||||
The `KeccakConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware.
|
||||
The `HashConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware.
|
||||
|
||||
```go
|
||||
type KeccakConfig struct {
|
||||
type HashConfig struct {
|
||||
Ctx cr.DeviceContext
|
||||
areInputsOnDevice bool
|
||||
areOutputsOnDevice bool
|
||||
@@ -87,8 +87,8 @@ type KeccakConfig struct {
|
||||
|
||||
### Default Configuration
|
||||
|
||||
Use `GetDefaultKeccakConfig` to obtain a default configuration, which can then be customized as needed.
|
||||
Use `GetDefaultHashConfig` to obtain a default configuration, which can then be customized as needed.
|
||||
|
||||
```go
|
||||
func GetDefaultKeccakConfig() KeccakConfig
|
||||
func GetDefaultHashConfig() HashConfig
|
||||
```
|
||||
@@ -139,7 +139,7 @@ cfg.Ctx.IsBigTriangle = true
|
||||
|
||||
Toggling between MSM modes occurs automatically based on the number of results you are expecting from the `MSM` function.
|
||||
|
||||
The number of results is interpreted from the size of `var out core.DeviceSlice`. Thus its important when allocating memory for `var out core.DeviceSlice` to make sure that you are allocating `<number of results> X <size of a single point>`.
|
||||
The number of results is interpreted from the size of `var out core.DeviceSlice`. Thus it's important when allocating memory for `var out core.DeviceSlice` to make sure that you are allocating `<number of results> X <size of a single point>`.
|
||||
|
||||
```go
|
||||
...
|
||||
@@ -168,7 +168,7 @@ import (
|
||||
)
|
||||
```
|
||||
|
||||
This package include `G2Projective` and `G2Affine` points as well as a `G2Msm` method.
|
||||
This package includes `G2Projective` and `G2Affine` points as well as a `G2Msm` method.
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
@@ -171,7 +171,7 @@ Polynomial& add_monomial_inplace(Coeff monomial_coeff, uint64_t monomial = 0);
|
||||
Polynomial& sub_monomial_inplace(Coeff monomial_coeff, uint64_t monomial = 0);
|
||||
```
|
||||
|
||||
The ability to add or subtract monomials directly and in-place is an efficient way to manipualte polynomials.
|
||||
The ability to add or subtract monomials directly and in-place is an efficient way to manipulate polynomials.
|
||||
|
||||
Example:
|
||||
|
||||
|
||||
@@ -12,11 +12,68 @@ At its core, Keccak consists of a permutation function operating on a state arra
|
||||
- **Chi:** This step applies a nonlinear mixing operation to each lane of the state array.
|
||||
- **Iota:** This step introduces a round constant to the state array.
|
||||
|
||||
## Keccak vs Sha3
|
||||
|
||||
There is some [confusion](https://www.cybertest.com/blog/keccak-vs-sha3) about the difference between `Keccak` and `Sha3`. In ICICLE we support both: `Keccak256` refers to the older hash function used in Ethereum, and `Sha3-256` refers to the modern hash function.
|
||||
|
||||
## Using Keccak
|
||||
|
||||
ICICLE Keccak supports batch hashing, which can be utilized for constructing a merkle tree.
|
||||
ICICLE Keccak supports batch hashing, which can be utilized for constructing a merkle tree or running multiple hashes in parallel.
|
||||
|
||||
### Supported Bindings
|
||||
|
||||
- [Golang](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/golang/hash/keccak)
|
||||
- [Rust](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/rust/icicle-hash)
|
||||
- [Rust](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/rust/icicle-hash)
|
||||
|
||||
### Example usage
|
||||
|
||||
This is an example of running 1024 Keccak-256 hashes in parallel, where input strings are of size 136 bytes:
|
||||
|
||||
```rust
|
||||
use icicle_core::hash::HashConfig;
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
use icicle_hash::keccak::keccak256;
|
||||
|
||||
let config = HashConfig::default();
|
||||
let input_block_len = 136;
|
||||
let number_of_hashes = 1024;
|
||||
|
||||
let preimages = vec![1u8; number_of_hashes * input_block_len];
|
||||
let mut digests = vec![0u8; number_of_hashes * 32];
|
||||
|
||||
let preimages_slice = HostSlice::from_slice(&preimages);
|
||||
let digests_slice = HostSlice::from_mut_slice(&mut digests);
|
||||
|
||||
keccak256(
|
||||
preimages_slice,
|
||||
input_block_len as u32,
|
||||
number_of_hashes as u32,
|
||||
digests_slice,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
```
|
||||
|
||||
### Merkle Tree
|
||||
|
||||
You can build a keccak merkle tree using the corresponding functions:
|
||||
|
||||
```rust
|
||||
use icicle_core::tree::{merkle_tree_digests_len, TreeBuilderConfig};
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
use icicle_hash::keccak::build_keccak256_merkle_tree;
|
||||
|
||||
let mut config = TreeBuilderConfig::default();
|
||||
config.arity = 2;
|
||||
let height = 22;
|
||||
let input_block_len = 136;
|
||||
let leaves = vec![1u8; (1 << height) * input_block_len];
|
||||
let mut digests = vec![0u64; merkle_tree_digests_len((height + 1) as u32, 2, 1)];
|
||||
|
||||
let leaves_slice = HostSlice::from_slice(&leaves);
|
||||
let digests_slice = HostSlice::from_mut_slice(&mut digests);
|
||||
|
||||
build_keccak256_merkle_tree(leaves_slice, digests_slice, height, input_block_len, &config).unwrap();
|
||||
```
|
||||
|
||||
In the example above, a binary tree of height 22 is being built. Each leaf is considered to be a 136-byte-long array. The leaves and digests are aligned in a flat array. You can also build a tree with keccak512 by using the `build_keccak512_merkle_tree` function.
|
||||
@@ -1,6 +1,6 @@
|
||||
# MSM - Multi scalar multiplication
|
||||
|
||||
MSM stands for Multi scalar multiplication, its defined as:
|
||||
MSM stands for Multi scalar multiplication, it's defined as:
|
||||
|
||||
<math xmlns="http://www.w3.org/1998/Math/MathML">
|
||||
<mi>M</mi>
|
||||
@@ -43,7 +43,7 @@ $a_0, \ldots, a_n$ - Scalars
|
||||
|
||||
$MSM(a, G) \in G$ - a single EC (elliptic curve) point
|
||||
|
||||
In words, MSM is the sum of scalar and EC point multiplications. We can see from this definition that the core operations occurring are Modular Multiplication and Elliptic curve point addition. Its obvious that multiplication can be computed in parallel and then the products summed, making MSM inherently parallelizable.
|
||||
In words, MSM is the sum of scalar and EC point multiplications. We can see from this definition that the core operations occurring are Modular Multiplication and Elliptic curve point addition. It's obvious that multiplication can be computed in parallel and then the products summed, making MSM inherently parallelizable.
|
||||
|
||||
Accelerating MSM is crucial to a ZK protocol's performance due to the [large percent of run time](https://hackmd.io/@0xMonia/SkQ6-oRz3#Hardware-acceleration-in-action) they take when generating proofs.
|
||||
|
||||
@@ -131,7 +131,7 @@ Large buckets exist in two cases:
|
||||
2. When `c` does not divide the scalar bit-size.
|
||||
|
||||
A `large_bucket_factor` of 10 yields good results for most cases, but it's best to fine-tune this parameter per `c` and per scalar distribution.
|
||||
The two most important parameters for performance are `c` and the `precompute_factor`. They affect the number of EC additions as well as the memory size. When the points are not known in advance we cannot use precomputation. In this case the best `c` value is usually around $log_2(msmSize) - 4$. However, in most protocols the points are known in advanced and precomputation can be used unless limited by memory. Usually it's best to use maximum precomputation (such that we end up with only a single bucket module) combined we a `c` value around $log_2(msmSize) - 1$.
|
||||
The two most important parameters for performance are `c` and the `precompute_factor`. They affect the number of EC additions as well as the memory size. When the points are not known in advance we cannot use precomputation. In this case the best `c` value is usually around $log_2(msmSize) - 4$. However, in most protocols the points are known in advance and precomputation can be used unless limited by memory. Usually it's best to use maximum precomputation (such that we end up with only a single bucket module) combined with a `c` value around $log_2(msmSize) - 1$.
|
||||
|
||||
## Memory usage estimation
|
||||
|
||||
|
||||
@@ -56,7 +56,7 @@ Choosing an algorithm is heavily dependent on your use case. For example Cooley-
|
||||
|
||||
NTT also supports two different modes: `Batch NTT` and `Single NTT`.
|
||||
|
||||
Deciding weather to use `batch NTT` vs `single NTT` is highly dependent on your application and use case.
|
||||
Deciding whether to use `batch NTT` vs `single NTT` is highly dependent on your application and use case.
|
||||
|
||||
#### Single NTT
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# Poseidon
|
||||
|
||||
[Poseidon](https://eprint.iacr.org/2019/458.pdf) is a popular hash in the ZK ecosystem primarily because its optimized to work over large prime fields, a common setting for ZK proofs, thereby minimizing the number of multiplicative operations required.
|
||||
[Poseidon](https://eprint.iacr.org/2019/458.pdf) is a popular hash in the ZK ecosystem primarily because it's optimized to work over large prime fields, a common setting for ZK proofs, thereby minimizing the number of multiplicative operations required.
|
||||
|
||||
Poseidon has also been specifically designed to be efficient when implemented within ZK circuits. It uses far fewer constraints than other hash functions like Keccak or SHA-256 in the context of ZK circuits.
|
||||
|
||||
@@ -42,7 +42,7 @@ To generate a secure hash output, the algorithm goes through a series of "full r
|
||||
|
||||
**Linear Transformation and Round Constants:** A linear transformation is performed and round constants are added. The linear transformation in partial rounds can be designed to be less computationally intensive (this is done by using a sparse matrix) than in full rounds, further optimizing the function's efficiency.
|
||||
|
||||
The user of Poseidon can often choose how many partial or full rounds he wishes to apply; more full rounds will increase security but degrade performance. The choice and balance is highly dependent on the use case.
|
||||
The user of Poseidon can often choose how many partial or full rounds he wishes to apply; more full rounds will increase security but degrade performance. The choice and balance are highly dependent on the use case.
|
||||
|
||||
## Using Poseidon
|
||||
|
||||
@@ -53,13 +53,14 @@ So for Poseidon of arity 2 and input of size 1024 * 2, we would expect 1024 elem
|
||||
|
||||
### Supported Bindings
|
||||
|
||||
[`Go`](https://github.com/ingonyama-zk/icicle/blob/main/wrappers/golang/curves/bn254/poseidon/poseidon.go)
|
||||
[`Rust`](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/rust/icicle-core/src/poseidon)
|
||||
|
||||
### Constants
|
||||
|
||||
Poseidon is extremely customizable and using different constants will produce different hashes, security levels and performance results.
|
||||
|
||||
We support pre-calculated and optimized constants for each of the [supported curves](../core#supported-curves-and-operations).The constants can be found [here](https://github.com/ingonyama-zk/icicle/tree/main/icicle/include/poseidon/constants) and are labeled clearly per curve `<curve_name>_poseidon.h`.
|
||||
We support pre-calculated and optimized constants for each of the [supported curves](../core#supported-curves-and-operations). The constants can be found [here](https://github.com/ingonyama-zk/icicle/tree/main/icicle/include/poseidon/constants) and are labeled clearly per curve `<curve_name>_poseidon.h`.
|
||||
|
||||
If you wish to generate your own constants you can use our python script which can be found [here](https://github.com/ingonyama-zk/icicle/tree/main/icicle/include/poseidon/constants/generate_parameters.py).
|
||||
|
||||
@@ -91,8 +92,6 @@ primitive_element = 7 # bls12-381
|
||||
# primitive_element = 15 # bw6-761
|
||||
```
|
||||
|
||||
We only support `alpha = 5` so if you want to use another alpha for S-box please reach out on discord or open a github issue.
|
||||
|
||||
### Rust API
|
||||
|
||||
This is the most basic way to use the Poseidon API.
|
||||
@@ -101,71 +100,58 @@ This is the most basic way to use the Poseidon API.
|
||||
let test_size = 1 << 10;
|
||||
let arity = 2u32;
|
||||
let ctx = get_default_device_context();
|
||||
let constants = load_optimized_poseidon_constants::<F>(arity, &ctx).unwrap();
|
||||
let config = PoseidonConfig::default();
|
||||
let poseidon = Poseidon::load(arity, &ctx).unwrap();
|
||||
let config = HashConfig::default();
|
||||
|
||||
let inputs = vec![F::one(); test_size * arity as usize];
|
||||
let outputs = vec![F::zero(); test_size];
|
||||
let mut input_slice = HostOrDeviceSlice::on_host(inputs);
|
||||
let mut output_slice = HostOrDeviceSlice::on_host(outputs);
|
||||
|
||||
poseidon_hash_many::<F>(
|
||||
poseidon.hash_many::<F>(
|
||||
&mut input_slice,
|
||||
&mut output_slice,
|
||||
test_size as u32,
|
||||
arity as u32,
|
||||
&constants,
|
||||
1, // Output length
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
```
|
||||
|
||||
The `PoseidonConfig::default()` can be modified, by default the inputs and outputs are set to be on `Host` for example.
|
||||
The `HashConfig` can be modified. For example, by default the inputs and outputs are set to be on `Host`.
|
||||
|
||||
```rust
|
||||
impl<'a> Default for PoseidonConfig<'a> {
|
||||
impl<'a> Default for HashConfig<'a> {
|
||||
fn default() -> Self {
|
||||
let ctx = get_default_device_context();
|
||||
Self {
|
||||
ctx,
|
||||
are_inputs_on_device: false,
|
||||
are_outputs_on_device: false,
|
||||
input_is_a_state: false,
|
||||
aligned: false,
|
||||
loop_state: false,
|
||||
is_async: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
In the example above `load_optimized_poseidon_constants::<F>(arity, &ctx).unwrap();` is used which will load the correct constants based on arity and curve. Its possible to [generate](#constants) your own constants and load them.
|
||||
In the example above `Poseidon::load(arity, &ctx).unwrap();` is used which will load the correct constants based on arity and curve. It's possible to [generate](#constants) your own constants and load them.
|
||||
|
||||
```rust
|
||||
let ctx = get_default_device_context();
|
||||
let cargo_manifest_dir = env!("CARGO_MANIFEST_DIR");
|
||||
let constants_file = PathBuf::from(cargo_manifest_dir)
|
||||
.join("tests")
|
||||
.join(format!("{}_constants.bin", field_prefix));
|
||||
let mut constants_buf = vec![];
|
||||
File::open(constants_file)
|
||||
.unwrap()
|
||||
.read_to_end(&mut constants_buf)
|
||||
.unwrap();
|
||||
|
||||
let mut custom_constants = vec![];
|
||||
for chunk in constants_buf.chunks(field_bytes) {
|
||||
custom_constants.push(F::from_bytes_le(chunk));
|
||||
}
|
||||
|
||||
let custom_constants = create_optimized_poseidon_constants::<F>(
|
||||
arity as u32,
|
||||
&ctx,
|
||||
full_rounds_half,
|
||||
partial_rounds,
|
||||
&mut custom_constants,
|
||||
)
|
||||
.unwrap();
|
||||
let custom_poseidon = Poseidon::new(
|
||||
arity, // The arity of poseidon hash. The width will be equal to arity + 1
|
||||
alpha, // The S-box power
|
||||
full_rounds_half,
|
||||
partial_rounds,
|
||||
round_constants,
|
||||
mds_matrix,
|
||||
non_sparse_matrix,
|
||||
sparse_matrices,
|
||||
domain_tag,
|
||||
ctx,
|
||||
)
|
||||
.unwrap();
|
||||
```
|
||||
|
||||
## The Tree Builder
|
||||
@@ -175,21 +161,34 @@ The tree builder allows you to build Merkle trees using Poseidon.
|
||||
You can define both the tree's `height` and its `arity`. The tree `height` determines the number of layers in the tree, including the root and the leaf layer. The `arity` determines how many children each internal node can have.
|
||||
|
||||
```rust
|
||||
let height = 20;
|
||||
let arity = 2;
|
||||
let leaves = vec![F::one(); 1 << (height - 1)];
|
||||
let mut digests = vec![F::zero(); merkle_tree_digests_len(height, arity)];
|
||||
|
||||
let mut leaves_slice = HostOrDeviceSlice::on_host(leaves);
|
||||
|
||||
let ctx = get_default_device_context();
|
||||
let constants = load_optimized_poseidon_constants::<F>(arity, &ctx).unwrap()
|
||||
use icicle_bn254::tree::Bn254TreeBuilder;
|
||||
use icicle_bn254::poseidon::Poseidon;
|
||||
|
||||
let mut config = TreeBuilderConfig::default();
|
||||
config.keep_rows = 1;
|
||||
build_poseidon_merkle_tree::<F>(&mut leaves_slice, &mut digests, height, arity, &constants, &config).unwrap();
|
||||
let arity = 2;
|
||||
config.arity = arity as u32;
|
||||
let input_block_len = arity;
|
||||
let leaves = vec![F::one(); (1 << height) * arity];
|
||||
let mut digests = vec![F::zero(); merkle_tree_digests_len((height + 1) as u32, arity as u32, 1)];
|
||||
|
||||
println!("Root: {:?}", digests[0..1][0]);
|
||||
let leaves_slice = HostSlice::from_slice(&leaves);
|
||||
let digests_slice = HostSlice::from_mut_slice(&mut digests);
|
||||
|
||||
let ctx = device_context::DeviceContext::default();
|
||||
let hash = Poseidon::load(2, &ctx).unwrap();
|
||||
|
||||
let mut config = TreeBuilderConfig::default();
|
||||
config.keep_rows = 5;
|
||||
Bn254TreeBuilder::build_merkle_tree(
|
||||
leaves_slice,
|
||||
digests_slice,
|
||||
height,
|
||||
input_block_len,
|
||||
&hash,
|
||||
&hash,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
```
|
||||
|
||||
Similar to Poseidon, you can also configure the Tree Builder `TreeBuilderConfig::default()`
|
||||
|
||||
88
docs/docs/icicle/primitives/poseidon2.md
Normal file
88
docs/docs/icicle/primitives/poseidon2.md
Normal file
@@ -0,0 +1,88 @@
|
||||
# Poseidon2
|
||||
|
||||
[Poseidon2](https://eprint.iacr.org/2023/323) is a recently released optimized version of Poseidon1. The two versions differ in two crucial points. First, Poseidon is a sponge hash function, while Poseidon2 can be either a sponge or a compression function depending on the use case. Secondly, Poseidon2 is instantiated by new and more efficient linear layers with respect to Poseidon. These changes decrease the number of multiplications in the linear layer by up to 90% and the number of constraints in Plonk circuits by up to 70%. This makes Poseidon2 currently the fastest arithmetization-oriented hash function without lookups.
|
||||
|
||||
|
||||
## Using Poseidon2
|
||||
|
||||
ICICLE Poseidon2 is implemented for GPU and parallelization is performed for each state.
|
||||
We calculate multiple hash-sums over multiple pre-images in parallel, rather than going block by block over the input vector.
|
||||
|
||||
For example, for Poseidon2 of width 16, input rate 8, output elements 8 and input of size 1024 * 8, we would expect 1024 * 8 elements of output. This means each input block would be of size 8, resulting in 1024 Poseidon2 hashes being performed.
|
||||
|
||||
### Supported Bindings
|
||||
|
||||
[`Rust`](https://github.com/ingonyama-zk/icicle/tree/main/wrappers/rust/icicle-core/src/poseidon2)
|
||||
|
||||
### Constants
|
||||
|
||||
Poseidon2 is also extremely customizable and using different constants will produce different hashes, security levels and performance results.
|
||||
|
||||
We support pre-calculated constants for each of the [supported curves](../core#supported-curves-and-operations). The constants can be found [here](https://github.com/ingonyama-zk/icicle/tree/main/icicle/include/poseidon2/constants) and are labeled clearly per curve `<curve_name>_poseidon2.h`.
|
||||
|
||||
You can also use your own set of constants as shown [here](https://github.com/ingonyama-zk/icicle/blob/main/wrappers/rust/icicle-fields/icicle-babybear/src/poseidon2/mod.rs#L290).
|
||||
|
||||
### Rust API
|
||||
|
||||
This is the most basic way to use the Poseidon2 API.
|
||||
|
||||
```rust
|
||||
let test_size = 1 << 10;
|
||||
let width = 16;
|
||||
let rate = 8;
|
||||
let ctx = get_default_device_context();
|
||||
let poseidon = Poseidon2::load(width, rate, MdsType::Default, DiffusionStrategy::Default, &ctx).unwrap();
|
||||
let config = HashConfig::default();
|
||||
|
||||
let inputs = vec![F::one(); test_size * rate as usize];
|
||||
let outputs = vec![F::zero(); test_size];
|
||||
let mut input_slice = HostOrDeviceSlice::on_host(inputs);
|
||||
let mut output_slice = HostOrDeviceSlice::on_host(outputs);
|
||||
|
||||
poseidon.hash_many::<F>(
|
||||
&mut input_slice,
|
||||
&mut output_slice,
|
||||
test_size as u32,
|
||||
rate as u32,
|
||||
8, // Output length
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
```
|
||||
|
||||
In the example above `Poseidon2::load(width, rate, MdsType::Default, DiffusionStrategy::Default, &ctx).unwrap();` is used to load the correct constants based on width and curve. Here, the default MDS matrices and diffusion are used. If you want to get a Plonky3 compliant version, set them to `MdsType::Plonky` and `DiffusionStrategy::Montgomery` respectively.
|
||||
|
||||
## The Tree Builder
|
||||
|
||||
Similar to Poseidon1, you can use Poseidon2 in a tree builder.
|
||||
|
||||
```rust
|
||||
use icicle_bn254::tree::Bn254TreeBuilder;
|
||||
use icicle_bn254::poseidon2::Poseidon2;
|
||||
|
||||
let mut config = TreeBuilderConfig::default();
|
||||
let arity = 2;
|
||||
config.arity = arity as u32;
|
||||
let input_block_len = arity;
|
||||
let leaves = vec![F::one(); (1 << height) * arity];
|
||||
let mut digests = vec![F::zero(); merkle_tree_digests_len((height + 1) as u32, arity as u32, 1)];
|
||||
|
||||
let leaves_slice = HostSlice::from_slice(&leaves);
|
||||
let digests_slice = HostSlice::from_mut_slice(&mut digests);
|
||||
|
||||
let ctx = device_context::DeviceContext::default();
|
||||
let hash = Poseidon2::load(arity, arity, MdsType::Default, DiffusionStrategy::Default, &ctx).unwrap();
|
||||
|
||||
let mut config = TreeBuilderConfig::default();
|
||||
config.keep_rows = 5;
|
||||
Bn254TreeBuilder::build_merkle_tree(
|
||||
leaves_slice,
|
||||
digests_slice,
|
||||
height,
|
||||
input_block_len,
|
||||
&hash,
|
||||
&hash,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
```
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
```rust
|
||||
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};
|
||||
use icicle_hash::keccak::{keccak256, KeccakConfig};
|
||||
use icicle_hash::keccak::{keccak256, HashConfig};
|
||||
use rand::{self, Rng};
|
||||
|
||||
fn main() {
|
||||
@@ -14,7 +14,7 @@ fn main() {
|
||||
let input = HostSlice::<u8>::from_slice(initial_data.as_slice());
|
||||
let mut output = DeviceVec::<u8>::cuda_malloc(32).unwrap();
|
||||
|
||||
let mut config = KeccakConfig::default();
|
||||
let mut config = HashConfig::default();
|
||||
keccak256(input, initial_data.len() as i32, 1, &mut output[..], &mut config).expect("Failed to execute keccak256 hashing");
|
||||
|
||||
let mut output_host = vec![0_u8; 32];
|
||||
@@ -32,7 +32,7 @@ pub fn keccak256(
|
||||
input_block_size: i32,
|
||||
number_of_blocks: i32,
|
||||
output: &mut (impl HostOrDeviceSlice<u8> + ?Sized),
|
||||
config: &mut KeccakConfig,
|
||||
config: &mut HashConfig,
|
||||
) -> IcicleResult<()>
|
||||
|
||||
pub fn keccak512(
|
||||
@@ -40,7 +40,7 @@ pub fn keccak512(
|
||||
input_block_size: i32,
|
||||
number_of_blocks: i32,
|
||||
output: &mut (impl HostOrDeviceSlice<u8> + ?Sized),
|
||||
config: &mut KeccakConfig,
|
||||
config: &mut HashConfig,
|
||||
) -> IcicleResult<()>
|
||||
```
|
||||
|
||||
@@ -50,18 +50,18 @@ pub fn keccak512(
|
||||
- **`input_block_size`**: An integer specifying the size of the input data for a single hash.
|
||||
- **`number_of_blocks`**: An integer specifying the number of results in the hash batch.
|
||||
- **`output`**: A slice where the resulting hash will be stored. This slice can be in host or device memory.
|
||||
- **`config`**: A pointer to a `KeccakConfig` object, which contains various configuration options for the Keccak256 operation.
|
||||
- **`config`**: A pointer to a `HashConfig` object, which contains various configuration options for the Keccak256 operation.
|
||||
|
||||
### Return Value
|
||||
|
||||
- **`IcicleResult`**: Returns a CUDA error code indicating the success or failure of the Keccak256/Keccak512 operation.
|
||||
|
||||
## KeccakConfig
|
||||
## HashConfig
|
||||
|
||||
The `KeccakConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware.
|
||||
The `HashConfig` structure holds configuration parameters for the Keccak256/Keccak512 operation, allowing customization of its behavior to optimize performance based on the specifics of the operation or the underlying hardware.
|
||||
|
||||
```rust
|
||||
pub struct KeccakConfig<'a> {
|
||||
pub struct HashConfig<'a> {
|
||||
pub ctx: DeviceContext<'a>,
|
||||
pub are_inputs_on_device: bool,
|
||||
pub are_outputs_on_device: bool,
|
||||
@@ -81,7 +81,7 @@ pub struct KeccakConfig<'a> {
|
||||
Example initialization with default settings:
|
||||
|
||||
```rust
|
||||
let default_config = KeccakConfig::default();
|
||||
let default_config = HashConfig::default();
|
||||
```
|
||||
|
||||
Customizing the configuration:
|
||||
|
||||
19
docs/package-lock.json
generated
19
docs/package-lock.json
generated
@@ -3680,6 +3680,8 @@
|
||||
"version": "8.12.0",
|
||||
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.12.0.tgz",
|
||||
"integrity": "sha512-sRu1kpcO9yLtYxBKvqfTeh9KzZEwO3STyX1HT+4CaDzC6HpTGYhIhPIzj9XuKU7KYDwnaeh5hcOwjy1QuJzBPA==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"dependencies": {
|
||||
"fast-deep-equal": "^3.1.1",
|
||||
"json-schema-traverse": "^1.0.0",
|
||||
@@ -3694,7 +3696,9 @@
|
||||
"node_modules/ajv-formats/node_modules/json-schema-traverse": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
|
||||
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="
|
||||
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==",
|
||||
"optional": true,
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/ajv-keywords": {
|
||||
"version": "3.5.2",
|
||||
@@ -16340,14 +16344,13 @@
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-2.1.1.tgz",
|
||||
"integrity": "sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA==",
|
||||
"requires": {
|
||||
"ajv": "^8.0.0"
|
||||
},
|
||||
"requires": {},
|
||||
"dependencies": {
|
||||
"ajv": {
|
||||
"version": "8.12.0",
|
||||
"resolved": "https://registry.npmjs.org/ajv/-/ajv-8.12.0.tgz",
|
||||
"version": "https://registry.npmjs.org/ajv/-/ajv-8.12.0.tgz",
|
||||
"integrity": "sha512-sRu1kpcO9yLtYxBKvqfTeh9KzZEwO3STyX1HT+4CaDzC6HpTGYhIhPIzj9XuKU7KYDwnaeh5hcOwjy1QuJzBPA==",
|
||||
"optional": true,
|
||||
"peer": true,
|
||||
"requires": {
|
||||
"fast-deep-equal": "^3.1.1",
|
||||
"json-schema-traverse": "^1.0.0",
|
||||
@@ -16358,7 +16361,9 @@
|
||||
"json-schema-traverse": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz",
|
||||
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug=="
|
||||
"integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==",
|
||||
"optional": true,
|
||||
"peer": true
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
@@ -53,6 +53,11 @@ module.exports = {
|
||||
label: "Poseidon Hash",
|
||||
id: "icicle/primitives/poseidon",
|
||||
},
|
||||
{
|
||||
type: "doc",
|
||||
label: "Poseidon2 Hash",
|
||||
id: "icicle/primitives/poseidon2",
|
||||
},
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -186,11 +191,6 @@ module.exports = {
|
||||
},
|
||||
]
|
||||
},
|
||||
{
|
||||
type: "doc",
|
||||
label: "ZK Containers",
|
||||
id: "ZKContainers",
|
||||
},
|
||||
{
|
||||
type: "doc",
|
||||
label: "Ingonyama Grant program",
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# Icicle example: Multi-Scalar Multiplication (MSM)
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend running our examples in [ZK-containers](../../ZK-containers.md) to save you time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
`Icicle` provides the CUDA C++ template function `MSM` to accelerate [Multi-Scalar Multiplication](https://github.com/ingonyama-zk/ingopedia/blob/master/src/msm.md).
|
||||
|
||||
@@ -6,6 +6,9 @@
|
||||
#include "api/bn254.h"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
using namespace poseidon;
|
||||
using namespace bn254;
|
||||
|
||||
@@ -20,31 +23,20 @@ void checkCudaError(cudaError_t error)
|
||||
// these global constants go into template calls
|
||||
const int size_col = 11;
|
||||
|
||||
// this function executes the Poseidon thread
|
||||
void threadPoseidon(
|
||||
device_context::DeviceContext ctx,
|
||||
unsigned size_partition,
|
||||
scalar_t* layers,
|
||||
scalar_t* column_hashes,
|
||||
PoseidonConstants<scalar_t>* constants)
|
||||
Poseidon<scalar_t> * poseidon)
|
||||
{
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
// CHK_IF_RETURN(); I can't use it in a standard thread function
|
||||
PoseidonConfig column_config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputes_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // input_is_a_state
|
||||
false, // aligned
|
||||
false, // loop_state
|
||||
false, // is_async
|
||||
};
|
||||
cudaError_t err =
|
||||
bn254_poseidon_hash_cuda(layers, column_hashes, (size_t)size_partition, size_col, *constants, column_config);
|
||||
HashConfig column_config = default_hash_config(ctx);
|
||||
cudaError_t err = poseidon->hash_many(layers, column_hashes, (size_t) size_partition, size_col, 1, column_config);
|
||||
checkCudaError(err);
|
||||
}
|
||||
|
||||
@@ -59,6 +51,11 @@ using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::p
|
||||
exit(EXIT_FAILURE); \
|
||||
}
|
||||
|
||||
#define CHECK_ALLOC(ptr) if ((ptr) == nullptr) { \
|
||||
std::cerr << "Memory allocation for '" #ptr "' failed." << std::endl; \
|
||||
exit(EXIT_FAILURE); \
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
const unsigned size_row = (1 << 30);
|
||||
@@ -116,19 +113,18 @@ int main()
|
||||
scalar_t* column_hash1 = static_cast<scalar_t*>(malloc(size_partition * sizeof(scalar_t)));
|
||||
CHECK_ALLOC(column_hash1);
|
||||
|
||||
PoseidonConstants<scalar_t> column_constants0, column_constants1;
|
||||
bn254_init_optimized_poseidon_constants_cuda(size_col, ctx0, &column_constants0);
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx1.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
bn254_init_optimized_poseidon_constants_cuda(size_col, ctx1, &column_constants1);
|
||||
Poseidon<scalar_t> column_poseidon0(size_col, ctx0);
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx1.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
Poseidon<scalar_t> column_poseidon1(size_col, ctx1);
|
||||
|
||||
std::cout << "Parallel execution of Poseidon threads" << std::endl;
|
||||
START_TIMER(parallel);
|
||||
std::thread thread0(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_constants0);
|
||||
std::thread thread1(threadPoseidon, ctx1, size_partition, layers1, column_hash1, &column_constants1);
|
||||
std::thread thread0(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_poseidon0);
|
||||
std::thread thread1(threadPoseidon, ctx1, size_partition, layers1, column_hash1, &column_poseidon1);
|
||||
|
||||
// Wait for the threads to finish
|
||||
thread0.join();
|
||||
@@ -141,9 +137,9 @@ int main()
|
||||
|
||||
std::cout << "Sequential execution of Poseidon threads" << std::endl;
|
||||
START_TIMER(sequential);
|
||||
std::thread thread2(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_constants0);
|
||||
std::thread thread2(threadPoseidon, ctx0, size_partition, layers0, column_hash0, &column_poseidon0);
|
||||
thread2.join();
|
||||
std::thread thread3(threadPoseidon, ctx0, size_partition, layers1, column_hash1, &column_constants0);
|
||||
std::thread thread3(threadPoseidon, ctx0, size_partition, layers1, column_hash1, &column_poseidon0);
|
||||
thread3.join();
|
||||
END_TIMER(sequential, "1 GPU");
|
||||
std::cout << "Output Data from Thread 2: ";
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# Icicle example: Multiplication
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend running our examples in [ZK-containers](../../ZK-containers.md) to save you time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
`Icicle` accelerates the multiplication operation `*` using the [Karatsuba algorithm](https://en.wikipedia.org/wiki/Karatsuba_algorithm)
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# Icicle example: Number-Theoretical Transform (NTT)
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend running our examples in [ZK-containers](../../ZK-containers.md) to save you time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
`Icicle` provides the CUDA C++ template function NTT for the [Number Theoretic Transform](https://github.com/ingonyama-zk/ingopedia/blob/master/src/fft.md), the finite-field analogue of the Discrete Fourier Transform.
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# ICICLE example: Pedersen Commitment
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend running our examples in [ZK-containers](../../ZK-containers.md) to save you time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
A Pedersen Commitment is a cryptographic primitive to commit to a value or a vector of values while keeping it hidden, yet enabling the committer to reveal the value later. It provides both hiding (the commitment does not reveal any information about the value) and binding properties (once a value is committed, it cannot be changed without detection).
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# ICICLE examples: computations with polynomials
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend running our examples in [ZK-containers](../../ZK-containers.md) to save you time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
Polynomials are crucial for Zero-Knowledge Proofs (ZKPs): they enable efficient representation and verification of computational statements, facilitate privacy-preserving protocols, and support complex mathematical operations essential for constructing and verifying proofs without revealing underlying data. Polynomial API is documented [here](https://dev.ingonyama.com/icicle/polynomials/overview)
|
||||
|
||||
@@ -3,13 +3,11 @@
|
||||
#include "polynomials/polynomials.h"
|
||||
#include "polynomials/cuda_backend/polynomial_cuda_backend.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
#include "api/bn254.h"
|
||||
#include <chrono>
|
||||
|
||||
// using namespace field_config;
|
||||
using namespace polynomials;
|
||||
using namespace merkle;
|
||||
using namespace bn254;
|
||||
|
||||
// define the polynomial type
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# Icicle example: build a Merkle tree using Poseidon hash
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend running our examples in [ZK-containers](../../ZK-containers.md) to save you time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
`Icicle` provides the CUDA C++ template `poseidon_hash` to accelerate the popular [Poseidon hash function](https://www.poseidon-hash.info/).
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
|
||||
#include "api/bn254.h"
|
||||
#include "curves/params/bn254.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "hash/hash.cuh"
|
||||
using namespace poseidon;
|
||||
using namespace bn254;
|
||||
|
||||
@@ -14,13 +16,12 @@ inline uint32_t tree_index(uint32_t level, uint32_t offset) { return (1 << level
|
||||
|
||||
// We assume the tree has leaves already set, compute all other levels
|
||||
void build_tree(
|
||||
const uint32_t tree_height, scalar_t* tree, PoseidonConstants<scalar_t>* constants, PoseidonConfig config)
|
||||
const uint32_t tree_height, scalar_t* tree, Poseidon<scalar_t> &poseidon, HashConfig &config)
|
||||
{
|
||||
for (uint32_t level = tree_height - 1; level > 0; level--) {
|
||||
const uint32_t next_level = level - 1;
|
||||
const uint32_t next_level_width = 1 << next_level;
|
||||
bn254_poseidon_hash_cuda(
|
||||
&tree[tree_index(level, 0)], &tree[tree_index(next_level, 0)], next_level_width, 2, *constants, config);
|
||||
poseidon.hash_many(&tree[tree_index(level, 0)], &tree[tree_index(next_level, 0)], next_level_width, 2, 1, config);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -65,8 +66,8 @@ uint32_t validate_proof(
|
||||
const uint32_t tree_height,
|
||||
const uint32_t* proof_lr,
|
||||
const scalar_t* proof_hash,
|
||||
PoseidonConstants<scalar_t>* constants,
|
||||
PoseidonConfig config)
|
||||
Poseidon<scalar_t> &poseidon,
|
||||
HashConfig &config)
|
||||
{
|
||||
scalar_t hashes_in[2], hash_out[1], level_hash;
|
||||
level_hash = hash;
|
||||
@@ -79,7 +80,7 @@ uint32_t validate_proof(
|
||||
hashes_in[1] = level_hash;
|
||||
}
|
||||
// next level hash
|
||||
bn254_poseidon_hash_cuda(hashes_in, hash_out, 1, 2, *constants, config);
|
||||
poseidon.hash_many(hashes_in, hash_out, 1, 2, 1, config);
|
||||
level_hash = hash_out[0];
|
||||
}
|
||||
return proof_hash[0] == level_hash;
|
||||
@@ -109,16 +110,15 @@ int main(int argc, char* argv[])
|
||||
d = d + scalar_t::one();
|
||||
}
|
||||
std::cout << "Hashing blocks into tree leaves..." << std::endl;
|
||||
PoseidonConstants<scalar_t> constants;
|
||||
bn254_init_optimized_poseidon_constants_cuda(data_arity, ctx, &constants);
|
||||
PoseidonConfig config = default_poseidon_config(data_arity + 1);
|
||||
bn254_poseidon_hash_cuda(data, &tree[tree_index(leaf_level, 0)], tree_width, 4, constants, config);
|
||||
|
||||
Poseidon<scalar_t> poseidon(data_arity, ctx);
|
||||
HashConfig config = default_hash_config(ctx);
|
||||
poseidon.hash_many(data, &tree[tree_index(leaf_level, 0)], tree_width, data_arity, 1, config);
|
||||
|
||||
std::cout << "3. Building Merkle tree" << std::endl;
|
||||
PoseidonConstants<scalar_t> tree_constants;
|
||||
bn254_init_optimized_poseidon_constants_cuda(tree_arity, ctx, &tree_constants);
|
||||
PoseidonConfig tree_config = default_poseidon_config(tree_arity + 1);
|
||||
build_tree(tree_height, tree, &tree_constants, tree_config);
|
||||
Poseidon<scalar_t> tree_poseidon(tree_arity, ctx);
|
||||
HashConfig tree_config = default_hash_config(ctx);
|
||||
build_tree(tree_height, tree, tree_poseidon, tree_config);
|
||||
|
||||
std::cout << "4. Generate membership proof" << std::endl;
|
||||
uint32_t position = tree_width - 1;
|
||||
@@ -133,13 +133,13 @@ int main(int argc, char* argv[])
|
||||
std::cout << "5. Validate the hash membership" << std::endl;
|
||||
uint32_t validated;
|
||||
const scalar_t hash = tree[tree_index(leaf_level, query_position)];
|
||||
validated = validate_proof(hash, tree_height, proof_lr, proof_hash, &tree_constants, tree_config);
|
||||
validated = validate_proof(hash, tree_height, proof_lr, proof_hash, tree_poseidon, tree_config);
|
||||
std::cout << "Validated: " << validated << std::endl;
|
||||
|
||||
std::cout << "6. Tamper the hash" << std::endl;
|
||||
const scalar_t tampered_hash = hash + scalar_t::one();
|
||||
validated = validate_proof(tampered_hash, tree_height, proof_lr, proof_hash, &tree_constants, tree_config);
|
||||
|
||||
validated = validate_proof(tampered_hash, tree_height, proof_lr, proof_hash, tree_poseidon, tree_config);
|
||||
|
||||
std::cout << "7. Invalidate tamper hash membership" << std::endl;
|
||||
std::cout << "Validated: " << validated << std::endl;
|
||||
return 0;
|
||||
|
||||
@@ -2,10 +2,6 @@
|
||||
|
||||
`ICICLE` provides Rust bindings to CUDA-accelerated C++ implementation of [Multi-Scalar Multiplication](https://github.com/ingonyama-zk/ingopedia/blob/master/src/msm.md).
|
||||
|
||||
## Best Practices
|
||||
|
||||
To save time and avoid setting up prerequisites manually, we recommend running this example in our [ZKContainer](../../ZKContainer.md).
|
||||
|
||||
## Usage
|
||||
|
||||
```rust
|
||||
|
||||
@@ -4,10 +4,6 @@
|
||||
|
||||
`ICICLE` provides Rust bindings to CUDA-accelerated C++ implementation of [Number Theoretic Transform](https://github.com/ingonyama-zk/ingopedia/blob/master/src/fft.md).
|
||||
|
||||
## Best Practices
|
||||
|
||||
To save time and avoid setting up prerequisites manually, we recommend running this example in our [ZKContainer](../../ZKContainer.md).
|
||||
|
||||
## Usage
|
||||
|
||||
```rust
|
||||
|
||||
@@ -2,7 +2,8 @@ use icicle_bls12_381::curve::ScalarField as F;
|
||||
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
|
||||
use icicle_core::poseidon::{load_optimized_poseidon_constants, poseidon_hash_many, PoseidonConfig};
|
||||
use icicle_core::hash::{SpongeHash, HashConfig};
|
||||
use icicle_core::poseidon::Poseidon;
|
||||
use icicle_core::traits::FieldImpl;
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
|
||||
@@ -24,14 +25,14 @@ fn main() {
|
||||
let test_size = 1 << size;
|
||||
|
||||
println!("Running Icicle Examples: Rust Poseidon Hash");
|
||||
let arity = 2u32;
|
||||
let arity = 2;
|
||||
println!(
|
||||
"---------------------- Loading optimized Poseidon constants for arity={} ------------------------",
|
||||
arity
|
||||
);
|
||||
let ctx = DeviceContext::default();
|
||||
let constants = load_optimized_poseidon_constants::<F>(arity, &ctx).unwrap();
|
||||
let config = PoseidonConfig::default();
|
||||
let poseidon = Poseidon::load(arity, &ctx).unwrap();
|
||||
let config = HashConfig::default();
|
||||
|
||||
println!(
|
||||
"---------------------- Input size 2^{}={} ------------------------",
|
||||
@@ -45,12 +46,12 @@ fn main() {
|
||||
println!("Executing BLS12-381 Poseidon Hash on device...");
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
poseidon_hash_many::<F>(
|
||||
poseidon.hash_many(
|
||||
input_slice,
|
||||
output_slice,
|
||||
test_size as u32,
|
||||
arity as u32,
|
||||
&constants,
|
||||
test_size,
|
||||
arity,
|
||||
1,
|
||||
&config,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -9,58 +9,67 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "fields/stark_fields/babybear.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
extern "C" cudaError_t babybear_extension_ntt_cuda(
|
||||
const babybear::extension_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::extension_t* output);
|
||||
|
||||
extern "C" cudaError_t babybear_initialize_domain(
|
||||
babybear::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
extern "C" cudaError_t babybear_poseidon2_create_cuda(
|
||||
poseidon2::Poseidon2<babybear::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const babybear::scalar_t* round_constants,
|
||||
const babybear::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t babybear_ntt_cuda(
|
||||
const babybear::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::scalar_t* output);
|
||||
extern "C" cudaError_t babybear_poseidon2_load_cuda(
|
||||
poseidon2::Poseidon2<babybear::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t babybear_release_domain(device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t babybear_poseidon2_hash_many_cuda(
|
||||
const poseidon2::Poseidon2<babybear::scalar_t>* poseidon,
|
||||
const babybear::scalar_t* inputs,
|
||||
babybear::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t
|
||||
babybear_poseidon2_delete_cuda(poseidon2::Poseidon2<babybear::scalar_t>* poseidon, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t babybear_scalar_convert_montgomery(
|
||||
babybear::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t babybear_build_merkle_tree(
|
||||
const babybear::scalar_t* leaves,
|
||||
babybear::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<babybear::scalar_t, babybear::scalar_t>* compression,
|
||||
const hash::Hasher<babybear::scalar_t, babybear::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_mul_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_add_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_accumulate_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_sub_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_transpose_matrix_cuda(
|
||||
const babybear::extension_t* input,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
babybear::extension_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_bit_reverse_cuda(
|
||||
const babybear::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::extension_t* output);
|
||||
|
||||
|
||||
extern "C" void babybear_extension_generate_scalars(babybear::extension_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_scalar_convert_montgomery(
|
||||
babybear::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t babybear_mmcs_commit_cuda(
|
||||
const matrix::Matrix<babybear::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
babybear::scalar_t* digests,
|
||||
const hash::Hasher<babybear::scalar_t, babybear::scalar_t>* hasher,
|
||||
const hash::Hasher<babybear::scalar_t, babybear::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t babybear_mul_cuda(
|
||||
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
||||
@@ -87,35 +96,47 @@ extern "C" cudaError_t babybear_bit_reverse_cuda(
|
||||
const babybear::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" cudaError_t babybear_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const babybear::scalar_t* round_constants,
|
||||
const babybear::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t babybear_scalar_convert_montgomery(
|
||||
babybear::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t babybear_initialize_domain(
|
||||
babybear::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
|
||||
extern "C" cudaError_t babybear_ntt_cuda(
|
||||
const babybear::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t babybear_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void babybear_extension_generate_scalars(babybear::extension_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_scalar_convert_montgomery(
|
||||
babybear::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_mul_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_add_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_accumulate_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_sub_cuda(
|
||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t babybear_extension_transpose_matrix_cuda(
|
||||
const babybear::extension_t* input,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
babybear::extension_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t babybear_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* poseidon_constants);
|
||||
extern "C" cudaError_t babybear_extension_bit_reverse_cuda(
|
||||
const babybear::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::extension_t* output);
|
||||
|
||||
extern "C" cudaError_t babybear_poseidon2_hash_cuda(
|
||||
const babybear::scalar_t* input,
|
||||
babybear::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<babybear::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
|
||||
extern "C" cudaError_t babybear_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -9,26 +9,13 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bls12_377.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" bool bls12_377_g2_eq(bls12_377::g2_projective_t* point1, bls12_377::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bls12_377_g2_to_affine(bls12_377::g2_projective_t* point, bls12_377::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bls12_377_g2_generate_projective_points(bls12_377::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bls12_377_g2_generate_affine_points(bls12_377::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_affine_convert_montgomery(
|
||||
bls12_377::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_projective_convert_montgomery(
|
||||
bls12_377::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_precompute_msm_bases_cuda(
|
||||
bls12_377::g2_affine_t* bases,
|
||||
@@ -48,6 +35,20 @@ extern "C" cudaError_t bls12_377_precompute_msm_bases_cuda(
|
||||
extern "C" cudaError_t bls12_377_msm_cuda(
|
||||
const bls12_377::scalar_t* scalars, const bls12_377::affine_t* points, int msm_size, msm::MSMConfig& config, bls12_377::projective_t* out);
|
||||
|
||||
extern "C" bool bls12_377_g2_eq(bls12_377::g2_projective_t* point1, bls12_377::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bls12_377_g2_to_affine(bls12_377::g2_projective_t* point, bls12_377::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bls12_377_g2_generate_projective_points(bls12_377::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bls12_377_g2_generate_affine_points(bls12_377::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_affine_convert_montgomery(
|
||||
bls12_377::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_g2_projective_convert_montgomery(
|
||||
bls12_377::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_ecntt_cuda(
|
||||
const bls12_377::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::projective_t* output);
|
||||
|
||||
@@ -65,18 +66,52 @@ extern "C" cudaError_t bls12_377_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bls12_377_projective_convert_montgomery(
|
||||
bls12_377::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_initialize_domain(
|
||||
bls12_377::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
extern "C" cudaError_t bls12_377_build_merkle_tree(
|
||||
const bls12_377::scalar_t* leaves,
|
||||
bls12_377::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<bls12_377::scalar_t, bls12_377::scalar_t>* compression,
|
||||
const hash::Hasher<bls12_377::scalar_t, bls12_377::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_377_ntt_cuda(
|
||||
const bls12_377::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::scalar_t* output);
|
||||
extern "C" cudaError_t bls12_377_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bls12_377::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bls12_377::scalar_t* digests,
|
||||
const hash::Hasher<bls12_377::scalar_t, bls12_377::scalar_t>* hasher,
|
||||
const hash::Hasher<bls12_377::scalar_t, bls12_377::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_377_release_domain(device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bls12_377_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bls12_377::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bls12_377::scalar_t* round_constants,
|
||||
const bls12_377::scalar_t* mds_matrix,
|
||||
const bls12_377::scalar_t* non_sparse_matrix,
|
||||
const bls12_377::scalar_t* sparse_matrices,
|
||||
const bls12_377::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void bls12_377_generate_scalars(bls12_377::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t bls12_377_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bls12_377::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_scalar_convert_montgomery(
|
||||
bls12_377::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bls12_377_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bls12_377::scalar_t>* poseidon,
|
||||
const bls12_377::scalar_t* inputs,
|
||||
bls12_377::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bls12_377_poseidon_delete_cuda(poseidon::Poseidon<bls12_377::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bls12_377_mul_cuda(
|
||||
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
|
||||
@@ -103,31 +138,17 @@ extern "C" cudaError_t bls12_377_bit_reverse_cuda(
|
||||
const bls12_377::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bls12_377::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" cudaError_t bls12_377_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bls12_377::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bls12_377::scalar_t>* poseidon_constants);
|
||||
extern "C" void bls12_377_generate_scalars(bls12_377::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t bls12_377_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bls12_377::scalar_t>* constants);
|
||||
extern "C" cudaError_t bls12_377_scalar_convert_montgomery(
|
||||
bls12_377::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_377_poseidon_hash_cuda(
|
||||
bls12_377::scalar_t* input,
|
||||
bls12_377::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
extern "C" cudaError_t bls12_377_initialize_domain(
|
||||
bls12_377::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
|
||||
extern "C" cudaError_t bls12_377_build_poseidon_merkle_tree(
|
||||
const bls12_377::scalar_t* leaves,
|
||||
bls12_377::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t bls12_377_ntt_cuda(
|
||||
const bls12_377::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t bls12_377_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -9,26 +9,13 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bls12_381.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" bool bls12_381_g2_eq(bls12_381::g2_projective_t* point1, bls12_381::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bls12_381_g2_to_affine(bls12_381::g2_projective_t* point, bls12_381::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bls12_381_g2_generate_projective_points(bls12_381::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bls12_381_g2_generate_affine_points(bls12_381::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_affine_convert_montgomery(
|
||||
bls12_381::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_projective_convert_montgomery(
|
||||
bls12_381::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_precompute_msm_bases_cuda(
|
||||
bls12_381::g2_affine_t* bases,
|
||||
@@ -48,6 +35,20 @@ extern "C" cudaError_t bls12_381_precompute_msm_bases_cuda(
|
||||
extern "C" cudaError_t bls12_381_msm_cuda(
|
||||
const bls12_381::scalar_t* scalars, const bls12_381::affine_t* points, int msm_size, msm::MSMConfig& config, bls12_381::projective_t* out);
|
||||
|
||||
extern "C" bool bls12_381_g2_eq(bls12_381::g2_projective_t* point1, bls12_381::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bls12_381_g2_to_affine(bls12_381::g2_projective_t* point, bls12_381::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bls12_381_g2_generate_projective_points(bls12_381::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bls12_381_g2_generate_affine_points(bls12_381::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_affine_convert_montgomery(
|
||||
bls12_381::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_g2_projective_convert_montgomery(
|
||||
bls12_381::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_ecntt_cuda(
|
||||
const bls12_381::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_381::scalar_t>& config, bls12_381::projective_t* output);
|
||||
|
||||
@@ -65,18 +66,52 @@ extern "C" cudaError_t bls12_381_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bls12_381_projective_convert_montgomery(
|
||||
bls12_381::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_initialize_domain(
|
||||
bls12_381::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
extern "C" cudaError_t bls12_381_build_merkle_tree(
|
||||
const bls12_381::scalar_t* leaves,
|
||||
bls12_381::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<bls12_381::scalar_t, bls12_381::scalar_t>* compression,
|
||||
const hash::Hasher<bls12_381::scalar_t, bls12_381::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_381_ntt_cuda(
|
||||
const bls12_381::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_381::scalar_t>& config, bls12_381::scalar_t* output);
|
||||
extern "C" cudaError_t bls12_381_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bls12_381::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bls12_381::scalar_t* digests,
|
||||
const hash::Hasher<bls12_381::scalar_t, bls12_381::scalar_t>* hasher,
|
||||
const hash::Hasher<bls12_381::scalar_t, bls12_381::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bls12_381_release_domain(device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bls12_381_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bls12_381::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bls12_381::scalar_t* round_constants,
|
||||
const bls12_381::scalar_t* mds_matrix,
|
||||
const bls12_381::scalar_t* non_sparse_matrix,
|
||||
const bls12_381::scalar_t* sparse_matrices,
|
||||
const bls12_381::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void bls12_381_generate_scalars(bls12_381::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t bls12_381_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bls12_381::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_scalar_convert_montgomery(
|
||||
bls12_381::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bls12_381_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bls12_381::scalar_t>* poseidon,
|
||||
const bls12_381::scalar_t* inputs,
|
||||
bls12_381::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bls12_381_poseidon_delete_cuda(poseidon::Poseidon<bls12_381::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bls12_381_mul_cuda(
|
||||
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);
|
||||
@@ -103,31 +138,17 @@ extern "C" cudaError_t bls12_381_bit_reverse_cuda(
|
||||
const bls12_381::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bls12_381::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" cudaError_t bls12_381_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bls12_381::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bls12_381::scalar_t>* poseidon_constants);
|
||||
extern "C" void bls12_381_generate_scalars(bls12_381::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t bls12_381_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bls12_381::scalar_t>* constants);
|
||||
extern "C" cudaError_t bls12_381_scalar_convert_montgomery(
|
||||
bls12_381::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bls12_381_poseidon_hash_cuda(
|
||||
bls12_381::scalar_t* input,
|
||||
bls12_381::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bls12_381::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
extern "C" cudaError_t bls12_381_initialize_domain(
|
||||
bls12_381::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
|
||||
extern "C" cudaError_t bls12_381_build_poseidon_merkle_tree(
|
||||
const bls12_381::scalar_t* leaves,
|
||||
bls12_381::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bls12_381::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t bls12_381_ntt_cuda(
|
||||
const bls12_381::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_381::scalar_t>& config, bls12_381::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t bls12_381_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -9,28 +9,15 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bn254.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
extern "C" bool bn254_g2_eq(bn254::g2_projective_t* point1, bn254::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bn254_g2_to_affine(bn254::g2_projective_t* point, bn254::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bn254_g2_generate_projective_points(bn254::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bn254_g2_generate_affine_points(bn254::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bn254_g2_affine_convert_montgomery(
|
||||
bn254::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_g2_projective_convert_montgomery(
|
||||
bn254::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_g2_precompute_msm_bases_cuda(
|
||||
bn254::g2_affine_t* bases,
|
||||
int msm_size,
|
||||
@@ -49,6 +36,20 @@ extern "C" cudaError_t bn254_precompute_msm_bases_cuda(
|
||||
extern "C" cudaError_t bn254_msm_cuda(
|
||||
const bn254::scalar_t* scalars, const bn254::affine_t* points, int msm_size, msm::MSMConfig& config, bn254::projective_t* out);
|
||||
|
||||
extern "C" bool bn254_g2_eq(bn254::g2_projective_t* point1, bn254::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bn254_g2_to_affine(bn254::g2_projective_t* point, bn254::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bn254_g2_generate_projective_points(bn254::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bn254_g2_generate_affine_points(bn254::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bn254_g2_affine_convert_montgomery(
|
||||
bn254::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_g2_projective_convert_montgomery(
|
||||
bn254::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_ecntt_cuda(
|
||||
const bn254::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bn254::scalar_t>& config, bn254::projective_t* output);
|
||||
|
||||
@@ -66,18 +67,87 @@ extern "C" cudaError_t bn254_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bn254_projective_convert_montgomery(
|
||||
bn254::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_initialize_domain(
|
||||
bn254::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
extern "C" cudaError_t bn254_poseidon2_create_cuda(
|
||||
poseidon2::Poseidon2<bn254::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const bn254::scalar_t* round_constants,
|
||||
const bn254::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t bn254_ntt_cuda(
|
||||
const bn254::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bn254::scalar_t>& config, bn254::scalar_t* output);
|
||||
extern "C" cudaError_t bn254_poseidon2_load_cuda(
|
||||
poseidon2::Poseidon2<bn254::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t bn254_release_domain(device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bn254_poseidon2_hash_many_cuda(
|
||||
const poseidon2::Poseidon2<bn254::scalar_t>* poseidon,
|
||||
const bn254::scalar_t* inputs,
|
||||
bn254::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" void bn254_generate_scalars(bn254::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t
|
||||
bn254_poseidon2_delete_cuda(poseidon2::Poseidon2<bn254::scalar_t>* poseidon, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_scalar_convert_montgomery(
|
||||
bn254::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bn254_build_merkle_tree(
|
||||
const bn254::scalar_t* leaves,
|
||||
bn254::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<bn254::scalar_t, bn254::scalar_t>* compression,
|
||||
const hash::Hasher<bn254::scalar_t, bn254::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bn254_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bn254::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bn254::scalar_t* digests,
|
||||
const hash::Hasher<bn254::scalar_t, bn254::scalar_t>* hasher,
|
||||
const hash::Hasher<bn254::scalar_t, bn254::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bn254::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bn254::scalar_t* round_constants,
|
||||
const bn254::scalar_t* mds_matrix,
|
||||
const bn254::scalar_t* non_sparse_matrix,
|
||||
const bn254::scalar_t* sparse_matrices,
|
||||
const bn254::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bn254::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bn254::scalar_t>* poseidon,
|
||||
const bn254::scalar_t* inputs,
|
||||
bn254::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bn254_poseidon_delete_cuda(poseidon::Poseidon<bn254::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bn254_mul_cuda(
|
||||
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);
|
||||
@@ -104,62 +174,17 @@ extern "C" cudaError_t bn254_bit_reverse_cuda(
|
||||
const bn254::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bn254::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" cudaError_t bn254_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const bn254::scalar_t* round_constants,
|
||||
const bn254::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* poseidon_constants);
|
||||
extern "C" void bn254_generate_scalars(bn254::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t bn254_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* poseidon_constants);
|
||||
extern "C" cudaError_t bn254_scalar_convert_montgomery(
|
||||
bn254::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon2_hash_cuda(
|
||||
const bn254::scalar_t* input,
|
||||
bn254::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<bn254::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
extern "C" cudaError_t bn254_initialize_domain(
|
||||
bn254::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
|
||||
extern "C" cudaError_t bn254_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<bn254::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bn254_ntt_cuda(
|
||||
const bn254::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bn254::scalar_t>& config, bn254::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t bn254_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bn254::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bn254::scalar_t>* poseidon_constants);
|
||||
|
||||
extern "C" cudaError_t bn254_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bn254::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t bn254_poseidon_hash_cuda(
|
||||
bn254::scalar_t* input,
|
||||
bn254::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bn254::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t bn254_build_poseidon_merkle_tree(
|
||||
const bn254::scalar_t* leaves,
|
||||
bn254::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bn254::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t bn254_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -9,26 +9,13 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/bw6_761.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" bool bw6_761_g2_eq(bw6_761::g2_projective_t* point1, bw6_761::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bw6_761_g2_to_affine(bw6_761::g2_projective_t* point, bw6_761::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bw6_761_g2_generate_projective_points(bw6_761::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bw6_761_g2_generate_affine_points(bw6_761::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_affine_convert_montgomery(
|
||||
bw6_761::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_projective_convert_montgomery(
|
||||
bw6_761::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_precompute_msm_bases_cuda(
|
||||
bw6_761::g2_affine_t* bases,
|
||||
@@ -48,6 +35,20 @@ extern "C" cudaError_t bw6_761_precompute_msm_bases_cuda(
|
||||
extern "C" cudaError_t bw6_761_msm_cuda(
|
||||
const bw6_761::scalar_t* scalars, const bw6_761::affine_t* points, int msm_size, msm::MSMConfig& config, bw6_761::projective_t* out);
|
||||
|
||||
extern "C" bool bw6_761_g2_eq(bw6_761::g2_projective_t* point1, bw6_761::g2_projective_t* point2);
|
||||
|
||||
extern "C" void bw6_761_g2_to_affine(bw6_761::g2_projective_t* point, bw6_761::g2_affine_t* point_out);
|
||||
|
||||
extern "C" void bw6_761_g2_generate_projective_points(bw6_761::g2_projective_t* points, int size);
|
||||
|
||||
extern "C" void bw6_761_g2_generate_affine_points(bw6_761::g2_affine_t* points, int size);
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_affine_convert_montgomery(
|
||||
bw6_761::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_g2_projective_convert_montgomery(
|
||||
bw6_761::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_ecntt_cuda(
|
||||
const bw6_761::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bw6_761::scalar_t>& config, bw6_761::projective_t* output);
|
||||
|
||||
@@ -65,18 +66,52 @@ extern "C" cudaError_t bw6_761_affine_convert_montgomery(
|
||||
extern "C" cudaError_t bw6_761_projective_convert_montgomery(
|
||||
bw6_761::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_initialize_domain(
|
||||
bw6_761::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
extern "C" cudaError_t bw6_761_build_merkle_tree(
|
||||
const bw6_761::scalar_t* leaves,
|
||||
bw6_761::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<bw6_761::scalar_t, bw6_761::scalar_t>* compression,
|
||||
const hash::Hasher<bw6_761::scalar_t, bw6_761::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bw6_761_ntt_cuda(
|
||||
const bw6_761::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bw6_761::scalar_t>& config, bw6_761::scalar_t* output);
|
||||
extern "C" cudaError_t bw6_761_mmcs_commit_cuda(
|
||||
const matrix::Matrix<bw6_761::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
bw6_761::scalar_t* digests,
|
||||
const hash::Hasher<bw6_761::scalar_t, bw6_761::scalar_t>* hasher,
|
||||
const hash::Hasher<bw6_761::scalar_t, bw6_761::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t bw6_761_release_domain(device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bw6_761_poseidon_create_cuda(
|
||||
poseidon::Poseidon<bw6_761::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const bw6_761::scalar_t* round_constants,
|
||||
const bw6_761::scalar_t* mds_matrix,
|
||||
const bw6_761::scalar_t* non_sparse_matrix,
|
||||
const bw6_761::scalar_t* sparse_matrices,
|
||||
const bw6_761::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void bw6_761_generate_scalars(bw6_761::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t bw6_761_poseidon_load_cuda(
|
||||
poseidon::Poseidon<bw6_761::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_scalar_convert_montgomery(
|
||||
bw6_761::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t bw6_761_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<bw6_761::scalar_t>* poseidon,
|
||||
const bw6_761::scalar_t* inputs,
|
||||
bw6_761::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
bw6_761_poseidon_delete_cuda(poseidon::Poseidon<bw6_761::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t bw6_761_mul_cuda(
|
||||
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);
|
||||
@@ -103,31 +138,17 @@ extern "C" cudaError_t bw6_761_bit_reverse_cuda(
|
||||
const bw6_761::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bw6_761::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" cudaError_t bw6_761_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const bw6_761::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<bw6_761::scalar_t>* poseidon_constants);
|
||||
extern "C" void bw6_761_generate_scalars(bw6_761::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t bw6_761_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<bw6_761::scalar_t>* constants);
|
||||
extern "C" cudaError_t bw6_761_scalar_convert_montgomery(
|
||||
bw6_761::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t bw6_761_poseidon_hash_cuda(
|
||||
bw6_761::scalar_t* input,
|
||||
bw6_761::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<bw6_761::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
extern "C" cudaError_t bw6_761_initialize_domain(
|
||||
bw6_761::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
|
||||
extern "C" cudaError_t bw6_761_build_poseidon_merkle_tree(
|
||||
const bw6_761::scalar_t* leaves,
|
||||
bw6_761::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<bw6_761::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t bw6_761_ntt_cuda(
|
||||
const bw6_761::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bw6_761::scalar_t>& config, bw6_761::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t bw6_761_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -9,11 +9,12 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "curves/params/grumpkin.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t grumpkin_precompute_msm_bases_cuda(
|
||||
grumpkin::affine_t* bases,
|
||||
@@ -38,10 +39,52 @@ extern "C" cudaError_t grumpkin_affine_convert_montgomery(
|
||||
extern "C" cudaError_t grumpkin_projective_convert_montgomery(
|
||||
grumpkin::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void grumpkin_generate_scalars(grumpkin::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t grumpkin_build_merkle_tree(
|
||||
const grumpkin::scalar_t* leaves,
|
||||
grumpkin::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<grumpkin::scalar_t, grumpkin::scalar_t>* compression,
|
||||
const hash::Hasher<grumpkin::scalar_t, grumpkin::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_scalar_convert_montgomery(
|
||||
grumpkin::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t grumpkin_mmcs_commit_cuda(
|
||||
const matrix::Matrix<grumpkin::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
grumpkin::scalar_t* digests,
|
||||
const hash::Hasher<grumpkin::scalar_t, grumpkin::scalar_t>* hasher,
|
||||
const hash::Hasher<grumpkin::scalar_t, grumpkin::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_create_cuda(
|
||||
poseidon::Poseidon<grumpkin::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const grumpkin::scalar_t* round_constants,
|
||||
const grumpkin::scalar_t* mds_matrix,
|
||||
const grumpkin::scalar_t* non_sparse_matrix,
|
||||
const grumpkin::scalar_t* sparse_matrices,
|
||||
const grumpkin::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_load_cuda(
|
||||
poseidon::Poseidon<grumpkin::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<grumpkin::scalar_t>* poseidon,
|
||||
const grumpkin::scalar_t* inputs,
|
||||
grumpkin::scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t
|
||||
grumpkin_poseidon_delete_cuda(poseidon::Poseidon<grumpkin::scalar_t>* poseidon);
|
||||
|
||||
extern "C" cudaError_t grumpkin_mul_cuda(
|
||||
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);
|
||||
@@ -68,31 +111,9 @@ extern "C" cudaError_t grumpkin_bit_reverse_cuda(
|
||||
const grumpkin::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, grumpkin::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" cudaError_t grumpkin_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const grumpkin::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<grumpkin::scalar_t>* poseidon_constants);
|
||||
extern "C" void grumpkin_generate_scalars(grumpkin::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t grumpkin_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<grumpkin::scalar_t>* constants);
|
||||
|
||||
extern "C" cudaError_t grumpkin_poseidon_hash_cuda(
|
||||
grumpkin::scalar_t* input,
|
||||
grumpkin::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<grumpkin::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
|
||||
extern "C" cudaError_t grumpkin_build_poseidon_merkle_tree(
|
||||
const grumpkin::scalar_t* leaves,
|
||||
grumpkin::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<grumpkin::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t grumpkin_scalar_convert_montgomery(
|
||||
grumpkin::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -6,11 +6,25 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "hash/keccak/keccak.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::KeccakConfig& config);
|
||||
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::HashConfig& config);
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::KeccakConfig& config);
|
||||
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, keccak::HashConfig& config);
|
||||
|
||||
extern "C" cudaError_t build_keccak256_merkle_tree_cuda(
|
||||
const uint8_t* leaves,
|
||||
uint64_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t build_keccak512_merkle_tree_cuda(
|
||||
const uint8_t* leaves,
|
||||
uint64_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
#endif
|
||||
@@ -9,43 +9,27 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "fields/stark_fields/m31.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
extern "C" void m31_generate_scalars(m31::scalar_t* scalars, int size);
|
||||
extern "C" cudaError_t m31_build_merkle_tree(
|
||||
const m31::scalar_t* leaves,
|
||||
m31::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<m31::scalar_t, m31::scalar_t>* compression,
|
||||
const hash::Hasher<m31::scalar_t, m31::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t m31_scalar_convert_montgomery(
|
||||
m31::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t m31_extension_mul_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_add_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_accumulate_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t m31_extension_sub_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_transpose_matrix_cuda(
|
||||
const m31::extension_t* input,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
m31::extension_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t m31_extension_bit_reverse_cuda(
|
||||
const m31::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, m31::extension_t* output);
|
||||
|
||||
|
||||
extern "C" void m31_extension_generate_scalars(m31::extension_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t m31_extension_scalar_convert_montgomery(
|
||||
m31::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t m31_mmcs_commit_cuda(
|
||||
const matrix::Matrix<m31::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
m31::scalar_t* digests,
|
||||
const hash::Hasher<m31::scalar_t, m31::scalar_t>* hasher,
|
||||
const hash::Hasher<m31::scalar_t, m31::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t m31_mul_cuda(
|
||||
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);
|
||||
@@ -72,4 +56,39 @@ extern "C" cudaError_t m31_bit_reverse_cuda(
|
||||
const m31::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, m31::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void m31_generate_scalars(m31::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t m31_scalar_convert_montgomery(
|
||||
m31::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void m31_extension_generate_scalars(m31::extension_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t m31_extension_scalar_convert_montgomery(
|
||||
m31::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t m31_extension_mul_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_add_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_accumulate_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||
|
||||
extern "C" cudaError_t m31_extension_sub_cuda(
|
||||
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||
|
||||
extern "C" cudaError_t m31_extension_transpose_matrix_cuda(
|
||||
const m31::extension_t* input,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
m31::extension_t* output,
|
||||
device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
extern "C" cudaError_t m31_extension_bit_reverse_cuda(
|
||||
const m31::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, m31::extension_t* output);
|
||||
|
||||
|
||||
#endif
|
||||
@@ -9,22 +9,28 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "fields/stark_fields/stark252.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
extern "C" cudaError_t stark252_initialize_domain(
|
||||
stark252::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
extern "C" cudaError_t stark252_build_merkle_tree(
|
||||
const stark252::scalar_t* leaves,
|
||||
stark252::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<stark252::scalar_t, stark252::scalar_t>* compression,
|
||||
const hash::Hasher<stark252::scalar_t, stark252::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t stark252_ntt_cuda(
|
||||
const stark252::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<stark252::scalar_t>& config, stark252::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t stark252_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" void stark252_generate_scalars(stark252::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t stark252_scalar_convert_montgomery(
|
||||
stark252::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t stark252_mmcs_commit_cuda(
|
||||
const matrix::Matrix<stark252::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
stark252::scalar_t* digests,
|
||||
const hash::Hasher<stark252::scalar_t, stark252::scalar_t>* hasher,
|
||||
const hash::Hasher<stark252::scalar_t, stark252::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t stark252_mul_cuda(
|
||||
stark252::scalar_t* vec_a, stark252::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, stark252::scalar_t* result);
|
||||
@@ -51,4 +57,17 @@ extern "C" cudaError_t stark252_bit_reverse_cuda(
|
||||
const stark252::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, stark252::scalar_t* output);
|
||||
|
||||
|
||||
extern "C" void stark252_generate_scalars(stark252::scalar_t* scalars, int size);
|
||||
|
||||
extern "C" cudaError_t stark252_scalar_convert_montgomery(
|
||||
stark252::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t stark252_initialize_domain(
|
||||
stark252::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||
|
||||
extern "C" cudaError_t stark252_ntt_cuda(
|
||||
const stark252::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<stark252::scalar_t>& config, stark252::scalar_t* output);
|
||||
|
||||
extern "C" cudaError_t stark252_release_domain(device_context::DeviceContext& ctx);
|
||||
|
||||
#endif
|
||||
@@ -1,26 +1,29 @@
|
||||
extern "C" cudaError_t ${FIELD}_create_optimized_poseidon_constants_cuda(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const ${FIELD}::scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon::PoseidonConstants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_create_cuda(
|
||||
poseidon::Poseidon<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const ${FIELD}::scalar_t* round_constants,
|
||||
const ${FIELD}::scalar_t* mds_matrix,
|
||||
const ${FIELD}::scalar_t* non_sparse_matrix,
|
||||
const ${FIELD}::scalar_t* sparse_matrices,
|
||||
const ${FIELD}::scalar_t domain_tag,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_init_optimized_poseidon_constants_cuda(
|
||||
int arity, device_context::DeviceContext& ctx, poseidon::PoseidonConstants<${FIELD}::scalar_t>* constants);
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_load_cuda(
|
||||
poseidon::Poseidon<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int arity,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_hash_cuda(
|
||||
${FIELD}::scalar_t* input,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon_hash_many_cuda(
|
||||
const poseidon::Poseidon<${FIELD}::scalar_t>* poseidon,
|
||||
const ${FIELD}::scalar_t* inputs,
|
||||
${FIELD}::scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const poseidon::PoseidonConstants<${FIELD}::scalar_t>& constants,
|
||||
poseidon::PoseidonConfig& config);
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_build_poseidon_merkle_tree(
|
||||
const ${FIELD}::scalar_t* leaves,
|
||||
${FIELD}::scalar_t* digests,
|
||||
uint32_t height,
|
||||
int arity,
|
||||
poseidon::PoseidonConstants<${FIELD}::scalar_t>& constants,
|
||||
merkle::TreeBuilderConfig& config);
|
||||
extern "C" cudaError_t
|
||||
${FIELD}_poseidon_delete_cuda(poseidon::Poseidon<${FIELD}::scalar_t>* poseidon);
|
||||
@@ -1,30 +1,34 @@
|
||||
extern "C" cudaError_t ${FIELD}_create_poseidon2_constants_cuda(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_create_cuda(
|
||||
poseidon2::Poseidon2<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const ${FIELD}::scalar_t* round_constants,
|
||||
const ${FIELD}::scalar_t* internal_matrix_diag,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_init_poseidon2_constants_cuda(
|
||||
int width,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_load_cuda(
|
||||
poseidon2::Poseidon2<${FIELD}::scalar_t>** poseidon,
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
poseidon2::MdsType mds_type,
|
||||
poseidon2::DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* poseidon_constants);
|
||||
device_context::DeviceContext& ctx
|
||||
);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_hash_cuda(
|
||||
const ${FIELD}::scalar_t* input,
|
||||
extern "C" cudaError_t ${FIELD}_poseidon2_hash_many_cuda(
|
||||
const poseidon2::Poseidon2<${FIELD}::scalar_t>* poseidon,
|
||||
const ${FIELD}::scalar_t* inputs,
|
||||
${FIELD}::scalar_t* output,
|
||||
int number_of_states,
|
||||
int width,
|
||||
const poseidon2::Poseidon2Constants<${FIELD}::scalar_t>& constants,
|
||||
poseidon2::Poseidon2Config& config);
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
hash::HashConfig& cfg);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_release_poseidon2_constants_cuda(
|
||||
poseidon2::Poseidon2Constants<${FIELD}::scalar_t>* constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
extern "C" cudaError_t
|
||||
${FIELD}_poseidon2_delete_cuda(poseidon2::Poseidon2<${FIELD}::scalar_t>* poseidon, device_context::DeviceContext& ctx);
|
||||
16
icicle/include/api/templates/fields/tree.h
Normal file
16
icicle/include/api/templates/fields/tree.h
Normal file
@@ -0,0 +1,16 @@
|
||||
extern "C" cudaError_t ${FIELD}_build_merkle_tree(
|
||||
const ${FIELD}::scalar_t* leaves,
|
||||
${FIELD}::scalar_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* compression,
|
||||
const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* bottom_layer,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
|
||||
extern "C" cudaError_t ${FIELD}_mmcs_commit_cuda(
|
||||
const matrix::Matrix<${FIELD}::scalar_t>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
${FIELD}::scalar_t* digests,
|
||||
const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* hasher,
|
||||
const hash::Hasher<${FIELD}::scalar_t, ${FIELD}::scalar_t>* compression,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config);
|
||||
@@ -44,7 +44,7 @@ public:
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE Field from(uint32_t value)
|
||||
{
|
||||
storage<TLC> scalar;
|
||||
storage<TLC> scalar{};
|
||||
scalar.limbs[0] = value;
|
||||
for (int i = 1; i < TLC; i++) {
|
||||
scalar.limbs[i] = 0;
|
||||
@@ -58,8 +58,10 @@ public:
|
||||
|
||||
if (logn > CONFIG::omegas_count) { THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Field: Invalid omega index"); }
|
||||
|
||||
storage_array<CONFIG::omegas_count, TLC> const omega = CONFIG::omega;
|
||||
return Field{omega.storages[logn - 1]};
|
||||
Field omega = Field{CONFIG::rou};
|
||||
for (int i = 0; i < CONFIG::omegas_count - logn; i++)
|
||||
omega = sqr(omega);
|
||||
return omega;
|
||||
}
|
||||
|
||||
static HOST_INLINE Field omega_inv(uint32_t logn)
|
||||
@@ -70,8 +72,10 @@ public:
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Field: Invalid omega_inv index");
|
||||
}
|
||||
|
||||
storage_array<CONFIG::omegas_count, TLC> const omega_inv = CONFIG::omega_inv;
|
||||
return Field{omega_inv.storages[logn - 1]};
|
||||
Field omega = inverse(Field{CONFIG::rou});
|
||||
for (int i = 0; i < CONFIG::omegas_count - logn; i++)
|
||||
omega = sqr(omega);
|
||||
return omega;
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Field inv_log_size(uint32_t logn)
|
||||
@@ -120,6 +124,19 @@ public:
|
||||
*/
|
||||
static constexpr HOST_DEVICE_INLINE unsigned num_of_reductions() { return CONFIG::num_of_reductions; }
|
||||
|
||||
// count number of bits of the field element without leading zeros.
|
||||
static constexpr HOST_DEVICE_INLINE unsigned num_bits(const Field& x)
|
||||
{
|
||||
size_t size = sizeof(x.limbs_storage.limbs[0]) * 8;
|
||||
unsigned ret = size * TLC;
|
||||
for (unsigned i = TLC; i-- > 0;) {
|
||||
int leading = __clz(x.limbs_storage.limbs[i]);
|
||||
ret -= leading;
|
||||
if (leading != size) { break; }
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static constexpr unsigned slack_bits = 32 * TLC - NBITS;
|
||||
|
||||
struct Wide {
|
||||
@@ -182,7 +199,7 @@ public:
|
||||
if (REDUCTION_SIZE == 0) return xs;
|
||||
const ff_wide_storage modulus = get_modulus_squared<REDUCTION_SIZE>();
|
||||
Wide rs = {};
|
||||
return sub_limbs<true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
|
||||
return sub_limbs<2 * TLC, true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
@@ -190,24 +207,24 @@ public:
|
||||
{
|
||||
const ff_wide_storage modulus = get_modulus_squared<MODULUS_MULTIPLE>();
|
||||
Wide rs = {};
|
||||
sub_limbs<false>(modulus, xs.limbs_storage, rs.limbs_storage);
|
||||
sub_limbs<2 * TLC, false>(modulus, xs.limbs_storage, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Wide operator+(Wide xs, const Wide& ys)
|
||||
{
|
||||
Wide rs = {};
|
||||
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
add_limbs<2 * TLC, false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return sub_modulus_squared<1>(rs);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Wide operator-(Wide xs, const Wide& ys)
|
||||
{
|
||||
Wide rs = {};
|
||||
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
uint32_t carry = sub_limbs<2 * TLC, true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
if (carry == 0) return rs;
|
||||
const ff_wide_storage modulus = get_modulus_squared<1>();
|
||||
add_limbs<false>(rs.limbs_storage, modulus, rs.limbs_storage);
|
||||
add_limbs<2 * TLC, false>(rs.limbs_storage, modulus, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
};
|
||||
@@ -228,12 +245,6 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned MULTIPLIER = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ff_wide_storage modulus_wide()
|
||||
{
|
||||
return CONFIG::modulus_wide;
|
||||
}
|
||||
|
||||
// return m
|
||||
static constexpr HOST_DEVICE_INLINE ff_storage get_m() { return CONFIG::m; }
|
||||
|
||||
@@ -253,12 +264,11 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr DEVICE_INLINE uint32_t
|
||||
add_sub_u32_device(const uint32_t* x, const uint32_t* y, uint32_t* r, size_t n = (TLC >> 1))
|
||||
template <unsigned NLIMBS, bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr DEVICE_INLINE uint32_t add_sub_u32_device(const uint32_t* x, const uint32_t* y, uint32_t* r)
|
||||
{
|
||||
r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
|
||||
for (unsigned i = 1; i < n; i++)
|
||||
for (unsigned i = 1; i < NLIMBS; i++)
|
||||
r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
|
||||
if (!CARRY_OUT) {
|
||||
ptx::addc(0, 0);
|
||||
@@ -267,71 +277,35 @@ public:
|
||||
return SUBTRACT ? ptx::subc(0, 0) : ptx::addc(0, 0);
|
||||
}
|
||||
|
||||
// add or subtract limbs
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
template <unsigned NLIMBS, bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr DEVICE_INLINE uint32_t
|
||||
add_sub_limbs_device(const ff_storage& xs, const ff_storage& ys, ff_storage& rs)
|
||||
add_sub_limbs_device(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
return add_sub_u32_device<SUBTRACT, CARRY_OUT>(x, y, r, TLC);
|
||||
return add_sub_u32_device<NLIMBS, SUBTRACT, CARRY_OUT>(x, y, r);
|
||||
}
|
||||
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr DEVICE_INLINE uint32_t
|
||||
add_sub_limbs_device(const ff_wide_storage& xs, const ff_wide_storage& ys, ff_wide_storage& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
return add_sub_u32_device<SUBTRACT, CARRY_OUT>(x, y, r, 2 * TLC);
|
||||
}
|
||||
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr HOST_INLINE uint32_t add_sub_limbs_host(const ff_storage& xs, const ff_storage& ys, ff_storage& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
uint32_t carry = 0;
|
||||
host_math::carry_chain<TLC, false, CARRY_OUT> chain;
|
||||
for (unsigned i = 0; i < TLC; i++)
|
||||
r[i] = SUBTRACT ? chain.sub(x[i], y[i], carry) : chain.add(x[i], y[i], carry);
|
||||
return CARRY_OUT ? carry : 0;
|
||||
}
|
||||
|
||||
template <bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr HOST_INLINE uint32_t
|
||||
add_sub_limbs_host(const ff_wide_storage& xs, const ff_wide_storage& ys, ff_wide_storage& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
uint32_t carry = 0;
|
||||
host_math::carry_chain<2 * TLC, false, CARRY_OUT> chain;
|
||||
for (unsigned i = 0; i < 2 * TLC; i++)
|
||||
r[i] = SUBTRACT ? chain.sub(x[i], y[i], carry) : chain.add(x[i], y[i], carry);
|
||||
return CARRY_OUT ? carry : 0;
|
||||
}
|
||||
|
||||
template <bool CARRY_OUT, typename T>
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t add_limbs(const T& xs, const T& ys, T& rs)
|
||||
template <unsigned NLIMBS, bool CARRY_OUT>
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t
|
||||
add_limbs(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
return add_sub_limbs_device<false, CARRY_OUT>(xs, ys, rs);
|
||||
return add_sub_limbs_device<NLIMBS, false, CARRY_OUT>(xs, ys, rs);
|
||||
#else
|
||||
return add_sub_limbs_host<false, CARRY_OUT>(xs, ys, rs);
|
||||
return host_math::template add_sub_limbs<NLIMBS, false, CARRY_OUT>(xs, ys, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <bool CARRY_OUT, typename T>
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t sub_limbs(const T& xs, const T& ys, T& rs)
|
||||
template <unsigned NLIMBS, bool CARRY_OUT>
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t
|
||||
sub_limbs(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
return add_sub_limbs_device<true, CARRY_OUT>(xs, ys, rs);
|
||||
return add_sub_limbs_device<NLIMBS, true, CARRY_OUT>(xs, ys, rs);
|
||||
#else
|
||||
return add_sub_limbs_host<true, CARRY_OUT>(xs, ys, rs);
|
||||
return host_math::template add_sub_limbs<NLIMBS, true, CARRY_OUT>(xs, ys, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -531,7 +505,7 @@ public:
|
||||
// are necessarily NTT-friendly, `b[0]` often turns out to be \f$ 2^{32} - 1 \f$. This actually leads to
|
||||
// less efficient SASS generated by nvcc, so this case needed separate handling.
|
||||
if (b[0] == UINT32_MAX) {
|
||||
add_sub_u32_device<true, false>(c, a, even, TLC);
|
||||
add_sub_u32_device<TLC, true, false>(c, a, even);
|
||||
for (i = 0; i < TLC - 1; i++)
|
||||
odd[i] = a[i];
|
||||
} else {
|
||||
@@ -639,17 +613,18 @@ public:
|
||||
__align__(16) uint32_t diffs[TLC];
|
||||
// Differences of halves \f$ a_{hi} - a_{lo}; b_{lo} - b_{hi} \f$ are written into `diffs`, signs written to
|
||||
// `carry1` and `carry2`.
|
||||
uint32_t carry1 = add_sub_u32_device<true, true>(&a[TLC >> 1], a, diffs);
|
||||
uint32_t carry2 = add_sub_u32_device<true, true>(b, &b[TLC >> 1], &diffs[TLC >> 1]);
|
||||
uint32_t carry1 = add_sub_u32_device<(TLC >> 1), true, true>(&a[TLC >> 1], a, diffs);
|
||||
uint32_t carry2 = add_sub_u32_device<(TLC >> 1), true, true>(b, &b[TLC >> 1], &diffs[TLC >> 1]);
|
||||
// Compute the "middle part" of Karatsuba: \f$ a_{lo} \cdot b_{hi} + b_{lo} \cdot a_{hi} \f$.
|
||||
// This is where the assumption about unset high bit of `a` and `b` is relevant.
|
||||
multiply_and_add_short_raw_device(diffs, &diffs[TLC >> 1], middle_part, r, &r[TLC]);
|
||||
// Corrections that need to be performed when differences are negative.
|
||||
// Again, carry doesn't need to be propagated due to unset high bits of `a` and `b`.
|
||||
if (carry1) add_sub_u32_device<true, false>(&middle_part[TLC >> 1], &diffs[TLC >> 1], &middle_part[TLC >> 1]);
|
||||
if (carry2) add_sub_u32_device<true, false>(&middle_part[TLC >> 1], diffs, &middle_part[TLC >> 1]);
|
||||
if (carry1)
|
||||
add_sub_u32_device<(TLC >> 1), true, false>(&middle_part[TLC >> 1], &diffs[TLC >> 1], &middle_part[TLC >> 1]);
|
||||
if (carry2) add_sub_u32_device<(TLC >> 1), true, false>(&middle_part[TLC >> 1], diffs, &middle_part[TLC >> 1]);
|
||||
// Now that middle part is fully correct, it can be added to the result.
|
||||
add_sub_u32_device<false, true>(&r[TLC >> 1], middle_part, &r[TLC >> 1], TLC);
|
||||
add_sub_u32_device<TLC, false, true>(&r[TLC >> 1], middle_part, &r[TLC >> 1]);
|
||||
|
||||
// Carry from adding middle part has to be propagated to the highest limb.
|
||||
for (size_t i = TLC + (TLC >> 1); i < 2 * TLC; i++)
|
||||
@@ -673,25 +648,12 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
static HOST_INLINE void multiply_raw_host(const ff_storage& as, const ff_storage& bs, ff_wide_storage& rs)
|
||||
{
|
||||
const uint32_t* a = as.limbs;
|
||||
const uint32_t* b = bs.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
for (unsigned i = 0; i < TLC; i++) {
|
||||
uint32_t carry = 0;
|
||||
for (unsigned j = 0; j < TLC; j++)
|
||||
r[j + i] = host_math::madc_cc(a[j], b[i], r[j + i], carry);
|
||||
r[TLC + i] = carry;
|
||||
}
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE void multiply_raw(const ff_storage& as, const ff_storage& bs, ff_wide_storage& rs)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
return multiply_raw_device(as, bs, rs);
|
||||
#else
|
||||
return multiply_raw_host(as, bs, rs);
|
||||
return host_math::template multiply_raw<TLC>(as, bs, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -702,9 +664,9 @@ public:
|
||||
return multiply_and_add_lsb_neg_modulus_raw_device(as, cs, rs);
|
||||
#else
|
||||
Wide r_wide = {};
|
||||
multiply_raw_host(as, get_neg_modulus(), r_wide.limbs_storage);
|
||||
host_math::template multiply_raw<TLC>(as, get_neg_modulus(), r_wide.limbs_storage);
|
||||
Field r = Wide::get_lower(r_wide);
|
||||
add_limbs<false>(cs, r.limbs_storage, rs);
|
||||
add_limbs<TLC, false>(cs, r.limbs_storage, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -713,7 +675,7 @@ public:
|
||||
#ifdef __CUDA_ARCH__
|
||||
return multiply_msb_raw_device(as, bs, rs);
|
||||
#else
|
||||
return multiply_raw_host(as, bs, rs);
|
||||
return host_math::template multiply_raw<TLC>(as, bs, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -759,7 +721,7 @@ public:
|
||||
if (REDUCTION_SIZE == 0) return xs;
|
||||
const ff_storage modulus = get_modulus<REDUCTION_SIZE>();
|
||||
Field rs = {};
|
||||
return sub_limbs<true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
|
||||
return sub_limbs<TLC, true>(xs.limbs_storage, modulus, rs.limbs_storage) ? xs : rs;
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const Field& xs)
|
||||
@@ -778,17 +740,17 @@ public:
|
||||
friend HOST_DEVICE_INLINE Field operator+(Field xs, const Field& ys)
|
||||
{
|
||||
Field rs = {};
|
||||
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
add_limbs<TLC, false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return sub_modulus<1>(rs);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator-(Field xs, const Field& ys)
|
||||
{
|
||||
Field rs = {};
|
||||
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
uint32_t carry = sub_limbs<TLC, true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
if (carry == 0) return rs;
|
||||
const ff_storage modulus = get_modulus<1>();
|
||||
add_limbs<false>(rs.limbs_storage, modulus, rs.limbs_storage);
|
||||
add_limbs<TLC, false>(rs.limbs_storage, modulus, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
@@ -838,15 +800,23 @@ public:
|
||||
uint32_t carry;
|
||||
// As mentioned, either 2 or 1 reduction can be performed depending on the field in question.
|
||||
if (num_of_reductions() == 2) {
|
||||
carry = sub_limbs<true>(r.limbs_storage, get_modulus<2>(), r_reduced);
|
||||
carry = sub_limbs<TLC, true>(r.limbs_storage, get_modulus<2>(), r_reduced);
|
||||
if (carry == 0) r = Field{r_reduced};
|
||||
}
|
||||
carry = sub_limbs<true>(r.limbs_storage, get_modulus<1>(), r_reduced);
|
||||
carry = sub_limbs<TLC, true>(r.limbs_storage, get_modulus<1>(), r_reduced);
|
||||
if (carry == 0) r = Field{r_reduced};
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
HOST_DEVICE_INLINE Field& operator=(Field const& other)
|
||||
{
|
||||
for (int i = 0; i < TLC; i++) {
|
||||
this->limbs_storage.limbs[i] = other.limbs_storage.limbs[i];
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator*(const Field& xs, const Field& ys)
|
||||
{
|
||||
Wide xy = mul_wide(xs, ys); // full mult
|
||||
@@ -933,7 +903,7 @@ public:
|
||||
{
|
||||
const ff_storage modulus = get_modulus<MODULUS_MULTIPLE>();
|
||||
Field rs = {};
|
||||
sub_limbs<false>(modulus, xs.limbs_storage, rs.limbs_storage);
|
||||
sub_limbs<TLC, false>(modulus, xs.limbs_storage, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
@@ -963,7 +933,7 @@ public:
|
||||
static constexpr HOST_DEVICE_INLINE bool lt(const Field& xs, const Field& ys)
|
||||
{
|
||||
ff_storage dummy = {};
|
||||
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, dummy);
|
||||
uint32_t carry = sub_limbs<TLC, true>(xs.limbs_storage, ys.limbs_storage, dummy);
|
||||
return carry;
|
||||
}
|
||||
|
||||
@@ -983,12 +953,12 @@ public:
|
||||
while (!(u == one) && !(v == one)) {
|
||||
while (is_even(u)) {
|
||||
u = div2(u);
|
||||
if (is_odd(b)) add_limbs<false>(b.limbs_storage, modulus, b.limbs_storage);
|
||||
if (is_odd(b)) add_limbs<TLC, false>(b.limbs_storage, modulus, b.limbs_storage);
|
||||
b = div2(b);
|
||||
}
|
||||
while (is_even(v)) {
|
||||
v = div2(v);
|
||||
if (is_odd(c)) add_limbs<false>(c.limbs_storage, modulus, c.limbs_storage);
|
||||
if (is_odd(c)) add_limbs<TLC, false>(c.limbs_storage, modulus, c.limbs_storage);
|
||||
c = div2(c);
|
||||
}
|
||||
if (lt(v, u)) {
|
||||
|
||||
@@ -5,25 +5,29 @@
|
||||
#include <cstdint>
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "storage.cuh"
|
||||
|
||||
namespace host_math {
|
||||
|
||||
// return x + y with uint32_t operands
|
||||
static __host__ uint32_t add(const uint32_t x, const uint32_t y) { return x + y; }
|
||||
static constexpr __host__ uint32_t add(const uint32_t x, const uint32_t y) { return x + y; }
|
||||
|
||||
// return x + y + carry with uint32_t operands
|
||||
static __host__ uint32_t addc(const uint32_t x, const uint32_t y, const uint32_t carry) { return x + y + carry; }
|
||||
static constexpr __host__ uint32_t addc(const uint32_t x, const uint32_t y, const uint32_t carry)
|
||||
{
|
||||
return x + y + carry;
|
||||
}
|
||||
|
||||
// return x + y and carry out with uint32_t operands
|
||||
static __host__ uint32_t add_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
static constexpr __host__ uint32_t add_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
uint32_t result;
|
||||
result = x + y;
|
||||
uint32_t result = x + y;
|
||||
carry = x > result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return x + y + carry and carry out with uint32_t operands
|
||||
static __host__ uint32_t addc_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
static constexpr __host__ uint32_t addc_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
const uint32_t result = x + y + carry;
|
||||
carry = carry && x >= result || !carry && x > result;
|
||||
@@ -31,22 +35,24 @@ namespace host_math {
|
||||
}
|
||||
|
||||
// return x - y with uint32_t operands
|
||||
static __host__ uint32_t sub(const uint32_t x, const uint32_t y) { return x - y; }
|
||||
static constexpr __host__ uint32_t sub(const uint32_t x, const uint32_t y) { return x - y; }
|
||||
|
||||
// return x - y - borrow with uint32_t operands
|
||||
static __host__ uint32_t subc(const uint32_t x, const uint32_t y, const uint32_t borrow) { return x - y - borrow; }
|
||||
|
||||
// return x - y and borrow out with uint32_t operands
|
||||
static __host__ uint32_t sub_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
// return x - y - borrow with uint32_t operands
|
||||
static constexpr __host__ uint32_t subc(const uint32_t x, const uint32_t y, const uint32_t borrow)
|
||||
{
|
||||
uint32_t result;
|
||||
result = x - y;
|
||||
return x - y - borrow;
|
||||
}
|
||||
|
||||
// return x - y and borrow out with uint32_t operands
|
||||
static constexpr __host__ uint32_t sub_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
{
|
||||
uint32_t result = x - y;
|
||||
borrow = x < result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return x - y - borrow and borrow out with uint32_t operands
|
||||
static __host__ uint32_t subc_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
// return x - y - borrow and borrow out with uint32_t operands
|
||||
static constexpr __host__ uint32_t subc_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
{
|
||||
const uint32_t result = x - y - borrow;
|
||||
borrow = borrow && x <= result || !borrow && x < result;
|
||||
@@ -54,12 +60,11 @@ namespace host_math {
|
||||
}
|
||||
|
||||
// return x * y + z + carry and carry out with uint32_t operands
|
||||
static __host__ uint32_t madc_cc(const uint32_t x, const uint32_t y, const uint32_t z, uint32_t& carry)
|
||||
static constexpr __host__ uint32_t madc_cc(const uint32_t x, const uint32_t y, const uint32_t z, uint32_t& carry)
|
||||
{
|
||||
uint32_t result;
|
||||
uint64_t r = static_cast<uint64_t>(x) * y + z + carry;
|
||||
carry = (uint32_t)(r >> 32);
|
||||
result = r & 0xffffffff;
|
||||
uint32_t result = r & 0xffffffff;
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -69,7 +74,7 @@ namespace host_math {
|
||||
|
||||
constexpr HOST_INLINE carry_chain() : index(0) {}
|
||||
|
||||
HOST_INLINE uint32_t add(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
constexpr HOST_INLINE uint32_t add(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
index++;
|
||||
if (index == 1 && OPS_COUNT == 1 && !CARRY_IN && !CARRY_OUT)
|
||||
@@ -82,7 +87,7 @@ namespace host_math {
|
||||
return host_math::addc(x, y, carry);
|
||||
}
|
||||
|
||||
HOST_INLINE uint32_t sub(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
constexpr HOST_INLINE uint32_t sub(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
index++;
|
||||
if (index == 1 && OPS_COUNT == 1 && !CARRY_IN && !CARRY_OUT)
|
||||
@@ -95,6 +100,89 @@ namespace host_math {
|
||||
return host_math::subc(x, y, carry);
|
||||
}
|
||||
};
|
||||
|
||||
template <unsigned NLIMBS_A, unsigned NLIMBS_B = NLIMBS_A>
|
||||
static constexpr HOST_INLINE void
|
||||
multiply_raw(const storage<NLIMBS_A>& as, const storage<NLIMBS_B>& bs, storage<NLIMBS_A + NLIMBS_B>& rs)
|
||||
{
|
||||
const uint32_t* a = as.limbs;
|
||||
const uint32_t* b = bs.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
for (unsigned i = 0; i < NLIMBS_B; i++) {
|
||||
uint32_t carry = 0;
|
||||
for (unsigned j = 0; j < NLIMBS_A; j++)
|
||||
r[j + i] = host_math::madc_cc(a[j], b[i], r[j + i], carry);
|
||||
r[NLIMBS_A + i] = carry;
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, bool SUBTRACT, bool CARRY_OUT>
|
||||
static constexpr HOST_INLINE uint32_t
|
||||
add_sub_limbs(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys, storage<NLIMBS>& rs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs;
|
||||
const uint32_t* y = ys.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
uint32_t carry = 0;
|
||||
carry_chain<NLIMBS, false, CARRY_OUT> chain;
|
||||
for (unsigned i = 0; i < NLIMBS; i++)
|
||||
r[i] = SUBTRACT ? chain.sub(x[i], y[i], carry) : chain.add(x[i], y[i], carry);
|
||||
return CARRY_OUT ? carry : 0;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned BITS>
|
||||
static constexpr HOST_INLINE storage<NLIMBS> left_shift(const storage<NLIMBS>& xs)
|
||||
{
|
||||
if constexpr (BITS == 0)
|
||||
return xs;
|
||||
else {
|
||||
constexpr unsigned BITS32 = BITS % 32;
|
||||
constexpr unsigned LIMBS_GAP = BITS / 32;
|
||||
storage<NLIMBS> out{};
|
||||
if constexpr (LIMBS_GAP < NLIMBS) {
|
||||
out.limbs[LIMBS_GAP] = xs.limbs[0] << BITS32;
|
||||
for (unsigned i = 1; i < NLIMBS - LIMBS_GAP; i++)
|
||||
out.limbs[i + LIMBS_GAP] = (xs.limbs[i] << BITS32) + (xs.limbs[i - 1] >> (32 - BITS32));
|
||||
}
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned BITS>
|
||||
static constexpr HOST_INLINE storage<NLIMBS> right_shift(const storage<NLIMBS>& xs)
|
||||
{
|
||||
if constexpr (BITS == 0)
|
||||
return xs;
|
||||
else {
|
||||
constexpr unsigned BITS32 = BITS % 32;
|
||||
constexpr unsigned LIMBS_GAP = BITS / 32;
|
||||
storage<NLIMBS> out{};
|
||||
if constexpr (LIMBS_GAP < NLIMBS - 1) {
|
||||
for (unsigned i = 0; i < NLIMBS - LIMBS_GAP - 1; i++)
|
||||
out.limbs[i] = (xs.limbs[i + LIMBS_GAP] >> BITS32) + (xs.limbs[i + LIMBS_GAP + 1] << (32 - BITS32));
|
||||
}
|
||||
if constexpr (LIMBS_GAP < NLIMBS) out.limbs[NLIMBS - LIMBS_GAP - 1] = (xs.limbs[NLIMBS - 1] >> BITS32);
|
||||
return out;
|
||||
}
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS_NUM, unsigned NLIMBS_DENOM, unsigned NLIMBS_Q = (NLIMBS_NUM - NLIMBS_DENOM)>
|
||||
static constexpr HOST_INLINE void integer_division(
|
||||
const storage<NLIMBS_NUM>& num, const storage<NLIMBS_DENOM>& denom, storage<NLIMBS_Q>& q, storage<NLIMBS_DENOM>& r)
|
||||
{
|
||||
storage<NLIMBS_DENOM> temp = {};
|
||||
for (int limb_idx = NLIMBS_NUM - 1; limb_idx >= 0; limb_idx--) {
|
||||
for (int bit_idx = 31; bit_idx >= 0; bit_idx--) {
|
||||
r = left_shift<NLIMBS_DENOM, 1>(r);
|
||||
r.limbs[0] |= ((num.limbs[limb_idx] >> bit_idx) & 1);
|
||||
uint32_t c = add_sub_limbs<NLIMBS_DENOM, true, true>(r, denom, temp);
|
||||
if (limb_idx < NLIMBS_Q & !c) {
|
||||
r = temp;
|
||||
q.limbs[limb_idx] |= 1 << bit_idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace host_math
|
||||
|
||||
#endif
|
||||
#endif
|
||||
129
icicle/include/fields/params_gen.cuh
Normal file
129
icicle/include/fields/params_gen.cuh
Normal file
@@ -0,0 +1,129 @@
|
||||
#pragma once
|
||||
#ifndef PARAMS_GEN_H
|
||||
#define PARAMS_GEN_H
|
||||
|
||||
#include "storage.cuh"
|
||||
#include "host_math.cuh"
|
||||
|
||||
namespace params_gen {
|
||||
template <unsigned NLIMBS, unsigned BIT_SHIFT>
|
||||
static constexpr HOST_INLINE storage<2 * NLIMBS> get_square(const storage<NLIMBS>& xs)
|
||||
{
|
||||
storage<2 * NLIMBS> rs = {};
|
||||
host_math::template multiply_raw<NLIMBS>(xs, xs, rs);
|
||||
return host_math::template left_shift<2 * NLIMBS, BIT_SHIFT>(rs);
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS>
|
||||
static constexpr HOST_INLINE storage<NLIMBS>
|
||||
get_difference_no_carry(const storage<NLIMBS>& xs, const storage<NLIMBS>& ys)
|
||||
{
|
||||
storage<NLIMBS> rs = {};
|
||||
host_math::template add_sub_limbs<NLIMBS, true, false>(xs, ys, rs);
|
||||
return rs;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned EXP>
|
||||
static constexpr HOST_INLINE storage<NLIMBS> get_m(const storage<NLIMBS>& modulus)
|
||||
{
|
||||
storage<NLIMBS> rs = {};
|
||||
storage<NLIMBS> qs = {};
|
||||
storage<2 * NLIMBS> wide_one = {1};
|
||||
storage<2 * NLIMBS> pow_of_2 = host_math::template left_shift<2 * NLIMBS, EXP>(wide_one);
|
||||
host_math::template integer_division<2 * NLIMBS, NLIMBS>(pow_of_2, modulus, qs, rs);
|
||||
return qs;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, bool INV>
|
||||
static constexpr HOST_INLINE storage<NLIMBS> get_montgomery_constant(const storage<NLIMBS>& modulus)
|
||||
{
|
||||
storage<NLIMBS> rs = {1};
|
||||
for (int i = 0; i < 32 * NLIMBS; i++) {
|
||||
if (INV) {
|
||||
if (rs.limbs[0] & 1) host_math::template add_sub_limbs<NLIMBS, false, false>(rs, modulus, rs);
|
||||
rs = host_math::template right_shift<NLIMBS, 1>(rs);
|
||||
} else {
|
||||
rs = host_math::template left_shift<NLIMBS, 1>(rs);
|
||||
storage<NLIMBS> temp = {};
|
||||
rs = host_math::template add_sub_limbs<NLIMBS, true, true>(rs, modulus, temp) ? rs : temp;
|
||||
}
|
||||
}
|
||||
return rs;
|
||||
}
|
||||
|
||||
// Returns floor(log2(x)) for x >= 1, i.e. the index of the highest set bit.
// x == 0 has no logarithm; it is mapped to 0 so the function terminates
// instead of recursing without end.
constexpr unsigned floorlog2(uint32_t x)
{
  unsigned log = 0;
  while (x > 1) {
    x >>= 1;
    ++log;
  }
  return log;
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned NBITS>
|
||||
constexpr unsigned num_of_reductions(const storage<NLIMBS>& modulus, const storage<NLIMBS>& m)
|
||||
{
|
||||
storage<2 * NLIMBS> x1 = {};
|
||||
storage<3 * NLIMBS> x2 = {};
|
||||
storage<3 * NLIMBS> x3 = {};
|
||||
host_math::template multiply_raw<NLIMBS>(modulus, m, x1);
|
||||
host_math::template multiply_raw<NLIMBS, 2 * NLIMBS>(modulus, x1, x2);
|
||||
storage<2 * NLIMBS> one = {1};
|
||||
storage<2 * NLIMBS> pow_of_2 = host_math::template left_shift<2 * NLIMBS, NBITS>(one);
|
||||
host_math::template multiply_raw<NLIMBS, 2 * NLIMBS>(modulus, pow_of_2, x3);
|
||||
host_math::template add_sub_limbs<3 * NLIMBS, true, false>(x3, x2, x2);
|
||||
double err = (double)x2.limbs[2 * NLIMBS - 1] / pow_of_2.limbs[2 * NLIMBS - 1];
|
||||
err += (double)m.limbs[NLIMBS - 1] / 0xffffffff;
|
||||
err += (double)NLIMBS / 0x80000000;
|
||||
return unsigned(err) + 1;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS>
|
||||
constexpr unsigned two_adicity(const storage<NLIMBS>& modulus)
|
||||
{
|
||||
unsigned two_adicity = 1;
|
||||
storage<NLIMBS> temp = host_math::template right_shift<NLIMBS, 1>(modulus);
|
||||
while (!(temp.limbs[0] & 1)) {
|
||||
temp = host_math::template right_shift<NLIMBS, 1>(temp);
|
||||
two_adicity++;
|
||||
}
|
||||
return two_adicity;
|
||||
}
|
||||
|
||||
template <unsigned NLIMBS, unsigned TWO_ADICITY>
|
||||
constexpr storage_array<TWO_ADICITY, NLIMBS> get_invs(const storage<NLIMBS>& modulus)
|
||||
{
|
||||
storage_array<TWO_ADICITY, NLIMBS> invs = {};
|
||||
storage<NLIMBS> rs = {1};
|
||||
for (int i = 0; i < TWO_ADICITY; i++) {
|
||||
if (rs.limbs[0] & 1) host_math::template add_sub_limbs<NLIMBS, false, false>(rs, modulus, rs);
|
||||
rs = host_math::template right_shift<NLIMBS, 1>(rs);
|
||||
invs.storages[i] = rs;
|
||||
}
|
||||
return invs;
|
||||
}
|
||||
} // namespace params_gen
|
||||
|
||||
// Declares the compile-time constants derived from `modulus` inside the enclosing scope:
// limb and bit counts, zero/one, 2x and 4x multiples of the modulus and of its square,
// the negated modulus, the quotient `m` of 2^(2 * modulus_bit_count) by the modulus,
// the two Montgomery constants and the reduction count.
// Comments are kept outside the macro body because a `//` comment before a line
// continuation would swallow the following line.
#define PARAMS(modulus)                                                                                                \
  static constexpr unsigned limbs_count = modulus.LC;                                                                  \
  static constexpr unsigned modulus_bit_count =                                                                        \
    32 * (limbs_count - 1) + params_gen::floorlog2(modulus.limbs[limbs_count - 1]) + 1;                                \
  static constexpr storage<limbs_count> zero = {};                                                                     \
  static constexpr storage<limbs_count> one = {1};                                                                     \
  static constexpr storage<limbs_count> modulus_2 = host_math::left_shift<limbs_count, 1>(modulus);                    \
  static constexpr storage<limbs_count> modulus_4 = host_math::left_shift<limbs_count, 1>(modulus_2);                  \
  static constexpr storage<limbs_count> neg_modulus =                                                                  \
    params_gen::get_difference_no_carry<limbs_count>(zero, modulus);                                                   \
  static constexpr storage<2 * limbs_count> modulus_squared = params_gen::get_square<limbs_count, 0>(modulus);         \
  static constexpr storage<2 * limbs_count> modulus_squared_2 =                                                        \
    host_math::left_shift<2 * limbs_count, 1>(modulus_squared);                                                        \
  static constexpr storage<2 * limbs_count> modulus_squared_4 =                                                        \
    host_math::left_shift<2 * limbs_count, 1>(modulus_squared_2);                                                      \
  static constexpr storage<limbs_count> m = params_gen::get_m<limbs_count, 2 * modulus_bit_count>(modulus);            \
  static constexpr storage<limbs_count> montgomery_r =                                                                 \
    params_gen::get_montgomery_constant<limbs_count, false>(modulus);                                                  \
  static constexpr storage<limbs_count> montgomery_r_inv =                                                             \
    params_gen::get_montgomery_constant<limbs_count, true>(modulus);                                                   \
  static constexpr unsigned num_of_reductions =                                                                        \
    params_gen::num_of_reductions<limbs_count, 2 * modulus_bit_count>(modulus, m);
|
||||
|
||||
// Declares `omegas_count` (from params_gen::two_adicity of `modulus`) and the `inv` table
// (from params_gen::get_invs) inside the enclosing scope. Relies on `limbs_count` already
// being declared there. The `rou` argument is accepted but not used by this macro.
#define TWIDDLES(modulus, rou)                                                                                         \
  static constexpr unsigned omegas_count = params_gen::two_adicity<limbs_count>(modulus);                              \
  static constexpr storage_array<omegas_count, limbs_count> inv =                                                      \
    params_gen::get_invs<limbs_count, omegas_count>(modulus);
|
||||
|
||||
#endif
|
||||
@@ -3,337 +3,17 @@
|
||||
#define BLS12_377_BASE_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bls12_377 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
static constexpr unsigned omegas_count = 48;
|
||||
static constexpr unsigned modulus_bit_count = 377;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44,
|
||||
0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
|
||||
0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88,
|
||||
0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6,
|
||||
0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510,
|
||||
0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc,
|
||||
0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0x7af73fff, 0xcfffffff, 0xe8f4a2bb,
|
||||
0x45f6b7ff, 0xe10c9dd0, 0xff0aec70, 0xe5dd260c,
|
||||
0x935eb6c4, 0x39c4fa3f, 0xe83aef15, 0xfe51c5b9};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
|
||||
0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2,
|
||||
0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af,
|
||||
0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4,
|
||||
0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f,
|
||||
0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48,
|
||||
0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be,
|
||||
0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
|
||||
static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488,
|
||||
0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xffffff68, 0x02cdffff, 0x7fffffb1, 0x51409f83,
|
||||
0x8a7d3ff2, 0x9f7db3a9, 0x6e7c6305, 0x7b4e97b7,
|
||||
0x803c84e8, 0x4cf495bf, 0xe2fdf49a, 0x008d6661};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x451269e8, 0xef129093, 0xe65839f5, 0x6e20bbcd,
|
||||
0xa5582c93, 0x852e3c88, 0xf7f2e657, 0xeeaaf41d,
|
||||
0xa4c49351, 0xeb89746c, 0x436b0736, 0x014212fc};
|
||||
static constexpr storage<12> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f,
|
||||
0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b,
|
||||
0xc63b05c0, 0x17c510ea, 0x01ae3a46},
|
||||
{0xf1391c63, 0x6e76d5ec, 0xbff27d8e, 0x99588459, 0x436b0f62, 0xbce649cf, 0x0ad1dec1, 0x400398f5, 0x1a79beb1,
|
||||
0xc0c534db, 0x796537ca, 0x01680a40},
|
||||
{0x554c85ba, 0x6cbff0e3, 0x0be8ff9d, 0xc07c7a91, 0x9dde4fa2, 0xc3c79f67, 0xb5726bde, 0x44bc6d1a, 0x76d6d607,
|
||||
0xad812919, 0x95e8fd0e, 0x001bc0c2},
|
||||
{0x6d5db237, 0xb8c206b0, 0xcabde6ba, 0x08fed85d, 0xcd92eb6f, 0xf2f54ffc, 0xe39c1788, 0xee81121f, 0x88e82edb,
|
||||
0x852def4d, 0xb95fdb80, 0x00bf1268},
|
||||
{0x192bf14f, 0x3663c26a, 0xe6351854, 0x99c859be, 0x159361b8, 0xf9430828, 0xfbe33d7d, 0x478ed715, 0xdb79c984,
|
||||
0x41e220cf, 0xd961f2be, 0x00cedb38},
|
||||
{0xcc724685, 0xb99caa69, 0x1388a46d, 0xc24087ba, 0x08f03491, 0xeb13a05a, 0x98fb0ff7, 0x558ab21e, 0x86bbd802,
|
||||
0x0166d08d, 0xf5b5728a, 0x00d1dec9},
|
||||
{0x92db32a2, 0x2e3951fe, 0x6014b201, 0x8f5a16c9, 0xa91fbb38, 0xa9e942b9, 0x17b4dbd2, 0xf7bf5b43, 0x81325c7d,
|
||||
0x57f3934a, 0x615ad019, 0x012be78e},
|
||||
{0xdce33f04, 0xb42b84a2, 0x0db0b91c, 0x7a0c1423, 0x88d9f8c8, 0xaed11a0c, 0xd484c501, 0x712d6bc0, 0xfa3f7633,
|
||||
0x50aca1e5, 0xb90f34d0, 0x01002f29},
|
||||
{0xf012f6a0, 0xbc3db054, 0x0d332ea7, 0x00d66897, 0xfd416167, 0x8278ef44, 0x20268e84, 0x1a1a3c4d, 0x4b11d215,
|
||||
0x7c976aa6, 0x63b6e925, 0x00949581},
|
||||
{0x339637c6, 0x9d73cf29, 0xa5642677, 0x8257d1a2, 0xcafd597c, 0xcb48f07f, 0x081435a3, 0x7a505010, 0xacbb9c39,
|
||||
0xaaa45ce1, 0x7431b9c8, 0x013f2b13},
|
||||
{0xd4710c0b, 0x9ef8bddb, 0x85047671, 0xb4c73188, 0x134695ba, 0x87a51d65, 0x022416dd, 0x67f3bc43, 0xcb2a157b,
|
||||
0x21d965b2, 0x5ce4195d, 0x013a57e4},
|
||||
{0xd2461368, 0xf2db3a9f, 0x3802aef2, 0x0595c232, 0x5ea85bd6, 0xa53d621a, 0xa34ee943, 0xce930fbc, 0x6b372bee,
|
||||
0x1d216665, 0xa4535740, 0x009f0159},
|
||||
{0x656bf68d, 0x73cf953a, 0xeac5c1d7, 0x50a5a5b5, 0xaa5355a9, 0x2697b2e1, 0x08de37d2, 0x6be70306, 0x44c5afab,
|
||||
0x907f6976, 0xd4ec46b1, 0x0155cfa2},
|
||||
{0x090e3e20, 0x034160c4, 0xf77a6fbb, 0xbc73cc59, 0x188e54f6, 0x437cd23b, 0x17e42614, 0x5a788edd, 0xebdc8eae,
|
||||
0xf1ad4f54, 0x2f129bcd, 0x005d1440},
|
||||
{0x4e269ee5, 0x5626c031, 0x0d1501ec, 0x5f97673e, 0x86d31c18, 0x4fe089bd, 0x62d1259a, 0x3e9fffcb, 0x1ff89d01,
|
||||
0xe1898f32, 0x59d01a38, 0x00fa1331},
|
||||
{0x38d427b1, 0xda80661b, 0xa814f14b, 0x1913027d, 0xcda4061d, 0xd3f61e24, 0x5da8fcb2, 0x9509e69d, 0x1f05e6d3,
|
||||
0x0e7493a5, 0xa5c6bd06, 0x00dcb8db},
|
||||
{0x61cff9ed, 0x88499d0a, 0x53718444, 0x0b317da2, 0x4b7eec5f, 0xc1624bfd, 0x5af10e6f, 0x6ffc3241, 0xd6c66ff2,
|
||||
0x27d0edf3, 0x73ab0f4a, 0x013019b5},
|
||||
{0x06027b24, 0x42dc7673, 0x3341b9e7, 0x018f8bbd, 0xa435f7e2, 0xd3b389d9, 0xea031176, 0x279739a5, 0x74c35801,
|
||||
0x3555ca51, 0x049dcf87, 0x00748c30},
|
||||
{0x81fe14de, 0x731b16f0, 0x333cc61a, 0x528d6ada, 0x5736dc15, 0x7ae87278, 0xc8bfd40c, 0xa94b9fd2, 0x299b0487,
|
||||
0x714dd8ed, 0xf1a53233, 0x00642b62},
|
||||
{0x5bc45170, 0x31270ddf, 0x7f72c758, 0x7efb6b06, 0xcf4973a8, 0x2eb9f2aa, 0xe556d234, 0xdcb534c9, 0x0e043fef,
|
||||
0xf0b1a210, 0x54dda04e, 0x00e79c44},
|
||||
{0x2d5f1bc2, 0x213b3f52, 0xfd933428, 0x9e115ba7, 0x434c9e2a, 0x7f77d57e, 0xcdb944ef, 0x47a78418, 0x699aa559,
|
||||
0x8cb01cbb, 0xb064c4d7, 0x0075bf81},
|
||||
{0x3fbfc66c, 0x0b6c2e65, 0x6fcab2f8, 0x7bece031, 0xb79dcd4d, 0x2ba7e325, 0xa5c6881b, 0x8c18f66a, 0x7283805a,
|
||||
0x4d893e5a, 0xfc296bfe, 0x0107d3c5},
|
||||
{0x948c881a, 0x53fbdbb4, 0x16803d18, 0xf27a9c14, 0xeddfafef, 0x8490f6c5, 0x3e57fa15, 0xfe068e1d, 0xd26b296b,
|
||||
0xbe923119, 0x9fa377a1, 0x00d56016},
|
||||
{0x6f5b2ad1, 0xb3bbaeb3, 0x11886a1c, 0x0efd4ba9, 0xdedb7083, 0x5911498f, 0x5bd0a90f, 0x0921fe19, 0x83d379cb,
|
||||
0x38e05d4e, 0xb7ba3c73, 0x006b39e2},
|
||||
{0xa55550ba, 0x61b560e4, 0xe7288461, 0xd9ac545b, 0xc6e3e282, 0xde8d2826, 0x7e49dd2c, 0x9e87a310, 0xc43080b7,
|
||||
0xf2edfc44, 0x95b7d300, 0x012b4875},
|
||||
{0x27591e60, 0x4048ddc3, 0xc5d21791, 0xb77c9738, 0x49826bea, 0xf2f82033, 0x42f97e95, 0xf60bb703, 0x5966139d,
|
||||
0xef8f6f16, 0xc0e95e39, 0x00327618},
|
||||
{0x441e395f, 0xf9059c8f, 0xbd087238, 0x29eab35f, 0x7dee5ff1, 0x5d4abeff, 0x771e60e9, 0x7222499b, 0x7ac324a2,
|
||||
0xb70c1ea3, 0x0da51ce8, 0x015b3af9},
|
||||
{0xe9a70026, 0xf7aa576b, 0x01c4a126, 0xb28733ef, 0xa3307647, 0x06b8e768, 0xe12588ce, 0x115500e1, 0x6c9f9b1d,
|
||||
0x7e8dd6b9, 0x6ec020b3, 0x014d091e},
|
||||
{0x8e5bbc8d, 0xd318265d, 0x141bee9b, 0x70b460ba, 0x1aa9df5b, 0x145dd6a6, 0xe3478cb3, 0xd9da2548, 0x7b509387,
|
||||
0x47250509, 0xe967973c, 0x00de53d3},
|
||||
{0xd2aa57b8, 0x5ff4399c, 0xa6ae9b07, 0x90360194, 0x6cfcdb7a, 0x68979991, 0x64e56abb, 0xf517467c, 0xad7a6573,
|
||||
0x44227491, 0xa35ebf55, 0x0001da0b},
|
||||
{0x4d80f6da, 0xd8b22d5a, 0x10ee1a06, 0x6e7b2bfb, 0x17faeac0, 0xac8d97e5, 0x7a12c923, 0x8b75540b, 0x5b42ce02,
|
||||
0xa2787368, 0xe98d9998, 0x008d30a5},
|
||||
{0x9dc292bb, 0xee29c02a, 0xc5b7e1c9, 0x9e7ea016, 0x9a908e5f, 0x62daf95d, 0x3e98eae9, 0x80a71c61, 0xfdda3bba,
|
||||
0x2d514723, 0x068ef829, 0x00f65844},
|
||||
{0x185b1ad6, 0xf62fdfa4, 0xf90ccbe6, 0x2ae7f104, 0x972ce78e, 0xfa435fb6, 0x45e59f91, 0x53a75d3c, 0x2f320b7a,
|
||||
0x7290cac2, 0xe7cb5108, 0x01a2022a},
|
||||
{0xd59dda24, 0xcf0a15be, 0xf2ec72b4, 0xbc77f6d4, 0x96c31202, 0xa8df0caf, 0xbb4f8842, 0xb95429c0, 0xd0087306,
|
||||
0xb989b210, 0x5571e9f0, 0x002b1694},
|
||||
{0x67ae536e, 0x7e84d4b5, 0xc8fb9b80, 0x3a920871, 0x1948ee86, 0x1a82df2b, 0xb3c66ed3, 0xdef79467, 0xef64d05a,
|
||||
0x58fd84f2, 0xd999f400, 0x00c6d5b7},
|
||||
{0x81ee0d53, 0x7639f9a2, 0xb5747565, 0x8ade807d, 0xe6235609, 0xfd9d6266, 0x53730f18, 0xea1948a3, 0xd890142e,
|
||||
0xa356108a, 0xe3e8a723, 0x00a48ac6},
|
||||
{0xd0ca5e04, 0x531c4b83, 0x2ba0a328, 0xff35ced6, 0xa4e563aa, 0x01613079, 0x1442dcd1, 0x6f52b3a3, 0x9e19b0a6,
|
||||
0x813b4616, 0x9536db26, 0x004828c5},
|
||||
{0x0bce1b4e, 0x8a9321a9, 0xae85d6ff, 0xb9759dbe, 0x5cb206e0, 0x1ce1d522, 0x35a1607a, 0x87df044f, 0x94e1329a,
|
||||
0x2ebabee7, 0x73586cc9, 0x01a73170},
|
||||
{0x3dd667f3, 0x69824754, 0x28fd63a2, 0x61a081a7, 0x99499385, 0x0b9f6d2e, 0x5c253e16, 0x6d45622b, 0x765a7f5f,
|
||||
0xcd672e4d, 0x7150d847, 0x01182798},
|
||||
{0x2742d2f6, 0x0af0bfd2, 0x3a02631d, 0x93616956, 0xac8a2203, 0x32dae751, 0x85cf4e2d, 0xea4ffbe7, 0x7dba6eb9,
|
||||
0x673424f4, 0x61f4060d, 0x002ec230},
|
||||
{0x5a5b5c2b, 0x226293ca, 0x0684dbc9, 0xbc0ca23e, 0x7d637c4f, 0x4510cf3a, 0x9b2f4a52, 0x7869c488, 0x2fd73a53,
|
||||
0xec009b90, 0xa8c99cca, 0x003499d6},
|
||||
{0xfd745afc, 0x9da60b0a, 0x41c5362e, 0xff0769ec, 0xfa9fd8ee, 0x487621e9, 0xab04558f, 0x138910d1, 0xc1ed03ce,
|
||||
0x870903cf, 0xed3ffb51, 0x002c1cfa},
|
||||
{0x42870c46, 0x271b1ff3, 0x13b4b491, 0x1e0a9cd1, 0x3c55c65e, 0x2d58cb1a, 0x74756f6e, 0xa6e12c32, 0x2e313bc4,
|
||||
0xf774a43d, 0xcc386ffc, 0x00ca156d},
|
||||
{0x4a67741c, 0x588f79b6, 0xc3590b63, 0xc0ae78b5, 0xc3576385, 0xad0bb97d, 0xb8473137, 0x0583dd49, 0x515d8604,
|
||||
0xb31d9631, 0xd3ba3b12, 0x015337bc},
|
||||
{0x8a458e8c, 0x976a14f5, 0xc3a26ae8, 0xc90809b4, 0x089acf15, 0x270a1575, 0x5013d4b1, 0x614a0d25, 0x6d09901e,
|
||||
0x1314e076, 0xf208945e, 0x0022f414},
|
||||
{0xc563b9a1, 0x7eca603c, 0x06fe0bc3, 0x06df0a43, 0x0ddff8c6, 0xb44d994a, 0x4512a3d4, 0x40fbe05b, 0x8aeffc9b,
|
||||
0x30f15248, 0x05198a80, 0x0036a92e}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b,
|
||||
0xc63b05c0, 0x17c510ea, 0x01ae3a46},
|
||||
{0x0ec6e39e, 0x1691ea13, 0x700d8272, 0x7db2d8ea, 0x769e389d, 0x620d1860, 0xf62334cd, 0xda1f40fd, 0x52278a89,
|
||||
0x0575d0e5, 0x9e5fd920, 0x00463005},
|
||||
{0x93997f11, 0x9403412c, 0xdfb2323f, 0x845557b3, 0x2d50c7fc, 0x66f2eaaa, 0xc103f92f, 0x992358fb, 0x5d7a3179,
|
||||
0x01d60217, 0xd2af5da0, 0x0077b354},
|
||||
{0xc1000ea4, 0x7ac2ca7a, 0x7f8d9495, 0x937db751, 0x0de62931, 0x401b3873, 0x980129ba, 0x59be7529, 0xa545a303,
|
||||
0x2ba8f85d, 0xb6705512, 0x00573e3a},
|
||||
{0x2c1b22e6, 0xb55712f9, 0x0f91cddd, 0x66cfc0f3, 0x8bb345d8, 0x8d5fcd42, 0x86c0abc3, 0x61e4cf98, 0x432fe8f3,
|
||||
0x93556354, 0xad005fb6, 0x00ff87d5},
|
||||
{0x7aba560e, 0x05065a97, 0x7918b9db, 0x333ff005, 0xdf6be708, 0x03938ae1, 0x7410a77b, 0x922d3376, 0x03a15063,
|
||||
0xa5aeaa56, 0x4aea89e5, 0x01542cb6},
|
||||
{0xe4d6a772, 0x61a6a2d6, 0x6e6239a7, 0xc18c9ef7, 0x04cac70f, 0x8772bb3f, 0x16c5916b, 0x8bbb4185, 0x46335dc0,
|
||||
0x4aa656e2, 0x842c1664, 0x008187ac},
|
||||
{0xdd4e93c5, 0xa002ea0a, 0x07458704, 0xb40a45e8, 0xbaa65f2a, 0xee9ee3ea, 0x8f3b8a87, 0xeffa4f9e, 0x95b5feba,
|
||||
0xb6e03897, 0x81751c63, 0x003c41de},
|
||||
{0x13043a4a, 0x50221a3b, 0xda73331a, 0x6537fca8, 0x8e85077c, 0x8b74cef4, 0x0e5bbe67, 0x65705341, 0xefa22d23,
|
||||
0xf0f56caa, 0xd1865d98, 0x001f8eb5},
|
||||
{0x3e26a605, 0xd9af8944, 0x6970166f, 0xad0efb6e, 0x2c7464ec, 0xc16d7972, 0xf788281b, 0xe0de4b04, 0xaa878b0e,
|
||||
0x0c049e55, 0x63e2e7cd, 0x0135383a},
|
||||
{0x6f6893f7, 0x6b12c42e, 0x44bbbf63, 0x831f38c0, 0x191be6c9, 0xa57797d4, 0x447475cb, 0x6af7f695, 0x4b8be189,
|
||||
0x3295e9e7, 0x350d0aad, 0x00a9a32b},
|
||||
{0x7656ef1d, 0xc2243f86, 0xf4211219, 0x3e4c3bc3, 0x3c9a3d21, 0xaa4db6e0, 0xe8a4c946, 0x29ac638a, 0xa4cf856e,
|
||||
0x21449f8b, 0x7d4c9c67, 0x018cf097},
|
||||
{0x6a8e0139, 0x18e472a2, 0xd6b1c835, 0xcc7c80fd, 0x6546fc0a, 0x1f760883, 0x4ea3417c, 0x5bcfc1fb, 0xe9acb8b0,
|
||||
0x52c9a29b, 0xd9f265a2, 0x01a6d8b2},
|
||||
{0xebb83ac0, 0x95eb1dc8, 0x9f390cf2, 0x1e8d70f5, 0xb0d85145, 0xf9e4955d, 0x89720ee1, 0xe9690d30, 0x50fc879f,
|
||||
0x629972a5, 0x69ccd670, 0x00456e23},
|
||||
{0x83f38be4, 0xfbfb11a1, 0x388e6726, 0xb90a19b9, 0xc860d62c, 0x3fc10bc7, 0xc3c4e575, 0xc9fe043e, 0x7396d780,
|
||||
0x67aeff74, 0x01cadaee, 0x019059fa},
|
||||
{0xfd581be8, 0x43506d6e, 0x018b1b76, 0xf09563e6, 0xe87f9d80, 0x5cd193b2, 0x0a933402, 0x18ba3260, 0x50524c77,
|
||||
0x4de839d9, 0xd90315ce, 0x0018c2ed},
|
||||
{0xa737701d, 0xf900eb81, 0x995e6672, 0x6874c90e, 0xa495900b, 0x69ade94a, 0xd07bd4b1, 0xd5f358e7, 0x6f88e8e4,
|
||||
0xbd437e9d, 0x1d6b88cf, 0x0130d706},
|
||||
{0xfc29b95f, 0x064629bd, 0xb546585c, 0x0a897bff, 0x54a80d9a, 0x856c8d4f, 0x944568ff, 0x85410cc4, 0x59fc4370,
|
||||
0xc1978c65, 0xc668dc52, 0x017c86c8},
|
||||
{0xf6109131, 0x65cecd55, 0x7d2f52e5, 0x6d7e892e, 0xb90b2403, 0xe9a09007, 0xae0a060d, 0x92ca9aac, 0xa22b1e96,
|
||||
0x5ce1cc4f, 0x45201e6f, 0x012eb33c},
|
||||
{0x20d1aac5, 0x9d2cb4cf, 0xded22997, 0x3e4a1e77, 0x07fae2e2, 0x09d692f7, 0xd49bdcbe, 0x6a6aa4f8, 0x09c01cab,
|
||||
0xa8e21ead, 0x6b03b72e, 0x01a19e81},
|
||||
{0x935650ca, 0xf3d94623, 0x2ffd937e, 0x4a688a46, 0xa622b139, 0xf55fd53a, 0x7a1a1e40, 0x227406aa, 0x9a3fea60,
|
||||
0x40dd4504, 0x1edbb584, 0x00fc2332},
|
||||
{0xf28db3fc, 0x9707402f, 0xc28593f1, 0x3d898bd7, 0xb30effcd, 0xcaee2dfd, 0x4fb6ec9d, 0xff1b0790, 0x09ed1120,
|
||||
0x9cb0597e, 0xb78d15e9, 0x005c73a5},
|
||||
{0xb0a8a3b9, 0x739a4c2e, 0xc57196ae, 0x083bde21, 0xba602f29, 0x247eb070, 0x1c2c7132, 0x4ba1dd6a, 0xe2187c6c,
|
||||
0x4ce59fb6, 0x606880b1, 0x0014a7b5},
|
||||
{0x484baf56, 0xdd0eccab, 0x4541b101, 0xe6c80eaf, 0xf7964f64, 0x35b8a558, 0xc50ccf94, 0xb3b824d4, 0x21c71aeb,
|
||||
0xe1f6b4c8, 0x23031df0, 0x01a8a647},
|
||||
{0x592a9620, 0x5338dc01, 0xd94a401b, 0xb217f96d, 0xf830b00e, 0xfefb6601, 0xafd3dee4, 0x1ec061b5, 0x05a199bd,
|
||||
0x0d5d4d3c, 0xc8489913, 0x0196c768},
|
||||
{0x1f980ca0, 0x4acb430e, 0x71c6821c, 0x8973a3cc, 0xb3e9aa75, 0x74414c20, 0x0c13f042, 0x79212a5f, 0x375c705b,
|
||||
0x5c44d226, 0x29439af2, 0x000a2fdd},
|
||||
{0xa387b60c, 0xf01901e6, 0x4561ff3d, 0xa7b1b7dc, 0x0558e085, 0x5d82d374, 0xf2bc1d29, 0x519298e5, 0x3d332207,
|
||||
0x0ad719a8, 0xea19a807, 0x0150a138},
|
||||
{0x9deb8e06, 0x7c6b3eb1, 0x28206b6c, 0x3a8f53c4, 0x7fed1065, 0x039f575f, 0x40c1f898, 0x31be74ba, 0x790ac003,
|
||||
0x76db938e, 0x5508c5e4, 0x0096d5e1},
|
||||
{0xb83f8358, 0x3e940e0e, 0x372a4b8b, 0x204d80e0, 0xa820b2ec, 0x956454b2, 0x2cc8078c, 0x8e2cb3d4, 0xc6f81363,
|
||||
0xdd0d3e12, 0x49041a64, 0x0052f327},
|
||||
{0x2aec0be2, 0x37ca2eb7, 0x555cc652, 0x05093570, 0xd2588d31, 0xe62f1adb, 0x798be240, 0x2fd2518e, 0x0ff6b579,
|
||||
0x9302d4e3, 0x6ee95e5d, 0x0025ca57},
|
||||
{0x233eed68, 0xcc664858, 0xece3a327, 0x600ca1ac, 0x93a2e34f, 0x330d1102, 0xdb5e3bb4, 0xc84ab55f, 0xe4d5576e,
|
||||
0x5179c101, 0x0938f714, 0x00efb20e},
|
||||
{0xfdddaf5c, 0x907f96e7, 0x1ffe49da, 0x348dab77, 0xc14ab779, 0x3eca44ad, 0x4cdc5d98, 0xe9b10b2e, 0xa95c5a36,
|
||||
0x65a25d16, 0x6e616518, 0x00c9f759},
|
||||
{0x7a5aff62, 0x9497d331, 0xb57cd01d, 0x21896195, 0x6c7ba745, 0xe09e22f7, 0x5a7acff0, 0xcc9f1064, 0xc93c46b0,
|
||||
0x7b867cdf, 0x23eba5ae, 0x01a05dcb},
|
||||
{0x4dcc71f4, 0xa56a8e33, 0xcbebdba2, 0xc480b083, 0x36ea43af, 0x748448fa, 0xe7859f3c, 0xee9b4b0e, 0x5af41919,
|
||||
0x9ab2bb09, 0x65caa0ea, 0x0127262d},
|
||||
{0x352a05cc, 0x77c7d12f, 0xdc7160c9, 0xb91ca5be, 0x5a3feda0, 0x245106da, 0x7669f7cd, 0xfd45012d, 0xdc5489fa,
|
||||
0xc4774629, 0x2872daa0, 0x00241273},
|
||||
{0x0d3e0b0b, 0x1838ae6f, 0xff67fc2c, 0x7fcc9b21, 0x23956100, 0xaedca59e, 0x1e79aa4b, 0x572ed634, 0xc7f0673c,
|
||||
0xaeeda160, 0xc8047256, 0x00360e2c},
|
||||
{0xe05044f9, 0xec5e4514, 0x7ec9b4ef, 0xe915b7e7, 0x9c4bec48, 0x9fb78cd8, 0xa38d95a3, 0xd7b84113, 0xb86fd119,
|
||||
0x7be64440, 0xe4f9e70a, 0x009e3a60},
|
||||
{0xc7435591, 0xc61cc546, 0xe5e94dc4, 0xea99a96f, 0xdb8ff17d, 0x5b10e2b4, 0x3dd0ff10, 0x13f8fb9d, 0xe118b9e9,
|
||||
0xcbb1c0ce, 0x7ebf8a0d, 0x00b37258},
|
||||
{0xce5943e7, 0xd44fdb9d, 0x79fa927a, 0xcb7d41ea, 0xdcee72ca, 0x9a4bcebf, 0x11634905, 0x2317799d, 0x584055ac,
|
||||
0x3f1c302e, 0xdc2d0017, 0x013ef021},
|
||||
{0xa78a1578, 0x345cb052, 0x5961b8fe, 0x1ed4d48a, 0x74a5e2af, 0x5858e93c, 0x0fd17e9f, 0xaf643f0a, 0x79d94009,
|
||||
0x61530753, 0xde7b2f53, 0x010a3393},
|
||||
{0x813925df, 0x548b1d28, 0xca3e79b6, 0xabab3a4e, 0x7e51071a, 0xb3c9c068, 0x6c5fcedb, 0x8014e879, 0x95d9facc,
|
||||
0x3ba5db77, 0x7f5c3d2f, 0x0105c419},
|
||||
{0x26bc1104, 0xbb9cbd28, 0xe03cc852, 0x27f09abb, 0x22e5be61, 0x02763b4a, 0xb94fa254, 0xa3940542, 0xff34c35f,
|
||||
0xcf058850, 0x1482533c, 0x019f538f},
|
||||
{0xb3f42de9, 0xf2126047, 0xbeb0a1b8, 0xdb0451c4, 0x9aabc291, 0x1a945bc0, 0x7fe3a6f2, 0x13d08312, 0x390e1c07,
|
||||
0xd8fb13f1, 0x6b30562b, 0x005a41c4},
|
||||
{0xe8b3d5dd, 0x1c60fcc5, 0x75b3a464, 0x5d7babba, 0xf3989910, 0x0d9f52c7, 0x9beec571, 0x464a2840, 0x79689d4b,
|
||||
0x139c496f, 0x099e64c4, 0x0022c6a3},
|
||||
{0x023e0cd1, 0x9df6c2d5, 0xa6b747de, 0x8e23def9, 0x90da6876, 0x7bc83eee, 0xc88bb007, 0xdaeac352, 0x68bb6a7f,
|
||||
0x45cabb6f, 0x94697b34, 0x001e7154},
|
||||
{0x0203d905, 0xffcee91d, 0xc99df56d, 0xd878ee01, 0x210d754c, 0xa0e882f9, 0x7d0aec6a, 0x26c96db8, 0x8ff7afe4,
|
||||
0x46e2e145, 0x54749283, 0x015cd1b0}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x00000001, 0x42846000, 0x18000000, 0x0b85aea2, 0xdd04a400, 0x8f79b117, 0x807a89c7, 0x8d116cf9, 0x3650a49d,
|
||||
0x631d82e0, 0x0be28875, 0x00d71d23},
|
||||
{0x00000001, 0x63c69000, 0x24000000, 0x114885f3, 0xcb86f600, 0x573689a3, 0x40b7ceab, 0x539a2376, 0x5178f6ec,
|
||||
0x14ac4450, 0x91d3ccb0, 0x0142abb4},
|
||||
{0x00000001, 0x7467a800, 0xaa000000, 0x1429f19b, 0xc2c81f00, 0x3b14f5e9, 0xa0d6711d, 0xb6de7eb4, 0x5f0d2013,
|
||||
0x6d73a508, 0x54cc6ecd, 0x017872fd},
|
||||
{0x00000001, 0x7cb83400, 0xed000000, 0x159aa76f, 0xbe68b380, 0x2d042c0c, 0xd0e5c256, 0x6880ac53, 0x65d734a7,
|
||||
0x19d75564, 0xb648bfdc, 0x019356a1},
|
||||
{0x00000001, 0x80e07a00, 0x0e800000, 0x1653025a, 0x3c38fdc0, 0xa5fbc71e, 0x68ed6af2, 0x4151c323, 0x693c3ef1,
|
||||
0x70092d92, 0xe706e863, 0x01a0c873},
|
||||
{0x00000001, 0x82f49d00, 0x1f400000, 0x16af2fcf, 0xfb2122e0, 0xe27794a6, 0x34f13f40, 0x2dba4e8b, 0x6aeec416,
|
||||
0x1b2219a9, 0xff65fca7, 0x01a7815c},
|
||||
{0x00000001, 0x83feae80, 0xa7a00000, 0x16dd4689, 0x5a953570, 0x00b57b6b, 0x1af32968, 0xa3ee943f, 0xebc806a8,
|
||||
0xf0ae8fb4, 0x8b9586c8, 0x01aaddd1},
|
||||
{0x00000001, 0x8483b740, 0xebd00000, 0x16f451e6, 0x8a4f3eb8, 0x8fd46ecd, 0x0df41e7b, 0xdf08b719, 0xac34a7f1,
|
||||
0xdb74caba, 0xd1ad4bd9, 0x01ac8c0b},
|
||||
{0x00000001, 0x84c63ba0, 0x8de80000, 0x16ffd795, 0xa22c435c, 0x5763e87e, 0x07749905, 0x7c95c886, 0x8c6af896,
|
||||
0x50d7e83d, 0xf4b92e62, 0x01ad6328},
|
||||
{0x00000001, 0x84e77dd0, 0xdef40000, 0x17059a6c, 0x2e1ac5ae, 0x3b2ba557, 0x8434d64a, 0xcb5c513c, 0xfc8620e8,
|
||||
0x8b8976fe, 0x863f1fa6, 0x01adceb7},
|
||||
{0x00000001, 0x84f81ee8, 0x877a0000, 0x17087bd8, 0x741206d7, 0xad0f83c3, 0xc294f4ec, 0xf2bf9597, 0xb493b511,
|
||||
0xa8e23e5f, 0xcf021848, 0x01ae047e},
|
||||
{0x00000001, 0x85006f74, 0x5bbd0000, 0x9709ec8e, 0x970da76b, 0xe60172f9, 0x61c5043d, 0x867137c5, 0x109a7f26,
|
||||
0xb78ea210, 0x73639499, 0x01ae1f62},
|
||||
{0x00000001, 0x850497ba, 0x45de8000, 0xd70aa4e9, 0xa88b77b5, 0x827a6a94, 0x315d0be6, 0xd04a08dc, 0x3e9de430,
|
||||
0x3ee4d3e8, 0x459452c2, 0x01ae2cd4},
|
||||
{0x00000001, 0x8506abdd, 0xbaef4000, 0xf70b0116, 0x314a5fda, 0xd0b6e662, 0x99290fba, 0xf5367167, 0x559f96b5,
|
||||
0x828fecd4, 0x2eacb1d6, 0x01ae338d},
|
||||
{0x80000001, 0x8507b5ee, 0x7577a000, 0x870b2f2d, 0xf5a9d3ed, 0xf7d52448, 0x4d0f11a4, 0x87aca5ad, 0x61206ff8,
|
||||
0xa465794a, 0xa338e160, 0x01ae36e9},
|
||||
{0x40000001, 0x85083af7, 0xd2bbd000, 0xcf0b4638, 0x57d98df6, 0x0b64433c, 0x2702129a, 0xd0e7bfd0, 0x66e0dc99,
|
||||
0xb5503f85, 0xdd7ef925, 0x01ae3897},
|
||||
{0xa0000001, 0x85087d7b, 0x815de800, 0x730b51be, 0x08f16afb, 0x952bd2b6, 0x93fb9314, 0x75854ce1, 0xe9c112ea,
|
||||
0x3dc5a2a2, 0xfaa20508, 0x01ae396e},
|
||||
{0xd0000001, 0x85089ebd, 0x58aef400, 0xc50b5781, 0xe17d597d, 0xda0f9a72, 0x4a785351, 0xc7d4136a, 0xab312e12,
|
||||
0x82005431, 0x89338af9, 0x01ae39da},
|
||||
{0xe8000001, 0x8508af5e, 0xc4577a00, 0xee0b5a62, 0x4dc350be, 0x7c817e51, 0xa5b6b370, 0xf0fb76ae, 0x0be93ba6,
|
||||
0x241dacf9, 0x507c4df2, 0x01ae3a10},
|
||||
{0x74000001, 0x8508b7af, 0x7a2bbd00, 0x828b5bd3, 0x83e64c5f, 0xcdba7040, 0xd355e37f, 0x058f2850, 0xbc454271,
|
||||
0x752c595c, 0x3420af6e, 0x01ae3a2b},
|
||||
{0xba000001, 0x8508bbd7, 0xd515de80, 0xcccb5c8b, 0x1ef7ca2f, 0x7656e938, 0xea257b87, 0x0fd90121, 0x947345d6,
|
||||
0x9db3af8e, 0xa5f2e02c, 0x01ae3a38},
|
||||
{0xdd000001, 0x8508bdeb, 0x028aef40, 0xf1eb5ce8, 0xec808917, 0x4aa525b3, 0x758d478b, 0x94fded8a, 0x808a4788,
|
||||
0xb1f75aa7, 0x5edbf88b, 0x01ae3a3f},
|
||||
{0xee800001, 0x8508bef5, 0x194577a0, 0x047b5d16, 0xd344e88c, 0x34cc43f1, 0xbb412d8d, 0xd79063be, 0xf695c861,
|
||||
0x3c193033, 0xbb5084bb, 0x01ae3a42},
|
||||
{0xf7400001, 0x8508bf7a, 0x24a2bbd0, 0x0dc35d2d, 0xc6a71846, 0x29dfd310, 0xde1b208e, 0x78d99ed8, 0x319b88ce,
|
||||
0x012a1afa, 0x698acad3, 0x01ae3a44},
|
||||
{0x7ba00001, 0x8508bfbd, 0xaa515de8, 0x12675d38, 0x40583023, 0xa4699aa0, 0xef881a0e, 0xc97e3c65, 0x4f1e6904,
|
||||
0xe3b2905d, 0x40a7edde, 0x01ae3a45},
|
||||
{0xbdd00001, 0x8508bfde, 0x6d28aef4, 0x94b95d3e, 0xfd30bc11, 0xe1ae7e67, 0x783e96ce, 0xf1d08b2c, 0xdddfd91f,
|
||||
0xd4f6cb0e, 0xac367f64, 0x01ae3a45},
|
||||
{0x5ee80001, 0x8508bfef, 0x4e94577a, 0xd5e25d41, 0xdb9d0208, 0x0050f04b, 0xbc99d52f, 0x85f9b28f, 0xa540912d,
|
||||
0xcd98e867, 0xe1fdc827, 0x01ae3a45},
|
||||
{0xaf740001, 0x8508bff7, 0xbf4a2bbd, 0x7676dd42, 0xcad32504, 0x0fa2293d, 0x5ec7745f, 0x500e4641, 0x08f0ed34,
|
||||
0x49e9f714, 0xfce16c89, 0x01ae3a45},
|
||||
{0xd7ba0001, 0x0508bffb, 0x77a515df, 0x46c11d43, 0xc26e3682, 0x174ac5b6, 0x2fde43f7, 0xb518901a, 0x3ac91b37,
|
||||
0x08127e6a, 0x0a533eba, 0x01ae3a46},
|
||||
{0xebdd0001, 0xc508bffd, 0xd3d28aef, 0x2ee63d43, 0x3e3bbf41, 0x1b1f13f3, 0x9869abc3, 0x679db506, 0x53b53239,
|
||||
0x6726c215, 0x110c27d2, 0x01ae3a46},
|
||||
{0xf5ee8001, 0x2508bffe, 0x01e94578, 0xa2f8cd44, 0x7c2283a0, 0x1d093b11, 0xccaf5fa9, 0x40e0477c, 0xe02b3dba,
|
||||
0x96b0e3ea, 0x14689c5e, 0x01ae3a46},
|
||||
{0x7af74001, 0x5508bfff, 0x18f4a2bc, 0x5d021544, 0x9b15e5d0, 0x1dfe4ea0, 0xe6d2399c, 0xad8190b7, 0xa666437a,
|
||||
0xae75f4d5, 0x1616d6a4, 0x01ae3a46},
|
||||
{0xbd7ba001, 0x6d08bfff, 0x247a515e, 0x3a06b944, 0x2a8f96e8, 0x9e78d868, 0x73e3a695, 0xe3d23555, 0x0983c65a,
|
||||
0xba587d4b, 0x16edf3c7, 0x01ae3a46},
|
||||
{0xdebdd001, 0x7908bfff, 0x2a3d28af, 0x28890b44, 0xf24c6f74, 0x5eb61d4b, 0x3a6c5d12, 0xfefa87a4, 0xbb1287ca,
|
||||
0x4049c185, 0x17598259, 0x01ae3a46},
|
||||
{0xef5ee801, 0xff08bfff, 0x2d1e9457, 0x1fca3444, 0xd62adbba, 0xbed4bfbd, 0x9db0b850, 0x0c8eb0cb, 0x13d9e883,
|
||||
0x034263a3, 0x178f49a2, 0x01ae3a46},
|
||||
{0xf7af7401, 0x4208bfff, 0x2e8f4a2c, 0x1b6ac8c4, 0xc81a11dd, 0xeee410f6, 0x4f52e5ef, 0x1358c55f, 0xc03d98df,
|
||||
0x64beb4b1, 0x17aa2d46, 0x01ae3a46},
|
||||
{0xfbd7ba01, 0x6388bfff, 0x2f47a516, 0x993b1304, 0x4111acee, 0x86ebb993, 0x2823fcbf, 0x16bdcfa9, 0x166f710d,
|
||||
0x957cdd39, 0x17b79f18, 0x01ae3a46},
|
||||
{0xfdebdd01, 0x7448bfff, 0x2fa3d28b, 0x58233824, 0x7d8d7a77, 0x52ef8de1, 0x148c8827, 0x187054ce, 0xc1885d24,
|
||||
0xaddbf17c, 0x17be5801, 0x01ae3a46},
|
||||
{0xfef5ee81, 0xfca8bfff, 0x2fd1e945, 0xb7974ab4, 0x9bcb613b, 0x38f17808, 0x8ac0cddb, 0x99499760, 0x9714d32f,
|
||||
0x3a0b7b9e, 0x17c1b476, 0x01ae3a46},
|
||||
{0xff7af741, 0x40d8bfff, 0x2fe8f4a3, 0xe75153fc, 0x2aea549d, 0x2bf26d1c, 0xc5daf0b5, 0x59b638a9, 0x81db0e35,
|
||||
0x802340af, 0x17c362b0, 0x01ae3a46},
|
||||
{0xffbd7ba1, 0xe2f0bfff, 0x2ff47a51, 0xff2e58a0, 0xf279ce4e, 0x2572e7a5, 0x63680222, 0x39ec894e, 0xf73e2bb8,
|
||||
0xa32f2337, 0x17c439cd, 0x01ae3a46},
|
||||
{0xffdebdd1, 0x33fcbfff, 0x2ffa3d29, 0x8b1cdaf2, 0xd6418b27, 0xa23324ea, 0xb22e8ad8, 0xaa07b1a0, 0x31efba79,
|
||||
0x34b5147c, 0x17c4a55c, 0x01ae3a46},
|
||||
{0xffef5ee9, 0xdc82bfff, 0x2ffd1e94, 0xd1141c1b, 0x48256993, 0xe093438d, 0xd991cf33, 0x621545c9, 0x4f4881da,
|
||||
0x7d780d1e, 0x17c4db23, 0x01ae3a46},
|
||||
{0xfff7af75, 0xb0c5bfff, 0xaffe8f4a, 0xf40fbcaf, 0x811758c9, 0x7fc352de, 0x6d437161, 0xbe1c0fde, 0x5df4e58a,
|
||||
0x21d9896f, 0x17c4f607, 0x01ae3a46},
|
||||
{0xfffbd7bb, 0x9ae73fff, 0xefff47a5, 0x058d8cf9, 0x1d905065, 0x4f5b5a87, 0xb71c4278, 0xec1f74e8, 0xe54b1762,
|
||||
0xf40a4797, 0x17c50378, 0x01ae3a46},
|
||||
{0xfffdebde, 0x0ff7ffff, 0x0fffa3d3, 0x8e4c751f, 0x6bcccc32, 0xb7275e5b, 0xdc08ab03, 0x0321276d, 0x28f6304f,
|
||||
0xdd22a6ac, 0x17c50a31, 0x01ae3a46}}};
|
||||
static constexpr storage<12> rou = {0xc563b9a1, 0x7eca603c, 0x06fe0bc3, 0x06df0a43, 0x0ddff8c6, 0xb44d994a,
|
||||
0x4512a3d4, 0x40fbe05b, 0x8aeffc9b, 0x30f15248, 0x05198a80, 0x0036a92e};
|
||||
TWIDDLES(modulus, rou)
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 5;
|
||||
|
||||
@@ -4,193 +4,17 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bls12_377 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 47;
|
||||
static constexpr unsigned modulus_bit_count = 253;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<8> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
|
||||
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
|
||||
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd,
|
||||
0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb,
|
||||
0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0xf5ee7fff, 0x2ffffffe, 0xa6558901,
|
||||
0xa3c84ffe, 0x9f4bb2e1, 0x65d35aa9, 0xed549aa1};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3,
|
||||
0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7,
|
||||
0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f,
|
||||
0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b,
|
||||
0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xfffffff3, 0x7d1c7fff, 0x6ffffff2, 0x7257f50f,
|
||||
0x512c0fee, 0x16d81575, 0x2bbb9a9d, 0x0d4bda32};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x1beeec02, 0x4122dd1a, 0x74fee875, 0xbd1eae95,
|
||||
0x27b28e2f, 0x838557e2, 0x2290c02c, 0x07b30191};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e},
|
||||
{0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765, 0x970dec00, 0x23ed1347, 0x00000000, 0x00000000},
|
||||
{0xfbfa0a01, 0x0f830f7e, 0xd75769a0, 0x20f8b46c, 0xf05d5033, 0x7108bd18, 0x0788de01, 0x07405e08},
|
||||
{0x60b9bdae, 0xc78085a6, 0x789094f5, 0x3116ec22, 0xce87d660, 0x0a02a81d, 0xc2a94856, 0x0ead8236},
|
||||
{0x3e83a7cc, 0x6ffc39d9, 0x958a0a74, 0x117d996e, 0x0b92e8c9, 0xc242289d, 0x29d977d6, 0x0484efb4},
|
||||
{0x0111ec3f, 0x15455b00, 0xc5f6be6f, 0x6b62d7af, 0x337f2d07, 0xfcba0365, 0x43fccd26, 0x0f151842},
|
||||
{0xc31ec69b, 0x57951b2e, 0x2a37ce1f, 0x3e0a4be7, 0xcf3b198a, 0x960aeb4a, 0x341fd5cd, 0x04fb0673},
|
||||
{0xa921851f, 0x71c1b78e, 0x7808f239, 0x3c26340c, 0x976fb990, 0xbcc8f69b, 0xe880dc71, 0x06a5edb2},
|
||||
{0xc0f5679e, 0x7619eab5, 0x0dc0b9cd, 0x1f4cd10e, 0xbf6a480a, 0x7e1b70aa, 0x7f5461bb, 0x0ffc66da},
|
||||
{0xec5cbab2, 0x8159806d, 0x498264a3, 0x14ea1333, 0xe3abfaa6, 0x56bbe1d8, 0x02aa031f, 0x09d2b5c4},
|
||||
{0xc010c48a, 0xd2aa9562, 0x3b004b60, 0x447e5c11, 0x11e243bb, 0xd5a21c13, 0x0ab418b1, 0x01eab23e},
|
||||
{0xacff6986, 0x08715ee8, 0xa93924d0, 0xab01878a, 0x6e9ae5c4, 0xbfbc5e71, 0x26b08d6e, 0x0f8000bf},
|
||||
{0x3ddbc679, 0x06bc13b0, 0x615256ce, 0x7269a1f1, 0x1f5221a2, 0xf7716fbf, 0x8c66c14f, 0x0fa1f02c},
|
||||
{0x906f531f, 0xdd40f131, 0x30728eff, 0xb06b29c7, 0x88839294, 0xc891fd19, 0x646978e8, 0x04e88447},
|
||||
{0x6e259cdc, 0xb1e4b769, 0x00514e5e, 0xbcb0b709, 0x05113e7f, 0x74edb7c0, 0xe92e22af, 0x10c88511},
|
||||
{0x240ede5b, 0xebb2e898, 0x42cd84c6, 0xc2639185, 0x9408f956, 0xf79e8391, 0x94e87a7d, 0x06872fa1},
|
||||
{0x260678ff, 0xf8522249, 0xa8de9973, 0x6148cb16, 0x5a4e8d56, 0x5750f3f4, 0xbaeaf0c3, 0x0e805156},
|
||||
{0x3d766f80, 0x1b4b71cf, 0x1069012d, 0x47d21195, 0x9151ebec, 0x5635235f, 0x2b13c808, 0x093f7d91},
|
||||
{0x4637701d, 0x0848f958, 0x4c8353af, 0x8a750076, 0x0ef6174a, 0x485f4e4f, 0xf38db632, 0x078d97a1},
|
||||
{0x66a16869, 0x50c487c1, 0xd1fd4525, 0x380a66ab, 0x265e8539, 0xd455a01a, 0x064b5334, 0x0cd62875},
|
||||
{0x3358eb25, 0xdbc547bc, 0x722037db, 0x8909d398, 0x5e705b6d, 0x8b7075b5, 0x9bdaf407, 0x02694bb2},
|
||||
{0xf45b9621, 0x102fbfb0, 0xf04faac0, 0xe80f4241, 0x7ca61177, 0x0b830bfd, 0x7033169d, 0x10521892},
|
||||
{0xcc943028, 0xed2576ad, 0xfa4c6090, 0x846e49bc, 0x0049d8e6, 0xc74c1865, 0x665d7be5, 0x0e9c5a12},
|
||||
{0xafeb494b, 0x97319dcd, 0x1d78404c, 0xab30c83e, 0xf26ffe90, 0x452d8a48, 0xa36452c7, 0x0bfc2e92},
|
||||
{0xedc626c3, 0xf30e312d, 0xcf1f3a94, 0x8367a7ca, 0x917a1b28, 0x621e15e1, 0xf2e93b82, 0x07cd59f8},
|
||||
{0xf02ba42c, 0x553085d9, 0x1119b10d, 0x59662159, 0x6b8ea03f, 0xaa670958, 0x7ce92983, 0x066f6f5f},
|
||||
{0x4dd87a5e, 0xf423a283, 0xd9a4c364, 0x1fe46601, 0xbfdc7e9b, 0xda4addbf, 0x3bf94b2b, 0x0a7f2bd8},
|
||||
{0xe5f8848a, 0x270a2326, 0xa727567d, 0x97d14afa, 0x48746fc7, 0x1a3a5a4e, 0xa42f077a, 0x0044e4b1},
|
||||
{0x20b7298a, 0xd7652451, 0x65013b06, 0xc7c9a0b7, 0xad0d8457, 0x479b82a9, 0x0c99f5ce, 0x0bef1e5a},
|
||||
{0x1912f7fa, 0x77d7da1d, 0x299fd7d6, 0xbcb7a5b2, 0x142a4480, 0x705e45dd, 0xb492dbd8, 0x0dc835fd},
|
||||
{0xa0234d2d, 0xe943054c, 0xe5f5be5e, 0x673b0ee0, 0x5048a19a, 0xcdd48e41, 0xabc3cb99, 0x0997d277},
|
||||
{0xa9966ac4, 0x1ae0ea67, 0xda83fb3b, 0x4e2dbb1c, 0x0b51380e, 0xf77cf749, 0xb28a7670, 0x048b4b0e},
|
||||
{0xb14361d4, 0x7f1db43f, 0x25ab6d51, 0x7927e578, 0x383bf21e, 0xb43e52a5, 0xd27fa99f, 0x077595e9},
|
||||
{0xa90a2740, 0xfe3ca4f0, 0x512a7c7a, 0xd259ff36, 0xb41fe696, 0xbca3176a, 0xf33132ce, 0x05bd5ea3},
|
||||
{0xf284f768, 0xdeee484b, 0xe26a0475, 0x2a02e015, 0x88d968c2, 0xf0eb4925, 0x82a391c9, 0x0620ce9e},
|
||||
{0xbd83a3da, 0xd3b69b29, 0xe02ce197, 0x9543950f, 0xc2f87783, 0x80799665, 0xc15be215, 0x11ce8199},
|
||||
{0x1b29736e, 0x8f267f19, 0x1d5a0c3a, 0xa2e04d58, 0x1ae99514, 0x76803064, 0x57f7c806, 0x12129439},
|
||||
{0xf32d6bac, 0xa0b973d4, 0xf0d81b72, 0xae951889, 0x2e2daa0a, 0x51dbe098, 0x40d9af8f, 0x04679474},
|
||||
{0x22df9f13, 0x56313de8, 0x599e7536, 0xe2e75200, 0x6d163e50, 0xa1b4fce7, 0xc8111763, 0x0aec2172},
|
||||
{0x355dd694, 0x4258374d, 0x44c76a20, 0x5c31e8ac, 0xaa5fd062, 0x9b473969, 0x1a37b6b4, 0x0a693d77},
|
||||
{0x44ddbbdc, 0xbafb92a6, 0x26b01974, 0x63c7a02d, 0x5f28a274, 0x0ff86e13, 0x867f2e29, 0x0a7b462a},
|
||||
{0xd5fba57b, 0x90684fea, 0xe0defe98, 0xed237883, 0x030ae924, 0xc502b692, 0xe7a1ec2c, 0x08aa58e8},
|
||||
{0x5e9020dd, 0xade9d4b4, 0x87db8813, 0x489259d2, 0x25051238, 0x5ddce740, 0xb5bc4d11, 0x0c775db1},
|
||||
{0x293f8481, 0xd52cc17a, 0x6f133205, 0x041178fb, 0xb2961832, 0xbbc70d18, 0x481760cd, 0x073d34d1},
|
||||
{0xfdacff58, 0x8215b91d, 0x98331645, 0xd8d9177d, 0x439e803c, 0xe85223ad, 0xcca42c1f, 0x04aa8ef0},
|
||||
{0x01ab3a4d, 0x006f60fa, 0x814ba450, 0xe6600e15, 0xdf9eb147, 0xbde4df36, 0x33760d7b, 0x055d58fa},
|
||||
{0xec2a895e, 0x476ef4a4, 0x63e3f04a, 0x9b506ee3, 0xd1a8a12f, 0x60c69477, 0x0cb92cc1, 0x11d4b7f6}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e},
|
||||
{0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99, 0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e},
|
||||
{0xd60fb046, 0xc9fa190c, 0xc5b4674e, 0xdb5c179b, 0xbc7b8726, 0x2b2bce0b, 0xbf6e69bf, 0x0e4eb338},
|
||||
{0x8ffc4ed5, 0x74732d1f, 0xb7f2eefc, 0x42d9f590, 0xa24dd4dd, 0xf70461e5, 0xef64676f, 0x03b6eba4},
|
||||
{0x102bbab0, 0x5a21f98a, 0x8d8e2efb, 0xa6a147a9, 0x7612906f, 0x0eb4f005, 0x47d8d2e3, 0x0e1a5481},
|
||||
{0xd01e5aa8, 0x6e509add, 0x6e3f123d, 0xe1582468, 0x8274db24, 0xbd6313ee, 0xd173a634, 0x05d5836e},
|
||||
{0xe975c0cf, 0x6aab3344, 0x6f1dc38e, 0xca362e0e, 0x1dd1743a, 0x2fe72cda, 0xc1b4c4c2, 0x0c1c956e},
|
||||
{0xec89a64f, 0x59fe97a0, 0xe8de5d4c, 0x579617d7, 0xc9c1ea7b, 0x256a305b, 0x53fa131b, 0x01ffae4e},
|
||||
{0x29bcb088, 0x463a73ff, 0xe1438e80, 0xee9e9a5e, 0x3c9369e4, 0x2a00951f, 0x80a32052, 0x09711183},
|
||||
{0x4bec8dd2, 0xa36899db, 0x96393687, 0x2946872e, 0x842df3c8, 0xd4b5734f, 0x5f5cd8fb, 0x0834098f},
|
||||
{0xe3c711b9, 0x4bc485f6, 0x648d1d7e, 0xf43a2598, 0xee88abaa, 0x7f981a0e, 0xec6a3f27, 0x0c88c9c3},
|
||||
{0x49046b52, 0x42bcc6c2, 0x56ab9ecc, 0xcc77294a, 0xe4df3ddd, 0x02ecb41a, 0x67f76726, 0x0e567d22},
|
||||
{0x91c64fc2, 0x1cc56cc3, 0xd16a490b, 0x8cb71e65, 0x14fac366, 0x984be37e, 0xa25d7ba5, 0x0a08e032},
|
||||
{0xd4f5941e, 0x966d9739, 0xe5772a73, 0x5805deb6, 0x5c1f970c, 0xe4eb0d33, 0xbdf35409, 0x039715db},
|
||||
{0xcc6518ac, 0x8419686c, 0x9c7a2366, 0x96dec3a8, 0x71724384, 0xefbfcac6, 0xaf34c239, 0x0c44b99a},
|
||||
{0xc18ff4fd, 0xcb66fe1b, 0x86c8d586, 0x588e18b3, 0x1dfab57c, 0xc6e6d2a3, 0x7d7d4efd, 0x10918ad2},
|
||||
{0x97a18f58, 0x56d6cf22, 0xd0d7abd9, 0x11710758, 0x5eb7a9c5, 0xd1a6608b, 0xc4937e38, 0x04059bdb},
|
||||
{0x4b1b63a9, 0x12998cbc, 0xcf420c9f, 0x0f780c6c, 0x129289ad, 0xa5e48723, 0x240a141d, 0x0a3a1223},
|
||||
{0x00db2b48, 0xa43c0e02, 0x933d10ee, 0x76585489, 0xc0ba6a80, 0x12d64af1, 0x2fad8d8e, 0x01940f43},
|
||||
{0x1d75bec9, 0xe29ef6c0, 0xd4b0183b, 0xead287a2, 0xedfd3795, 0x75a017cf, 0x64427c8e, 0x107f8d0f},
|
||||
{0xa26c8c12, 0xa6f4e1d1, 0xf6610f7e, 0x13571553, 0x56701caf, 0xd95e5df6, 0x2263d69d, 0x050e7b89},
|
||||
{0xc161761f, 0x271d7caf, 0xc369a371, 0xf1001d6f, 0x00e60f51, 0x65286415, 0xb74d14b8, 0x00b918f9},
|
||||
{0x03ad3139, 0x01d3f431, 0xa137ce16, 0xe56f6002, 0x1deb42e8, 0x97f53369, 0xaa37cddd, 0x033fa9ac},
|
||||
{0x60cf1330, 0x840f913b, 0x1df5ed87, 0x5610cde6, 0x72b36ddf, 0x858381b0, 0x6f64e0b7, 0x109bf66c},
|
||||
{0x930cee0b, 0x432d3626, 0xf26e8ba3, 0x55ed3efb, 0x14c5457f, 0x802eebcc, 0xe2310f22, 0x00d300e3},
|
||||
{0x4b9ac952, 0x3d29f5ba, 0xc8ea8f94, 0x7c7f2662, 0xcefc3052, 0x736ccb63, 0x0981f3cb, 0x04bfce2f},
|
||||
{0x5d4e643c, 0x3da791ea, 0x85bff013, 0xb6a956ef, 0xd73de6a3, 0x86c629a8, 0x6b8c48a9, 0x0a5a5f55},
|
||||
{0x49c6284a, 0x9ba6aa00, 0xeacbdc63, 0x0b8429fb, 0xedafdf37, 0x9b9c6c5b, 0xad0c78c6, 0x009907e8},
|
||||
{0x3e47b53f, 0x50380ce2, 0x3a9613fc, 0x6ea3c2d3, 0x4c87ab50, 0xfe743105, 0xd192221c, 0x07871979},
|
||||
{0xe978594b, 0x4ddd3320, 0x3abe3f79, 0xe5f36fbe, 0xe4dcff8e, 0x5dba9ef2, 0x7105148f, 0x0bfc27e2},
|
||||
{0x498fb549, 0xd5993cd5, 0x09da9272, 0x718adcee, 0x72bd5bc0, 0x9e03cbb4, 0xc592813f, 0x07206942},
|
||||
{0x78fd3239, 0xaf29730b, 0x40c3e723, 0xbd907ac9, 0x77f214f7, 0x5dcc0aad, 0xb05fb3a1, 0x02d958da},
|
||||
{0xdf80223d, 0x55f432c9, 0x11a2fed9, 0x23daf2f6, 0x41ae8c34, 0x9e43e003, 0x95f22373, 0x0d51533b},
|
||||
{0x7998b62c, 0xbb53132b, 0x22c9b4aa, 0x064a9186, 0x71d61334, 0xd56de253, 0x04e416f6, 0x10fcf25f},
|
||||
{0xdddb58ec, 0x41f8042f, 0x10886d85, 0x7dd54384, 0x622ff4b4, 0x19544f90, 0x050cc539, 0x02f0b49a},
|
||||
{0xa39b02a3, 0x8a3de898, 0xdc94422c, 0x068b2992, 0xf493db31, 0x1c5f019a, 0x11b0f668, 0x066b1790},
|
||||
{0x78500f1a, 0x98310dd7, 0x735ccb27, 0x1c6050bf, 0xb2081df4, 0x07b6fa7f, 0xfa0f1e20, 0x003edf24},
|
||||
{0x89b0ca6f, 0xb4d938e2, 0x2c897570, 0x0214eb59, 0x2d4cf27a, 0x56c45327, 0x3ed546a4, 0x10a2f358},
|
||||
{0xef01ed78, 0xf2828212, 0xf103c9ca, 0xa66094ac, 0x7a2d5573, 0xdceb481d, 0x8af46aab, 0x0190fcde},
|
||||
{0x526bf9fc, 0x023031cc, 0x79c209ba, 0x0e4136c0, 0x3ec42e5c, 0xe5234df1, 0x1d455234, 0x00cb9592},
|
||||
{0x33bf2a1c, 0x842b0c9c, 0xa29b9236, 0x1fd43c95, 0xc06795d3, 0x6b37a603, 0x0c1b712a, 0x00017b17},
|
||||
{0xaf858193, 0x2b955be2, 0x5fb5e378, 0xa513d8be, 0xa326aeb9, 0x88c4ebeb, 0xf3d45990, 0x00c378e2},
|
||||
{0x6464580f, 0x33e6c8c0, 0x3c4aa09f, 0x9d560eb3, 0xcc98f404, 0xb3f1a899, 0x8ca24b48, 0x012c1ea5},
|
||||
{0xe3b4dc56, 0xa0594a67, 0x91b698e1, 0xc8e6b582, 0x8df78057, 0x711cadbf, 0x396466f8, 0x0049abdf},
|
||||
{0x4ffa086a, 0xecc89610, 0xca06afc6, 0x4db82291, 0x8f3a6426, 0x9ae7c68c, 0x2a874432, 0x0b3dae8c},
|
||||
{0x3b3625b6, 0x1e62401f, 0x28471e5a, 0xd0692164, 0x5cad6b77, 0xb85aa9ec, 0xaa95acf2, 0x063e4b66},
|
||||
{0xb9112c51, 0x2542c2b2, 0x6e23b3ce, 0x36ead8da, 0x76476754, 0x9a268d13, 0xa1ad7cf1, 0x121f44ad}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af},
|
||||
{0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06},
|
||||
{0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2},
|
||||
{0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e, 0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08},
|
||||
{0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346, 0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33},
|
||||
{0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22, 0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9},
|
||||
{0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210, 0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93},
|
||||
{0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87, 0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9},
|
||||
{0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3, 0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab},
|
||||
{0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61, 0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85},
|
||||
{0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af, 0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1},
|
||||
{0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57, 0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8},
|
||||
{0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab, 0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003},
|
||||
{0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054, 0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1},
|
||||
{0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9, 0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007},
|
||||
{0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54, 0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3},
|
||||
{0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29, 0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08},
|
||||
{0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094, 0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3},
|
||||
{0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9, 0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309},
|
||||
{0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164, 0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433},
|
||||
{0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31, 0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9},
|
||||
{0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198, 0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513},
|
||||
{0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b, 0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539},
|
||||
{0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5, 0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b},
|
||||
{0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651, 0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555},
|
||||
{0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8, 0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559},
|
||||
{0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3, 0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c},
|
||||
{0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9, 0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d},
|
||||
{0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4, 0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e},
|
||||
{0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e},
|
||||
{0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e},
|
||||
{0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e},
|
||||
{0x7af74001, 0xa2117fff, 0x232ac481, 0x2b8e9efe, 0x2bdd8972, 0x139dfa73, 0x90d6f2a7, 0x12ab655e},
|
||||
{0xbd7ba001, 0x56117fff, 0x79956241, 0xc29c8afe, 0xc40a9cb9, 0xba2923c8, 0x9581cbfe, 0x12ab655e},
|
||||
{0xdebdd001, 0x30117fff, 0xa4cab121, 0x8e2380fe, 0x9021265d, 0x8d6eb873, 0x97d738aa, 0x12ab655e},
|
||||
{0xef5ee801, 0x1d117fff, 0xba655891, 0x73e6fbfe, 0xf62c6b2f, 0x771182c8, 0x9901ef00, 0x12ab655e},
|
||||
{0xf7af7401, 0x13917fff, 0xc532ac49, 0x66c8b97e, 0xa9320d98, 0x6be2e7f3, 0x99974a2b, 0x12ab655e},
|
||||
{0xfbd7ba01, 0x0ed17fff, 0xca995625, 0xe039983e, 0x02b4decc, 0xe64b9a89, 0x99e1f7c0, 0x12ab655e},
|
||||
{0xfdebdd01, 0x0c717fff, 0xcd4cab13, 0x1cf2079e, 0xaf764767, 0xa37ff3d3, 0x9a074e8b, 0x12ab655e},
|
||||
{0xfef5ee81, 0x0b417fff, 0xcea6558a, 0x3b4e3f4e, 0x05d6fbb4, 0x021a2079, 0x9a19f9f1, 0x12ab655e},
|
||||
{0xff7af741, 0x8aa97fff, 0xcf532ac5, 0xca7c5b26, 0xb10755da, 0xb16736cb, 0x9a234fa3, 0x12ab655e},
|
||||
{0xffbd7ba1, 0x4a5d7fff, 0xcfa99563, 0x12136912, 0x069f82ee, 0x090dc1f5, 0x9a27fa7d, 0x12ab655e},
|
||||
{0xffdebdd1, 0x2a377fff, 0xcfd4cab2, 0xb5def008, 0xb16b9977, 0xb4e10789, 0x9a2a4fe9, 0x12ab655e},
|
||||
{0xffef5ee9, 0x9a247fff, 0xcfea6559, 0x87c4b383, 0x06d1a4bc, 0x0acaaa54, 0x9a2b7aa0, 0x12ab655e},
|
||||
{0xfff7af75, 0x521affff, 0x4ff532ad, 0xf0b79541, 0x3184aa5e, 0x35bf7bb9, 0x9a2c0ffb, 0x12ab655e},
|
||||
{0xfffbd7bb, 0x2e163fff, 0x0ffa9957, 0x25310620, 0xc6de2d30, 0xcb39e46b, 0x9a2c5aa8, 0x12ab655e},
|
||||
{0xfffdebde, 0x1c13dfff, 0x6ffd4cac, 0xbf6dbe8f, 0x118aee98, 0x95f718c5, 0x9a2c7fff, 0x12ab655e}}};
|
||||
static constexpr storage<8> rou = {0xec2a895e, 0x476ef4a4, 0x63e3f04a, 0x9b506ee3,
|
||||
0xd1a8a12f, 0x60c69477, 0x0cb92cc1, 0x11d4b7f6};
|
||||
TWIDDLES(modulus, rou)
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,54 +3,14 @@
|
||||
#define BLS12_381_BASE_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bls12_381 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
static constexpr unsigned modulus_bit_count = 381;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe,
|
||||
0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd,
|
||||
0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709,
|
||||
0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa,
|
||||
0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13,
|
||||
0x0d2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x00005555, 0x46010000, 0x4eac0000, 0xe1540001,
|
||||
0x094f09db, 0x98cf2d5f, 0x0c7aed40, 0x9b88b47b,
|
||||
0xbcb45328, 0xb4e45849, 0xc6801965, 0xe5feee15};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7,
|
||||
0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x0002fffd, 0x76090000, 0xc40c0002, 0xebf4000b,
|
||||
0x53c758ba, 0x5f489857, 0x70525745, 0x77ce5853,
|
||||
0xa256ec6d, 0x5c071a97, 0xfa80e493, 0x15f65ec3};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x380b4820, 0xf4d38259, 0xd898fafb, 0x7fe11274,
|
||||
0x14956dc8, 0x343ea979, 0x58a88de9, 0x1797ab14,
|
||||
0x3c4f538b, 0xed5e6427, 0xe8fb0ce9, 0x14fec701};
|
||||
static constexpr storage<12> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0,
|
||||
0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
PARAMS(modulus)
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 1;
|
||||
// true if nonresidue is negative
|
||||
@@ -58,4 +18,4 @@ namespace bls12_381 {
|
||||
};
|
||||
} // namespace bls12_381
|
||||
|
||||
#endif
|
||||
#endif
|
||||
@@ -4,148 +4,17 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bls12_381 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 32;
|
||||
static constexpr unsigned modulus_bit_count = 255;
|
||||
static constexpr unsigned num_of_reductions = 2;
|
||||
static constexpr storage<8> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402,
|
||||
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402,
|
||||
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805,
|
||||
0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0xfffffffc, 0xfff96ffb, 0x4ef6900b,
|
||||
0x26876015, 0xcce76020, 0xa675f520, 0xcfb69d4c};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0x00000000, 0x0001a401, 0xac425bfd,
|
||||
0xf65e27fa, 0xccc627f7, 0xd66282b7, 0x8c1258ac};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0xfffffff8, 0xfff2dffb, 0x9dfa401f, 0x736ec016, 0xa62b8008, 0x50cfdee1, 0x22a90579,
|
||||
0x2aa71985, 0x09847dbd, 0x664d2877, 0x3a564fe5, 0xbcb3083c, 0x48304f6f, 0xd2f09de1, 0xd1fd83cf};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad,
|
||||
0xc1f823b4, 0x0e2d772d, 0x7fb78ddf, 0x8d54253b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xfffffffe, 0x00000001, 0x00034802, 0x5884b7fa,
|
||||
0xecbc4ff5, 0x998c4fef, 0xacc5056f, 0x1824b159};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0xfe75c040, 0x13f75b69, 0x09dc705f, 0xab6fca8f,
|
||||
0x4f77266a, 0x7204078a, 0x30009d57, 0x1bbe8693};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753},
|
||||
{0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000},
|
||||
{0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d},
|
||||
{0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e},
|
||||
{0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb},
|
||||
{0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac},
|
||||
{0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802},
|
||||
{0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59},
|
||||
{0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667},
|
||||
{0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098},
|
||||
{0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b},
|
||||
{0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0},
|
||||
{0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8},
|
||||
{0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8},
|
||||
{0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911},
|
||||
{0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd},
|
||||
{0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333},
|
||||
{0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db},
|
||||
{0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83},
|
||||
{0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f},
|
||||
{0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5},
|
||||
{0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3},
|
||||
{0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd},
|
||||
{0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc},
|
||||
{0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd},
|
||||
{0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580},
|
||||
{0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d},
|
||||
{0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d},
|
||||
{0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f},
|
||||
{0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b},
|
||||
{0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72},
|
||||
{0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753},
|
||||
{0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753},
|
||||
{0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e},
|
||||
{0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4},
|
||||
{0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c},
|
||||
{0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee},
|
||||
{0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d},
|
||||
{0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25},
|
||||
{0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e},
|
||||
{0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508},
|
||||
{0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d},
|
||||
{0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63},
|
||||
{0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7},
|
||||
{0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1},
|
||||
{0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac},
|
||||
{0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003},
|
||||
{0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c},
|
||||
{0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7},
|
||||
{0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950},
|
||||
{0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2},
|
||||
{0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6},
|
||||
{0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5},
|
||||
{0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9},
|
||||
{0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960},
|
||||
{0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6},
|
||||
{0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf},
|
||||
{0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f},
|
||||
{0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533},
|
||||
{0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff},
|
||||
{0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287},
|
||||
{0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6},
|
||||
{0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9},
|
||||
{0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e},
|
||||
{0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268},
|
||||
{0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd},
|
||||
{0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18},
|
||||
{0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5},
|
||||
{0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04},
|
||||
{0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab},
|
||||
{0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f},
|
||||
{0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9},
|
||||
{0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e},
|
||||
{0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878},
|
||||
{0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5},
|
||||
{0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c},
|
||||
{0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77},
|
||||
{0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365},
|
||||
{0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c},
|
||||
{0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57},
|
||||
{0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5},
|
||||
{0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014},
|
||||
{0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3},
|
||||
{0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583},
|
||||
{0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b},
|
||||
{0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df},
|
||||
{0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719},
|
||||
{0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736},
|
||||
{0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744},
|
||||
{0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b},
|
||||
{0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f},
|
||||
{0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751},
|
||||
{0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752},
|
||||
{0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}}};
|
||||
static constexpr storage<8> rou = {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2,
|
||||
0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
|
||||
TWIDDLES(modulus, rou)
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,42 +3,14 @@
|
||||
#define BN254_BASE_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bn254 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned modulus_bit_count = 254;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522,
|
||||
0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45,
|
||||
0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x278302b9, 0xc3df73e9, 0x978e3572, 0x687e956e,
|
||||
0x7e7ea7a2, 0x47afba49, 0x1ece5fd6, 0xcf9bb18d};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95,
|
||||
0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a,
|
||||
0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55,
|
||||
0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17,
|
||||
0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xc58f0d9d, 0xd35d438d, 0xf5c70b3d, 0x0a78eb28,
|
||||
0x7879462c, 0x666ea36f, 0x9a07df2f, 0x0e0a77c1};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x014afa37, 0xed84884a, 0x0278edf8, 0xeb202285,
|
||||
0xb74492d9, 0xcf63e9cf, 0x59e5c639, 0x2e671571};
|
||||
static constexpr storage<8> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
PARAMS(modulus)
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 1;
|
||||
// true if nonresidue is negative
|
||||
|
||||
@@ -4,136 +4,17 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bn254 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 28;
|
||||
static constexpr unsigned modulus_bit_count = 254;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<8> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090,
|
||||
0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121,
|
||||
0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x0fffffff, 0xbc1e0a6c, 0x86468f6e, 0xd7cc17b7,
|
||||
0x7e7ea7a2, 0x47afba49, 0x1ece5fd6, 0xcf9bb18d};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975,
|
||||
0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb,
|
||||
0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7,
|
||||
0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520,
|
||||
0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x4ffffffb, 0xac96341c, 0x9f60cd29, 0x36fc7695,
|
||||
0x7879462e, 0x666ea36f, 0x9a07df2f, 0x0e0a77c1};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x6db1194e, 0xdc5ba005, 0xe111ec87, 0x090ef5a9,
|
||||
0xaeb85d5d, 0xc8260de4, 0x82c5551c, 0x15ebf951};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72},
|
||||
{0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72},
|
||||
{0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1},
|
||||
{0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2},
|
||||
{0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6},
|
||||
{0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d},
|
||||
{0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd},
|
||||
{0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561},
|
||||
{0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e},
|
||||
{0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1},
|
||||
{0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584},
|
||||
{0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596},
|
||||
{0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49},
|
||||
{0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651},
|
||||
{0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f},
|
||||
{0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb},
|
||||
{0xce8f58e5, 0x276e5858, 0x5655210e, 0x0512eca9, 0xe70e61f3, 0xc3708cc6, 0xa7d74902, 0x1bf82deb},
|
||||
{0x7dcdc0e0, 0x84c6bfa5, 0x13f4d1bd, 0xc57088ff, 0xb5b95e4d, 0x5c0176fb, 0x3a8d46c1, 0x19ddbcaf},
|
||||
{0x613f6cbd, 0x5c1d597f, 0x8357473a, 0x30525841, 0x968e4915, 0x51829353, 0x844bca52, 0x2260e724},
|
||||
{0x53337857, 0x53422da9, 0xdbed349f, 0xac616632, 0x06d1e303, 0x27508aba, 0x0a0ed063, 0x26125da1},
|
||||
{0xfcd0b523, 0xb2c87885, 0xca5a5ce3, 0x58f50577, 0x8598fc8c, 0x4222150e, 0xae2bdd1a, 0x1ded8980},
|
||||
{0xa219447e, 0xa76dde56, 0x359eebbb, 0xec1a1f05, 0x8be08215, 0xcda0ceb6, 0xb1f8d9a7, 0x1ad92f46},
|
||||
{0xab80c59d, 0xb54d4506, 0x22dd991f, 0x5680c640, 0xbc23a139, 0x6b7bcf70, 0x5ab4c74d, 0x0210fe63},
|
||||
{0xe32b045b, 0x1c25f1e3, 0x2e832696, 0x145e0db8, 0x71c6441f, 0x852e2a03, 0x845d50d2, 0x0c9fabc7},
|
||||
{0xb878331a, 0xeccd4f3e, 0x8dc6d26e, 0x7b26b748, 0xd9130cd4, 0xa19b0361, 0x326341ef, 0x2a734ebb},
|
||||
{0x2f4e9212, 0x1c79bd57, 0x3d68f9ae, 0x605b52b6, 0xb8d89d4a, 0x0113eff9, 0xf1ff73b2, 0x1067569a},
|
||||
{0x80928c44, 0x034afc45, 0xf6437da2, 0xb4823532, 0x6dc6e364, 0x5f256a9f, 0xb363ebe8, 0x049ae702},
|
||||
{0x725b19f0, 0x9bd61b6e, 0x41112ed4, 0x402d111e, 0x8ef62abc, 0x00e0a7eb, 0xa58a7e85, 0x2a3c09f0}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72},
|
||||
{0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000},
|
||||
{0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711},
|
||||
{0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf},
|
||||
{0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136},
|
||||
{0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2},
|
||||
{0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053},
|
||||
{0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327},
|
||||
{0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821},
|
||||
{0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7},
|
||||
{0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21},
|
||||
{0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2},
|
||||
{0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58},
|
||||
{0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f},
|
||||
{0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766},
|
||||
{0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7},
|
||||
{0xd96f0d22, 0x20e75251, 0x6bd4e8c9, 0xc01c7f08, 0xf9dd50c4, 0x37d8b00b, 0xc43ca872, 0x244cf010},
|
||||
{0x66c5174c, 0x7a823174, 0x22d5ad70, 0x7dbe118c, 0x111119c5, 0xf8d7c71d, 0x83780e87, 0x036853f0},
|
||||
{0xca535321, 0xd98f9924, 0xe66e6c81, 0x22dbc0ef, 0x664ae1b7, 0xa15cf806, 0xa314fb67, 0x06e402c0},
|
||||
{0xe26c91f3, 0x0852a8fd, 0x3baca626, 0x521f45cb, 0x2c51bfca, 0xab6473bc, 0x2100895f, 0x100c332d},
|
||||
{0xa376d0f0, 0xf5fac783, 0x940797d3, 0x50fd246e, 0x145f5278, 0xab14ecc1, 0x41091b14, 0x19c6dfb8},
|
||||
{0x7faa1396, 0x43dc52e2, 0x4beced23, 0xd437be9d, 0x6d3c38c3, 0xecc11e9c, 0x0c74a876, 0x2eb58439},
|
||||
{0xd69ca83b, 0x811b03e7, 0xa1a6eadf, 0x126a786b, 0x4e2b8e61, 0x1dd75c9f, 0xbda6792b, 0x2165a1a5},
|
||||
{0x110b737b, 0x02e1d4d1, 0xb323a164, 0x7be1488d, 0x9cd06163, 0xa334d317, 0xdb50e9cd, 0x2710c370},
|
||||
{0x9550fe47, 0x45d2f3cb, 0xf6a8efc4, 0x5f43327b, 0xe993ee18, 0x5bcd0d50, 0xb21de952, 0x27f035bd},
|
||||
{0x232e3983, 0x1d63cbae, 0xaa1b58e2, 0xac815161, 0x6aeb019e, 0x531f42a5, 0x03ca2ef5, 0x2dcd51d9},
|
||||
{0x980db869, 0xa8b64ba8, 0xc9718f6c, 0x4c787f72, 0x15d27ced, 0x7746a25a, 0x435a46e9, 0x110bf78f},
|
||||
{0x9d18157e, 0x72394277, 0xfd399d5d, 0xec9d51f8, 0x49d5387f, 0x6117635d, 0x9c229cd5, 0x01b77519}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739},
|
||||
{0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6},
|
||||
{0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4},
|
||||
{0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b},
|
||||
{0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff},
|
||||
{0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39},
|
||||
{0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5},
|
||||
{0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24},
|
||||
{0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b},
|
||||
{0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f},
|
||||
{0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9},
|
||||
{0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d},
|
||||
{0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50},
|
||||
{0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1},
|
||||
{0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa},
|
||||
{0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e},
|
||||
{0xf5360801, 0x8b9953a2, 0x859533b4, 0x7c05542e, 0x5ea6179c, 0xe83b698e, 0xb9f82f90, 0x30643640},
|
||||
{0x729b0401, 0xe7bda49b, 0x7fa75222, 0xd21c9e3b, 0x7013b7fc, 0x5045d7a2, 0xcd94e7dd, 0x30644259},
|
||||
{0xb14d8201, 0x15cfcd17, 0xfcb0615a, 0xfd284341, 0x78ca882c, 0x844b0eac, 0x57634403, 0x30644866},
|
||||
{0xd0a6c101, 0xacd8e155, 0x3b34e8f5, 0x12ae15c5, 0x7d25f045, 0x9e4daa31, 0x9c4a7216, 0x30644b6c},
|
||||
{0xe0536081, 0x785d6b74, 0xda772cc3, 0x1d70ff06, 0xff53a451, 0x2b4ef7f3, 0xbebe0920, 0x30644cef},
|
||||
{0x6829b041, 0x5e1fb084, 0xaa184eaa, 0x22d273a7, 0x406a7e57, 0xf1cf9ed5, 0x4ff7d4a4, 0x30644db1},
|
||||
{0x2c14d821, 0xd100d30c, 0x11e8df9d, 0x25832df8, 0xe0f5eb5a, 0x550ff245, 0x1894ba67, 0x30644e12},
|
||||
{0x0e0a6c11, 0x8a716450, 0x45d12817, 0xa6db8b20, 0x313ba1db, 0x86b01bfe, 0x7ce32d48, 0x30644e42},
|
||||
{0xff053609, 0x6729acf1, 0x5fc54c54, 0x6787b9b4, 0x595e7d1c, 0x1f8030da, 0xaf0a66b9, 0x30644e5a},
|
||||
{0xf7829b05, 0xd585d142, 0x6cbf5e72, 0xc7ddd0fe, 0x6d6feabc, 0x6be83b48, 0xc81e0371, 0x30644e66},
|
||||
{0x73c14d83, 0x0cb3e36b, 0x733c6782, 0xf808dca3, 0x7778a18c, 0x921c407f, 0xd4a7d1cd, 0x30644e6c},
|
||||
{0xb1e0a6c2, 0xa84aec7f, 0xf67aec09, 0x101e6275, 0xfc7cfcf5, 0xa536431a, 0xdaecb8fb, 0x30644e6f}}};
|
||||
static constexpr storage<8> rou = {0x725b19f0, 0x9bd61b6e, 0x41112ed4, 0x402d111e,
|
||||
0x8ef62abc, 0x00e0a7eb, 0xa58a7e85, 0x2a3c09f0};
|
||||
TWIDDLES(modulus, rou)
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,76 +3,15 @@
|
||||
#define BW6_761_BASE_BASE_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace bw6_761 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 24;
|
||||
static constexpr unsigned modulus_bit_count = 761;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {
|
||||
0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
|
||||
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
|
||||
0x8087be41, 0x528275ef, 0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824};
|
||||
static constexpr storage<limbs_count> modulus_2 = {
|
||||
0x00000116, 0xe93a0000, 0xe0000105, 0xcd227cd0, 0xd5e1486f, 0x2c19f15d, 0xaccf51f0, 0x31422d84,
|
||||
0xe7d7fe5d, 0xe3b9a7b8, 0x25f3fb20, 0x0d1391da, 0x4b684609, 0x079d75fe, 0xcb09d232, 0xe0f74c71,
|
||||
0x010f7c82, 0xa504ebdf, 0x03a28d10, 0x724c30d5, 0x09f5fe7d, 0xa30f9280, 0xf7079c15, 0x0245d049};
|
||||
static constexpr storage<limbs_count> modulus_4 = {
|
||||
0x0000022c, 0xd2740000, 0xc000020b, 0x9a44f9a1, 0xabc290df, 0x5833e2bb, 0x599ea3e0, 0x62845b09,
|
||||
0xcfaffcba, 0xc7734f71, 0x4be7f641, 0x1a2723b4, 0x96d08c12, 0x0f3aebfc, 0x9613a464, 0xc1ee98e3,
|
||||
0x021ef905, 0x4a09d7be, 0x07451a21, 0xe49861aa, 0x13ebfcfa, 0x461f2500, 0xee0f382b, 0x048ba093};
|
||||
static constexpr storage<limbs_count> neg_modulus = {
|
||||
0xffffff75, 0x0b62ffff, 0x8fffff7d, 0x196ec197, 0x150f5bc8, 0xe9f30751, 0xa9985707, 0x675ee93d,
|
||||
0x8c1400d1, 0x8e232c23, 0xed06026f, 0x79763712, 0xda4bdcfb, 0xfc314500, 0x1a7b16e6, 0x8f8459c7,
|
||||
0x7f7841be, 0xad7d8a10, 0x7e2eb977, 0x46d9e795, 0xfb0500c1, 0x2e7836bf, 0x047c31f5, 0xfedd17db};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
|
||||
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
|
||||
0x8087be41, 0x528275ef, 0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00004b79, 0xa27e0000, 0xa0008e35, 0xbae96db2, 0x82ebf7b1, 0x4aaf1d22, 0x7224cb3d, 0x7908fd92,
|
||||
0x29b17ed1, 0x6fe68290, 0xafc968db, 0xfe1b7282, 0x9028bbf0, 0xe1e548cb, 0x3a8ffc03, 0x09094ed6,
|
||||
0x61e9cf95, 0xd63ea631, 0x54918abf, 0xe834ca62, 0x52aa651e, 0xe52594ed, 0xb4c46a4f, 0xe2423252,
|
||||
0x6c09aae4, 0xa8cf17d8, 0xc5f5cee5, 0x2d80ffb0, 0x55bbc10d, 0x2dede100, 0xe2360382, 0x1f4e7a7c,
|
||||
0xae2fe433, 0x586c3847, 0x78eadae1, 0x915c56e1, 0x69a5ce00, 0xa35b2945, 0x767c08ca, 0x9d66e7fe,
|
||||
0xd8b88c77, 0x7e44cf6a, 0x67c9c873, 0xb29bfc93, 0xbbc80af9, 0x6a24005a, 0xc64ce3d5, 0x00014a92};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x000096f2, 0x44fc0000, 0x40011c6b, 0x75d2db65, 0x05d7ef63, 0x955e3a45, 0xe449967a, 0xf211fb24,
|
||||
0x5362fda2, 0xdfcd0520, 0x5f92d1b6, 0xfc36e505, 0x205177e1, 0xc3ca9197, 0x751ff807, 0x12129dac,
|
||||
0xc3d39f2a, 0xac7d4c62, 0xa923157f, 0xd06994c4, 0xa554ca3d, 0xca4b29da, 0x6988d49f, 0xc48464a5,
|
||||
0xd81355c9, 0x519e2fb0, 0x8beb9dcb, 0x5b01ff61, 0xab77821a, 0x5bdbc200, 0xc46c0704, 0x3e9cf4f9,
|
||||
0x5c5fc866, 0xb0d8708f, 0xf1d5b5c2, 0x22b8adc2, 0xd34b9c01, 0x46b6528a, 0xecf81195, 0x3acdcffc,
|
||||
0xb17118ef, 0xfc899ed5, 0xcf9390e6, 0x6537f926, 0x779015f3, 0xd44800b5, 0x8c99c7aa, 0x00029525};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00012de4, 0x89f80000, 0x800238d6, 0xeba5b6ca, 0x0bafdec6, 0x2abc748a, 0xc8932cf5, 0xe423f649,
|
||||
0xa6c5fb45, 0xbf9a0a40, 0xbf25a36d, 0xf86dca0a, 0x40a2efc3, 0x8795232e, 0xea3ff00f, 0x24253b58,
|
||||
0x87a73e54, 0x58fa98c5, 0x52462aff, 0xa0d32989, 0x4aa9947b, 0x949653b5, 0xd311a93f, 0x8908c94a,
|
||||
0xb026ab93, 0xa33c5f61, 0x17d73b96, 0xb603fec3, 0x56ef0434, 0xb7b78401, 0x88d80e08, 0x7d39e9f3,
|
||||
0xb8bf90cc, 0x61b0e11e, 0xe3ab6b85, 0x45715b85, 0xa6973802, 0x8d6ca515, 0xd9f0232a, 0x759b9ff9,
|
||||
0x62e231de, 0xf9133dab, 0x9f2721cd, 0xca6ff24d, 0xef202be6, 0xa890016a, 0x19338f55, 0x00052a4b};
|
||||
static constexpr storage<limbs_count> m = {0x2507e899, 0x11629ccd, 0x2e4424dd, 0xab1eef5b, 0x481d2cfa, 0xb82146a9,
|
||||
0x34e4227b, 0xf3182afa, 0xbeb25621, 0xf615fdb5, 0xccc261d6, 0xc4d8988c,
|
||||
0xaaf4fab0, 0x3590d652, 0x2ab9ff30, 0x9c5d0a04, 0x6ec3f460, 0xf6e8534f,
|
||||
0x88075ab4, 0xe8d78b06, 0x6f3fc8fe, 0xa8d3675b, 0x7bc5cd4b, 0x03852086};
|
||||
static constexpr storage<limbs_count> one = {
|
||||
0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {
|
||||
0xffff85d5, 0x0202ffff, 0x8fff8ce7, 0x5a582635, 0x827faade, 0x9e996e43, 0x0ee47df4, 0xda6aff32,
|
||||
0x1d94b80b, 0xece9cb3e, 0x5248240b, 0xc0e667a2, 0xdcad3905, 0xa74da5bf, 0x462f2103, 0x2352e7fe,
|
||||
0x08b1c87c, 0x7b565880, 0xe711022f, 0x45848a63, 0x9f65a9df, 0xd7a81ebb, 0xf127e87d, 0x0051f77e};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {
|
||||
0x181fa3f1, 0x27c2b2a0, 0x25a0e1b8, 0x7d9ca9f9, 0x0a004a5d, 0x35a910f0, 0xdb6b8539, 0x54655b3f,
|
||||
0x7695ef18, 0x5e763565, 0x4fae56bb, 0x226022c2, 0xb70d7652, 0x80e7f067, 0x72116b89, 0x435a8b4a,
|
||||
0x5d84e0d4, 0xac258fd6, 0x4427c7b2, 0x47ee8ac5, 0xd04e621b, 0x478c4048, 0x2add3e93, 0x00e0aa7d};
|
||||
static constexpr storage<24> modulus = {0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae,
|
||||
0x5667a8f8, 0x98a116c2, 0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed,
|
||||
0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638, 0x8087be41, 0x528275ef,
|
||||
0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824};
|
||||
PARAMS(modulus)
|
||||
};
|
||||
} // namespace bw6_761
|
||||
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#include "fields/snark_fields/bls12_377_base.cuh"
|
||||
|
||||
namespace bw6_761 {
|
||||
|
||||
@@ -3,46 +3,15 @@
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/quartic_extension.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
namespace babybear {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 1;
|
||||
static constexpr unsigned omegas_count = 28;
|
||||
static constexpr unsigned modulus_bit_count = 31;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<1> modulus = {0x78000001};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x78000001};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xf0000002};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000000};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x87ffffff};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x78000001, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {0xf0000001, 0x38400000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {0xe0000002, 0x70800001};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {0xc0000004, 0xe1000003};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x88888887};
|
||||
static constexpr storage<limbs_count> one = {0x00000001};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xffffffe};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x38400000};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x78000000}, {0x10faa3e0}, {0x6b615c47}, {0x21ceed5a}, {0x2c1c3348}, {0x36c54c86}, {0x701dd01c},
|
||||
{0x56a9a28e}, {0x03e4cabf}, {0x5bacde79}, {0x1eb53838}, {0x1cd781af}, {0x0961a0b7}, {0x65098a87},
|
||||
{0x77851a0b}, {0x5bcba331}, {0x053fc0f5}, {0x5bf816e5}, {0x4bb124ab}, {0x571e9d4e}, {0x313732cb},
|
||||
{0x28aca172}, {0x4e319b52}, {0x45692d95}, {0x14ff4ba1}, {0x00004951}, {0x00000089}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x78000000}, {0x67055c21}, {0x5ee99486}, {0x0bb4c4e4}, {0x4ab33b27}, {0x044b4497}, {0x410e23aa},
|
||||
{0x08a7ee2b}, {0x563cb93d}, {0x3d70b4b7}, {0x77d999f1}, {0x6ceb65b5}, {0x49e7f635}, {0x0eae3a8c},
|
||||
{0x238b8a78}, {0x70d71b0a}, {0x0eaacc45}, {0x5af0f193}, {0x47303308}, {0x573cbfad}, {0x29ff72c0},
|
||||
{0x05af9dac}, {0x00ef24df}, {0x26985530}, {0x22d1ce4b}, {0x08359375}, {0x2cabe994}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x3c000001}, {0x5a000001}, {0x69000001}, {0x70800001}, {0x74400001}, {0x76200001}, {0x77100001},
|
||||
{0x77880001}, {0x77c40001}, {0x77e20001}, {0x77f10001}, {0x77f88001}, {0x77fc4001}, {0x77fe2001},
|
||||
{0x77ff1001}, {0x77ff8801}, {0x77ffc401}, {0x77ffe201}, {0x77fff101}, {0x77fff881}, {0x77fffc41},
|
||||
{0x77fffe21}, {0x77ffff11}, {0x77ffff89}, {0x77ffffc5}, {0x77ffffe3}, {0x77fffff2}}};
|
||||
static constexpr storage<1> rou = {0x00000089};
|
||||
TWIDDLES(modulus, rou)
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 11;
|
||||
|
||||
@@ -14,7 +14,7 @@ namespace m31 {
|
||||
HOST_DEVICE_INLINE MersenneField(storage<CONFIG::limbs_count> x) : Field<CONFIG>{x} {}
|
||||
HOST_DEVICE_INLINE MersenneField(const Field<CONFIG>& other) : Field<CONFIG>(other) {}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField zero() { return MersenneField(CONFIG::zero.limbs[0]); }
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField zero() { return MersenneField(CONFIG::zero); }
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE MersenneField one() { return MersenneField(CONFIG::one.limbs[0]); }
|
||||
|
||||
|
||||
@@ -2,626 +2,18 @@
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "fields/params_gen.cuh"
|
||||
|
||||
// modulus = 3618502788666131213697322783095070105623107215331596699973092056135872020481 (2^251+17*2^192+1)
|
||||
namespace stark252 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned modulus_bit_count = 252;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr unsigned omegas_count = 192;
|
||||
static constexpr storage<8> modulus = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000011, 0x08000000};
|
||||
PARAMS(modulus)
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000011, 0x08000000};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000022, 0x10000000};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000044, 0x20000000};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xffffffee, 0xf7ffffff};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000011, 0x08000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000022, 0x10000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000121, 0x10000000, 0x00000001, 0x00400000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000044, 0x20000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000242, 0x20000000, 0x00000002, 0x00800000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000088, 0x40000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000484, 0x40000000, 0x00000004, 0x01000000};
|
||||
static constexpr storage<limbs_count> m = {0x8c81fffb, 0x00000002, 0xfeccf000, 0xffffffff,
|
||||
0x0000907f, 0x00000000, 0xffffffbc, 0x1fffffff};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xffffffe1, 0xffffffff, 0xffffffff, 0xffffffff,
|
||||
0xffffffff, 0xffffffff, 0xfffffdf0, 0x07ffffff};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000121, 0x10000000, 0x00000001, 0x00400000};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000011, 0x08000000},
|
||||
{0xf41337e3, 0x2a616626, 0xac8320da, 0xc5268e56, 0x4329f8c7, 0x53312066, 0x29a2995b, 0x06250239},
|
||||
{0xee6feebb, 0x3ada5e1d, 0xe4412e87, 0x98c62155, 0x2f9c676e, 0xc90adb1e, 0x0de874d9, 0x063365fe},
|
||||
{0x6021e539, 0x8337c45f, 0xbbf30245, 0xb0bdf467, 0x514425f3, 0x4537602d, 0x88826aba, 0x05ec467b},
|
||||
{0x9b48a8ab, 0x2225638f, 0x1a8e7981, 0x26da375d, 0xce6246af, 0xfcdca219, 0x9ecd5c85, 0x0789ad45},
|
||||
{0xb2703765, 0xd6871506, 0xf9e225ec, 0xd09bd064, 0x10826800, 0x5e869a07, 0xe82b2bb5, 0x0128f0fe},
|
||||
{0xdd4af20f, 0xfdab65db, 0x56f9ddbc, 0xefa66822, 0x1b03a097, 0x587781ce, 0x9556f9b8, 0x000fcad1},
|
||||
{0xff0cb347, 0x9f1bc8d7, 0xd0e87cd5, 0xc4d78992, 0xdd51a717, 0xbc7924d5, 0xfd121b58, 0x00c92ecb},
|
||||
{0xc13a1d0b, 0xcc4074a0, 0xe3bc8e32, 0xa1f811a9, 0x6d4b9bd4, 0x0234b46e, 0x7880b4dc, 0x011d07d9},
|
||||
{0xec89c4f1, 0xa206c054, 0xdc125289, 0x653d9e35, 0x711825f5, 0x72406af6, 0x46a03edd, 0x0659d839},
|
||||
{0x0fa30710, 0x45391692, 0x11b54c6c, 0xd439f572, 0xa3492c1e, 0xed5ebbf4, 0xb5d9a6de, 0x010f4d91},
|
||||
{0x7afd187f, 0x9273dbbc, 0x91ee171f, 0xdb5375bc, 0x6749ae3d, 0xc061f425, 0x6ec477cf, 0x003d14df},
|
||||
{0x3112b02d, 0x8171e1da, 0xadf9bf78, 0x5c4564eb, 0x5689b232, 0x68c34184, 0x6538624f, 0x0363d70a},
|
||||
{0x606082e1, 0x3e5a42f0, 0x76fc314a, 0x5edd09f0, 0x0f673d7c, 0xd650df25, 0x34832dba, 0x0393a32b},
|
||||
{0x13a77460, 0xe3efc75d, 0x62ef8a01, 0x93898bc8, 0x8bdbd9b3, 0x1c3a6e5c, 0x611b7206, 0x034b5d5d},
|
||||
{0x309d9da9, 0x80ee9837, 0xf51eddbc, 0x1646d633, 0x4901fab8, 0xb9d2cd85, 0x9978ee09, 0x01eb6d84},
|
||||
{0x2755bfac, 0xa7b1f98c, 0xeb7aa1c1, 0x9ec8116c, 0x3109e611, 0x0eeadedd, 0xc9761a8a, 0x06a6f98d},
|
||||
{0x9745a046, 0xce7b0a8b, 0xe411ee63, 0x7ff61841, 0x635f8799, 0x34f67453, 0xef852560, 0x04768803},
|
||||
{0xbffaa9db, 0x1727fce0, 0xf973dc22, 0x858f5918, 0x223f6558, 0x3e277fa0, 0xf71614e3, 0x02d25658},
|
||||
{0x8574e81f, 0xe3d47b99, 0x7fc4c648, 0xc727c9af, 0xee93dc85, 0x581d81ca, 0xca8a00d9, 0x0594beaf},
|
||||
{0x0e5ffcb8, 0x00654744, 0xe7c1b2fd, 0x030530a6, 0xecbf157b, 0x27e46d76, 0xbeea04f1, 0x01f4c2bf},
|
||||
{0x3e3a2f4b, 0xead33145, 0xd6482f17, 0xd841544d, 0x8d24a344, 0x9822fb10, 0x31eeac7c, 0x03e43835},
|
||||
{0xb40bdbe8, 0x01af11c3, 0xb32a3b23, 0xd7c9c0a1, 0xcd0be360, 0x81cb2e43, 0xafb3df1a, 0x01054544},
|
||||
{0x77156db2, 0xf6b13488, 0xddc0f211, 0x1ad6f3be, 0xd664f4da, 0xe643d3ea, 0x174a8e80, 0x071a47b8},
|
||||
{0x4ca88ffc, 0xb86b03a4, 0x8ef9a25a, 0x6e3398e6, 0xf5fa4665, 0xce9a0d37, 0x5c437763, 0x06e8e769},
|
||||
{0x4586dbc3, 0x32609f1d, 0xaa2da684, 0x03148f22, 0x4795d346, 0xa679e36b, 0x9e51225c, 0x03d8d2c7},
|
||||
{0xea5f81cf, 0xeac5be9e, 0x64c12e72, 0x102e16b2, 0xfee282e4, 0xce0bc0d9, 0xa93b28f3, 0x01f05206},
|
||||
{0xbb6422f9, 0x258e96d2, 0x617c5468, 0x751615d8, 0x6056f032, 0x27145cb6, 0x81c06d84, 0x057a7971},
|
||||
{0xb030713c, 0xf42231bb, 0x3a96c59e, 0xae9c3f9a, 0xf1ee840c, 0x5397e8e2, 0xf2b87657, 0x05e7deca},
|
||||
{0xf81f58b4, 0x209745aa, 0x91af248d, 0x74a64310, 0xc04b00b7, 0xe566a8e1, 0x80fb4cea, 0x022bde40},
|
||||
{0x5de74517, 0x8265b62b, 0xb9b9f2c9, 0x6a788149, 0xa9565d98, 0x6fec2239, 0x573f0c28, 0x060ac0c4},
|
||||
{0xd3ce8992, 0xc129d0f1, 0x81c43de5, 0x719252eb, 0x48221e1a, 0xfea566de, 0x0be8ced2, 0x050732ed},
|
||||
{0x2216f1c8, 0x9aae0db3, 0xd7220015, 0x95e231ac, 0x6340df6f, 0xbd6ae160, 0x16a6e39c, 0x0166c8e2},
|
||||
{0x76b0a92e, 0x3ccd9d2b, 0x7d671a9d, 0x1feb39d7, 0x2109fd56, 0x3c49a630, 0x5d4ec292, 0x07badc4b},
|
||||
{0x5dd8c4c3, 0x081c3166, 0xec14ba21, 0x9dca12d8, 0xcf93b2e5, 0xf58069e2, 0x571ddc34, 0x02399005},
|
||||
{0x08a616fc, 0x65a19cf4, 0x8aea6ff7, 0x860d442c, 0x6896a559, 0x4f24ab19, 0x3d7f5ae6, 0x0685db92},
|
||||
{0x622478c4, 0x051093f0, 0x3fab8962, 0x5c200627, 0x21254c39, 0x2aa7ae1b, 0x7b116fb9, 0x0100fff9},
|
||||
{0x00637050, 0x2693b834, 0x22440235, 0x3fef7c1b, 0x3481c4fe, 0x31150ac1, 0xf261b6de, 0x0772cb7a},
|
||||
{0xd990d491, 0x6966804c, 0xc7505f35, 0x46aba1bc, 0xaceeb7f7, 0x4f696cba, 0x6474b8f0, 0x02b73cad},
|
||||
{0xf39cd3e8, 0x7d13e948, 0x62a1db76, 0xd5c33593, 0x4d1be159, 0x7fd3b59b, 0x3676644e, 0x066d3f61},
|
||||
{0xb3bd8b7e, 0x5a896ef3, 0xba5762ab, 0x2319450a, 0x1a545f8b, 0x226f0a07, 0x55446d35, 0x02760973},
|
||||
{0x140e5623, 0x38eaa186, 0x94be15ba, 0x5a48d469, 0xad75d32a, 0xe4f1f15b, 0x2f14e2f1, 0x039ccdaa},
|
||||
{0xe6fcfdb2, 0xad7108d3, 0x9c9f7f04, 0xfadfc050, 0x9df95366, 0xdbb20071, 0xe555c739, 0x02c4d3fa},
|
||||
{0xc3111bcb, 0xb640956f, 0xbb11fb86, 0xcd942bbd, 0xa3db81cd, 0xa4b4eb09, 0x684fdb65, 0x041ed5ed},
|
||||
{0xdd5ca525, 0x462b41fa, 0x153c3d28, 0xbcc17ccd, 0x6b06db5c, 0x8a81d137, 0x4a050358, 0x05f5cf39},
|
||||
{0xcc60fb85, 0x374012a6, 0x34d1905d, 0x978f9785, 0x4e17ff38, 0x713383d4, 0x1055c25d, 0x07f3796f},
|
||||
{0x0643771f, 0x852ba56e, 0x86781a31, 0xadfa956c, 0xb26a3811, 0x2ee2fccf, 0xdbd56ba7, 0x009214ce},
|
||||
{0x68bc148c, 0xe2bf6c4b, 0x01c203ce, 0xd38dbf38, 0x97923b55, 0x27f73df4, 0x5081f7d9, 0x030a2e81},
|
||||
{0xf11422a0, 0xbe23b78f, 0x99cdc2e0, 0xd4f3510d, 0xaa13ffe5, 0xcb05b3da, 0xc724e0c5, 0x028d98a5},
|
||||
{0x96934000, 0x15277271, 0x588c8a51, 0x8013dd5e, 0x9ed55af8, 0x77772f7c, 0x03549e60, 0x020895f8},
|
||||
{0x34db29f8, 0xc0cc8556, 0x67455b5d, 0x5582a9ff, 0x8a9a38b5, 0x12862a43, 0xa59fd242, 0x059655bc},
|
||||
{0x94ceaf98, 0x39bc5131, 0xc71ccc0d, 0x99f4d1a0, 0x54acb87c, 0xc565794d, 0xc33590ef, 0x0593fcef},
|
||||
{0xe97bf51c, 0xa2922d09, 0x3200d367, 0xdbb866a2, 0x4ad9302d, 0x05849ed8, 0xdf93f2b5, 0x000c447e},
|
||||
{0x850fb317, 0x2755d6c2, 0xd45eb3f5, 0x36feeeea, 0xdfbc1d97, 0x4f4471d7, 0x4e3003f8, 0x07ec8926},
|
||||
{0xb6a791f1, 0x38b8dc2a, 0x27a1bbb1, 0x79d6de48, 0xcad54cf2, 0x78c40b06, 0xa43bc898, 0x036dd150},
|
||||
{0x1cc4133c, 0xefa72477, 0x477d39be, 0x5327d617, 0x2c5db3a4, 0xfd1de1f9, 0xc9a18a1c, 0x0147819b},
|
||||
{0xf8133966, 0x275e6b02, 0x87969b48, 0x82bc79b9, 0x5d1e2f0e, 0x85b1f9bd, 0xc819531b, 0x00f9ea29},
|
||||
{0x120edfab, 0x9e0392a5, 0xe3681a15, 0x07403ad4, 0x8a1c3817, 0xa8d469d8, 0x89f15c6f, 0x0395e7fc},
|
||||
{0x641826ac, 0x7f405a9f, 0x6861e2ce, 0xa566e755, 0xba82a050, 0x8a3a08ba, 0xea63598d, 0x071dd923},
|
||||
{0x5f65c188, 0x1d2b7538, 0xd6fc9625, 0xcb704d0f, 0xf59deccc, 0x18729111, 0x52fe1979, 0x07595020},
|
||||
{0x8a08756f, 0x0175aa1c, 0x7fa7c6c4, 0x9a76a312, 0x6e93f6f3, 0x0bfa523a, 0x258c2f23, 0x03d70de4},
|
||||
{0x8229376d, 0x8a0b9d02, 0x2c65c94e, 0x08421430, 0xd34b0aa6, 0x1160b441, 0xbbfb9491, 0x03b9eb75},
|
||||
{0x827caf53, 0x91874856, 0x37e8a006, 0xdfdcae7a, 0x04e3af6b, 0x6dcfc3f2, 0xba66ff37, 0x0592823d},
|
||||
{0x72fb8b0d, 0xb0a6628d, 0xa72b1f03, 0x7d3eef8b, 0x8dd54dbe, 0x5be965ba, 0x96d1fe4c, 0x0114a278},
|
||||
{0x06051d55, 0x0256d8e6, 0xb9fa9dcc, 0xbf152353, 0x44140d6e, 0x6ef2c68c, 0xc9c0fea6, 0x015f291a},
|
||||
{0xed992efc, 0xa1826724, 0x771da991, 0x9a58fd99, 0xd0b370a1, 0xce51a153, 0x826df846, 0x03c53bf5},
|
||||
{0xcc7bf8c3, 0x3909aad7, 0xb08ddfa2, 0xd408ae7d, 0xff94d9fc, 0x2e9ab5d6, 0xf11cbcf6, 0x0020a1b2},
|
||||
{0x3e257b43, 0x448fff07, 0x5fd9edca, 0x00f4a128, 0x7b429f71, 0x6f8987e3, 0x0fc8b522, 0x013336c1},
|
||||
{0x062bd860, 0xef78ac4c, 0xf5d787d2, 0x6539ee52, 0xbb65576e, 0x113b6071, 0x9f3d7f85, 0x0160e952},
|
||||
{0xf966d24e, 0x0c4e7c07, 0x318277e8, 0x011853d8, 0x7c287f58, 0x93bae650, 0xf64289f7, 0x00b974a1},
|
||||
{0x30408cb9, 0x66d19420, 0x0430b017, 0x709ca6c6, 0x23d95951, 0xb174ad46, 0x111f4192, 0x030762f8},
|
||||
{0xf246c901, 0xb9d70015, 0x57a1cdec, 0xd3616cb1, 0x0d732fdb, 0x61aab25e, 0x12d620d8, 0x0712858b},
|
||||
{0x16334e1a, 0x8ec7e113, 0xa96aeeab, 0x0021a55b, 0xfd639175, 0x8f4c1366, 0x69bc866a, 0x07acdde9},
|
||||
{0x23088fc7, 0x1fb24e5e, 0x92a88089, 0xcacd65df, 0x17343c48, 0x103ec3c8, 0xc387a3b5, 0x03d296b9},
|
||||
{0xcd9fedee, 0xae703c5b, 0x7853b30d, 0xd0c3e0c6, 0x12abaef5, 0xc1e326b3, 0x5d57bb23, 0x04f42d7f},
|
||||
{0x1824b92c, 0x19cd1b4e, 0x81ebc117, 0xc5daaff4, 0xb8183a1d, 0xeeedaa59, 0xe28baf8a, 0x069d8f0c},
|
||||
{0x9dc50729, 0x9733e8df, 0xf1b9f411, 0xd7e0dbb9, 0x50edf7ea, 0x59e4dbd2, 0x4059cb5f, 0x002259fe},
|
||||
{0xb79a92b1, 0x5e3197fc, 0x59086db1, 0xbfddf5c5, 0xdbea4a69, 0x234d8639, 0x4d0a367d, 0x05dd79b0},
|
||||
{0xa86eec0c, 0x8cc1d845, 0x573b44d7, 0x3cac8839, 0x7b0de880, 0x8b8d8735, 0x68c99722, 0x01c5ef12},
|
||||
{0xc2ba0f23, 0x12680395, 0x471f947e, 0xd43bcf85, 0xcc9d9b24, 0x19935b68, 0x108eec6a, 0x06263e1e},
|
||||
{0x5b7be972, 0x29617bad, 0xc55b1b68, 0x0ab73eef, 0x2544381e, 0x07f12359, 0x63a080a0, 0x0161444d},
|
||||
{0x312f9080, 0x07a4b921, 0x2f530413, 0x64c25a07, 0x7d71ca2f, 0x3f6903d7, 0x04838ba1, 0x06917cab},
|
||||
{0x10bdb6cc, 0xec7cfc1f, 0x3bcf85c7, 0x7046910d, 0x7bc3ff5f, 0x7ef09e22, 0x385306d4, 0x004b0b60},
|
||||
{0x3a41158a, 0x82d06d78, 0xaa690d1f, 0x37c4a361, 0x7117c44a, 0x700766e1, 0xab40d7e4, 0x031261d0},
|
||||
{0x91b88258, 0x384c5e8b, 0x009b84dc, 0xd777abd5, 0xe7eed224, 0x02102b55, 0xdbefe5e9, 0x03b22830},
|
||||
{0x8770a4be, 0xec982f60, 0x961f56ad, 0x4b92533d, 0xf428c4b9, 0x7df85fbb, 0x2d9291a4, 0x057e4876},
|
||||
{0xf4910a60, 0x6ace9477, 0x9fc63b7f, 0xdb5a705f, 0x72328369, 0x4cc157b4, 0xc282db6f, 0x05b8acbc},
|
||||
{0x57269216, 0x4c69edd9, 0xbfee24ac, 0xd04f1eeb, 0x2a069b18, 0xacda8418, 0x5990b523, 0x03761a4f},
|
||||
{0xc608d246, 0x7f2e2048, 0x4664959b, 0xd4f52ed2, 0x11c1d565, 0x354e3bf7, 0x457eabd3, 0x0156d837},
|
||||
{0xd455f483, 0xea8cbefd, 0x5d940684, 0x33cd5725, 0x8091a287, 0x2d89a777, 0x939b3ef3, 0x06159e4a},
|
||||
{0x4fa405aa, 0xe43439f1, 0xdbe5763d, 0xa258cfc7, 0x78d7b607, 0x9491173a, 0x9ad23eac, 0x01775d66},
|
||||
{0xd772d637, 0x2413e92c, 0x5eac4588, 0x22c99c9f, 0x71a0cdd2, 0xa2bd1d06, 0xfdd73a36, 0x05e88acb},
|
||||
{0xb2bfa1ad, 0x68886b35, 0x35d2dfb6, 0x7a969b62, 0x9767a44a, 0x359ddb45, 0x52e5da6d, 0x00f1a46e},
|
||||
{0x1c5a4861, 0x4ef9fe94, 0x1c841a89, 0x1540cf67, 0xa9bed4f5, 0x8b51336f, 0xf63c32ab, 0x0240fc41},
|
||||
{0x87086e50, 0x7f5c626d, 0x049c46e2, 0x38ec0386, 0x0c597ea7, 0x30b003fd, 0x6660a912, 0x07a8faa1},
|
||||
{0x7dac5d19, 0x2810d2b4, 0x80339f39, 0x040470c4, 0xc946ab30, 0x30d97769, 0x52667151, 0x019fa1f9},
|
||||
{0x5e7c57a2, 0x00e13c8e, 0x2a0fb7bd, 0x95490ca0, 0x08451e35, 0x6af2b76d, 0xcf78c579, 0x04c3a3a1},
|
||||
{0x55e39071, 0xa848b2f2, 0xf132ce21, 0x6831da1d, 0xe080e2ec, 0x439bdda4, 0xadd19a7d, 0x06680f09},
|
||||
{0x6be27786, 0xfebd2a8b, 0x093a5a7f, 0x2cdd8f78, 0xdcb004b3, 0xbc0746a1, 0xd12450ed, 0x005f950a},
|
||||
{0x39759f39, 0xe1462ca6, 0x7bbe087d, 0x0c37dca2, 0x0c8661cb, 0x198de347, 0x7e531b52, 0x03602655},
|
||||
{0x66d7eb25, 0xaf24ead2, 0x5ee6eb03, 0x27cea560, 0x4f6267c7, 0xe9aa6d50, 0xe5dd28e0, 0x00c962b1},
|
||||
{0xb11706c9, 0x3c3407a5, 0xcf0e1b88, 0x44370686, 0x9fbda5e3, 0x5d0e7af0, 0x41cf0a6b, 0x010d235f},
|
||||
{0x358cfcc2, 0x1fbc42a3, 0xc78f7dac, 0x5a2e6ea2, 0xa12773f2, 0x33e089ca, 0xed7788c1, 0x04bef156},
|
||||
{0xbea42f88, 0xdb150649, 0x5f3fb72a, 0x71329f69, 0x86b82de7, 0x7aa46ad0, 0xc6093912, 0x07913b17},
|
||||
{0xb3b67067, 0xb2b074ae, 0xc55f4455, 0x4f17674d, 0xdeb0740d, 0x9a112816, 0x316cc0d3, 0x06bd0cde},
|
||||
{0x1a264ab3, 0x962ceb6b, 0xd99f7159, 0xd5930255, 0x24a4096e, 0x7db961b0, 0x3e50dfed, 0x050c8e5c},
|
||||
{0x443af109, 0xc3eebe54, 0x86946633, 0x2ca03fcb, 0x04badff6, 0x6e6eef04, 0x82210754, 0x05d92ab7},
|
||||
{0xa5c0dca4, 0xcbadd8ad, 0x5ac103a0, 0x4cf688cf, 0x26e5d435, 0x571dbdb9, 0x220fc7db, 0x074ffc4d},
|
||||
{0x88740c3e, 0x70b80432, 0x03821aa8, 0x4a959d50, 0xe4df06d8, 0x3eb8c3a0, 0xcac57496, 0x025a425b},
|
||||
{0x55205413, 0xdcadfd29, 0x90b17b01, 0xda7456d2, 0x73696a28, 0x437c2fda, 0x329f6855, 0x00a8a188},
|
||||
{0xa828431e, 0x3cde2cdd, 0x9ed29340, 0x60e6c362, 0x7c13e145, 0xef00dfa9, 0xba288c0b, 0x04159bec},
|
||||
{0x9065f8ee, 0x41d351cd, 0xa4845868, 0x4e2e298f, 0xbdb3834a, 0xbcba6ac1, 0xea85f2ec, 0x042c8871},
|
||||
{0x1fda880f, 0xc4dc0d20, 0x26fc2d5c, 0x4f0f9dc4, 0x86839de7, 0x2c555343, 0xf698dd8f, 0x04d12da8},
|
||||
{0x21bd655a, 0x3a6299bd, 0x8cfd772f, 0x2e4aea22, 0xd2c2590d, 0x09716ad9, 0xb298587d, 0x053b143c},
|
||||
{0xa95e3cbf, 0xd35f3e32, 0x04eac3cf, 0xe380dee7, 0x0f7e3e6b, 0x27e6570a, 0xbed46774, 0x008cd288},
|
||||
{0x9583f023, 0xe42676b0, 0x75cfaa7e, 0x39d57dd6, 0x4f0bb727, 0x10d4a8d0, 0x27c81bdd, 0x016b03c9},
|
||||
{0x4decc603, 0x89b394f7, 0xd24690f4, 0xd7322ee9, 0x947a00fd, 0xbbc12961, 0x82e8fa75, 0x00886d23},
|
||||
{0xeb0faad4, 0x7b48a33b, 0x60e0b0c8, 0x4c11ef26, 0x36f0f791, 0x4163a401, 0xa4074faf, 0x07986fea},
|
||||
{0x31d9587e, 0x96044919, 0x9049fd2d, 0xb1cab341, 0x9c0eea09, 0xf28c83c9, 0x5c6620aa, 0x033b74dd},
|
||||
{0x13ee028c, 0xde558d16, 0x5d4233b0, 0x4dcf3932, 0x2e422803, 0x7bd46887, 0xe1261bff, 0x04b4757d},
|
||||
{0xd48e9b00, 0x6c80848f, 0x10b6a121, 0x937c1e6e, 0xe9f2008c, 0x7782f8b8, 0x2bc7171c, 0x00217358},
|
||||
{0x324228d8, 0xba523265, 0x682ee17c, 0x4ebe5506, 0x3be009f9, 0x6c646fe8, 0x8594b924, 0x046de7bc},
|
||||
{0x3b50645a, 0x270aa33a, 0x2a9c6282, 0x28fd23fd, 0xcfe96515, 0x5b2fa771, 0x3f812377, 0x063039de},
|
||||
{0xaba4060a, 0xa1da52b0, 0x0374be67, 0x7f191fd6, 0x0d7d2126, 0x14c64d05, 0xf7f77381, 0x00419cb7},
|
||||
{0xe4b19319, 0x07eda692, 0x0fef654e, 0x6190d3f6, 0x0b21ca7e, 0x893b0916, 0x073c48b4, 0x0367a3c7},
|
||||
{0xc520e3ea, 0x8fd405b2, 0x487e93c9, 0x73b4f714, 0xd5142cff, 0x70b7ee88, 0xa320eca2, 0x058fb800},
|
||||
{0x72ef3623, 0x3b5a8740, 0xaff370fd, 0xbff4af42, 0xe338258e, 0x64c137b0, 0xc7afafca, 0x05ac9917},
|
||||
{0x82ccc89a, 0x99c46a0d, 0x9ff87868, 0x05ae3209, 0xa489481f, 0x6249b2a4, 0xbaead348, 0x0056c235},
|
||||
{0xba0ea95e, 0x5a0640f3, 0xc03af976, 0x518db5cd, 0x5a250a06, 0x1c3223aa, 0xbc3442eb, 0x0397b942},
|
||||
{0xacf14a4f, 0x164f0705, 0x33eb6c0e, 0x386c2325, 0xd7264573, 0xdfaceff6, 0xd1e22f80, 0x00e94509},
|
||||
{0x9ff51bc7, 0x8964ee48, 0x57bbca04, 0x3e0f5037, 0x6510630c, 0xe78d6c8d, 0xdf0a61c1, 0x041d6351},
|
||||
{0x45aa1b58, 0x47892f3b, 0x915c1c70, 0x5a1787ba, 0x67f20d25, 0xbaa23359, 0x0c4bc4be, 0x00e1919f},
|
||||
{0xb9975332, 0x2a87c37a, 0xcdecebc9, 0x95db523f, 0x1d0db226, 0x703949ee, 0x4c3842dd, 0x03152c1d},
|
||||
{0xecfb6f72, 0x0eff7e6a, 0x9493a628, 0xb3a83455, 0xd596cd51, 0xced58dd1, 0x25ee51ff, 0x033dee78},
|
||||
{0x72a30547, 0x1f4047ca, 0xd40b6d0f, 0x9feefa06, 0x94db1b38, 0x836ffd80, 0xa0992ed5, 0x037c79f6},
|
||||
{0xceb3dffd, 0x7ffa095d, 0x768e2cb3, 0x23097a65, 0x373f6222, 0xd228b1f9, 0xc57feea2, 0x06309a6b},
|
||||
{0xecd4c6f7, 0x7a5bead4, 0x7e70f7de, 0xab92043c, 0x220db8d8, 0xf78f890e, 0x2865a07e, 0x052eeb98},
|
||||
{0xdf253531, 0x8e9a6336, 0xbafa937b, 0xb24b664a, 0x303b1f5a, 0xc89f660e, 0x876bd8c7, 0x07ea9749},
|
||||
{0x1d4c3fec, 0xd958e726, 0x06fbef31, 0xa5eb368f, 0xba6a027d, 0x0c911679, 0x5f80f992, 0x06321b51},
|
||||
{0x046b49b2, 0x3ca61d9e, 0x6aa9c29a, 0x616a47d6, 0x9e9462dc, 0x27a7ffeb, 0x8971b70e, 0x0794ed38},
|
||||
{0x9f47496f, 0xdb259a57, 0xa6b0481c, 0x7f3e3f90, 0x4afab47a, 0x76f42726, 0xc5a79505, 0x07b9da96},
|
||||
{0x57e7aeed, 0x908e6450, 0x81648127, 0xe86db2fb, 0x8dd76882, 0x53f3c573, 0x72327da6, 0x02b37324},
|
||||
{0x73a220ec, 0x82a941c9, 0x7f25beea, 0xb4cbecb7, 0xbfb061d6, 0x746ded71, 0x641b3f3d, 0x00f7af27},
|
||||
{0xcbd4ba67, 0x69b8f4df, 0x3d526981, 0x5ee3ac6f, 0x145cef8c, 0x9372af4e, 0x72a31ef1, 0x05cc1cc6},
|
||||
{0x62d1ba57, 0xce898b0d, 0xee3fa47e, 0x86ba0504, 0x4395b70d, 0xc68233b1, 0x80eb8d60, 0x024cfa58},
|
||||
{0x74d51c41, 0x8fa83850, 0x60f8f9da, 0x5824a285, 0xaf1bea48, 0xa7a2067e, 0x5455acc3, 0x04ba49f2},
|
||||
{0x324c6039, 0x0a1e223e, 0x7b18a9d0, 0x28312228, 0x88b6ecda, 0xb60c1f93, 0x687ba365, 0x053097d8},
|
||||
{0xa7dae551, 0x5604b398, 0xe2e11609, 0x51f02e33, 0xe58e2094, 0x0b51a085, 0x3a3ecc28, 0x078679d6},
|
||||
{0x92d52444, 0xe24b5528, 0x33d0fa70, 0xf77e35ad, 0x9bcbfb57, 0x8af5a7b7, 0x022748d2, 0x015c5f15},
|
||||
{0xc993b168, 0xc002185c, 0x293ad856, 0x5586addb, 0x8ec50726, 0x69c1bfcf, 0x5fd97ea1, 0x00d514fc},
|
||||
{0x8866c747, 0x52d7a9a2, 0x01d6ee05, 0x9bd77465, 0xc3a87a88, 0x576adf96, 0xfa69f0ec, 0x0693e89a},
|
||||
{0x05903be3, 0xcfe50d90, 0xcf739179, 0xbe651dd1, 0x2ae70678, 0xba80ffda, 0xb55b06cc, 0x051dbe40},
|
||||
{0x5585a6f0, 0x4adb5947, 0x9fa37e68, 0x14634b99, 0xa2a910a8, 0x27da5fbf, 0xa99c704d, 0x022a91ce},
|
||||
{0xe2ddaacd, 0xfabab7b8, 0x60cf9603, 0x1edf6a83, 0xbfadddd3, 0x20b04218, 0xa81dbffa, 0x03e0ddb6},
|
||||
{0xda25c9fd, 0xf9c1e3a3, 0xac57ece3, 0x41ff4e1e, 0xdd684055, 0x9ba50868, 0x46d8156a, 0x01b30314},
|
||||
{0xab76a462, 0x30e067cc, 0x08f1b99b, 0x2d84c4c2, 0x73edc56f, 0x6b399ae0, 0x62cfacb2, 0x02f187e1},
|
||||
{0x34fc5356, 0xb085758e, 0xf805fedf, 0xbafe9a1c, 0x95272d01, 0x0bcf423c, 0x1feca651, 0x01df4a81},
|
||||
{0x4c264e97, 0xd3bd9833, 0xc08b1798, 0xc0b192be, 0xdc3ed49e, 0x42724e80, 0xbaee9a58, 0x04100303},
|
||||
{0xe49749c9, 0xb653c919, 0x09f8e2fc, 0x07dbe557, 0xca71e551, 0xbb172d28, 0x7989c8fd, 0x07f5f801},
|
||||
{0xdf1d9004, 0x9412a9f3, 0xbe90d67e, 0xddcf6d66, 0x4692f803, 0x1dbfd679, 0x524c2944, 0x04f4fae1},
|
||||
{0x5707d134, 0xd413afdf, 0x887fd7e9, 0xf8a339cf, 0x84883580, 0xf74544f4, 0x851739e0, 0x0554f72a},
|
||||
{0x59824907, 0xe3827564, 0x421182c9, 0x352eab2a, 0x8f8530f2, 0x19138257, 0x20275950, 0x04e3bf44},
|
||||
{0x33f928b7, 0xef7660f9, 0xf5952362, 0xb7cb0619, 0xf17eb8d7, 0x5b24913b, 0x8e8b8082, 0x00f4804c},
|
||||
{0x5bd84f3e, 0xe7020613, 0x736a1659, 0x7ee777e1, 0x0795844b, 0x34ca7cb6, 0x7503ddc3, 0x07ce12e4},
|
||||
{0x6d8408a5, 0xbbbafb3f, 0x519dadca, 0xe0f02915, 0x0670f5d4, 0x5acba199, 0x4a93340f, 0x0056db45},
|
||||
{0xe404f6c5, 0x73f8a435, 0x01731858, 0x68cd3f7a, 0xd01f3de9, 0x214d3134, 0xd5d75a88, 0x05fb76be},
|
||||
{0xf976eb41, 0x3a66ad86, 0xcd08787a, 0x6401b6d3, 0x7d1e82a8, 0x575950f3, 0x55ee9d49, 0x00e34b33},
|
||||
{0x0cc5cbf4, 0xbff2f4e6, 0xec205dcd, 0x5a6b430d, 0xc94862af, 0xa8114ab3, 0x2fe8be1f, 0x0247ecf5},
|
||||
{0x8b98bf40, 0xded3bc57, 0xe26b66b3, 0xb658c8c4, 0x8d4220db, 0x8bd91c55, 0x94d2adea, 0x00d109f2},
|
||||
{0xedeaec42, 0x0fbfd336, 0x5d407ae8, 0xd94f928d, 0x727e74b5, 0xe5e4a16b, 0xc8c22dd8, 0x06a550df},
|
||||
{0x135e0ee9, 0xe378a012, 0x856a1aef, 0x5be86512, 0xd8febe77, 0x7de04ce2, 0xea43d59b, 0x03ddeed6},
|
||||
{0x005a1d86, 0xc04dc48c, 0x6f29053d, 0x64f4bbd2, 0x9be0aef5, 0x10b1b3db, 0xcc625a0b, 0x03745ca5},
|
||||
{0x1f4f0e85, 0x6c72bd40, 0xc2069cba, 0x4234afd0, 0xb99395f4, 0xc25b262f, 0xae0874e2, 0x0605f6a2},
|
||||
{0xdd756b6d, 0x9513e0d4, 0xf0c137cd, 0x5127a167, 0x7f01c538, 0x1a12a425, 0x00a4483b, 0x068b3aaf},
|
||||
{0x79bc6c86, 0x7a5b3e70, 0x375dc240, 0x5a337909, 0xe111d6ce, 0x46d6fe3c, 0x2ff2ca50, 0x02708b05},
|
||||
{0x1524ad8c, 0x1181eb95, 0x52294490, 0xd0744ddc, 0x848605cf, 0x88ed5b7b, 0xb478c12a, 0x04b9cb49},
|
||||
{0x27105dae, 0x98cb2411, 0xed5c1361, 0x3efa8fae, 0xd498e337, 0x6fa736a5, 0x1e369b4f, 0x038e3b07},
|
||||
{0x98c8db7f, 0xbc5915ae, 0x50425ae8, 0x1f3c8f96, 0xfa86658a, 0x77d60416, 0x28ec2dda, 0x02bc8b30},
|
||||
{0xb94bc10e, 0xad6794f2, 0x7e80093a, 0x7463b3f3, 0x90db4c79, 0x7bf5af53, 0x965c0cc4, 0x031531c6},
|
||||
{0x7cc1083d, 0x66425289, 0xa45d785f, 0x778ba471, 0xbbc94c16, 0xe3f5c599, 0x9b92e036, 0x02606413},
|
||||
{0xcf287faf, 0x191a2ea9, 0x823ddf07, 0xe6406a78, 0xaabe912b, 0xabcf2825, 0x7c48649a, 0x021dab44},
|
||||
{0x65375f6c, 0x9465d77c, 0x65370520, 0x924e189c, 0x918f0105, 0x8be0ca5f, 0xb1925509, 0x07586d27},
|
||||
{0x9302ac44, 0xe4fa93cb, 0xbf87d840, 0xf381ebbd, 0x44793049, 0x5027e7d9, 0xd3f09392, 0x0230b5c3},
|
||||
{0x31d48a82, 0x123e992e, 0x729d40e2, 0xef2990c6, 0x0f331903, 0x946813e3, 0x112a2c4d, 0x022f575e},
|
||||
{0xd4ee8cf7, 0x4b44764e, 0xdb576ebc, 0x4d44cff8, 0x0ab93ba1, 0xc6185d3a, 0x7e3f1e78, 0x0520c2d3},
|
||||
{0xbc46b8b4, 0xd9446736, 0x91e2ede1, 0xc7776293, 0x87689930, 0x0323845f, 0x379293ae, 0x061e359f},
|
||||
{0xb49b3a0a, 0x767a1747, 0x2b58f45e, 0x17e69346, 0x1425ad98, 0x10820519, 0x1b487ae5, 0x0367f384},
|
||||
{0x92f8ac25, 0xe0407696, 0x2beb71a6, 0x9ca9d269, 0x2f0c2471, 0x914017ea, 0xf421a10d, 0x07709cc3},
|
||||
{0xc3bb6a8f, 0x2c8ed622, 0xa2a1a8f2, 0x31c57cb6, 0x4bf6c316, 0x053924d5, 0x09563089, 0x0727b76a},
|
||||
{0x09dc6b5c, 0x567be37f, 0x9476eb5d, 0x57e36f45, 0xee5be5b6, 0xf68488dd, 0x2884c2d7, 0x05ac1ff1},
|
||||
{0x04173760, 0x0fc5b934, 0xda828f00, 0xe43272df, 0x2fad6e9c, 0x7e2ab5fe, 0x0a4995b3, 0x00e0a5eb},
|
||||
{0x42f8ef94, 0x6070024f, 0xe11a6161, 0xad187148, 0x9c8b0fa5, 0x3f046451, 0x87529cfa, 0x005282db}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000011, 0x08000000},
|
||||
{0x0becc81e, 0xd59e99d9, 0x537cdf25, 0x3ad971a9, 0xbcd60738, 0xaccedf99, 0xd65d66b5, 0x01dafdc6},
|
||||
{0x4bc9ca34, 0xc8e6df6f, 0x5397aaca, 0xab8bfbc5, 0x94813e6e, 0xb5ea6773, 0xe295dda2, 0x0446ed3c},
|
||||
{0x8145aa75, 0xd7981c5b, 0x3d174c52, 0xb14011ea, 0xe4721c1e, 0x647c9ba3, 0x6f6ac6dd, 0x05c3ed0c},
|
||||
{0x6e0bef41, 0x9de8c5cf, 0xcee1b9b0, 0xec349cbb, 0x2121589c, 0xfe72ab05, 0x24c7669c, 0x03b1c96a},
|
||||
{0x246766d8, 0xb878549e, 0xb5a03ab4, 0x8c5d8531, 0x7f1ec75e, 0x334a83ab, 0x46b146d7, 0x01342b29},
|
||||
{0x31055652, 0x8c71bd50, 0x6081f8c3, 0x2eedac49, 0xab013740, 0x25164a76, 0xbca84bf7, 0x05c0a717},
|
||||
{0xd0a6b4f5, 0x1ad37af3, 0x8ca50294, 0x6dc49fe3, 0x5d9529c3, 0x8357a7ff, 0xcefe8efe, 0x02c161bc},
|
||||
{0x296fbf1c, 0x90a5fa7f, 0xc977b113, 0x18226a39, 0xc178262e, 0x9362d5c9, 0x40d28de5, 0x03a362d3},
|
||||
{0x125ca33a, 0x04eeb1c0, 0x8437c604, 0xaa47a4c0, 0xa4d6bafe, 0x064426a2, 0xb8cc76db, 0x00ffbb44},
|
||||
{0x179e2ebe, 0xecf0daf8, 0x2574403b, 0x942e643e, 0x6bf06f7c, 0x684d31aa, 0x244c675c, 0x003b2bde},
|
||||
{0xfeccfccc, 0x96bc19dc, 0x269130b4, 0xbb26f74e, 0xd511649f, 0x15d57a9f, 0x7dcde3c3, 0x02d852a4},
|
||||
{0x44ad0610, 0xb4a47f4c, 0x06fa1b55, 0xdc2f028f, 0xd25979ac, 0xd73ddcd4, 0x076e7f5d, 0x06ba7cbe},
|
||||
{0x349eea63, 0xb0f43dd2, 0x3e64660d, 0x5e64466c, 0xc3bb94ce, 0x7206f426, 0xed4327aa, 0x036cb7c6},
|
||||
{0xf248b36c, 0x6503e80b, 0xe36060ec, 0xb93dd56f, 0x95c2c067, 0x6d3b2763, 0x155023a7, 0x038e7d59},
|
||||
{0xcdf92351, 0x140437ad, 0x2a5ab630, 0xb7a6e1b4, 0xd48175a5, 0xaa80b742, 0xd4afae89, 0x06a50046},
|
||||
{0xaea51997, 0xe8cde2cd, 0x417e3754, 0x612806f6, 0xb940adf4, 0xe40a4a07, 0xa33929b2, 0x063f5efa},
|
||||
{0x0c07573f, 0x0c0926df, 0xd8d4bee3, 0xa84e9027, 0x6bcd79ea, 0xf3776dfa, 0x523f55a8, 0x043a8517},
|
||||
{0x66984d05, 0x5b7e4e45, 0xdb8c30c4, 0xb9381de7, 0xae86e4f6, 0xd7c15128, 0x809daae7, 0x0718f1ad},
|
||||
{0xc1eae1a6, 0xe4fb0a7d, 0xa90a0813, 0xe5484134, 0x895df525, 0x24cca8f9, 0x1cedd2ee, 0x035fd390},
|
||||
{0x82e87775, 0x0a87a942, 0x971f450b, 0x9f2b4b62, 0x8eae6f09, 0x1dc5aecd, 0x1c5686a6, 0x07547fa3},
|
||||
{0x2e35511a, 0x785975cc, 0xa085c456, 0x4266bc82, 0x3abd5bfd, 0x45cf52e1, 0x7bd95ece, 0x019e8e43},
|
||||
{0xae580194, 0xfad72a75, 0x2989ac16, 0xf2bb5a00, 0x55f2b4d0, 0x53fee728, 0x9c7a91e5, 0x02b9f95d},
|
||||
{0x71200963, 0xb0062d2c, 0x1ac57a23, 0xe16e9f91, 0xc4bd9d3e, 0xaae7b169, 0x7f505f35, 0x07462151},
|
||||
{0x57e31913, 0xcf7bd10e, 0x6a4d0ee4, 0x1a360a91, 0x31869e35, 0xb2ba4914, 0x18005db4, 0x07a62d5c},
|
||||
{0xb4344711, 0x431f11e2, 0x6192c47e, 0x0cc3049c, 0xeb9c1bc3, 0x375dff93, 0x42071ee8, 0x03a75790},
|
||||
{0x9ed81498, 0x4eb14251, 0x98b804ef, 0x5852dbc5, 0x56d7f20c, 0xe0c1be13, 0x20d69181, 0x023e7f68},
|
||||
{0xe34f2d55, 0xf2eeb9b5, 0x2aad6f84, 0x63459f16, 0xbe37dbea, 0xf12099e7, 0x11b1a0fd, 0x06e45493},
|
||||
{0x0d6c93ed, 0x63032f6a, 0x5a04829f, 0xd99cbcc8, 0x89608b5e, 0x80f20416, 0x9df329f4, 0x00bf4231},
|
||||
{0x2710f927, 0xc7fc3d1b, 0x90d8503e, 0xc72d19af, 0x9940e689, 0xa9dcd3b8, 0x2da77ac9, 0x06fd386e},
|
||||
{0x08b27bc2, 0xc800035f, 0x4dfacc03, 0xd98987cf, 0x1256e525, 0x24f8fdbf, 0x1f104273, 0x04c575f1},
|
||||
{0x256c604a, 0x68b16e90, 0x6eba097d, 0x7f51023a, 0x1aeba9c8, 0x52c7629c, 0x4809d8da, 0x0575e850},
|
||||
{0x4ac81249, 0x7439d2f9, 0x4fc31ff2, 0x351e4a62, 0xb3906ded, 0x68fb8313, 0x08507a35, 0x007d43d8},
|
||||
{0x98859a12, 0xa87902b8, 0x73af55b3, 0x2f0d13e0, 0x1b9783c2, 0x5a46c66a, 0x2f5f71d4, 0x01045b06},
|
||||
{0x604fce1e, 0x0c379595, 0x7fccc2b4, 0x20ab6eb8, 0xf1820ae7, 0xac0bc709, 0x93fb2b07, 0x07e7654f},
|
||||
{0x246c4bf0, 0xa0e40811, 0x816b15e0, 0xe12accf5, 0x17938138, 0xee417239, 0x2c9a34fb, 0x004e092e},
|
||||
{0xad2cd984, 0x6304351b, 0x4bf1aafc, 0x38546ca6, 0xf310e99f, 0x1fb81192, 0xb5376275, 0x07e89896},
|
||||
{0x7b2d141d, 0xe4376a0b, 0x6dac220c, 0xea1795e5, 0xb19e1901, 0xd778ab50, 0xa94c274f, 0x077df905},
|
||||
{0x16fcd6c7, 0x7039bab1, 0xa6ea1c94, 0x8eececb7, 0x0f122046, 0x84d26ab5, 0x22fd55a1, 0x053c5d48},
|
||||
{0x72f11f65, 0xd43eb7bb, 0xb2a566d6, 0xfb538785, 0x3f35cbf5, 0xccc2cdc6, 0x7112504a, 0x06df5a9e},
|
||||
{0x60ce9c30, 0x75efb55c, 0x3c541437, 0x991873ed, 0xdf0cbb3b, 0x37eaedcb, 0xb04c2858, 0x0278d7f0},
|
||||
{0x1a06866b, 0x5757dd4e, 0x6570fa7f, 0x15c176b1, 0xafe89a1d, 0x9981b57f, 0xee0cb14c, 0x03c57f4d},
|
||||
{0x503c31cd, 0x3438cd66, 0xc0736d4b, 0x34437e52, 0x2a9d1b28, 0xe825b769, 0x73c06ee7, 0x06955a3a},
|
||||
{0x5c5e530e, 0xbbf0995a, 0x6569a2f9, 0xdee304b3, 0x5bd1a886, 0x3b9c993c, 0xc9cd050a, 0x00f66017},
|
||||
{0xee755737, 0x3666e752, 0x74d0e317, 0xa13bfafc, 0x01d2f1bf, 0x17ab672a, 0x0778f525, 0x079dde3a},
|
||||
{0xed8a25e9, 0x96a003c2, 0x8f347cec, 0x45d258fe, 0x96ea14ac, 0x68ff148d, 0xe148eda9, 0x058f4ec7},
|
||||
{0xe2a700ab, 0x23baf732, 0x5202a945, 0x6434725a, 0x2e693363, 0xa19a338d, 0xbf2f39c6, 0x01d0ea7a},
|
||||
{0x3ab52589, 0x5e571cad, 0x92240361, 0xe2916bb2, 0xdff5e354, 0xe6f8897b, 0x2ffa4707, 0x02a62880},
|
||||
{0xef649a85, 0xaf446c62, 0xed4e461f, 0x14d8072f, 0x59993efa, 0x5a07f4e5, 0x72a3a652, 0x00dc28b6},
|
||||
{0xf21511df, 0x139299d7, 0x4854ebc3, 0x8914e707, 0xbfd102a9, 0x9f3b5913, 0x3a5af894, 0x009dc24f},
|
||||
{0x1f4ba4fa, 0x650e1d91, 0x1977bff0, 0x6ba67806, 0xaa9bbc1b, 0xffbdc531, 0x997408aa, 0x057b69b2},
|
||||
{0x65fb1a91, 0x25c03e81, 0x7fd22618, 0x8682f98b, 0xf46cb453, 0xcad67f13, 0x5a80e5c6, 0x060ca599},
|
||||
{0x94188f2a, 0xa7978a90, 0xdbb9338e, 0xd5fc8f0b, 0xcbdd84f0, 0xf8387e6d, 0xbbc743a3, 0x073ae131},
|
||||
{0x0415bbcc, 0xafd00c46, 0x0df4a52a, 0x1a00eb6c, 0x0b96b594, 0x1ec67c64, 0x8e26b699, 0x01cb82a5},
|
||||
{0x7f740f93, 0xf56319fb, 0x2e2f6ed7, 0xb40d559b, 0x75e19784, 0x63f96f04, 0xc31ba061, 0x06406929},
|
||||
{0xfa5a3239, 0x22349e8b, 0xb9ca6bf9, 0xe1236395, 0x9b0017a4, 0x76ae5a8b, 0x17b7af03, 0x06cfb4ce},
|
||||
{0xb51abfe6, 0x34938785, 0x1249edb6, 0x21f54c80, 0xab038972, 0x3bd1cc16, 0xa4a57a81, 0x0636b37f},
|
||||
{0xf88717cf, 0xfda4a9a1, 0xee19d402, 0xf8fcba35, 0x47c9ba1b, 0x1ac940f6, 0xdd991440, 0x013c0ab3},
|
||||
{0x3743adf4, 0x5082318a, 0x22440f94, 0x3293bae1, 0x8dd2d761, 0x4c2e6d7f, 0xcdc38c82, 0x07124118},
|
||||
{0x76198779, 0xb031f8b7, 0x1b6c1944, 0x6742f602, 0x894a6134, 0xa18290db, 0xaba037dc, 0x035289d8},
|
||||
{0x9f8a9b07, 0x4579e855, 0x4dca3764, 0x1e580662, 0xb8c8ef49, 0xda92152e, 0x8b54508a, 0x0444085a},
|
||||
{0x34696648, 0x7f670ce1, 0xc05768d9, 0x2f00108f, 0x390fb519, 0x2d00a444, 0x1cd6f914, 0x015c468b},
|
||||
{0xfe46c5f2, 0x00666cbf, 0x9f7174d6, 0xca4051c5, 0x8e4277f4, 0x1629882a, 0x6ee002a3, 0x00b3f261},
|
||||
{0xc1dbb4f6, 0x418a2b86, 0x9a6ca270, 0x9f453ccc, 0x1d457b20, 0x1966471f, 0x80fd1319, 0x00b4d831},
|
||||
{0x1c76c8b1, 0xa12f86a8, 0xc0125e48, 0x2772e424, 0x1459dfb8, 0x8d650644, 0xad06d01c, 0x02128e5c},
|
||||
{0x3472799c, 0xcc8cc7f6, 0x2f511cae, 0xfbd97f95, 0x5ebbff71, 0xadd8818b, 0x09af0983, 0x00520540},
|
||||
{0x8ec654cc, 0xcaab5dd4, 0x17ba15a9, 0xc05ad0a7, 0x36300a00, 0x4bda7469, 0x41bb0610, 0x02e486cd},
|
||||
{0x2d6be8b5, 0x077ba983, 0xfe89eb7d, 0xdd5e728f, 0x63f9c51f, 0xe3c872fb, 0xce639995, 0x01f2f7a8},
|
||||
{0xaa2ea7eb, 0xd82b1599, 0xa16489e0, 0x1be5d254, 0x173d3219, 0x19cb236a, 0x1fe63b23, 0x007dd45f},
|
||||
{0x19dba628, 0xa27cc4d3, 0x5fd2e061, 0xf04ac441, 0x9307a758, 0xc7405333, 0x28c40fe4, 0x0103c707},
|
||||
{0x54662aab, 0xb5129fd1, 0x59158f32, 0x2ec5b69b, 0x12c44eec, 0x6c7e6492, 0xe527abb2, 0x046e7c11},
|
||||
{0xe32d46fe, 0xb9bf4936, 0xb08ef006, 0xf23ae18c, 0xe6a5179e, 0x5352cc59, 0x5bf7c0b8, 0x0753a621},
|
||||
{0x9318db3a, 0x19f65bc2, 0x7e3d0014, 0x93ff3f79, 0x6beb580d, 0xf7f93c7f, 0xddd72603, 0x04fdb898},
|
||||
{0xe184a935, 0xf7e1f88f, 0x1ad510f0, 0x82a0f047, 0x4c9ab6ca, 0xce0f7c44, 0x5104a95a, 0x0552304e},
|
||||
{0x985bba5c, 0x06615580, 0xf487a1fb, 0x8ccd29a8, 0xeecf758d, 0xb3e15ed0, 0x857ce648, 0x05328783},
|
||||
{0x6cb042b0, 0x5d1d5a22, 0x0277083c, 0x64375cf4, 0x5fa82215, 0xe8947dab, 0x86932495, 0x05e72829},
|
||||
{0x8c3e2849, 0x5bf6f46a, 0x4924c8f4, 0x7e40314c, 0xdffd6118, 0x3c74a4ba, 0x2f8de20a, 0x05247cdd},
|
||||
{0xd0042d11, 0x25a418c5, 0x2f7da60c, 0x1b60ee9f, 0x02c0b69f, 0x61c041ad, 0x15670214, 0x0632d33a},
|
||||
{0x90e05a92, 0x32b03a5e, 0x78d1e8d6, 0xfb12a1b1, 0x5bc2f5d5, 0xb8af534e, 0xa032918a, 0x05ab4772},
|
||||
{0x0a711a9d, 0x096878a8, 0x6b083c8c, 0x87d070da, 0x87d06afb, 0x77931578, 0xf3104057, 0x03705277},
|
||||
{0xdf993e46, 0x502d2374, 0x35baf646, 0xc1cd2868, 0xe30aa213, 0xa61b54b6, 0xbce34b74, 0x02511017},
|
||||
{0x90a6b9b9, 0xcfb6c51a, 0x8be6ade8, 0x4e0b29ef, 0xd3832d74, 0xa8292467, 0x41ca1e45, 0x02ce7977},
|
||||
{0x3e672d5b, 0x25ee10aa, 0x28597504, 0xb0e60c63, 0xe263c827, 0x4a8d0567, 0xfadefeba, 0x01f4ec42},
|
||||
{0xa5a26158, 0x8b4b15e0, 0x88a71cf2, 0xa59b2df9, 0x5d734341, 0xde44f2e7, 0x4db8d2e8, 0x007a18a0},
|
||||
{0xb4d18100, 0x30fcf001, 0xf8ae0b4f, 0xcdaa5334, 0xe325615a, 0x67017b2b, 0xf0ccbf57, 0x016c6d47},
|
||||
{0xba937732, 0x66afc115, 0xc20be386, 0x917d4890, 0xa017c59d, 0x5dadccff, 0x986c39c1, 0x043fa44e},
|
||||
{0x08baa72a, 0xc57ec886, 0x052364ed, 0xe65a4680, 0x85f9a523, 0x0536b505, 0xfe744ee2, 0x03580609},
|
||||
{0x1bab1ab8, 0x88109415, 0x62f0fa74, 0x02244b19, 0x915618e0, 0x837fcd10, 0x942f12d2, 0x061b83d0},
|
||||
{0x687b7798, 0x823d0bba, 0x84a49784, 0x5f93174a, 0x2574af37, 0xcfd64159, 0xe108057c, 0x0290722e},
|
||||
{0x58a66036, 0x900a7031, 0x6153c2ae, 0xcb443378, 0xa6ccdffe, 0x4c48b8dd, 0xa06e955a, 0x049a9211},
|
||||
{0xea0b9dd9, 0x1b034532, 0x638c79ec, 0x11cba08f, 0x7c5b2d15, 0x16d00728, 0xbb9a759c, 0x05abcbcd},
|
||||
{0x1552d6af, 0x21b4f60e, 0xbed54865, 0x2f7ea9d2, 0x738befdb, 0x39378802, 0x97845360, 0x02adf76c},
|
||||
{0x4026bb92, 0x6e5eb2ca, 0xcbed5570, 0x18f3d8bf, 0xb655ac26, 0x2a5fc8cd, 0x3809a1c5, 0x0031cd25},
|
||||
{0x0ef5e011, 0x2d698950, 0xc018b82d, 0xc0668c45, 0xf520d325, 0xd180ff47, 0xa38122b1, 0x046714c7},
|
||||
{0x12df2cc7, 0x8dec8a4b, 0x963031f8, 0x5eb84a1b, 0x88525708, 0xb75ad701, 0x07df57bd, 0x02054a99},
|
||||
{0x82b2f616, 0xe0013d43, 0x7b385914, 0x2ad34c97, 0x11108f4b, 0xc9969223, 0x9c9fad59, 0x0183f639},
|
||||
{0x06b4dc38, 0xaca9dfbc, 0x962d5774, 0x85596bbc, 0x22f1cd7d, 0xd7023923, 0x2067b180, 0x04d3c939},
|
||||
{0xe4004173, 0x6d13e6ab, 0xaafe8726, 0x3495d095, 0x33dc3303, 0xa22d3e4a, 0x776d2e14, 0x0276dbb2},
|
||||
{0x68c539b6, 0xa03f83cb, 0x7b42a06e, 0xfd3fa839, 0xe8d45ac3, 0xea0f1f15, 0xa414b012, 0x061adb94},
|
||||
{0xb33fb188, 0xd22fc6e3, 0xf723dc18, 0xbebc7978, 0xf6c99f34, 0xa874b584, 0xf67ff454, 0x049beb53},
|
||||
{0x754bed16, 0x7c247948, 0xe50eac10, 0x4a84bcfb, 0xade97580, 0xc00d65df, 0xca79c5ae, 0x0763d73c},
|
||||
{0x7aadbe1a, 0x696e27af, 0x9d8e2a1f, 0x113535e0, 0x4c011766, 0x6953003f, 0xbb52558c, 0x0498a75f},
|
||||
{0x6e09cee7, 0xcf26e897, 0x299b63c7, 0x813a76f2, 0x0939904c, 0x67c02fa7, 0x7e0b9483, 0x045c41a9},
|
||||
{0x4af5adcc, 0xad979914, 0xc2c7c068, 0x7d9267f9, 0x21b4a0a7, 0xda4fa3f8, 0x3386c423, 0x03f4bcc9},
|
||||
{0xd1228595, 0xe5fcd634, 0x12fc8b7c, 0x5571b994, 0x244857f8, 0xd50dcd33, 0x263b93f0, 0x060dc1d6},
|
||||
{0xfee59c89, 0x7040a236, 0x78ceb168, 0x91a4301b, 0x19cdb36a, 0x973b55bd, 0x71008400, 0x06a1c58e},
|
||||
{0x6af1f351, 0x1d3c7ad7, 0xe8ad24dc, 0x8493c0c1, 0x48d5ffd9, 0x076f9dea, 0x5931555f, 0x00b9b2bf},
|
||||
{0xeaa5731c, 0xa3d54d89, 0xba84ee02, 0xfcc41a45, 0xcc1cdac8, 0x7c828f73, 0x5bfe9d23, 0x009c426b},
|
||||
{0x3f1f352c, 0x36fb314c, 0x9feb1120, 0x750a2a5f, 0xd7b06171, 0x3a2f19e8, 0x3b550cd9, 0x06de1885},
|
||||
{0xb69183f6, 0xefc03237, 0x979ee075, 0xb5a14fc3, 0x2dcb1d51, 0xbf114125, 0xb8eca2d3, 0x062364f7},
|
||||
{0x95375861, 0x575f1ea7, 0x80cc8dba, 0x30608586, 0xcf7a8f9f, 0x2beca9f5, 0x5fe60da4, 0x00dfc078},
|
||||
{0x0f86ded5, 0x312928eb, 0xb9c4f0cc, 0x646f5d3e, 0x2fbf14dd, 0x23c69382, 0xc44caa0e, 0x023aae90},
|
||||
{0x13e16243, 0xa7c92faf, 0x92efd5fc, 0x035a3e75, 0x86a744ea, 0x32f44d08, 0x1ea28333, 0x05b45217},
|
||||
{0xc41fdf22, 0xb557d203, 0x4bbc8f76, 0x9697570c, 0x81eaf742, 0x3a6a2cb5, 0xb0d03a0f, 0x07f2c08a},
|
||||
{0x2a18b73a, 0xca806385, 0xdb6a953d, 0xf2015d6d, 0xba5f67b9, 0x51d21a8e, 0x14807dd6, 0x051439d5},
|
||||
{0xf75051de, 0x7b6e0c13, 0x14dd1aa0, 0x114681fb, 0x0fd95a37, 0x72a1cccc, 0xa39e5bb8, 0x02f29d4c},
|
||||
{0x116529cd, 0x4808a0de, 0x5b941d1c, 0x1cf38580, 0xd70796f7, 0xc96a451e, 0x3f24e64f, 0x016d083f},
|
||||
{0x3cf155ee, 0xc71b78d0, 0x0c361b67, 0x0c04a134, 0x7756e4a9, 0xdb546edc, 0x2988eb2c, 0x03474404},
|
||||
{0xf30cef17, 0x1a0b3585, 0x864abd80, 0x63c1de29, 0xc0687c8e, 0x0c171d6e, 0xc9763a97, 0x0353aec8},
|
||||
{0x94192fb8, 0x0a2c9cff, 0x1a7f5bbf, 0x27320b93, 0xe5ceeb75, 0x465d2f9f, 0xd78f1cc3, 0x07ce6f99},
|
||||
{0xe8d1b26d, 0x0f899233, 0xb87a2984, 0xed4b44d2, 0x0bd6354a, 0x0c0712c6, 0xc7032f5c, 0x01eb2a31},
|
||||
{0x46b03b57, 0xc4c03fbd, 0x785ebbe8, 0x989b0ff3, 0x7f0bcb19, 0x5cada62a, 0xa97557c9, 0x01426410},
|
||||
{0x96fb0a26, 0xf1d2e82b, 0x1edb9ce3, 0xe270bc10, 0xfc7aaed8, 0x9549cfd0, 0xd90d7c9c, 0x03e8256c},
|
||||
{0x43ac9984, 0x14eef0ee, 0xa16d6770, 0x2903ff22, 0xa38fbfc0, 0xc66c2690, 0x8755440e, 0x0032a202},
|
||||
{0xf3601782, 0x46a07cf2, 0xaa71d137, 0x79f410f9, 0x8bcabc59, 0xc320c6f1, 0xf8ab64d8, 0x00a706cf},
|
||||
{0x8dbd8d4f, 0x8848a9f0, 0x0085061d, 0xeff89e69, 0xfee62fbe, 0x90e634a7, 0x2ffb456b, 0x03983046},
|
||||
{0xb272ed5c, 0x91ec28a8, 0xdc0cbb77, 0xf8529918, 0x3648d2c5, 0x8f896ddb, 0x74edaf19, 0x0668a86c},
|
||||
{0x128c9bd9, 0x341d5fc8, 0x6b3241c5, 0x592f87d8, 0xb2cc3c97, 0xf8cba6f2, 0x03f396ed, 0x03463bf1},
|
||||
{0xafd9d239, 0xcf3ae525, 0xea20b753, 0x06b8b7b9, 0x3408a993, 0xb2be1e49, 0x9f47063f, 0x02bcb200},
|
||||
{0xa0bd0bc8, 0x7ca02722, 0xb862774d, 0xce8b32ee, 0x5f8da059, 0x424ba5f0, 0x3bb422a0, 0x05c81961},
|
||||
{0x32fd8907, 0x137dad8c, 0xc95a3a5d, 0x301d5119, 0x8937ac08, 0x144b38c3, 0x39338de7, 0x00e66f0e},
|
||||
{0xcfc10885, 0xe68b8875, 0x96147e68, 0x4f24d49a, 0x43032c15, 0x5da9e6fd, 0x9bf25e12, 0x061ab0e6},
|
||||
{0x455c65ad, 0xeab29bbd, 0x2448be64, 0x1c7da0e7, 0x8eedfa1f, 0x8c2c1bcd, 0x698c1197, 0x0400e2d2},
|
||||
{0x04549c13, 0x335d3e9e, 0xd31585cc, 0x546f0d82, 0xe16dbbac, 0x350d5ed5, 0x113c53fd, 0x05f77544},
|
||||
{0x7d8f3b7e, 0x6aa75c04, 0x10a641ae, 0xc70851dd, 0x9a0750fe, 0x4d33edd4, 0xcd1b230f, 0x022802cf},
|
||||
{0xef8170e3, 0x59fa1903, 0x62995788, 0x464a73ef, 0x13369717, 0x338be7fd, 0x52d21278, 0x02e97589},
|
||||
{0x4856ddd5, 0x3f2deca8, 0xfced10e2, 0x969b10e2, 0x52860ee7, 0x09620dde, 0xb620fa3f, 0x04a169bf},
|
||||
{0xa03b49f1, 0xd9beb712, 0xe9af606e, 0x0798af09, 0x63e70b9a, 0xe37f9aea, 0xb35abd7c, 0x02542a44},
|
||||
{0xf6e78973, 0x335d4000, 0x76f1bb23, 0x7bc28fde, 0x1b30e9ca, 0x6cfdc907, 0x0400b651, 0x03ff88aa},
|
||||
{0x36433eaf, 0xfb862981, 0x4111cfa3, 0x15fdc659, 0xeab2909d, 0x569574b9, 0x3cd80f84, 0x01442360},
|
||||
{0xe85c4af3, 0xa8ed8f31, 0xe6aaf3da, 0xf7680fee, 0xc5c1772c, 0x2240e931, 0xaebeeb70, 0x04f44f6f},
|
||||
{0x8846e0af, 0x29de323f, 0x42c25319, 0x33f91593, 0x6cbadd58, 0x863099c1, 0xfd83e5b3, 0x06a603cf},
|
||||
{0x86c77703, 0x1bdd17f3, 0xe02db671, 0x8cee8e78, 0x0b6dffce, 0xed1627af, 0xa0d9b3cc, 0x04491984},
|
||||
{0xcb583661, 0x177f8f9c, 0x73d05bfc, 0x54122d0c, 0xebe37b4a, 0xa9231660, 0xd4826038, 0x06e885db},
|
||||
{0x13c253b9, 0x64cde875, 0x2fbc98a9, 0x8484bccb, 0x4885a9af, 0xbad877c5, 0x0cbc33b6, 0x03007c90},
|
||||
{0x47cfa357, 0x41eb9173, 0x325309ad, 0xb3f06289, 0xaa85421b, 0x029da7c1, 0x84de4bd4, 0x07b7eb0d},
|
||||
{0x56b831e2, 0x2c459a80, 0x321aba19, 0x2b99d098, 0xea73c0e1, 0x96237364, 0xe25ed0ed, 0x02f2c638},
|
||||
{0x9b388bf4, 0xfc8c3228, 0x82cd081d, 0xa4c371e4, 0xc85f75df, 0x11239026, 0x8892896e, 0x01f01c5e},
|
||||
{0x73457917, 0xce1dde59, 0x16dd8b49, 0xdfdaeb19, 0xbfd17b1e, 0x4289a976, 0xc842870a, 0x05e2cf7e},
|
||||
{0xc7705532, 0x72faa825, 0x8f7fe8c2, 0xd24bf942, 0xb695e31b, 0xb7403e13, 0xfc85a0c6, 0x02eac9e7},
|
||||
{0x1ddb2dff, 0xc47638e3, 0x799bb649, 0x78b91a13, 0x552588ed, 0x001800de, 0x9cd9425c, 0x01d0640c},
|
||||
{0xfb431e10, 0x159891e7, 0xa012b461, 0x2f2fb29a, 0xb3333e5d, 0xc1dca804, 0x9a47200d, 0x05b918ec},
|
||||
{0x2d5ce760, 0x379119b5, 0xda2ccdab, 0xf9911f75, 0x47b5c054, 0x92b09490, 0x7298d065, 0x0742a31e},
|
||||
{0x4a73d1f1, 0xe2a1046b, 0xc6ab4d9c, 0xbc85a747, 0xba0701f8, 0x79b0e699, 0xeebc6762, 0x05e5c2cb},
|
||||
{0xe0c0db50, 0xdc644b37, 0x2b8444d2, 0x26f7f083, 0x63479a84, 0x90acf2e7, 0x90ffe372, 0x0590d880},
|
||||
{0x83c0fc9c, 0x3dd1aba4, 0xcfb43020, 0x30a1051f, 0xaf5be716, 0x7d1ca380, 0x1ed8aed9, 0x01d56947},
|
||||
{0x0fa23690, 0x657df8c4, 0x32111be3, 0x61a12fe4, 0xe78236c9, 0xd6cc9942, 0x85e66191, 0x01709635},
|
||||
{0xc6a054f0, 0x96bf35ed, 0x004113cc, 0x9d1e411a, 0x1ac7a3ec, 0xccdb9bc3, 0xd08016b8, 0x07362425},
|
||||
{0x9721b035, 0x72744cce, 0x0beb72e3, 0xb87eb606, 0x60870c2e, 0x00c5e70c, 0x685d7c14, 0x029fa4d3},
|
||||
{0x86e52af4, 0x06d3a7a3, 0x70020878, 0x7b1c814a, 0x52e68007, 0x44373cb7, 0xe403540f, 0x041cf8c0},
|
||||
{0x76a27949, 0xd5dbc8bf, 0x27d9cd12, 0xb41449bc, 0xa7a667a1, 0x93740020, 0x0fbb4e77, 0x000bf807},
|
||||
{0x9969cfe9, 0x274ce281, 0x259ec27c, 0x3234d283, 0xe0b44f04, 0x9ff85b71, 0xffcc1006, 0x0298d060},
|
||||
{0x68ab54f8, 0x5cd8b289, 0x437eaab8, 0x42e3877f, 0x9318bd3e, 0x6490dc61, 0x4e54d968, 0x075b01f3},
|
||||
{0x7b64243c, 0x73100d65, 0x5c802f82, 0x692378be, 0x88184c0c, 0x00283dbb, 0xab6f4f0e, 0x0442efad},
|
||||
{0x72015722, 0xbe83b708, 0xe1cdcf0e, 0x2035319f, 0x398347da, 0x2b1b3351, 0x1a14b8dc, 0x061823d8},
|
||||
{0x378d9803, 0x1090948c, 0x4725c64b, 0x61a558cc, 0x7d7fcd91, 0x9e5bd3b5, 0x57ebda25, 0x061e02a0},
|
||||
{0xf8324dc8, 0x166b4a3c, 0x38133fda, 0xa25b9d11, 0x917171a5, 0x9d602950, 0x417d104e, 0x0632e48b},
|
||||
{0x6a61d5e0, 0x03b9f1b9, 0xe59cfbb7, 0xd906b740, 0x7892fbe4, 0x99a93267, 0xad1b8171, 0x06ddc2a6},
|
||||
{0x67fc3874, 0x6ae4355d, 0xb1ada695, 0x4fa456d8, 0x9f91ac43, 0x4e234065, 0x829d173e, 0x028da309},
|
||||
{0xfc695c2c, 0x1e08dd18, 0xfa687112, 0x1c0a2fad, 0xffd6302a, 0xeb5ebf01, 0xfd1d10f5, 0x012fd387},
|
||||
{0x236e65c9, 0x0b907f2e, 0xb1281d54, 0x92ba7a15, 0xc13f1d75, 0x07f0a6ad, 0xcd6d1e9c, 0x05dfe4e3},
|
||||
{0xc45f33f8, 0xd99cc41a, 0xd373165c, 0xc1c10a71, 0x2ce2936a, 0x6c809230, 0xa0498cf5, 0x018dc832},
|
||||
{0x7b222ad8, 0x8e881eab, 0xb6194efb, 0xc8b48774, 0x963c6b6b, 0x38452dfd, 0xe4c4e0f8, 0x02847f5a},
|
||||
{0x2bf4ad95, 0x2950bb4a, 0xdc39ffb0, 0x37f42c9b, 0x101253a8, 0x3814fa42, 0xb67f2ca5, 0x04d4a34c},
|
||||
{0xa9684ba0, 0x6c40fece, 0x3b13bca4, 0xc7108aad, 0xe7bff9be, 0x98ccc7ea, 0xe9b3b316, 0x048b3a6a},
|
||||
{0x08390a2b, 0x4d908260, 0x74b070bc, 0xd5a641d0, 0x910015c5, 0xc3b19274, 0xd5a998a7, 0x02ac8e74},
|
||||
{0x9698d605, 0x8de03acc, 0xa4c9137f, 0x3b8b720c, 0x354faf46, 0x5bbad6e4, 0xfd9e842f, 0x0054c120},
|
||||
{0xd65aead5, 0x305fa33f, 0x0fe296f9, 0xba02b164, 0x708efc94, 0x64cba43c, 0x8ad7f0ef, 0x034b9ffe},
|
||||
{0x13c2e8f4, 0x59e1179e, 0xc572f8a8, 0x5d823d59, 0x74003bce, 0x0cfdb6ee, 0x011c179e, 0x00763941},
|
||||
{0xa47999a8, 0x29b692ee, 0xbfcd80d8, 0x6436c3f1, 0x959768d7, 0x553444f3, 0x583896d4, 0x01d45a26},
|
||||
{0xc150b3f8, 0x0ce0791d, 0xf493c135, 0x7d3a0c1f, 0x5ede0712, 0x4d37cc23, 0x34fbae9c, 0x036a6a38},
|
||||
{0x2ca1eb78, 0xa8ee8204, 0x66d8b759, 0xc713a1dc, 0xac061800, 0x1813508d, 0x3b1f0da2, 0x05725ca0},
|
||||
{0xf2f391c1, 0xbe6826df, 0x232878f0, 0xeb85b046, 0xf7e1d662, 0xf5a96510, 0xe38c2b64, 0x0419a43b},
|
||||
{0xe69e791b, 0x4b54889b, 0xb5c95ea5, 0xb371eeb0, 0x0b2f26a3, 0x9f53ccca, 0x66f45f71, 0x0040592d},
|
||||
{0xad2e5d5b, 0x4ced12db, 0x0987b849, 0x5f57b16d, 0xd9ec045b, 0xcab0e2e9, 0x6cfbf4df, 0x03e4e405},
|
||||
{0x3ecb72a4, 0xd71a1eee, 0x03a13fb7, 0x6bd9f7ec, 0x5877c6c7, 0xb74a54c8, 0xa28236a5, 0x0377689b},
|
||||
{0x74b3354c, 0x6f558a20, 0x3f776b18, 0xb67f6d10, 0x01165ed8, 0x8c447df2, 0xf3889308, 0x056b8991},
|
||||
{0x0d306b7a, 0x9482eb10, 0xd441cd03, 0xdd738e0f, 0x2de5dfd7, 0x6d186de5, 0x75fd1833, 0x00781b3e},
|
||||
{0x77ec28e5, 0xdbc14748, 0xd26e050c, 0x02ceee41, 0x18457c96, 0x8e5aef74, 0x1823c60f, 0x0461a6e2},
|
||||
{0x2be17c8b, 0x172e551d, 0x49c6a7b8, 0x90e25fa2, 0xa1b3478f, 0x6219e63e, 0xd063a517, 0x00c412f8},
|
||||
{0x65a9b68e, 0xb136b848, 0x673c6cbc, 0x9a9b7169, 0xf8ec7473, 0x15fa1875, 0x3033a5d6, 0x022d72f6}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x80000000, 0x00000008, 0x04000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xc0000000, 0x0000000c, 0x06000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xe0000000, 0x0000000e, 0x07000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xf0000000, 0x0000000f, 0x07800000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x78000000, 0x00000010, 0x07c00000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xbc000000, 0x00000010, 0x07e00000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xde000000, 0x00000010, 0x07f00000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xef000000, 0x00000010, 0x07f80000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xf7800000, 0x00000010, 0x07fc0000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfbc00000, 0x00000010, 0x07fe0000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfde00000, 0x00000010, 0x07ff0000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfef00000, 0x00000010, 0x07ff8000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xff780000, 0x00000010, 0x07ffc000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffbc0000, 0x00000010, 0x07ffe000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffde0000, 0x00000010, 0x07fff000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffef0000, 0x00000010, 0x07fff800},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfff78000, 0x00000010, 0x07fffc00},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffbc000, 0x00000010, 0x07fffe00},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffde000, 0x00000010, 0x07ffff00},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffef000, 0x00000010, 0x07ffff80},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffff7800, 0x00000010, 0x07ffffc0},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffbc00, 0x00000010, 0x07ffffe0},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffde00, 0x00000010, 0x07fffff0},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffef00, 0x00000010, 0x07fffff8},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffff780, 0x00000010, 0x07fffffc},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffffbc0, 0x00000010, 0x07fffffe},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffffde0, 0x00000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffffef0, 0x80000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffff78, 0xc0000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffbc, 0xe0000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffde, 0xf0000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xffffffef, 0xf8000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x80000000, 0xfffffff7, 0xfc000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xc0000000, 0xfffffffb, 0xfe000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xe0000000, 0xfffffffd, 0xff000010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xf0000000, 0xfffffffe, 0xff800010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x78000000, 0xffffffff, 0xffc00010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xbc000000, 0xffffffff, 0xffe00010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xde000000, 0xffffffff, 0xfff00010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xef000000, 0xffffffff, 0xfff80010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xf7800000, 0xffffffff, 0xfffc0010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfbc00000, 0xffffffff, 0xfffe0010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfde00000, 0xffffffff, 0xffff0010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfef00000, 0xffffffff, 0xffff8010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xff780000, 0xffffffff, 0xffffc010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffbc0000, 0xffffffff, 0xffffe010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffde0000, 0xffffffff, 0xfffff010, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffef0000, 0xffffffff, 0xfffff810, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfff78000, 0xffffffff, 0xfffffc10, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffbc000, 0xffffffff, 0xfffffe10, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffde000, 0xffffffff, 0xffffff10, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffef000, 0xffffffff, 0xffffff90, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffff7800, 0xffffffff, 0xffffffd0, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffbc00, 0xffffffff, 0xfffffff0, 0x07ffffff},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffde00, 0xffffffff, 0x00000000, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffef00, 0xffffffff, 0x00000008, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffff780, 0xffffffff, 0x0000000c, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffffbc0, 0xffffffff, 0x0000000e, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffffde0, 0xffffffff, 0x0000000f, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xfffffef0, 0x7fffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffff78, 0xbfffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffbc, 0xdfffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffde, 0xefffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x00000000, 0xffffffef, 0xf7ffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x80000000, 0xfffffff7, 0xfbffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xc0000000, 0xfffffffb, 0xfdffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xe0000000, 0xfffffffd, 0xfeffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xf0000000, 0xfffffffe, 0xff7fffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0x78000000, 0xffffffff, 0xffbfffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xbc000000, 0xffffffff, 0xffdfffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xde000000, 0xffffffff, 0xffefffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xef000000, 0xffffffff, 0xfff7ffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xf7800000, 0xffffffff, 0xfffbffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfbc00000, 0xffffffff, 0xfffdffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfde00000, 0xffffffff, 0xfffeffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfef00000, 0xffffffff, 0xffff7fff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xff780000, 0xffffffff, 0xffffbfff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffbc0000, 0xffffffff, 0xffffdfff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffde0000, 0xffffffff, 0xffffefff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffef0000, 0xffffffff, 0xfffff7ff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfff78000, 0xffffffff, 0xfffffbff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffbc000, 0xffffffff, 0xfffffdff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffde000, 0xffffffff, 0xfffffeff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffef000, 0xffffffff, 0xffffff7f, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffff7800, 0xffffffff, 0xffffffbf, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffbc00, 0xffffffff, 0xffffffdf, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffde00, 0xffffffff, 0xffffffef, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffef00, 0xffffffff, 0xfffffff7, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffff780, 0xffffffff, 0xfffffffb, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffffbc0, 0xffffffff, 0xfffffffd, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffffde0, 0xffffffff, 0xfffffffe, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xfffffef0, 0x7fffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffff78, 0xbfffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffffbc, 0xdfffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffffde, 0xefffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x00000000, 0xffffffef, 0xf7ffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x80000000, 0xfffffff7, 0xfbffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xc0000000, 0xfffffffb, 0xfdffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xe0000000, 0xfffffffd, 0xfeffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xf0000000, 0xfffffffe, 0xff7fffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0x78000000, 0xffffffff, 0xffbfffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xbc000000, 0xffffffff, 0xffdfffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xde000000, 0xffffffff, 0xffefffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xef000000, 0xffffffff, 0xfff7ffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xf7800000, 0xffffffff, 0xfffbffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfbc00000, 0xffffffff, 0xfffdffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfde00000, 0xffffffff, 0xfffeffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfef00000, 0xffffffff, 0xffff7fff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xff780000, 0xffffffff, 0xffffbfff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffbc0000, 0xffffffff, 0xffffdfff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffde0000, 0xffffffff, 0xffffefff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffef0000, 0xffffffff, 0xfffff7ff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfff78000, 0xffffffff, 0xfffffbff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffbc000, 0xffffffff, 0xfffffdff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffde000, 0xffffffff, 0xfffffeff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffef000, 0xffffffff, 0xffffff7f, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffff7800, 0xffffffff, 0xffffffbf, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffbc00, 0xffffffff, 0xffffffdf, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffde00, 0xffffffff, 0xffffffef, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffef00, 0xffffffff, 0xfffffff7, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffff780, 0xffffffff, 0xfffffffb, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffffbc0, 0xffffffff, 0xfffffffd, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffffde0, 0xffffffff, 0xfffffffe, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xfffffef0, 0x7fffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffff78, 0xbfffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffffbc, 0xdfffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffffde, 0xefffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x00000000, 0xffffffef, 0xf7ffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x80000000, 0xfffffff7, 0xfbffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xc0000000, 0xfffffffb, 0xfdffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xe0000000, 0xfffffffd, 0xfeffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xf0000000, 0xfffffffe, 0xff7fffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0x78000000, 0xffffffff, 0xffbfffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xbc000000, 0xffffffff, 0xffdfffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xde000000, 0xffffffff, 0xffefffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xef000000, 0xffffffff, 0xfff7ffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xf7800000, 0xffffffff, 0xfffbffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfbc00000, 0xffffffff, 0xfffdffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfde00000, 0xffffffff, 0xfffeffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfef00000, 0xffffffff, 0xffff7fff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xff780000, 0xffffffff, 0xffffbfff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffbc0000, 0xffffffff, 0xffffdfff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffde0000, 0xffffffff, 0xffffefff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffef0000, 0xffffffff, 0xfffff7ff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfff78000, 0xffffffff, 0xfffffbff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffbc000, 0xffffffff, 0xfffffdff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffde000, 0xffffffff, 0xfffffeff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffef000, 0xffffffff, 0xffffff7f, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffff7800, 0xffffffff, 0xffffffbf, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffbc00, 0xffffffff, 0xffffffdf, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffde00, 0xffffffff, 0xffffffef, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffef00, 0xffffffff, 0xfffffff7, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffff780, 0xffffffff, 0xfffffffb, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffffbc0, 0xffffffff, 0xfffffffd, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffffde0, 0xffffffff, 0xfffffffe, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xfffffef0, 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffff78, 0xbfffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffffbc, 0xdfffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffffde, 0xefffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x00000001, 0xffffffef, 0xf7ffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x80000001, 0xfffffff7, 0xfbffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xc0000001, 0xfffffffb, 0xfdffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xe0000001, 0xfffffffd, 0xfeffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xf0000001, 0xfffffffe, 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0x78000001, 0xffffffff, 0xffbfffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xbc000001, 0xffffffff, 0xffdfffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xde000001, 0xffffffff, 0xffefffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xef000001, 0xffffffff, 0xfff7ffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xf7800001, 0xffffffff, 0xfffbffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfbc00001, 0xffffffff, 0xfffdffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfde00001, 0xffffffff, 0xfffeffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfef00001, 0xffffffff, 0xffff7fff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xff780001, 0xffffffff, 0xffffbfff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffbc0001, 0xffffffff, 0xffffdfff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffde0001, 0xffffffff, 0xffffefff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffef0001, 0xffffffff, 0xfffff7ff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfff78001, 0xffffffff, 0xfffffbff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffbc001, 0xffffffff, 0xfffffdff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffde001, 0xffffffff, 0xfffffeff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffef001, 0xffffffff, 0xffffff7f, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffff7801, 0xffffffff, 0xffffffbf, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffbc01, 0xffffffff, 0xffffffdf, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffde01, 0xffffffff, 0xffffffef, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffef01, 0xffffffff, 0xfffffff7, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffff781, 0xffffffff, 0xfffffffb, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffffbc1, 0xffffffff, 0xfffffffd, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffffde1, 0xffffffff, 0xfffffffe, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffffef1, 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffff79, 0xbfffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffffbd, 0xdfffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xffffffdf, 0xefffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000},
|
||||
{0xfffffff0, 0xf7ffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000010, 0x08000000}}};
|
||||
static constexpr storage<8> rou = {0x42f8ef94, 0x6070024f, 0xe11a6161, 0xad187148,
|
||||
0x9c8b0fa5, 0x3f046451, 0x87529cfa, 0x005282db};
|
||||
TWIDDLES(modulus, rou)
|
||||
};
|
||||
|
||||
/**
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#define DEVICE_CONTEXT_H
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
namespace device_context {
|
||||
|
||||
@@ -30,6 +31,28 @@ namespace device_context {
|
||||
};
|
||||
}
|
||||
|
||||
} // namespace device_context
|
||||
// checking whether a pointer is on host or device and asserts device matches provided device
|
||||
static bool is_host_ptr(const void* p, int device_id = 0)
|
||||
{
|
||||
cudaPointerAttributes attributes;
|
||||
CHK_STICKY(cudaPointerGetAttributes(&attributes, p));
|
||||
const bool is_on_host = attributes.type == cudaMemoryTypeHost ||
|
||||
attributes.type == cudaMemoryTypeUnregistered; // unregistered is host memory
|
||||
const bool is_on_cur_device = !is_on_host && attributes.device == device_id;
|
||||
const bool is_valid_ptr = is_on_host || is_on_cur_device;
|
||||
if (!is_valid_ptr) { THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Invalid ptr"); }
|
||||
|
||||
return is_on_host;
|
||||
}
|
||||
|
||||
static int get_cuda_device(const void* p)
|
||||
{
|
||||
cudaPointerAttributes attributes;
|
||||
CHK_STICKY(cudaPointerGetAttributes(&attributes, p));
|
||||
const bool is_on_host = attributes.type == cudaMemoryTypeHost ||
|
||||
attributes.type == cudaMemoryTypeUnregistered; // unregistered is host memory
|
||||
return is_on_host ? -1 : attributes.device;
|
||||
}
|
||||
|
||||
} // namespace device_context
|
||||
#endif
|
||||
174
icicle/include/hash/hash.cuh
Normal file
174
icicle/include/hash/hash.cuh
Normal file
@@ -0,0 +1,174 @@
|
||||
#pragma once
|
||||
#ifndef HASH_H
|
||||
#define HASH_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include <cassert>
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
/**
|
||||
* @namespace hash
|
||||
* Includes classes and methods for describing hash functions.
|
||||
*/
|
||||
namespace hash {
|
||||
|
||||
/**
|
||||
* @struct HashConfig
|
||||
* Encodes hash operations parameters.
|
||||
*/
|
||||
struct HashConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool
|
||||
are_outputs_on_device; /**< True if outputs are on device and false if they're on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the hash operations asynchronously. If set to `true`, the functions will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false,
|
||||
* functions will block the current CPU thread. */
|
||||
};
|
||||
|
||||
/**
|
||||
* A function that returns the default value of [HashConfig](@ref HashConfig) for the [Hasher](@ref
|
||||
* Hasher) class.
|
||||
* @return Default value of [HashConfig](@ref HashConfig).
|
||||
*/
|
||||
static HashConfig
default_hash_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context())
{
  // Defaults: host-resident inputs and outputs, blocking execution.
  const bool are_inputs_on_device = false;
  const bool are_outputs_on_device = false;
  const bool is_async = false;

  return HashConfig{ctx, are_inputs_on_device, are_outputs_on_device, is_async};
}
|
||||
|
||||
/**
|
||||
* @class Hasher
|
||||
*
|
||||
* An interface containing methods for hashing
|
||||
*
|
||||
* @tparam PreImage type of inputs elements
|
||||
* @tparam Image type of state elements. Also used to describe the type of hash output
|
||||
*/
|
||||
template <typename PreImage, typename Image>
|
||||
class Hasher
|
||||
{
|
||||
public:
|
||||
/// @brief the width of permutation state
|
||||
const unsigned int width;
|
||||
|
||||
/// @brief how many elements a state can fit per 1 permutation. Used with domain separation.
|
||||
const unsigned int preimage_max_length;
|
||||
|
||||
/// @brief portion of the state to absorb input into, or squeeze output from
|
||||
const unsigned int rate;
|
||||
|
||||
/// @brief start squeezing from this offset. Used with domain separation.
|
||||
const unsigned int offset;
|
||||
|
||||
Hasher(unsigned int width, unsigned int preimage_max_length, unsigned int rate, unsigned int offset)
|
||||
: width(width), preimage_max_length(preimage_max_length), rate(rate), offset(offset)
|
||||
{
|
||||
assert(
|
||||
rate * sizeof(PreImage) <= preimage_max_length * sizeof(Image) &&
|
||||
"Input rate can not be bigger than preimage max length");
|
||||
}
|
||||
|
||||
virtual cudaError_t hash_2d(
|
||||
const Matrix<PreImage>* inputs,
|
||||
Image* states,
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int output_len,
|
||||
uint64_t number_of_rows,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Absorb 2d is not implemented for this hash");
|
||||
return cudaError_t::cudaSuccess;
|
||||
};
|
||||
|
||||
virtual cudaError_t compress_and_inject(
|
||||
const Matrix<PreImage>* matrices_to_inject,
|
||||
unsigned int number_of_inputs,
|
||||
uint64_t number_of_rows,
|
||||
const Image* prev_layer,
|
||||
Image* next_layer,
|
||||
unsigned int digest_elements,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Compress and inject is not implemented for this hash");
|
||||
return cudaError_t::cudaSuccess;
|
||||
}
|
||||
|
||||
/// @param input pointer to input allocated on-device
|
||||
/// @param out pointer to output allocated on-device
|
||||
cudaError_t compress_many(
|
||||
const Image* input,
|
||||
Image* out,
|
||||
unsigned int number_of_states,
|
||||
unsigned int output_len,
|
||||
const HashConfig& cfg) const
|
||||
{
|
||||
return hash_many((const PreImage*)input, out, number_of_states, width, output_len, cfg);
|
||||
}
|
||||
|
||||
virtual cudaError_t run_hash_many_kernel(
|
||||
const PreImage* input,
|
||||
Image* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "Hash many kernel is not implemented for this hash");
|
||||
return cudaError_t::cudaSuccess;
|
||||
};
|
||||
|
||||
cudaError_t hash_many(
|
||||
const PreImage* input,
|
||||
Image* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const HashConfig& cfg) const
|
||||
{
|
||||
const PreImage* d_input;
|
||||
PreImage* d_alloc_input;
|
||||
Image* d_output;
|
||||
if (!cfg.are_inputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_alloc_input, number_of_states * input_len * sizeof(PreImage), cfg.ctx.stream));
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_alloc_input, input, number_of_states * input_len * sizeof(PreImage), cudaMemcpyHostToDevice,
|
||||
cfg.ctx.stream));
|
||||
d_input = d_alloc_input;
|
||||
} else {
|
||||
d_input = input;
|
||||
}
|
||||
|
||||
if (!cfg.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_output, number_of_states * output_len * sizeof(Image), cfg.ctx.stream));
|
||||
} else {
|
||||
d_output = output;
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(run_hash_many_kernel(d_input, d_output, number_of_states, input_len, output_len, cfg.ctx));
|
||||
|
||||
if (!cfg.are_inputs_on_device) { CHK_IF_RETURN(cudaFreeAsync(d_alloc_input, cfg.ctx.stream)); }
|
||||
if (!cfg.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
output, d_output, number_of_states * output_len * sizeof(Image), cudaMemcpyDeviceToHost, cfg.ctx.stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(d_output, cfg.ctx.stream));
|
||||
}
|
||||
|
||||
if (!cfg.is_async) CHK_IF_RETURN(cudaStreamSynchronize(cfg.ctx.stream));
|
||||
|
||||
return CHK_LAST();
|
||||
};
|
||||
};
|
||||
} // namespace hash
|
||||
|
||||
#endif
|
||||
@@ -6,51 +6,67 @@
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
using namespace hash;
|
||||
|
||||
namespace keccak {
|
||||
/**
|
||||
* @struct KeccakConfig
|
||||
* Struct that encodes various Keccak parameters.
|
||||
*/
|
||||
struct KeccakConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the Keccak asynchronously. If set to `true`, the keccak_hash function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, keccak_hash
|
||||
* function will block the current CPU thread. */
|
||||
// Input rate in bytes
|
||||
const int KECCAK_256_RATE = 136;
|
||||
const int KECCAK_512_RATE = 72;
|
||||
|
||||
// Digest size in u64
|
||||
const int KECCAK_256_DIGEST = 4;
|
||||
const int KECCAK_512_DIGEST = 8;
|
||||
|
||||
// Number of state elements in u64
|
||||
const int KECCAK_STATE_SIZE = 25;
|
||||
|
||||
const int KECCAK_PADDING_CONST = 1;
|
||||
const int SHA3_PADDING_CONST = 6;
|
||||
|
||||
class Keccak : public Hasher<uint8_t, uint64_t>
{
public:
  /// Padding constant, fixed at construction from `padding_const`.
  const int PADDING_CONST;

  /// Builds a hasher whose state width and preimage max length are both KECCAK_STATE_SIZE,
  /// with the given `rate` and a zero offset.
  Keccak(unsigned int rate, unsigned int padding_const)
      : Hasher<uint8_t, uint64_t>(KECCAK_STATE_SIZE, KECCAK_STATE_SIZE, rate, 0), PADDING_CONST(padding_const)
  {
  }

  /// Kernel launcher override; defined outside this header.
  cudaError_t run_hash_many_kernel(
    const uint8_t* input,
    uint64_t* output,
    unsigned int number_of_states,
    unsigned int input_len,
    unsigned int output_len,
    const device_context::DeviceContext& ctx) const override;
};
|
||||
|
||||
KeccakConfig default_keccak_config()
|
||||
class Keccak256 : public Keccak
|
||||
{
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
KeccakConfig config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputes_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
public:
|
||||
Keccak256() : Keccak(KECCAK_256_RATE, KECCAK_PADDING_CONST) {}
|
||||
};
|
||||
|
||||
/**
|
||||
* Compute the keccak hash over a sequence of preimages.
|
||||
* Takes {number_of_blocks * input_block_size} u64s of input and computes {number_of_blocks} outputs, each of size {D
|
||||
* / 64} u64
|
||||
* @tparam C - number of bits of capacity (c = b - r = 1600 - r). Only multiples of 64 are supported.
|
||||
* @tparam D - number of bits of output. Only multiples of 64 are supported.
|
||||
* @param input a pointer to the input data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be of size [input_block_size](@ref input_block_size) * [number_of_blocks](@ref
|
||||
* number_of_blocks)}.
|
||||
* @param input_block_size - size of each input block in bytes. Should be divisible by 8.
|
||||
* @param number_of_blocks number of input and output blocks. One GPU thread processes one block
|
||||
* @param output a pointer to the output data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be of size [output_block_size](@ref output_block_size) * [number_of_blocks](@ref
|
||||
* number_of_blocks)}
|
||||
*/
|
||||
template <int C, int D>
|
||||
cudaError_t
|
||||
keccak_hash(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config);
|
||||
class Keccak512 : public Keccak
|
||||
{
|
||||
public:
|
||||
Keccak512() : Keccak(KECCAK_512_RATE, KECCAK_PADDING_CONST) {}
|
||||
};
|
||||
|
||||
class Sha3_256 : public Keccak
|
||||
{
|
||||
public:
|
||||
Sha3_256() : Keccak(KECCAK_256_RATE, SHA3_PADDING_CONST) {}
|
||||
};
|
||||
|
||||
class Sha3_512 : public Keccak
|
||||
{
|
||||
public:
|
||||
Sha3_512() : Keccak(KECCAK_512_RATE, SHA3_PADDING_CONST) {}
|
||||
};
|
||||
} // namespace keccak
|
||||
|
||||
#endif
|
||||
14
icicle/include/matrix/matrix.cuh
Normal file
14
icicle/include/matrix/matrix.cuh
Normal file
@@ -0,0 +1,14 @@
|
||||
#pragma once
|
||||
#ifndef MATRIX_H
|
||||
#define MATRIX_H
|
||||
|
||||
namespace matrix {
|
||||
/// Plain aggregate bundling a pointer to elements of type T with two size fields.
template <typename T>
struct Matrix {
  /// Pointer to the matrix elements (not owned or freed by this struct).
  T* values;

  /// Matrix width.
  size_t width;

  /// Matrix height.
  size_t height;
};
|
||||
} // namespace matrix
|
||||
|
||||
#endif
|
||||
130
icicle/include/merkle-tree/merkle.cuh
Normal file
130
icicle/include/merkle-tree/merkle.cuh
Normal file
@@ -0,0 +1,130 @@
|
||||
#pragma once
|
||||
#ifndef MERKLE_H
|
||||
#define MERKLE_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "hash/hash.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
|
||||
#include <vector>
|
||||
#include <numeric>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
using namespace hash;
|
||||
using matrix::Matrix;
|
||||
|
||||
/**
|
||||
* @namespace merkle_tree
|
||||
* Implementation of the [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) builder,
|
||||
* parallelized for the use on GPU
|
||||
*/
|
||||
namespace merkle_tree {
|
||||
static constexpr size_t GIGA = 1024 * 1024 * 1024;
|
||||
|
||||
/// Bytes per stream
|
||||
static constexpr uint64_t STREAM_CHUNK_SIZE = GIGA;
|
||||
|
||||
/// Returns the number of digest elements needed to store every row of a full tree:
/// the sum of `digest_elements * arity^i` for i in [0, height].
static size_t get_digests_len(uint32_t height, uint32_t arity, uint32_t digest_elements)
{
  size_t digests_len = 0;
  size_t row_length = digest_elements;
  // The counter is unsigned and wider than `height`, so the comparison never mixes
  // signed and unsigned operands and the loop terminates for every possible height.
  for (uint64_t level = 0; level <= height; ++level) {
    digests_len += row_length;
    row_length *= arity;
  }

  return digests_len;
}
|
||||
|
||||
/// Exchanges the two pointers that `r` and `s` refer to.
template <typename T>
void swap(T** r, T** s)
{
  T* const held_by_r = *r;
  T* const held_by_s = *s;
  *r = held_by_s;
  *s = held_by_r;
}
|
||||
|
||||
/// Counts how many times `number_of_elements` can be halved (shifted right by one)
/// before reaching zero, not counting the final step; yields 0 for inputs 0 and 1.
static unsigned int get_height(uint64_t number_of_elements)
{
  unsigned int shifts = 0;
  for (uint64_t remaining = number_of_elements >> 1; remaining != 0; remaining >>= 1) {
    shifts += 1;
  }
  return shifts;
}
|
||||
|
||||
/**
|
||||
* @struct TreeBuilderConfig
|
||||
* Struct that encodes various Tree builder parameters.
|
||||
*/
|
||||
struct TreeBuilderConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
unsigned int arity;
|
||||
unsigned int
|
||||
keep_rows; /**< How many rows of the Merkle tree rows should be written to output. '0' means all of them */
|
||||
unsigned int
|
||||
digest_elements; /** @param digest_elements the size of output for each bottom layer hash and compression.
|
||||
* Will also be equal to the size of the root of the tree. Default value 1 */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool
|
||||
are_outputs_on_device; /**< True if outputs are on device and false if they're on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the tree builder asynchronously. If set to `true`, the build_merkle_tree
|
||||
* function will be non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
/// Returns the default TreeBuilderConfig for the given device context:
/// binary tree, all rows kept, one digest element, host inputs and outputs, blocking execution.
static TreeBuilderConfig
default_merkle_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context())
{
  const unsigned int arity = 2;
  const unsigned int keep_rows = 0;
  const unsigned int digest_elements = 1;
  const bool are_inputs_on_device = false;
  const bool are_outputs_on_device = false;
  const bool is_async = false;

  return TreeBuilderConfig{
    ctx, arity, keep_rows, digest_elements, are_inputs_on_device, are_outputs_on_device, is_async,
  };
}
|
||||
|
||||
/**
|
||||
* Builds the Merkle tree
|
||||
*
|
||||
* @param inputs a pointer to the leaves layer. May be allocated on device or on host, regulated by the config
|
||||
* Expected to have arity ^ (height) * input_block_len elements
|
||||
* @param digests a pointer to the digests storage. May only be allocated on the host
|
||||
* Expected to have `sum(digest_elements * (arity ^ i)) for i in [0..keep_rows]` elements
|
||||
* @param height the height of the merkle tree
|
||||
* @param input_block_len the size of input vectors at the bottom layer of the tree
|
||||
* # Algorithm
|
||||
* The function will split large tree into many subtrees of size that will fit `STREAM_CHUNK_SIZE`.
|
||||
* Each subtree is built in its own stream (there is a maximum number of streams)
|
||||
* After all subtrees are constructed - the function will combine the resulting sub-digests into the final top-tree
|
||||
*/
|
||||
template <typename Leaf, typename Digest>
|
||||
cudaError_t build_merkle_tree(
|
||||
const Leaf* inputs,
|
||||
Digest* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const Hasher<Leaf, Digest>& compression,
|
||||
const Hasher<Leaf, Digest>& bottom_layer,
|
||||
const TreeBuilderConfig& config);
|
||||
|
||||
template <typename Leaf, typename Digest>
|
||||
cudaError_t mmcs_commit(
|
||||
const Matrix<Leaf>* inputs,
|
||||
const unsigned int number_of_inputs,
|
||||
Digest* digests,
|
||||
const Hasher<Leaf, Digest>& hasher,
|
||||
const Hasher<Leaf, Digest>& compression,
|
||||
const TreeBuilderConfig& tree_config);
|
||||
} // namespace merkle_tree
|
||||
|
||||
#endif
|
||||
114
icicle/include/poseidon/constants.cuh
Normal file
114
icicle/include/poseidon/constants.cuh
Normal file
@@ -0,0 +1,114 @@
|
||||
#pragma once
|
||||
#ifndef POSEIDON_CONSTANTS_H
|
||||
#define POSEIDON_CONSTANTS_H
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
namespace poseidon {
|
||||
#define FIRST_FULL_ROUNDS true
|
||||
#define SECOND_FULL_ROUNDS false
|
||||
|
||||
/**
|
||||
* For most of the Poseidon configurations this is the case
|
||||
* TODO: Add support for different full rounds numbers
|
||||
*/
|
||||
const int FULL_ROUNDS_DEFAULT = 4;
|
||||
|
||||
/**
|
||||
* @struct PoseidonConstants
|
||||
* These constants are enough to define a Poseidon instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param mds_matrix A pointer to an mds matrix allocated on the device
|
||||
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
|
||||
* @param sparse_matrices A pointer to sparse matrices allocated on the device
|
||||
*/
|
||||
template <typename S>
struct PoseidonConstants {
  // Scalar parameters start at zero so a default-constructed instance never exposes
  // indeterminate values (the pointer members below were already initialised).
  unsigned int arity = 0;
  unsigned int alpha = 0;
  unsigned int partial_rounds = 0;
  unsigned int full_rounds_half = 0;

  // Constant tables; this struct only stores the pointers and never frees them.
  S* round_constants = nullptr;
  S* mds_matrix = nullptr;
  S* non_sparse_matrix = nullptr;
  S* sparse_matrices = nullptr;

  S domain_tag = S::zero();

  PoseidonConstants() = default;
  PoseidonConstants(const PoseidonConstants& other) = default;

  /// Member-wise copy: the pointer members are copied as-is, so both objects
  /// refer to the same constant tables afterwards.
  PoseidonConstants<S>& operator=(PoseidonConstants<S> const& other)
  {
    this->arity = other.arity;
    this->alpha = other.alpha;
    this->partial_rounds = other.partial_rounds;
    this->full_rounds_half = other.full_rounds_half;
    this->round_constants = other.round_constants;
    this->mds_matrix = other.mds_matrix;
    this->non_sparse_matrix = other.non_sparse_matrix;
    this->sparse_matrices = other.sparse_matrices;
    this->domain_tag = other.domain_tag;

    return *this;
  }
};
|
||||
|
||||
/**
|
||||
* @class PoseidonKernelsConfiguration
|
||||
* Describes the logic of deriving CUDA kernels parameters
|
||||
* such as the number of threads and the number of blocks
|
||||
*/
|
||||
class PoseidonKernelsConfiguration
{
public:
  // The logic behind this is that 1 thread only works on 1 element
  // We have {width} elements in each state, and {number_of_states} states total
  // Result is the largest multiple of `width` that does not exceed 256.
  // `width` must be in [1, 256]: 0 divides by zero here, and larger values yield 0,
  // which makes the helpers below divide by zero.
  static int number_of_threads(unsigned int width) { return 256 / width * width; }

  // The partial rounds operates on the whole state, so we define
  // the parallelism params for processing a single hash preimage per thread
  static const int singlehash_block_size = 128;

  // Number of whole states handled by one block of number_of_threads(width) threads.
  static int hashes_per_block(unsigned int width) { return number_of_threads(width) / width; }

  // Number of blocks of number_of_threads(width) threads needed for one thread per state
  // element, rounded up. The element count is kept in size_t: squeezing
  // `number_of_states * width` into an int gave wrong block counts for large batches.
  static int number_of_full_blocks(unsigned int width, size_t number_of_states)
  {
    const size_t threads_per_block = static_cast<size_t>(number_of_threads(width));
    const size_t total_number_of_threads = number_of_states * width;
    const size_t whole_blocks = total_number_of_threads / threads_per_block;
    const size_t partial_block = (total_number_of_threads % threads_per_block != 0) ? 1 : 0;
    return static_cast<int>(whole_blocks + partial_block);
  }

  // Number of blocks of singlehash_block_size threads needed for one thread per state, rounded up.
  static int number_of_singlehash_blocks(size_t number_of_states)
  {
    const size_t block_size = static_cast<size_t>(singlehash_block_size);
    const size_t whole_blocks = number_of_states / block_size;
    const size_t partial_block = (number_of_states % block_size != 0) ? 1 : 0;
    return static_cast<int>(whole_blocks + partial_block);
  }
};
|
||||
|
||||
using PKC = PoseidonKernelsConfiguration;
|
||||
|
||||
template <typename S>
|
||||
cudaError_t create_optimized_poseidon_constants(
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const S* round_constants,
|
||||
const S* mds_matrix,
|
||||
const S* non_sparse_matrix,
|
||||
const S* sparse_matrices,
|
||||
const S domain_tag,
|
||||
PoseidonConstants<S>* poseidon_constants,
|
||||
device_context::DeviceContext& ctx);
|
||||
|
||||
/**
|
||||
* Loads pre-calculated optimized constants, moves them to the device
|
||||
*/
|
||||
template <typename S>
|
||||
cudaError_t
|
||||
init_optimized_poseidon_constants(int arity, device_context::DeviceContext& ctx, PoseidonConstants<S>* constants);
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_optimized_poseidon_constants(PoseidonConstants<S>* constants, device_context::DeviceContext& ctx);
|
||||
} // namespace poseidon
|
||||
|
||||
#endif
|
||||
@@ -8,17 +8,18 @@ import numpy as np
|
||||
from poseidon import round_constants as rc, round_numbers as rn
|
||||
|
||||
# Modify these
|
||||
arity = 11
|
||||
p = 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 # grumpkin
|
||||
arity = 2
|
||||
p = 2 ** 31 - 1 # m31
|
||||
# p = 0x30644e72e131a029b85045b68181585d97816a916871ca8d3c208c16d87cfd47 # grumpkin
|
||||
# p = 0x73EDA753299D7D483339D80809A1D80553BDA402FFFE5BFEFFFFFFFF00000001 # bls12-381
|
||||
# p = 0x12ab655e9a2ca55660b44d1e5c37b00159aa76fed00000010a11800000000001 # bls12-377
|
||||
# p = 0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000001 # bn254
|
||||
# p = 0x1ae3a4617c510eac63b05c06ca1493b1a22d9f300f5138f1ef3622fba094800170b5d44300000008508c00000000001 # bw6-761
|
||||
prime_bit_len = 255
|
||||
field_bytes = 32
|
||||
prime_bit_len = 31
|
||||
field_bytes = 4
|
||||
|
||||
# leave set to -1 if not sure
|
||||
full_round = -1
|
||||
full_round = 8
|
||||
half_full_round = full_round // 2
|
||||
# leave set to -1 if not sure
|
||||
partial_round = -1
|
||||
@@ -31,12 +32,12 @@ security_level = 128
|
||||
# F = GF(p)
|
||||
# F.primitive_element()
|
||||
#
|
||||
# primitive_element = None
|
||||
primitive_element = None
|
||||
# primitive_element = 7 # bls12-381
|
||||
# primitive_element = 22 # bls12-377
|
||||
# primitive_element = 5 # bn254
|
||||
# primitive_element = 15 # bw6-761
|
||||
primitive_element = 3 # grumpkin
|
||||
# primitive_element = 3 # grumpkin
|
||||
|
||||
# currently we only support alpha 5, if you need an alpha other than 5 - feel free to reach out
|
||||
alpha = 5
|
||||
|
||||
508
icicle/include/poseidon/constants/m31_poseidon.h
Normal file
508
icicle/include/poseidon/constants/m31_poseidon.h
Normal file
@@ -0,0 +1,508 @@
|
||||
#pragma once
|
||||
#ifndef M31_POSEIDON_H
|
||||
#define M31_POSEIDON_H
|
||||
|
||||
namespace poseidon_constants_m31 {
|
||||
/**
|
||||
* This inner namespace contains optimized constants for running Poseidon.
|
||||
* These constants were generated using an algorithm defined at
|
||||
* https://spec.filecoin.io/algorithms/crypto/poseidon/
|
||||
* The number in the name corresponds to the arity of the hash function
|
||||
* Each array contains:
|
||||
* RoundConstants | MDSMatrix | Non-sparse matrix | Sparse matrices
|
||||
*/
|
||||
|
||||
int partial_rounds_2 = 7;
|
||||
|
||||
int partial_rounds_4 = 11;
|
||||
|
||||
int partial_rounds_8 = 12;
|
||||
|
||||
int partial_rounds_11 = 12;
|
||||
|
||||
unsigned char poseidon_constants_2[] = {
|
||||
0x33, 0x8b, 0x6d, 0x47, 0xbb, 0x97, 0x11, 0x67, 0x92, 0x9d, 0x55, 0x2d,
|
||||
0xee, 0x1e, 0x2e, 0x45, 0xfe, 0x35, 0x0e, 0x25, 0x7e, 0xc3, 0x4f, 0x70,
|
||||
0x4d, 0x0a, 0x8c, 0x18, 0xd9, 0x43, 0xa4, 0x61, 0xfb, 0x14, 0xd9, 0x14,
|
||||
0x99, 0x13, 0xb9, 0x30, 0xec, 0x3b, 0x8c, 0x16, 0xcc, 0xb2, 0x0b, 0x2e,
|
||||
0x9e, 0x18, 0xbf, 0x26, 0xb6, 0xb7, 0x2a, 0x44, 0x61, 0x29, 0xdb, 0x21,
|
||||
0x18, 0x84, 0x03, 0x4e, 0xef, 0x95, 0xf9, 0x45, 0xe3, 0xd8, 0xf2, 0x46,
|
||||
0x82, 0xb4, 0xc9, 0x5e, 0x5f, 0xf3, 0xb2, 0x4f, 0x61, 0x80, 0x50, 0x0f,
|
||||
0x0d, 0x7f, 0xe3, 0x1b, 0x23, 0xbd, 0x05, 0x2f, 0x0f, 0xb1, 0x60, 0x67,
|
||||
0xd8, 0x85, 0xdf, 0x57, 0x0c, 0x8c, 0xdf, 0x50, 0x9e, 0x65, 0x3c, 0x58,
|
||||
0x07, 0xbd, 0x29, 0x7e, 0xc5, 0xe5, 0xa7, 0x5a, 0x5a, 0x4b, 0x0c, 0x29,
|
||||
0x89, 0x9d, 0x14, 0x11, 0x8c, 0x20, 0xcb, 0x76, 0x4d, 0x56, 0x2d, 0x4a,
|
||||
0x10, 0xda, 0xaf, 0x0a, 0x65, 0x9d, 0x98, 0x3e, 0xa1, 0xac, 0x57, 0x46,
|
||||
0xcb, 0xe8, 0xfc, 0x5b, 0xd4, 0x43, 0x4b, 0x63, 0x1b, 0x13, 0x4b, 0x1f,
|
||||
0xed, 0xac, 0xbf, 0x30, 0x27, 0x15, 0xac, 0x53, 0x4b, 0x27, 0x61, 0x3e,
|
||||
0x37, 0xc3, 0x65, 0x74, 0x55, 0x55, 0x55, 0x55, 0x00, 0x00, 0x00, 0x20,
|
||||
0x33, 0x33, 0x33, 0x33, 0x00, 0x00, 0x00, 0x20, 0x33, 0x33, 0x33, 0x33,
|
||||
0xaa, 0xaa, 0xaa, 0x6a, 0x33, 0x33, 0x33, 0x33, 0xaa, 0xaa, 0xaa, 0x6a,
|
||||
0x6d, 0xdb, 0xb6, 0x6d, 0x55, 0x55, 0x55, 0x55, 0xc0, 0x72, 0x8d, 0x36,
|
||||
0x2c, 0xe5, 0xc0, 0x51, 0x00, 0x00, 0x00, 0x20, 0x0b, 0xd5, 0x67, 0x6c,
|
||||
0x6c, 0x67, 0x2c, 0x13, 0x33, 0x33, 0x33, 0x33, 0x6c, 0x67, 0x2c, 0x13,
|
||||
0xe6, 0xb8, 0x2c, 0x62, 0x55, 0x55, 0x55, 0x55, 0x15, 0x1f, 0xaf, 0x6a,
|
||||
0xd9, 0xa8, 0x14, 0x44, 0xae, 0xb0, 0x38, 0x4b, 0x17, 0x76, 0xd9, 0x39,
|
||||
0x55, 0x55, 0x55, 0x55, 0x28, 0xef, 0x9d, 0x4f, 0xc7, 0x3b, 0xa6, 0x24,
|
||||
0x84, 0x5b, 0x79, 0x6f, 0xde, 0x4f, 0x8f, 0x3d, 0x55, 0x55, 0x55, 0x55,
|
||||
0x54, 0xc2, 0xb2, 0x00, 0x5a, 0xed, 0x68, 0x0c, 0xeb, 0xd4, 0xc4, 0x61,
|
||||
0x02, 0x8c, 0x85, 0x27, 0x55, 0x55, 0x55, 0x55, 0xe4, 0xc5, 0xbd, 0x0a,
|
||||
0xf6, 0xec, 0x75, 0x26, 0xe0, 0xdb, 0xd8, 0x52, 0xdf, 0x28, 0xff, 0x33,
|
||||
0x55, 0x55, 0x55, 0x55, 0xac, 0x68, 0x06, 0x00, 0xc9, 0xff, 0x91, 0x19,
|
||||
0xb1, 0x12, 0x2b, 0x19, 0xa2, 0xdd, 0x47, 0x39, 0x55, 0x55, 0x55, 0x55,
|
||||
0xd5, 0x03, 0x00, 0x00, 0x45, 0xc8, 0xcc, 0x4c, 0x55, 0x55, 0x55, 0x35,
|
||||
0x8d, 0xd6, 0x68, 0x3d, 0x55, 0x55, 0x55, 0x55, 0x03, 0x00, 0x00, 0x00,
|
||||
0x64, 0x66, 0x66, 0x26, 0x00, 0x00, 0x00, 0x20, 0x33, 0x33, 0x33, 0x33
|
||||
};
|
||||
|
||||
unsigned char poseidon_constants_4[] = {
|
||||
0xdb, 0x64, 0xa5, 0x32, 0xd6, 0x3d, 0x12, 0x6e, 0x65, 0x66, 0x46, 0x59,
|
||||
0x2a, 0x64, 0x51, 0x3b, 0xaf, 0xbe, 0x72, 0x0b, 0x66, 0x5f, 0x5c, 0x6c,
|
||||
0x66, 0x11, 0x8c, 0x61, 0x99, 0x24, 0x99, 0x14, 0x1d, 0x5f, 0x67, 0x0a,
|
||||
0x4d, 0xab, 0xc4, 0x1e, 0x43, 0xb2, 0x09, 0x58, 0xc0, 0x27, 0x4c, 0x5b,
|
||||
0xf0, 0x0c, 0xf5, 0x12, 0xc9, 0x2f, 0x88, 0x4f, 0x59, 0x52, 0x5b, 0x6a,
|
||||
0x73, 0x90, 0x55, 0x5b, 0xaf, 0x47, 0x55, 0x0d, 0xa7, 0xc2, 0x0c, 0x6e,
|
||||
0xe6, 0xd6, 0x4e, 0x30, 0x9e, 0x75, 0x47, 0x12, 0xca, 0x93, 0xd1, 0x5b,
|
||||
0x64, 0x27, 0xfc, 0x60, 0x6c, 0x16, 0x52, 0x20, 0xf5, 0xe0, 0x01, 0x15,
|
||||
0x27, 0xf9, 0x96, 0x7f, 0xa0, 0x38, 0xad, 0x3c, 0x95, 0xd3, 0xe4, 0x32,
|
||||
0x57, 0x95, 0x5a, 0x6b, 0x12, 0xcc, 0xdc, 0x18, 0x2b, 0xdd, 0xa4, 0x66,
|
||||
0xbf, 0xe7, 0x96, 0x15, 0x85, 0x87, 0x6a, 0x1f, 0x15, 0x19, 0x9c, 0x65,
|
||||
0xef, 0x24, 0xaa, 0x2c, 0x3f, 0x6b, 0xbc, 0x6b, 0x54, 0x24, 0x2c, 0x17,
|
||||
0xf1, 0x7a, 0x8d, 0x57, 0x90, 0xa4, 0xd4, 0x4a, 0x12, 0x06, 0x77, 0x6a,
|
||||
0xe8, 0x6b, 0xd9, 0x51, 0x80, 0x72, 0xa1, 0x31, 0xce, 0xa8, 0x59, 0x10,
|
||||
0x0c, 0x90, 0xd4, 0x10, 0x8e, 0x60, 0x54, 0x1c, 0xe7, 0xfd, 0x42, 0x3a,
|
||||
0x73, 0xc1, 0xcc, 0x4f, 0x58, 0xbb, 0x99, 0x7c, 0xd2, 0x51, 0xda, 0x43,
|
||||
0xea, 0x6e, 0xe8, 0x16, 0xb2, 0x51, 0x53, 0x61, 0x7e, 0x68, 0x44, 0x3c,
|
||||
0x33, 0x33, 0x33, 0x33, 0xaa, 0xaa, 0xaa, 0x6a, 0x6d, 0xdb, 0xb6, 0x6d,
|
||||
0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71, 0xaa, 0xaa, 0xaa, 0x6a,
|
||||
0x6d, 0xdb, 0xb6, 0x6d, 0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x99, 0x99, 0x99, 0x59, 0x6d, 0xdb, 0xb6, 0x6d, 0x00, 0x00, 0x00, 0x10,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74,
|
||||
0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71, 0x99, 0x99, 0x99, 0x59,
|
||||
0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0x33, 0x33, 0x33, 0x33, 0xae, 0x9d, 0xba, 0x61,
|
||||
0x09, 0xf2, 0xee, 0x53, 0x5e, 0x5c, 0xe8, 0x61, 0x8e, 0x1a, 0x60, 0x6c,
|
||||
0xaa, 0xaa, 0xaa, 0x6a, 0xff, 0x1a, 0xb7, 0x09, 0x1d, 0x84, 0x75, 0x5e,
|
||||
0x88, 0x5e, 0x36, 0x25, 0x6b, 0xd4, 0xdd, 0x65, 0x6d, 0xdb, 0xb6, 0x6d,
|
||||
0x1d, 0x84, 0x75, 0x5e, 0x10, 0x9d, 0x2d, 0x63, 0xa7, 0x62, 0xfc, 0x1f,
|
||||
0xe2, 0x43, 0x63, 0x14, 0x00, 0x00, 0x00, 0x10, 0x88, 0x5e, 0x36, 0x25,
|
||||
0xa7, 0x62, 0xfc, 0x1f, 0x47, 0xa0, 0x19, 0x6f, 0x48, 0x1f, 0x4e, 0x22,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x6b, 0xd4, 0xdd, 0x65, 0xe2, 0x43, 0x63, 0x14,
|
||||
0x48, 0x1f, 0x4e, 0x22, 0xb7, 0x4e, 0x73, 0x01, 0x33, 0x33, 0x33, 0x33,
|
||||
0x84, 0xdd, 0xf7, 0x08, 0x6f, 0xc5, 0x14, 0x63, 0xb6, 0x22, 0x01, 0x3d,
|
||||
0xcd, 0xab, 0x7d, 0x62, 0xac, 0x7e, 0x61, 0x57, 0x40, 0x6b, 0xc5, 0x45,
|
||||
0x77, 0xbc, 0x02, 0x18, 0x8c, 0x66, 0xda, 0x74, 0x33, 0x33, 0x33, 0x33,
|
||||
0x01, 0x9d, 0x33, 0x55, 0xed, 0x7d, 0x75, 0x63, 0x41, 0x92, 0x33, 0x76,
|
||||
0x6b, 0xd5, 0x10, 0x23, 0x1a, 0xc4, 0x49, 0x5b, 0x0c, 0x86, 0x5a, 0x60,
|
||||
0x23, 0xe5, 0xd8, 0x1c, 0x43, 0xe9, 0xe2, 0x0d, 0x33, 0x33, 0x33, 0x33,
|
||||
0x1b, 0x68, 0xec, 0x17, 0x0e, 0x3f, 0x34, 0x1a, 0xb0, 0x28, 0xe9, 0x6c,
|
||||
0xc0, 0xf7, 0x3e, 0x79, 0xdc, 0x08, 0x9e, 0x32, 0x45, 0xde, 0xea, 0x73,
|
||||
0x7a, 0xc4, 0xb4, 0x0d, 0x65, 0xb6, 0x61, 0x04, 0x33, 0x33, 0x33, 0x33,
|
||||
0x41, 0x01, 0x02, 0x6b, 0xd8, 0x62, 0x6b, 0x47, 0x47, 0xd9, 0x7e, 0x72,
|
||||
0x4f, 0x80, 0x31, 0x54, 0x8b, 0x5e, 0x3e, 0x26, 0x64, 0x16, 0xe2, 0x51,
|
||||
0xf4, 0xa6, 0xed, 0x35, 0xc3, 0xe9, 0xc5, 0x41, 0x33, 0x33, 0x33, 0x33,
|
||||
0xd5, 0x3f, 0xed, 0x11, 0xf5, 0x0f, 0x56, 0x41, 0xf6, 0x0d, 0xf3, 0x78,
|
||||
0xb0, 0x78, 0xa1, 0x7d, 0x5d, 0x33, 0xc4, 0x5e, 0xa6, 0xd9, 0x47, 0x4c,
|
||||
0x07, 0xc3, 0x30, 0x5a, 0x91, 0x10, 0x31, 0x20, 0x33, 0x33, 0x33, 0x33,
|
||||
0xa5, 0xec, 0xe5, 0x25, 0xe6, 0xa7, 0x4e, 0x01, 0xee, 0x3a, 0xe7, 0x62,
|
||||
0x02, 0xfd, 0xf9, 0x08, 0xdd, 0x91, 0x3f, 0x2d, 0xca, 0xbc, 0xb5, 0x2c,
|
||||
0x54, 0x9e, 0xd4, 0x78, 0x6b, 0x18, 0x94, 0x21, 0x33, 0x33, 0x33, 0x33,
|
||||
0xe6, 0xb3, 0xd2, 0x2e, 0x49, 0xdb, 0xa8, 0x52, 0x5f, 0x6a, 0x75, 0x59,
|
||||
0xd5, 0x45, 0x5c, 0x73, 0x40, 0xe4, 0xd8, 0x2a, 0x8c, 0xe6, 0xda, 0x50,
|
||||
0x5f, 0x4f, 0x18, 0x5d, 0xf4, 0xa4, 0xf4, 0x46, 0x33, 0x33, 0x33, 0x33,
|
||||
0x3e, 0x90, 0x5b, 0x3a, 0x55, 0x96, 0x22, 0x7c, 0xd9, 0x64, 0x36, 0x4e,
|
||||
0x0b, 0xec, 0x66, 0x65, 0xac, 0x55, 0xa9, 0x19, 0x50, 0x87, 0x49, 0x1a,
|
||||
0x1f, 0x78, 0x89, 0x36, 0x25, 0x2a, 0x06, 0x55, 0x33, 0x33, 0x33, 0x33,
|
||||
0x6b, 0xf1, 0x61, 0x67, 0x67, 0x00, 0xc5, 0x24, 0x9e, 0xd1, 0x94, 0x6f,
|
||||
0xbf, 0x8b, 0xaf, 0x2d, 0x69, 0x9c, 0xb7, 0x62, 0xf8, 0x0a, 0x43, 0x13,
|
||||
0x3c, 0xc0, 0x48, 0x3e, 0x9f, 0x3f, 0xa8, 0x2c, 0x33, 0x33, 0x33, 0x33,
|
||||
0x9d, 0x5b, 0xb2, 0x2b, 0x62, 0x05, 0x39, 0x20, 0x52, 0x1f, 0xe8, 0x05,
|
||||
0x1b, 0x24, 0xc0, 0x13, 0x11, 0x11, 0x11, 0x11, 0x9c, 0x6a, 0x35, 0x45,
|
||||
0xf6, 0x7f, 0x5c, 0x4c, 0x9f, 0xc4, 0x8f, 0x1f, 0x33, 0x33, 0x33, 0x33,
|
||||
0xb1, 0xaa, 0xaa, 0x2a, 0xcb, 0xb6, 0x6d, 0x5b, 0x34, 0x49, 0x92, 0x24,
|
||||
0x90, 0x65, 0x59, 0x56, 0xaa, 0xaa, 0xaa, 0x6a, 0x6d, 0xdb, 0xb6, 0x6d,
|
||||
0x00, 0x00, 0x00, 0x10, 0x71, 0x1c, 0xc7, 0x71
|
||||
};
|
||||
|
||||
unsigned char poseidon_constants_8[] = {
|
||||
0x90, 0xaf, 0x71, 0x3e, 0xa3, 0xbe, 0x5a, 0x30, 0xd4, 0x1b, 0x6f, 0x5d,
|
||||
0xeb, 0x36, 0x6b, 0x53, 0x14, 0xc0, 0x30, 0x13, 0xd5, 0xf8, 0x0b, 0x1c,
|
||||
0xa8, 0x66, 0xf1, 0x3c, 0xbd, 0x64, 0xa3, 0x6c, 0x06, 0x5e, 0x95, 0x7c,
|
||||
0xee, 0xc4, 0x0a, 0x0f, 0x37, 0x03, 0xba, 0x6d, 0x20, 0x85, 0xf1, 0x2c,
|
||||
0xee, 0x59, 0x21, 0x11, 0x42, 0xae, 0xb7, 0x3c, 0x73, 0xb4, 0xd6, 0x71,
|
||||
0x6a, 0x29, 0x40, 0x03, 0x86, 0xd8, 0x32, 0x68, 0x61, 0x62, 0x62, 0x32,
|
||||
0x44, 0x5d, 0xcc, 0x38, 0x76, 0x0f, 0xbc, 0x1f, 0xc9, 0x6e, 0x67, 0x1d,
|
||||
0x95, 0x35, 0x10, 0x79, 0x45, 0xaa, 0x0f, 0x7c, 0x73, 0xfa, 0x5d, 0x3f,
|
||||
0x53, 0xf2, 0xdc, 0x21, 0x37, 0xfa, 0x15, 0x04, 0xfd, 0x31, 0x3d, 0x5d,
|
||||
0x5d, 0xe6, 0x1d, 0x4a, 0xb3, 0x2b, 0xa2, 0x07, 0x2d, 0x48, 0x07, 0x2b,
|
||||
0x92, 0x1c, 0x31, 0x52, 0x6c, 0xd3, 0x32, 0x2f, 0x0f, 0xdd, 0x82, 0x7d,
|
||||
0x41, 0x0e, 0x81, 0x7e, 0x60, 0xfb, 0x49, 0x7b, 0xe5, 0x39, 0x3d, 0x75,
|
||||
0x6d, 0xcf, 0x02, 0x77, 0x0d, 0xf6, 0xf8, 0x0c, 0x43, 0xae, 0x62, 0x5e,
|
||||
0x26, 0x36, 0x9e, 0x3a, 0x10, 0xe3, 0x59, 0x4b, 0x3a, 0x59, 0x49, 0x73,
|
||||
0x31, 0x20, 0xb9, 0x40, 0x39, 0xed, 0xaf, 0x37, 0x6d, 0x5c, 0x4c, 0x6a,
|
||||
0xce, 0xca, 0xc4, 0x33, 0x53, 0x96, 0x92, 0x1d, 0xb2, 0xa1, 0xac, 0x65,
|
||||
0xbb, 0x43, 0xc4, 0x16, 0xf9, 0x38, 0x10, 0x67, 0x3d, 0xbb, 0x28, 0x7a,
|
||||
0x2b, 0x1e, 0x65, 0x36, 0x07, 0x14, 0x36, 0x3c, 0xcb, 0xdf, 0x03, 0x6b,
|
||||
0x03, 0x7b, 0xe6, 0x67, 0x79, 0x2a, 0x08, 0x47, 0xb7, 0x8f, 0x9c, 0x7e,
|
||||
0x54, 0xde, 0x08, 0x0a, 0xf8, 0x99, 0x24, 0x6f, 0x64, 0x78, 0x80, 0x5f,
|
||||
0x43, 0x76, 0x77, 0x40, 0x12, 0x62, 0x71, 0x10, 0x35, 0xf5, 0xdd, 0x0a,
|
||||
0x06, 0xff, 0x9b, 0x7b, 0xd8, 0x1a, 0xf3, 0x50, 0x1d, 0xc3, 0x8c, 0x60,
|
||||
0xe0, 0x61, 0xf5, 0x3d, 0xf9, 0xbf, 0xe4, 0x38, 0x78, 0xbf, 0x59, 0x0e,
|
||||
0xed, 0xc9, 0x4d, 0x0b, 0xb1, 0x7a, 0x10, 0x2b, 0x84, 0x27, 0x07, 0x70,
|
||||
0x5d, 0xc0, 0xa4, 0x7e, 0x9c, 0xf0, 0xf6, 0x69, 0x89, 0x6c, 0xc5, 0x39,
|
||||
0x4a, 0x7d, 0x5e, 0x26, 0x2f, 0x08, 0x9d, 0x05, 0xdc, 0x71, 0xec, 0x08,
|
||||
0x2b, 0xca, 0x68, 0x14, 0x42, 0xf6, 0xe6, 0x0a, 0x2f, 0xa5, 0x34, 0x6d,
|
||||
0x95, 0xaa, 0x80, 0x55, 0x23, 0x0f, 0x5f, 0x20, 0xbe, 0x4d, 0x0b, 0x20,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x45, 0x17, 0x5d, 0x74, 0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x50, 0x05, 0xd7, 0x30, 0x09, 0x94, 0x4f, 0x13,
|
||||
0x11, 0x86, 0x4b, 0x61, 0x74, 0x8b, 0x94, 0x0e, 0x7e, 0x5d, 0x93, 0x27,
|
||||
0xeb, 0xb6, 0x4b, 0x61, 0x90, 0x3f, 0x9b, 0x7d, 0x10, 0xe9, 0x16, 0x06,
|
||||
0x99, 0x99, 0x99, 0x59, 0x4f, 0xf6, 0x15, 0x6b, 0x84, 0x8c, 0xe0, 0x5f,
|
||||
0x88, 0x9e, 0xb2, 0x08, 0x32, 0x36, 0xe3, 0x25, 0x64, 0x0a, 0xf5, 0x6f,
|
||||
0x80, 0xff, 0x8e, 0x6f, 0xcd, 0xb5, 0x72, 0x12, 0x90, 0xa2, 0x7a, 0x09,
|
||||
0x45, 0x17, 0x5d, 0x74, 0x84, 0x8c, 0xe0, 0x5f, 0xf5, 0x67, 0x02, 0x2d,
|
||||
0x71, 0x83, 0xf0, 0x55, 0x81, 0xa2, 0x81, 0x4b, 0xec, 0xff, 0xb0, 0x6b,
|
||||
0x17, 0x41, 0xd6, 0x36, 0xf3, 0x16, 0x58, 0x23, 0x49, 0x90, 0xa2, 0x17,
|
||||
0x55, 0x55, 0x55, 0x35, 0x88, 0x9e, 0xb2, 0x08, 0x71, 0x83, 0xf0, 0x55,
|
||||
0x27, 0x2a, 0xb0, 0x29, 0x0b, 0xe4, 0x53, 0x70, 0x7f, 0xeb, 0x60, 0x74,
|
||||
0xb9, 0x92, 0xa9, 0x4b, 0x51, 0x41, 0x0e, 0x56, 0x1b, 0xe4, 0x67, 0x43,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0x32, 0x36, 0xe3, 0x25, 0x81, 0xa2, 0x81, 0x4b,
|
||||
0x0b, 0xe4, 0x53, 0x70, 0x73, 0x99, 0xf0, 0x02, 0x1a, 0xf7, 0xe1, 0x40,
|
||||
0x18, 0xc4, 0x58, 0x3a, 0xcc, 0xf5, 0x0b, 0x18, 0xf0, 0x39, 0xab, 0x7a,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x64, 0x0a, 0xf5, 0x6f, 0xec, 0xff, 0xb0, 0x6b,
|
||||
0x7f, 0xeb, 0x60, 0x74, 0x1a, 0xf7, 0xe1, 0x40, 0xf7, 0xfc, 0xbe, 0x7f,
|
||||
0xbf, 0x63, 0xc5, 0x05, 0x15, 0x3c, 0x9f, 0x2b, 0x9b, 0x77, 0xb0, 0x44,
|
||||
0x11, 0x11, 0x11, 0x11, 0x80, 0xff, 0x8e, 0x6f, 0x17, 0x41, 0xd6, 0x36,
|
||||
0xb9, 0x92, 0xa9, 0x4b, 0x18, 0xc4, 0x58, 0x3a, 0xbf, 0x63, 0xc5, 0x05,
|
||||
0x2f, 0x5c, 0x3c, 0x09, 0x25, 0xaf, 0xdf, 0x11, 0x21, 0x7d, 0x95, 0x58,
|
||||
0x00, 0x00, 0x00, 0x08, 0xcd, 0xb5, 0x72, 0x12, 0xf3, 0x16, 0x58, 0x23,
|
||||
0x51, 0x41, 0x0e, 0x56, 0xcc, 0xf5, 0x0b, 0x18, 0x15, 0x3c, 0x9f, 0x2b,
|
||||
0x25, 0xaf, 0xdf, 0x11, 0x38, 0x50, 0xe9, 0x16, 0x12, 0xb8, 0xc8, 0x17,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x90, 0xa2, 0x7a, 0x09, 0x49, 0x90, 0xa2, 0x17,
|
||||
0x1b, 0xe4, 0x67, 0x43, 0xf0, 0x39, 0xab, 0x7a, 0x9b, 0x77, 0xb0, 0x44,
|
||||
0x21, 0x7d, 0x95, 0x58, 0x12, 0xb8, 0xc8, 0x17, 0x5a, 0xfc, 0xf7, 0x5c,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0xdb, 0x50, 0x89, 0x38, 0x5f, 0x88, 0xe3, 0x32,
|
||||
0x8b, 0xb4, 0x3b, 0x6c, 0x95, 0x0a, 0xf1, 0x41, 0xe6, 0x0a, 0x52, 0x7d,
|
||||
0xd1, 0x0d, 0xb1, 0x57, 0x9b, 0xd2, 0xf4, 0x1d, 0x80, 0x17, 0xb2, 0x42,
|
||||
0x9c, 0x40, 0x6e, 0x2f, 0x63, 0xa7, 0x42, 0x77, 0xf9, 0x37, 0xd1, 0x43,
|
||||
0x98, 0xd1, 0xec, 0x50, 0x91, 0x26, 0xfa, 0x4e, 0x0c, 0x9e, 0xcc, 0x31,
|
||||
0x52, 0xf4, 0x20, 0x5d, 0x2a, 0x20, 0xeb, 0x1b, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x54, 0x29, 0xf4, 0x4a, 0xde, 0x91, 0xf6, 0x54, 0x8b, 0xed, 0x18, 0x26,
|
||||
0x71, 0x24, 0x22, 0x34, 0xb7, 0xaf, 0x61, 0x27, 0x7a, 0x0a, 0x21, 0x7f,
|
||||
0x9f, 0xfe, 0xa1, 0x53, 0x26, 0x97, 0x6b, 0x5b, 0xf4, 0xea, 0xef, 0x4a,
|
||||
0x4b, 0x03, 0xa0, 0x7c, 0xe6, 0x64, 0x69, 0x47, 0x76, 0xf7, 0x2d, 0x0b,
|
||||
0x6f, 0xd5, 0x2c, 0x45, 0x52, 0xc1, 0x5c, 0x46, 0x25, 0x38, 0xab, 0x79,
|
||||
0x64, 0xed, 0xe7, 0x57, 0x71, 0x1c, 0xc7, 0x71, 0x94, 0xc2, 0xb7, 0x7f,
|
||||
0xaf, 0x0d, 0x61, 0x4c, 0xa3, 0x86, 0x8e, 0x45, 0xdc, 0x73, 0xe3, 0x77,
|
||||
0x71, 0xed, 0x21, 0x7d, 0x4b, 0x8e, 0xc7, 0x52, 0x39, 0x5d, 0x49, 0x1d,
|
||||
0x75, 0x35, 0xed, 0x09, 0xc6, 0x02, 0x3b, 0x22, 0xb8, 0x91, 0x07, 0x13,
|
||||
0x7f, 0xbf, 0x15, 0x7f, 0xb5, 0xbe, 0x0a, 0x5c, 0xbc, 0x75, 0x54, 0x61,
|
||||
0x6c, 0x2f, 0x28, 0x5f, 0xff, 0xf0, 0x7b, 0x67, 0x11, 0x8e, 0x70, 0x29,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0xe6, 0xfc, 0x29, 0x07, 0xbd, 0x0c, 0x4d, 0x5f,
|
||||
0x57, 0xb7, 0x87, 0x41, 0xec, 0x48, 0xda, 0x18, 0x78, 0x41, 0xb8, 0x6d,
|
||||
0xde, 0x7e, 0x47, 0x5a, 0x13, 0x03, 0xc5, 0x52, 0x2e, 0xee, 0xf3, 0x3f,
|
||||
0x06, 0xd0, 0xcd, 0x48, 0x77, 0x2a, 0xcd, 0x7e, 0x35, 0xee, 0x74, 0x63,
|
||||
0x3e, 0x26, 0x65, 0x64, 0x37, 0xa1, 0xfb, 0x7a, 0x03, 0x44, 0xa8, 0x70,
|
||||
0x2f, 0x03, 0x27, 0x1e, 0xb3, 0x02, 0x3e, 0x4a, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0xfd, 0xe1, 0xfe, 0x3c, 0x88, 0x1c, 0x36, 0x53, 0x36, 0x31, 0x5a, 0x32,
|
||||
0x88, 0x7b, 0xa6, 0x17, 0x40, 0x31, 0xe4, 0x0a, 0xb3, 0x70, 0x8f, 0x4f,
|
||||
0xc3, 0xa2, 0xd7, 0x06, 0x34, 0x9d, 0x4a, 0x71, 0x5b, 0xfa, 0x79, 0x25,
|
||||
0xe8, 0x6f, 0x05, 0x65, 0xc1, 0x4a, 0xee, 0x5c, 0x9a, 0xb2, 0x83, 0x05,
|
||||
0xb0, 0x89, 0x77, 0x2e, 0xc1, 0x56, 0x34, 0x08, 0x50, 0xf5, 0xde, 0x12,
|
||||
0xae, 0x68, 0xc2, 0x1b, 0x71, 0x1c, 0xc7, 0x71, 0xb3, 0x84, 0x6e, 0x4f,
|
||||
0xae, 0x74, 0x57, 0x4f, 0x56, 0xf3, 0xfc, 0x48, 0xfa, 0x73, 0xd7, 0x0e,
|
||||
0x8a, 0xc5, 0x35, 0x4d, 0xf6, 0x26, 0x15, 0x2a, 0xcf, 0xb5, 0x2d, 0x64,
|
||||
0xd1, 0x2a, 0x84, 0x43, 0xab, 0xc0, 0xec, 0x60, 0xa9, 0xbc, 0x09, 0x11,
|
||||
0xfd, 0x06, 0xea, 0x1e, 0xba, 0x29, 0x77, 0x6c, 0xb1, 0x37, 0xa5, 0x42,
|
||||
0x1c, 0x9b, 0x58, 0x37, 0xa8, 0xb7, 0xae, 0x3e, 0x6a, 0xf8, 0x63, 0x25,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x22, 0xa0, 0x75, 0x4e, 0x17, 0x33, 0x99, 0x7c,
|
||||
0x97, 0x97, 0x30, 0x04, 0xbc, 0x22, 0x6d, 0x7c, 0xb3, 0xd7, 0xd9, 0x56,
|
||||
0x4e, 0xef, 0x40, 0x5e, 0x02, 0x05, 0x51, 0x1e, 0x0c, 0x32, 0xb7, 0x06,
|
||||
0x41, 0x16, 0x80, 0x33, 0xc2, 0xdd, 0x8f, 0x18, 0x65, 0xa3, 0xe1, 0x4a,
|
||||
0xdb, 0xb4, 0x5d, 0x78, 0xf3, 0x99, 0x48, 0x3e, 0x04, 0x5b, 0xb9, 0x09,
|
||||
0xd2, 0x3d, 0x14, 0x05, 0x69, 0x50, 0xe9, 0x57, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x0d, 0x72, 0x37, 0x6c, 0xe3, 0xd1, 0x57, 0x2f, 0x9e, 0xb7, 0xe1, 0x30,
|
||||
0x22, 0xce, 0xe5, 0x66, 0x45, 0x7b, 0x06, 0x0e, 0x06, 0x66, 0xdd, 0x11,
|
||||
0xef, 0xdf, 0x61, 0x52, 0x7d, 0xb9, 0xcf, 0x1e, 0x97, 0xbe, 0x55, 0x00,
|
||||
0x94, 0xcb, 0x50, 0x7c, 0xa0, 0x83, 0x1c, 0x57, 0xf3, 0x72, 0x8c, 0x40,
|
||||
0x07, 0x32, 0x39, 0x54, 0xe8, 0x5a, 0x10, 0x7b, 0x09, 0xc2, 0x02, 0x58,
|
||||
0xb0, 0xeb, 0x23, 0x51, 0x71, 0x1c, 0xc7, 0x71, 0xf0, 0xfd, 0x78, 0x2c,
|
||||
0xe7, 0xa8, 0x53, 0x7c, 0xdd, 0xf6, 0xa3, 0x2b, 0xa9, 0x51, 0xf4, 0x33,
|
||||
0x1d, 0x4d, 0x13, 0x0e, 0x53, 0x6b, 0xde, 0x6b, 0x48, 0x46, 0xa0, 0x01,
|
||||
0xbf, 0x74, 0xf2, 0x14, 0xe5, 0x99, 0x3d, 0x72, 0x37, 0x8e, 0xa9, 0x44,
|
||||
0x61, 0xed, 0xdd, 0x3b, 0x7c, 0x11, 0x28, 0x12, 0xd5, 0xd6, 0x27, 0x78,
|
||||
0x4e, 0xf8, 0xe4, 0x3d, 0xdc, 0x5c, 0x92, 0x0c, 0xea, 0x5b, 0xe2, 0x44,
|
||||
0x71, 0x1c, 0xc7, 0x71, 0x64, 0x55, 0xb2, 0x0d, 0x54, 0x7f, 0x64, 0x72,
|
||||
0x8e, 0xe1, 0x7b, 0x52, 0xf5, 0xe4, 0x20, 0x13, 0xd1, 0xd4, 0x5d, 0x4c,
|
||||
0x33, 0x3d, 0xb6, 0x55, 0x26, 0xed, 0xb0, 0x75, 0xa0, 0xf2, 0x72, 0x51,
|
||||
0x6b, 0xc5, 0x37, 0x23, 0x0d, 0x1d, 0xf5, 0x6f, 0xa6, 0x83, 0x5f, 0x3e,
|
||||
0x1e, 0xb5, 0x18, 0x23, 0xc8, 0x40, 0xae, 0x63, 0x68, 0x79, 0x8e, 0x56,
|
||||
0xb0, 0x33, 0x43, 0x08, 0x5b, 0xac, 0x52, 0x39, 0x71, 0x1c, 0xc7, 0x71,
|
||||
0x9d, 0xf2, 0x00, 0x73, 0xf8, 0x96, 0xbb, 0x43, 0x5b, 0x59, 0xce, 0x07,
|
||||
0xbb, 0x11, 0xc8, 0x43, 0xde, 0xea, 0xb7, 0x34, 0x51, 0xbf, 0xa7, 0x2d,
|
||||
0x33, 0x35, 0xc2, 0x40, 0x1c, 0x81, 0x60, 0x63, 0x60, 0x0b, 0xb6, 0x60,
|
||||
0xbf, 0xb9, 0x38, 0x0c, 0x02, 0x54, 0x53, 0x20, 0xd9, 0xf9, 0xeb, 0x2f,
|
||||
0x7e, 0x5b, 0xdf, 0x58, 0x4b, 0x99, 0x8e, 0x04, 0x27, 0xb4, 0x18, 0x78,
|
||||
0xd6, 0x37, 0x16, 0x60, 0x71, 0x1c, 0xc7, 0x71, 0x74, 0x66, 0x66, 0x66,
|
||||
0xb2, 0xf1, 0x94, 0x20, 0xad, 0x2f, 0xba, 0x68, 0x6a, 0x33, 0xfe, 0x6e,
|
||||
0xa5, 0x51, 0xec, 0x44, 0xab, 0x05, 0x7e, 0x60, 0x48, 0x6b, 0xa5, 0x56,
|
||||
0x38, 0x3d, 0xc7, 0x24, 0x99, 0x99, 0x99, 0x59, 0x45, 0x17, 0x5d, 0x74,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f
|
||||
};
|
||||
|
||||
unsigned char poseidon_constants_11[] = {
|
||||
0xb0, 0xf1, 0x1f, 0x2e, 0xf8, 0x8b, 0xb5, 0x07, 0x8d, 0xc4, 0xe1, 0x46,
|
||||
0x99, 0x23, 0x9f, 0x06, 0xcc, 0x64, 0x13, 0x45, 0x9e, 0xb1, 0xdf, 0x5f,
|
||||
0xfa, 0x8e, 0x0f, 0x6f, 0x33, 0xd8, 0xfe, 0x19, 0x0a, 0x25, 0x8b, 0x20,
|
||||
0xe1, 0x2c, 0xcc, 0x36, 0x17, 0x3f, 0x03, 0x05, 0xe1, 0x13, 0xce, 0x35,
|
||||
0xd4, 0xc9, 0xe7, 0x65, 0x1f, 0x7f, 0x2c, 0x7a, 0x93, 0x9f, 0x34, 0x19,
|
||||
0x4d, 0x22, 0xf2, 0x7f, 0x8e, 0xa8, 0xb0, 0x51, 0x22, 0x8c, 0x91, 0x30,
|
||||
0xa5, 0x9c, 0xff, 0x31, 0x0e, 0x04, 0xc9, 0x19, 0x69, 0x60, 0xee, 0x0f,
|
||||
0xc5, 0xa5, 0xeb, 0x6b, 0xb0, 0xa4, 0xaa, 0x5d, 0x1c, 0x4e, 0xeb, 0x73,
|
||||
0xec, 0x94, 0xb7, 0x15, 0xce, 0x64, 0x1c, 0x60, 0x3e, 0xa3, 0x6b, 0x4a,
|
||||
0x87, 0x7a, 0x25, 0x2f, 0xfc, 0xc3, 0x17, 0x20, 0x06, 0xb6, 0x22, 0x7d,
|
||||
0xca, 0xea, 0x8b, 0x3b, 0xf9, 0xca, 0xa4, 0x32, 0xd2, 0xb7, 0x2e, 0x01,
|
||||
0x4f, 0x31, 0xc9, 0x2f, 0x10, 0xbf, 0x41, 0x4c, 0xe6, 0xfe, 0xba, 0x49,
|
||||
0xe5, 0x89, 0xbb, 0x77, 0x7e, 0xe8, 0x83, 0x1c, 0x72, 0xe7, 0x26, 0x58,
|
||||
0x24, 0x90, 0x9d, 0x1e, 0xb3, 0x20, 0xc8, 0x64, 0x84, 0xa3, 0x21, 0x5d,
|
||||
0x06, 0x64, 0x30, 0x4b, 0x19, 0x35, 0x96, 0x1e, 0xd1, 0x86, 0x57, 0x4a,
|
||||
0xb3, 0x8e, 0xd6, 0x7d, 0xaf, 0xd1, 0xde, 0x3f, 0xa2, 0x2c, 0x32, 0x0a,
|
||||
0xbb, 0xea, 0x4a, 0x46, 0x64, 0x1b, 0x72, 0x14, 0x75, 0x85, 0x1b, 0x4d,
|
||||
0x11, 0x02, 0x5f, 0x6f, 0x06, 0xdd, 0xd3, 0x6f, 0xbc, 0xcc, 0x77, 0x2e,
|
||||
0xb7, 0x43, 0xf4, 0x19, 0x9d, 0x2c, 0x4b, 0x2b, 0x0c, 0x41, 0xb9, 0x02,
|
||||
0xdc, 0x14, 0x5a, 0x67, 0xd4, 0x56, 0xca, 0x45, 0x65, 0xd2, 0x7d, 0x17,
|
||||
0xcd, 0x91, 0xdd, 0x45, 0xd8, 0xa8, 0xd8, 0x4b, 0xc9, 0x2b, 0xf2, 0x35,
|
||||
0xc1, 0x81, 0x6c, 0x33, 0xbc, 0xf4, 0x4d, 0x04, 0xfd, 0xb0, 0x91, 0x2b,
|
||||
0xcf, 0xad, 0x39, 0x45, 0x35, 0xb2, 0xac, 0x2e, 0x2f, 0x13, 0xe3, 0x0b,
|
||||
0x40, 0x59, 0x33, 0x07, 0xe3, 0xa5, 0xa1, 0x4d, 0x0e, 0x79, 0x05, 0x4c,
|
||||
0x36, 0x9b, 0xf1, 0x7f, 0x90, 0x50, 0x46, 0x25, 0x87, 0x10, 0x24, 0x3f,
|
||||
0x52, 0x5d, 0xff, 0x18, 0xad, 0xed, 0x78, 0x52, 0x00, 0x9c, 0xfe, 0x66,
|
||||
0x22, 0x24, 0xe0, 0x62, 0x13, 0xe2, 0x6f, 0x67, 0xd9, 0xe3, 0x6c, 0x64,
|
||||
0x6b, 0xa6, 0xea, 0x53, 0x61, 0x56, 0x8a, 0x33, 0x81, 0x35, 0xe5, 0x0f,
|
||||
0x35, 0xc9, 0xf3, 0x59, 0xc2, 0xa8, 0x92, 0x73, 0x69, 0x66, 0x05, 0x70,
|
||||
0xa1, 0x5f, 0xec, 0x4e, 0x3d, 0x6b, 0xc0, 0x78, 0xa4, 0xcb, 0xfc, 0x7e,
|
||||
0x44, 0x8c, 0xc4, 0x1b, 0x25, 0x70, 0x8f, 0x27, 0x87, 0x76, 0x2d, 0x4f,
|
||||
0x70, 0xb0, 0xea, 0x7a, 0x92, 0x43, 0x8c, 0x00, 0xed, 0xfd, 0x3b, 0x23,
|
||||
0x69, 0x71, 0x8e, 0x49, 0x83, 0xc3, 0x4e, 0x37, 0xab, 0x18, 0xd9, 0x30,
|
||||
0x4d, 0x48, 0x5e, 0x7e, 0xbc, 0x5a, 0x1a, 0x24, 0x34, 0xed, 0x19, 0x57,
|
||||
0xf4, 0xf4, 0x0d, 0x02, 0x0c, 0x57, 0xde, 0x6d, 0x40, 0x39, 0x1f, 0x71,
|
||||
0x9c, 0xa1, 0xb0, 0x28, 0x2d, 0x05, 0xb9, 0x6b, 0x85, 0x7a, 0x4c, 0x47,
|
||||
0x55, 0x55, 0x55, 0x35, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c,
|
||||
0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c,
|
||||
0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25,
|
||||
0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b,
|
||||
0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48, 0xbd, 0xf7, 0xde, 0x7b,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32,
|
||||
0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c,
|
||||
0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11,
|
||||
0x88, 0x88, 0x88, 0x48, 0xbd, 0xf7, 0xde, 0x7b, 0x00, 0x00, 0x00, 0x04,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a,
|
||||
0x70, 0x3d, 0x0a, 0x57, 0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25,
|
||||
0xdb, 0xb6, 0x6d, 0x3b, 0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48,
|
||||
0xbd, 0xf7, 0xde, 0x7b, 0x00, 0x00, 0x00, 0x04, 0xc1, 0x07, 0x1f, 0x7c,
|
||||
0xc8, 0x42, 0x16, 0x32, 0xaa, 0xaa, 0xaa, 0x5a, 0x70, 0x3d, 0x0a, 0x57,
|
||||
0xec, 0xc4, 0x4e, 0x2c, 0x7b, 0x09, 0xed, 0x25, 0xdb, 0xb6, 0x6d, 0x3b,
|
||||
0x61, 0xb9, 0xa7, 0x11, 0x88, 0x88, 0x88, 0x48, 0xbd, 0xf7, 0xde, 0x7b,
|
||||
0x00, 0x00, 0x00, 0x04, 0xc1, 0x07, 0x1f, 0x7c, 0x87, 0x87, 0x87, 0x47,
|
||||
0x55, 0x55, 0x55, 0x35, 0x7c, 0xec, 0xe8, 0x54, 0x5f, 0xc4, 0x1c, 0x7e,
|
||||
0x02, 0x38, 0x4e, 0x55, 0x86, 0x80, 0x6d, 0x71, 0xc3, 0xa8, 0x98, 0x4a,
|
||||
0x2b, 0xaa, 0x86, 0x63, 0x60, 0xd7, 0x4f, 0x2e, 0xb4, 0xac, 0xce, 0x78,
|
||||
0xbd, 0x1c, 0x4f, 0x55, 0x6b, 0x2c, 0x33, 0x64, 0x8c, 0x56, 0x30, 0x43,
|
||||
0xd8, 0x89, 0x9d, 0x58, 0xdd, 0x29, 0xc3, 0x15, 0x02, 0x15, 0x5b, 0x4f,
|
||||
0xdc, 0xb9, 0x0c, 0x03, 0x9a, 0x8d, 0x4d, 0x53, 0x6e, 0xf2, 0x33, 0x15,
|
||||
0xed, 0x3f, 0x16, 0x06, 0x43, 0xab, 0x59, 0x54, 0x1a, 0x62, 0xcd, 0x3a,
|
||||
0xda, 0x77, 0xa8, 0x51, 0x42, 0x58, 0x05, 0x55, 0x39, 0xeb, 0xd1, 0x45,
|
||||
0xb6, 0x6d, 0xdb, 0x76, 0x02, 0x15, 0x5b, 0x4f, 0xb9, 0x5a, 0x8c, 0x36,
|
||||
0x9a, 0x63, 0x3e, 0x3c, 0xe6, 0x28, 0x72, 0x36, 0x51, 0x89, 0xdb, 0x3b,
|
||||
0xfa, 0xe0, 0x07, 0x07, 0x30, 0xb3, 0x56, 0x39, 0x91, 0x42, 0x86, 0x38,
|
||||
0xda, 0xd2, 0x8f, 0x67, 0x75, 0xca, 0x3e, 0x69, 0xe9, 0xd8, 0x07, 0x6f,
|
||||
0x11, 0x11, 0x11, 0x11, 0xdc, 0xb9, 0x0c, 0x03, 0x9a, 0x63, 0x3e, 0x3c,
|
||||
0x54, 0xdc, 0x52, 0x1f, 0xf3, 0xc8, 0xb6, 0x6b, 0x96, 0x31, 0xf8, 0x1b,
|
||||
0x20, 0xee, 0x0b, 0x07, 0x4c, 0x37, 0x80, 0x4b, 0x31, 0x99, 0xd0, 0x09,
|
||||
0xb8, 0xa5, 0x62, 0x5f, 0xa2, 0x72, 0xfb, 0x33, 0x11, 0xd8, 0x0e, 0x65,
|
||||
0x00, 0x00, 0x00, 0x08, 0x9a, 0x8d, 0x4d, 0x53, 0xe6, 0x28, 0x72, 0x36,
|
||||
0xf3, 0xc8, 0xb6, 0x6b, 0xef, 0x80, 0xab, 0x77, 0x4d, 0x49, 0x25, 0x2b,
|
||||
0x7e, 0x10, 0x08, 0x1b, 0x70, 0x22, 0x72, 0x66, 0x8b, 0xe6, 0x06, 0x3a,
|
||||
0x58, 0xb9, 0x7e, 0x02, 0x97, 0xf4, 0xc2, 0x4f, 0x6b, 0x9a, 0x68, 0x53,
|
||||
0x0f, 0x0f, 0x0f, 0x0f, 0x6e, 0xf2, 0x33, 0x15, 0x51, 0x89, 0xdb, 0x3b,
|
||||
0x96, 0x31, 0xf8, 0x1b, 0x4d, 0x49, 0x25, 0x2b, 0xe2, 0xe0, 0x5c, 0x64,
|
||||
0xb6, 0x1d, 0x73, 0x13, 0x38, 0x1b, 0xfd, 0x49, 0xe1, 0x2c, 0xce, 0x5d,
|
||||
0x2a, 0x6b, 0xb4, 0x17, 0x7e, 0xa9, 0x6e, 0x72, 0x2f, 0x77, 0x47, 0x79,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0xed, 0x3f, 0x16, 0x06, 0xfa, 0xe0, 0x07, 0x07,
|
||||
0x20, 0xee, 0x0b, 0x07, 0x7e, 0x10, 0x08, 0x1b, 0xb6, 0x1d, 0x73, 0x13,
|
||||
0xca, 0x4a, 0x44, 0x68, 0x1c, 0x93, 0xbc, 0x37, 0xfa, 0x14, 0x8b, 0x55,
|
||||
0xae, 0xe0, 0xac, 0x31, 0xcb, 0x04, 0x09, 0x46, 0x27, 0x8f, 0x96, 0x07,
|
||||
0x28, 0xaf, 0xa1, 0x3c, 0x43, 0xab, 0x59, 0x54, 0x30, 0xb3, 0x56, 0x39,
|
||||
0x4c, 0x37, 0x80, 0x4b, 0x70, 0x22, 0x72, 0x66, 0x38, 0x1b, 0xfd, 0x49,
|
||||
0x1c, 0x93, 0xbc, 0x37, 0xfb, 0xdd, 0xff, 0x41, 0x73, 0x22, 0xa8, 0x31,
|
||||
0xd4, 0xc3, 0x26, 0x2b, 0xe7, 0x8c, 0xce, 0x35, 0x03, 0x29, 0x9c, 0x43,
|
||||
0xcc, 0xcc, 0xcc, 0x6c, 0x1a, 0x62, 0xcd, 0x3a, 0x91, 0x42, 0x86, 0x38,
|
||||
0x31, 0x99, 0xd0, 0x09, 0x8b, 0xe6, 0x06, 0x3a, 0xe1, 0x2c, 0xce, 0x5d,
|
||||
0xfa, 0x14, 0x8b, 0x55, 0x73, 0x22, 0xa8, 0x31, 0xaf, 0x9f, 0x0d, 0x2d,
|
||||
0xd8, 0xf1, 0xd2, 0x43, 0x41, 0x60, 0x7a, 0x48, 0xca, 0xa1, 0x4c, 0x7c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xda, 0x77, 0xa8, 0x51, 0xda, 0xd2, 0x8f, 0x67,
|
||||
0xb8, 0xa5, 0x62, 0x5f, 0x58, 0xb9, 0x7e, 0x02, 0x2a, 0x6b, 0xb4, 0x17,
|
||||
0xae, 0xe0, 0xac, 0x31, 0xd4, 0xc3, 0x26, 0x2b, 0xd8, 0xf1, 0xd2, 0x43,
|
||||
0x38, 0xc4, 0xc5, 0x55, 0x39, 0x3d, 0x1f, 0x4c, 0x81, 0xa8, 0x99, 0x14,
|
||||
0xa2, 0x8b, 0x2e, 0x7a, 0x42, 0x58, 0x05, 0x55, 0x75, 0xca, 0x3e, 0x69,
|
||||
0xa2, 0x72, 0xfb, 0x33, 0x97, 0xf4, 0xc2, 0x4f, 0x7e, 0xa9, 0x6e, 0x72,
|
||||
0xcb, 0x04, 0x09, 0x46, 0xe7, 0x8c, 0xce, 0x35, 0x41, 0x60, 0x7a, 0x48,
|
||||
0x39, 0x3d, 0x1f, 0x4c, 0xc3, 0x27, 0xbb, 0x1a, 0x86, 0xb4, 0x97, 0x00,
|
||||
0xc8, 0x42, 0x16, 0x32, 0x39, 0xeb, 0xd1, 0x45, 0xe9, 0xd8, 0x07, 0x6f,
|
||||
0x11, 0xd8, 0x0e, 0x65, 0x6b, 0x9a, 0x68, 0x53, 0x2f, 0x77, 0x47, 0x79,
|
||||
0x27, 0x8f, 0x96, 0x07, 0x03, 0x29, 0x9c, 0x43, 0xca, 0xa1, 0x4c, 0x7c,
|
||||
0x81, 0xa8, 0x99, 0x14, 0x86, 0xb4, 0x97, 0x00, 0x0c, 0xd8, 0x29, 0x37,
|
||||
0x55, 0x55, 0x55, 0x35, 0xcc, 0xab, 0xe7, 0x58, 0x82, 0xaa, 0xb7, 0x06,
|
||||
0x3c, 0x2a, 0x3d, 0x61, 0x45, 0xbd, 0xcc, 0x4b, 0xa9, 0x83, 0x44, 0x56,
|
||||
0x16, 0xe6, 0x58, 0x6e, 0x70, 0x4b, 0x3a, 0x44, 0xe2, 0x3b, 0x37, 0x60,
|
||||
0xf0, 0x3b, 0x41, 0x1e, 0x44, 0x40, 0x84, 0x5a, 0x63, 0x5d, 0x4d, 0x78,
|
||||
0x22, 0x80, 0xb3, 0x0f, 0xe0, 0x85, 0xec, 0x77, 0xe5, 0x3d, 0xda, 0x27,
|
||||
0x55, 0xf9, 0xfd, 0x44, 0x38, 0xa7, 0x0f, 0x0a, 0x2f, 0xec, 0xda, 0x34,
|
||||
0x24, 0xef, 0x00, 0x40, 0x54, 0x9a, 0x0b, 0x27, 0xf9, 0x85, 0xf4, 0x16,
|
||||
0x14, 0x1f, 0x17, 0x30, 0x1d, 0xb0, 0xdf, 0x31, 0x55, 0x55, 0x55, 0x35,
|
||||
0x98, 0x36, 0x7e, 0x31, 0xd0, 0xda, 0x0a, 0x16, 0xae, 0xb0, 0x6a, 0x00,
|
||||
0x0e, 0x7a, 0x7e, 0x6d, 0x93, 0x81, 0x4d, 0x21, 0x45, 0x5a, 0x4d, 0x20,
|
||||
0x42, 0x5d, 0xfd, 0x49, 0x28, 0xc5, 0xe2, 0x75, 0x45, 0x85, 0x03, 0x2c,
|
||||
0xfc, 0x78, 0x72, 0x15, 0x98, 0x9c, 0x88, 0x0b, 0xed, 0x8f, 0x6f, 0x2b,
|
||||
0x55, 0x75, 0x17, 0x5f, 0xe5, 0xed, 0x21, 0x52, 0x5a, 0x34, 0x10, 0x7d,
|
||||
0x42, 0x25, 0x57, 0x6a, 0xa4, 0xb2, 0xe6, 0x2e, 0x05, 0xa8, 0xc4, 0x17,
|
||||
0xff, 0x9c, 0x7f, 0x6f, 0x23, 0x64, 0x17, 0x44, 0x85, 0xa9, 0x6b, 0x46,
|
||||
0x66, 0x58, 0x1b, 0x3b, 0x55, 0x55, 0x55, 0x35, 0x55, 0xf6, 0xca, 0x06,
|
||||
0x68, 0x75, 0xa9, 0x55, 0x54, 0x44, 0x4f, 0x61, 0x65, 0x3b, 0x96, 0x37,
|
||||
0xa9, 0x89, 0xb6, 0x47, 0x70, 0x8a, 0x8d, 0x74, 0x09, 0x53, 0x9e, 0x5e,
|
||||
0x92, 0x56, 0x2b, 0x34, 0x3e, 0x9d, 0x12, 0x0a, 0x54, 0x98, 0xf8, 0x29,
|
||||
0xde, 0xa0, 0xdd, 0x11, 0x46, 0x3e, 0x0f, 0x70, 0xff, 0xee, 0x0d, 0x7c,
|
||||
0x48, 0xe0, 0xe1, 0x6d, 0xb6, 0x5a, 0x2f, 0x7c, 0xb1, 0xb2, 0xf7, 0x2f,
|
||||
0xda, 0x64, 0x33, 0x7e, 0x87, 0x48, 0x48, 0x7e, 0x95, 0x6c, 0xd5, 0x5c,
|
||||
0x26, 0x8f, 0xc9, 0x3e, 0xf9, 0x5e, 0x99, 0x38, 0xf5, 0x32, 0xc2, 0x66,
|
||||
0x55, 0x55, 0x55, 0x35, 0x7f, 0xb1, 0x0f, 0x47, 0xac, 0x5d, 0xec, 0x76,
|
||||
0xba, 0x59, 0xc4, 0x7f, 0xfb, 0xdc, 0x32, 0x46, 0xe8, 0x83, 0xe0, 0x0a,
|
||||
0xf4, 0xb8, 0x56, 0x36, 0x07, 0x4f, 0x7f, 0x29, 0x31, 0xb8, 0xf4, 0x2c,
|
||||
0x7e, 0x42, 0xbd, 0x3e, 0xf1, 0x9d, 0x40, 0x73, 0x51, 0xf1, 0xce, 0x31,
|
||||
0x35, 0x7b, 0x0e, 0x48, 0x9e, 0xb9, 0x6e, 0x3b, 0x37, 0x00, 0x57, 0x0c,
|
||||
0x15, 0x25, 0x74, 0x64, 0xdd, 0x39, 0x64, 0x5c, 0x0a, 0x5d, 0x08, 0x2b,
|
||||
0xf5, 0xe6, 0x0c, 0x3f, 0xe6, 0xce, 0x30, 0x2d, 0x27, 0xc4, 0x07, 0x19,
|
||||
0x82, 0xfb, 0x44, 0x08, 0x7b, 0x94, 0x23, 0x69, 0x55, 0x55, 0x55, 0x35,
|
||||
0xc7, 0xbe, 0xaf, 0x49, 0xa6, 0x9a, 0x26, 0x30, 0x7c, 0xb2, 0x66, 0x35,
|
||||
0xe4, 0x83, 0x46, 0x62, 0xe3, 0x1c, 0x23, 0x07, 0x36, 0x2e, 0xd3, 0x00,
|
||||
0xe2, 0x65, 0xc8, 0x51, 0x0c, 0x09, 0x5c, 0x74, 0x13, 0x94, 0xf9, 0x67,
|
||||
0x4e, 0x07, 0x26, 0x03, 0xba, 0xb4, 0x3a, 0x7f, 0x38, 0xb4, 0x7c, 0x6a,
|
||||
0x44, 0x7a, 0x1c, 0x7b, 0xeb, 0xf9, 0x8b, 0x0b, 0x16, 0xf8, 0x23, 0x36,
|
||||
0x7b, 0x89, 0x79, 0x44, 0x80, 0xfe, 0x33, 0x2a, 0x7d, 0x59, 0xe2, 0x1b,
|
||||
0x7b, 0xe1, 0xb0, 0x15, 0x21, 0xcb, 0x47, 0x77, 0x23, 0x1a, 0xc0, 0x14,
|
||||
0x5b, 0x86, 0x06, 0x2d, 0x55, 0x55, 0x55, 0x35, 0x04, 0xb5, 0x47, 0x27,
|
||||
0x1d, 0xb7, 0x22, 0x44, 0xcc, 0x9e, 0xce, 0x7d, 0xf2, 0x75, 0x78, 0x78,
|
||||
0x7b, 0x98, 0x99, 0x12, 0xbd, 0x34, 0xe4, 0x43, 0xf0, 0x0a, 0x96, 0x43,
|
||||
0xf1, 0x50, 0x1d, 0x0b, 0x86, 0x78, 0xc9, 0x59, 0xc7, 0x78, 0xec, 0x16,
|
||||
0x71, 0xaa, 0x0c, 0x56, 0xbf, 0x92, 0xe2, 0x3a, 0xb5, 0x6e, 0x2d, 0x18,
|
||||
0xe2, 0xc7, 0x31, 0x67, 0x10, 0xab, 0x9f, 0x27, 0x27, 0x1e, 0xf3, 0x69,
|
||||
0xaf, 0x57, 0x42, 0x4c, 0x4f, 0xb4, 0x30, 0x35, 0x00, 0x54, 0xb0, 0x4a,
|
||||
0xa2, 0x00, 0x2a, 0x4a, 0x3d, 0x49, 0x58, 0x73, 0xf9, 0x16, 0xb0, 0x01,
|
||||
0x55, 0x55, 0x55, 0x35, 0xe4, 0xd5, 0x3f, 0x2e, 0xee, 0x84, 0x47, 0x51,
|
||||
0x3f, 0x84, 0xb9, 0x6b, 0x49, 0xb9, 0xae, 0x57, 0x32, 0x5a, 0x04, 0x02,
|
||||
0xe1, 0x6a, 0xf1, 0x4b, 0x30, 0x53, 0xf1, 0x05, 0x29, 0x74, 0x75, 0x76,
|
||||
0x4a, 0x15, 0x5b, 0x5d, 0xe1, 0xaa, 0x15, 0x1b, 0x62, 0xf5, 0xe8, 0x76,
|
||||
0x03, 0xc1, 0xaa, 0x06, 0x13, 0x59, 0xc8, 0x40, 0x84, 0x49, 0xc8, 0x1f,
|
||||
0x85, 0x98, 0x55, 0x6b, 0xed, 0x38, 0x45, 0x17, 0xb8, 0xc7, 0xf7, 0x69,
|
||||
0xc3, 0x87, 0xd0, 0x17, 0x0a, 0x93, 0xb7, 0x35, 0xc2, 0x45, 0x75, 0x34,
|
||||
0x7a, 0x78, 0xff, 0x51, 0x26, 0xd2, 0x59, 0x13, 0x55, 0x55, 0x55, 0x35,
|
||||
0x48, 0x38, 0xf7, 0x6e, 0x4f, 0x7d, 0xc7, 0x70, 0x32, 0x5d, 0x5b, 0x7a,
|
||||
0x85, 0x35, 0x9c, 0x07, 0x40, 0x08, 0x30, 0x5c, 0x64, 0x69, 0x27, 0x7a,
|
||||
0x07, 0x34, 0x90, 0x6c, 0x6e, 0xa6, 0x8e, 0x70, 0xd4, 0xf2, 0xf7, 0x59,
|
||||
0x0f, 0x13, 0x17, 0x5d, 0xa8, 0xa9, 0x01, 0x29, 0xad, 0xfd, 0x9a, 0x77,
|
||||
0x3c, 0x77, 0xc7, 0x67, 0xd0, 0x43, 0xb1, 0x3f, 0x97, 0x76, 0xe4, 0x72,
|
||||
0xd4, 0x82, 0x9a, 0x25, 0xec, 0xef, 0xc3, 0x03, 0xdc, 0xf9, 0x94, 0x3f,
|
||||
0xa4, 0x76, 0x88, 0x5a, 0xb8, 0x0f, 0x03, 0x76, 0x58, 0x87, 0x42, 0x11,
|
||||
0x28, 0xb7, 0xb0, 0x1d, 0x55, 0x55, 0x55, 0x35, 0x2f, 0xe6, 0x44, 0x75,
|
||||
0xf3, 0x0b, 0xe8, 0x68, 0x59, 0x72, 0x1f, 0x16, 0x8c, 0xd0, 0xe3, 0x3c,
|
||||
0xcc, 0xfc, 0x77, 0x05, 0xd6, 0x4b, 0x48, 0x78, 0x51, 0x88, 0x4c, 0x5f,
|
||||
0x30, 0x43, 0x9c, 0x2f, 0x49, 0x72, 0xba, 0x01, 0xba, 0xae, 0xfe, 0x0b,
|
||||
0x94, 0x3f, 0xe7, 0x71, 0x9d, 0xfa, 0x37, 0x06, 0xfc, 0xa2, 0x99, 0x6f,
|
||||
0xe2, 0x0d, 0xcf, 0x4b, 0x63, 0x76, 0xec, 0x49, 0xa8, 0xb5, 0x84, 0x0b,
|
||||
0x84, 0xa3, 0x75, 0x4f, 0x5e, 0x56, 0xdd, 0x37, 0x1a, 0x7d, 0x6e, 0x34,
|
||||
0x95, 0x39, 0x80, 0x1e, 0x58, 0x2e, 0x22, 0x50, 0xd3, 0x46, 0x93, 0x1e,
|
||||
0x55, 0x55, 0x55, 0x35, 0xf5, 0x96, 0x5a, 0x5f, 0x9b, 0xc8, 0x58, 0x50,
|
||||
0x3e, 0x03, 0xab, 0x16, 0xd5, 0xc6, 0x4c, 0x7f, 0x3f, 0x82, 0xf6, 0x34,
|
||||
0x1c, 0x29, 0x22, 0x16, 0x40, 0xdb, 0xe7, 0x71, 0x8b, 0x8a, 0x4b, 0x55,
|
||||
0x45, 0xbf, 0xd1, 0x68, 0x4c, 0xbb, 0xe3, 0x43, 0x1b, 0x96, 0x28, 0x3d,
|
||||
0x36, 0x4f, 0xdb, 0x58, 0xa8, 0x39, 0xac, 0x38, 0xd3, 0xeb, 0x90, 0x18,
|
||||
0x2f, 0xb7, 0x06, 0x1a, 0x5a, 0x82, 0x53, 0x13, 0x77, 0xaf, 0xe0, 0x4d,
|
||||
0x9e, 0xe9, 0x39, 0x79, 0xb7, 0xf6, 0xa2, 0x3c, 0x41, 0x9d, 0x14, 0x59,
|
||||
0x01, 0x33, 0x36, 0x20, 0x15, 0xe0, 0xe4, 0x15, 0x55, 0x55, 0x55, 0x35,
|
||||
0x58, 0x48, 0x07, 0x36, 0x3f, 0x43, 0x1e, 0x05, 0x33, 0x9e, 0x14, 0x45,
|
||||
0x69, 0xc8, 0x16, 0x63, 0x5f, 0xab, 0x77, 0x26, 0xf4, 0x08, 0xb0, 0x2e,
|
||||
0xf8, 0x31, 0x79, 0x29, 0x37, 0xc9, 0x37, 0x28, 0x55, 0x62, 0xcc, 0x43,
|
||||
0xeb, 0x6b, 0xe4, 0x03, 0xfe, 0x82, 0x50, 0x20, 0x2d, 0xdf, 0xf2, 0x7d,
|
||||
0xba, 0x07, 0xe2, 0x0e, 0x88, 0x1e, 0x82, 0x2b, 0x87, 0x54, 0x26, 0x39,
|
||||
0xdd, 0xee, 0x3e, 0x0b, 0xdc, 0xbf, 0x93, 0x1a, 0x8a, 0xce, 0xa6, 0x39,
|
||||
0x5b, 0xaf, 0x8f, 0x00, 0x7a, 0xad, 0x27, 0x71, 0x1e, 0x76, 0xd8, 0x58,
|
||||
0x96, 0x36, 0xa3, 0x14, 0x55, 0x55, 0x55, 0x35, 0x76, 0x27, 0x76, 0x62,
|
||||
0xa4, 0x9f, 0x05, 0x5a, 0x41, 0x28, 0x49, 0x12, 0x24, 0x18, 0x49, 0x12,
|
||||
0x4f, 0xc2, 0xa5, 0x25, 0x0e, 0x0e, 0x3c, 0x3c, 0x01, 0xa7, 0x65, 0x00,
|
||||
0x92, 0x9e, 0x17, 0x36, 0xa1, 0x7a, 0x92, 0x27, 0xcf, 0x74, 0xba, 0x4d,
|
||||
0xcb, 0x6f, 0x66, 0x68, 0xd8, 0x89, 0x9d, 0x58, 0xb6, 0x6d, 0xdb, 0x76,
|
||||
0x11, 0x11, 0x11, 0x11, 0x00, 0x00, 0x00, 0x08, 0x0f, 0x0f, 0x0f, 0x0f,
|
||||
0x38, 0x8e, 0xe3, 0x78, 0x28, 0xaf, 0xa1, 0x3c, 0xcc, 0xcc, 0xcc, 0x6c,
|
||||
0x79, 0x9e, 0xe7, 0x79, 0xa2, 0x8b, 0x2e, 0x7a, 0xc8, 0x42, 0x16, 0x32
|
||||
};
|
||||
} // namespace poseidon_constants
|
||||
#endif
|
||||
@@ -1,9 +1,13 @@
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#pragma once
|
||||
#ifndef POSEIDON_KERNELS_H
|
||||
#define POSEIDON_KERNELS_H
|
||||
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "poseidon/constants.cuh"
|
||||
|
||||
namespace poseidon {
|
||||
template <typename S, int T>
|
||||
__global__ void prepare_poseidon_states(S* states, size_t number_of_states, S domain_tag, bool aligned)
|
||||
__global__ void prepare_poseidon_states(const S* input, S* states, unsigned int number_of_states, const S domain_tag)
|
||||
{
|
||||
int idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
int state_number = idx / T;
|
||||
@@ -16,27 +20,27 @@ namespace poseidon {
|
||||
if (element_number == 0) {
|
||||
prepared_element = domain_tag;
|
||||
} else {
|
||||
if (aligned) {
|
||||
prepared_element = states[idx];
|
||||
} else {
|
||||
prepared_element = states[idx - 1];
|
||||
}
|
||||
prepared_element = input[idx - state_number - 1];
|
||||
}
|
||||
|
||||
// We need __syncthreads here if the state is not aligned
|
||||
// because then we need to shift the vector [A, B, 0] -> [D, A, B]
|
||||
if (!aligned) { __syncthreads(); }
|
||||
|
||||
// Store element in state
|
||||
states[idx] = prepared_element;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
DEVICE_INLINE S sbox_alpha_five(S element)
|
||||
DEVICE_INLINE S sbox_el(S element, const int alpha)
|
||||
{
|
||||
S result = S::sqr(element);
|
||||
result = S::sqr(result);
|
||||
return result * element;
|
||||
S result2 = S::sqr(element);
|
||||
switch (alpha) {
|
||||
case 3:
|
||||
return result2 * element;
|
||||
case 5:
|
||||
return S::sqr(result2) * element;
|
||||
case 7:
|
||||
return S::sqr(result2) * result2 * element;
|
||||
case 11:
|
||||
return S::sqr(S::sqr(result2)) * result2 * element;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
@@ -71,7 +75,7 @@ namespace poseidon {
|
||||
element = element + constants.round_constants[rc_offset + element_number];
|
||||
rc_offset += T;
|
||||
}
|
||||
element = sbox_alpha_five(element);
|
||||
element = sbox_el(element, constants.alpha);
|
||||
if (!skip_rc) { element = element + constants.round_constants[rc_offset + element_number]; }
|
||||
|
||||
// Multiply all the states by mds matrix
|
||||
@@ -111,7 +115,7 @@ namespace poseidon {
|
||||
__device__ S partial_round(S state[T], size_t rc_offset, int round_number, const PoseidonConstants<S>& constants)
|
||||
{
|
||||
S element = state[0];
|
||||
element = sbox_alpha_five(element);
|
||||
element = sbox_el(element, constants.alpha);
|
||||
element = element + constants.round_constants[rc_offset];
|
||||
|
||||
S* sparse_matrix = &constants.sparse_matrices[(T * 2 - 1) * round_number];
|
||||
@@ -155,22 +159,58 @@ namespace poseidon {
|
||||
}
|
||||
}
|
||||
|
||||
// This function just copies from the states to the output
|
||||
template <typename S, int T>
|
||||
__global__ void get_hash_results(S* states, size_t number_of_states, S* out)
|
||||
__global__ void
|
||||
squeeze_states_kernel(const S* states, unsigned int number_of_states, unsigned int rate, unsigned int offset, S* out)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
out[idx] = states[idx * T + 1];
|
||||
for (int i = 0; i < rate; i++) {
|
||||
out[idx * rate + i] = states[idx * T + offset + i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void copy_recursive(S* state, size_t number_of_states, S* out)
|
||||
cudaError_t poseidon_permutation_kernel(
|
||||
const S* input,
|
||||
S* out,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const PoseidonConstants<S>& constants,
|
||||
cudaStream_t& stream)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
S* states;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&states, number_of_states * T * sizeof(S), stream));
|
||||
|
||||
state[(idx / (T - 1) * T) + (idx % (T - 1)) + 1] = out[idx];
|
||||
prepare_poseidon_states<S, T>
|
||||
<<<PKC::number_of_full_blocks(T, number_of_states), PKC::number_of_threads(T), 0, stream>>>(
|
||||
input, states, number_of_states, constants.domain_tag);
|
||||
|
||||
size_t rc_offset = 0;
|
||||
full_rounds<S, T><<<
|
||||
PKC::number_of_full_blocks(T, number_of_states), PKC::number_of_threads(T),
|
||||
sizeof(S) * PKC::hashes_per_block(T) * T, stream>>>(
|
||||
states, number_of_states, rc_offset, FIRST_FULL_ROUNDS, constants);
|
||||
rc_offset += T * (constants.full_rounds_half + 1);
|
||||
|
||||
partial_rounds<S, T><<<PKC::number_of_singlehash_blocks(number_of_states), PKC::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, rc_offset, constants);
|
||||
rc_offset += constants.partial_rounds;
|
||||
|
||||
full_rounds<S, T><<<
|
||||
PKC::number_of_full_blocks(T, number_of_states), PKC::number_of_threads(T),
|
||||
sizeof(S) * PKC::hashes_per_block(T) * T, stream>>>(
|
||||
states, number_of_states, rc_offset, SECOND_FULL_ROUNDS, constants);
|
||||
|
||||
squeeze_states_kernel<S, T>
|
||||
<<<PKC::number_of_singlehash_blocks(number_of_states), PKC::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, output_len, 1, out);
|
||||
|
||||
CHK_IF_RETURN(cudaFreeAsync(states, stream));
|
||||
return CHK_LAST();
|
||||
}
|
||||
} // namespace poseidon
|
||||
} // namespace poseidon
|
||||
|
||||
#endif
|
||||
@@ -8,132 +8,87 @@
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "poseidon/kernels.cuh"
|
||||
#include "poseidon/constants.cuh"
|
||||
#include "hash/hash.cuh"
|
||||
using namespace hash;
|
||||
|
||||
/**
|
||||
* @namespace poseidon
|
||||
* Implementation of the [Poseidon hash function](https://eprint.iacr.org/2019/458.pdf)
|
||||
* Specifically, the optimized [Filecoin version](https://spec.filecoin.io/algorithms/crypto/poseidon/)
|
||||
*/
|
||||
namespace poseidon {
|
||||
#define FIRST_FULL_ROUNDS true
|
||||
#define SECOND_FULL_ROUNDS false
|
||||
|
||||
/**
|
||||
* For most of the Poseidon configurations this is the case
|
||||
* TODO: Add support for different full rounds numbers
|
||||
*/
|
||||
const int FULL_ROUNDS_DEFAULT = 4;
|
||||
|
||||
/**
|
||||
* @struct PoseidonConstants
|
||||
* These constants are enough to define a Poseidon instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param mds_matrix A pointer to an mds matrix allocated on the device
|
||||
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
|
||||
* @param sparse_matrices A pointer to sparse matrices allocated on the device
|
||||
*/
|
||||
template <typename S>
|
||||
struct PoseidonConstants {
|
||||
int arity;
|
||||
int partial_rounds;
|
||||
int full_rounds_half;
|
||||
S* round_constants = nullptr;
|
||||
S* mds_matrix = nullptr;
|
||||
S* non_sparse_matrix = nullptr;
|
||||
S* sparse_matrices = nullptr;
|
||||
S domain_tag;
|
||||
};
|
||||
|
||||
/**
|
||||
* @class PoseidonKernelsConfiguration
|
||||
* Describes the logic of deriving CUDA kernels parameters
|
||||
* such as the number of threads and the number of blocks
|
||||
*/
|
||||
template <int T>
|
||||
class PoseidonKernelsConfiguration
|
||||
class Poseidon : public Hasher<S, S>
|
||||
{
|
||||
public:
|
||||
// The logic behind this is that 1 thread only works on 1 element
|
||||
// We have {T} elements in each state, and {number_of_states} states total
|
||||
static const int number_of_threads = 256 / T * T;
|
||||
const std::size_t device_id;
|
||||
PoseidonConstants<S> constants;
|
||||
|
||||
// The partial rounds operates on the whole state, so we define
|
||||
// the parallelism params for processing a single hash preimage per thread
|
||||
static const int singlehash_block_size = 128;
|
||||
|
||||
static const int hashes_per_block = number_of_threads / T;
|
||||
|
||||
static int number_of_full_blocks(size_t number_of_states)
|
||||
cudaError_t run_hash_many_kernel(
|
||||
const S* input,
|
||||
S* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
int total_number_of_threads = number_of_states * T;
|
||||
return total_number_of_threads / number_of_threads +
|
||||
static_cast<bool>(total_number_of_threads % number_of_threads);
|
||||
cudaError_t permutation_error;
|
||||
#define P_PERM_T(width) \
|
||||
case width: \
|
||||
permutation_error = poseidon_permutation_kernel<S, width>( \
|
||||
input, output, number_of_states, input_len, output_len, this->constants, ctx.stream); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P_PERM_T(3)
|
||||
P_PERM_T(5)
|
||||
P_PERM_T(9)
|
||||
P_PERM_T(12)
|
||||
default:
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "PoseidonPermutation: #width must be one of [3, 5, 9, 12]");
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(permutation_error);
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
static int number_of_singlehash_blocks(size_t number_of_states)
|
||||
Poseidon(
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const S* round_constants,
|
||||
const S* mds_matrix,
|
||||
const S* non_sparse_matrix,
|
||||
const S* sparse_matrices,
|
||||
const S domain_tag,
|
||||
device_context::DeviceContext& ctx)
|
||||
: Hasher<S, S>(arity + 1, arity, arity, 1), device_id(ctx.device_id)
|
||||
{
|
||||
return number_of_states / singlehash_block_size + static_cast<bool>(number_of_states % singlehash_block_size);
|
||||
PoseidonConstants<S> constants;
|
||||
CHK_STICKY(create_optimized_poseidon_constants(
|
||||
arity, alpha, partial_rounds, full_rounds_half, round_constants, mds_matrix, non_sparse_matrix, sparse_matrices,
|
||||
domain_tag, &constants, ctx));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
Poseidon(int arity, device_context::DeviceContext& ctx)
|
||||
: Hasher<S, S>(arity + 1, arity, arity, 1), device_id(ctx.device_id)
|
||||
{
|
||||
PoseidonConstants<S> constants{};
|
||||
CHK_STICKY(init_optimized_poseidon_constants(arity, ctx, &constants));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
~Poseidon()
|
||||
{
|
||||
auto ctx = device_context::get_default_device_context();
|
||||
ctx.device_id = this->device_id;
|
||||
CHK_STICKY(release_optimized_poseidon_constants<S>(&this->constants, ctx));
|
||||
}
|
||||
};
|
||||
|
||||
template <int T>
|
||||
using PKC = PoseidonKernelsConfiguration<T>;
|
||||
|
||||
/**
|
||||
* @struct PoseidonConfig
|
||||
* Struct that encodes various Poseidon parameters.
|
||||
*/
|
||||
struct PoseidonConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
bool input_is_a_state; /**< If true, input is considered to be a states vector, holding the preimages
|
||||
* in aligned or not aligned format. Memory under the input pointer will be used for states
|
||||
* If false, fresh states memory will be allocated and input will be copied into it */
|
||||
bool aligned; /**< If true - input should be already aligned for poseidon permutation.
|
||||
* Aligned format: [0, A, B, 0, C, D, ...] (as you might get by using loop_state)
|
||||
* not aligned format: [A, B, 0, C, D, 0, ...] (as you might get from cudaMemcpy2D) */
|
||||
bool loop_state; /**< If true, hash results will also be copied in the input pointer in aligned format */
|
||||
bool is_async; /**< Whether to run the Poseidon asynchronously. If set to `true`, the poseidon_hash function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the poseidon_hash
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
/**
 * Builds a PoseidonConfig whose boolean options are all false and whose device
 * context is the one passed in (the default device context when omitted).
 * @param t Not read anywhere in this function body.
 * @param ctx Device context stored into the returned config.
 * @return The populated PoseidonConfig, by value.
 */
static PoseidonConfig default_poseidon_config(
|
||||
int t, const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
{
|
||||
PoseidonConfig config = {
|
||||
ctx, // ctx
|
||||
false, // are_inputs_on_device
|
||||
false, // are_outputs_on_device
|
||||
false, // input_is_a_state
|
||||
false, // aligned
|
||||
false, // loop_state
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads pre-calculated optimized constants, moves them to the device
|
||||
*/
|
||||
template <typename S>
|
||||
cudaError_t
|
||||
init_optimized_poseidon_constants(int arity, device_context::DeviceContext& ctx, PoseidonConstants<S>* constants);
|
||||
|
||||
/**
|
||||
* Compute the poseidon hash over a sequence of preimages.
|
||||
* Takes {number_of_states * (T-1)} elements of input and computes {number_of_states} hash images
|
||||
* @param T size of the poseidon state, should be equal to {arity + 1}
|
||||
* @param input a pointer to the input data. May be allocated on device or on host, regulated
|
||||
* by the config. May point to a string of preimages or a string of states filled with preimages.
|
||||
* @param output a pointer to the output data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be at least of size [number_of_states](@ref number_of_states)
|
||||
* @param number_of_states number of input blocks of size T-1 (arity)
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon_hash(
|
||||
S* input, S* output, size_t number_of_states, const PoseidonConstants<S>& constants, const PoseidonConfig& config);
|
||||
} // namespace poseidon
|
||||
|
||||
#endif
|
||||
@@ -1,74 +0,0 @@
|
||||
#pragma once
|
||||
#ifndef MERKLE_H
|
||||
#define MERKLE_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
using namespace poseidon;
|
||||
|
||||
/**
|
||||
* @namespace merkle
|
||||
* Implementation of the [Poseidon](@ref poseidon) [Merkle tree](https://en.wikipedia.org/wiki/Merkle_tree) builder,
|
||||
* parallelized for the use on GPU
|
||||
*/
|
||||
namespace merkle {
|
||||
static constexpr size_t GIGA = 1024 * 1024 * 1024;
|
||||
|
||||
/// Bytes per stream
|
||||
static constexpr size_t STREAM_CHUNK_SIZE = 1024 * 1024 * 1024;
|
||||
|
||||
/**
|
||||
* @struct TreeBuilderConfig
|
||||
* Struct that encodes various Tree builder parameters.
|
||||
*/
|
||||
struct TreeBuilderConfig {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
int keep_rows; /**< How many rows of the Merkle tree should be written to output. '0' means all of them */
|
||||
bool are_inputs_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool is_async; /**< Whether to run the tree builder asynchronously. If set to `true`, the build_merkle_tree
|
||||
* function will be non-blocking and you'd need to synchronize it explicitly by running
|
||||
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
/**
 * Builds a TreeBuilderConfig with keep_rows = 0 and both boolean options set to false.
 * @param ctx Device context stored into the returned config (the default device
 * context when omitted).
 * @return The populated TreeBuilderConfig, by value.
 */
static TreeBuilderConfig
|
||||
default_merkle_config(const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
{
|
||||
TreeBuilderConfig config = {
|
||||
ctx, // ctx
|
||||
0, // keep_rows
|
||||
false, // are_inputs_on_device
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the Poseidon Merkle tree
|
||||
*
|
||||
* @param leaves a pointer to the leaves layer. May be allocated on device or on host, regulated by the config
|
||||
* Expected to have arity ^ (height - 1) elements
|
||||
* @param digests a pointer to the digests storage. May only be allocated on the host
|
||||
* Expected to have `sum(arity ^ (i)) for i in [0..height-1]`
|
||||
* @param height the height of the merkle tree
|
||||
* # Algorithm
|
||||
* The function will split large tree into many subtrees of size that will fit `STREAM_CHUNK_SIZE`.
|
||||
* Each subtree is built in its own stream (there is a maximum number of streams)
|
||||
* After all subtrees are constructed - the function will combine the resulting sub-digests into the final top-tree
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t build_merkle_tree(
|
||||
const S* leaves,
|
||||
S* digests,
|
||||
uint32_t height,
|
||||
const PoseidonConstants<S>& poseidon,
|
||||
const TreeBuilderConfig& config);
|
||||
} // namespace merkle
|
||||
|
||||
#endif
|
||||
65
icicle/include/poseidon2/constants.cuh
Normal file
65
icicle/include/poseidon2/constants.cuh
Normal file
@@ -0,0 +1,65 @@
|
||||
#pragma once
|
||||
#ifndef POSEIDON2_CONSTANTS_H
|
||||
#define POSEIDON2_CONSTANTS_H
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
namespace poseidon2 {
|
||||
/**
|
||||
* For most of the Poseidon2 configurations this is the case
|
||||
*/
|
||||
const int EXTERNAL_ROUNDS_DEFAULT = 8;
|
||||
|
||||
enum DiffusionStrategy {
|
||||
DEFAULT_DIFFUSION,
|
||||
MONTGOMERY,
|
||||
};
|
||||
|
||||
enum MdsType { DEFAULT_MDS, PLONKY };
|
||||
|
||||
/**
|
||||
* @struct Poseidon2Constants
|
||||
* These constants are enough to define a Poseidon2 instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param internal_matrix_diag A pointer to the internal matrix diagonal
|
||||
* (presumably allocated on the device like round_constants - TODO confirm)
|
||||
* @param mds_type Selects the MDS variant (DEFAULT_MDS or PLONKY)
|
||||
* @param diffusion Selects the diffusion strategy (DEFAULT_DIFFUSION or MONTGOMERY)
|
||||
* The integer members width, alpha, internal_rounds and external_rounds are presumably the
|
||||
* state width, the S-box exponent and the round counts - TODO confirm against the constants loader
|
||||
*/
|
||||
template <typename S>
|
||||
struct Poseidon2Constants {
|
||||
int width;
|
||||
int alpha;
|
||||
int internal_rounds;
|
||||
int external_rounds;
|
||||
S* round_constants = nullptr;
|
||||
S* internal_matrix_diag = nullptr;
|
||||
MdsType mds_type;
|
||||
DiffusionStrategy diffusion;
|
||||
};
|
||||
|
||||
template <typename S>
|
||||
cudaError_t create_poseidon2_constants(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const S* round_constants,
|
||||
const S* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon_constants);
|
||||
|
||||
template <typename S>
|
||||
cudaError_t init_poseidon2_constants(
|
||||
int width,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon2_constants);
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx);
|
||||
} // namespace poseidon2
|
||||
|
||||
#endif
|
||||
1077
icicle/include/poseidon2/constants/m31_poseidon2.h
Normal file
1077
icicle/include/poseidon2/constants/m31_poseidon2.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,13 +3,14 @@ from sage.rings.polynomial.polynomial_gf2x import GF2X_BuildIrred_list
|
||||
from math import *
|
||||
import itertools
|
||||
|
||||
CURVE_NAME = "bn254"
|
||||
CURVE_NAME = "m31"
|
||||
|
||||
###########################################################################
|
||||
# p = 18446744069414584321 # GoldiLocks
|
||||
# p = 2013265921 # BabyBear
|
||||
p = 2**31 - 1 # M31
|
||||
# p = 52435875175126190479447740508185965837690552500527637822603658699938581184513 # BLS12-381
|
||||
p = 21888242871839275222246405745257275088548364400416034343698204186575808495617 # BN254/BN256
|
||||
# p = 21888242871839275222246405745257275088548364400416034343698204186575808495617 # BN254/BN256
|
||||
# p = 28948022309329048855892746252171976963363056481941560715954676764349967630337 # Pasta (Pallas)
|
||||
# p = 28948022309329048855892746252171976963363056481941647379679742748393362948097 # Pasta (Vesta)
|
||||
|
||||
@@ -617,6 +618,8 @@ print(f"namespace poseidon2_constants_{CURVE_NAME} {{")
|
||||
for t in TS:
|
||||
NUM_CELLS = t
|
||||
R_F_FIXED, R_P_FIXED, _, _ = poseidon_calc_final_numbers_fixed(p, t, alpha, 128, True)
|
||||
if t == 16:
|
||||
R_P_FIXED = 14
|
||||
|
||||
INIT_SEQUENCE = []
|
||||
|
||||
|
||||
@@ -1,7 +1,28 @@
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#pragma once
|
||||
#ifndef POSEIDON2_KERNELS_H
|
||||
#define POSEIDON2_KERNELS_H
|
||||
|
||||
#include "utils/utils.h"
|
||||
#include "hash/hash.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "poseidon2/constants.cuh"
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
namespace poseidon2 {
|
||||
// Rounds v up to the nearest power of two. After the decrement, the shift-and-OR
// steps copy the highest set bit into every lower position, and the final increment
// carries into the next power of two. A value that is already a power of two is
// returned unchanged.
// Edge cases: v == 0 wraps around on the decrement and the function returns 0; the
// same wrap to 0 happens when v is above 2^31.
// The shifts stop at 16, so this assumes unsigned int is 32 bits wide.
static DEVICE_INLINE unsigned int d_next_pow_of_two(unsigned int v)
|
||||
{
|
||||
v--;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
v++;
|
||||
return v;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
DEVICE_INLINE S sbox_el(S element, const int alpha)
|
||||
{
|
||||
@@ -19,7 +40,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
DEVICE_INLINE S sbox(S state[T], const int alpha)
|
||||
DEVICE_INLINE void sbox(S state[T], const int alpha)
|
||||
{
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = sbox_el(state[i], alpha);
|
||||
@@ -27,7 +48,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
DEVICE_INLINE S add_rc(S state[T], size_t rc_offset, const S* rc)
|
||||
DEVICE_INLINE void add_rc(S state[T], size_t rc_offset, const S* rc)
|
||||
{
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = state[i] + rc[rc_offset + i];
|
||||
@@ -35,7 +56,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__device__ S mds_light_4x4(S s[4])
|
||||
__device__ void mds_light_4x4(S s[4])
|
||||
{
|
||||
S t0 = s[0] + s[1];
|
||||
S t1 = s[2] + s[3];
|
||||
@@ -56,7 +77,7 @@ namespace poseidon2 {
|
||||
// [ 3 1 1 2 ].
|
||||
// https://github.com/Plonky3/Plonky3/blob/main/poseidon2/src/matrix.rs#L36
|
||||
template <typename S>
|
||||
__device__ S mds_light_plonky_4x4(S s[4])
|
||||
__device__ void mds_light_plonky_4x4(S s[4])
|
||||
{
|
||||
S t01 = s[0] + s[1];
|
||||
S t23 = s[2] + s[3];
|
||||
@@ -70,7 +91,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__device__ S mds_light(S state[T], MdsType mds)
|
||||
__device__ void mds_light(S state[T], MdsType mds)
|
||||
{
|
||||
S sum;
|
||||
switch (T) {
|
||||
@@ -123,7 +144,7 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__device__ S internal_round(S state[T], size_t rc_offset, const Poseidon2Constants<S>& constants)
|
||||
__device__ void internal_round(S state[T], size_t rc_offset, const Poseidon2Constants<S>& constants)
|
||||
{
|
||||
S element = state[0];
|
||||
element = element + constants.round_constants[rc_offset];
|
||||
@@ -176,17 +197,8 @@ namespace poseidon2 {
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void poseidon2_permutation_kernel(
|
||||
const S* states, S* states_out, size_t number_of_states, const Poseidon2Constants<S> constants)
|
||||
__device__ void permute_state(S state[T], const Poseidon2Constants<S>& constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
S state[T];
|
||||
UNROLL
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = states[idx * T + i];
|
||||
}
|
||||
unsigned int rn;
|
||||
|
||||
mds_light<S, T>(state, constants.mds_type);
|
||||
@@ -213,6 +225,22 @@ namespace poseidon2 {
|
||||
mds_light<S, T>(state, constants.mds_type);
|
||||
rc_offset += T;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void permutation_kernel(
|
||||
const S* states, S* states_out, unsigned int number_of_states, const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
S state[T];
|
||||
UNROLL
|
||||
for (int i = 0; i < T; i++) {
|
||||
state[i] = states[idx * T + i];
|
||||
}
|
||||
|
||||
permute_state<S, T>(state, constants);
|
||||
|
||||
UNROLL
|
||||
for (int i = 0; i < T; i++) {
|
||||
@@ -220,13 +248,120 @@ namespace poseidon2 {
|
||||
}
|
||||
}
|
||||
|
||||
// This function just copies from the states to the output
|
||||
template <typename S, int T>
|
||||
__global__ void get_hash_results(const S* states, size_t number_of_states, int index, S* out)
|
||||
__global__ void hash_many_kernel(
|
||||
const S* input,
|
||||
S* output,
|
||||
uint64_t number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
uint64_t idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
out[idx] = states[idx * T + index];
|
||||
S state[T] = {0};
|
||||
UNROLL
|
||||
for (int i = 0; i < input_len; i++) {
|
||||
state[i] = input[idx * input_len + i];
|
||||
}
|
||||
|
||||
permute_state<S, T>(state, constants);
|
||||
|
||||
UNROLL
|
||||
for (int i = 0; i < output_len; i++) {
|
||||
output[idx * output_len + i] = state[i];
|
||||
}
|
||||
}
|
||||
} // namespace poseidon2
|
||||
|
||||
template <typename S, int T>
|
||||
__device__ void absorb_2d_state(
|
||||
const Matrix<S>* inputs,
|
||||
S state[T],
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int rate,
|
||||
uint64_t row_idx,
|
||||
const Poseidon2Constants<S>& constants)
|
||||
{
|
||||
unsigned int index = 0;
|
||||
for (int i = 0; i < number_of_inputs; i++) {
|
||||
const Matrix<S>* input = inputs + i;
|
||||
for (int j = 0; j < input->width; j++) {
|
||||
state[index] = input->values[row_idx * input->width + j];
|
||||
index++;
|
||||
if (index == rate) {
|
||||
permute_state<S, T>(state, constants);
|
||||
index = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (index) { permute_state<S, T>(state, constants); }
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void hash_2d_kernel(
|
||||
const Matrix<S>* inputs,
|
||||
S* output,
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int rate,
|
||||
unsigned int output_len,
|
||||
const Poseidon2Constants<S> constants)
|
||||
{
|
||||
uint64_t idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= inputs[0].height) { return; }
|
||||
|
||||
S state[T] = {0};
|
||||
|
||||
absorb_2d_state<S, T>(inputs, state, number_of_inputs, rate, idx, constants);
|
||||
|
||||
UNROLL
|
||||
for (int i = 0; i < output_len; i++) {
|
||||
output[idx * output_len + i] = state[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
__global__ void compress_and_inject_kernel(
|
||||
const Matrix<S>* matrices_to_inject,
|
||||
unsigned int number_of_inputs,
|
||||
const S* prev_layer,
|
||||
S* next_layer,
|
||||
unsigned int rate,
|
||||
unsigned int digest_elements,
|
||||
const Poseidon2Constants<S> constants)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
uint64_t number_of_rows = d_next_pow_of_two(matrices_to_inject[0].height);
|
||||
if (idx >= number_of_rows) { return; }
|
||||
|
||||
size_t next_layer_len = matrices_to_inject[0].height;
|
||||
S state_to_compress[T] = {S::zero()};
|
||||
|
||||
for (int i = 0; i < digest_elements * 2; i++) {
|
||||
state_to_compress[i] = prev_layer[idx * 2 * digest_elements + i];
|
||||
}
|
||||
permute_state<S, T>(state_to_compress, constants);
|
||||
|
||||
S injected_state[T] = {S::zero()};
|
||||
if (idx < next_layer_len) {
|
||||
absorb_2d_state<S, T>(matrices_to_inject, injected_state, number_of_inputs, rate, idx, constants);
|
||||
|
||||
for (int i = 0; i < digest_elements; i++) {
|
||||
injected_state[digest_elements + i] = injected_state[i];
|
||||
injected_state[i] = state_to_compress[i];
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < digest_elements; i++) {
|
||||
injected_state[i] = state_to_compress[i];
|
||||
}
|
||||
}
|
||||
permute_state<S, T>(injected_state, constants);
|
||||
|
||||
for (int i = 0; i < digest_elements; i++) {
|
||||
next_layer[idx * digest_elements + i] = injected_state[i];
|
||||
}
|
||||
}
|
||||
} // namespace poseidon2
|
||||
|
||||
#endif
|
||||
@@ -8,124 +8,172 @@
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
|
||||
#include "poseidon2/constants.cuh"
|
||||
#include "poseidon2/kernels.cuh"
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
/**
|
||||
* @namespace poseidon2
|
||||
* Implementation of the [Poseidon2 hash function](https://eprint.iacr.org/2023/323.pdf)
|
||||
* Specifically, the optimized [Filecoin version](https://spec.filecoin.io/algorithms/crypto/poseidon/)
|
||||
*/
|
||||
namespace poseidon2 {
|
||||
/**
|
||||
* For most of the Poseidon2 configurations this is the case
|
||||
*/
|
||||
const int EXTERNAL_ROUNDS_DEFAULT = 8;
|
||||
|
||||
enum DiffusionStrategy {
|
||||
DEFAULT_DIFFUSION,
|
||||
MONTGOMERY,
|
||||
};
|
||||
|
||||
enum MdsType { DEFAULT_MDS, PLONKY };
|
||||
|
||||
enum PoseidonMode {
|
||||
COMPRESSION,
|
||||
PERMUTATION,
|
||||
};
|
||||
|
||||
/**
|
||||
* @struct Poseidon2Constants
|
||||
* These constants are enough to define a Poseidon2 instance
|
||||
* @param round_constants A pointer to round constants allocated on the device
|
||||
* @param mds_matrix A pointer to an mds matrix allocated on the device
|
||||
* @param non_sparse_matrix A pointer to non sparse matrix allocated on the device
|
||||
* @param sparse_matrices A pointer to sparse matrices allocated on the device
|
||||
*/
|
||||
template <typename S>
|
||||
struct Poseidon2Constants {
|
||||
int width;
|
||||
int alpha;
|
||||
int internal_rounds;
|
||||
int external_rounds;
|
||||
S* round_constants = nullptr;
|
||||
S* internal_matrix_diag = nullptr;
|
||||
MdsType mds_type;
|
||||
DiffusionStrategy diffusion;
|
||||
};
|
||||
|
||||
/**
|
||||
* @struct Poseidon2Config
|
||||
* Struct that encodes various Poseidon2 parameters.
|
||||
*/
|
||||
struct Poseidon2Config {
|
||||
device_context::DeviceContext ctx; /**< Details related to the device such as its id and stream id. */
|
||||
bool are_states_on_device; /**< True if inputs are on device and false if they're on host. Default value: false. */
|
||||
bool are_outputs_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
PoseidonMode mode;
|
||||
int output_index;
|
||||
bool
|
||||
is_async; /**< Whether to run the Poseidon2 asynchronously. If set to `true`, the poseidon_hash function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the poseidon_hash
|
||||
* function will block the current CPU thread. */
|
||||
};
|
||||
|
||||
static Poseidon2Config default_poseidon2_config(
|
||||
int t, const device_context::DeviceContext& ctx = device_context::get_default_device_context())
|
||||
class Poseidon2 : public hash::Hasher<S, S>
|
||||
{
|
||||
Poseidon2Config config = {
|
||||
ctx, // ctx
|
||||
false, // are_states_on_device
|
||||
false, // are_outputs_on_device
|
||||
PoseidonMode::COMPRESSION,
|
||||
1, // output_index
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
}
|
||||
static const int POSEIDON_BLOCK_SIZE = 32;
|
||||
|
||||
template <typename S>
|
||||
cudaError_t create_poseidon2_constants(
|
||||
int width,
|
||||
int alpha,
|
||||
int internal_rounds,
|
||||
int external_rounds,
|
||||
const S* round_constants,
|
||||
const S* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* poseidon_constants);
|
||||
static inline int poseidon_number_of_blocks(size_t number_of_states)
|
||||
{
|
||||
return number_of_states / POSEIDON_BLOCK_SIZE + static_cast<bool>(number_of_states % POSEIDON_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads pre-calculated optimized constants, moves them to the device
|
||||
*/
|
||||
template <typename S>
|
||||
cudaError_t init_poseidon2_constants(
|
||||
int width,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx,
|
||||
Poseidon2Constants<S>* constants);
|
||||
public:
|
||||
const std::size_t device_id;
|
||||
Poseidon2Constants<S> constants;
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_poseidon2_constants(Poseidon2Constants<S>* constants, device_context::DeviceContext& ctx);
|
||||
cudaError_t hash_2d(
|
||||
const Matrix<S>* inputs,
|
||||
S* output,
|
||||
unsigned int number_of_inputs,
|
||||
unsigned int output_len,
|
||||
uint64_t number_of_rows,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
#define P2_HASH_2D_T(width) \
|
||||
case width: \
|
||||
hash_2d_kernel<S, width><<<poseidon_number_of_blocks(number_of_rows), POSEIDON_BLOCK_SIZE, 0, ctx.stream>>>( \
|
||||
inputs, output, number_of_inputs, this->rate, output_len, this->constants); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P2_HASH_2D_T(2)
|
||||
P2_HASH_2D_T(3)
|
||||
P2_HASH_2D_T(4)
|
||||
P2_HASH_2D_T(8)
|
||||
P2_HASH_2D_T(12)
|
||||
P2_HASH_2D_T(16)
|
||||
P2_HASH_2D_T(20)
|
||||
P2_HASH_2D_T(24)
|
||||
default:
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "PoseidonAbsorb2d: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
cudaError_t run_hash_many_kernel(
|
||||
const S* input,
|
||||
S* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
#define P2_HASH_MANY_T(width) \
|
||||
case width: \
|
||||
hash_many_kernel<S, width><<<poseidon_number_of_blocks(number_of_states), POSEIDON_BLOCK_SIZE, 0, ctx.stream>>>( \
|
||||
input, output, number_of_states, input_len, output_len, this->constants); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P2_HASH_MANY_T(2)
|
||||
P2_HASH_MANY_T(3)
|
||||
P2_HASH_MANY_T(4)
|
||||
P2_HASH_MANY_T(8)
|
||||
P2_HASH_MANY_T(12)
|
||||
P2_HASH_MANY_T(16)
|
||||
P2_HASH_MANY_T(20)
|
||||
P2_HASH_MANY_T(24)
|
||||
default:
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "PoseidonPermutation: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
|
||||
}
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
cudaError_t compress_and_inject(
|
||||
const Matrix<S>* matrices_to_inject,
|
||||
unsigned int number_of_inputs,
|
||||
uint64_t number_of_rows,
|
||||
const S* prev_layer,
|
||||
S* next_layer,
|
||||
unsigned int digest_elements,
|
||||
const device_context::DeviceContext& ctx) const override
|
||||
{
|
||||
#define P2_COMPRESS_AND_INJECT_T(width) \
|
||||
case width: \
|
||||
compress_and_inject_kernel<S, width> \
|
||||
<<<poseidon_number_of_blocks(number_of_rows), POSEIDON_BLOCK_SIZE, 0, ctx.stream>>>( \
|
||||
matrices_to_inject, number_of_inputs, prev_layer, next_layer, this->rate, digest_elements, this->constants); \
|
||||
break;
|
||||
|
||||
switch (this->width) {
|
||||
P2_COMPRESS_AND_INJECT_T(2)
|
||||
P2_COMPRESS_AND_INJECT_T(3)
|
||||
P2_COMPRESS_AND_INJECT_T(4)
|
||||
P2_COMPRESS_AND_INJECT_T(8)
|
||||
P2_COMPRESS_AND_INJECT_T(12)
|
||||
P2_COMPRESS_AND_INJECT_T(16)
|
||||
P2_COMPRESS_AND_INJECT_T(20)
|
||||
P2_COMPRESS_AND_INJECT_T(24)
|
||||
default:
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "PoseidonPermutation: #width must be one of [2, 3, 4, 8, 12, 16, 20, 24]");
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
Poseidon2(
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
unsigned int alpha,
|
||||
unsigned int internal_rounds,
|
||||
unsigned int external_rounds,
|
||||
const S* round_constants,
|
||||
const S* internal_matrix_diag,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx)
|
||||
: hash::Hasher<S, S>(width, width, rate, 0), device_id(ctx.device_id)
|
||||
{
|
||||
Poseidon2Constants<S> constants;
|
||||
CHK_STICKY(create_poseidon2_constants(
|
||||
width, alpha, internal_rounds, external_rounds, round_constants, internal_matrix_diag, mds_type, diffusion, ctx,
|
||||
&constants));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
Poseidon2(
|
||||
unsigned int width,
|
||||
unsigned int rate,
|
||||
MdsType mds_type,
|
||||
DiffusionStrategy diffusion,
|
||||
device_context::DeviceContext& ctx)
|
||||
: hash::Hasher<S, S>(width, width, rate, 0), device_id(ctx.device_id)
|
||||
{
|
||||
Poseidon2Constants<S> constants;
|
||||
CHK_STICKY(init_poseidon2_constants(width, mds_type, diffusion, ctx, &constants));
|
||||
this->constants = constants;
|
||||
}
|
||||
|
||||
~Poseidon2()
|
||||
{
|
||||
auto ctx = device_context::get_default_device_context();
|
||||
ctx.device_id = this->device_id;
|
||||
CHK_STICKY(release_poseidon2_constants<S>(&this->constants, ctx));
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Compute the poseidon hash over a sequence of preimages.
|
||||
* Takes {number_of_states * (T-1)} elements of input and computes {number_of_states} hash images
|
||||
* @param T size of the poseidon state, should be equal to {arity + 1}
|
||||
* @param states a pointer to the input data. May be allocated on device or on host, regulated
|
||||
* by the config. May point to a string of preimages or a string of states filled with preimages.
|
||||
* @param output a pointer to the output data. May be allocated on device or on host, regulated
|
||||
* by the config. Must be at least of size [number_of_states](@ref number_of_states)
|
||||
* @param number_of_states number of input blocks of size T-1 (arity)
|
||||
*/
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon2_hash(
|
||||
const S* states,
|
||||
S* output,
|
||||
size_t number_of_states,
|
||||
const Poseidon2Constants<S>& constants,
|
||||
const Poseidon2Config& config);
|
||||
} // namespace poseidon2
|
||||
|
||||
#endif
|
||||
@@ -5,4 +5,15 @@
|
||||
#define CONCAT_DIRECT(a, b) a##_##b
|
||||
#define CONCAT_EXPAND(a, b) CONCAT_DIRECT(a, b) // expand a,b before concatenation
|
||||
|
||||
static unsigned int next_pow_of_two(unsigned int v) {
|
||||
v--;
|
||||
v |= v >> 1;
|
||||
v |= v >> 2;
|
||||
v |= v >> 4;
|
||||
v |= v >> 8;
|
||||
v |= v >> 16;
|
||||
v++;
|
||||
return v;
|
||||
}
|
||||
|
||||
#endif // ICICLE_UTILS_H
|
||||
@@ -105,12 +105,12 @@ namespace vec_ops {
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
template <typename E>
|
||||
cudaError_t transpose_batch(
|
||||
cudaError_t transpose_matrix(
|
||||
const E* mat_in,
|
||||
E* mat_out,
|
||||
uint32_t row_size,
|
||||
uint32_t column_size,
|
||||
device_context::DeviceContext& ctx,
|
||||
const device_context::DeviceContext& ctx,
|
||||
bool on_device,
|
||||
bool is_async);
|
||||
|
||||
|
||||
@@ -20,6 +20,11 @@ extern "C" void CONCAT_EXPAND(CURVE, to_affine)(projective_t* point, affine_t* p
|
||||
*point_out = projective_t::to_affine(*point);
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, from_affine)(affine_t* point, projective_t* point_out)
|
||||
{
|
||||
*point_out = projective_t::from_affine(*point);
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, generate_projective_points)(projective_t* points, int size)
|
||||
{
|
||||
projective_t::rand_host_many(points, size);
|
||||
|
||||
@@ -20,6 +20,11 @@ extern "C" void CONCAT_EXPAND(CURVE, g2_to_affine)(g2_projective_t* point, g2_af
|
||||
*point_out = g2_projective_t::to_affine(*point);
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, g2_from_affine)(g2_affine_t* point, g2_projective_t* point_out)
|
||||
{
|
||||
*point_out = g2_projective_t::from_affine(*point);
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, g2_generate_projective_points)(g2_projective_t* points, int size)
|
||||
{
|
||||
g2_projective_t::rand_host_many(points, size);
|
||||
|
||||
@@ -11,6 +11,9 @@ set(SRC ${CMAKE_SOURCE_DIR}/src)
|
||||
|
||||
set(FIELD_SOURCE ${SRC}/fields/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/vec_ops/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/merkle-tree/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/merkle-tree/extern_mmcs.cu)
|
||||
|
||||
if(EXT_FIELD)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/fields/extern_extension.cu)
|
||||
if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_NTT)
|
||||
@@ -27,8 +30,6 @@ set(POLYNOMIAL_SOURCE_FILES
|
||||
# TODO: impl poseidon for small fields. note that it needs to be defined over the extension field!
|
||||
if (DEFINED CURVE)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/poseidon.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/tree/merkle.cu)
|
||||
endif()
|
||||
|
||||
if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_POSEIDON2)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
set(TARGET icicle_hash)
|
||||
|
||||
add_library(${TARGET} STATIC keccak/keccak.cu)
|
||||
add_library(${TARGET} STATIC keccak/extern.cu)
|
||||
target_include_directories(${TARGET} PUBLIC ${CMAKE_SOURCE_DIR}/include/)
|
||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "ingo_hash")
|
||||
2
icicle/src/hash/keccak/.gitignore
vendored
Normal file
2
icicle/src/hash/keccak/.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
test_keccak
|
||||
test_keccak_tree
|
||||
@@ -1,2 +1,10 @@
|
||||
test_keccak_tree: test_tree.cu keccak.cu ../../merkle-tree/merkle.cu
|
||||
nvcc -DMERKLE_DEBUG -o test_keccak_tree -I../../../include test_tree.cu
|
||||
./test_keccak_tree
|
||||
|
||||
test_keccak: test.cu keccak.cu
|
||||
nvcc -o test_keccak -I. -I../.. test.cu
|
||||
nvcc -o test_keccak -I../../../include test.cu
|
||||
./test_keccak
|
||||
|
||||
clear:
|
||||
rm test_keccak test_keccak_tree
|
||||
87
icicle/src/hash/keccak/extern.cu
Normal file
87
icicle/src/hash/keccak/extern.cu
Normal file
@@ -0,0 +1,87 @@
|
||||
#include "utils/utils.h"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
#include "hash/keccak/keccak.cuh"
|
||||
#include "keccak.cu"
|
||||
#include "../../merkle-tree/merkle.cu"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
namespace keccak {
|
||||
extern "C" cudaError_t
|
||||
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config)
|
||||
{
|
||||
return Keccak256().hash_many(
|
||||
input, (uint64_t*)output, number_of_blocks, input_block_size, KECCAK_256_DIGEST, config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config)
|
||||
{
|
||||
return Keccak512().hash_many(
|
||||
input, (uint64_t*)output, number_of_blocks, input_block_size, KECCAK_512_DIGEST, config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t
|
||||
sha3_256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config)
|
||||
{
|
||||
return Sha3_256().hash_many(
|
||||
input, (uint64_t*)output, number_of_blocks, input_block_size, KECCAK_256_DIGEST, config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t
|
||||
sha3_512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, HashConfig& config)
|
||||
{
|
||||
return Sha3_512().hash_many(
|
||||
input, (uint64_t*)output, number_of_blocks, input_block_size, KECCAK_512_DIGEST, config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t build_keccak256_merkle_tree_cuda(
|
||||
const uint8_t* leaves,
|
||||
uint64_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config)
|
||||
{
|
||||
Keccak256 keccak;
|
||||
return merkle_tree::build_merkle_tree<uint8_t, uint64_t>(
|
||||
leaves, digests, height, input_block_len, keccak, keccak, tree_config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t build_keccak512_merkle_tree_cuda(
|
||||
const uint8_t* leaves,
|
||||
uint64_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config)
|
||||
{
|
||||
Keccak512 keccak;
|
||||
return merkle_tree::build_merkle_tree<uint8_t, uint64_t>(
|
||||
leaves, digests, height, input_block_len, keccak, keccak, tree_config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t build_sha3_256_merkle_tree_cuda(
|
||||
const uint8_t* leaves,
|
||||
uint64_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config)
|
||||
{
|
||||
Sha3_256 keccak;
|
||||
return merkle_tree::build_merkle_tree<uint8_t, uint64_t>(
|
||||
leaves, digests, height, input_block_len, keccak, keccak, tree_config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t build_sha3_512_merkle_tree_cuda(
|
||||
const uint8_t* leaves,
|
||||
uint64_t* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const merkle_tree::TreeBuilderConfig& tree_config)
|
||||
{
|
||||
Sha3_512 keccak;
|
||||
return merkle_tree::build_merkle_tree<uint8_t, uint64_t>(
|
||||
leaves, digests, height, input_block_len, keccak, keccak, tree_config);
|
||||
}
|
||||
|
||||
} // namespace keccak
|
||||
@@ -1,6 +1,16 @@
|
||||
#include <cstdint>
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
#include "hash/keccak/keccak.cuh"
|
||||
|
||||
using namespace hash;
|
||||
|
||||
namespace keccak {
|
||||
using u64 = uint64_t;
|
||||
|
||||
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||
|
||||
#define TH_ELT(t, c0, c1, c2, c3, c4, d0, d1, d2, d3, d4) \
|
||||
@@ -144,16 +154,16 @@ namespace keccak {
|
||||
element ^= rc; \
|
||||
}
|
||||
|
||||
__device__ const uint64_t RC[24] = {0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000,
|
||||
0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009,
|
||||
0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003,
|
||||
0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a,
|
||||
0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008};
|
||||
__device__ const u64 RC[24] = {0x0000000000000001, 0x0000000000008082, 0x800000000000808a, 0x8000000080008000,
|
||||
0x000000000000808b, 0x0000000080000001, 0x8000000080008081, 0x8000000000008009,
|
||||
0x000000000000008a, 0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
|
||||
0x000000008000808b, 0x800000000000008b, 0x8000000000008089, 0x8000000000008003,
|
||||
0x8000000000008002, 0x8000000000000080, 0x000000000000800a, 0x800000008000000a,
|
||||
0x8000000080008081, 0x8000000000008080, 0x0000000080000001, 0x8000000080008008};
|
||||
|
||||
__device__ void keccakf(uint64_t s[25])
|
||||
__device__ void keccakf(u64 s[KECCAK_STATE_SIZE])
|
||||
{
|
||||
uint64_t t0, t1, t2, t3, t4;
|
||||
u64 t0, t1, t2, t3, t4;
|
||||
|
||||
for (int i = 0; i < 24; i++) {
|
||||
THETA(
|
||||
@@ -169,107 +179,83 @@ namespace keccak {
|
||||
}
|
||||
}
|
||||
|
||||
template <int C, int D>
|
||||
__global__ void keccak_hash_blocks(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output)
|
||||
template <const int R>
|
||||
__global__ void keccak_hash_blocks(
|
||||
const uint8_t* input,
|
||||
int input_block_size,
|
||||
int output_len,
|
||||
int number_of_blocks,
|
||||
uint64_t* output,
|
||||
int padding_const)
|
||||
{
|
||||
int bid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (bid >= number_of_blocks) { return; }
|
||||
int sid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (sid >= number_of_blocks) { return; }
|
||||
|
||||
const int r_bits = 1600 - C;
|
||||
const int r_bytes = r_bits / 8;
|
||||
const int d_bytes = D / 8;
|
||||
|
||||
uint8_t* b_input = input + bid * input_block_size;
|
||||
uint8_t* b_output = output + bid * d_bytes;
|
||||
uint64_t state[25] = {}; // Initialize with zeroes
|
||||
const uint8_t* b_input = input + sid * input_block_size;
|
||||
uint64_t* b_output = output + sid * output_len;
|
||||
uint64_t state[KECCAK_STATE_SIZE] = {}; // Initialize with zeroes
|
||||
|
||||
int input_len = input_block_size;
|
||||
|
||||
// absorb
|
||||
while (input_len >= r_bytes) {
|
||||
// #pragma unroll
|
||||
for (int i = 0; i < r_bytes; i += 8) {
|
||||
while (input_len >= R) {
|
||||
for (int i = 0; i < R; i += 8) {
|
||||
state[i / 8] ^= *(uint64_t*)(b_input + i);
|
||||
}
|
||||
keccakf(state);
|
||||
b_input += r_bytes;
|
||||
input_len -= r_bytes;
|
||||
b_input += R;
|
||||
input_len -= R;
|
||||
}
|
||||
|
||||
// last block (if any)
|
||||
uint8_t last_block[r_bytes];
|
||||
uint8_t last_block[R];
|
||||
for (int i = 0; i < input_len; i++) {
|
||||
last_block[i] = b_input[i];
|
||||
}
|
||||
|
||||
// pad 10*1
|
||||
last_block[input_len] = 1;
|
||||
for (int i = 0; i < r_bytes - input_len - 1; i++) {
|
||||
last_block[input_len] = padding_const;
|
||||
for (int i = 0; i < R - input_len - 1; i++) {
|
||||
last_block[input_len + i + 1] = 0;
|
||||
}
|
||||
// last bit
|
||||
last_block[r_bytes - 1] |= 0x80;
|
||||
last_block[R - 1] |= 0x80;
|
||||
|
||||
// #pragma unroll
|
||||
for (int i = 0; i < r_bytes; i += 8) {
|
||||
for (int i = 0; i < R; i += 8) {
|
||||
state[i / 8] ^= *(uint64_t*)(last_block + i);
|
||||
}
|
||||
keccakf(state);
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < d_bytes; i += 8) {
|
||||
*(uint64_t*)(b_output + i) = state[i / 8];
|
||||
for (int i = 0; i < output_len; i++) {
|
||||
b_output[i] = state[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <int C, int D>
|
||||
cudaError_t
|
||||
keccak_hash(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config)
|
||||
cudaError_t Keccak::run_hash_many_kernel(
|
||||
const uint8_t* input,
|
||||
uint64_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_len,
|
||||
unsigned int output_len,
|
||||
const device_context::DeviceContext& ctx) const
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = config.ctx.stream;
|
||||
int number_of_threads = 256;
|
||||
int number_of_gpu_blocks = (number_of_states - 1) / number_of_threads + 1;
|
||||
|
||||
uint8_t* input_device;
|
||||
if (config.are_inputs_on_device) {
|
||||
input_device = input;
|
||||
} else {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&input_device, number_of_blocks * input_block_size, stream));
|
||||
CHK_IF_RETURN(
|
||||
cudaMemcpyAsync(input_device, input, number_of_blocks * input_block_size, cudaMemcpyHostToDevice, stream));
|
||||
switch (rate) {
|
||||
case KECCAK_256_RATE:
|
||||
keccak_hash_blocks<KECCAK_256_RATE><<<number_of_gpu_blocks, number_of_threads, 0, ctx.stream>>>(
|
||||
input, input_len, output_len, number_of_states, output, PADDING_CONST);
|
||||
break;
|
||||
case KECCAK_512_RATE:
|
||||
keccak_hash_blocks<KECCAK_512_RATE><<<number_of_gpu_blocks, number_of_threads, 0, ctx.stream>>>(
|
||||
input, input_len, output_len, number_of_states, output, PADDING_CONST);
|
||||
break;
|
||||
default:
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "KeccakHash: #rate must be one of [136, 72]");
|
||||
}
|
||||
|
||||
uint8_t* output_device;
|
||||
if (config.are_outputs_on_device) {
|
||||
output_device = output;
|
||||
} else {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&output_device, number_of_blocks * (D / 8), stream));
|
||||
}
|
||||
|
||||
int number_of_threads = 512;
|
||||
int number_of_gpu_blocks = (number_of_blocks - 1) / number_of_threads + 1;
|
||||
keccak_hash_blocks<C, D><<<number_of_gpu_blocks, number_of_threads, 0, stream>>>(
|
||||
input_device, input_block_size, number_of_blocks, output_device);
|
||||
|
||||
if (!config.are_inputs_on_device) CHK_IF_RETURN(cudaFreeAsync(input_device, stream));
|
||||
|
||||
if (!config.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(output, output_device, number_of_blocks * (D / 8), cudaMemcpyDeviceToHost, stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(output_device, stream));
|
||||
}
|
||||
|
||||
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
|
||||
CHK_IF_RETURN(cudaPeekAtLastError());
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak256_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config)
|
||||
{
|
||||
return keccak_hash<512, 256>(input, input_block_size, number_of_blocks, output, config);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t
|
||||
keccak512_cuda(uint8_t* input, int input_block_size, int number_of_blocks, uint8_t* output, KeccakConfig& config)
|
||||
{
|
||||
return keccak_hash<1024, 512>(input, input_block_size, number_of_blocks, output, config);
|
||||
}
|
||||
} // namespace keccak
|
||||
@@ -1,5 +1,5 @@
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "keccak.cu"
|
||||
#include "extern.cu"
|
||||
|
||||
// #define DEBUG
|
||||
|
||||
@@ -50,7 +50,7 @@ int main(int argc, char* argv[])
|
||||
uint8_t* out_ptr = static_cast<uint8_t*>(malloc(number_of_blocks * (D / 8)));
|
||||
|
||||
START_TIMER(keccak_timer);
|
||||
KeccakConfig config = default_keccak_config();
|
||||
HashConfig config = default_hash_config();
|
||||
keccak256_cuda(in_ptr, input_block_size, number_of_blocks, out_ptr, config);
|
||||
END_TIMER(keccak_timer, "Keccak")
|
||||
|
||||
|
||||
Binary file not shown.
91
icicle/src/hash/keccak/test_tree.cu
Normal file
91
icicle/src/hash/keccak/test_tree.cu
Normal file
@@ -0,0 +1,91 @@
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "extern.cu"
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
using namespace keccak;
|
||||
|
||||
#define D 256
|
||||
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
void uint8_to_hex_string(const uint8_t* values, int size)
|
||||
{
|
||||
std::stringstream ss;
|
||||
|
||||
for (int i = 0; i < size; ++i) {
|
||||
ss << std::hex << std::setw(2) << std::setfill('0') << (int)values[i];
|
||||
}
|
||||
|
||||
std::string hexString = ss.str();
|
||||
std::cout << hexString << std::endl;
|
||||
}
|
||||
|
||||
#define A 2
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
using FpMicroseconds = std::chrono::duration<float, std::chrono::microseconds::period>;
|
||||
|
||||
/// Tree of height N and arity A contains \sum{A^i} for i in 0..N-1 elements
|
||||
uint32_t input_block_len = 136;
|
||||
uint32_t tree_height = argc > 1 ? atoi(argv[1]) : 10;
|
||||
uint32_t number_of_leaves = pow(A, tree_height);
|
||||
uint32_t total_number_of_leaves = number_of_leaves * input_block_len;
|
||||
|
||||
/// Use keep_rows to specify how many rows you want to store
|
||||
int keep_rows = argc > 2 ? atoi(argv[2]) : 7;
|
||||
size_t digests_len = merkle_tree::get_digests_len(keep_rows - 1, A, 1);
|
||||
|
||||
/// Fill leaves with scalars [0, 1, ... 2^tree_height - 1]
|
||||
START_TIMER(timer_allocation);
|
||||
uint8_t input = 0;
|
||||
uint8_t* leaves = static_cast<uint8_t*>(malloc(total_number_of_leaves));
|
||||
for (uint64_t i = 0; i < total_number_of_leaves; i++) {
|
||||
leaves[i] = (uint8_t)i;
|
||||
}
|
||||
END_TIMER(timer_allocation, "Allocated memory for leaves: ");
|
||||
|
||||
/// Allocate memory for digests of {keep_rows} rows of a tree
|
||||
START_TIMER(timer_digests);
|
||||
size_t digests_mem = digests_len * sizeof(uint64_t);
|
||||
uint64_t* digests = static_cast<uint64_t*>(malloc(digests_mem));
|
||||
END_TIMER(timer_digests, "Allocated memory for digests");
|
||||
|
||||
std::cout << "Memory for leaves = " << total_number_of_leaves / 1024 / 1024 << " MB; "
|
||||
<< total_number_of_leaves / 1024 / 1024 / 1024 << " GB" << std::endl;
|
||||
std::cout << "Number of leaves = " << number_of_leaves << std::endl;
|
||||
std::cout << "Total Number of leaves = " << total_number_of_leaves << std::endl;
|
||||
std::cout << "Memory for digests = " << digests_mem / 1024 / 1024 << " MB; " << digests_mem / 1024 / 1024 / 1024
|
||||
<< " GB" << std::endl;
|
||||
std::cout << "Number of digest elements = " << digests_len << std::endl;
|
||||
|
||||
std::cout << "Total RAM consumption = " << (digests_mem + total_number_of_leaves) / 1024 / 1024 << " MB; "
|
||||
<< (digests_mem + total_number_of_leaves) / 1024 / 1024 / 1024 << " GB" << std::endl;
|
||||
|
||||
merkle_tree::TreeBuilderConfig config = merkle_tree::default_merkle_config();
|
||||
config.arity = A;
|
||||
config.keep_rows = keep_rows;
|
||||
START_TIMER(keccak_timer);
|
||||
build_keccak256_merkle_tree_cuda(leaves, digests, tree_height, input_block_len, config);
|
||||
END_TIMER(keccak_timer, "Keccak")
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
uint64_t root = digests[i];
|
||||
std::cout << root << std::endl;
|
||||
// assert(root == expected[i]);
|
||||
}
|
||||
free(digests);
|
||||
free(leaves);
|
||||
}
|
||||
|
||||
#endif
|
||||
25
icicle/src/merkle-tree/extern.cu
Normal file
25
icicle/src/merkle-tree/extern.cu
Normal file
@@ -0,0 +1,25 @@
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "merkle.cu"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
#include "fields/field_config.cuh"
|
||||
using namespace field_config;
|
||||
|
||||
namespace merkle_tree {
|
||||
/// C-linkage entry point for the `scalar_t` instantiation of `build_merkle_tree`.
/// The two hashers arrive as pointers and are dereferenced before being forwarded
/// together with all other arguments; the status of the templated call is returned.
extern "C" cudaError_t CONCAT_EXPAND(FIELD, build_merkle_tree)(
  const scalar_t* leaves_digests,
  scalar_t* digests,
  unsigned int height,
  unsigned int input_block_len,
  const hash::Hasher<scalar_t, scalar_t>* compression,
  const hash::Hasher<scalar_t, scalar_t>* bottom_layer,
  const TreeBuilderConfig& tree_config)
{
  const hash::Hasher<scalar_t, scalar_t>& compression_hasher = *compression;
  const hash::Hasher<scalar_t, scalar_t>& bottom_layer_hasher = *bottom_layer;

  return build_merkle_tree<scalar_t, scalar_t>(
    leaves_digests, digests, height, input_block_len, compression_hasher, bottom_layer_hasher, tree_config);
}
|
||||
} // namespace merkle_tree
|
||||
26
icicle/src/merkle-tree/extern_mmcs.cu
Normal file
26
icicle/src/merkle-tree/extern_mmcs.cu
Normal file
@@ -0,0 +1,26 @@
|
||||
#include "utils/utils.h"
|
||||
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "mmcs.cu"
|
||||
|
||||
#include "hash/hash.cuh"
|
||||
|
||||
#include "fields/field_config.cuh"
|
||||
using namespace field_config;
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
namespace merkle_tree {
|
||||
/// C-linkage entry point for the `scalar_t` instantiation of `mmcs_commit`.
/// The two hashers arrive as pointers and are dereferenced before being forwarded
/// together with all other arguments; the status of the templated call is returned.
extern "C" cudaError_t CONCAT_EXPAND(FIELD, mmcs_commit_cuda)(
  const Matrix<scalar_t>* leaves,
  unsigned int number_of_inputs,
  scalar_t* digests,
  const hash::Hasher<scalar_t, scalar_t>* hasher,
  const hash::Hasher<scalar_t, scalar_t>* compression,
  const TreeBuilderConfig& tree_config)
{
  const hash::Hasher<scalar_t, scalar_t>& leaf_hasher = *hasher;
  const hash::Hasher<scalar_t, scalar_t>& compression_hasher = *compression;

  return mmcs_commit<scalar_t, scalar_t>(
    leaves, number_of_inputs, digests, leaf_hasher, compression_hasher, tree_config);
}
|
||||
} // namespace merkle_tree
|
||||
333
icicle/src/merkle-tree/merkle.cu
Normal file
333
icicle/src/merkle-tree/merkle.cu
Normal file
@@ -0,0 +1,333 @@
|
||||
#include "hash/hash.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
namespace merkle_tree {
|
||||
/// Constructs merkle subtree without parallelization
|
||||
/// The digests are aligned sequentially per row
|
||||
/// Example:
|
||||
///
|
||||
/// Big tree:
|
||||
///
|
||||
/// 1 <- Root
|
||||
/// / \ <- Arity = 2
|
||||
/// 2 3 <- Digests
|
||||
/// / \ / \ <- Height = 2 (as the number of edges)
|
||||
/// 4 5 6 7 <- arity^height leaves
|
||||
/// | | | | <- Bottom layer hash 1 to 1
|
||||
/// a b c d <- Input vector 1x4
|
||||
///
|
||||
/// Subtree 1 Subtree 2
|
||||
/// 2 3
|
||||
/// / \ / \
|
||||
/// 4 5 6 7
|
||||
///
|
||||
/// Digests array for subtree 1:
|
||||
/// [4 5 . . 2 . .]
|
||||
/// | | |
|
||||
/// ----- V
|
||||
/// | Segment (offset = 4, subtree_idx = 0)
|
||||
/// v
|
||||
/// Segment (offset = 0, subtree_idx = 0)
|
||||
///
|
||||
/// Digests array for subtree 2:
|
||||
/// [. . 6 7 . 3 .]
|
||||
/// | |
|
||||
/// -----
|
||||
/// |
|
||||
/// v
|
||||
/// Segment (offset = 0, subtree_idx = 1)
|
||||
///
|
||||
/// Total digests array:
|
||||
/// [4 5 6 7 2 3 .]
|
||||
///
|
||||
/// Example for custom config:
|
||||
///
|
||||
/// arity = 2
|
||||
/// input_block_len = 2
|
||||
/// digest_elements = 2
|
||||
/// bottom_layer hash width = 4
|
||||
/// compression width = 4
|
||||
/// height = 2
|
||||
///
|
||||
/// [a, b] <- Root of the tree
|
||||
/// | |
|
||||
/// [a, b, c, d]
|
||||
/// / \ / \
|
||||
/// [i, j, m, n]
|
||||
/// ┌──┬──────┴──┴──┴──┴──────┬──┐
|
||||
/// | | | |
|
||||
/// [i, j, k, l] [m, n, o, p] <- compression states
|
||||
/// / \ / \ / \ / \ <- Running permutation
|
||||
/// [1, 2, 5, 6] [9, 1, 4, 5] <- compression states
|
||||
/// ┌──┬───┴──┴──┼──┤ ┌──┬───┴──┴──┼──┤
|
||||
/// | | | | | | | | <- digest_elements * arity^height leaves
|
||||
/// [1, 2, 3, 4] [5, 6, 7, 8] [9, 1, 2, 3] [4, 5, 6, 7] <- Permuted states
|
||||
/// / \ / \ / \ / \ / \ / \ / \ / \ <- Running permutation
|
||||
/// [a, b, 0, 0] [c, d, 0, 0] [e, f, 0, 0] [g, h, 0, 0] <- States of the bottom layer hash
|
||||
/// | | | | | | | | <- Bottom layer hash 2 to 2
|
||||
/// a b c d e f g h <- Input vector 2x4
|
||||
///
|
||||
/// Input matrix:
|
||||
/// ┌ ┐
|
||||
/// | a b |
|
||||
/// | c d |
|
||||
/// | e f |
|
||||
/// | g h |
|
||||
/// └ ┘
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t build_merkle_subtree(
|
||||
const L* leaves,
|
||||
L* d_leaves,
|
||||
D* digests,
|
||||
size_t subtree_idx,
|
||||
size_t subtree_height,
|
||||
D* big_tree_digests,
|
||||
size_t start_segment_size,
|
||||
size_t start_segment_offset,
|
||||
uint64_t keep_rows,
|
||||
uint64_t input_block_len,
|
||||
const Hasher<L, D>& bottom_layer,
|
||||
const Hasher<L, D>& compression,
|
||||
const TreeBuilderConfig& tree_config,
|
||||
device_context::DeviceContext& ctx)
|
||||
{
|
||||
uint64_t arity = tree_config.arity;
|
||||
|
||||
HashConfig hash_config = default_hash_config(ctx);
|
||||
hash_config.are_inputs_on_device = true;
|
||||
hash_config.are_outputs_on_device = true;
|
||||
hash_config.is_async = true;
|
||||
|
||||
size_t bottom_layer_states = pow(arity, subtree_height);
|
||||
|
||||
if (!tree_config.are_inputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_leaves, leaves, bottom_layer_states * input_block_len * sizeof(L), cudaMemcpyHostToDevice, ctx.stream));
|
||||
}
|
||||
|
||||
bottom_layer.hash_many(
|
||||
tree_config.are_inputs_on_device ? leaves : d_leaves, digests, bottom_layer_states, input_block_len,
|
||||
tree_config.digest_elements, hash_config);
|
||||
|
||||
uint64_t number_of_states = bottom_layer_states / arity;
|
||||
size_t segment_size = start_segment_size;
|
||||
size_t segment_offset = start_segment_offset;
|
||||
|
||||
if (!keep_rows || subtree_height < keep_rows) {
|
||||
D* digests_with_offset = big_tree_digests + segment_offset + subtree_idx * bottom_layer_states;
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
digests_with_offset, digests, bottom_layer_states * tree_config.digest_elements * sizeof(D),
|
||||
cudaMemcpyDeviceToHost, ctx.stream));
|
||||
segment_offset += segment_size;
|
||||
}
|
||||
segment_size /= arity;
|
||||
subtree_height--;
|
||||
|
||||
D* prev_layer = digests;
|
||||
D* next_layer = (D*)d_leaves;
|
||||
|
||||
while (number_of_states > 0) {
|
||||
CHK_IF_RETURN(compression.run_hash_many_kernel(
|
||||
(L*)prev_layer, next_layer, number_of_states,
|
||||
tree_config.digest_elements * tree_config.arity * (sizeof(D) / sizeof(L)), tree_config.digest_elements,
|
||||
hash_config.ctx));
|
||||
|
||||
if (!keep_rows || subtree_height < keep_rows) {
|
||||
D* digests_with_offset =
|
||||
big_tree_digests + segment_offset + subtree_idx * number_of_states * tree_config.digest_elements;
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
digests_with_offset, next_layer, number_of_states * tree_config.digest_elements * sizeof(D),
|
||||
cudaMemcpyDeviceToHost, ctx.stream));
|
||||
segment_offset += segment_size;
|
||||
}
|
||||
swap<D>(&prev_layer, &next_layer);
|
||||
segment_size /= arity;
|
||||
subtree_height--;
|
||||
number_of_states /= arity;
|
||||
}
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t build_merkle_tree(
|
||||
const L* leaves,
|
||||
D* digests,
|
||||
unsigned int height,
|
||||
unsigned int input_block_len,
|
||||
const Hasher<L, D>& compression,
|
||||
const Hasher<L, D>& bottom_layer,
|
||||
const TreeBuilderConfig& tree_config)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = tree_config.ctx.stream;
|
||||
|
||||
if (compression.preimage_max_length < tree_config.arity * tree_config.digest_elements)
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument,
|
||||
"Hash max preimage length does not match merkle tree arity multiplied by digest elements");
|
||||
|
||||
uint64_t number_of_bottom_layer_states = pow(tree_config.arity, height);
|
||||
|
||||
// This will determine how much splitting do we need to do
|
||||
// `number_of_streams` subtrees should fit in the device
|
||||
// This means each subtree should fit in `STREAM_CHUNK_SIZE` memory
|
||||
uint64_t number_of_subtrees = 1;
|
||||
uint64_t subtree_height = height;
|
||||
uint64_t subtree_bottom_layer_states = number_of_bottom_layer_states;
|
||||
uint64_t subtree_leaves_size = subtree_bottom_layer_states * input_block_len;
|
||||
uint64_t subtree_digests_size = subtree_bottom_layer_states * tree_config.digest_elements;
|
||||
|
||||
size_t subtree_d_leaves_memory = std::max(
|
||||
tree_config.are_inputs_on_device ? 0 : (sizeof(L) * subtree_leaves_size),
|
||||
subtree_digests_size * sizeof(D) / tree_config.arity);
|
||||
size_t subtree_memory_required = sizeof(D) * subtree_digests_size + subtree_d_leaves_memory;
|
||||
while (subtree_memory_required > STREAM_CHUNK_SIZE) {
|
||||
number_of_subtrees *= tree_config.arity;
|
||||
subtree_height--;
|
||||
subtree_bottom_layer_states /= tree_config.arity;
|
||||
subtree_digests_size /= tree_config.arity;
|
||||
subtree_leaves_size /= tree_config.arity;
|
||||
subtree_d_leaves_memory /= tree_config.arity;
|
||||
subtree_memory_required = sizeof(D) * subtree_digests_size + subtree_d_leaves_memory;
|
||||
}
|
||||
int cap_height = height - subtree_height;
|
||||
size_t caps_len = pow(tree_config.arity, cap_height) * tree_config.digest_elements;
|
||||
|
||||
size_t available_memory, _total_memory;
|
||||
CHK_IF_RETURN(cudaMemGetInfo(&available_memory, &_total_memory));
|
||||
available_memory -= GIGA / 8; // Leave 128 MB just in case
|
||||
|
||||
// We can effectively parallelize memory copy with streams
|
||||
// as long as they don't operate on more than `STREAM_CHUNK_SIZE` bytes
|
||||
const size_t number_of_streams = std::min((uint64_t)(available_memory / STREAM_CHUNK_SIZE), number_of_subtrees);
|
||||
cudaStream_t* streams = static_cast<cudaStream_t*>(malloc(sizeof(cudaStream_t) * number_of_streams));
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamCreate(&streams[i]));
|
||||
}
|
||||
|
||||
bool caps_mode = tree_config.keep_rows && tree_config.keep_rows <= cap_height;
|
||||
D* caps;
|
||||
if (caps_mode) { caps = static_cast<D*>(malloc(caps_len * sizeof(D))); }
|
||||
|
||||
#ifdef MERKLE_DEBUG
|
||||
std::cout << "Available memory = " << available_memory / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Number of streams = " << number_of_streams << std::endl;
|
||||
std::cout << "Number of subtrees = " << number_of_subtrees << std::endl;
|
||||
std::cout << "Height of a subtree = " << subtree_height << std::endl;
|
||||
std::cout << "Cutoff height = " << height - subtree_height << std::endl;
|
||||
std::cout << "Number of leaves in a subtree = " << subtree_bottom_layer_states << std::endl;
|
||||
std::cout << "Digest elements for a subtree = " << subtree_digests_size << std::endl;
|
||||
std::cout << "Size of 1 subtree digests = " << subtree_digests_size * sizeof(D) / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Cap height = " << cap_height << std::endl;
|
||||
std::cout << "Enabling caps mode? " << caps_mode << std::endl;
|
||||
std::cout << "Allocated " << subtree_d_leaves_memory << " bytes for d_leaves" << std::endl;
|
||||
#endif
|
||||
|
||||
// Allocate memory for the leaves and digests
|
||||
// These are shared by streams in a pool
|
||||
L* d_leaves_ptr;
|
||||
D* digests_ptr;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_leaves_ptr, subtree_d_leaves_memory * number_of_streams, stream));
|
||||
CHK_IF_RETURN(cudaMallocAsync(&digests_ptr, subtree_digests_size * number_of_streams * sizeof(D), stream));
|
||||
// Wait for these allocations to finish
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(stream));
|
||||
|
||||
// Build subtrees in parallel. This for loop invokes kernels that can run in a pool of size `number_of_streams`
|
||||
for (size_t subtree_idx = 0; subtree_idx < number_of_subtrees; subtree_idx++) {
|
||||
size_t stream_idx = subtree_idx % number_of_streams;
|
||||
cudaStream_t subtree_stream = streams[stream_idx];
|
||||
|
||||
const L* subtree_leaves = leaves + subtree_idx * subtree_bottom_layer_states * input_block_len;
|
||||
L* subtree_d_leaves = (L*)((unsigned char*)d_leaves_ptr + stream_idx * subtree_d_leaves_memory);
|
||||
D* subtree_digests = digests_ptr + stream_idx * subtree_digests_size;
|
||||
|
||||
int subtree_keep_rows = 0;
|
||||
if (tree_config.keep_rows) {
|
||||
int diff = tree_config.keep_rows - cap_height;
|
||||
subtree_keep_rows = std::max(1, diff);
|
||||
}
|
||||
device_context::DeviceContext subtree_context{subtree_stream, tree_config.ctx.device_id, tree_config.ctx.mempool};
|
||||
|
||||
uint64_t start_segment_size = number_of_bottom_layer_states * tree_config.digest_elements;
|
||||
cudaError_t subtree_result = build_merkle_subtree<L, D>(
|
||||
subtree_leaves, // leaves
|
||||
subtree_d_leaves, // d_leves
|
||||
subtree_digests, // digests
|
||||
subtree_idx, // subtree_idx
|
||||
subtree_height, // subtree_height
|
||||
caps_mode ? caps : digests, // big_tree_digests
|
||||
start_segment_size, // start_segment_size
|
||||
0, // start_segment_offset
|
||||
subtree_keep_rows, // keep_rows
|
||||
input_block_len, // input_block_len
|
||||
bottom_layer, // bottom_layer
|
||||
compression, // compression
|
||||
tree_config, // tree_config
|
||||
subtree_context // subtree_context
|
||||
);
|
||||
CHK_IF_RETURN(subtree_result);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(streams[i]));
|
||||
}
|
||||
|
||||
// Finish the top-level tree if any
|
||||
if (cap_height > 0) {
|
||||
size_t start_segment_size = caps_len / tree_config.arity;
|
||||
size_t start_segment_offset = 0;
|
||||
if (!caps_mode) { // Calculate offset
|
||||
size_t keep_rows = tree_config.keep_rows ? tree_config.keep_rows : height + 1;
|
||||
size_t layer_size = pow(tree_config.arity, keep_rows - 1) * tree_config.digest_elements;
|
||||
for (int i = 0; i < keep_rows - cap_height; i++) {
|
||||
start_segment_offset += layer_size;
|
||||
layer_size /= tree_config.arity;
|
||||
}
|
||||
}
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_leaves_ptr, caps_mode ? caps : (digests + start_segment_offset - caps_len), caps_len * sizeof(D),
|
||||
(caps_mode || !tree_config.are_outputs_on_device) ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice, stream));
|
||||
|
||||
uint64_t number_of_states = caps_len / tree_config.arity / tree_config.digest_elements;
|
||||
|
||||
D* prev_layer = (D*)d_leaves_ptr;
|
||||
D* next_layer = digests_ptr;
|
||||
|
||||
size_t segment_size = start_segment_size;
|
||||
size_t segment_offset = start_segment_offset;
|
||||
while (number_of_states > 0) {
|
||||
CHK_IF_RETURN(compression.run_hash_many_kernel(
|
||||
(L*)prev_layer, next_layer, number_of_states,
|
||||
tree_config.digest_elements * tree_config.arity * (sizeof(D) / sizeof(L)), tree_config.digest_elements,
|
||||
tree_config.ctx));
|
||||
if (!tree_config.keep_rows || cap_height < tree_config.keep_rows + (int)caps_mode) {
|
||||
D* digests_with_offset = digests + segment_offset;
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
digests_with_offset, next_layer, number_of_states * tree_config.digest_elements * sizeof(D),
|
||||
cudaMemcpyDeviceToHost, stream));
|
||||
segment_offset += segment_size;
|
||||
}
|
||||
|
||||
swap<D>(&prev_layer, &next_layer);
|
||||
|
||||
segment_size /= tree_config.arity;
|
||||
cap_height--;
|
||||
number_of_states /= tree_config.arity;
|
||||
}
|
||||
if (caps_mode) { free(caps); }
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaFreeAsync(d_leaves_ptr, stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(digests_ptr, stream));
|
||||
if (!tree_config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(streams[i]));
|
||||
CHK_IF_RETURN(cudaStreamDestroy(streams[i]));
|
||||
}
|
||||
free(streams);
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
} // namespace merkle_tree
|
||||
456
icicle/src/merkle-tree/mmcs.cu
Normal file
456
icicle/src/merkle-tree/mmcs.cu
Normal file
@@ -0,0 +1,456 @@
|
||||
#include "hash/hash.cuh"
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
#include "matrix/matrix.cuh"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
using matrix::Matrix;
|
||||
|
||||
namespace merkle_tree {
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t hash_leaves(
|
||||
const Matrix<L>* leaves,
|
||||
unsigned int number_of_inputs,
|
||||
uint64_t number_of_rows,
|
||||
D* digests,
|
||||
unsigned int digest_elements,
|
||||
const Hasher<L, D>& hasher,
|
||||
const device_context::DeviceContext& ctx)
|
||||
{
|
||||
HashConfig sponge_config = default_hash_config(ctx);
|
||||
sponge_config.are_inputs_on_device = true;
|
||||
sponge_config.are_outputs_on_device = true;
|
||||
sponge_config.is_async = true;
|
||||
|
||||
uint64_t number_of_rows_padded = next_pow_of_two(number_of_rows);
|
||||
|
||||
CHK_IF_RETURN(hasher.hash_2d(leaves, digests, number_of_inputs, digest_elements, number_of_rows, ctx));
|
||||
|
||||
if (number_of_rows_padded - number_of_rows) {
|
||||
// Pad with default digests
|
||||
cudaMemsetAsync(
|
||||
(void*)(digests + number_of_rows), 0, (number_of_rows_padded - number_of_rows) * digest_elements * sizeof(D),
|
||||
ctx.stream);
|
||||
}
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename L, typename D>
|
||||
struct SubtreeParams {
|
||||
unsigned int number_of_inputs; // Number of input matrices
|
||||
unsigned int arity; // Arity of the tree
|
||||
unsigned int digest_elements; // Number of output elements per hash
|
||||
size_t number_of_rows; // Current number of input rows to operate on
|
||||
size_t number_of_rows_padded; // next power of arity for number_of_rows
|
||||
size_t subtree_idx; // The subtree id
|
||||
size_t number_of_subtrees; // Total number of subtrees
|
||||
uint64_t subtree_height; // Height of one subtree
|
||||
|
||||
/// One segment corresponds to one layer of output digests
|
||||
size_t segment_size; // The size of current segment.
|
||||
size_t segment_offset; // An offset for the current segment
|
||||
unsigned int leaves_offset; // An offset in the sorted list of input matrices
|
||||
unsigned int number_of_leaves_to_inject; // Number of leaves to inject in current level
|
||||
unsigned int keep_rows; // Number of rows to keep
|
||||
bool are_inputs_on_device;
|
||||
bool caps_mode;
|
||||
const Hasher<L, D>* hasher = nullptr;
|
||||
const Hasher<L, D>* compression = nullptr;
|
||||
const device_context::DeviceContext* ctx = nullptr;
|
||||
};
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t slice_and_copy_leaves(
|
||||
const std::vector<Matrix<L>>& leaves, L* d_leaves, Matrix<L>* d_leaves_info, SubtreeParams<L, D>& params)
|
||||
{
|
||||
uint64_t target_height = params.number_of_rows_padded * params.number_of_subtrees;
|
||||
params.number_of_leaves_to_inject = 0;
|
||||
while (params.leaves_offset < params.number_of_inputs &&
|
||||
next_pow_of_two(leaves[params.leaves_offset].height) >= target_height) {
|
||||
if (next_pow_of_two(leaves[params.leaves_offset].height) == target_height) params.number_of_leaves_to_inject++;
|
||||
params.leaves_offset++;
|
||||
}
|
||||
|
||||
if (params.number_of_leaves_to_inject) {
|
||||
size_t rows_offset = params.subtree_idx * params.number_of_rows_padded;
|
||||
size_t actual_layer_rows = leaves[params.leaves_offset - params.number_of_leaves_to_inject].height;
|
||||
params.number_of_rows = std::min(actual_layer_rows - rows_offset, params.number_of_rows_padded);
|
||||
|
||||
Matrix<L>* leaves_info = static_cast<Matrix<L>*>(malloc(params.number_of_leaves_to_inject * sizeof(Matrix<L>)));
|
||||
L* d_leaves_ptr = d_leaves;
|
||||
for (auto i = 0; i < params.number_of_leaves_to_inject; i++) {
|
||||
Matrix<L> leaf = leaves[params.leaves_offset - params.number_of_leaves_to_inject + i];
|
||||
if (!params.are_inputs_on_device) {
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_leaves_ptr, leaf.values + rows_offset * leaf.width, params.number_of_rows * leaf.width * sizeof(L),
|
||||
cudaMemcpyHostToDevice, params.ctx->stream));
|
||||
} else {
|
||||
d_leaves_ptr = leaf.values + rows_offset * leaf.width;
|
||||
}
|
||||
|
||||
leaves_info[i] = {d_leaves_ptr, leaf.width, params.number_of_rows};
|
||||
d_leaves_ptr += params.number_of_rows * leaf.width;
|
||||
}
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_leaves_info, leaves_info, params.number_of_leaves_to_inject * sizeof(Matrix<L>), cudaMemcpyHostToDevice,
|
||||
params.ctx->stream));
|
||||
free(leaves_info);
|
||||
}
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
/// Checks if the current row needs to be copied out to the resulting digests array
|
||||
/// Computes the needed offsets using segments model
|
||||
template <typename L, typename D>
|
||||
cudaError_t maybe_copy_digests(D* digests, L* big_tree_digests, SubtreeParams<L, D>& params)
|
||||
{
|
||||
if (!params.keep_rows || params.subtree_height < params.keep_rows + (int)params.caps_mode) {
|
||||
D* digests_with_offset = big_tree_digests + params.segment_offset +
|
||||
params.subtree_idx * params.number_of_rows_padded * params.digest_elements;
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
digests_with_offset, digests, params.number_of_rows_padded * params.digest_elements * sizeof(D),
|
||||
cudaMemcpyDeviceToHost, params.ctx->stream));
|
||||
params.segment_offset += params.segment_size;
|
||||
}
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t fold_layer(
|
||||
const std::vector<Matrix<L>>& leaves,
|
||||
D* prev_layer,
|
||||
D* next_layer,
|
||||
L* aux_leaves_mem,
|
||||
Matrix<L>* d_leaves_info,
|
||||
SubtreeParams<L, D>& params)
|
||||
{
|
||||
CHK_IF_RETURN(slice_and_copy_leaves<L>(leaves, aux_leaves_mem, d_leaves_info, params));
|
||||
|
||||
if (params.number_of_leaves_to_inject) {
|
||||
CHK_IF_RETURN(params.compression->compress_and_inject(
|
||||
d_leaves_info, params.number_of_leaves_to_inject, params.number_of_rows, prev_layer, next_layer,
|
||||
params.digest_elements, *params.ctx));
|
||||
} else {
|
||||
CHK_IF_RETURN(params.compression->run_hash_many_kernel(
|
||||
prev_layer, next_layer, params.number_of_rows_padded, params.compression->width, params.digest_elements,
|
||||
*params.ctx));
|
||||
}
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t build_mmcs_subtree(
|
||||
const std::vector<Matrix<L>>& leaves,
|
||||
L* d_leaves,
|
||||
D* states,
|
||||
L* aux_leaves_mem,
|
||||
L* big_tree_digests,
|
||||
SubtreeParams<L, D>& params)
|
||||
{
|
||||
// Leaves info
|
||||
Matrix<L>* d_leaves_info;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_leaves_info, params.number_of_inputs * sizeof(Matrix<L>), params.ctx->stream));
|
||||
|
||||
CHK_IF_RETURN(slice_and_copy_leaves(leaves, d_leaves, d_leaves_info, params));
|
||||
|
||||
// Reuse leaves memory
|
||||
D* digests = (D*)d_leaves;
|
||||
|
||||
CHK_IF_RETURN(hash_leaves(
|
||||
d_leaves_info, params.number_of_leaves_to_inject, params.number_of_rows, states, params.digest_elements,
|
||||
*params.hasher, *params.ctx));
|
||||
|
||||
CHK_IF_RETURN(maybe_copy_digests(digests, big_tree_digests, params));
|
||||
|
||||
params.number_of_rows_padded /= params.arity;
|
||||
params.segment_size /= params.arity;
|
||||
params.subtree_height--;
|
||||
|
||||
D* prev_layer = states;
|
||||
D* next_layer = digests;
|
||||
while (params.number_of_rows_padded > 0) {
|
||||
CHK_IF_RETURN(fold_layer(leaves, prev_layer, next_layer, aux_leaves_mem, d_leaves_info, params));
|
||||
CHK_IF_RETURN(maybe_copy_digests(next_layer, big_tree_digests, params));
|
||||
swap<D>(&prev_layer, &next_layer);
|
||||
params.segment_size /= params.arity;
|
||||
params.subtree_height--;
|
||||
params.number_of_rows_padded /= params.arity;
|
||||
}
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename L, typename D>
|
||||
cudaError_t mmcs_commit(
|
||||
const Matrix<L>* inputs,
|
||||
const unsigned int number_of_inputs,
|
||||
D* digests,
|
||||
const Hasher<L, D>& hasher,
|
||||
const Hasher<L, D>& compression,
|
||||
const TreeBuilderConfig& tree_config)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = tree_config.ctx.stream;
|
||||
|
||||
if (number_of_inputs == 0) THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "No matrices provided");
|
||||
|
||||
if (compression.preimage_max_length < tree_config.arity * tree_config.digest_elements)
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument,
|
||||
"Hash max preimage length does not match merkle tree arity multiplied by digest elements");
|
||||
|
||||
std::vector<Matrix<L>> sorted_inputs(number_of_inputs);
|
||||
std::partial_sort_copy(
|
||||
inputs, inputs + number_of_inputs, sorted_inputs.begin(), sorted_inputs.end(),
|
||||
[](const Matrix<L>& left, const Matrix<L>& right) { return left.height > right.height; });
|
||||
|
||||
// Check that the height of any two given matrices either rounds up
|
||||
// to the same next power of two or otherwise equal
|
||||
for (unsigned int i = 0; i < number_of_inputs - 1; i++) {
|
||||
unsigned int left = sorted_inputs[i].height;
|
||||
unsigned int right = sorted_inputs[i + 1].height;
|
||||
|
||||
if (next_pow_of_two(left) == next_pow_of_two(right) && left != right)
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument, "Matrix heights that round up to the same power of two must be equal");
|
||||
}
|
||||
|
||||
uint64_t max_height = sorted_inputs[0].height;
|
||||
|
||||
// Calculate maximum additional memory needed for injected matrices
|
||||
uint64_t max_aux_total_elements = 0;
|
||||
uint64_t current_aux_total_elements = 0;
|
||||
uint64_t current_height = 0;
|
||||
uint64_t bottom_layer_leaves_elements = 0;
|
||||
if (!tree_config.are_inputs_on_device) {
|
||||
for (auto it = sorted_inputs.begin(); it < sorted_inputs.end(); it++) {
|
||||
if (it->height == max_height) {
|
||||
bottom_layer_leaves_elements += it->height * it->width;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (it->height != current_height) {
|
||||
current_height = it->height;
|
||||
current_aux_total_elements = 0;
|
||||
}
|
||||
|
||||
current_aux_total_elements += it->width * it->height;
|
||||
if (current_aux_total_elements > max_aux_total_elements) {
|
||||
max_aux_total_elements = current_aux_total_elements;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t number_of_bottom_layer_rows = next_pow_of_two(max_height);
|
||||
size_t leaves_info_memory = number_of_inputs * sizeof(Matrix<L>);
|
||||
|
||||
unsigned int tree_height = get_height(number_of_bottom_layer_rows);
|
||||
|
||||
// This will determine how much splitting do we need to do
|
||||
// `number_of_streams` subtrees should fit in the device
|
||||
// This means each subtree should fit in `STREAM_CHUNK_SIZE` memory
|
||||
uint64_t number_of_subtrees = 1;
|
||||
uint64_t subtree_height = tree_height;
|
||||
uint64_t subtree_bottom_layer_rows = number_of_bottom_layer_rows;
|
||||
uint64_t subtree_states_size = subtree_bottom_layer_rows * hasher.width;
|
||||
uint64_t subtree_digests_size = subtree_bottom_layer_rows * tree_config.digest_elements;
|
||||
uint64_t subtree_leaves_elements = 0;
|
||||
for (int i = 0; i < number_of_inputs && sorted_inputs[i].height == max_height; i++) {
|
||||
subtree_leaves_elements += sorted_inputs[i].width * sorted_inputs[i].height;
|
||||
}
|
||||
uint64_t subtree_aux_elements = max_aux_total_elements;
|
||||
|
||||
size_t subtree_leaves_memory = std::max(subtree_leaves_elements * sizeof(L), subtree_digests_size * sizeof(D));
|
||||
size_t subtree_memory_required =
|
||||
sizeof(D) * subtree_states_size + subtree_leaves_memory + subtree_aux_elements * sizeof(L) + leaves_info_memory;
|
||||
while (subtree_memory_required > STREAM_CHUNK_SIZE) {
|
||||
number_of_subtrees *= tree_config.arity;
|
||||
subtree_height--;
|
||||
subtree_bottom_layer_rows /= tree_config.arity;
|
||||
subtree_states_size /= tree_config.arity;
|
||||
subtree_digests_size /= tree_config.arity;
|
||||
subtree_leaves_elements /= tree_config.arity;
|
||||
subtree_aux_elements /= tree_config.arity;
|
||||
subtree_leaves_memory = std::max(subtree_leaves_elements * sizeof(L), subtree_digests_size * sizeof(D));
|
||||
subtree_memory_required =
|
||||
sizeof(D) * subtree_states_size + subtree_leaves_memory + subtree_aux_elements * sizeof(L) + leaves_info_memory;
|
||||
}
|
||||
unsigned int cap_height = tree_height - subtree_height;
|
||||
size_t caps_len = pow(tree_config.arity, cap_height) * tree_config.digest_elements;
|
||||
|
||||
size_t available_memory, _total_memory;
|
||||
CHK_IF_RETURN(cudaMemGetInfo(&available_memory, &_total_memory));
|
||||
if (available_memory < (GIGA / 8 + STREAM_CHUNK_SIZE)) {
|
||||
THROW_ICICLE_ERR(
|
||||
IcicleError_t::InvalidArgument,
|
||||
"Not enough GPU memory to build a tree. At least 1.125 GB of GPU memory required");
|
||||
}
|
||||
available_memory -= GIGA / 8; // Leave 128 MB just in case
|
||||
|
||||
// We can effectively parallelize memory copy with streams
|
||||
// as long as they don't operate on more than `STREAM_CHUNK_SIZE` bytes
|
||||
const size_t number_of_streams = std::min((uint64_t)(available_memory / STREAM_CHUNK_SIZE), number_of_subtrees);
|
||||
std::vector<cudaStream_t> streams(number_of_streams);
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamCreate(&streams[i]));
|
||||
}
|
||||
|
||||
// If keep_rows is smaller than the remaining top-tree height
|
||||
// we need to allocate additional memory to store the roots
|
||||
// of subtrees, in order to proceed from there
|
||||
bool caps_mode = tree_config.keep_rows && tree_config.keep_rows <= cap_height;
|
||||
D* caps;
|
||||
if (caps_mode) { caps = static_cast<D*>(malloc(caps_len * sizeof(D))); }
|
||||
|
||||
#ifdef MERKLE_DEBUG
|
||||
std::cout << "MMCS DEBUG" << std::endl;
|
||||
std::cout << "====================================" << std::endl;
|
||||
std::cout << "Available memory = " << available_memory / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Number of streams = " << number_of_streams << std::endl;
|
||||
std::cout << "Number of subtrees = " << number_of_subtrees << std::endl;
|
||||
std::cout << "Height of a subtree = " << subtree_height << std::endl;
|
||||
std::cout << "Cutoff height = " << tree_height - subtree_height << std::endl;
|
||||
std::cout << "Number of leaves in a subtree = " << subtree_bottom_layer_rows << std::endl;
|
||||
std::cout << "State of a subtree = " << subtree_states_size << std::endl;
|
||||
std::cout << "Digest elements for a subtree = " << subtree_digests_size << std::endl;
|
||||
std::cout << "Size of 1 subtree states = " << subtree_states_size * sizeof(D) / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Size of 1 subtree digests = " << subtree_digests_size * sizeof(D) / 1024 / 1024 << " MB" << std::endl;
|
||||
std::cout << "Cap height = " << cap_height << std::endl;
|
||||
std::cout << "Enabling caps mode? " << caps_mode << std::endl;
|
||||
|
||||
std::cout << "Allocating " << subtree_states_size * number_of_streams << " elements for states" << std::endl;
|
||||
std::cout << "Allocating " << subtree_leaves_memory * number_of_streams << " bytes for leaves" << std::endl;
|
||||
std::cout << "Allocating " << subtree_aux_elements * number_of_streams << " elements for aux leaves" << std::endl;
|
||||
std::cout << std::endl;
|
||||
#endif
|
||||
|
||||
// Allocate memory for the states, injected leaves (aux) and digests
|
||||
// These are shared by streams in a pool
|
||||
D* states_ptr;
|
||||
L *aux_ptr, *leaves_ptr;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&states_ptr, subtree_states_size * number_of_streams * sizeof(D), stream));
|
||||
CHK_IF_RETURN(cudaMemsetAsync(states_ptr, 0, subtree_states_size * number_of_streams * sizeof(D), stream));
|
||||
CHK_IF_RETURN(cudaMallocAsync(&leaves_ptr, subtree_leaves_memory * number_of_streams, stream));
|
||||
CHK_IF_RETURN(cudaMallocAsync(&aux_ptr, subtree_aux_elements * number_of_streams * sizeof(L), stream));
|
||||
// Wait for these allocations to finish
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(stream));
|
||||
|
||||
// Build subtrees in parallel. This for loop invokes kernels that can run in a pool of size `number_of_streams`
|
||||
for (size_t subtree_idx = 0; subtree_idx < number_of_subtrees; subtree_idx++) {
|
||||
size_t stream_idx = subtree_idx % number_of_streams;
|
||||
cudaStream_t subtree_stream = streams[stream_idx];
|
||||
|
||||
D* subtree_state = states_ptr + stream_idx * subtree_states_size;
|
||||
L* subtree_leaves = (L*)((unsigned char*)leaves_ptr + stream_idx * subtree_leaves_memory);
|
||||
L* subtree_aux = aux_ptr + stream_idx * subtree_aux_elements;
|
||||
|
||||
unsigned int subtree_keep_rows = 0;
|
||||
if (tree_config.keep_rows) {
|
||||
int diff = tree_config.keep_rows - cap_height;
|
||||
subtree_keep_rows = std::max(1, diff);
|
||||
}
|
||||
device_context::DeviceContext subtree_context{subtree_stream, tree_config.ctx.device_id, tree_config.ctx.mempool};
|
||||
|
||||
SubtreeParams<L, D> params = {};
|
||||
|
||||
params.number_of_inputs = number_of_inputs;
|
||||
params.arity = tree_config.arity;
|
||||
params.digest_elements = tree_config.digest_elements;
|
||||
params.number_of_rows = subtree_bottom_layer_rows;
|
||||
params.number_of_rows_padded = subtree_bottom_layer_rows;
|
||||
|
||||
params.subtree_idx = subtree_idx;
|
||||
params.subtree_height = subtree_height;
|
||||
params.number_of_subtrees = number_of_subtrees;
|
||||
|
||||
params.segment_size = number_of_bottom_layer_rows * tree_config.digest_elements;
|
||||
params.keep_rows = subtree_keep_rows;
|
||||
params.are_inputs_on_device = tree_config.are_inputs_on_device;
|
||||
params.hasher = &hasher;
|
||||
params.compression = &compression;
|
||||
params.ctx = &subtree_context;
|
||||
|
||||
cudaError_t subtree_result = build_mmcs_subtree<L, D>(
|
||||
sorted_inputs,
|
||||
subtree_leaves, // d_leaves
|
||||
subtree_state, // states
|
||||
subtree_aux, // aux_leaves_mem
|
||||
caps_mode ? caps : digests, // big_tree_digests
|
||||
params // params
|
||||
);
|
||||
CHK_IF_RETURN(subtree_result);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(streams[i]));
|
||||
}
|
||||
|
||||
// Finish the top-level tree if any
|
||||
if (cap_height > 0) {
|
||||
D* digests_ptr = (D*)leaves_ptr;
|
||||
size_t start_segment_size = caps_len / tree_config.arity;
|
||||
size_t start_segment_offset = 0;
|
||||
if (!caps_mode) { // Calculate offset
|
||||
size_t keep_rows = tree_config.keep_rows ? tree_config.keep_rows : tree_height + 1;
|
||||
size_t layer_size = pow(tree_config.arity, keep_rows - 1) * tree_config.digest_elements;
|
||||
for (int i = 0; i < keep_rows - cap_height; i++) {
|
||||
start_segment_offset += layer_size;
|
||||
layer_size /= tree_config.arity;
|
||||
}
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
states_ptr, caps_mode ? caps : (digests + start_segment_offset - caps_len), caps_len * sizeof(D),
|
||||
(caps_mode || !tree_config.are_outputs_on_device) ? cudaMemcpyHostToDevice : cudaMemcpyDeviceToDevice, stream));
|
||||
|
||||
uint64_t number_of_states = caps_len / tree_config.arity / tree_config.digest_elements;
|
||||
Matrix<L>* d_leaves_info;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_leaves_info, number_of_inputs * sizeof(Matrix<L>), tree_config.ctx.stream));
|
||||
|
||||
SubtreeParams<L, D> top_params = {};
|
||||
|
||||
top_params.number_of_inputs = number_of_inputs;
|
||||
top_params.arity = tree_config.arity;
|
||||
top_params.digest_elements = tree_config.digest_elements;
|
||||
top_params.number_of_rows = number_of_states;
|
||||
top_params.number_of_rows_padded = number_of_states;
|
||||
|
||||
top_params.subtree_height = cap_height;
|
||||
top_params.number_of_subtrees = 1;
|
||||
|
||||
top_params.segment_offset = start_segment_offset;
|
||||
top_params.segment_size = start_segment_size;
|
||||
top_params.keep_rows = tree_config.keep_rows;
|
||||
top_params.are_inputs_on_device = tree_config.are_inputs_on_device;
|
||||
top_params.caps_mode = caps_mode;
|
||||
top_params.hasher = &hasher;
|
||||
top_params.compression = &compression;
|
||||
top_params.ctx = &tree_config.ctx;
|
||||
|
||||
D* prev_layer = states_ptr;
|
||||
D* next_layer = digests_ptr;
|
||||
while (top_params.number_of_rows_padded > 0) {
|
||||
CHK_IF_RETURN(fold_layer(sorted_inputs, prev_layer, next_layer, aux_ptr, d_leaves_info, top_params));
|
||||
CHK_IF_RETURN(maybe_copy_digests(next_layer, digests, top_params));
|
||||
swap<D>(&prev_layer, &next_layer);
|
||||
top_params.segment_size /= top_params.arity;
|
||||
top_params.subtree_height--;
|
||||
top_params.number_of_rows_padded /= top_params.arity;
|
||||
}
|
||||
}
|
||||
|
||||
if (caps_mode) { free(caps); }
|
||||
CHK_IF_RETURN(cudaFreeAsync(states_ptr, stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(leaves_ptr, stream));
|
||||
for (size_t i = 0; i < number_of_streams; i++) {
|
||||
CHK_IF_RETURN(cudaStreamDestroy(streams[i]));
|
||||
}
|
||||
if (!tree_config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
} // namespace merkle_tree
|
||||
7
icicle/src/merkle-tree/tests/merkle/.gitignore
vendored
Normal file
7
icicle/src/merkle-tree/tests/merkle/.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
merkle.o
|
||||
poseidon2.o
|
||||
test_merkle_poseidon2
|
||||
merkle_bls.o
|
||||
poseidon.o
|
||||
test_merkle_poseidon
|
||||
test_merkle
|
||||
22
icicle/src/merkle-tree/tests/merkle/Makefile
Normal file
22
icicle/src/merkle-tree/tests/merkle/Makefile
Normal file
@@ -0,0 +1,22 @@
|
||||
# Build and run the BLS12-381 Poseidon Merkle tree test (default target).
# test.cu is listed as a prerequisite so that editing the test triggers a rebuild.
test_merkle_poseidon: test.cu poseidon.o merkle_bls.o
	nvcc -o test_merkle_poseidon -I../../../../include -DFIELD=bls12_381 -DFIELD_ID=2 -DCURVE=bls12_381 -DMERKLE_DEBUG poseidon.o merkle_bls.o test.cu
	./test_merkle_poseidon

# Merkle tree builder compiled for BLS12-381.
merkle_bls.o: ../../extern.cu ../../merkle.cu
	nvcc -o merkle_bls.o -I../../../../include -DFIELD=bls12_381 -DFIELD_ID=2 -DCURVE=bls12_381 -DMERKLE_DEBUG -c ../../extern.cu

# Poseidon hash compiled for BLS12-381.
poseidon.o: ../../../poseidon/extern.cu
	nvcc -o poseidon.o -I../../../../include -DFIELD=bls12_381 -DFIELD_ID=2 -DCURVE=bls12_381 -c ../../../poseidon/extern.cu

# Build and run the BabyBear Poseidon2 Merkle tree test.
test_merkle: test_poseidon2.cu poseidon2.o merkle.o
	nvcc -o test_merkle -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG poseidon2.o merkle.o test_poseidon2.cu
	./test_merkle

# Merkle tree builder compiled for BabyBear.
merkle.o: ../../extern.cu ../../merkle.cu
	nvcc -o merkle.o -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG -c ../../extern.cu

# Poseidon2 hash compiled for BabyBear.
poseidon2.o: ../../../poseidon2/extern.cu
	nvcc -o poseidon2.o -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -c ../../../poseidon2/extern.cu

# Remove every artifact produced by the rules above (objects and both test binaries).
.PHONY: clear
clear:
	rm -f poseidon2.o merkle.o test_merkle merkle_bls.o poseidon.o test_merkle_poseidon
|
||||
@@ -1,10 +1,3 @@
|
||||
// #define DEBUG
|
||||
#define MERKLE_DEBUG
|
||||
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "../poseidon.cu"
|
||||
#include "merkle.cu"
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
@@ -12,15 +5,19 @@
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
using namespace poseidon;
|
||||
using namespace merkle;
|
||||
using namespace curve_config;
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define DEBUG
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
#include "poseidon/poseidon.cuh"
|
||||
|
||||
#include "api/bls12_381.h"
|
||||
using namespace bls12_381;
|
||||
|
||||
// Arity
|
||||
#define A 2
|
||||
#define T (A + 1)
|
||||
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
@@ -30,24 +27,24 @@ int main(int argc, char* argv[])
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
PoseidonConstants<scalar_t> constants;
|
||||
init_optimized_poseidon_constants<scalar_t>(A, ctx, &constants);
|
||||
poseidon::Poseidon<scalar_t> poseidon(A, ctx);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
/// Tree of height N and arity A contains \sum{A^i} for i in 0..N-1 elements
|
||||
uint32_t tree_height = argc > 1 ? atoi(argv[1]) : 28;
|
||||
uint32_t number_of_leaves = pow(A, (tree_height - 1));
|
||||
uint32_t tree_height = argc > 1 ? atoi(argv[1]) : 26;
|
||||
uint32_t number_of_leaves = pow(A, tree_height);
|
||||
uint32_t total_number_of_leaves = number_of_leaves * A;
|
||||
|
||||
/// Use keep_rows to specify how many rows you want to store
|
||||
int keep_rows = argc > 2 ? atoi(argv[2]) : 7;
|
||||
size_t digests_len = get_digests_len<scalar_t>(keep_rows + 1, A);
|
||||
size_t digests_len = merkle_tree::get_digests_len(keep_rows - 1, A, 1);
|
||||
|
||||
/// Fill leaves with scalars [0, 1, ... 2^{tree_height - 1} - 1]
|
||||
/// Fill leaves with scalars [0, 1, ... 2^tree_height - 1]
|
||||
START_TIMER(timer_allocation);
|
||||
scalar_t input = scalar_t::zero();
|
||||
size_t leaves_mem = number_of_leaves * sizeof(scalar_t);
|
||||
size_t leaves_mem = total_number_of_leaves * sizeof(scalar_t);
|
||||
scalar_t* leaves = static_cast<scalar_t*>(malloc(leaves_mem));
|
||||
for (uint32_t i = 0; i < number_of_leaves; i++) {
|
||||
for (uint32_t i = 0; i < total_number_of_leaves; i++) {
|
||||
leaves[i] = input;
|
||||
input = input + scalar_t::one();
|
||||
}
|
||||
@@ -62,6 +59,7 @@ int main(int argc, char* argv[])
|
||||
std::cout << "Memory for leaves = " << leaves_mem / 1024 / 1024 << " MB; " << leaves_mem / 1024 / 1024 / 1024 << " GB"
|
||||
<< std::endl;
|
||||
std::cout << "Number of leaves = " << number_of_leaves << std::endl;
|
||||
std::cout << "Total Number of leaves = " << total_number_of_leaves << std::endl;
|
||||
std::cout << "Memory for digests = " << digests_mem / 1024 / 1024 << " MB; " << digests_mem / 1024 / 1024 / 1024
|
||||
<< " GB" << std::endl;
|
||||
std::cout << "Number of digest elements = " << digests_len << std::endl;
|
||||
@@ -69,12 +67,17 @@ int main(int argc, char* argv[])
|
||||
std::cout << "Total RAM consumption = " << (digests_mem + leaves_mem) / 1024 / 1024 << " MB; "
|
||||
<< (digests_mem + leaves_mem) / 1024 / 1024 / 1024 << " GB" << std::endl;
|
||||
|
||||
TreeBuilderConfig config = default_merkle_config();
|
||||
config.keep_rows = keep_rows;
|
||||
merkle_tree::TreeBuilderConfig tree_config = merkle_tree::default_merkle_config();
|
||||
tree_config.arity = 2;
|
||||
tree_config.keep_rows = keep_rows;
|
||||
START_TIMER(timer_merkle);
|
||||
build_merkle_tree<scalar_t, T>(leaves, digests, tree_height, constants, config);
|
||||
bls12_381_build_merkle_tree(leaves, digests, tree_height, A, &poseidon, &poseidon, tree_config);
|
||||
END_TIMER(timer_merkle, "Merkle tree built: ")
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
std::cout << digests[i] << std::endl;
|
||||
}
|
||||
|
||||
// Use this to generate test vectors
|
||||
// for (int i = 0; i < digests_len; i++) {
|
||||
// std::cout << "{";
|
||||
109
icicle/src/merkle-tree/tests/merkle/test_poseidon2.cu
Normal file
109
icicle/src/merkle-tree/tests/merkle/test_poseidon2.cu
Normal file
@@ -0,0 +1,109 @@
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
#include "api/babybear.h"
|
||||
using namespace babybear;
|
||||
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
/// Tree of height N and arity A contains \sum{A^i} for i in 0..N elements
|
||||
uint32_t tree_arity = 2;
|
||||
uint32_t width = 16;
|
||||
uint32_t input_block_len = 8;
|
||||
uint32_t digest_elements = 8;
|
||||
uint64_t tree_height = argc > 1 ? atoi(argv[1]) : 23;
|
||||
uint64_t number_of_leaves = pow(tree_arity, tree_height);
|
||||
uint64_t total_number_of_leaves = number_of_leaves * input_block_len;
|
||||
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
poseidon2::Poseidon2<scalar_t> poseidon(
|
||||
width, input_block_len, poseidon2::MdsType::DEFAULT_MDS, poseidon2::DiffusionStrategy::DEFAULT_DIFFUSION, ctx);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
/// Use keep_rows to specify how many rows do you want to store
|
||||
int keep_rows = argc > 2 ? atoi(argv[2]) : 3;
|
||||
size_t digests_len = merkle_tree::get_digests_len(keep_rows - 1, tree_arity, digest_elements);
|
||||
|
||||
/// Fill leaves with scalars [0, 1, ... 2^tree_height - 1]
|
||||
START_TIMER(timer_allocation);
|
||||
scalar_t input = scalar_t::zero();
|
||||
size_t leaves_mem = total_number_of_leaves * sizeof(scalar_t);
|
||||
scalar_t* leaves = static_cast<scalar_t*>(malloc(leaves_mem));
|
||||
for (uint64_t i = 0; i < total_number_of_leaves; i++) {
|
||||
leaves[i] = input;
|
||||
input = input + scalar_t::one();
|
||||
}
|
||||
END_TIMER(timer_allocation, "Allocated memory for leaves: ");
|
||||
|
||||
/// Allocate memory for digests of {keep_rows} rows of a tree
|
||||
START_TIMER(timer_digests);
|
||||
size_t digests_mem = digests_len * sizeof(scalar_t);
|
||||
scalar_t* digests = static_cast<scalar_t*>(malloc(digests_mem));
|
||||
END_TIMER(timer_digests, "Allocated memory for digests");
|
||||
|
||||
std::cout << "Memory for leaves = " << leaves_mem / 1024 / 1024 << " MB; " << leaves_mem / 1024 / 1024 / 1024 << " GB"
|
||||
<< std::endl;
|
||||
std::cout << "Number of leaves = " << number_of_leaves << std::endl;
|
||||
std::cout << "Total Number of leaves = " << total_number_of_leaves << std::endl;
|
||||
std::cout << "Memory for digests = " << digests_mem / 1024 / 1024 << " MB; " << digests_mem / 1024 / 1024 / 1024
|
||||
<< " GB" << std::endl;
|
||||
std::cout << "Number of digest elements = " << digests_len << std::endl;
|
||||
|
||||
std::cout << "Total RAM consumption = " << (digests_mem + leaves_mem) / 1024 / 1024 << " MB; "
|
||||
<< (digests_mem + leaves_mem) / 1024 / 1024 / 1024 << " GB" << std::endl;
|
||||
|
||||
merkle_tree::TreeBuilderConfig tree_config = merkle_tree::default_merkle_config();
|
||||
tree_config.arity = tree_arity;
|
||||
tree_config.keep_rows = keep_rows;
|
||||
tree_config.digest_elements = digest_elements;
|
||||
START_TIMER(timer_merkle);
|
||||
babybear_build_merkle_tree(leaves, digests, tree_height, input_block_len, &poseidon, &poseidon, tree_config);
|
||||
END_TIMER(timer_merkle, "Merkle tree built: ")
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
// std::cout << digests[i] << std::endl;
|
||||
}
|
||||
|
||||
// Use this to generate test vectors
|
||||
// for (int i = 0; i < digests_len; i++) {
|
||||
// std::cout << "{";
|
||||
// for (int j = 0; j < 1; j++) {
|
||||
// std::cout << ((uint32_t*)&digests[i].limbs_storage)[j];
|
||||
// }
|
||||
// std::cout << "}," << std::endl;
|
||||
// }
|
||||
|
||||
scalar_t expected[64] = {
|
||||
{876845485}, {1982055884}, {1232961929}, {1502814326}, {1731913687}, {351564698}, {449044700}, {656218013},
|
||||
{1616800877}, {1324365320}, {651075613}, {1679193452}, {218302636}, {283697394}, {1141456517}, {253630808},
|
||||
{936036237}, {1020969125}, {597252945}, {32839064}, {957901845}, {1137914369}, {155933167}, {986924657},
|
||||
{1553746264}, {1007314324}, {1208763331}, {110389244}, {118704360}, {607471513}, {834479233}, {914998571},
|
||||
{1086906039}, {1673233108}, {431115765}, {233068973}, {1974449092}, {1296268875}, {538093590}, {104288129},
|
||||
{1011605567}, {53314351}, {1461404090}, {870754513}, {1212389386}, {1363519118}, {799527383}, {1258384762},
|
||||
{678820782}, {1940801563}, {887764924}, {1006362075}, {2003940909}, {1213396717}, {1332793191}, {440259232}};
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
scalar_t root = digests[i];
|
||||
// std::cout << root << std::endl;
|
||||
assert(root == expected[i]);
|
||||
}
|
||||
free(digests);
|
||||
free(leaves);
|
||||
}
|
||||
|
||||
#endif
|
||||
4
icicle/src/merkle-tree/tests/mmcs/.gitignore
vendored
Normal file
4
icicle/src/merkle-tree/tests/mmcs/.gitignore
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
mmcs.o
|
||||
poseidon2.o
|
||||
test_mmcs_poseidon2
|
||||
vec_ops.o
|
||||
15
icicle/src/merkle-tree/tests/mmcs/Makefile
Normal file
15
icicle/src/merkle-tree/tests/mmcs/Makefile
Normal file
@@ -0,0 +1,15 @@
|
||||
# Build and run the BabyBear Poseidon2 MMCS test (default target).
# The target keeps its historical name even though the binary is test_mmcs_poseidon2.
# test_poseidon2.cu is a prerequisite so that editing the test triggers a rebuild.
test_merkle: test_poseidon2.cu poseidon2.o mmcs.o vec_ops.o
	nvcc -o test_mmcs_poseidon2 -lineinfo -I../../../../include -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG poseidon2.o vec_ops.o mmcs.o test_poseidon2.cu
	./test_mmcs_poseidon2

# MMCS tree builder compiled for BabyBear.
mmcs.o: ../../extern_mmcs.cu ../../mmcs.cu
	nvcc -o mmcs.o -I../../../../include -lineinfo -DFIELD=babybear -DFIELD_ID=1001 -DMERKLE_DEBUG -c ../../extern_mmcs.cu

# Poseidon2 hash compiled for BabyBear.
poseidon2.o: ../../../poseidon2/extern.cu
	nvcc -o poseidon2.o -I../../../../include -lineinfo -DFIELD=babybear -DFIELD_ID=1001 -c ../../../poseidon2/extern.cu

# Vector operations compiled for BabyBear; the source is a prerequisite so a stale object is rebuilt.
vec_ops.o: ../../../vec_ops/extern.cu
	nvcc -o vec_ops.o -I../../../../include -lineinfo -DFIELD=babybear -DFIELD_ID=1001 -c ../../../vec_ops/extern.cu

# Remove every artifact produced by the rules above.
.PHONY: clear
clear:
	rm -f poseidon2.o mmcs.o vec_ops.o test_mmcs_poseidon2
|
||||
139
icicle/src/merkle-tree/tests/mmcs/test_poseidon2.cu
Normal file
139
icicle/src/merkle-tree/tests/mmcs/test_poseidon2.cu
Normal file
@@ -0,0 +1,139 @@
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
|
||||
#include "merkle-tree/merkle.cuh"
|
||||
|
||||
#include "poseidon2/poseidon2.cuh"
|
||||
|
||||
#include "api/babybear.h"
|
||||
using namespace babybear;
|
||||
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
/// Tree of height N and arity A contains \sum{A^i} for i in 0..N elements
|
||||
uint32_t tree_arity = 2;
|
||||
uint32_t width = 16;
|
||||
uint32_t input_block_len = 600;
|
||||
uint32_t rate = 8;
|
||||
uint32_t digest_elements = 8;
|
||||
uint32_t copied_matrices = 1;
|
||||
uint64_t tree_height = argc > 1 ? atoi(argv[1]) : 3;
|
||||
uint64_t number_of_leaves = pow(tree_arity, tree_height);
|
||||
uint64_t total_number_of_leaves = number_of_leaves * input_block_len;
|
||||
|
||||
bool are_inputs_on_device = true;
|
||||
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
poseidon2::Poseidon2<scalar_t> poseidon(
|
||||
width, rate, poseidon2::MdsType::PLONKY, poseidon2::DiffusionStrategy::MONTGOMERY, ctx);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
/// Use keep_rows to specify how many rows do you want to store
|
||||
int keep_rows = argc > 2 ? atoi(argv[2]) : 3;
|
||||
size_t digests_len = merkle_tree::get_digests_len(keep_rows - 1, tree_arity, digest_elements);
|
||||
|
||||
/// Fill leaves with scalars [0, 1, ... 2^tree_height - 1]
|
||||
START_TIMER(timer_allocation);
|
||||
scalar_t input = scalar_t::zero();
|
||||
|
||||
// unsigned int number_of_inputs = tree_height * copied_matrices;
|
||||
unsigned int number_of_inputs = 1;
|
||||
Matrix<scalar_t>* leaves = static_cast<Matrix<scalar_t>*>(malloc(number_of_inputs * sizeof(Matrix<scalar_t>)));
|
||||
uint64_t current_matrix_rows = number_of_leaves;
|
||||
for (int i = 0; i < number_of_inputs; i++) {
|
||||
uint64_t current_matrix_size = current_matrix_rows * input_block_len;
|
||||
for (int j = 0; j < copied_matrices; j++) {
|
||||
scalar_t* matrix = static_cast<scalar_t*>(malloc(current_matrix_size * sizeof(scalar_t)));
|
||||
|
||||
for (uint64_t k = 0; k < current_matrix_size; k++) {
|
||||
matrix[k] = input;
|
||||
input = input + scalar_t::one();
|
||||
}
|
||||
|
||||
scalar_t* d_matrix;
|
||||
if (are_inputs_on_device) {
|
||||
cudaMalloc(&d_matrix, current_matrix_size * sizeof(scalar_t));
|
||||
cudaMemcpy(d_matrix, matrix, current_matrix_size * sizeof(scalar_t), cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
leaves[i * copied_matrices + j] = {
|
||||
are_inputs_on_device ? d_matrix : matrix,
|
||||
input_block_len,
|
||||
current_matrix_rows,
|
||||
};
|
||||
}
|
||||
|
||||
current_matrix_rows /= tree_arity;
|
||||
}
|
||||
|
||||
END_TIMER(timer_allocation, "Allocated memory for leaves: ");
|
||||
|
||||
/// Allocate memory for digests of {keep_rows} rows of a tree
|
||||
START_TIMER(timer_digests);
|
||||
size_t digests_mem = digests_len * sizeof(scalar_t);
|
||||
scalar_t* digests = static_cast<scalar_t*>(malloc(digests_mem));
|
||||
END_TIMER(timer_digests, "Allocated memory for digests");
|
||||
|
||||
// std::cout << "Memory for leaves = " << total_number_of_leaves * sizeof(scalar_t) / 1024 / 1024 << " MB; " <<
|
||||
// leaves_mem / 1024 / 1024 / 1024 << " GB"
|
||||
// << std::endl;
|
||||
std::cout << "Number of leaves = " << number_of_leaves << std::endl;
|
||||
std::cout << "Total Number of leaves = " << total_number_of_leaves << std::endl;
|
||||
std::cout << "Memory for digests = " << digests_mem / 1024 / 1024 << " MB; " << digests_mem / 1024 / 1024 / 1024
|
||||
<< " GB" << std::endl;
|
||||
std::cout << "Number of digest elements = " << digests_len << std::endl;
|
||||
std::cout << std::endl;
|
||||
|
||||
// std::cout << "Total RAM consumption = " << (digests_mem + leaves_mem) / 1024 / 1024 << " MB; "
|
||||
// << (digests_mem + leaves_mem) / 1024 / 1024 / 1024 << " GB" << std::endl;
|
||||
|
||||
merkle_tree::TreeBuilderConfig tree_config = merkle_tree::default_merkle_config();
|
||||
tree_config.are_inputs_on_device = are_inputs_on_device;
|
||||
tree_config.arity = tree_arity;
|
||||
tree_config.keep_rows = keep_rows;
|
||||
tree_config.digest_elements = digest_elements;
|
||||
START_TIMER(timer_merkle);
|
||||
babybear_mmcs_commit_cuda(leaves, number_of_inputs, digests, &poseidon, &poseidon, tree_config);
|
||||
END_TIMER(timer_merkle, "Merkle tree built: ")
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
std::cout << digests[digests_len - i - 1] << std::endl;
|
||||
}
|
||||
|
||||
// Use this to generate test vectors
|
||||
// for (int i = 0; i < digests_len; i++) {
|
||||
// std::cout << "{";
|
||||
// for (int j = 0; j < 8; j++) {
|
||||
// std::cout << ((uint64_t*)&digests[i].limbs_storage)[j];
|
||||
// if (j != 7) { std::cout << ", "; }
|
||||
// }
|
||||
// std::cout << "}," << std::endl;
|
||||
// }
|
||||
|
||||
/// These scalars are digests of top-7 rows of a Merkle tree.
|
||||
/// Arity = 2, Tree height = 28, keep_rows = 7
|
||||
/// They are aligned in the following format:
|
||||
/// L-7 L-6 L-5 L-4 L-3 L-2 L-1
|
||||
/// [0..63, 64..95, 96..111, 112..119, 120..123, 124..125, 126]
|
||||
scalar_t expected[0] = {};
|
||||
|
||||
for (int i = 0; i < digests_len; i++) {
|
||||
scalar_t root = digests[i];
|
||||
// assert(root == expected[i]);
|
||||
}
|
||||
free(digests);
|
||||
free(leaves);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -320,7 +320,8 @@ namespace ntt {
|
||||
// less than max to allow more concurrent blocks on SM
|
||||
const int logn_shmem = is_shared_mem_enabled ? int(log(2 * num_threads) / log(2))
|
||||
: 0; // TODO: shared memory support only for types <= 32 bytes
|
||||
int num_threads_coset = max(min(n / 2, MAX_NUM_THREADS), 1);
|
||||
// Note: for ecntt we limit block size (=#threads per block) since otherwise it doesn't fit the SM resources.
|
||||
int num_threads_coset = max(min(n / 2, IS_ECNTT ? MAX_THREADS_BATCH_ECNTT : MAX_NUM_THREADS), 1);
|
||||
int num_blocks_coset = (n * batch_size + num_threads_coset - 1) / num_threads_coset;
|
||||
|
||||
if (inverse) {
|
||||
|
||||
@@ -1,2 +1,5 @@
|
||||
test_poseidon : test.cu poseidon.cu kernels.cu constants.cu nvcc - o test_poseidon - I../../ include - DFIELD_ID =
|
||||
2 - DCURVE_ID = 2 test.cu./ test_poseidon
|
||||
test_poseidon: test.cu
|
||||
nvcc -o test_poseidon -I../../include -DFIELD=bls12_381 -DFIELD_ID=2 -DCURVE_ID=2 -DDEVMODE -DDEBUG extern.cu test.cu
|
||||
|
||||
test_poseidon_m31: test_m31.cu
|
||||
nvcc -o test_poseidon -I../../include -DFIELD=m31 -DFIELD_ID=1003 -DDEVMODE -DDEBUG extern.cu test_m31.cu
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "poseidon/constants.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
/// These are pre-calculated constants for different curves
|
||||
#include "fields/id.h"
|
||||
@@ -17,17 +18,25 @@ using namespace poseidon_constants_bw6_761;
|
||||
#elif FIELD_ID == GRUMPKIN
|
||||
#include "poseidon/constants/grumpkin_poseidon.h"
|
||||
using namespace poseidon_constants_grumpkin;
|
||||
#elif FIELD_ID == M31
|
||||
#include "poseidon/constants/m31_poseidon.h"
|
||||
using namespace poseidon_constants_m31;
|
||||
#endif
|
||||
|
||||
namespace poseidon {
|
||||
template <typename S>
|
||||
cudaError_t create_optimized_poseidon_constants(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const S* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
PoseidonConstants<S>* poseidon_constants)
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const S* round_constants,
|
||||
const S* mds_matrix,
|
||||
const S* non_sparse_matrix,
|
||||
const S* sparse_matrices,
|
||||
const S domain_tag,
|
||||
PoseidonConstants<S>* poseidon_constants,
|
||||
device_context::DeviceContext& ctx)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = ctx.stream;
|
||||
@@ -41,24 +50,33 @@ namespace poseidon {
|
||||
S* d_constants;
|
||||
CHK_IF_RETURN(cudaMallocAsync(&d_constants, sizeof(S) * constants_len, stream));
|
||||
|
||||
S* d_round_constants = d_constants;
|
||||
S* d_mds_matrix = d_round_constants + round_constants_len;
|
||||
S* d_non_sparse_matrix = d_mds_matrix + mds_matrix_len;
|
||||
S* d_sparse_matrices = d_non_sparse_matrix + mds_matrix_len;
|
||||
|
||||
// Copy constants
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(d_constants, constants, sizeof(S) * constants_len, cudaMemcpyHostToDevice, stream));
|
||||
|
||||
S* round_constants = d_constants;
|
||||
S* mds_matrix = round_constants + round_constants_len;
|
||||
S* non_sparse_matrix = mds_matrix + mds_matrix_len;
|
||||
S* sparse_matrices = non_sparse_matrix + mds_matrix_len;
|
||||
|
||||
// Pick the domain_tag accordingly
|
||||
// For now, we only support Merkle tree mode
|
||||
uint32_t tree_domain_tag_value = 1;
|
||||
tree_domain_tag_value = (tree_domain_tag_value << (width - 1)) - tree_domain_tag_value;
|
||||
S domain_tag = S::from(tree_domain_tag_value);
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_round_constants, round_constants, sizeof(S) * round_constants_len, cudaMemcpyHostToDevice, stream));
|
||||
CHK_IF_RETURN(
|
||||
cudaMemcpyAsync(d_mds_matrix, mds_matrix, sizeof(S) * mds_matrix_len, cudaMemcpyHostToDevice, stream));
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_non_sparse_matrix, non_sparse_matrix, sizeof(S) * mds_matrix_len, cudaMemcpyHostToDevice, stream));
|
||||
CHK_IF_RETURN(cudaMemcpyAsync(
|
||||
d_sparse_matrices, sparse_matrices, sizeof(S) * sparse_matrices_len, cudaMemcpyHostToDevice, stream));
|
||||
|
||||
// Make sure all the constants have been copied
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(stream));
|
||||
*poseidon_constants = {arity, partial_rounds, full_rounds_half, round_constants,
|
||||
mds_matrix, non_sparse_matrix, sparse_matrices, domain_tag};
|
||||
*poseidon_constants = {
|
||||
arity,
|
||||
alpha,
|
||||
partial_rounds,
|
||||
full_rounds_half,
|
||||
d_round_constants,
|
||||
d_mds_matrix,
|
||||
d_non_sparse_matrix,
|
||||
d_sparse_matrices,
|
||||
domain_tag};
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
@@ -68,8 +86,8 @@ namespace poseidon {
|
||||
int arity, device_context::DeviceContext& ctx, PoseidonConstants<S>* poseidon_constants)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
int full_rounds_half = FULL_ROUNDS_DEFAULT;
|
||||
int partial_rounds;
|
||||
unsigned int full_rounds_half = FULL_ROUNDS_DEFAULT;
|
||||
unsigned int partial_rounds;
|
||||
unsigned char* constants;
|
||||
switch (arity) {
|
||||
case 2:
|
||||
@@ -94,8 +112,41 @@ namespace poseidon {
|
||||
}
|
||||
S* h_constants = reinterpret_cast<S*>(constants);
|
||||
|
||||
create_optimized_poseidon_constants(arity, full_rounds_half, partial_rounds, h_constants, ctx, poseidon_constants);
|
||||
unsigned int width = arity + 1;
|
||||
unsigned int round_constants_len = width * full_rounds_half * 2 + partial_rounds;
|
||||
unsigned int mds_matrix_len = width * width;
|
||||
|
||||
S* round_constants = h_constants;
|
||||
S* mds_matrix = round_constants + round_constants_len;
|
||||
S* non_sparse_matrix = mds_matrix + mds_matrix_len;
|
||||
S* sparse_matrices = non_sparse_matrix + mds_matrix_len;
|
||||
|
||||
// Pick the domain_tag accordingly
|
||||
// For now, we only support Merkle tree mode
|
||||
uint32_t tree_domain_tag_value = 1;
|
||||
tree_domain_tag_value = (tree_domain_tag_value << (width - 1)) - tree_domain_tag_value;
|
||||
S domain_tag = S::from(tree_domain_tag_value);
|
||||
|
||||
create_optimized_poseidon_constants<S>(
|
||||
arity, 5, partial_rounds, full_rounds_half, round_constants, mds_matrix, non_sparse_matrix, sparse_matrices,
|
||||
domain_tag, poseidon_constants, ctx);
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
cudaError_t release_optimized_poseidon_constants(PoseidonConstants<S>* constants, device_context::DeviceContext& ctx)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
CHK_IF_RETURN(cudaFreeAsync(constants->round_constants, ctx.stream));
|
||||
|
||||
constants->arity = 0;
|
||||
constants->partial_rounds = 0;
|
||||
constants->full_rounds_half = 0;
|
||||
constants->round_constants = nullptr;
|
||||
constants->mds_matrix = nullptr;
|
||||
constants->non_sparse_matrix = nullptr;
|
||||
constants->sparse_matrices = nullptr;
|
||||
return CHK_LAST();
|
||||
}
|
||||
} // namespace poseidon
|
||||
@@ -2,58 +2,68 @@
|
||||
|
||||
using namespace field_config;
|
||||
|
||||
#include "poseidon.cu"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "constants.cu"
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
namespace poseidon {
|
||||
/**
|
||||
* Extern "C" version of [poseidon_hash_cuda] function with the following
|
||||
* value of template parameter (where the field is given by `-DFIELD` env variable during build):
|
||||
* - `S` is the [field](@ref scalar_t) - either a scalar field of the elliptic curve or a
|
||||
* stand-alone "STARK field";
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon_hash_cuda)(
|
||||
scalar_t* input,
|
||||
scalar_t* output,
|
||||
int number_of_states,
|
||||
int arity,
|
||||
const PoseidonConstants<scalar_t>& constants,
|
||||
PoseidonConfig& config)
|
||||
typedef class Poseidon<scalar_t> PoseidonInst;
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon_create_cuda)(
|
||||
PoseidonInst** poseidon,
|
||||
unsigned int arity,
|
||||
unsigned int alpha,
|
||||
unsigned int partial_rounds,
|
||||
unsigned int full_rounds_half,
|
||||
const scalar_t* round_constants,
|
||||
const scalar_t* mds_matrix,
|
||||
const scalar_t* non_sparse_matrix,
|
||||
const scalar_t* sparse_matrices,
|
||||
const scalar_t& domain_tag,
|
||||
device_context::DeviceContext& ctx)
|
||||
{
|
||||
switch (arity) {
|
||||
case 2:
|
||||
return poseidon_hash<scalar_t, 3>(input, output, number_of_states, constants, config);
|
||||
case 4:
|
||||
return poseidon_hash<scalar_t, 5>(input, output, number_of_states, constants, config);
|
||||
case 8:
|
||||
return poseidon_hash<scalar_t, 9>(input, output, number_of_states, constants, config);
|
||||
case 11:
|
||||
return poseidon_hash<scalar_t, 12>(input, output, number_of_states, constants, config);
|
||||
default:
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, "PoseidonHash: #arity must be one of [2, 4, 8, 11]");
|
||||
try {
|
||||
*poseidon = new PoseidonInst(
|
||||
arity, alpha, partial_rounds, full_rounds_half, round_constants, mds_matrix, non_sparse_matrix, sparse_matrices,
|
||||
domain_tag, ctx);
|
||||
return cudaError_t::cudaSuccess;
|
||||
} catch (const IcicleError& _error) {
|
||||
return cudaError_t::cudaErrorUnknown;
|
||||
}
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, create_optimized_poseidon_constants_cuda)(
|
||||
int arity,
|
||||
int full_rounds_half,
|
||||
int partial_rounds,
|
||||
const scalar_t* constants,
|
||||
device_context::DeviceContext& ctx,
|
||||
PoseidonConstants<scalar_t>* poseidon_constants)
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon_load_cuda)(
|
||||
PoseidonInst** poseidon, unsigned int arity, device_context::DeviceContext& ctx)
|
||||
{
|
||||
return create_optimized_poseidon_constants<scalar_t>(
|
||||
arity, full_rounds_half, partial_rounds, constants, ctx, poseidon_constants);
|
||||
try {
|
||||
*poseidon = new PoseidonInst(arity, ctx);
|
||||
return cudaError_t::cudaSuccess;
|
||||
} catch (const IcicleError& _error) {
|
||||
return cudaError_t::cudaErrorUnknown;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, init_optimized_poseidon_constants_cuda)(
|
||||
int arity, device_context::DeviceContext& ctx, PoseidonConstants<scalar_t>* constants)
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon_hash_many_cuda)(
|
||||
const PoseidonInst* poseidon,
|
||||
const scalar_t* inputs,
|
||||
scalar_t* output,
|
||||
unsigned int number_of_states,
|
||||
unsigned int input_block_len,
|
||||
unsigned int output_len,
|
||||
const HashConfig& cfg)
|
||||
{
|
||||
return init_optimized_poseidon_constants<scalar_t>(arity, ctx, constants);
|
||||
return poseidon->hash_many(inputs, output, number_of_states, input_block_len, output_len, cfg);
|
||||
}
|
||||
|
||||
/**
 * Destroys a Poseidon instance previously returned by `poseidon_create_cuda`
 * or `poseidon_load_cuda`.
 *
 * Both of those entry points allocate the instance with `new`, so it has to be
 * released with `delete`. Calling the destructor explicitly
 * (`poseidon->~Poseidon()`) would run the cleanup code but never return the
 * object's own storage, leaking it on every create/delete cycle.
 *
 * @param poseidon Instance to destroy. Passing nullptr is a no-op. The pointer
 *                 must not be used after this call.
 * @return `cudaSuccess` on success, `cudaErrorUnknown` if an `IcicleError`
 *         is raised while the instance is being torn down.
 */
extern "C" cudaError_t CONCAT_EXPAND(FIELD, poseidon_delete_cuda)(PoseidonInst* poseidon)
{
  try {
    // Runs ~Poseidon() and then frees the storage obtained from `new`.
    delete poseidon;
    return cudaError_t::cudaSuccess;
  } catch (const IcicleError& _error) {
    return cudaError_t::cudaErrorUnknown;
  }
}
|
||||
} // namespace poseidon
|
||||
@@ -1,90 +0,0 @@
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
using namespace field_config;
|
||||
|
||||
#include "poseidon/poseidon.cuh"
|
||||
#include "kernels.cu"
|
||||
|
||||
namespace poseidon {
|
||||
template <typename S, int T>
|
||||
cudaError_t
|
||||
permute_many(S* states, size_t number_of_states, const PoseidonConstants<S>& constants, cudaStream_t& stream)
|
||||
{
|
||||
size_t rc_offset = 0;
|
||||
|
||||
full_rounds<S, T><<<
|
||||
PKC<T>::number_of_full_blocks(number_of_states), PKC<T>::number_of_threads,
|
||||
sizeof(S) * PKC<T>::hashes_per_block * T, stream>>>(
|
||||
states, number_of_states, rc_offset, FIRST_FULL_ROUNDS, constants);
|
||||
rc_offset += T * (constants.full_rounds_half + 1);
|
||||
|
||||
partial_rounds<S, T>
|
||||
<<<PKC<T>::number_of_singlehash_blocks(number_of_states), PKC<T>::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, rc_offset, constants);
|
||||
rc_offset += constants.partial_rounds;
|
||||
|
||||
full_rounds<S, T><<<
|
||||
PKC<T>::number_of_full_blocks(number_of_states), PKC<T>::number_of_threads,
|
||||
sizeof(S) * PKC<T>::hashes_per_block * T, stream>>>(
|
||||
states, number_of_states, rc_offset, SECOND_FULL_ROUNDS, constants);
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename S, int T>
|
||||
cudaError_t poseidon_hash(
|
||||
S* input, S* output, size_t number_of_states, const PoseidonConstants<S>& constants, const PoseidonConfig& config)
|
||||
{
|
||||
CHK_INIT_IF_RETURN();
|
||||
cudaStream_t& stream = config.ctx.stream;
|
||||
S* states;
|
||||
if (config.input_is_a_state) {
|
||||
states = input;
|
||||
} else {
|
||||
// allocate memory for {number_of_states} states of {t} scalars each
|
||||
CHK_IF_RETURN(cudaMallocAsync(&states, number_of_states * T * sizeof(S), stream))
|
||||
|
||||
// This is where the input matrix of size Arity x NumberOfBlocks is
|
||||
// padded and copied to device in a T x NumberOfBlocks matrix
|
||||
CHK_IF_RETURN(cudaMemcpy2DAsync(
|
||||
states, T * sizeof(S), // Device pointer and device pitch
|
||||
input, (T - 1) * sizeof(S), // Host pointer and pitch
|
||||
(T - 1) * sizeof(S), number_of_states, // Size of the source matrix (Arity x NumberOfBlocks)
|
||||
cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
S* output_device;
|
||||
if (config.are_outputs_on_device) {
|
||||
output_device = output;
|
||||
} else {
|
||||
CHK_IF_RETURN(cudaMallocAsync(&output_device, number_of_states * sizeof(S), stream))
|
||||
}
|
||||
|
||||
prepare_poseidon_states<S, T>
|
||||
<<<PKC<T>::number_of_full_blocks(number_of_states), PKC<T>::number_of_threads, 0, stream>>>(
|
||||
states, number_of_states, constants.domain_tag, config.aligned);
|
||||
|
||||
cudaError_t hash_error = permute_many<S, T>(states, number_of_states, constants, stream);
|
||||
CHK_IF_RETURN(hash_error);
|
||||
|
||||
get_hash_results<S, T>
|
||||
<<<PKC<T>::number_of_singlehash_blocks(number_of_states), PKC<T>::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, output_device);
|
||||
|
||||
if (config.loop_state) {
|
||||
copy_recursive<S, T>
|
||||
<<<PKC<T>::number_of_singlehash_blocks(number_of_states), PKC<T>::singlehash_block_size, 0, stream>>>(
|
||||
states, number_of_states, output_device);
|
||||
}
|
||||
|
||||
if (!config.input_is_a_state) CHK_IF_RETURN(cudaFreeAsync(states, stream));
|
||||
|
||||
if (!config.are_outputs_on_device) {
|
||||
CHK_IF_RETURN(
|
||||
cudaMemcpyAsync(output, output_device, number_of_states * sizeof(S), cudaMemcpyDeviceToHost, stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(output_device, stream));
|
||||
}
|
||||
|
||||
if (!config.is_async) return CHK_STICKY(cudaStreamSynchronize(stream));
|
||||
return CHK_LAST();
|
||||
}
|
||||
} // namespace poseidon
|
||||
@@ -4,7 +4,6 @@
|
||||
using namespace curve_config;
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "poseidon.cu"
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
@@ -12,6 +11,10 @@ using namespace curve_config;
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
#include "api/bls12_381.h"
|
||||
using namespace bls12_381;
|
||||
|
||||
#include "poseidon/poseidon.cuh"
|
||||
using namespace poseidon;
|
||||
|
||||
#define A 2
|
||||
@@ -29,8 +32,7 @@ int main(int argc, char* argv[])
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
PoseidonConstants<scalar_t> constants;
|
||||
init_optimized_poseidon_constants<scalar_t>(A, ctx, &constants);
|
||||
Poseidon<scalar_t> poseidon(A, ctx);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
START_TIMER(allocation_timer);
|
||||
@@ -46,9 +48,10 @@ int main(int argc, char* argv[])
|
||||
|
||||
scalar_t* out_ptr = static_cast<scalar_t*>(malloc(number_of_blocks * sizeof(scalar_t)));
|
||||
|
||||
HashConfig cfg = default_hash_config();
|
||||
|
||||
START_TIMER(poseidon_timer);
|
||||
PoseidonConfig config = default_poseidon_config(T);
|
||||
poseidon_hash<curve_config::scalar_t, T>(in_ptr, out_ptr, number_of_blocks, constants, config);
|
||||
poseidon.hash_many(in_ptr, out_ptr, number_of_blocks, A, 1, cfg);
|
||||
END_TIMER(poseidon_timer, "Poseidon")
|
||||
|
||||
scalar_t expected[1024] = {
|
||||
@@ -1080,7 +1083,7 @@ int main(int argc, char* argv[])
|
||||
if (number_of_blocks == 1024) {
|
||||
for (int i = 0; i < number_of_blocks; i++) {
|
||||
#ifdef DEBUG
|
||||
std::cout << out_ptr[i] << std::endl;
|
||||
// std::cout << out_ptr[i] << std::endl;
|
||||
#endif
|
||||
assert((out_ptr[i] == expected[i]));
|
||||
}
|
||||
|
||||
70
icicle/src/poseidon/test_m31.cu
Normal file
70
icicle/src/poseidon/test_m31.cu
Normal file
@@ -0,0 +1,70 @@
|
||||
// #define DEBUG
|
||||
|
||||
#include "fields/field_config.cuh"
|
||||
using namespace field_config;
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
using namespace poseidon;
|
||||
|
||||
#define A 11
|
||||
#define T (A + 1)
|
||||
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
using FpMicroseconds = std::chrono::duration<float, std::chrono::microseconds::period>;
|
||||
|
||||
// Load poseidon constants
|
||||
START_TIMER(timer_const);
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
PoseidonConstants<scalar_t> constants;
|
||||
init_optimized_poseidon_constants<scalar_t>(A, ctx, &constants);
|
||||
END_TIMER(timer_const, "Load poseidon constants");
|
||||
|
||||
START_TIMER(allocation_timer);
|
||||
// Prepare input data of [0, 1, 2 ... (number_of_blocks * arity) - 1]
|
||||
int number_of_blocks = argc > 1 ? 1 << atoi(argv[1]) : 1024;
|
||||
scalar_t input = scalar_t::zero();
|
||||
scalar_t* in_ptr = static_cast<scalar_t*>(malloc(number_of_blocks * A * sizeof(scalar_t)));
|
||||
for (uint32_t i = 0; i < number_of_blocks * A; i++) {
|
||||
in_ptr[i] = input;
|
||||
input = input + scalar_t::one();
|
||||
}
|
||||
END_TIMER(allocation_timer, "Allocate mem and fill input");
|
||||
|
||||
scalar_t* out_ptr = static_cast<scalar_t*>(malloc(number_of_blocks * sizeof(scalar_t)));
|
||||
|
||||
START_TIMER(poseidon_timer);
|
||||
PoseidonConfig config = default_poseidon_config(T);
|
||||
poseidon_hash<field_config::scalar_t, T>(in_ptr, out_ptr, number_of_blocks, constants, config);
|
||||
END_TIMER(poseidon_timer, "Poseidon")
|
||||
|
||||
// scalar_t expected[0] = {}
|
||||
|
||||
if (number_of_blocks == 1024) {
|
||||
for (int i = 0; i < number_of_blocks; i++) {
|
||||
#ifdef DEBUG
|
||||
// std::cout << out_ptr[i] << std::endl;
|
||||
#endif
|
||||
// assert((out_ptr[i] == expected[i]));
|
||||
}
|
||||
printf("Expected output matches\n");
|
||||
}
|
||||
|
||||
free(in_ptr);
|
||||
free(out_ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,3 +0,0 @@
|
||||
test_merkle:
|
||||
nvcc -o test_merkle -I../../../include -DFIELD_ID=2 -DCURVE_ID=2 test.cu
|
||||
./test_merkle
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user