Compare commits

..

36 Commits

Author SHA1 Message Date
hadaringonyama
b39b529463 no device sync 2023-07-05 11:07:07 +03:00
hadaringonyama
86bee3af42 works without the sort - temp fix 2023-07-05 10:55:59 +03:00
hadaringonyama
dbd5cd4cbb fixing streams - WIP 2023-07-05 09:50:04 +03:00
hadaringonyama
655f014dc2 merge with dev passes all tests 2023-07-02 16:13:51 +03:00
hadaringonyama
18c7cad89c Merge remote-tracking branch 'origin/dev' into msm-performance 2023-07-02 14:49:19 +03:00
hadaringonyama
1e9f628235 c=16 works 2023-07-02 12:07:46 +03:00
hadaringonyama
2e45ed1bd4 reduction - simple implementation, c=8 works 2023-06-29 19:12:53 +03:00
hadaringonyama
e828d1da2a c=8 works 2023-06-28 16:23:13 +03:00
Jeremy Felder
e3f089f0f3 Fix: Docs, Curve generation script (#102)
Fix CUDA compilation docs and update new curve script to generate correct cu(h) files. Resolves #101
2023-06-28 15:55:33 +03:00
hadaringonyama
233927668c triangle block mix fix 2023-06-28 12:31:38 +03:00
guy-ingo
1866df60f1 new kernels implementation, no correctness 2023-06-26 18:25:38 +03:00
guy-ingo
ccc8892a52 rectangle kernel works for powers of 2 2023-06-26 17:56:02 +03:00
guy-ingo
67e4ee2864 triangle kernel works as expected 2023-06-26 16:51:42 +03:00
guy-ingo
6c5fe47e55 big triangle replacment works 2023-06-25 15:28:56 +03:00
hadaringonyama
ed9de3d1e9 signed digits working 2023-06-20 14:48:13 +03:00
Jeremy Felder
cb61755c8b Add streams to poseidon (#105)
Adds streams to the poseidon implementation for BLS12-381. Resolves #91
2023-06-20 12:22:16 +03:00
Jeremy Felder
34a556ac85 Update build workflow (#104) 2023-06-19 14:21:46 +03:00
hadaringonyama
d01e0dbfb1 signed + top bm - WIP 2023-06-18 18:10:11 +03:00
hadaringonyama
6aa6fe0c1c adding sort by bucket size 2023-06-18 11:27:07 +03:00
ChickenLover
26f2f5c76c reduce memory consumption in hash_blocks (#100)
* reduce memory consumption in hash_blocks
2023-06-08 20:51:41 +07:00
Jeremy Felder
434ab70305 Fixed omega retrieval issue (#99)
Resolves #71
2023-06-08 11:47:41 +03:00
Jeremy Felder
cd7b81102c Merge pull request #95 from ingonyama-zk/dev-v2 2023-06-01 14:45:49 +03:00
Jeremy Felder
2917024ef7 Update build action to include dev branch (#96) 2023-06-01 13:56:30 +03:00
ChickenLover
24096b6cf7 Fix windows build (#97) 2023-06-01 13:19:51 +03:00
Jeremy Felder
20de60fd43 Add streams capability (#89) 2023-06-01 10:44:11 +03:00
DmytroTym
b5a24d8e4d G2 CUDA (#78)
G2 arithmetic and MSM implemented in CUDA
2023-06-01 10:44:05 +03:00
HadarIngonyama
9ebf3d4f34 MSM for large sizes (#88)
* bugs fixed

* bugs fixed

* remove short msm from extern call

* code cleaning
2023-06-01 10:40:13 +03:00
ChickenLover
43f8c01afe Feat/poseidon (#75)
Adds poseidon function to compute hashes over multiple preimages in parallel
2023-06-01 10:40:13 +03:00
hadaringonyama
a64df640de temp 2023-05-30 09:37:15 +03:00
Vitalii Hnatyk
a4c676e5b6 fix compilation - rust bench (#80)
Co-authored-by: Vitalii <vitalii@ingonyama.com>
2023-05-29 10:07:18 +03:00
hadaringonyama
1b2b9f2826 code cleaning 2023-05-29 09:22:10 +03:00
hadaringonyama
0a36a545bf remove short msm from extern call 2023-05-28 16:24:04 +03:00
hadaringonyama
407273dee3 bugs fixed 2023-05-28 16:13:47 +03:00
hadaringonyama
f55bd30e13 bugs fixed 2023-05-28 15:53:02 +03:00
guy-ingo
a467c9f56e Supporting Additional Curves (#72)
* init commit - changes for supporting new curves

* refactor + additional curve (bls12-377 works, bn254 - not yet)

* general refactor + curves script + fixing bn245

* revert unnecessary changes + refactor new curve script

* add README and fix limbs_p=limbs_q case in python script
2023-05-15 15:31:18 +03:00
Vitalii Hnatyk
20da109a92 Fix for local machines GoogleTest and CMake (#70)
GoogleTest fix, updated readme
2023-05-10 13:56:25 +03:00
57 changed files with 14943 additions and 1190 deletions

View File

@@ -4,4 +4,4 @@ This PR...
## Linked Issues
Closes #
Resolves #

View File

@@ -1,13 +1,27 @@
name: Build
on:
on:
pull_request:
branches: [ "main" ]
types:
- ready_for_review
- opened
branches:
- main
- dev
paths:
- "icicle/**"
- "src/**"
- "Cargo.toml"
- "build.rs"
- icicle/**
- src/**
- Cargo.toml
- build.rs
push:
branches-ignore:
- main
- dev
paths:
- icicle/**
- src/**
- Cargo.toml
- build.rs
env:
CARGO_TERM_COLOR: always

View File

@@ -27,6 +27,10 @@ ark-bls12-381 = "0.3.0"
ark-bls12-377 = "0.3.0"
ark-bn254 = "0.3.0"
serde = { version = "1.0", features = ["derive"] }
serde_derive = "1.0"
serde_cbor = "0.11.2"
rustacuda = "0.1"
rustacuda_core = "0.1"
rustacuda_derive = "0.1"

View File

@@ -4,6 +4,13 @@
![image (4)](https://user-images.githubusercontent.com/2446179/223707486-ed8eb5ab-0616-4601-8557-12050df8ccf7.png)
<div align="center">
![Build status](https://github.com/ingonyama-zk/icicle/actions/workflows/buil.yml/badge.svg)
![Discord server](https://img.shields.io/discord/1063033227788423299?label=Discord&logo=Discord&logoColor=%23&style=plastic)
![Follow us on twitter](https://img.shields.io/twitter/follow/Ingo_zk?style=social)
</div>
## Background
Zero Knowledge Proofs (ZKPs) are considered one of the greatest achievements of modern cryptography. Accordingly, ZKPs are expected to disrupt a number of industries and will usher in an era of trustless and privacy preserving services and infrastructure.
@@ -36,7 +43,7 @@ ICICLE is a CUDA implementation of general functions widely used in ZKP. ICICLE
```sh
mkdir -p build
nvcc -o build/<ENTER_DIR_NAME> ./icicle/appUtils/ntt/ntt.cu ./icicle/appUtils/msm/msm.cu ./icicle/appUtils/vector_manipulation/ve_mod_mult.cu ./icicle/primitives/projective.cu -lib -arch=native
nvcc -o build/<binary_name> ./icicle/curves/index.cu -lib -arch=native
```
### Testing the CUDA code
@@ -95,52 +102,64 @@ Supporting additional curves can be done as follows:
Create a JSON file with the curve parameters. The curve is defined by the following parameters:
- ``curve_name`` - e.g. ``bls12_381``.
- ``modolus_p`` - scalar field modolus (in decimal).
- ``bit_count_p`` - number of bits needed to represent `` modolus_p`` .
- ``limb_p`` - number of bytes needed to represent `` modolus_p`` (rounded).
- ``modulus_p`` - scalar field modulus (in decimal).
- ``bit_count_p`` - number of bits needed to represent `` modulus_p`` .
- ``limb_p`` - number of bytes needed to represent `` modulus_p`` (rounded).
- ``ntt_size`` - log of the maximal size subgroup of the scalar field.
- ``modolus_q`` - base field modulus (in decimal).
- ``bit_count_q`` - number of bits needed to represent `` modolus_q`` .
- ``limb_q`` number of bytes needed to represent `` modolus_p`` (rounded).
- ``modulus_q`` - base field modulus (in decimal).
- ``bit_count_q`` - number of bits needed to represent `` modulus_q`` .
- ``limb_q`` number of bytes needed to represent `` modulus_p`` (rounded).
- ``weierstrass_b`` - Weierstrauss constant of the curve.
- ``weierstrass_b_g2_re`` - Weierstrauss real constant of the g2 curve.
- ``weierstrass_b_g2_im`` - Weierstrauss imaginary constant of the g2 curve.
- ``gen_x`` - x-value of a generator element for the curve.
- ``gen_y`` - y-value of a generator element for the curve.
- ``gen_x_re`` - real x-value of a generator element for the g2 curve.
- ``gen_x_im`` - imaginary x-value of a generator element for the g2 curve.
- ``gen_y_re`` - real y-value of a generator element for the g2 curve.
- ``gen_y_im`` - imaginary y-value of a generator element for the g2 curve.
Here's an example for BLS12-381.
```
{
"curve_name" : "bls12_381",
"modolus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
"modulus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
"bit_count_p" : 255,
"limb_p" : 8,
"ntt_size" : 32,
"modolus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
"modulus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
"bit_count_q" : 381,
"limb_q" : 12,
"weierstrass_b" : 4,
"weierstrass_b_g2_re":4,
"weierstrass_b_g2_im":4,
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569,
"gen_x_re" : 352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160,
"gen_x_im" : 3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758,
"gen_y_re" : 1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905,
"gen_y_im" : 927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582
}
```
Save the parameters JSON file in ``curve_parameters``.
Save the parameters JSON file under the ``curve_parameters`` directory.
Then run the Python script ``new_curve_script.py `` from the main icicle folder:
Then run the Python script ``new_curve_script.py `` from the root folder:
```
python3 ./curve_parameters/new_curve_script_rust.py ./curve_parameters/bls12_381.json
python3 ./curve_parameters/new_curve_script.py ./curve_parameters/bls12_381.json
```
The script does the following:
- Creates a folder in ``icicle/curves`` with the curve name, which contains all of the files needed for the supported operations in cuda.
- Adds the curve exported operations to ``icicle/curves/index.cu``.
- Adds the curve's exported operations to ``icicle/curves/index.cu``.
- Creates a file with the curve name in ``src/curves`` with the relevant objects for the curve.
- Creates a test file with the curve name in ``src``.
Testing the new curve could be done by running the tests in ``tests_curve_name`` (e.g. ``tests_bls12_381``).
## Contributions
Join our [Discord Server](https://discord.gg/Y4SkbDf2Ff) and find us on the icicle channel. We will be happy to work together to support your use case and talk features, bugs and design.
Join our [Discord Server][DISCORD] and find us on the icicle channel. We will be happy to work together to support your use case and talk features, bugs and design.
### Hall of Fame
@@ -153,13 +172,16 @@ ICICLE is distributed under the terms of the MIT License.
See [LICENSE-MIT][LMIT] for details.
<!-- Begin Links -->
[BLS12-381]: ./icicle/curves/bls12_381.cuh
[BLS12-381]: ./icicle/curves/bls12_381/supported_operations.cu
[BLS12-377]: ./icicle/curves/bls12_377/supported_operations.cu
[BN254]: ./icicle/curves/bn254/supported_operations.cu
[NVCC]: https://docs.nvidia.com/cuda/#installation-guides
[CRV_TEMPLATE]: ./icicle/curves/curve_template.cuh
[CRV_CONFIG]: ./icicle/curves/curve_config.cuh
[B_SCRIPT]: ./build.rs
[FDI]: https://github.com/ingonyama-zk/fast-danksharding
[LMIT]: ./LICENSE
[DISCORD]: https://discord.gg/Y4SkbDf2Ff
[googletest]: https://github.com/google/googletest/
<!-- End Links -->

View File

@@ -2,13 +2,15 @@ extern crate criterion;
use criterion::{criterion_group, criterion_main, Criterion};
use icicle_utils::{set_up_scalars, generate_random_points, commit_batch, get_rng, field::BaseField};
use icicle_utils::test_bls12_381::{
commit_batch_bls12_381, generate_random_points_bls12_381, set_up_scalars_bls12_381,
};
use icicle_utils::utils::*;
#[cfg(feature = "g2")]
use icicle_utils::{commit_batch_g2, field::ExtensionField};
use rustacuda::prelude::*;
const LOG_MSM_SIZES: [usize; 1] = [12];
const BATCH_SIZES: [usize; 2] = [128, 256];
@@ -17,11 +19,11 @@ fn bench_msm(c: &mut Criterion) {
for log_msm_size in LOG_MSM_SIZES {
for batch_size in BATCH_SIZES {
let msm_size = 1 << log_msm_size;
let (scalars, _, _) = set_up_scalars(msm_size, 0, false);
let (scalars, _, _) = set_up_scalars_bls12_381(msm_size, 0, false);
let batch_scalars = vec![scalars; batch_size].concat();
let mut d_scalars = DeviceBuffer::from_slice(&batch_scalars[..]).unwrap();
let points = generate_random_points::<BaseField>(msm_size, get_rng(None));
let points = generate_random_points_bls12_381(msm_size, get_rng(None));
let batch_points = vec![points; batch_size].concat();
let mut d_points = DeviceBuffer::from_slice(&batch_points[..]).unwrap();
@@ -34,7 +36,7 @@ fn bench_msm(c: &mut Criterion) {
group.sample_size(30).bench_function(
&format!("MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|b| b.iter(|| commit_batch(&mut d_points, &mut d_scalars, batch_size))
|b| b.iter(|| commit_batch_bls12_381(&mut d_points, &mut d_scalars, batch_size)),
);
#[cfg(feature = "g2")]
@@ -47,4 +49,4 @@ fn bench_msm(c: &mut Criterion) {
}
criterion_group!(msm_benches, bench_msm);
criterion_main!(msm_benches);
criterion_main!(msm_benches);

View File

@@ -2,7 +2,7 @@ extern crate criterion;
use criterion::{criterion_group, criterion_main, Criterion};
use icicle_utils::{interpolate_scalars_batch, interpolate_points_batch, set_up_scalars, set_up_points};
use icicle_utils::test_bls12_381::{interpolate_scalars_batch_bls12_381, interpolate_points_batch_bls12_381, set_up_scalars_bls12_381, set_up_points_bls12_381};
const LOG_NTT_SIZES: [usize; 1] = [15];
@@ -13,17 +13,17 @@ fn bench_ntt(c: &mut Criterion) {
for log_ntt_size in LOG_NTT_SIZES {
for batch_size in BATCH_SIZES {
let ntt_size = 1 << log_ntt_size;
let (_, mut d_evals, mut d_domain) = set_up_scalars(ntt_size * batch_size, log_ntt_size, true);
let (_, mut d_points_evals, _) = set_up_points(ntt_size * batch_size, log_ntt_size, true);
let (_, mut d_evals, mut d_domain) = set_up_scalars_bls12_381(ntt_size * batch_size, log_ntt_size, true);
let (_, mut d_points_evals, _) = set_up_points_bls12_381(ntt_size * batch_size, log_ntt_size, true);
group.sample_size(100).bench_function(
&format!("Scalar NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size))
|b| b.iter(|| interpolate_scalars_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size))
);
group.sample_size(10).bench_function(
&format!("EC NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| interpolate_points_batch(&mut d_points_evals, &mut d_domain, batch_size))
|b| b.iter(|| interpolate_points_batch_bls12_381(&mut d_points_evals, &mut d_domain, batch_size))
);
}
}
@@ -31,3 +31,4 @@ fn bench_ntt(c: &mut Criterion) {
criterion_group!(ntt_benches, bench_ntt);
criterion_main!(ntt_benches);

View File

@@ -8,9 +8,12 @@ fn main() {
println!("cargo:rerun-if-changed=./icicle");
let arch_type = env::var("ARCH_TYPE").unwrap_or(String::from("native"));
let stream_type = env::var("DEFAULT_STREAM").unwrap_or(String::from("legacy"));
let mut arch = String::from("-arch=");
arch.push_str(&arch_type);
let mut stream = String::from("-default-stream=");
stream.push_str(&stream_type);
let mut nvcc = cc::Build::new();
@@ -22,6 +25,7 @@ fn main() {
nvcc.cuda(true);
nvcc.debug(false);
nvcc.flag(&arch);
nvcc.flag(&stream);
nvcc.files([
"./icicle/curves/index.cu",
]);

View File

@@ -1,13 +1,20 @@
{
"curve_name" : "bls12_377",
"modolus_p" : 8444461749428370424248824938781546531375899335154063827935233455917409239041,
"modulus_p" : 8444461749428370424248824938781546531375899335154063827935233455917409239041,
"bit_count_p" : 253,
"limb_p" : 8,
"ntt_size" : 32,
"modolus_q" : 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177,
"modulus_q" : 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177,
"bit_count_q" : 377,
"limb_q" : 12,
"root_of_unity" : 5928890464389279575069867463136436689218492512582288454256978381122364252082,
"weierstrass_b" : 1,
"gen_x" : 81937999373150964239938255573465948239988671502647976594219695644855304257327692006745978603320413799295628339695,
"gen_y" : 241266749859715473739788878240585681733927191168601896383759122102112907357779751001206799952863815012735208165030
"weierstrass_b_g2_re" : 0,
"weierstrass_b_g2_im" : 155198655607781456406391640216936120121836107652948796323930557600032281009004493664981332883744016074664192874906,
"g1_gen_x" : 81937999373150964239938255573465948239988671502647976594219695644855304257327692006745978603320413799295628339695,
"g1_gen_y" : 241266749859715473739788878240585681733927191168601896383759122102112907357779751001206799952863815012735208165030,
"g2_gen_x_re" : 233578398248691099356572568220835526895379068987715365179118596935057653620464273615301663571204657964920925606294,
"g2_gen_x_im" : 140913150380207355837477652521042157274541796891053068589147167627541651775299824604154852141315666357241556069118,
"g2_gen_y_re" : 63160294768292073209381361943935198908131692476676907196754037919244929611450776219210369229519898517858833747423,
"g2_gen_y_im" : 149157405641012693445398062341192467754805999074082136895788947234480009303640899064710353187729182149407503257491
}

View File

@@ -1,13 +1,20 @@
{
"curve_name" : "bls12_381",
"modolus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
"modulus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
"bit_count_p" : 255,
"limb_p" : 8,
"ntt_size" : 32,
"modolus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
"modulus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
"bit_count_q" : 381,
"limb_q" : 12,
"root_of_unity" : 937917089079007706106976984802249742464848817460758522850752807661925904159,
"weierstrass_b" : 4,
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569
"weierstrass_b_g2_re":4,
"weierstrass_b_g2_im":4,
"g1_gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
"g1_gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569,
"g2_gen_x_re" : 352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160,
"g2_gen_x_im" : 3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758,
"g2_gen_y_re" : 1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905,
"g2_gen_y_im" : 927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582
}

View File

@@ -1,13 +1,20 @@
{
"curve_name" : "bn254",
"modolus_p" : 21888242871839275222246405745257275088548364400416034343698204186575808495617,
"modulus_p" : 21888242871839275222246405745257275088548364400416034343698204186575808495617,
"bit_count_p" : 254,
"limb_p" : 8,
"ntt_size" : 16,
"modolus_q" : 21888242871839275222246405745257275088696311157297823662689037894645226208583,
"ntt_size" : 28,
"modulus_q" : 21888242871839275222246405745257275088696311157297823662689037894645226208583,
"bit_count_q" : 254,
"limb_q" : 8,
"root_of_unity" : 19103219067921713944291392827692070036145651957329286315305642004821462161904,
"weierstrass_b" : 3,
"gen_x" : 1,
"gen_y" : 2
"weierstrass_b_g2_re" : 19485874751759354771024239261021720505790618469301721065564631296452457478373,
"weierstrass_b_g2_im" : 266929791119991161246907387137283842545076965332900288569378510910307636690,
"g1_gen_x" : 1,
"g1_gen_y" : 2,
"g2_gen_x_re" : 10857046999023057135944570762232829481370756359578518086990519993285655852781,
"g2_gen_x_im" : 11559732032986387107991004021392285783925812861821192530917403151452391805634,
"g2_gen_y_re" : 8495653923123431417604973247489272438418190587263600148770280649306958101930,
"g2_gen_y_im" : 4082367875863433681332203403145435568316851327593401208105741076214120093531
}

View File

@@ -1,30 +1,12 @@
import json
import math
import os
from sympy.ntheory import isprime, primitive_root
import subprocess
import random
from string import Template
import sys
data = None
with open(sys.argv[1]) as json_file:
data = json.load(json_file)
curve_name = data["curve_name"]
modolus_p = data["modolus_p"]
bit_count_p = data["bit_count_p"]
limb_p = data["limb_p"]
ntt_size = data["ntt_size"]
modolus_q = data["modolus_q"]
bit_count_q = data["bit_count_q"]
limb_q = data["limb_q"]
weierstrass_b = data["weierstrass_b"]
gen_x = data["gen_x"]
gen_y = data["gen_y"]
def to_hex(val, length):
x = str(hex(val))[2:]
def to_hex(val: int, length):
x = hex(val)[2:]
if len(x) % 8 != 0:
x = "0" * (8-len(x) % 8) + x
if len(x) != length:
@@ -33,133 +15,260 @@ def to_hex(val, length):
chunks = [x[i:i+n] for i in range(0, len(x), n)][::-1]
s = ""
for c in chunks:
s += "0x" + c + ", "
return s
s += f'0x{c}, '
return s[:-2]
def get_root_of_unity(order: int) -> int:
assert (modolus_p - 1) % order == 0
return pow(5, (modolus_p - 1) // order, modolus_p)
def compute_values(modulus, modulus_bit_count, limbs):
limb_size = 8*limbs
modulus_ = to_hex(modulus,limb_size)
modulus_2 = to_hex(modulus*2,limb_size)
modulus_4 = to_hex(modulus*4,limb_size)
modulus_wide = to_hex(modulus,limb_size*2)
modulus_squared = to_hex(modulus*modulus,limb_size)
modulus_squared_2 = to_hex(modulus*modulus*2,limb_size)
modulus_squared_4 = to_hex(modulus*modulus*4,limb_size)
m_raw = int(math.floor(int(pow(2,2*modulus_bit_count) // modulus)))
m = to_hex(m_raw,limb_size)
one = to_hex(1,limb_size)
zero = to_hex(0,limb_size)
def create_field_parameters_struct(modulus, modulus_bits_count,limbs,ntt,size,name):
s = " struct "+name+"{\n"
s += " static constexpr unsigned limbs_count = " + str(limbs)+";\n"
s += " static constexpr storage<limbs_count> modulus = {"+to_hex(modulus,8*limbs)[:-2]+"};\n"
s += " static constexpr storage<limbs_count> modulus_2 = {"+to_hex(modulus*2,8*limbs)[:-2]+"};\n"
s += " static constexpr storage<limbs_count> modulus_4 = {"+to_hex(modulus*4,8*limbs)[:-2]+"};\n"
s += " static constexpr storage<2*limbs_count> modulus_wide = {"+to_hex(modulus,8*limbs*2)[:-2]+"};\n"
s += " static constexpr storage<2*limbs_count> modulus_sqared = {"+to_hex(modulus*modulus,8*limbs)[:-2]+"};\n"
s += " static constexpr storage<2*limbs_count> modulus_sqared_2 = {"+to_hex(modulus*modulus*2,8*limbs)[:-2]+"};\n"
s += " static constexpr storage<2*limbs_count> modulus_sqared_4 = {"+to_hex(modulus*modulus*2*2,8*limbs)[:-2]+"};\n"
s += " static constexpr unsigned modulus_bits_count = "+str(modulus_bits_count)+";\n"
m = int(math.floor(int(pow(2,2*modulus_bits_count) // modulus)))
s += " static constexpr storage<limbs_count> m = {"+ to_hex(m,8*limbs)[:-2] +"};\n"
s += " static constexpr storage<limbs_count> one = {"+ to_hex(1,8*limbs)[:-2] +"};\n"
s += " static constexpr storage<limbs_count> zero = {"+ to_hex(0,8*limbs)[:-2] +"};\n"
return (
modulus_,
modulus_2,
modulus_4,
modulus_wide,
modulus_squared,
modulus_squared_2,
modulus_squared_4,
m,
one,
zero
)
if ntt:
def get_fq_params(modulus, modulus_bit_count, limbs, g1_gen_x, g1_gen_y, g2_gen_x_re, g2_gen_x_im, g2_gen_y_re, g2_gen_y_im):
(
modulus,
modulus_2,
modulus_4,
modulus_wide,
modulus_squared,
modulus_squared_2,
modulus_squared_4,
m,
one,
zero
) = compute_values(modulus, modulus_bit_count, limbs)
limb_size = 8*limbs
return {
'fq_modulus': modulus,
'fq_modulus_2': modulus_2,
'fq_modulus_4': modulus_4,
'fq_modulus_wide': modulus_wide,
'fq_modulus_squared': modulus_squared,
'fq_modulus_squared_2': modulus_squared_2,
'fq_modulus_squared_4': modulus_squared_4,
'fq_m': m,
'fq_one': one,
'fq_zero': zero,
'fq_gen_x': to_hex(g1_gen_x, limb_size),
'fq_gen_y': to_hex(g1_gen_y, limb_size),
'fq_gen_x_re': to_hex(g2_gen_x_re, limb_size),
'fq_gen_x_im': to_hex(g2_gen_x_im, limb_size),
'fq_gen_y_re': to_hex(g2_gen_y_re, limb_size),
'fq_gen_y_im': to_hex(g2_gen_y_im, limb_size)
}
def get_fp_params(modulus, modulus_bit_count, limbs, root_of_unity, size=0):
(
modulus_,
modulus_2,
modulus_4,
modulus_wide,
modulus_squared,
modulus_squared_2,
modulus_squared_4,
m,
one,
zero
) = compute_values(modulus, modulus_bit_count, limbs)
limb_size = 8*limbs
if size > 0:
omega = ''
omega_inv = ''
inv = ''
omegas = []
omegas_inv = []
for k in range(size):
omega = get_root_of_unity(int(pow(2,k+1)))
s += " static constexpr storage<limbs_count> omega"+str(k+1)+"= {"+ to_hex(omega,8*limbs)[:-2]+"};\n"
if k == 0:
om = root_of_unity
else:
om = pow(om, 2, modulus)
omegas.append(om)
omegas_inv.append(pow(om, -1, modulus))
omegas.reverse()
omegas_inv.reverse()
for k in range(size):
omega = get_root_of_unity(int(pow(2,k+1)))
s += " static constexpr storage<limbs_count> omega_inv"+str(k+1)+"= {"+ to_hex(pow(omega, -1, modulus),8*limbs)[:-2]+"};\n"
for k in range(size):
s += " static constexpr storage<limbs_count> inv"+str(k+1)+"= {"+ to_hex(pow(int(pow(2,k+1)), -1, modulus),8*limbs)[:-2]+"};\n"
s+=" };\n"
return s
omega += "\n {"+ to_hex(omegas[k],limb_size)+"}," if k>0 else " {"+ to_hex(omegas[k],limb_size)+"},"
omega_inv += "\n {"+ to_hex(omegas_inv[k],limb_size)+"}," if k>0 else " {"+ to_hex(omegas_inv[k],limb_size)+"},"
inv += "\n {"+ to_hex(pow(int(pow(2,k+1)), -1, modulus),limb_size)+"}," if k>0 else " {"+ to_hex(pow(int(pow(2,k+1)), -1, modulus),limb_size)+"},"
return {
'fp_modulus': modulus_,
'fp_modulus_2': modulus_2,
'fp_modulus_4': modulus_4,
'fp_modulus_wide': modulus_wide,
'fp_modulus_squared': modulus_squared,
'fp_modulus_squared_2': modulus_squared_2,
'fp_modulus_squared_4': modulus_squared_4,
'fp_m': m,
'fp_one': one,
'fp_zero': zero,
'omega': omega[:-1],
'omega_inv': omega_inv[:-1],
'inv': inv[:-1],
}
def create_gen():
s = " struct group_generator {\n"
s += " static constexpr storage<fq_config::limbs_count> generator_x = {"+to_hex(gen_x,8*limb_q)[:-2]+ "};\n"
s += " static constexpr storage<fq_config::limbs_count> generator_y = {"+to_hex(gen_y,8*limb_q)[:-2]+ "};\n"
s+=" };\n"
return s
def get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b):
file_content = ""
file_content += "#pragma once\n#include \"../../utils/storage.cuh\"\n"
file_content += "namespace PARAMS_"+curve_name.upper()+"{\n"
file_content += create_field_parameters_struct(modolus_p,bit_count_p,limb_p,True,ntt_size,"fp_config")
file_content += create_field_parameters_struct(modolus_q,bit_count_q,limb_q,False,0,"fq_config")
file_content += " static constexpr unsigned weierstrass_b = " + str(weierstrass_b)+ ";\n"
file_content += create_gen()
file_content+="}\n"
return file_content
def get_weier_params(weierstrass_b, weierstrass_b_g2_re, weierstrass_b_g2_im, size):
return {
'weier_b': to_hex(weierstrass_b, size),
'weier_b_g2_re': to_hex(weierstrass_b_g2_re, size),
'weier_b_g2_im': to_hex(weierstrass_b_g2_im, size),
}
def get_params(config):
global ntt_size
curve_name = config["curve_name"]
modulus_p = config["modulus_p"]
bit_count_p = config["bit_count_p"]
limb_p = config["limb_p"]
ntt_size = config["ntt_size"]
modulus_q = config["modulus_q"]
bit_count_q = config["bit_count_q"]
limb_q = config["limb_q"]
root_of_unity = config["root_of_unity"]
if root_of_unity == modulus_p:
sys.exit("Invalid root_of_unity value; please update in curve parameters")
weierstrass_b = config["weierstrass_b"]
weierstrass_b_g2_re = config["weierstrass_b_g2_re"]
weierstrass_b_g2_im = config["weierstrass_b_g2_im"]
g1_gen_x = config["g1_gen_x"]
g1_gen_y = config["g1_gen_y"]
g2_generator_x_re = config["g2_gen_x_re"]
g2_generator_x_im = config["g2_gen_x_im"]
g2_generator_y_re = config["g2_gen_y_re"]
g2_generator_y_im = config["g2_gen_y_im"]
params = {
'curve_name_U': curve_name.upper(),
'fp_num_limbs': limb_p,
'fq_num_limbs': limb_q,
'fp_modulus_bit_count': bit_count_p,
'fq_modulus_bit_count': bit_count_q,
'num_omegas': ntt_size
}
fp_params = get_fp_params(modulus_p, bit_count_p, limb_p, root_of_unity, ntt_size)
fq_params = get_fq_params(modulus_q, bit_count_q, limb_q, g1_gen_x, g1_gen_y, g2_generator_x_re, g2_generator_x_im, g2_generator_y_re, g2_generator_y_im)
weier_params = get_weier_params(weierstrass_b, weierstrass_b_g2_re, weierstrass_b_g2_im, 8*limb_q)
return {
**params,
**fp_params,
**fq_params,
**weier_params
}
config = None
with open(sys.argv[1]) as json_file:
config = json.load(json_file)
curve_name_lower = config["curve_name"].lower()
curve_name_upper = config["curve_name"].upper()
limb_q = config["limb_q"]
limb_p = config["limb_p"]
# Create Cuda interface
newpath = "./icicle/curves/"+curve_name
newpath = f'./icicle/curves/{curve_name_lower}'
if not os.path.exists(newpath):
os.makedirs(newpath)
fc = get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b)
text_file = open("./icicle/curves/"+curve_name+"/params.cuh", "w")
n = text_file.write(fc)
text_file.close()
with open("./icicle/curves/curve_template/params.cuh", "r") as params_file:
params_file_template = Template(params_file.read())
params = get_params(config)
params_content = params_file_template.safe_substitute(params)
with open(f'./icicle/curves/{curve_name_lower}/params.cuh', 'w') as f:
f.write(params_content)
with open("./icicle/curves/curve_template/lde.cu", "r") as lde_file:
content = lde_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./icicle/curves/"+curve_name+"/lde.cu", "w")
n = text_file.write(content)
text_file.close()
template_content = Template(lde_file.read())
lde_content = template_content.safe_substitute(
CURVE_NAME_U=curve_name_upper,
CURVE_NAME_L=curve_name_lower
)
with open(f'./icicle/curves/{curve_name_lower}/lde.cu', 'w') as f:
f.write(lde_content)
with open("./icicle/curves/curve_template/msm.cu", "r") as msm_file:
content = msm_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./icicle/curves/"+curve_name+"/msm.cu", "w")
n = text_file.write(content)
text_file.close()
template_content = Template(msm_file.read())
msm_content = template_content.safe_substitute(
CURVE_NAME_U=curve_name_upper,
CURVE_NAME_L=curve_name_lower
)
with open(f'./icicle/curves/{curve_name_lower}/msm.cu', 'w') as f:
f.write(msm_content)
with open("./icicle/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_file:
content = ve_mod_mult_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./icicle/curves/"+curve_name+"/ve_mod_mult.cu", "w")
n = text_file.write(content)
text_file.close()
template_content = Template(ve_mod_mult_file.read())
ve_mod_mult_content = template_content.safe_substitute(
CURVE_NAME_U=curve_name_upper,
CURVE_NAME_L=curve_name_lower
)
with open(f'./icicle/curves/{curve_name_lower}/ve_mod_mult.cu', 'w') as f:
f.write(ve_mod_mult_content)
namespace = '#include "params.cuh"\n'+'''namespace CURVE_NAME_U {
typedef Field<PARAMS_CURVE_NAME_U::fp_config> scalar_field_t;\
typedef scalar_field_t scalar_t;\
typedef Field<PARAMS_CURVE_NAME_U::fq_config> point_field_t;
typedef Projective<point_field_t, scalar_field_t, PARAMS_CURVE_NAME_U::group_generator, PARAMS_CURVE_NAME_U::weierstrass_b> projective_t;
typedef Affine<point_field_t> affine_t;
}'''
with open('./icicle/curves/'+curve_name+'/curve_config.cuh', 'w') as f:
f.write(namespace.replace("CURVE_NAME_U",curve_name.upper()))
with open(f'./icicle/curves/curve_template/curve_config.cuh', 'r') as cc:
template_content = Template(cc.read())
cc_content = template_content.safe_substitute(
CURVE_NAME_U=curve_name_upper,
)
with open(f'./icicle/curves/{curve_name_lower}/curve_config.cuh', 'w') as f:
f.write(cc_content)
eq = '''
#include <cuda.h>\n
#include "curve_config.cuh"\n
#include "../../primitives/projective.cuh"\n
extern "C" bool eq_CURVE_NAME_L(CURVE_NAME_U::projective_t *point1, CURVE_NAME_U::projective_t *point2)
{
return (*point1 == *point2);
}'''
with open('./icicle/curves/'+curve_name+'/projective.cu', 'w') as f:
f.write(eq.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
with open(f'./icicle/curves/curve_template/projective.cu', 'r') as proj:
template_content = Template(proj.read())
proj_content = template_content.safe_substitute(
CURVE_NAME_U=curve_name_upper,
CURVE_NAME_L=curve_name_lower
)
with open(f'./icicle/curves/{curve_name_lower}/projective.cu', 'w') as f:
f.write(proj_content)
supported_operations = '''
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"
'''
with open('./icicle/curves/'+curve_name+'/supported_operations.cu', 'w') as f:
f.write(supported_operations.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
with open('./icicle/curves/index.cu', 'a') as f:
f.write('\n#include "'+curve_name.lower()+'/supported_operations.cu"')
with open(f'./icicle/curves/curve_template/supported_operations.cu', 'r') as supp_ops:
template_content = Template(supp_ops.read())
supp_ops_content = template_content.safe_substitute()
with open(f'./icicle/curves/{curve_name_lower}/supported_operations.cu', 'w') as f:
f.write(supp_ops_content)
with open('./icicle/curves/index.cu', 'r+') as f:
index_text = f.read()
if index_text.find(curve_name_lower) == -1:
f.write(f'\n#include "{curve_name_lower}/supported_operations.cu"')
@@ -168,36 +277,40 @@ with open('./icicle/curves/index.cu', 'a') as f:
if limb_p == limb_q:
with open("./src/curve_templates/curve_same_limbs.rs", "r") as curve_file:
content = curve_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
content = content.replace("CURVE_NAME_U",curve_name_upper)
content = content.replace("CURVE_NAME_L",curve_name_lower)
content = content.replace("_limbs_p",str(limb_p * 8 * 4))
content = content.replace("limbs_p",str(limb_p))
text_file = open("./src/curves/"+curve_name+".rs", "w")
text_file = open("./src/curves/"+curve_name_lower+".rs", "w")
n = text_file.write(content)
text_file.close()
else:
with open("./src/curve_templates/curve_different_limbs.rs", "r") as curve_file:
content = curve_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
content = content.replace("CURVE_NAME_U",curve_name_upper)
content = content.replace("CURVE_NAME_L",curve_name_lower)
content = content.replace("_limbs_p",str(limb_p * 8 * 4))
content = content.replace("limbs_p",str(limb_p))
content = content.replace("_limbs_q",str(limb_q * 8 * 4))
content = content.replace("limbs_q",str(limb_q))
text_file = open("./src/curves/"+curve_name+".rs", "w")
text_file = open("./src/curves/"+curve_name_lower+".rs", "w")
n = text_file.write(content)
text_file.close()
with open("./src/curve_templates/test.rs", "r") as test_file:
content = test_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./src/test_"+curve_name+".rs", "w")
content = content.replace("CURVE_NAME_U",curve_name_upper)
content = content.replace("CURVE_NAME_L",curve_name_lower)
text_file = open("./src/test_"+curve_name_lower+".rs", "w")
n = text_file.write(content)
text_file.close()
with open('./src/curves/mod.rs', 'a') as f:
f.write('\n pub mod ' + curve_name + ';')
with open('./src/curves/mod.rs', 'r+') as f:
mod_text = f.read()
if mod_text.find(curve_name_lower) == -1:
f.write('\npub mod ' + curve_name_lower + ';')
with open('./src/lib.rs', 'a') as f:
f.write('\npub mod ' + curve_name + ';')
with open('./src/lib.rs', 'r+') as f:
lib_text = f.read()
if lib_text.find(curve_name_lower) == -1:
f.write('\npub mod ' + curve_name_lower + ';')

File diff suppressed because it is too large Load Diff

View File

@@ -3,19 +3,19 @@
#pragma once
template <typename S, typename P, typename A>
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device);
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, bool big_triangle, cudaStream_t stream);
template <typename S, typename P, typename A>
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device);
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device, cudaStream_t stream);
template <typename S, typename P, typename A>
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device);
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream);
template <typename S, typename P, typename A>
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device);
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, bool big_triangle, cudaStream_t stream);
template <typename S, typename P, typename A>
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, bool on_device);
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, cudaStream_t stream);
template <typename A, typename S, typename P>
void reference_msm(S* scalars, A* a_points, unsigned size);

View File

@@ -5,83 +5,19 @@
#include "../../utils/cuda_utils.cuh"
#include "../../primitives/projective.cuh"
#include "../../primitives/field.cuh"
#include "../../curves/bls12_381/curve_config.cuh"
// #include "../../curves/bls12_377/curve_config.cuh"
#include "../../curves/bn254/curve_config.cuh"
using namespace BLS12_381;
struct fake_point
{
unsigned val = 0;
__host__ __device__ inline fake_point operator+(fake_point fp) {
return {val+fp.val};
}
__host__ __device__ fake_point zero() {
fake_point p;
return p;
}
};
std::ostream& operator<<(std::ostream &strm, const fake_point &a) {
return strm <<a.val;
}
struct fake_scalar
{
unsigned val = 0;
unsigned bitsize = 32;
// __host__ __device__ unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width){
// return (val>>(digit_num*digit_width))&((1<<digit_width)-1);
// }
__host__ __device__ int get_scalar_digit(int digit_num, int digit_width){
return (val>>(digit_num*digit_width))&((1<<digit_width)-1);
}
__host__ __device__ inline fake_point operator*(fake_point fp) {
fake_point p1;
fake_point p2;
unsigned x = val;
if (x == 0) return fake_point().zero();
unsigned i = 1;
unsigned c_bit = (x & (1<<(bitsize-1)))>>(bitsize-1);
while (c_bit==0 && i<bitsize){
i++;
c_bit = (x & (1<<(bitsize-i)))>>(bitsize-i);
}
p1 = fp;
p2 = p1+p1;
while (i<bitsize){
i++;
c_bit = (x & (1<<(bitsize-i)))>>(bitsize-i);
if (c_bit){
p1 = p1 + p2;
p2 = p2 + p2;
}
else {
p2 = p1 + p2;
p1 = p1 + p1;
}
}
return p1;
}
};
// using namespace BLS12_377;
using namespace BN254;
class Dummy_Scalar {
public:
static constexpr unsigned NBITS = 32;
unsigned x;
// unsigned p = 10;
unsigned p = 1<<30;
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar) {
os << scalar.x;
@@ -93,7 +29,7 @@ class Dummy_Scalar {
}
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2) {
return {p1.x+p2.x};
return {(p1.x+p2.x)%p1.p};
}
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) {
@@ -104,10 +40,11 @@ class Dummy_Scalar {
return (p1.x == p2);
}
// static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar &scalar) {
// return {Dummy_Scalar::neg(point.x)};
// }
static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar &scalar) {
return {scalar.p-scalar.x};
}
static HOST_INLINE Dummy_Scalar rand_host() {
// return {(unsigned)rand()%10};
return {(unsigned)rand()};
}
};
@@ -121,6 +58,10 @@ class Dummy_Projective {
return {0};
}
static HOST_DEVICE_INLINE Dummy_Projective one() {
return {1};
}
static HOST_DEVICE_INLINE Dummy_Projective to_affine(const Dummy_Projective &point) {
return {point.x};
}
@@ -129,9 +70,9 @@ class Dummy_Projective {
return {point.x};
}
// static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
// return {Dummy_Scalar::neg(point.x)};
// }
static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
return {Dummy_Scalar::neg(point.x)};
}
friend HOST_DEVICE_INLINE Dummy_Projective operator+(Dummy_Projective p1, const Dummy_Projective& p2) {
return {p1.x+p2.x};
@@ -187,62 +128,68 @@ typedef affine_t test_affine;
int main()
{
unsigned batch_size = 4;
unsigned msm_size = 1<<15;
unsigned batch_size = 1;
unsigned msm_size = 1<<24;
unsigned N = batch_size*msm_size;
test_scalar *scalars = new test_scalar[N];
test_affine *points = new test_affine[N];
for (unsigned i=0;i<N;i++){
scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
// scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
points[i] = (i%msm_size < 10)? test_projective::to_affine(test_projective::rand_host()): points[i-10];
// scalars[i] = test_scalar::rand_host();
scalars[i] = test_scalar::rand_host();
// points[i] = test_projective::to_affine(test_projective::rand_host());
}
std::cout<<"finished generating"<<std::endl;
// projective_t *short_res = (projective_t*)malloc(sizeof(projective_t));
// test_projective *large_res = (test_projective*)malloc(sizeof(test_projective));
test_projective large_res[batch_size];
test_projective batched_large_res[batch_size];
test_projective large_res[batch_size*2];
// test_projective batched_large_res[batch_size];
// fake_point *large_res = (fake_point*)malloc(sizeof(fake_point));
// fake_point batched_large_res[256];
// short_msm<scalar_t, projective_t, affine_t>(scalars, points, N, short_res);
for (unsigned i=0;i<batch_size;i++){
large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
// for (unsigned i=0;i<batch_size;i++){
// large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
// std::cout<<"final result large"<<std::endl;
// std::cout<<test_projective::to_affine(*large_res)<<std::endl;
}
// }
auto begin = std::chrono::high_resolution_clock::now();
batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
// large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false);
// batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false, true,0);
// std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res+1, false, false,0);
// test_reduce_triangle(scalars);
// test_reduce_rectangle(scalars);
// test_reduce_single(scalars);
auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
std::cout<<test_projective::to_affine(large_res[1])<<std::endl;
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
std::cout<<"final results batched large"<<std::endl;
bool success = true;
for (unsigned i = 0; i < batch_size; i++)
{
std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
std::cout<<"good"<<std::endl;
}
else{
std::cout<<"miss"<<std::endl;
std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
success = false;
}
}
if (success){
std::cout<<"success!"<<std::endl;
}
// std::cout<<"final results batched large"<<std::endl;
// bool success = true;
// for (unsigned i = 0; i < batch_size; i++)
// {
// std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
// if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
// std::cout<<"good"<<std::endl;
// }
// else{
// std::cout<<"miss"<<std::endl;
// std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
// success = false;
// }
// }
// if (success){
// std::cout<<"success!"<<std::endl;
// }
// std::cout<<batched_large_res[0]<<std::endl;
// std::cout<<batched_large_res[1]<<std::endl;

View File

@@ -15,19 +15,20 @@
* @param n Length of `d_domain` array, also equal to the number of evaluations of each polynomial.
* @param batch_size The size of the batch; the length of `d_evaluations` is `n` * `batch_size`.
*/
template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluations, S * d_domain, unsigned n, unsigned batch_size) {
template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluations, S * d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
uint32_t logn = uint32_t(log(n) / log(2));
cudaMemcpy(d_out, d_evaluations, sizeof(E) * n * batch_size, cudaMemcpyDeviceToDevice);
cudaMemcpyAsync(d_out, d_evaluations, sizeof(E) * n * batch_size, cudaMemcpyDeviceToDevice, stream);
int NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
int NUM_BLOCKS = batch_size * max(int((n / 2) / NUM_THREADS), 1);
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
{
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>>(d_out, n, d_domain, n, NUM_BLOCKS, s, false);
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, n, d_domain, n, NUM_BLOCKS, s, false);
}
NUM_BLOCKS = (n * batch_size + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>> (d_out, n * batch_size, S::inv_log_size(logn));
template_normalize_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>> (d_out, n * batch_size, S::inv_log_size(logn));
cudaStreamSynchronize(stream);
return 0;
}
@@ -39,8 +40,8 @@ template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluat
* @param d_domain Domain on which the polynomial is evaluated. Must be a subgroup.
* @param n Length of `d_evaluations` and the size `d_domain` arrays (they should have equal length).
*/
template <typename E, typename S> int interpolate(E * d_out, E * d_evaluations, S * d_domain, unsigned n) {
return interpolate_batch <E, S> (d_out, d_evaluations, d_domain, n, 1);
template <typename E, typename S> int interpolate(E * d_out, E * d_evaluations, S * d_domain, unsigned n, cudaStream_t stream) {
return interpolate_batch <E, S> (d_out, d_evaluations, d_domain, n, 1, stream);
}
template < typename E > __global__ void fill_array(E * arr, E val, uint32_t n) {
@@ -62,7 +63,7 @@ template < typename E > __global__ void fill_array(E * arr, E val, uint32_t n) {
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
*/
template <typename E, typename S>
int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, unsigned batch_size, bool coset, S * coset_powers) {
int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, unsigned batch_size, bool coset, S * coset_powers, cudaStream_t stream) {
uint32_t logn = uint32_t(log(domain_size) / log(2));
if (domain_size > n) {
// allocate and initialize an array of stream handles to parallelize data copying across batches
@@ -80,18 +81,19 @@ int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_
cudaStreamDestroy(memcpy_streams[i]);
}
} else
cudaMemcpy(d_out, d_coefficients, sizeof(E) * domain_size * batch_size, cudaMemcpyDeviceToDevice);
cudaMemcpyAsync(d_out, d_coefficients, sizeof(E) * domain_size * batch_size, cudaMemcpyDeviceToDevice, stream);
if (coset)
batch_vector_mult(coset_powers, d_out, domain_size, batch_size);
batch_vector_mult(coset_powers, d_out, domain_size, batch_size, stream);
int NUM_THREADS = min(domain_size / 2, MAX_THREADS_BATCH);
int chunks = max(int((domain_size / 2) / NUM_THREADS), 1);
int NUM_BLOCKS = batch_size * chunks;
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
{
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>>(d_out, domain_size, d_domain, domain_size, batch_size * chunks, logn - s - 1, true);
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, domain_size, d_domain, domain_size, batch_size * chunks, logn - s - 1, true);
}
cudaStreamSynchronize(stream);
return 0;
}
@@ -107,76 +109,76 @@ int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
*/
template <typename E, typename S>
int evaluate(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, bool coset, S * coset_powers) {
return evaluate_batch <E, S> (d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers);
int evaluate(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, bool coset, S * coset_powers, cudaStream_t stream) {
return evaluate_batch <E, S> (d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers, stream);
}
template <typename S>
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n) {
return interpolate(d_out, d_evaluations, d_domain, n);
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream) {
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
template <typename S>
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
template <typename E, typename S>
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n) {
return interpolate(d_out, d_evaluations, d_domain, n);
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream) {
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
template <typename E, typename S>
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
template <typename S>
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n) {
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream) {
S* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
template <typename S>
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size) {
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream) {
S* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
template <typename E, typename S>
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n) {
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream) {
S* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
template <typename E, typename S>
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, unsigned batch_size) {
unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream) {
S* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
template <typename S>
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers) {
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream) {
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
template <typename E, typename S>
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers) {
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream) {
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
template <typename E, typename S>
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers) {
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream) {
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
template <typename E, typename S>
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers) {
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream) {
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
#endif

View File

@@ -3,44 +3,44 @@
#pragma once
template <typename S>
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n);
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream);
template <typename S>
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size);
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
template <typename E, typename S>
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n);
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream);
template <typename E, typename S>
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size);
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
template <typename S>
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n);
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream);
template <typename S>
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size);
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
template <typename E, typename S>
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n);
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream);
template <typename E, typename S>
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, unsigned batch_size);
unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
template <typename S>
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers);
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
template <typename S>
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers);
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream);
template <typename E, typename S>
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers);
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
template <typename E, typename S>
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers);
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream);
#endif

View File

@@ -28,11 +28,12 @@ const uint32_t MAX_THREADS_BATCH = 256;
* @param n_twiddles number of twiddle factors.
* @param omega multiplying factor.
*/
template < typename S > S * fill_twiddle_factors_array(uint32_t n_twiddles, S omega) {
template < typename S > S * fill_twiddle_factors_array(uint32_t n_twiddles, S omega, cudaStream_t stream) {
size_t size_twiddles = n_twiddles * sizeof(S);
S * d_twiddles;
cudaMalloc( & d_twiddles, size_twiddles);
twiddle_factors_kernel<S> <<< 1, 1 >>> (d_twiddles, n_twiddles, omega);
cudaMallocAsync(& d_twiddles, size_twiddles, stream);
twiddle_factors_kernel<S> <<< 1, 1, 0, stream>>> (d_twiddles, n_twiddles, omega);
cudaStreamSynchronize(stream);
return d_twiddles;
}
@@ -89,14 +90,14 @@ template < typename T > __global__ void reverse_order_kernel(T* arr, T* arr_reve
* @param logn log(n).
* @param batch_size the size of the batch.
*/
template < typename T > void reverse_order_batch(T* arr, uint32_t n, uint32_t logn, uint32_t batch_size) {
template < typename T > void reverse_order_batch(T* arr, uint32_t n, uint32_t logn, uint32_t batch_size, cudaStream_t stream) {
T* arr_reversed;
cudaMalloc(&arr_reversed, n * batch_size * sizeof(T));
cudaMallocAsync(&arr_reversed, n * batch_size * sizeof(T), stream);
int number_of_threads = MAX_THREADS_BATCH;
int number_of_blocks = (n * batch_size + number_of_threads - 1) / number_of_threads;
reverse_order_kernel <<<number_of_blocks, number_of_threads>>> (arr, arr_reversed, n, logn, batch_size);
cudaMemcpy(arr, arr_reversed, n * batch_size * sizeof(T), cudaMemcpyDeviceToDevice);
cudaFree(arr_reversed);
reverse_order_kernel <<<number_of_blocks, number_of_threads, 0, stream>>> (arr, arr_reversed, n, logn, batch_size);
cudaMemcpyAsync(arr, arr_reversed, n * batch_size * sizeof(T), cudaMemcpyDeviceToDevice, stream);
cudaFreeAsync(arr_reversed, stream);
}
/**
@@ -107,8 +108,8 @@ template < typename T > void reverse_order_batch(T* arr, uint32_t n, uint32_t lo
* @param n length of `arr`.
* @param logn log(n).
*/
template < typename T > void reverse_order(T* arr, uint32_t n, uint32_t logn) {
reverse_order_batch(arr, n, logn, 1);
template < typename T > void reverse_order(T* arr, uint32_t n, uint32_t logn, cudaStream_t stream) {
reverse_order_batch(arr, n, logn, 1, stream);
}
/**
@@ -155,14 +156,15 @@ template < typename E, typename S > __global__ void template_normalize_kernel(E
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
* @param n_twiddles length of d_twiddles.
*/
template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr, uint32_t n, uint32_t logn, S * d_twiddles, uint32_t n_twiddles) {
template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr, uint32_t n, uint32_t logn, S * d_twiddles, uint32_t n_twiddles, cudaStream_t stream) {
uint32_t m = 2;
// TODO: optimize with separate streams for each iteration
for (uint32_t s = 0; s < logn; s++) {
for (uint32_t i = 0; i < n; i += m) {
uint32_t shifted_m = m >> 1;
uint32_t number_of_threads = MAX_NUM_THREADS ^ ((shifted_m ^ MAX_NUM_THREADS) & -(shifted_m < MAX_NUM_THREADS));
uint32_t number_of_blocks = shifted_m / MAX_NUM_THREADS + 1;
template_butterfly_kernel < E, S > <<< number_of_threads, number_of_blocks >>> (d_arr, d_twiddles, n, n_twiddles, m, i, m >> 1);
template_butterfly_kernel < E, S > <<< number_of_threads, number_of_blocks, 0, stream >>> (d_arr, d_twiddles, n, n_twiddles, m, i, m >> 1);
}
m <<= 1;
}
@@ -177,21 +179,22 @@ template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr
* @param n_twiddles length of d_twiddles.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_twiddles, uint32_t n_twiddles, bool inverse) {
template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_twiddles, uint32_t n_twiddles, bool inverse, cudaStream_t stream) {
uint32_t logn = uint32_t(log(n) / log(2));
size_t size_E = n * sizeof(E);
E * arrReversed = template_reverse_order < E > (arr, n, logn);
E * d_arrReversed;
cudaMalloc( & d_arrReversed, size_E);
cudaMemcpy(d_arrReversed, arrReversed, size_E, cudaMemcpyHostToDevice);
template_ntt_on_device_memory < E, S > (d_arrReversed, n, logn, d_twiddles, n_twiddles);
cudaMallocAsync( & d_arrReversed, size_E, stream);
cudaMemcpyAsync(d_arrReversed, arrReversed, size_E, cudaMemcpyHostToDevice, stream);
template_ntt_on_device_memory < E, S > (d_arrReversed, n, logn, d_twiddles, n_twiddles, stream);
if (inverse) {
int NUM_THREADS = MAX_NUM_THREADS;
int NUM_BLOCKS = (n + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arrReversed, n, S::inv_log_size(logn));
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS, 0, stream >>> (d_arrReversed, n, S::inv_log_size(logn));
}
cudaMemcpy(arrReversed, d_arrReversed, size_E, cudaMemcpyDeviceToHost);
cudaFree(d_arrReversed);
cudaMemcpyAsync(arrReversed, d_arrReversed, size_E, cudaMemcpyDeviceToHost, stream);
cudaFreeAsync(d_arrReversed, stream);
cudaStreamSynchronize(stream);
return arrReversed;
}
@@ -201,21 +204,22 @@ template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_
* @param n length of d_arr.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
template<typename E,typename S> uint32_t ntt_end2end_template(E * arr, uint32_t n, bool inverse) {
template<typename E,typename S> uint32_t ntt_end2end_template(E * arr, uint32_t n, bool inverse, cudaStream_t stream) {
uint32_t logn = uint32_t(log(n) / log(2));
uint32_t n_twiddles = n;
S * twiddles = new S[n_twiddles];
S * d_twiddles;
if (inverse){
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn));
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn), stream);
} else{
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn));
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn), stream);
}
E * result = ntt_template < E, S > (arr, n, d_twiddles, n_twiddles, inverse);
E * result = ntt_template < E, S > (arr, n, d_twiddles, n_twiddles, inverse, stream);
for(int i = 0; i < n; i++){
arr[i] = result[i];
}
cudaFree(d_twiddles);
cudaFreeAsync(d_twiddles, stream);
cudaStreamSynchronize(stream);
return 0; // TODO add
}
@@ -336,42 +340,45 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
* @param n size of batch.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
template <typename E, typename S> uint32_t ntt_end2end_batch_template(E * arr, uint32_t arr_size, uint32_t n, bool inverse) {
template <typename E, typename S> uint32_t ntt_end2end_batch_template(E * arr, uint32_t arr_size, uint32_t n, bool inverse, cudaStream_t stream) {
int batches = int(arr_size / n);
uint32_t logn = uint32_t(log(n) / log(2));
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as BLS12_381::scalar_t::omega() is of that order.
size_t size_E = arr_size * sizeof(E);
S * d_twiddles;
if (inverse){
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn));
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn), stream);
} else{
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn));
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn), stream);
}
E * d_arr;
cudaMalloc( & d_arr, size_E);
cudaMemcpy(d_arr, arr, size_E, cudaMemcpyHostToDevice);
cudaMallocAsync( & d_arr, size_E, stream);
cudaMemcpyAsync(d_arr, arr, size_E, cudaMemcpyHostToDevice, stream);
int NUM_THREADS = MAX_THREADS_BATCH;
int NUM_BLOCKS = (batches + NUM_THREADS - 1) / NUM_THREADS;
ntt_template_kernel_rev_ord<E, S><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
ntt_template_kernel_rev_ord<E, S><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_arr, n, logn, batches);
NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
int chunks = max(int((n / 2) / NUM_THREADS), 1);
int total_tasks = batches * chunks;
NUM_BLOCKS = total_tasks;
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
//TODO: this loop also can be unrolled
for (uint32_t s = 0; s < logn; s++)
{
ntt_template_kernel<E, S><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
ntt_template_kernel<E, S><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
cudaStreamSynchronize(stream);
}
if (inverse == true)
{
NUM_THREADS = MAX_NUM_THREADS;
NUM_BLOCKS = (arr_size + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arr, arr_size, S::inv_log_size(logn));
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS, 0, stream>>> (d_arr, arr_size, S::inv_log_size(logn));
}
cudaMemcpy(arr, d_arr, size_E, cudaMemcpyDeviceToHost);
cudaFree(d_arr);
cudaFree(d_twiddles);
cudaMemcpyAsync(arr, d_arr, size_E, cudaMemcpyDeviceToHost, stream);
cudaFreeAsync(d_arr, stream);
cudaFreeAsync(d_twiddles, stream);
cudaStreamSynchronize(stream);
return 0;
}

View File

@@ -0,0 +1,51 @@
#pragma once
#include <map>
#include <stdexcept>
#include <cassert>
#include "constants/constants_2.h"
#include "constants/constants_4.h"
#include "constants/constants_8.h"
#include "constants/constants_11.h"
uint32_t partial_rounds_number_from_arity(const uint32_t arity) {
switch (arity) {
case 2:
return 55;
case 4:
return 56;
case 8:
return 57;
case 11:
return 57;
default:
throw std::invalid_argument( "unsupported arity" );
}
};
// TO-DO: change to mapping
const uint32_t FULL_ROUNDS_DEFAULT = 4;
// TO-DO: for now, the constants are only generated in bls12_381
template <typename S>
S * load_constants(const uint32_t arity) {
unsigned char * constants;
switch (arity) {
case 2:
constants = constants_2;
break;
case 4:
constants = constants_4;
break;
case 8:
constants = constants_8;
break;
case 11:
constants = constants_11;
break;
default:
throw std::invalid_argument( "unsupported arity" );
}
return reinterpret_cast< S * >(constants);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,995 @@
unsigned char constants_2[] = {
0xd8, 0xd3, 0x6e, 0x9d, 0x00, 0x0a, 0x32, 0xa7, 0x36, 0x8b, 0x75, 0xa2,
0x92, 0xac, 0x1e, 0x50, 0x24, 0x4a, 0xbb, 0x1d, 0x86, 0x51, 0xbd, 0x23,
0x7a, 0xe1, 0x3a, 0xfa, 0x4b, 0x06, 0x9f, 0x66, 0x15, 0x3f, 0x9d, 0x2b,
0x84, 0xab, 0x72, 0x6e, 0x34, 0x27, 0xac, 0x45, 0x96, 0x7c, 0xe6, 0xee,
0x6c, 0xa6, 0x4f, 0xc8, 0xf2, 0x7f, 0x53, 0xe4, 0x36, 0xca, 0xac, 0xfb,
0xde, 0xa8, 0x61, 0x0a, 0xd5, 0x65, 0x81, 0x12, 0x71, 0x47, 0x23, 0x1e,
0x30, 0x49, 0xaa, 0x1a, 0x4e, 0x2b, 0x29, 0x17, 0x5b, 0x27, 0xdf, 0x45,
0x8a, 0x1e, 0x1b, 0xf9, 0x09, 0x9d, 0xb8, 0x24, 0xfa, 0xce, 0xe9, 0x21,
0xd1, 0xa0, 0x12, 0x2f, 0xca, 0x56, 0x5f, 0x2f, 0x1b, 0x40, 0x6c, 0x31,
0x90, 0x55, 0x2f, 0x1f, 0x2b, 0xd0, 0xd2, 0xd2, 0xc9, 0x24, 0x26, 0x38,
0x05, 0x18, 0x53, 0x38, 0x1d, 0x42, 0xfc, 0x0b, 0xc8, 0xc5, 0x8b, 0x5a,
0xf3, 0x19, 0xca, 0xff, 0xf5, 0x3b, 0xef, 0x15, 0x4e, 0xf4, 0xcc, 0xbe,
0xe8, 0x42, 0x69, 0x68, 0xf4, 0xfc, 0xd3, 0xc3, 0xf0, 0x5d, 0x03, 0x89,
0x4a, 0xae, 0xb0, 0x13, 0x43, 0x39, 0xaa, 0x45, 0xb2, 0x41, 0x38, 0xf8,
0x20, 0x2d, 0xd1, 0x1f, 0x3c, 0xc4, 0xaa, 0xf1, 0x40, 0xd0, 0x26, 0xe4,
0x81, 0x74, 0x41, 0xc1, 0xb4, 0xd0, 0x64, 0x8d, 0xf9, 0xdd, 0x9a, 0x4b,
0x38, 0x45, 0x02, 0xcc, 0x01, 0x65, 0x25, 0x72, 0x24, 0x2b, 0xba, 0x3f,
0x2c, 0x1a, 0xbf, 0xc6, 0x3c, 0xcf, 0xa1, 0xef, 0x9c, 0xda, 0x2e, 0x9b,
0x14, 0xc7, 0x81, 0x65, 0x85, 0xc3, 0x24, 0x2c, 0x65, 0xc6, 0x51, 0x3a,
0xd3, 0xc1, 0xd1, 0xd5, 0x42, 0x8f, 0x2f, 0x3c, 0x0e, 0x61, 0xaf, 0xb8,
0xf6, 0x3c, 0x32, 0x1a, 0x9f, 0x28, 0x91, 0x9d, 0x02, 0x18, 0x97, 0x47,
0x79, 0x21, 0xf9, 0x61, 0x40, 0x5c, 0x16, 0xa9, 0xc5, 0x6e, 0xca, 0x9f,
0x37, 0xf1, 0x2a, 0x13, 0xf1, 0xf0, 0xf0, 0xef, 0xb4, 0x56, 0xf4, 0x08,
0x6a, 0x47, 0x52, 0x4a, 0x32, 0xbf, 0xb3, 0xab, 0xa5, 0xdf, 0x36, 0x12,
0x63, 0x5f, 0x2e, 0xc2, 0xf4, 0x17, 0xa4, 0x0c, 0xfd, 0xeb, 0x3d, 0xe9,
0xc7, 0x1d, 0x97, 0x5e, 0x52, 0x61, 0x75, 0x96, 0xfb, 0x11, 0x60, 0xcd,
0xf8, 0xca, 0xa8, 0x11, 0xdc, 0x6e, 0xcd, 0x59, 0xf3, 0x37, 0x41, 0xd6,
0x61, 0xb3, 0x74, 0xe5, 0xa8, 0xc1, 0x51, 0xf5, 0xa2, 0x57, 0x2e, 0x32,
0xe4, 0x0e, 0xd2, 0xed, 0x73, 0xca, 0x58, 0x7a, 0x81, 0x16, 0x9c, 0xa0,
0xa0, 0xc0, 0xaa, 0x65, 0xe0, 0x3f, 0x43, 0xb7, 0x03, 0xb0, 0x35, 0x84,
0x61, 0xf6, 0x60, 0x0e, 0x18, 0xb3, 0x0a, 0xc0, 0x59, 0x98, 0x57, 0x80,
0x7e, 0x26, 0x8b, 0x26, 0x0f, 0x94, 0x44, 0xbc, 0xc9, 0x71, 0xf8, 0x19,
0x9a, 0x3b, 0x0a, 0xea, 0x9a, 0xc0, 0x41, 0x26, 0x9b, 0x50, 0xe7, 0x5d,
0x1b, 0x59, 0x22, 0x26, 0x79, 0x3a, 0xae, 0x39, 0x61, 0x13, 0x9c, 0x8f,
0x8e, 0xd0, 0xbf, 0x84, 0xb8, 0xca, 0x3f, 0x71, 0x41, 0x70, 0x35, 0x88,
0x03, 0x63, 0x0d, 0xc5, 0x1a, 0xcb, 0x63, 0x11, 0x32, 0x90, 0xb6, 0xaa,
0xfb, 0xdc, 0xd9, 0xc3, 0xa1, 0x93, 0x41, 0xe8, 0xa1, 0xfb, 0x2d, 0x88,
0x9e, 0xe6, 0x37, 0x21, 0xb2, 0xbe, 0xfc, 0x64, 0x18, 0x37, 0x87, 0xbc,
0x36, 0xf2, 0xe4, 0x08, 0x5e, 0x87, 0x5f, 0x78, 0xbc, 0xbd, 0x4c, 0x91,
0x53, 0xb5, 0xf3, 0x3c, 0xe9, 0x8c, 0x1d, 0xa7, 0x0a, 0x95, 0x90, 0x55,
0xfd, 0xfd, 0x61, 0xd1, 0x38, 0x21, 0xca, 0x5c, 0x8f, 0xc0, 0xc9, 0x39,
0x81, 0x8e, 0x2d, 0x7c, 0xa7, 0xab, 0x84, 0xef, 0x09, 0xd3, 0x1f, 0xb2,
0xc8, 0xe7, 0x6a, 0x9c, 0xe5, 0x0d, 0xea, 0x15, 0x0f, 0xdf, 0x55, 0x7e,
0x25, 0x01, 0xad, 0x36, 0xdc, 0xfe, 0x2c, 0xc2, 0xf5, 0xd1, 0x57, 0xef,
0xf2, 0x1d, 0xdd, 0x82, 0xb0, 0x20, 0xbf, 0xfe, 0x8a, 0xa8, 0x4d, 0xb5,
0xd2, 0x03, 0x0d, 0x49, 0x43, 0xaf, 0x4a, 0xac, 0x95, 0x64, 0x6b, 0x62,
0x6e, 0x75, 0x84, 0x85, 0x56, 0x8f, 0x99, 0x6d, 0xfa, 0xb4, 0x37, 0x30,
0xc4, 0x06, 0x82, 0x32, 0xf0, 0x86, 0x6e, 0x5f, 0xde, 0x62, 0xa3, 0x61,
0xdc, 0x17, 0x37, 0x5c, 0xc8, 0x9b, 0x78, 0x6a, 0xf1, 0xa2, 0x77, 0x76,
0x44, 0x93, 0xbe, 0x6b, 0x71, 0x39, 0x0a, 0x35, 0x86, 0xa3, 0x4c, 0x84,
0x0f, 0xb1, 0xbf, 0x51, 0x88, 0x18, 0x88, 0x57, 0x09, 0x97, 0x55, 0xdf,
0x29, 0xe6, 0xff, 0xaa, 0xaf, 0x7b, 0x27, 0x29, 0xca, 0xf5, 0x11, 0x64,
0xa2, 0x2e, 0xb9, 0x99, 0xc5, 0xc4, 0x56, 0x1b, 0x03, 0x0c, 0xf1, 0x7e,
0x9b, 0xf1, 0x8b, 0x57, 0xc7, 0x4c, 0x4a, 0x05, 0x84, 0x78, 0x67, 0x3c,
0x82, 0xee, 0xe4, 0x55, 0xfb, 0xf2, 0x2e, 0xcb, 0x3a, 0x64, 0x88, 0x44,
0x15, 0x1b, 0x23, 0xaa, 0xe9, 0x9a, 0x04, 0xbd, 0xb4, 0xbd, 0x34, 0x28,
0x84, 0x34, 0x55, 0x13, 0x2c, 0xbd, 0x43, 0x1c, 0x3b, 0xaf, 0x1d, 0x95,
0x28, 0x18, 0x5f, 0xe7, 0x33, 0xa2, 0x4c, 0x58, 0xca, 0x42, 0xbe, 0x9e,
0x8e, 0x72, 0xae, 0xf1, 0x08, 0x40, 0x8f, 0x55, 0x61, 0x68, 0xa3, 0x2e,
0xff, 0x75, 0xe7, 0x38, 0x44, 0x68, 0x4a, 0x40, 0x05, 0x3f, 0x64, 0xf2,
0xf3, 0xd7, 0x8d, 0xd4, 0x3d, 0x69, 0x2e, 0xc9, 0x94, 0x3f, 0xc8, 0x75,
0xa1, 0xa1, 0xe5, 0x0b, 0x26, 0xec, 0x36, 0xe9, 0x29, 0x67, 0x4b, 0xc9,
0x2b, 0x0f, 0x4b, 0xa0, 0x56, 0xaf, 0x8b, 0x81, 0xea, 0x11, 0xf5, 0x42,
0xd3, 0xf2, 0x6e, 0x91, 0xf3, 0x35, 0x60, 0xe6, 0xa0, 0x80, 0x09, 0x45,
0xfe, 0x29, 0x4c, 0xde, 0x96, 0x76, 0x6e, 0x27, 0xfc, 0x64, 0xd3, 0xf7,
0xb4, 0xbf, 0xfa, 0x8c, 0x13, 0x68, 0x52, 0xf7, 0x9c, 0x86, 0x74, 0xe1,
0x8a, 0x01, 0x97, 0x73, 0x69, 0x29, 0x21, 0x0d, 0xae, 0xcf, 0xa7, 0x83,
0xf2, 0x8b, 0x93, 0x8d, 0xef, 0xf2, 0x7c, 0xc1, 0xfd, 0x50, 0xca, 0x95,
0x53, 0x77, 0x46, 0xa8, 0xe0, 0xb9, 0x6f, 0x4e, 0xe5, 0x55, 0x35, 0x2b,
0x6b, 0x67, 0x5e, 0x4e, 0x24, 0x51, 0x85, 0xf3, 0x19, 0x3d, 0x4c, 0x5c,
0x94, 0x7c, 0xb4, 0xe5, 0x49, 0xe8, 0xdf, 0xdd, 0x34, 0x4c, 0x64, 0x13,
0x6e, 0x67, 0x6a, 0xc6, 0x5e, 0x82, 0x1f, 0xdc, 0x0e, 0xf6, 0x15, 0x2a,
0x6f, 0xdd, 0x3a, 0x5c, 0x7d, 0x20, 0xbf, 0xd5, 0x89, 0xa1, 0x25, 0x2f,
0x59, 0xe7, 0xca, 0xa2, 0xb4, 0xde, 0x72, 0x2c, 0xe8, 0xe6, 0xc5, 0x3d,
0x93, 0xa5, 0xe0, 0x47, 0x7d, 0xe5, 0x65, 0x58, 0x59, 0xec, 0x62, 0x79,
0xc5, 0x69, 0x21, 0xfb, 0x12, 0x45, 0xe7, 0xb3, 0xa0, 0x5c, 0xba, 0xfb,
0x70, 0x38, 0x8b, 0x80, 0x95, 0x90, 0x72, 0x85, 0xf8, 0x61, 0xb3, 0x6f,
0x5f, 0x9d, 0x2d, 0x36, 0x9f, 0xe0, 0xeb, 0xc2, 0xd2, 0xcd, 0x33, 0x5a,
0x26, 0x78, 0xa7, 0x7f, 0x24, 0x52, 0x52, 0x3a, 0xe6, 0xf6, 0xf4, 0xa0,
0x9c, 0x52, 0x1d, 0xd5, 0x26, 0x5d, 0x9a, 0x7b, 0x9f, 0xba, 0x63, 0x6a,
0xda, 0xb9, 0xed, 0xec, 0x37, 0x8b, 0x24, 0x76, 0xcf, 0x1d, 0xa0, 0x3e,
0x1e, 0xc7, 0x60, 0x73, 0xc5, 0x5b, 0x7f, 0x93, 0x84, 0x62, 0x9b, 0xe8,
0x28, 0x07, 0xac, 0x77, 0xe7, 0xb3, 0x7d, 0x6f, 0x51, 0x91, 0xc7, 0xf3,
0x4d, 0x17, 0xeb, 0xe7, 0xc5, 0x31, 0x1e, 0x2d, 0x75, 0x2e, 0x30, 0xd8,
0xe8, 0x75, 0x4c, 0x37, 0x7a, 0xd6, 0x5c, 0x75, 0x1d, 0xc0, 0xb4, 0x99,
0xa2, 0x49, 0xe0, 0x72, 0xe2, 0xb3, 0x30, 0xed, 0x8b, 0xa7, 0x7e, 0x07,
0x79, 0x36, 0x77, 0xee, 0x15, 0x71, 0x1f, 0xe0, 0x0a, 0x98, 0x0a, 0xee,
0xcf, 0x0c, 0x59, 0xcc, 0xc7, 0x48, 0x50, 0xd3, 0xea, 0x41, 0xe1, 0x66,
0xd4, 0x3b, 0x24, 0xe9, 0x63, 0x4c, 0x16, 0xec, 0x51, 0x8e, 0x06, 0xc2,
0x11, 0x53, 0x58, 0x35, 0xd0, 0xd1, 0x77, 0x43, 0x59, 0x7f, 0xdb, 0x35,
0xe6, 0xea, 0x04, 0x1b, 0x69, 0x2e, 0x03, 0x2e, 0x5e, 0xa9, 0x67, 0xc7,
0x24, 0x52, 0xef, 0x5e, 0x1d, 0x8c, 0xe8, 0xa3, 0xa4, 0x8e, 0xc4, 0xcb,
0x5d, 0x8a, 0x57, 0x31, 0xdf, 0x3c, 0x38, 0xdf, 0xe6, 0xaf, 0x21, 0x77,
0x49, 0x02, 0xbc, 0x32, 0xde, 0x1e, 0x9f, 0x6a, 0x95, 0x9f, 0x94, 0x3b,
0x84, 0xdc, 0xea, 0x0b, 0x09, 0x76, 0x2f, 0x93, 0x70, 0x12, 0x8c, 0xb6,
0xd0, 0x20, 0xc3, 0xe2, 0x94, 0x8a, 0xb6, 0x2f, 0x9a, 0x03, 0xef, 0x5b,
0xc0, 0x47, 0xbf, 0xd0, 0xa7, 0x90, 0xe6, 0x13, 0xac, 0xc9, 0x2e, 0x10,
0xef, 0x10, 0xd1, 0x81, 0x65, 0x5d, 0xfa, 0x50, 0x65, 0xc0, 0xd6, 0x59,
0x3a, 0xe0, 0x5c, 0x94, 0xbd, 0xf8, 0xc6, 0x25, 0x85, 0x61, 0x2f, 0xa5,
0x5c, 0x0d, 0x7e, 0xe1, 0xa8, 0x04, 0x3b, 0x1f, 0x61, 0x34, 0x4b, 0x30,
0xf3, 0x84, 0x8e, 0x89, 0xb1, 0x58, 0xe2, 0x48, 0xf4, 0x79, 0x7f, 0x5f,
0x95, 0x1d, 0xe7, 0x71, 0x47, 0x5d, 0x43, 0x69, 0xd4, 0x7b, 0xe6, 0x87,
0x9e, 0x11, 0x12, 0x2a, 0x4f, 0xf7, 0x0c, 0xfb, 0x3c, 0x0b, 0x1d, 0xe7,
0xa3, 0x0b, 0xdf, 0xc7, 0xd1, 0x35, 0xdb, 0x7d, 0x58, 0x7b, 0x46, 0x40,
0x3e, 0xf6, 0xc1, 0xb6, 0x22, 0x99, 0x13, 0xd0, 0xd9, 0x3f, 0x28, 0xc5,
0xef, 0xeb, 0x6a, 0xda, 0xf5, 0xfb, 0x2d, 0x9d, 0x3c, 0x23, 0x23, 0x7d,
0x1f, 0x81, 0x55, 0xaf, 0xd4, 0xec, 0x7b, 0x09, 0x79, 0xe1, 0x90, 0xde,
0xe3, 0xff, 0x9a, 0x13, 0x2b, 0x4e, 0x70, 0x5c, 0x63, 0x72, 0x88, 0xfa,
0x74, 0x4f, 0xb7, 0xd1, 0x33, 0x3b, 0x8a, 0xec, 0x2e, 0x9b, 0x77, 0x0b,
0x8c, 0x3a, 0x91, 0x2c, 0x63, 0x3c, 0x03, 0x40, 0x1e, 0x78, 0x83, 0x4c,
0xcc, 0x0a, 0x3b, 0x99, 0x8d, 0x10, 0x54, 0x79, 0x3e, 0x85, 0x9d, 0xab,
0x2f, 0xd6, 0x9b, 0xab, 0x63, 0x85, 0x7a, 0x80, 0xe2, 0x43, 0xc0, 0x31,
0xa9, 0x77, 0x9a, 0x12, 0xf6, 0xcb, 0x8d, 0xfb, 0x65, 0xed, 0xb7, 0x11,
0xff, 0x5c, 0xe0, 0x8f, 0x16, 0xc6, 0x9b, 0x36, 0x56, 0x2b, 0x8a, 0xe1,
0x9b, 0xe1, 0xfc, 0x01, 0x3f, 0xa4, 0x49, 0x5d, 0x59, 0x19, 0xbd, 0xbe,
0x17, 0x49, 0xe5, 0xa1, 0xa7, 0xf7, 0x26, 0x19, 0xa4, 0x0f, 0xd3, 0x5b,
0x74, 0xa9, 0xfe, 0x53, 0x88, 0x51, 0xa8, 0x9c, 0x3f, 0xde, 0xbd, 0x19,
0xa0, 0x40, 0x31, 0x50, 0x1f, 0x8b, 0x92, 0x97, 0xb2, 0x1c, 0xc7, 0xb0,
0xdd, 0xd5, 0xae, 0x88, 0x92, 0x00, 0x4a, 0xd7, 0xb7, 0xf8, 0x02, 0xaa,
0x25, 0xbb, 0x05, 0x89, 0x78, 0xda, 0x9c, 0x00, 0xb5, 0x48, 0x2c, 0x0d,
0xf3, 0xfa, 0xfc, 0x4e, 0x6f, 0x3d, 0x96, 0x74, 0x92, 0xb5, 0x16, 0x01,
0x88, 0xb2, 0x4a, 0x9c, 0x43, 0x35, 0x75, 0xef, 0x3d, 0x6e, 0xd0, 0x92,
0xc0, 0x24, 0xf6, 0xd6, 0xc0, 0x01, 0xef, 0x23, 0xb0, 0x6e, 0x27, 0x21,
0x5e, 0xa1, 0x8c, 0x0f, 0x69, 0xbc, 0x09, 0x47, 0x2c, 0x13, 0x5d, 0xba,
0x32, 0x3c, 0x37, 0x62, 0x3a, 0xdf, 0x38, 0x5a, 0x17, 0xe2, 0xfc, 0xe3,
0x8e, 0xe2, 0xd6, 0x6d, 0x50, 0x1b, 0xd1, 0xcc, 0x4b, 0x9d, 0x66, 0x0a,
0x90, 0x85, 0x01, 0x3b, 0xa2, 0x77, 0xd4, 0x95, 0x90, 0x63, 0x49, 0x5e,
0x27, 0xe7, 0xab, 0xc5, 0xf1, 0xf9, 0xa8, 0xf2, 0x40, 0xb1, 0x14, 0x35,
0x4d, 0x69, 0x4c, 0x51, 0x3b, 0x9b, 0x10, 0x50, 0x70, 0x34, 0xf4, 0xbe,
0x14, 0x88, 0xb5, 0x40, 0x1a, 0x68, 0x74, 0x40, 0x4c, 0xa3, 0xa7, 0x0d,
0x32, 0x64, 0xaa, 0xef, 0xf5, 0x7b, 0x1a, 0x60, 0x1d, 0xfc, 0x33, 0xf2,
0x50, 0xc6, 0x39, 0x28, 0x53, 0xe7, 0x98, 0xbf, 0xbd, 0x1e, 0xac, 0x80,
0x35, 0x5d, 0x7a, 0x18, 0x96, 0x8f, 0xb1, 0x41, 0xc2, 0xcb, 0x7d, 0xd0,
0x75, 0xd4, 0xc2, 0x11, 0x78, 0xd8, 0xa1, 0x98, 0x53, 0x1c, 0x59, 0x72,
0xac, 0xc1, 0x37, 0x0f, 0x42, 0x13, 0x0b, 0x98, 0xf9, 0x6e, 0x6f, 0x36,
0x53, 0x8d, 0x66, 0x46, 0x65, 0xf0, 0x27, 0xd3, 0xe3, 0xf0, 0x10, 0x5d,
0x1b, 0xae, 0x8d, 0x49, 0xec, 0xe6, 0x40, 0xfc, 0xfa, 0xbe, 0x55, 0x60,
0x4b, 0xfe, 0xd0, 0xca, 0x6a, 0x45, 0xd0, 0xd5, 0xe1, 0x5f, 0x20, 0x67,
0x09, 0x4e, 0x6d, 0x59, 0xef, 0xba, 0xec, 0x57, 0x41, 0xfa, 0x62, 0x1c,
0x54, 0xa4, 0x74, 0x46, 0xd1, 0x91, 0x48, 0xc9, 0xa6, 0x07, 0x01, 0xd1,
0x43, 0xa0, 0xe7, 0x7f, 0x35, 0xa0, 0x6f, 0xe4, 0x57, 0xb0, 0xb8, 0x99,
0x7c, 0x93, 0x4a, 0x0d, 0x4b, 0x0a, 0xd6, 0x24, 0xb2, 0x27, 0xd1, 0xa8,
0x2e, 0x5b, 0x3c, 0xcc, 0x17, 0xb2, 0x8a, 0x70, 0x93, 0x2b, 0x00, 0x96,
0x2d, 0x90, 0x4d, 0x67, 0x62, 0xb8, 0xc6, 0xd1, 0x46, 0xda, 0x3b, 0x6d,
0xdf, 0xd6, 0x03, 0xf2, 0x01, 0xa2, 0x89, 0x6c, 0x50, 0xd5, 0xf0, 0xb1,
0xd2, 0x24, 0xdd, 0x02, 0x42, 0xde, 0x1d, 0x5b, 0x00, 0xe0, 0x5f, 0x5f,
0x31, 0xf8, 0x59, 0x9d, 0xc8, 0xa4, 0x70, 0x4d, 0x49, 0x54, 0xc3, 0x94,
0xbc, 0x58, 0x2e, 0x03, 0x02, 0xba, 0x43, 0x2b, 0xfd, 0x0f, 0x9c, 0x0f,
0x91, 0x28, 0xf4, 0x3b, 0xe7, 0xb1, 0x3b, 0x69, 0xbd, 0x6a, 0x8f, 0x20,
0xab, 0x8f, 0xd2, 0x5a, 0xf4, 0x00, 0x92, 0xcd, 0x45, 0xd5, 0x96, 0x37,
0x31, 0x0e, 0xfd, 0x75, 0xda, 0xa4, 0x0c, 0x57, 0xcf, 0x7b, 0x1b, 0xf5,
0xa9, 0xcd, 0xff, 0xaf, 0xe8, 0x54, 0x52, 0x8a, 0x9e, 0x03, 0x97, 0x5e,
0x62, 0x3f, 0x09, 0x6d, 0x54, 0x61, 0x7d, 0xfc, 0x7a, 0x33, 0x85, 0x38,
0x9a, 0x67, 0x4d, 0xb2, 0x24, 0xa7, 0x7d, 0x33, 0xff, 0x3d, 0xe5, 0x7f,
0x7d, 0x09, 0x60, 0x87, 0xa6, 0xe4, 0x96, 0x2d, 0x3d, 0x1a, 0xa4, 0x3d,
0x2e, 0x49, 0xcd, 0xb3, 0x62, 0x45, 0xa9, 0x84, 0xb3, 0xd8, 0xa5, 0x94,
0x07, 0xf0, 0x67, 0x39, 0xbc, 0x85, 0x9d, 0x3f, 0x14, 0xd2, 0x53, 0x83,
0x2e, 0x85, 0x89, 0x69, 0xc7, 0xe7, 0x88, 0xbf, 0x3e, 0x1d, 0x40, 0x53,
0x95, 0xc8, 0x78, 0x03, 0x87, 0x80, 0x93, 0x9c, 0x88, 0x32, 0x70, 0x2e,
0x91, 0x7b, 0x8f, 0x2b, 0x83, 0xd7, 0x32, 0x88, 0x5c, 0x94, 0x65, 0x4b,
0x1a, 0x31, 0xe1, 0x16, 0x25, 0x03, 0x6e, 0xfd, 0x91, 0x7c, 0x33, 0x81,
0xcd, 0x36, 0xbb, 0xf5, 0xd2, 0x7a, 0x65, 0x29, 0x34, 0xd7, 0x0e, 0x58,
0x75, 0xef, 0xda, 0x5e, 0xc0, 0x38, 0x16, 0x02, 0xff, 0x42, 0x1a, 0xad,
0xc5, 0x17, 0x61, 0x89, 0x83, 0xe1, 0xc0, 0x2c, 0x5f, 0xae, 0xaa, 0x4e,
0x0f, 0x4c, 0xd6, 0xe6, 0x14, 0xf2, 0xe9, 0x06, 0xc0, 0x16, 0x05, 0x9d,
0xd4, 0xa3, 0x32, 0x69, 0xa8, 0x8f, 0x51, 0x8c, 0x23, 0xfe, 0x66, 0x8b,
0x79, 0x79, 0xc2, 0x6c, 0xe8, 0xff, 0x1f, 0x24, 0xf9, 0x7e, 0xe1, 0x17,
0x23, 0x65, 0xf1, 0x53, 0x8e, 0x74, 0xd5, 0xb8, 0xc8, 0x95, 0x65, 0x00,
0xf3, 0x5f, 0x88, 0x99, 0x77, 0x8f, 0x71, 0xe5, 0xac, 0xee, 0x85, 0x4a,
0x22, 0x8b, 0x3b, 0xb9, 0xa6, 0x71, 0x54, 0x0a, 0x03, 0x60, 0x21, 0x82,
0x2f, 0xd6, 0x20, 0x91, 0x5d, 0xd9, 0x33, 0x5e, 0x54, 0x48, 0xf7, 0xfb,
0x6b, 0xd1, 0xef, 0x89, 0x0e, 0xd6, 0x4a, 0x18, 0x7d, 0x89, 0x19, 0x45,
0xae, 0x60, 0x2c, 0x91, 0x0a, 0x2e, 0x9c, 0xae, 0x8b, 0xd4, 0xd8, 0x03,
0x3d, 0x33, 0xc1, 0x31, 0x68, 0x7e, 0xed, 0xa8, 0xe3, 0xa8, 0x13, 0x65,
0x64, 0xc6, 0x5c, 0x5d, 0x60, 0xef, 0xfa, 0xf1, 0x2d, 0x33, 0x32, 0xcb,
0xc5, 0x4d, 0x0b, 0x48, 0xec, 0x84, 0x39, 0x92, 0x9a, 0xdc, 0x60, 0x0b,
0x0a, 0x66, 0xd9, 0xaa, 0x04, 0x53, 0x84, 0xe4, 0xeb, 0x71, 0x40, 0x82,
0xc6, 0x6c, 0xb8, 0xc0, 0xd1, 0x44, 0x9b, 0xb2, 0xb8, 0xa1, 0x5b, 0x14,
0x3d, 0xc7, 0x13, 0x1b, 0xee, 0x19, 0xb0, 0x60, 0xc5, 0x0a, 0xc6, 0x40,
0x7f, 0x0c, 0x2e, 0x75, 0x3c, 0x6f, 0x49, 0xba, 0x38, 0x0d, 0x03, 0x54,
0x04, 0xa3, 0x51, 0x0e, 0xaf, 0x3e, 0x34, 0xcb, 0x13, 0x22, 0x15, 0xfa,
0xc1, 0x16, 0x9e, 0x58, 0x78, 0x47, 0x9c, 0xad, 0x06, 0xcd, 0xf4, 0x10,
0xee, 0x0d, 0x78, 0x64, 0x8d, 0x70, 0x45, 0x55, 0xc2, 0x76, 0x45, 0x40,
0xf0, 0xe2, 0xfc, 0x0e, 0x8a, 0xc4, 0x78, 0x56, 0x7f, 0x8f, 0x05, 0x16,
0x31, 0xd3, 0x42, 0xd7, 0xfe, 0xbb, 0xaa, 0x89, 0x3c, 0x53, 0xd8, 0xa6,
0x95, 0x43, 0x72, 0xa2, 0x00, 0x15, 0xe7, 0x83, 0xea, 0x9e, 0xad, 0x69,
0x93, 0xc5, 0x9d, 0xfc, 0xc2, 0x8d, 0x3c, 0x58, 0xd8, 0x70, 0xd7, 0xcb,
0x65, 0x53, 0xc0, 0x9e, 0x83, 0x7e, 0x02, 0xbf, 0x6a, 0x90, 0x2f, 0xa2,
0xbe, 0x5d, 0x68, 0xeb, 0xbd, 0xad, 0xaf, 0xee, 0x91, 0x8a, 0xeb, 0x4e,
0x98, 0xc9, 0xed, 0x24, 0x0d, 0x0d, 0x8d, 0xdf, 0x16, 0xf6, 0x8e, 0x10,
0x59, 0x38, 0xa4, 0xd1, 0x0c, 0x09, 0x61, 0x06, 0x63, 0xcc, 0x5a, 0x84,
0x63, 0x1d, 0x90, 0xc8, 0xfb, 0x7b, 0x49, 0xcb, 0xcf, 0x5b, 0x34, 0x47,
0x96, 0x4e, 0xb1, 0x13, 0x66, 0xd5, 0x1f, 0x18, 0x30, 0xdd, 0xd7, 0x9d,
0xb8, 0x98, 0x41, 0xf2, 0x13, 0x1d, 0xc0, 0xbc, 0xf9, 0x03, 0x37, 0x48,
0x06, 0x89, 0xcf, 0xa8, 0x2e, 0x68, 0x3f, 0x16, 0xf3, 0x41, 0x22, 0xfc,
0x11, 0x2d, 0xb2, 0x29, 0x5c, 0x56, 0x0f, 0xf9, 0x84, 0x22, 0xee, 0x59,
0xb3, 0x95, 0x0b, 0xc7, 0xf0, 0x3a, 0xe4, 0x08, 0xce, 0x91, 0xdc, 0xa4,
0x1e, 0xad, 0x5e, 0x30, 0x9a, 0x0e, 0xe3, 0xb2, 0x81, 0x13, 0x97, 0x57,
0x31, 0x8f, 0x9c, 0xe1, 0x92, 0xa3, 0xab, 0x69, 0x94, 0x02, 0x5c, 0x68,
0xe9, 0x41, 0x1a, 0xc7, 0x17, 0xde, 0xaf, 0x77, 0xc5, 0x44, 0x2d, 0x6a,
0x2f, 0xd0, 0x16, 0x41, 0x40, 0x74, 0x57, 0x01, 0xd0, 0xaa, 0xe6, 0x70,
0x13, 0xf5, 0x3f, 0x0a, 0x39, 0x4d, 0xb1, 0x82, 0x87, 0x9b, 0xd6, 0x2b,
0xbd, 0xec, 0xa7, 0xcb, 0xdb, 0x57, 0x4e, 0x49, 0xa5, 0x8a, 0xa4, 0xbe,
0x83, 0xe6, 0x7c, 0x8b, 0x79, 0x4a, 0x7f, 0x39, 0x23, 0x07, 0xa8, 0xa9,
0x4e, 0xbd, 0xc3, 0xbb, 0xcf, 0xbc, 0xe2, 0x48, 0xa3, 0x60, 0xc0, 0x2c,
0x8d, 0x60, 0x26, 0x49, 0x07, 0x92, 0x78, 0xdb, 0x99, 0x94, 0x0e, 0x02,
0x27, 0x08, 0x9c, 0xc8, 0x23, 0x03, 0xfb, 0x6d, 0x18, 0x89, 0xe7, 0x3c,
0x08, 0xe5, 0x93, 0x31, 0xaa, 0x06, 0xa9, 0x86, 0x70, 0x40, 0x9e, 0x08,
0x5d, 0x7d, 0x8d, 0xa8, 0xee, 0xa0, 0x31, 0x49, 0x35, 0x99, 0x78, 0xf3,
0x97, 0x77, 0x05, 0x5f, 0xb2, 0xc8, 0xdc, 0xd8, 0xec, 0x9d, 0x48, 0xb5,
0xa4, 0x13, 0x01, 0x45, 0xe9, 0xe3, 0x84, 0x1c, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x01, 0x00, 0x00, 0x40, 0xff, 0xff, 0xff, 0x3f,
0xff, 0xc4, 0xfe, 0x3f, 0x02, 0x3b, 0xce, 0xfe, 0x03, 0x62, 0x39, 0x07,
0x06, 0x62, 0x6b, 0x26, 0xf6, 0x1d, 0x36, 0x5f, 0x7e, 0x3d, 0xf2, 0x56,
0x34, 0x33, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xcc, 0x65, 0x6a, 0x65, 0x66,
0x9b, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51,
0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45, 0x01, 0x00, 0x00, 0x40,
0xff, 0xff, 0xff, 0x3f, 0xff, 0xc4, 0xfe, 0x3f, 0x02, 0x3b, 0xce, 0xfe,
0x03, 0x62, 0x39, 0x07, 0x06, 0x62, 0x6b, 0x26, 0xf6, 0x1d, 0x36, 0x5f,
0x7e, 0x3d, 0xf2, 0x56, 0x34, 0x33, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xcc,
0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c,
0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45,
0x01, 0x00, 0x00, 0x80, 0x54, 0x55, 0x55, 0xd5, 0xa9, 0x4c, 0xa9, 0x2a,
0xad, 0x08, 0x1e, 0x1b, 0xaf, 0xde, 0x06, 0x08, 0x5c, 0x89, 0x05, 0x80,
0x11, 0x93, 0x58, 0x4d, 0xc5, 0x60, 0x9b, 0x60, 0x34, 0x33, 0x33, 0x33,
0xcc, 0xcc, 0xcc, 0xcc, 0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32,
0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c,
0x98, 0x97, 0x8e, 0x45, 0x01, 0x00, 0x00, 0x80, 0x54, 0x55, 0x55, 0xd5,
0xa9, 0x4c, 0xa9, 0x2a, 0xad, 0x08, 0x1e, 0x1b, 0xaf, 0xde, 0x06, 0x08,
0x5c, 0x89, 0x05, 0x80, 0x11, 0x93, 0x58, 0x4d, 0xc5, 0x60, 0x9b, 0x60,
0x25, 0x49, 0x92, 0x24, 0xdb, 0xb6, 0x6d, 0xdb, 0x48, 0x1a, 0x24, 0x49,
0xdc, 0x2e, 0x36, 0xaa, 0x4a, 0x62, 0x77, 0x70, 0x4b, 0x62, 0xc7, 0x57,
0x82, 0x91, 0x51, 0xe7, 0x60, 0x54, 0x1f, 0x21, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x3e, 0x2c, 0xe4, 0xf5, 0x63, 0xa7, 0xf2, 0x0e,
0x51, 0x85, 0x27, 0x20, 0x86, 0xec, 0x5b, 0x36, 0xe4, 0xea, 0xf3, 0x0c,
0x61, 0xaa, 0xc6, 0x06, 0x74, 0xa3, 0xcc, 0xc1, 0x24, 0x1c, 0x32, 0x69,
0x95, 0x82, 0xd1, 0xb0, 0xf9, 0x5d, 0x48, 0x7e, 0x90, 0xf2, 0x18, 0x4e,
0xad, 0x05, 0x11, 0x3b, 0xc9, 0xfa, 0xfe, 0xf4, 0xe4, 0x3a, 0xfc, 0xd1,
0xb8, 0x4b, 0xee, 0xab, 0x1c, 0x80, 0x90, 0x0d, 0x01, 0x00, 0x00, 0x40,
0xff, 0xff, 0xff, 0x3f, 0xff, 0xc4, 0xfe, 0x3f, 0x02, 0x3b, 0xce, 0xfe,
0x03, 0x62, 0x39, 0x07, 0x06, 0x62, 0x6b, 0x26, 0xf6, 0x1d, 0x36, 0x5f,
0x7e, 0x3d, 0xf2, 0x56, 0xa5, 0xd2, 0xd6, 0x97, 0x8d, 0x8a, 0xf9, 0x2a,
0x81, 0x39, 0xfd, 0xef, 0xe5, 0xd1, 0x41, 0x7d, 0xb2, 0xa6, 0x20, 0x46,
0x9a, 0xc4, 0x02, 0xcc, 0x8e, 0x38, 0xd8, 0xa2, 0x2d, 0x8f, 0xa7, 0x5e,
0xc2, 0x91, 0x1b, 0xf6, 0x2c, 0xb3, 0x21, 0xed, 0x91, 0x8c, 0x3e, 0xb5,
0x8e, 0x10, 0x26, 0x1f, 0x86, 0x4d, 0xae, 0x08, 0x65, 0x72, 0x2b, 0x5d,
0x7c, 0x64, 0x10, 0xfa, 0xc1, 0xa6, 0x20, 0x55, 0x34, 0x33, 0x33, 0x33,
0xcc, 0xcc, 0xcc, 0xcc, 0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32,
0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c,
0x98, 0x97, 0x8e, 0x45, 0xc2, 0x91, 0x1b, 0xf6, 0x2c, 0xb3, 0x21, 0xed,
0x91, 0x8c, 0x3e, 0xb5, 0x8e, 0x10, 0x26, 0x1f, 0x86, 0x4d, 0xae, 0x08,
0x65, 0x72, 0x2b, 0x5d, 0x7c, 0x64, 0x10, 0xfa, 0xc1, 0xa6, 0x20, 0x55,
0x54, 0x24, 0x25, 0x3c, 0xb1, 0x71, 0x3e, 0xfe, 0x4e, 0xcd, 0x04, 0x70,
0xb5, 0xcd, 0x65, 0x35, 0x9b, 0xb9, 0x69, 0x70, 0xfc, 0x9e, 0xc1, 0xa4,
0x2c, 0xe6, 0xb5, 0xd6, 0x38, 0x75, 0x07, 0x52, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xa2, 0x92, 0xa6, 0xd6, 0xd4, 0x73, 0x2f, 0x8d,
0x27, 0x96, 0x8a, 0xc6, 0xc2, 0xbe, 0x74, 0x03, 0x5f, 0x90, 0x8e, 0xac,
0x46, 0x55, 0x68, 0x45, 0x37, 0x1a, 0xb9, 0xf9, 0x97, 0x93, 0xd3, 0x59,
0x64, 0xe7, 0x4f, 0xee, 0x6e, 0x02, 0x20, 0x8d, 0xbb, 0xe5, 0x84, 0x23,
0xf2, 0x41, 0x5f, 0x9f, 0xb2, 0xcf, 0xe4, 0x7d, 0xa9, 0x3a, 0xde, 0xdf,
0xd5, 0xb6, 0x90, 0xd5, 0x24, 0xa0, 0xe0, 0x47, 0xd4, 0xfc, 0x91, 0xd8,
0xff, 0x9b, 0x40, 0xeb, 0x00, 0x2b, 0x35, 0x8c, 0x86, 0x3c, 0x71, 0xa0,
0x84, 0xfc, 0x18, 0xf1, 0x16, 0x08, 0x99, 0xe5, 0x0c, 0x47, 0x83, 0xcb,
0x7e, 0x7f, 0x96, 0x58, 0x72, 0x40, 0x10, 0x59, 0x28, 0x31, 0x2f, 0x0c,
0x1c, 0xa1, 0x19, 0x7f, 0x78, 0x5a, 0x5b, 0x8b, 0x40, 0xe2, 0x52, 0xa3,
0xda, 0xea, 0xec, 0x8f, 0x71, 0xbc, 0x5d, 0x9a, 0xb2, 0xe8, 0x86, 0x6b,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0xf6, 0xd4, 0x61, 0x0e,
0xe7, 0x6c, 0x28, 0x1a, 0x40, 0x1b, 0x06, 0x94, 0x15, 0xb8, 0x36, 0x75,
0xd6, 0x24, 0xc8, 0x9b, 0x0d, 0xde, 0x46, 0xc4, 0x4d, 0x88, 0x8c, 0xb5,
0x17, 0xfd, 0x3a, 0x6b, 0x8b, 0xfc, 0x1a, 0xd8, 0x20, 0x32, 0xfa, 0x3d,
0xe4, 0xac, 0xa3, 0xc4, 0x8c, 0xce, 0xac, 0xbf, 0x42, 0x77, 0x63, 0x24,
0x8d, 0x33, 0x7f, 0xc1, 0x04, 0x61, 0x88, 0x6e, 0xae, 0x24, 0x52, 0x2f,
0xc6, 0xe7, 0x50, 0x37, 0x91, 0x5c, 0xb7, 0x6a, 0x69, 0xeb, 0xfc, 0x51,
0x70, 0x80, 0xa5, 0x77, 0x83, 0xe5, 0x3e, 0xba, 0x99, 0xfa, 0x5b, 0xfa,
0x44, 0xc2, 0x17, 0xe5, 0x7b, 0xab, 0xfe, 0x58, 0x07, 0x0a, 0x3e, 0x04,
0x21, 0x6c, 0x74, 0x03, 0x24, 0x15, 0x15, 0x1a, 0x63, 0xa3, 0xf5, 0xaf,
0x38, 0xf9, 0xf0, 0x3f, 0x1c, 0xf2, 0x2f, 0xba, 0x9e, 0x7d, 0xac, 0x24,
0x16, 0xc3, 0x93, 0x1e, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x7c, 0x21, 0x29, 0xeb, 0xdd, 0xd9, 0xc2, 0xc2, 0x87, 0x30, 0x82, 0xe0,
0xe9, 0xa7, 0x35, 0x64, 0xc9, 0x67, 0xa9, 0xed, 0xd0, 0xc5, 0x91, 0x3b,
0xc9, 0xf2, 0xd4, 0xd9, 0xc5, 0x89, 0x91, 0x4f, 0xc3, 0xd3, 0xbe, 0x3f,
0xb1, 0x31, 0x98, 0x25, 0x25, 0x83, 0x24, 0xcd, 0x54, 0x99, 0xdb, 0x6f,
0xa7, 0x2d, 0x31, 0xc4, 0x53, 0xe1, 0x69, 0xa6, 0x35, 0xd5, 0x8d, 0x11,
0x70, 0xfa, 0x26, 0x1e, 0x28, 0xbd, 0xfe, 0x69, 0x57, 0x63, 0x6c, 0x33,
0xe6, 0xb6, 0x10, 0x41, 0xb8, 0xbe, 0x1f, 0xf6, 0x3a, 0xbe, 0xb5, 0x6a,
0x57, 0x66, 0xd0, 0xe4, 0x2a, 0x6b, 0xc3, 0xaa, 0x4f, 0xf2, 0xba, 0x5b,
0xd7, 0xbe, 0xb3, 0xcc, 0x01, 0x81, 0x30, 0xdc, 0x9a, 0xd6, 0xab, 0x6a,
0x87, 0x56, 0x69, 0x23, 0xef, 0x37, 0xac, 0xbc, 0xaf, 0xec, 0x35, 0xea,
0x74, 0xc9, 0xbf, 0x9d, 0x06, 0x76, 0xbc, 0x1d, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xcb, 0xf6, 0x96, 0x28, 0xa7, 0x00, 0x6b, 0x60,
0x4b, 0xd1, 0x5a, 0x13, 0x03, 0xa3, 0xda, 0x7d, 0xb6, 0x2d, 0xaa, 0x2b,
0x12, 0xf8, 0x5b, 0x0c, 0x81, 0xb4, 0x61, 0x51, 0x55, 0x40, 0x5c, 0x5e,
0xb1, 0x5a, 0x71, 0xf9, 0x06, 0x13, 0xed, 0x2b, 0x89, 0xc0, 0x01, 0x55,
0xa8, 0xaa, 0x45, 0x36, 0xc8, 0x84, 0xfc, 0x78, 0x69, 0x40, 0x55, 0x89,
0x88, 0xbf, 0x02, 0xd3, 0xa7, 0xd9, 0xc5, 0x48, 0x6a, 0x73, 0xaa, 0x9b,
0x54, 0x06, 0x6c, 0x88, 0x2b, 0xef, 0x5a, 0x3f, 0x70, 0xbd, 0xb8, 0x38,
0x75, 0x9d, 0xe1, 0xf2, 0x8a, 0x8a, 0x69, 0x26, 0x6f, 0x07, 0xf1, 0x5b,
0x00, 0x0a, 0x28, 0x41, 0x3a, 0xb1, 0xf8, 0x8d, 0xaa, 0x7f, 0xd5, 0x90,
0x34, 0xdf, 0xc9, 0xa0, 0x59, 0x13, 0x36, 0x53, 0x28, 0x15, 0xed, 0x06,
0x7c, 0x1c, 0x2d, 0x43, 0xab, 0x50, 0x3c, 0xc9, 0x4b, 0x14, 0xda, 0x36,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x7a, 0xc0, 0x5b, 0x5c,
0x19, 0x50, 0x23, 0xa3, 0x8b, 0x3d, 0x80, 0x09, 0x63, 0x38, 0x5e, 0x65,
0x16, 0xd0, 0xa3, 0xa3, 0xe7, 0x28, 0x5a, 0x5f, 0xa7, 0x0a, 0x4f, 0x0c,
0x34, 0x95, 0x26, 0x70, 0x85, 0x11, 0x97, 0x36, 0xa2, 0x76, 0x64, 0xd9,
0x7a, 0x8e, 0xa6, 0xa1, 0x51, 0x9a, 0x89, 0xda, 0x38, 0x46, 0xd0, 0x68,
0x30, 0xde, 0x70, 0xf8, 0x88, 0x7d, 0xa9, 0x19, 0x45, 0x62, 0xb0, 0x6f,
0xf3, 0xc4, 0xd7, 0xfd, 0x95, 0xb9, 0xd1, 0x1c, 0x7e, 0xb5, 0x58, 0xa6,
0x63, 0xaf, 0xcb, 0x4b, 0x52, 0x83, 0x85, 0x0e, 0xed, 0x33, 0xfa, 0xb3,
0x61, 0x90, 0x61, 0x68, 0xc2, 0xba, 0x54, 0x5d, 0x23, 0xc8, 0xfb, 0x0e,
0x7d, 0x7a, 0x8c, 0xa9, 0x09, 0x2a, 0x21, 0x4e, 0x23, 0x02, 0xf2, 0x0d,
0x84, 0xd1, 0xab, 0x8a, 0x13, 0x8c, 0x9a, 0x6a, 0x20, 0xd2, 0x09, 0x82,
0xfb, 0x1e, 0xe6, 0x17, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0xd1, 0x0f, 0xb4, 0xbd, 0x0e, 0x68, 0x31, 0x1a, 0x30, 0xc8, 0x92, 0xd8,
0xaa, 0xcd, 0x04, 0x05, 0x87, 0x5c, 0x49, 0x68, 0xd2, 0xba, 0x3e, 0xb2,
0x09, 0xa3, 0xb0, 0x8a, 0x59, 0xcf, 0xd2, 0x4b, 0xeb, 0xf7, 0xaa, 0x35,
0xf6, 0x48, 0x57, 0x40, 0x6b, 0x0e, 0x26, 0x37, 0x2a, 0x91, 0x0c, 0xea,
0x3f, 0x26, 0x97, 0xc8, 0xc4, 0x4a, 0x18, 0xd3, 0x06, 0x12, 0x93, 0xa5,
0xad, 0x9a, 0x69, 0x6a, 0xc1, 0x4d, 0x2c, 0x31, 0x45, 0x03, 0x3a, 0x2e,
0x24, 0xd5, 0xd0, 0xdf, 0xeb, 0xdb, 0xdf, 0xd0, 0x6f, 0x3d, 0x14, 0xa8,
0x7a, 0x5f, 0x53, 0xe3, 0x9e, 0xb8, 0x68, 0x4d, 0x91, 0xe2, 0x0a, 0x48,
0x94, 0x73, 0xd9, 0x84, 0x9a, 0x55, 0x3e, 0x42, 0x99, 0xe7, 0x1e, 0x73,
0x64, 0xd4, 0x8d, 0x26, 0xf0, 0x16, 0x36, 0x08, 0x7b, 0xde, 0x8c, 0xe9,
0x0a, 0x27, 0x3a, 0x90, 0x6f, 0x6c, 0x90, 0x66, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xd1, 0x81, 0xc0, 0x8c, 0xc5, 0x6d, 0x18, 0x32,
0xd7, 0x82, 0x81, 0xb4, 0x0f, 0x0b, 0x34, 0x91, 0xe1, 0xec, 0x57, 0xf2,
0x17, 0xca, 0x56, 0x15, 0x1f, 0x7d, 0xa7, 0x27, 0x3a, 0xaf, 0xc3, 0x24,
0x0e, 0x26, 0x24, 0x5e, 0x31, 0x77, 0x45, 0xfb, 0x9c, 0x71, 0xaf, 0x19,
0x73, 0xd0, 0x33, 0x0b, 0x22, 0xd8, 0xde, 0xd0, 0x42, 0x79, 0xc0, 0x40,
0x23, 0x44, 0x1e, 0xa3, 0xdf, 0x65, 0x48, 0x50, 0x96, 0xdc, 0xc5, 0x98,
0x9b, 0x13, 0xa8, 0x29, 0x6e, 0x79, 0x02, 0xef, 0x50, 0xd0, 0xdf, 0x71,
0x29, 0xd3, 0xa4, 0x3a, 0xa4, 0x13, 0xc9, 0x0f, 0xa3, 0xff, 0x73, 0x25,
0xb0, 0xb8, 0xe0, 0x07, 0x02, 0xfd, 0xb5, 0x6f, 0xb4, 0x95, 0xc5, 0x49,
0x8b, 0x13, 0xb4, 0xb9, 0x4f, 0xba, 0xd3, 0x83, 0xc2, 0x8c, 0x7a, 0xbe,
0x9a, 0x0b, 0x7d, 0x03, 0x75, 0xf6, 0xbd, 0x84, 0xbd, 0xc0, 0xd4, 0x33,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x60, 0x31, 0xd7, 0xab,
0x48, 0x21, 0x3b, 0x89, 0xc4, 0x3f, 0xe9, 0x30, 0xf6, 0xbc, 0x55, 0xf0,
0xcf, 0x49, 0x4d, 0xf3, 0x3f, 0x66, 0xbe, 0x39, 0x3c, 0xcd, 0x53, 0x36,
0xc6, 0xd6, 0x04, 0x62, 0x37, 0x64, 0x6e, 0x86, 0x83, 0x2f, 0x1a, 0xe3,
0xd8, 0xcb, 0x6b, 0xcc, 0x18, 0x8c, 0xbc, 0x89, 0x97, 0x69, 0xa7, 0xe9,
0x61, 0x1f, 0xe6, 0x92, 0x3e, 0x34, 0x7b, 0xfa, 0xee, 0x9d, 0xcb, 0x03,
0x26, 0x8e, 0xd5, 0xc7, 0x11, 0xfb, 0x18, 0xc6, 0xe0, 0xd0, 0x7e, 0xb7,
0x77, 0x2c, 0x6e, 0xc0, 0x48, 0x33, 0x34, 0x11, 0x1c, 0x7d, 0x55, 0xa5,
0xca, 0xb3, 0x2d, 0xc6, 0x06, 0x59, 0x9b, 0x27, 0x8a, 0x1a, 0xd6, 0xa4,
0x5c, 0x48, 0x9f, 0x72, 0x20, 0x69, 0xdc, 0xbd, 0xf0, 0xba, 0x39, 0x4c,
0x70, 0xa5, 0x78, 0xb5, 0x87, 0x9c, 0x00, 0xe0, 0xc8, 0xf1, 0x8c, 0x03,
0x3a, 0x2c, 0x1c, 0x2e, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x83, 0x09, 0xd7, 0x49, 0xb5, 0x30, 0x90, 0x05, 0x98, 0x2a, 0x2f, 0x01,
0x25, 0x9f, 0x29, 0xf4, 0xa1, 0x30, 0x62, 0x62, 0x05, 0xbb, 0xa6, 0xda,
0x2f, 0x82, 0x41, 0xad, 0x2f, 0x4a, 0x49, 0x2f, 0x06, 0x35, 0xd8, 0x2f,
0x0c, 0xfa, 0xa5, 0x8c, 0x8e, 0xe7, 0x8a, 0x31, 0x83, 0x67, 0xf4, 0x34,
0xa2, 0xa2, 0x88, 0x6c, 0x71, 0xc7, 0xf1, 0x4c, 0xca, 0xba, 0x0d, 0x57,
0xc8, 0xef, 0x8f, 0x42, 0x9b, 0x2d, 0x86, 0x4a, 0x6a, 0x2c, 0xe7, 0x42,
0x56, 0xe5, 0x36, 0x46, 0xf6, 0xa6, 0x25, 0x4c, 0x83, 0xc1, 0x46, 0x19,
0x22, 0xf9, 0xcd, 0x19, 0x31, 0x55, 0xaa, 0x2b, 0xa5, 0x59, 0x78, 0x70,
0x90, 0x84, 0x93, 0x55, 0xb8, 0x46, 0x4d, 0x54, 0xaa, 0x13, 0x1e, 0x5f,
0x72, 0x9a, 0xb5, 0x05, 0x48, 0x28, 0x3d, 0x78, 0xaf, 0xd3, 0x25, 0x46,
0x9b, 0xd1, 0x06, 0x60, 0x74, 0x34, 0x4e, 0x38, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x79, 0xc6, 0x8a, 0xe3, 0xae, 0xf7, 0xaf, 0x3a,
0xb6, 0xe4, 0xd3, 0xdd, 0x69, 0x6a, 0x24, 0x45, 0x6e, 0x16, 0x97, 0x1c,
0x3e, 0xb8, 0x34, 0x46, 0xf3, 0xd2, 0x73, 0x3f, 0x83, 0x89, 0xd2, 0x0c,
0x5a, 0x95, 0x79, 0x68, 0x8d, 0x99, 0x66, 0xad, 0xfc, 0x2d, 0x15, 0xfb,
0x2b, 0xce, 0x6d, 0x15, 0x95, 0x82, 0x9d, 0xae, 0x31, 0x31, 0x3b, 0xd5,
0x7a, 0xe3, 0x66, 0x40, 0x34, 0x8b, 0xc0, 0x2f, 0x94, 0x52, 0x55, 0x33,
0xce, 0x37, 0x27, 0xe3, 0x35, 0x3f, 0x63, 0x58, 0x7f, 0x92, 0x2a, 0x4e,
0xbd, 0x43, 0x10, 0x6e, 0xc6, 0xc3, 0x86, 0x31, 0xd8, 0xb8, 0xe0, 0x39,
0x48, 0xf1, 0xa0, 0x49, 0xec, 0x14, 0x25, 0x1b, 0xf1, 0x2d, 0x6f, 0x1a,
0x96, 0xb2, 0x0c, 0x08, 0x86, 0x9b, 0x9f, 0xfa, 0xe5, 0x1a, 0x00, 0xb6,
0x54, 0x35, 0xcd, 0x4a, 0xb2, 0x93, 0x6e, 0x09, 0xb4, 0xb1, 0x61, 0x4c,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0xdb, 0x04, 0x30, 0x03,
0xcb, 0xf5, 0x5e, 0xe4, 0xbc, 0x0e, 0xcf, 0x6f, 0x40, 0x4d, 0xfa, 0x18,
0x32, 0x71, 0x77, 0x86, 0x99, 0x1d, 0xb7, 0xb6, 0x34, 0x8a, 0x42, 0x0e,
0x08, 0x86, 0x14, 0x50, 0x2e, 0x67, 0x22, 0x47, 0x1e, 0x39, 0xfb, 0x7d,
0x35, 0xc4, 0x12, 0x18, 0x0e, 0x30, 0x86, 0x19, 0xfb, 0x76, 0x16, 0x19,
0x6d, 0x8c, 0x75, 0x95, 0x23, 0x69, 0x48, 0x28, 0xfd, 0x9b, 0x0b, 0x64,
0x91, 0xe6, 0x92, 0xd3, 0x1a, 0x8d, 0x5f, 0x08, 0xa9, 0xee, 0x34, 0x8b,
0xe2, 0x71, 0x86, 0x8c, 0xf8, 0xa8, 0x27, 0x08, 0xd1, 0x00, 0x12, 0x77,
0xce, 0x0b, 0xae, 0x05, 0x38, 0x38, 0x3b, 0x6f, 0xc8, 0xda, 0x82, 0x9c,
0x50, 0x72, 0x45, 0xaf, 0xad, 0x71, 0x4a, 0x6b, 0x19, 0x6b, 0x7c, 0x1e,
0x6e, 0xe8, 0x87, 0xaa, 0x9b, 0xe5, 0x38, 0x9a, 0x22, 0x19, 0xd2, 0x9a,
0x94, 0x15, 0x70, 0x4c, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0xd6, 0xbc, 0xf5, 0x83, 0xf1, 0xfe, 0xde, 0xf5, 0xd8, 0xae, 0xf8, 0xb3,
0x54, 0x9d, 0x69, 0x55, 0x8b, 0x2b, 0xa9, 0x5e, 0x78, 0xaf, 0x24, 0x05,
0x58, 0x70, 0x69, 0xcd, 0x88, 0xc4, 0x0f, 0x4f, 0x68, 0xc6, 0x43, 0x2f,
0xa6, 0x92, 0xea, 0x6e, 0xb9, 0x77, 0x74, 0xae, 0x8c, 0xfd, 0x9f, 0x79,
0xc4, 0xe1, 0x7a, 0x07, 0x6c, 0x38, 0x40, 0xdd, 0xf9, 0x1c, 0x6d, 0x19,
0xc8, 0xf1, 0xe0, 0x18, 0xc2, 0xa5, 0xf2, 0x5f, 0xde, 0x70, 0x37, 0x1c,
0x82, 0x56, 0x5e, 0xde, 0x09, 0x70, 0x48, 0xad, 0xb8, 0x73, 0xe7, 0x90,
0x36, 0x88, 0x4d, 0x68, 0x32, 0x0b, 0x1d, 0x77, 0x71, 0x9a, 0x21, 0x1c,
0x12, 0x3a, 0x4e, 0x82, 0x34, 0xc7, 0xfa, 0xa9, 0x2b, 0x10, 0xa1, 0x6b,
0x9b, 0x11, 0xdb, 0x82, 0x42, 0x91, 0x02, 0x88, 0xe4, 0xba, 0x5f, 0x57,
0xd9, 0xac, 0x30, 0x98, 0x05, 0xa8, 0x2c, 0x4d, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xc5, 0xb5, 0x00, 0xde, 0x7e, 0x5c, 0xfe, 0x18,
0xec, 0xaa, 0x55, 0x85, 0xcb, 0x66, 0x55, 0x52, 0xad, 0xcb, 0x74, 0x28,
0x93, 0x85, 0xd8, 0x94, 0x97, 0x4d, 0x64, 0x6e, 0xd8, 0xfe, 0x99, 0x3a,
0x57, 0x1d, 0x51, 0xc1, 0xea, 0xdd, 0xd2, 0x38, 0xdd, 0x3d, 0xba, 0x2a,
0x71, 0x96, 0xa2, 0x97, 0xa6, 0x00, 0xc9, 0xc6, 0x65, 0xbb, 0xc6, 0x27,
0xd6, 0x04, 0x49, 0x3a, 0x5c, 0xb3, 0xb0, 0x05, 0x05, 0xea, 0x70, 0x92,
0xe7, 0xf2, 0x43, 0x67, 0x49, 0x6b, 0x96, 0x0f, 0x95, 0x7a, 0x15, 0x3a,
0x4f, 0x0f, 0xf5, 0xf6, 0xf1, 0x51, 0xe7, 0x12, 0x83, 0x8d, 0x2a, 0xad,
0xb3, 0x29, 0x0b, 0x66, 0x20, 0x30, 0xf5, 0x8f, 0xb9, 0x81, 0x61, 0x2d,
0xb4, 0x44, 0xe7, 0x89, 0x89, 0x1b, 0x4b, 0xb8, 0x90, 0x2d, 0x54, 0xa7,
0x8e, 0x58, 0x6c, 0x3c, 0xe1, 0x31, 0xba, 0x3e, 0xbf, 0xe2, 0xb4, 0x5c,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x72, 0xdc, 0xc0, 0xee,
0x8c, 0x48, 0x9a, 0x30, 0x4b, 0x5f, 0xfc, 0x54, 0xe7, 0x0b, 0xc7, 0x1d,
0xfc, 0x66, 0x27, 0x13, 0x6d, 0x4d, 0x0c, 0x15, 0x76, 0x71, 0x51, 0xf3,
0x25, 0xfd, 0x2c, 0x37, 0xf7, 0xc0, 0xce, 0xcc, 0xa3, 0x90, 0x0b, 0xbd,
0x5e, 0x6e, 0x09, 0x8a, 0xde, 0x6f, 0x9c, 0x6d, 0xdc, 0xf1, 0xeb, 0x6b,
0xd0, 0x3d, 0x38, 0x8c, 0xd0, 0xbf, 0xaf, 0xbf, 0xe2, 0xb3, 0x95, 0x4f,
0x4e, 0x11, 0x43, 0xe0, 0x90, 0x60, 0xca, 0x61, 0xc5, 0xab, 0xfc, 0xd5,
0x19, 0x02, 0xe6, 0xee, 0x32, 0xd3, 0x93, 0xc1, 0x6c, 0x69, 0x07, 0xba,
0x37, 0x7a, 0x55, 0xed, 0xb3, 0xda, 0xbf, 0x3a, 0x56, 0x67, 0xbb, 0x94,
0xc3, 0x70, 0x3e, 0xf6, 0x35, 0xdf, 0x5d, 0xf6, 0xec, 0x4c, 0x76, 0x88,
0x22, 0xcb, 0xb8, 0x87, 0x6f, 0x73, 0x48, 0xc5, 0xae, 0x2f, 0x89, 0x18,
0xab, 0x89, 0x0c, 0x4e, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x62, 0x6e, 0xaf, 0x1e, 0xc0, 0xc8, 0xa0, 0x0f, 0x41, 0x9c, 0x9a, 0x41,
0x06, 0xeb, 0x2e, 0xe5, 0x1a, 0xf4, 0x0d, 0x48, 0x40, 0x95, 0x0e, 0xb0,
0xbb, 0xc3, 0x0c, 0x66, 0x7a, 0xd9, 0xb4, 0x0c, 0x5b, 0x03, 0x93, 0xa6,
0xa8, 0x9a, 0xea, 0x96, 0x44, 0xcb, 0x12, 0xae, 0x40, 0x60, 0x03, 0xfc,
0xb7, 0x1b, 0x2d, 0xa0, 0x12, 0xd3, 0x30, 0x74, 0x66, 0xcc, 0xa4, 0xfa,
0xca, 0x5b, 0x20, 0x25, 0x7f, 0x66, 0x6d, 0xad, 0x3a, 0x65, 0x13, 0xc3,
0x51, 0x00, 0x54, 0x5c, 0x61, 0x0c, 0x76, 0xb7, 0x8b, 0xe6, 0x97, 0xb1,
0x94, 0x78, 0x4d, 0x2c, 0x33, 0xc7, 0xf0, 0x09, 0xba, 0x2d, 0xf5, 0x60,
0x6d, 0x86, 0x75, 0x71, 0xed, 0xc9, 0x73, 0x2d, 0x73, 0x2d, 0x8a, 0xfb,
0x53, 0xa3, 0x1a, 0xc0, 0xe6, 0x8c, 0xd0, 0x6f, 0x98, 0xfc, 0x00, 0xfe,
0x8e, 0xd9, 0x2a, 0x39, 0x5c, 0xef, 0x79, 0x1b, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x1b, 0x0e, 0xb3, 0x6e, 0xec, 0x41, 0x47, 0x9c,
0x86, 0x65, 0xba, 0x43, 0xfd, 0xef, 0xb4, 0x42, 0xca, 0x96, 0x39, 0xec,
0x62, 0x22, 0x73, 0xf7, 0xed, 0xd3, 0x27, 0xef, 0x57, 0x7f, 0x9b, 0x61,
0x98, 0x00, 0x63, 0xbb, 0x4f, 0x68, 0x42, 0x1e, 0xe3, 0xea, 0x08, 0xf3,
0xa4, 0xe2, 0x9e, 0x71, 0x0a, 0x45, 0x90, 0x7f, 0x93, 0x18, 0x1d, 0x1e,
0xf1, 0x47, 0x79, 0x91, 0xe4, 0x73, 0xbf, 0x5f, 0x7e, 0xcb, 0x83, 0xde,
0xde, 0x88, 0xa7, 0xe0, 0x65, 0x01, 0x5b, 0x94, 0x64, 0xa8, 0x12, 0xb3,
0xde, 0x22, 0x82, 0x62, 0x5d, 0x30, 0x11, 0xc7, 0x7a, 0xa8, 0xa0, 0xd6,
0xab, 0x70, 0x88, 0x59, 0x92, 0x3f, 0xf2, 0x6b, 0x25, 0xbb, 0x11, 0xb8,
0xda, 0x29, 0x5e, 0xde, 0x29, 0xf0, 0x50, 0x8d, 0xf2, 0x76, 0x05, 0xc0,
0xb8, 0x9f, 0xa1, 0xe7, 0x7e, 0x85, 0x07, 0xfa, 0xda, 0xed, 0x97, 0x0a,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x0e, 0xa5, 0x09, 0x60,
0x5e, 0xeb, 0xb2, 0x0a, 0xda, 0x15, 0x84, 0xad, 0xd7, 0xf2, 0x0e, 0x1a,
0xce, 0x93, 0xea, 0xfa, 0x1d, 0xf5, 0xfb, 0x2d, 0x45, 0x02, 0x26, 0x89,
0xf5, 0x4a, 0x1f, 0x4a, 0x4b, 0x39, 0xfb, 0xdd, 0x38, 0xef, 0x39, 0x55,
0x55, 0xdf, 0xc4, 0x5e, 0x23, 0x83, 0x02, 0x48, 0x60, 0xf7, 0x1b, 0xcc,
0xe8, 0x6f, 0x86, 0xdf, 0x0e, 0xcf, 0xa1, 0x3d, 0x6e, 0xd4, 0xb4, 0x20,
0x5d, 0xee, 0xf2, 0xe7, 0x78, 0xb8, 0x40, 0xf7, 0xad, 0xe2, 0x81, 0xe7,
0xfa, 0x26, 0x92, 0x0f, 0x24, 0x87, 0xc9, 0x44, 0x5f, 0x90, 0x9d, 0x25,
0xaa, 0xb7, 0x95, 0x76, 0x89, 0x53, 0xd5, 0x5f, 0xe8, 0xa6, 0x59, 0xba,
0x77, 0xf1, 0x05, 0xe8, 0x2e, 0x5d, 0x24, 0x31, 0x59, 0xf8, 0x13, 0x02,
0xfe, 0x9e, 0x22, 0x7b, 0xa7, 0xb1, 0xa1, 0xba, 0xb0, 0xb7, 0xbd, 0xd1,
0x12, 0x38, 0x99, 0x61, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0xb2, 0xcc, 0xab, 0x2f, 0x02, 0xe8, 0x24, 0xdd, 0x52, 0xf7, 0xe4, 0x98,
0x90, 0x56, 0x66, 0x7c, 0xef, 0x7c, 0x42, 0xf3, 0x4a, 0x71, 0x47, 0x18,
0x17, 0x8c, 0x37, 0xac, 0x03, 0xde, 0xf7, 0x55, 0x75, 0xd4, 0x7a, 0x60,
0x4e, 0xc3, 0xd0, 0xc4, 0xd4, 0x29, 0xd6, 0xc2, 0x5a, 0x10, 0x42, 0x98,
0x1f, 0xcd, 0x91, 0xe2, 0xe5, 0xc3, 0x62, 0x26, 0x01, 0xda, 0x4b, 0xde,
0xf2, 0x37, 0xab, 0x37, 0x30, 0xaf, 0x44, 0x39, 0xb0, 0xa0, 0xa1, 0x84,
0xbb, 0x01, 0x51, 0x77, 0x94, 0x9f, 0x1f, 0x67, 0x92, 0x39, 0x00, 0x69,
0x7a, 0x68, 0x79, 0xf4, 0x8e, 0x28, 0xde, 0xe0, 0x4f, 0x0a, 0x4d, 0x00,
0xf0, 0xc3, 0xf8, 0x2a, 0xd3, 0x5e, 0xc2, 0x92, 0x9d, 0x1f, 0x7e, 0x77,
0x88, 0x25, 0x9e, 0xd7, 0xaa, 0x95, 0xbd, 0x5a, 0x85, 0x61, 0x54, 0x41,
0xad, 0x8f, 0x8f, 0x02, 0x6b, 0xd5, 0x8d, 0x40, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xc0, 0xb2, 0x14, 0x11, 0x7c, 0xd7, 0xd4, 0x99,
0xa2, 0xd1, 0x3f, 0xf2, 0xf9, 0x9c, 0x7e, 0xd9, 0x72, 0x9b, 0x8b, 0x73,
0xeb, 0x6c, 0x2b, 0xc0, 0x16, 0x3b, 0x8d, 0xa3, 0x36, 0x95, 0xfa, 0x44,
0xd9, 0xd8, 0x58, 0xfd, 0x23, 0x67, 0x7f, 0xa2, 0xc4, 0x67, 0x69, 0xbb,
0x18, 0x52, 0xc8, 0x38, 0xef, 0xad, 0x36, 0x79, 0x0e, 0x43, 0x17, 0x87,
0x3d, 0x1e, 0x6e, 0xf7, 0x06, 0xa5, 0xc2, 0x10, 0x55, 0x73, 0x3a, 0x04,
0x3a, 0x32, 0x33, 0xde, 0x21, 0x54, 0xbf, 0xde, 0xd0, 0x5f, 0x2d, 0xe8,
0x3a, 0x6f, 0x9b, 0xcb, 0x59, 0x32, 0x95, 0xb7, 0x63, 0xea, 0x6a, 0x07,
0x64, 0xa7, 0x6f, 0x3d, 0x55, 0x2a, 0x89, 0x52, 0xda, 0x87, 0xbf, 0xaa,
0xd4, 0xbf, 0x97, 0xc0, 0xea, 0xfc, 0xc3, 0x2f, 0x2f, 0xcf, 0x8f, 0xf5,
0x7d, 0xfe, 0x0f, 0xf3, 0x13, 0x23, 0x91, 0x76, 0xa8, 0xc5, 0x61, 0x5a,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x5c, 0xbf, 0x5f, 0xe9,
0x15, 0x89, 0xfb, 0xdb, 0xaf, 0x98, 0x7b, 0x9c, 0x9d, 0x4f, 0x11, 0xba,
0xaf, 0x71, 0x71, 0xc8, 0x09, 0x4e, 0xaa, 0xbe, 0x20, 0x14, 0x24, 0xc8,
0x5d, 0xa1, 0x18, 0x3b, 0xf6, 0x48, 0xd9, 0x1a, 0x75, 0x26, 0x54, 0xcf,
0xe7, 0xbe, 0xab, 0x24, 0x8f, 0x0c, 0x9c, 0xca, 0x23, 0x33, 0xb6, 0xd6,
0x42, 0x65, 0x37, 0x5f, 0x35, 0xe9, 0x06, 0xe3, 0x0f, 0xb6, 0x73, 0x1e,
0x4f, 0x5e, 0x94, 0x44, 0x6e, 0xdf, 0xe1, 0x2a, 0x17, 0x97, 0x9a, 0xa1,
0x19, 0x1b, 0x3f, 0xa3, 0x25, 0x7a, 0xf1, 0x51, 0xfa, 0xdd, 0x5f, 0x80,
0x35, 0xcc, 0x90, 0x2b, 0x8e, 0xa7, 0x5b, 0x4d, 0x9d, 0x0f, 0x13, 0xc7,
0xa6, 0x87, 0x8a, 0xce, 0xe4, 0x45, 0xf9, 0xdc, 0xbf, 0xe8, 0xbc, 0xc1,
0x5b, 0xfa, 0x51, 0x81, 0xa9, 0x7b, 0x26, 0xa9, 0xdd, 0xa8, 0xdf, 0xcf,
0x29, 0xb3, 0xbe, 0x14, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x4d, 0x90, 0x6a, 0xf7, 0xb0, 0xff, 0x6a, 0xac, 0xaf, 0x33, 0xf0, 0x2d,
0x6c, 0x58, 0x3e, 0x8d, 0x45, 0x5f, 0x8b, 0xd9, 0x08, 0x69, 0x0e, 0x8d,
0x4f, 0x07, 0x87, 0x5b, 0xdb, 0x17, 0xb9, 0x4e, 0xcb, 0x48, 0x41, 0x0c,
0x88, 0xa8, 0x23, 0x79, 0x4a, 0x9d, 0x9a, 0x31, 0x72, 0xe3, 0x1b, 0xd4,
0x4d, 0xc5, 0xb1, 0x90, 0xdb, 0x9b, 0xef, 0x03, 0xff, 0x11, 0x42, 0x29,
0xb6, 0xc6, 0xc2, 0x1e, 0x70, 0x80, 0xaf, 0xea, 0x0a, 0xd3, 0x2d, 0xb9,
0xea, 0x6e, 0xbd, 0x5c, 0xac, 0x97, 0xa9, 0x08, 0x7d, 0x51, 0xb3, 0x72,
0x62, 0x70, 0x4f, 0x19, 0xa9, 0xee, 0xe4, 0xc7, 0x06, 0x3f, 0xe3, 0x3d,
0x1e, 0x02, 0x0e, 0xb5, 0x57, 0x3f, 0x14, 0x23, 0xda, 0x5b, 0xf3, 0xf3,
0xf6, 0x62, 0x4e, 0x33, 0x72, 0x41, 0x4d, 0xb1, 0x87, 0xa6, 0xee, 0x68,
0x62, 0xb1, 0x71, 0x7e, 0xb8, 0xa9, 0xa7, 0x6f, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xa9, 0x53, 0x42, 0x6f, 0xe4, 0x79, 0xbd, 0xc7,
0x37, 0x51, 0xc9, 0xe6, 0xfe, 0x9b, 0x63, 0x03, 0x25, 0xdb, 0xb6, 0xf9,
0xdb, 0x80, 0x91, 0x01, 0x3f, 0xdd, 0xee, 0xe9, 0x9e, 0x60, 0xa6, 0x37,
0x43, 0x16, 0x9b, 0x92, 0x8b, 0xf9, 0xd9, 0x21, 0xa7, 0xf9, 0x05, 0x21,
0x00, 0xaa, 0x35, 0x9c, 0x08, 0xa5, 0x66, 0xba, 0xcb, 0xe3, 0x45, 0xfd,
0x8e, 0xbb, 0x5e, 0x97, 0x2a, 0xf9, 0x99, 0x6c, 0x8f, 0x92, 0xe8, 0x7d,
0x4f, 0x6d, 0x9c, 0x6d, 0xae, 0x17, 0xe1, 0x16, 0xde, 0x03, 0x35, 0x01,
0x76, 0xe6, 0x5a, 0x3b, 0x45, 0xb5, 0x43, 0x21, 0x59, 0xa9, 0x87, 0x38,
0x2f, 0x07, 0x94, 0x60, 0x26, 0xb9, 0xd2, 0xb5, 0xe3, 0x3b, 0x57, 0xa0,
0xb0, 0xb6, 0xfe, 0x10, 0x2e, 0xb6, 0xbd, 0xdf, 0x24, 0xe1, 0xc3, 0x8c,
0xbb, 0x08, 0x62, 0x84, 0x7e, 0x37, 0x25, 0x5c, 0xd1, 0x65, 0xf2, 0x45,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x4f, 0x72, 0x7c, 0xfb,
0xb3, 0x7d, 0x0e, 0xa5, 0x2f, 0x39, 0x76, 0x80, 0xff, 0x08, 0xac, 0xe7,
0x21, 0x18, 0x3f, 0x03, 0xab, 0xb2, 0xde, 0x9c, 0x83, 0x47, 0xa7, 0x90,
0x1e, 0x2c, 0xa3, 0x11, 0x14, 0x4e, 0x27, 0x52, 0x70, 0x56, 0xa6, 0x87,
0x1a, 0x45, 0x52, 0xf4, 0x9f, 0xed, 0x73, 0xb2, 0xa2, 0x01, 0x55, 0xbd,
0x6a, 0x32, 0xd4, 0x15, 0x2e, 0x7e, 0x05, 0xbb, 0xb5, 0x27, 0x46, 0x58,
0x49, 0x2b, 0xab, 0x60, 0x30, 0xbb, 0x6d, 0x8a, 0xd8, 0x85, 0x78, 0x25,
0x19, 0x8a, 0x9d, 0xc9, 0x3e, 0xf2, 0xb0, 0x65, 0x1b, 0xe2, 0xfd, 0x30,
0x66, 0x4b, 0xb5, 0x6b, 0xfd, 0x7e, 0xe3, 0x1c, 0x31, 0x08, 0xa6, 0xb2,
0xda, 0x48, 0x8d, 0xeb, 0xb0, 0xac, 0xf0, 0xf5, 0x6e, 0xe5, 0x4b, 0xc7,
0xf9, 0xfb, 0xca, 0xbd, 0x8c, 0x4e, 0x28, 0xa1, 0xe9, 0xf6, 0x7f, 0x12,
0xd3, 0x5d, 0x88, 0x0b, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x8b, 0x4e, 0xfd, 0x3f, 0x25, 0xc1, 0xc0, 0x59, 0x5a, 0xd5, 0x46, 0xa0,
0x79, 0x62, 0x4e, 0x29, 0xc6, 0x1f, 0x75, 0x8a, 0x8c, 0x82, 0x5e, 0x15,
0x72, 0xd5, 0x58, 0x66, 0x81, 0x74, 0x9c, 0x40, 0x1d, 0x98, 0x69, 0x66,
0xf6, 0xcb, 0x81, 0x16, 0xd4, 0xa4, 0xbc, 0x13, 0xd6, 0x85, 0x54, 0x5d,
0xb3, 0x1b, 0x28, 0xa3, 0x56, 0x36, 0x46, 0xf6, 0xc2, 0x98, 0x24, 0xbb,
0x35, 0xfe, 0xa2, 0x6a, 0x0b, 0xb7, 0x27, 0x6c, 0xb4, 0xda, 0x41, 0x1b,
0x37, 0x2f, 0x76, 0x99, 0x63, 0x28, 0x79, 0xc0, 0xc5, 0x38, 0x5d, 0xf2,
0x16, 0x23, 0x40, 0xa5, 0xb7, 0x36, 0x5a, 0xd1, 0x91, 0x4a, 0xbe, 0x6f,
0x76, 0x8e, 0x06, 0x8f, 0x7c, 0x29, 0x10, 0x51, 0x6a, 0x42, 0xde, 0x5b,
0xed, 0x80, 0x38, 0xf4, 0xf3, 0xf2, 0x5c, 0x76, 0xf2, 0x78, 0xbd, 0x0d,
0x0c, 0xe8, 0x78, 0x68, 0xfc, 0x7c, 0x9c, 0x55, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x65, 0xfe, 0xff, 0xbf, 0x05, 0x2f, 0x3c, 0x1e,
0x02, 0xfa, 0x35, 0x81, 0xef, 0x42, 0xb9, 0xa8, 0x5f, 0x6d, 0x50, 0xaf,
0x80, 0x74, 0xb5, 0x76, 0xad, 0xd0, 0x5a, 0xc0, 0x44, 0xb0, 0x49, 0x2a,
0x1c, 0x35, 0x33, 0x93, 0x5f, 0x00, 0x08, 0x49, 0x94, 0xb7, 0xab, 0x66,
0xd3, 0xd7, 0xcc, 0xfe, 0x68, 0x9e, 0xf0, 0xae, 0x7e, 0x26, 0x1d, 0x4a,
0x85, 0xf7, 0x0c, 0xaa, 0xd6, 0x4f, 0x0c, 0x6a, 0xde, 0xd6, 0x8b, 0xb8,
0xc0, 0xbe, 0x0b, 0xab, 0x9a, 0x3f, 0xe4, 0x8a, 0x1c, 0x1b, 0x81, 0x1b,
0x8d, 0x6e, 0xeb, 0xa3, 0xac, 0x44, 0x9a, 0x51, 0x29, 0x50, 0x2d, 0x93,
0xa0, 0x23, 0xb8, 0x30, 0x33, 0x15, 0xe0, 0x11, 0x8b, 0x6e, 0xe3, 0x6f,
0x62, 0xc1, 0xba, 0x8b, 0x25, 0xac, 0xd3, 0x13, 0x47, 0xee, 0xc5, 0x56,
0x62, 0x9e, 0xbd, 0x5e, 0x03, 0xdb, 0x78, 0xde, 0x5e, 0xf9, 0xb9, 0x4e,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x00, 0xd0,
0x87, 0x1a, 0x49, 0x23, 0x54, 0x58, 0x48, 0x00, 0x80, 0x87, 0x16, 0x01,
0xa0, 0xbd, 0x8c, 0x8b, 0x01, 0x1a, 0xdc, 0xfc, 0x0f, 0xe2, 0x98, 0xa5,
0x10, 0x78, 0x25, 0x6f, 0x9b, 0x99, 0x99, 0x59, 0x82, 0xef, 0x3b, 0xc0,
0xa7, 0xd6, 0x7c, 0xb6, 0x5e, 0xe0, 0xd5, 0x9b, 0x31, 0x32, 0xf2, 0xf9,
0xde, 0x89, 0xa6, 0x89, 0x90, 0x6d, 0xa8, 0x10, 0xb0, 0x71, 0x7a, 0x1e,
0x85, 0xb0, 0x81, 0x18, 0x2a, 0x71, 0xb1, 0x7b, 0xbd, 0x29, 0xdc, 0x08,
0x0e, 0xc6, 0xc5, 0xfd, 0x4d, 0xce, 0x87, 0x98, 0xd2, 0xe6, 0x0f, 0x11,
0x2f, 0x7e, 0xf0, 0xe0, 0xd8, 0x47, 0x1d, 0x04, 0xc6, 0x9b, 0xde, 0x35,
0x20, 0x57, 0x3e, 0xd4, 0x57, 0xb8, 0x20, 0x9d, 0x95, 0xae, 0xb6, 0x21,
0x82, 0xac, 0xde, 0xdd, 0x9b, 0x60, 0x94, 0xe9, 0x4b, 0xb4, 0x59, 0xef,
0x06, 0xa7, 0xe4, 0x65, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0x00, 0x00, 0x30, 0xc2, 0x35, 0xb2, 0xc0, 0xe8, 0x0e, 0x8a, 0xaf,
0x28, 0x09, 0x1e, 0x7e, 0x9e, 0x37, 0x96, 0x87, 0x92, 0x36, 0x29, 0xd7,
0x70, 0xd5, 0x84, 0xef, 0xb7, 0x50, 0x42, 0x00, 0xcd, 0xcc, 0xcc, 0x04,
0x5a, 0x14, 0xea, 0x45, 0xb8, 0x82, 0x05, 0x94, 0x10, 0x12, 0xd3, 0x2e,
0x7d, 0x94, 0x1b, 0x04, 0x9a, 0x3c, 0x6d, 0xa5, 0x6c, 0xbb, 0xbd, 0x4e,
0x34, 0xe1, 0xe0, 0x16, 0xcb, 0x55, 0x94, 0x0a, 0x7a, 0x18, 0xa0, 0xdd,
0x7b, 0x74, 0xd0, 0x54, 0x3c, 0xb9, 0xcd, 0x54, 0xd0, 0x97, 0xfd, 0xfa,
0xd5, 0x19, 0xe9, 0x34, 0x4e, 0x6c, 0xa1, 0xea, 0xf3, 0x7f, 0x31, 0x5e,
0xf8, 0x88, 0xbe, 0xec, 0x02, 0xf0, 0xce, 0x75, 0xdb, 0x51, 0xfa, 0x68,
0x41, 0x49, 0x10, 0x03, 0xab, 0xff, 0x7f, 0x3a, 0xf5, 0x29, 0xa9, 0x1c,
0xc3, 0x3d, 0x5f, 0x4d, 0x66, 0x9b, 0x65, 0x04, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x00, 0x24, 0x7a, 0x63, 0x86, 0x07,
0x09, 0xeb, 0x71, 0x66, 0xa4, 0x43, 0x09, 0xeb, 0xbc, 0x2d, 0x8c, 0x2c,
0xae, 0xeb, 0x7e, 0xde, 0xbf, 0x45, 0x0a, 0x22, 0x91, 0x27, 0x00, 0x00,
0x67, 0x66, 0x66, 0x76, 0xa0, 0x3c, 0xc5, 0xd4, 0xd2, 0xcf, 0xd3, 0x0d,
0x97, 0x92, 0x52, 0x7c, 0xf8, 0x5c, 0x73, 0x4d, 0xc5, 0x31, 0x62, 0x40,
0x76, 0xe7, 0xb5, 0xfb, 0xaa, 0x68, 0x8e, 0x45, 0xc2, 0xf6, 0xff, 0x3b,
0xf3, 0x53, 0xd0, 0x30, 0x9b, 0x29, 0x1d, 0x86, 0x25, 0xbd, 0x0f, 0xf9,
0x53, 0x91, 0x10, 0x4c, 0xf5, 0x4a, 0xf3, 0x13, 0x04, 0xf4, 0x79, 0x12,
0xc1, 0x4a, 0x58, 0x21, 0x3c, 0x74, 0xe0, 0x5d, 0x22, 0x2e, 0x60, 0x2a,
0xca, 0x3d, 0x5f, 0xc2, 0xcb, 0xf2, 0x47, 0x71, 0xd0, 0xd6, 0x63, 0xca,
0xf3, 0xd0, 0xe8, 0x39, 0x99, 0x3b, 0xff, 0x06, 0x3e, 0xe5, 0xcd, 0x6b,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x00, 0xac,
0x99, 0x37, 0xb3, 0x60, 0xfa, 0x50, 0xcb, 0x16, 0x8f, 0x95, 0x6b, 0xcc,
0x70, 0x69, 0x74, 0x2e, 0x07, 0xd5, 0x81, 0x98, 0xee, 0x16, 0x88, 0x9b,
0x17, 0x00, 0x00, 0x00, 0x34, 0x33, 0x33, 0xa9, 0x30, 0x56, 0xe3, 0x6e,
0x00, 0xe2, 0x23, 0x99, 0x51, 0x52, 0x76, 0xe2, 0x4c, 0x35, 0xcc, 0xf4,
0xbe, 0x8b, 0x0f, 0xdf, 0xbf, 0xaa, 0x2a, 0xbb, 0x59, 0x1f, 0xbe, 0x5c,
0xc0, 0x27, 0xda, 0x2d, 0x45, 0x2e, 0x95, 0x8f, 0xb7, 0x50, 0x74, 0xd9,
0xec, 0x14, 0xb8, 0x4c, 0x0d, 0x7b, 0xc9, 0xe0, 0x02, 0xd1, 0x9e, 0x56,
0xc9, 0x27, 0xf7, 0xa8, 0xa6, 0xa1, 0xdc, 0x38, 0x3f, 0xff, 0xf9, 0x30,
0xfa, 0x8c, 0xc8, 0xab, 0x60, 0x86, 0xf0, 0x52, 0xc4, 0x01, 0xc2, 0x9c,
0xb3, 0x42, 0x51, 0x52, 0x54, 0x35, 0x83, 0x05, 0xb2, 0xd5, 0xc6, 0x9b,
0x7a, 0x43, 0xa6, 0x6c, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0x00, 0x00, 0xa5, 0xd1, 0xee, 0x7c, 0xd0, 0xba, 0xcb, 0x9d, 0x26,
0x7f, 0xd1, 0xed, 0xba, 0xb3, 0x89, 0xf0, 0x52, 0xc2, 0x4f, 0x0c, 0xf0,
0x23, 0xdd, 0x15, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x99, 0x99, 0xe5,
0x87, 0xd2, 0xdc, 0x99, 0xaf, 0xaf, 0x6f, 0xca, 0x7f, 0x10, 0x1c, 0x8b,
0x6e, 0xb8, 0xd8, 0xc5, 0x9e, 0xd0, 0x03, 0xce, 0x57, 0x95, 0xbd, 0xcc,
0xba, 0x0f, 0x5f, 0x2e, 0xbf, 0x91, 0x50, 0x10, 0xfb, 0xe2, 0x60, 0x0a,
0x57, 0xb1, 0xc7, 0xdb, 0xc3, 0x18, 0x08, 0x02, 0x48, 0xc8, 0xea, 0xa0,
0x46, 0x32, 0xaa, 0x31, 0x46, 0x05, 0xbb, 0xbc, 0x03, 0x8d, 0xee, 0x36,
0xff, 0xa5, 0x22, 0x99, 0x0d, 0xd2, 0x0a, 0x51, 0x1a, 0x45, 0x34, 0x44,
0x68, 0x5d, 0x72, 0xde, 0x20, 0x7d, 0xa1, 0xa1, 0x77, 0xce, 0xd3, 0xc1,
0x91, 0xa1, 0x29, 0x1e, 0xc6, 0xe3, 0x35, 0x14, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x00, 0x83, 0x1e, 0x97, 0x8a, 0xf6,
0x06, 0xe2, 0xcb, 0xfe, 0xa5, 0x10, 0x1f, 0x59, 0x8b, 0xd6, 0x2f, 0x75,
0xeb, 0x3d, 0xff, 0xdf, 0x6e, 0x67, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
0xcd, 0xcc, 0x4c, 0xbc, 0x97, 0xb3, 0x89, 0x2c, 0x3e, 0x8b, 0x8b, 0xaf,
0x9b, 0x36, 0xe5, 0x38, 0x1c, 0x8d, 0x76, 0x2b, 0xf5, 0x49, 0xe6, 0x15,
0x6c, 0x21, 0xaf, 0x6e, 0xdd, 0x87, 0x2f, 0x17, 0xee, 0x4e, 0x3a, 0xda,
0x8f, 0x49, 0x3f, 0xd1, 0xcc, 0xc1, 0xd6, 0x95, 0x9e, 0x09, 0x52, 0x40,
0xab, 0xac, 0x43, 0xc0, 0x04, 0x7a, 0x13, 0x51, 0x40, 0xae, 0xd1, 0x27,
0x84, 0xee, 0xf6, 0x4e, 0x8e, 0xd8, 0x03, 0x5c, 0x37, 0x93, 0x2c, 0x43,
0x49, 0xac, 0x48, 0xd9, 0xa0, 0x82, 0x6b, 0x9f, 0x73, 0x22, 0xc6, 0x35,
0x95, 0x41, 0x89, 0x43, 0xb1, 0x1f, 0x04, 0xfa, 0x25, 0x24, 0x59, 0x31,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x40, 0xe4,
0x16, 0xab, 0x32, 0x30, 0xf2, 0x5a, 0x04, 0x1d, 0x4c, 0x5c, 0xea, 0xfe,
0x39, 0xbb, 0xe4, 0x26, 0x4f, 0x5e, 0x35, 0xa6, 0x03, 0x05, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x67, 0x66, 0x66, 0xdb, 0x44, 0x0e, 0xbd, 0xc1,
0x9b, 0x8c, 0x59, 0xb7, 0xb9, 0xc3, 0xb0, 0x7d, 0x2a, 0x27, 0x26, 0x19,
0x9e, 0xa2, 0x35, 0xe4, 0x38, 0x45, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45,
0xbb, 0xda, 0xd7, 0xf5, 0xdc, 0xf5, 0xee, 0x05, 0x0b, 0x14, 0x99, 0x1d,
0x2c, 0x77, 0x0b, 0xa8, 0xe0, 0x4c, 0x30, 0xd5, 0xe7, 0x25, 0xa5, 0x00,
0x81, 0x5f, 0xbf, 0x8a, 0x0c, 0x7c, 0xdd, 0x18, 0xb0, 0x6c, 0xf4, 0x8f,
0x59, 0x92, 0xf6, 0x79, 0xf6, 0x8d, 0x23, 0xf9, 0xaa, 0xc2, 0x33, 0xf1,
0x15, 0xa6, 0x48, 0x92, 0x1c, 0xd8, 0x44, 0x2d, 0xe3, 0xdf, 0x33, 0x4c,
0xab, 0x74, 0x9b, 0x1a, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0x00, 0xc0, 0x5a, 0x43, 0x4f, 0x54, 0x90, 0x5b, 0x8f, 0x29, 0xf1,
0xbe, 0xb3, 0x8e, 0xbf, 0x2b, 0x63, 0x9b, 0xa4, 0x6d, 0x29, 0xe3, 0xfd,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x33, 0x93, 0x51,
0xa5, 0xa4, 0x2f, 0xc2, 0xf4, 0x64, 0x4f, 0xb1, 0x7e, 0xa0, 0x7e, 0xc7,
0x62, 0x51, 0xe8, 0xcd, 0xbb, 0xdf, 0x39, 0x36, 0x36, 0x64, 0xe4, 0xba,
0x75, 0x1f, 0xbe, 0x5c, 0x26, 0xa3, 0x44, 0xc5, 0xda, 0xd1, 0xc0, 0x1f,
0x86, 0x3b, 0x67, 0x70, 0xe2, 0x82, 0x02, 0x91, 0x2b, 0x40, 0xb0, 0x46,
0xe2, 0xcf, 0xe7, 0x51, 0x2b, 0x63, 0x28, 0xac, 0x8c, 0x0d, 0x7e, 0x2f,
0xce, 0x8f, 0x89, 0x09, 0xf4, 0x60, 0xb2, 0x30, 0xa1, 0xfd, 0x80, 0xf7,
0xf8, 0x2d, 0xa8, 0xa8, 0x0f, 0x4d, 0x4e, 0xaa, 0x92, 0x83, 0x95, 0xa1,
0x6b, 0x2c, 0xb2, 0x3e, 0x6f, 0x3e, 0x33, 0x5c, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0xd0, 0xb0, 0xb1, 0x11, 0x5d, 0x5f,
0x5d, 0xc3, 0x95, 0x1a, 0x6e, 0xc2, 0xfe, 0xd5, 0xed, 0x59, 0xd2, 0x9f,
0x2b, 0xf7, 0xc8, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x9a, 0x99, 0x59, 0xbf, 0xae, 0x61, 0x76, 0x5e, 0x8d, 0xfd, 0x9d, 0xc7,
0xb2, 0xa0, 0x04, 0x7f, 0xbb, 0x90, 0x38, 0xa3, 0x7e, 0x5e, 0x2c, 0xdf,
0x1c, 0x32, 0x72, 0xdd, 0xba, 0x0f, 0x5f, 0x2e, 0x1a, 0xc0, 0x6a, 0xe1,
0x44, 0x05, 0x40, 0x61, 0xb6, 0x93, 0x2b, 0x00, 0x16, 0x8a, 0x70, 0x5b,
0xbe, 0x39, 0x1f, 0x89, 0xbd, 0x8f, 0x95, 0xd9, 0xad, 0x4a, 0x11, 0xe6,
0x30, 0xc0, 0xdb, 0x53, 0x2a, 0xb9, 0x1b, 0x91, 0x35, 0x4b, 0x6b, 0xe3,
0x17, 0xdf, 0x05, 0x6f, 0xfd, 0xd9, 0x0a, 0xc7, 0x4d, 0x05, 0x6d, 0x5e,
0x2d, 0xc9, 0x42, 0x0f, 0x43, 0x96, 0x20, 0xf6, 0x13, 0xd5, 0x35, 0x2d,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x30, 0x32,
0x38, 0x7a, 0x48, 0x1a, 0x77, 0x0f, 0x63, 0xf3, 0x88, 0x03, 0x96, 0xed,
0xc5, 0x2c, 0x20, 0xd4, 0xa5, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xcd, 0xcc, 0x84, 0x26, 0x24, 0x60, 0xdd, 0x3b,
0x8b, 0xb9, 0x0a, 0xa7, 0x81, 0xbc, 0x27, 0xda, 0x6d, 0xbc, 0x60, 0x76,
0xd7, 0xe7, 0xa3, 0x70, 0x0e, 0x19, 0xb9, 0x6e, 0xdd, 0x87, 0x2f, 0x17,
0xa4, 0x2d, 0x51, 0x76, 0xad, 0x02, 0x00, 0xd3, 0xd3, 0x9a, 0x21, 0x0c,
0x03, 0x85, 0xab, 0x50, 0x0b, 0xbf, 0x2d, 0x26, 0x8b, 0x88, 0x87, 0xdd,
0xb0, 0xc1, 0xe5, 0x0f, 0x6a, 0xf8, 0xca, 0x23, 0x3a, 0xb0, 0xb8, 0xed,
0x03, 0xb6, 0x19, 0x17, 0xb2, 0xd7, 0xaf, 0x25, 0x0f, 0xe8, 0x5d, 0x96,
0x88, 0x6b, 0xd7, 0x7f, 0xe0, 0xb0, 0x41, 0x57, 0xa1, 0x14, 0x1d, 0x55,
0x14, 0x63, 0xdb, 0x2a, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0x00, 0x64, 0x7f, 0x44, 0x75, 0xc4, 0x97, 0x37, 0x63, 0x51, 0x19,
0x64, 0xd6, 0x26, 0x62, 0xe4, 0x5c, 0xc4, 0xac, 0xa2, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x66, 0xf6, 0x93,
0xf5, 0xd8, 0x74, 0xee, 0xd7, 0xdd, 0xc1, 0x95, 0x0c, 0x6d, 0x23, 0x58,
0x1b, 0x30, 0xd1, 0x7c, 0xdd, 0x80, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c,
0x98, 0x97, 0x8e, 0x45, 0xd3, 0x87, 0x93, 0x2f, 0xa7, 0x92, 0xea, 0x68,
0xc2, 0x30, 0x3c, 0x9d, 0xfc, 0xe5, 0x9a, 0xe5, 0x63, 0x86, 0xfe, 0xd6,
0x66, 0x52, 0x9b, 0xc9, 0x7e, 0x85, 0xd7, 0x18, 0x5e, 0x6c, 0x19, 0x6d,
0x74, 0x08, 0x36, 0xf3, 0xae, 0xf9, 0x4c, 0xa7, 0xa7, 0x41, 0xb5, 0x25,
0x15, 0x62, 0xff, 0xae, 0x96, 0x82, 0xd3, 0x78, 0xf7, 0x79, 0x28, 0x91,
0xd6, 0xb7, 0xed, 0x30, 0xea, 0x73, 0x77, 0x25, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0xac, 0xba, 0x36, 0xd5, 0x4b, 0x79,
0xf3, 0x04, 0xf5, 0x8c, 0x51, 0xd7, 0x9c, 0xc4, 0x42, 0x49, 0x0f, 0x61,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x34, 0x33, 0x09, 0xd5, 0x70, 0xac, 0x80, 0x04, 0x8f, 0xd9, 0xd8, 0xe8,
0xa6, 0xb4, 0x00, 0x28, 0x46, 0x98, 0xca, 0xc7, 0xd2, 0xac, 0x94, 0xc2,
0x39, 0x64, 0xe4, 0xba, 0x75, 0x1f, 0xbe, 0x5c, 0xcf, 0x90, 0x68, 0xf1,
0x6e, 0x4c, 0xca, 0x87, 0x8f, 0x21, 0x48, 0xa0, 0x16, 0x8b, 0x99, 0xd9,
0x87, 0x24, 0x9b, 0x40, 0x22, 0xa8, 0x2f, 0x78, 0xa6, 0xa1, 0x66, 0xe9,
0x3d, 0x18, 0x20, 0x3d, 0x8f, 0xe7, 0x2a, 0x62, 0x9d, 0x7a, 0x59, 0x9f,
0x19, 0xad, 0x4c, 0x22, 0xf7, 0xd8, 0xff, 0x2f, 0xd1, 0x08, 0x62, 0xfa,
0xea, 0x32, 0xde, 0xeb, 0x34, 0xb7, 0x2f, 0xce, 0x59, 0xae, 0x58, 0x5e,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0xb5, 0x26,
0x62, 0x47, 0xcf, 0x65, 0x84, 0xa1, 0x99, 0xdc, 0x4d, 0x33, 0x06, 0x56,
0x18, 0xe9, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x9a, 0x99, 0x05, 0xdc, 0x12, 0x36, 0xab, 0x88,
0x30, 0xba, 0x5f, 0xa0, 0xfb, 0x47, 0x07, 0x26, 0x35, 0x41, 0x2f, 0x9d,
0x69, 0x56, 0x4a, 0xe1, 0x1c, 0x32, 0x72, 0xdd, 0xba, 0x0f, 0x5f, 0x2e,
0x8a, 0xde, 0x53, 0x36, 0x9b, 0x28, 0xd7, 0xc2, 0x9a, 0xe9, 0x54, 0x8f,
0x12, 0x82, 0x62, 0x7c, 0x80, 0x95, 0xb6, 0xe4, 0xb6, 0x95, 0x68, 0x3a,
0x12, 0x91, 0xc6, 0x53, 0x8d, 0xb2, 0x5f, 0x00, 0x98, 0x26, 0x75, 0x9a,
0x48, 0x00, 0x22, 0x78, 0xd9, 0x15, 0xe9, 0x48, 0x9c, 0x41, 0x43, 0x00,
0x86, 0x64, 0xcf, 0xef, 0x41, 0xe0, 0x64, 0xc8, 0x06, 0xbd, 0xf3, 0x38,
0x14, 0x4d, 0xb4, 0x29, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0x00, 0x03, 0xc6, 0x05, 0x53, 0x9b, 0xa2, 0x22, 0xa8, 0x0c, 0xfa,
0x63, 0x9c, 0xc2, 0x5e, 0x8d, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xcd, 0x4c, 0x74, 0x4a,
0xab, 0x56, 0x10, 0x49, 0x78, 0xb6, 0xd1, 0x04, 0x5c, 0xb0, 0x7e, 0x50,
0x06, 0xcf, 0xb9, 0xce, 0x34, 0x2b, 0xa5, 0x70, 0x0e, 0x19, 0xb9, 0x6e,
0xdd, 0x87, 0x2f, 0x17, 0xa3, 0xd0, 0xde, 0x7e, 0x48, 0x51, 0x64, 0x73,
0x21, 0x48, 0x05, 0xf8, 0xb9, 0xaf, 0xa6, 0xc6, 0xfa, 0xf1, 0xaa, 0xea,
0xdc, 0x98, 0xbb, 0xb0, 0x67, 0xa5, 0xd0, 0x60, 0x25, 0x56, 0x02, 0x04,
0x13, 0x70, 0x85, 0x7a, 0x7e, 0xf8, 0xf8, 0x6c, 0x77, 0xe5, 0x5b, 0x32,
0x93, 0xdd, 0x77, 0x1c, 0xa8, 0x36, 0x1c, 0x19, 0x15, 0xd6, 0x65, 0x8a,
0x9f, 0x83, 0x4a, 0xdf, 0x0f, 0x3c, 0x5c, 0x71, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x40, 0x58, 0x89, 0xea, 0xfc, 0xd4, 0xce,
0xe7, 0xab, 0x47, 0x2a, 0x12, 0x95, 0x93, 0x9d, 0x14, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x67, 0x66, 0xa3, 0xab, 0x8c, 0xef, 0x89, 0x25, 0x47, 0x52, 0x2b, 0x9e,
0xa5, 0x3b, 0xf4, 0xbe, 0xea, 0xe7, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51,
0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45, 0xce, 0xc3, 0x34, 0xb3,
0x70, 0x4e, 0x4d, 0xbe, 0xc5, 0x5a, 0x0c, 0x13, 0x5b, 0xee, 0x07, 0xbf,
0x95, 0x12, 0x7e, 0xed, 0x4d, 0xa4, 0x8e, 0xc1, 0x70, 0x34, 0x9e, 0x6a,
0x3b, 0x06, 0xa6, 0x65, 0x0f, 0x2c, 0x93, 0x88, 0xf7, 0xef, 0xc9, 0x9c,
0x0d, 0xd3, 0xa8, 0x1f, 0x8b, 0xd7, 0x35, 0xe3, 0xcb, 0xa0, 0x72, 0x39,
0x0b, 0xa8, 0x5a, 0x6f, 0x95, 0xeb, 0x29, 0x88, 0x52, 0x3e, 0x01, 0x12,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0xc0, 0x8a, 0x2f,
0x82, 0xda, 0x05, 0x96, 0x7e, 0x40, 0x9e, 0xef, 0x9e, 0xdb, 0x4c, 0x0c,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x34, 0x93, 0x07, 0x17, 0x22, 0x7d, 0xc9, 0xe4,
0xa3, 0x4b, 0x66, 0x4e, 0x3b, 0x84, 0x67, 0x34, 0x04, 0xe0, 0xe7, 0x3a,
0xd3, 0xac, 0x94, 0xc2, 0x39, 0x64, 0xe4, 0xba, 0x75, 0x1f, 0xbe, 0x5c,
0x0d, 0x88, 0x5f, 0xf8, 0xac, 0x4e, 0xb4, 0x30, 0x5c, 0xc6, 0xff, 0x6c,
0x72, 0xb2, 0xcf, 0x0b, 0x70, 0xaf, 0x96, 0x28, 0xa9, 0x31, 0xaf, 0xbc,
0x71, 0x72, 0xf3, 0xf2, 0x77, 0x8e, 0xac, 0x18, 0x59, 0xc0, 0x96, 0x6f,
0xa6, 0xde, 0x5d, 0x3b, 0x9f, 0x66, 0x1f, 0x89, 0xf0, 0x8b, 0xe8, 0x62,
0xe1, 0x64, 0x16, 0x30, 0xad, 0xff, 0xfd, 0xd9, 0x7f, 0xd8, 0xb0, 0x25,
0x9b, 0x02, 0xa4, 0x00, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0xd0, 0x69, 0x1e, 0xe4, 0x15, 0x78, 0x19, 0x0a, 0xda, 0x1f, 0xdf,
0xc2, 0x56, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x59, 0xe1, 0xb5,
0xf5, 0x60, 0x05, 0x8f, 0x81, 0xda, 0xfd, 0xe9, 0x29, 0x5a, 0x76, 0x21,
0x02, 0xf0, 0x73, 0x9d, 0x69, 0x56, 0x4a, 0xe1, 0x1c, 0x32, 0x72, 0xdd,
0xba, 0x0f, 0x5f, 0x2e, 0xfd, 0xdf, 0x81, 0x19, 0x3c, 0xad, 0xc6, 0x8b,
0x58, 0xfe, 0x51, 0x28, 0xb5, 0xf8, 0xdd, 0x10, 0x20, 0x61, 0x30, 0xb3,
0xb5, 0xf5, 0x76, 0x71, 0xa8, 0xc3, 0x57, 0xe4, 0x0c, 0xda, 0x70, 0x11,
0x1e, 0xbd, 0x07, 0x1a, 0xc6, 0xa1, 0x7e, 0x7c, 0xf2, 0x1c, 0xd0, 0x90,
0x6d, 0x8f, 0x21, 0x11, 0xde, 0x84, 0x40, 0xb2, 0x19, 0x13, 0x42, 0x9c,
0xf6, 0xc4, 0x72, 0xf5, 0x9e, 0x74, 0xbe, 0x50, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x30, 0x32, 0x71, 0x23, 0x2e, 0xdd, 0x8b,
0xf5, 0xda, 0xc1, 0xf5, 0x60, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xcd, 0x04, 0x3e, 0x2b, 0xd5, 0x66, 0x7c, 0x0c, 0x14, 0xe9, 0xcd, 0x52,
0x02, 0x82, 0xbf, 0x10, 0x01, 0xf8, 0xb9, 0xce, 0x34, 0x2b, 0xa5, 0x70,
0x0e, 0x19, 0xb9, 0x6e, 0xdd, 0x87, 0x2f, 0x17, 0xc0, 0x5b, 0xf8, 0x94,
0x48, 0x19, 0xd1, 0x27, 0xe0, 0x36, 0x2a, 0x8e, 0xe7, 0x18, 0x70, 0x0f,
0xe1, 0x8e, 0xf4, 0x48, 0x72, 0x5c, 0x8e, 0x9c, 0xfb, 0xb9, 0x62, 0xb7,
0xa6, 0x15, 0x84, 0x5e, 0x6f, 0x6b, 0x14, 0xb3, 0x43, 0x24, 0xad, 0xe3,
0x9e, 0x76, 0x84, 0x7a, 0x92, 0x8d, 0x04, 0x40, 0x51, 0x73, 0x6b, 0x18,
0xc1, 0x91, 0x25, 0x20, 0x40, 0x67, 0x02, 0x0e, 0x72, 0x76, 0x27, 0x6b,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0xa4, 0xd8, 0x1d,
0xa2, 0x5e, 0xa7, 0x37, 0x70, 0xad, 0xd1, 0x9c, 0x02, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x67, 0x76, 0x60, 0xee, 0x6c, 0x59, 0x24, 0x5c,
0x74, 0x80, 0x2a, 0x4d, 0x98, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c,
0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45,
0x22, 0x62, 0xe0, 0xd7, 0x4f, 0x07, 0xfe, 0x3a, 0x15, 0x87, 0xb6, 0x74,
0x37, 0x21, 0xa4, 0x0a, 0x98, 0x5e, 0x28, 0x12, 0x77, 0xf1, 0x1f, 0x50,
0xd9, 0x8f, 0xf9, 0x40, 0x41, 0x6f, 0x0f, 0x4b, 0x87, 0x7d, 0x5e, 0x14,
0x57, 0xf5, 0xe8, 0x74, 0xc4, 0xd4, 0x27, 0xfc, 0x86, 0xd9, 0x56, 0x66,
0x17, 0xd6, 0x97, 0xe5, 0x69, 0x2a, 0x6b, 0x03, 0x85, 0x38, 0xda, 0x70,
0xb2, 0xf6, 0xdd, 0x52, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0xac, 0x7f, 0x7a, 0x7f, 0xcc, 0x6b, 0x89, 0xc3, 0x0c, 0x8f, 0x01,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x69, 0x13, 0xea,
0xec, 0xb4, 0x88, 0x9c, 0xa2, 0x9b, 0x58, 0x31, 0xcf, 0x1c, 0xfe, 0x42,
0x04, 0xe0, 0xe7, 0x3a, 0xd3, 0xac, 0x94, 0xc2, 0x39, 0x64, 0xe4, 0xba,
0x75, 0x1f, 0xbe, 0x5c, 0x4c, 0x07, 0x42, 0x14, 0x93, 0xe2, 0x83, 0x93,
0xa3, 0xa9, 0x72, 0x37, 0xa9, 0xb2, 0x58, 0xa7, 0x6a, 0x94, 0x53, 0xb5,
0xeb, 0xad, 0x3b, 0x56, 0x46, 0x6b, 0x61, 0xa3, 0x8c, 0x14, 0x56, 0x32,
0xd4, 0x10, 0xf3, 0xf6, 0x64, 0x82, 0xef, 0xb0, 0x24, 0xd2, 0xc2, 0x79,
0xde, 0xb0, 0x42, 0x9e, 0x65, 0x05, 0x88, 0x74, 0x2f, 0x8b, 0x58, 0xd5,
0x42, 0x1a, 0x06, 0xa3, 0xd4, 0xd1, 0xfc, 0x12, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0xc5, 0x1c, 0xd9, 0xfc, 0x35, 0x0a, 0xad,
0x17, 0xee, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x9a, 0x25, 0x9e, 0xe6, 0xbc, 0xaf, 0xb1, 0x88, 0x37, 0xd7, 0x97, 0x99,
0x67, 0x0e, 0x7f, 0x21, 0x02, 0xf0, 0x73, 0x9d, 0x69, 0x56, 0x4a, 0xe1,
0x1c, 0x32, 0x72, 0xdd, 0xba, 0x0f, 0x5f, 0x2e, 0x5d, 0x53, 0x09, 0xab,
0xc2, 0x58, 0x42, 0x94, 0x15, 0x71, 0x15, 0x1b, 0xfa, 0xfb, 0xcd, 0xc6,
0xbc, 0xa4, 0x00, 0x49, 0x74, 0xd3, 0x9c, 0x04, 0x81, 0xd7, 0x09, 0x28,
0x7d, 0x37, 0x37, 0x58, 0x8a, 0x94, 0x1a, 0x79, 0x8a, 0x7e, 0x94, 0x29,
0xbd, 0xab, 0x65, 0x5f, 0xc6, 0x94, 0xed, 0xdd, 0x94, 0xc1, 0xf1, 0xd8,
0xed, 0x88, 0x80, 0x47, 0x31, 0x52, 0xda, 0x94, 0x30, 0x60, 0xac, 0x0a,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x83, 0x86, 0xfc,
0xaf, 0x41, 0xb9, 0x0e, 0x8e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x4d, 0xac, 0x2d, 0x08, 0x88, 0xfd, 0x14, 0x1e,
0x24, 0x78, 0xcc, 0xcc, 0x33, 0x87, 0xbf, 0x10, 0x01, 0xf8, 0xb9, 0xce,
0x34, 0x2b, 0xa5, 0x70, 0x0e, 0x19, 0xb9, 0x6e, 0xdd, 0x87, 0x2f, 0x17,
0xbb, 0xc3, 0xc0, 0xea, 0xbd, 0x9b, 0xd6, 0x22, 0x75, 0x96, 0x6c, 0xca,
0xc1, 0x2b, 0x45, 0x76, 0x2e, 0x4e, 0x2b, 0xf7, 0xfa, 0x9e, 0x64, 0x2d,
0x4b, 0xd8, 0xd9, 0x06, 0x24, 0xa3, 0xce, 0x49, 0x17, 0xd6, 0x83, 0xe8,
0x15, 0xc0, 0xbf, 0x4f, 0xf8, 0x85, 0x9d, 0xaf, 0xb6, 0x85, 0x62, 0x31,
0x89, 0x06, 0x10, 0xd3, 0x0a, 0xf1, 0xd5, 0x03, 0x50, 0x2c, 0x1e, 0xf4,
0xa8, 0xb6, 0x91, 0x2e, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x40, 0x0c, 0xab, 0x80, 0x53, 0x26, 0xc2, 0x54, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6b, 0xbb, 0x49,
0x36, 0x72, 0x46, 0x68, 0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32,
0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c,
0x98, 0x97, 0x8e, 0x45, 0x80, 0x00, 0x64, 0x53, 0xae, 0xef, 0x99, 0x13,
0xc9, 0x1c, 0x53, 0x45, 0x0c, 0xdc, 0x97, 0xd5, 0x80, 0x4b, 0x56, 0x86,
0xa3, 0xbc, 0x78, 0xc4, 0xc2, 0x0c, 0xf7, 0xaf, 0xfd, 0x42, 0xe9, 0x1b,
0x5f, 0x2f, 0xa6, 0x4f, 0xa5, 0x53, 0xb5, 0xec, 0x36, 0x50, 0xc6, 0xd1,
0x3f, 0xdf, 0xaf, 0x63, 0x9f, 0x25, 0x1d, 0x40, 0xd2, 0xb5, 0x83, 0x24,
0x67, 0x57, 0x4d, 0x0c, 0x7b, 0x8c, 0x90, 0x3d, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xc0, 0xfa, 0x49, 0xea, 0x2a, 0x92, 0x32, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x94, 0x9d, 0x8c, 0xc4, 0x08, 0x6c, 0x2a, 0x66, 0x32, 0xe3, 0x31, 0x33,
0xcf, 0x1c, 0xfe, 0x42, 0x04, 0xe0, 0xe7, 0x3a, 0xd3, 0xac, 0x94, 0xc2,
0x39, 0x64, 0xe4, 0xba, 0x75, 0x1f, 0xbe, 0x5c, 0x57, 0x7e, 0xfd, 0x4b,
0xb2, 0xe8, 0x7d, 0x9b, 0x0e, 0x3c, 0x51, 0xa9, 0x15, 0xcb, 0x9e, 0xfa,
0x45, 0x73, 0x63, 0x75, 0xac, 0xb6, 0x70, 0xff, 0xcb, 0x0a, 0xb6, 0xd9,
0xd9, 0xf6, 0x7d, 0x5e, 0x31, 0x38, 0x8e, 0xff, 0x0c, 0xba, 0x37, 0xee,
0x71, 0x85, 0x91, 0xa1, 0xc8, 0x13, 0x11, 0xc4, 0x7c, 0x0f, 0x6e, 0xca,
0x37, 0x32, 0xdb, 0x2a, 0x5a, 0xf3, 0x69, 0x45, 0x0f, 0x15, 0xe4, 0x1e,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0xd0, 0xb2, 0x9d, 0x4f,
0x2c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x5a, 0x83, 0xa6, 0xa4, 0x69, 0x0f, 0x33, 0x33,
0x99, 0xf1, 0x98, 0x99, 0x67, 0x0e, 0x7f, 0x21, 0x02, 0xf0, 0x73, 0x9d,
0x69, 0x56, 0x4a, 0xe1, 0x1c, 0x32, 0x72, 0xdd, 0xba, 0x0f, 0x5f, 0x2e,
0xa0, 0xbc, 0x92, 0xcb, 0x0d, 0xff, 0xd0, 0xea, 0x29, 0x6b, 0x52, 0x84,
0x98, 0xe7, 0xf8, 0xf0, 0xfb, 0x66, 0xbe, 0x18, 0xab, 0x4c, 0x1b, 0x71,
0x16, 0xec, 0xb7, 0x81, 0x69, 0x77, 0xeb, 0x57, 0x47, 0xad, 0x0d, 0x3a,
0x20, 0x42, 0x92, 0xf1, 0xc1, 0xd3, 0xef, 0x25, 0xf5, 0x26, 0x55, 0xc3,
0x98, 0x49, 0x24, 0xec, 0xbe, 0x30, 0x09, 0xa8, 0x07, 0x14, 0x8f, 0xce,
0x6f, 0xdd, 0xa9, 0x71, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x30, 0xc2, 0xb2, 0x00, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0xed, 0x68, 0x0c,
0x84, 0x99, 0x99, 0x99, 0xcc, 0x78, 0xcc, 0xcc, 0x33, 0x87, 0xbf, 0x10,
0x01, 0xf8, 0xb9, 0xce, 0x34, 0x2b, 0xa5, 0x70, 0x0e, 0x19, 0xb9, 0x6e,
0xdd, 0x87, 0x2f, 0x17, 0x88, 0x7c, 0xf4, 0x83, 0xfd, 0x16, 0xf9, 0xf0,
0x1a, 0xcc, 0xc4, 0xea, 0xf5, 0xba, 0x03, 0x19, 0x4a, 0x1b, 0x5d, 0x52,
0xe2, 0xfd, 0x78, 0xb1, 0x3d, 0xd3, 0x03, 0x25, 0x35, 0x11, 0x34, 0x47,
0x4d, 0xd6, 0x17, 0x27, 0xbf, 0xde, 0x87, 0x2c, 0xde, 0x46, 0x07, 0xcd,
0x2f, 0xa1, 0x31, 0x26, 0xe5, 0x8d, 0x87, 0xe1, 0xda, 0xac, 0xfe, 0x37,
0x97, 0x79, 0x72, 0xc7, 0x3f, 0x28, 0x4a, 0x5e, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xe4, 0xc5, 0xbd, 0x0a, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xf7, 0xec, 0x75, 0x26, 0xcc, 0xcc, 0xcc, 0xcc, 0x65, 0x6a, 0x65, 0x66,
0x9b, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51,
0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45, 0xb3, 0xc4, 0x59, 0xb6,
0x35, 0x71, 0xaf, 0xe7, 0x73, 0x4c, 0x03, 0xe2, 0x69, 0x50, 0x19, 0x36,
0x65, 0x43, 0xd5, 0x33, 0x7f, 0x31, 0xf1, 0x0e, 0x89, 0xa3, 0x4f, 0x55,
0xdd, 0xf3, 0x67, 0x57, 0xf1, 0xcb, 0xe8, 0x3c, 0x7f, 0x68, 0x6a, 0x29,
0xe5, 0x60, 0x35, 0x3e, 0x72, 0x40, 0x3c, 0x8b, 0x39, 0x01, 0xa5, 0x9e,
0x73, 0x99, 0x7f, 0x87, 0x18, 0xb4, 0x68, 0xc6, 0xd2, 0x7b, 0xa9, 0x71,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0xac, 0x68, 0x06, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xca, 0xff, 0x91, 0x99, 0x65, 0x66, 0x66, 0x66,
0x32, 0xe3, 0x31, 0x33, 0xcf, 0x1c, 0xfe, 0x42, 0x04, 0xe0, 0xe7, 0x3a,
0xd3, 0xac, 0x94, 0xc2, 0x39, 0x64, 0xe4, 0xba, 0x75, 0x1f, 0xbe, 0x5c,
0x71, 0x02, 0x27, 0x40, 0x1d, 0xe1, 0x11, 0xee, 0x3b, 0xc9, 0x13, 0xe2,
0x90, 0x29, 0xf8, 0x0b, 0x70, 0x4b, 0x7b, 0xfa, 0xd2, 0xd6, 0xf5, 0x8f,
0x45, 0xff, 0xc8, 0xc4, 0xc9, 0x4c, 0xf9, 0x5e, 0x0e, 0x66, 0x85, 0x5f,
0x88, 0xff, 0xd3, 0xb6, 0xd8, 0x0a, 0x67, 0xd6, 0x91, 0x18, 0x5b, 0xe5,
0xc9, 0xa9, 0xd9, 0x55, 0xb4, 0x30, 0xde, 0x83, 0xe5, 0x10, 0x9a, 0x05,
0x55, 0x53, 0xde, 0x55, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0xd5, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0xc8, 0xcc, 0xcc,
0x32, 0x33, 0x33, 0x33, 0x99, 0xf1, 0x98, 0x99, 0x67, 0x0e, 0x7f, 0x21,
0x02, 0xf0, 0x73, 0x9d, 0x69, 0x56, 0x4a, 0xe1, 0x1c, 0x32, 0x72, 0xdd,
0xba, 0x0f, 0x5f, 0x2e, 0x01, 0x00, 0x00, 0xc0, 0xa9, 0xaa, 0xaa, 0x6a,
0x54, 0xd4, 0x53, 0x15, 0x58, 0xd6, 0x6d, 0x37, 0x5a, 0x5b, 0xd4, 0x08,
0xb2, 0xb0, 0x9f, 0xd9, 0x2c, 0x08, 0x7b, 0x3b, 0x0c, 0x84, 0x44, 0x6a,
0x08, 0x75, 0x50, 0x67, 0x4d, 0xe0, 0x04, 0xae, 0x38, 0x92, 0x4a, 0x99,
0x8b, 0x1e, 0xbb, 0x5f, 0x89, 0x70, 0x45, 0x82, 0x02, 0xe8, 0xe3, 0xe9,
0xea, 0x60, 0x2a, 0xd0, 0xa1, 0xe3, 0x59, 0x47, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x65, 0x66, 0x66, 0xe6, 0x98, 0x99, 0x99, 0x19, 0xcc, 0xa6, 0xcb, 0x4c,
0x35, 0x59, 0x9e, 0xba, 0x03, 0xe4, 0x8a, 0xd3, 0x38, 0x17, 0x42, 0x8a,
0xb2, 0xd7, 0x87, 0x03, 0x87, 0x5b, 0x26, 0x51, 0x01, 0x00, 0x00, 0x40,
0xff, 0xff, 0xff, 0x3f, 0xff, 0xc4, 0xfe, 0x3f, 0x02, 0x3b, 0xce, 0xfe,
0x03, 0x62, 0x39, 0x07, 0x06, 0x62, 0x6b, 0x26, 0xf6, 0x1d, 0x36, 0x5f,
0x7e, 0x3d, 0xf2, 0x56, 0x34, 0x33, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xcc,
0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c,
0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45
};
unsigned int constants_2_len = 11904;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,273 @@
#include "poseidon.cuh"
template <typename S>
__global__ void prepare_poseidon_states(S * states, size_t number_of_states, S domain_tag, const PoseidonConfiguration<S> config) {
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
int state_number = idx / config.t;
if (state_number >= number_of_states) {
return;
}
int element_number = idx % config.t;
S prepared_element;
// Domain separation
if (element_number == 0) {
prepared_element = domain_tag;
} else {
prepared_element = states[state_number * config.t + element_number - 1];
}
// Add pre-round constant
prepared_element = prepared_element + config.round_constants[element_number];
// Store element in state
states[idx] = prepared_element;
}
template <typename S>
__device__ __forceinline__ S sbox_alpha_five(S element) {
S result = S::sqr(element);
result = S::sqr(result);
return result * element;
}
template <typename S>
__device__ S vecs_mul_matrix(S element, S * matrix, int element_number, int vec_number, int size, S * shared_states) {
shared_states[threadIdx.x] = element;
__syncthreads();
element = S::zero();
for (int i = 0; i < size; i++) {
element = element + (shared_states[vec_number * size + i] * matrix[i * size + element_number]);
}
__syncthreads();
return element;
}
template <typename S>
__device__ S full_round(S element,
size_t rc_offset,
int local_state_number,
int element_number,
bool multiply_by_mds,
bool add_round_constant,
S * shared_states,
const PoseidonConfiguration<S> config) {
element = sbox_alpha_five(element);
if (add_round_constant) {
element = element + config.round_constants[rc_offset + element_number];
}
// Multiply all the states by mds matrix
S * matrix = multiply_by_mds ? config.mds_matrix : config.non_sparse_matrix;
return vecs_mul_matrix(element, matrix, element_number, local_state_number, config.t, shared_states);
}
// Execute full rounds
template <typename S>
__global__ void full_rounds(S * states, size_t number_of_states, size_t rc_offset, bool first_half, const PoseidonConfiguration<S> config) {
extern __shared__ S shared_states[];
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
int state_number = idx / config.t;
if (state_number >= number_of_states) {
return;
}
int local_state_number = threadIdx.x / config.t;
int element_number = idx % config.t;
for (int i = 0; i < config.full_rounds_half - 1; i++) {
states[idx] = full_round(states[idx],
rc_offset,
local_state_number,
element_number,
true,
true,
shared_states,
config);
rc_offset += config.t;
}
states[idx] = full_round(states[idx],
rc_offset,
local_state_number,
element_number,
!first_half,
first_half,
shared_states,
config);
}
template <typename S>
__device__ S partial_round(S * state,
size_t rc_offset,
int round_number,
const PoseidonConfiguration<S> config) {
S element = state[0];
element = sbox_alpha_five(element);
element = element + config.round_constants[rc_offset];
S * sparse_matrix = &config.sparse_matrices[(config.t * 2 - 1) * round_number];
state[0] = element * sparse_matrix[0];
for (int i = 1; i < config.t; i++) {
state[0] = state[0] + (state[i] * sparse_matrix[i]);
}
for (int i = 1; i < config.t; i++) {
state[i] = state[i] + (element * sparse_matrix[config.t + i - 1]);
}
}
// Execute partial rounds
template <typename S>
__global__ void partial_rounds(S * states, size_t number_of_states, size_t rc_offset, const PoseidonConfiguration<S> config) {
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
if (idx >= number_of_states) {
return;
}
S * state = &states[idx * config.t];
for (int i = 0; i < config.partial_rounds; i++) {
partial_round(state, rc_offset, i, config);
rc_offset++;
}
}
// These function is just doing copy from the states to the output
template <typename S>
__global__ void get_hash_results(S * states, size_t number_of_states, S * out, int t) {
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
if (idx >= number_of_states) {
return;
}
out[idx] = states[idx * t + 1];
}
template <typename S>
__host__ void Poseidon<S>::hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type, cudaStream_t stream) {
S * states;
// allocate memory for {blocks} states of {t} scalars each
if (cudaMallocAsync(&states, blocks * this->t * sizeof(S), stream) != cudaSuccess) {
throw std::runtime_error("Failed memory allocation on the device");
}
// This is where the input matrix of size Arity x NumberOfBlocks is
// padded and coppied to device in a T x NumberOfBlocks matrix
cudaMemcpy2DAsync(states, this->t * sizeof(S), // Device pointer and device pitch
inp, (this->t - 1) * sizeof(S), // Host pointer and pitch
(this->t - 1) * sizeof(S), blocks, // Size of the source matrix (Arity x NumberOfBlocks)
cudaMemcpyHostToDevice, stream);
size_t rc_offset = 0;
// The logic behind this is that 1 thread only works on 1 element
// We have {t} elements in each state, and {blocks} states total
int number_of_threads = (256 / this->t) * this->t;
int hashes_per_block = number_of_threads / this->t;
int total_number_of_threads = blocks * this->t;
int number_of_blocks = total_number_of_threads / number_of_threads +
static_cast<bool>(total_number_of_threads % number_of_threads);
// The partial rounds operates on the whole state, so we define
// the parallelism params for processing a single hash preimage per thread
int singlehash_block_size = 128;
int number_of_singlehash_blocks = blocks / singlehash_block_size + static_cast<bool>(blocks % singlehash_block_size);
// Pick the domain_tag accordinaly
S domain_tag;
switch (hash_type) {
case HashType::ConstInputLen:
domain_tag = this->const_input_no_pad_domain_tag;
break;
case HashType::MerkleTree:
domain_tag = this->tree_domain_tag;
}
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
auto start_time = std::chrono::high_resolution_clock::now();
#endif
// Domain separation and adding pre-round constants
prepare_poseidon_states <<< number_of_blocks, number_of_threads, 0, stream >>> (states, blocks, domain_tag, this->config);
rc_offset += this->t;
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Domain separation: " << rc_offset << std::endl;
//print_buffer_from_cuda<S>(states, blocks * this->t);
auto end_time = std::chrono::high_resolution_clock::now();
auto elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
// execute half full rounds
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t, stream >>> (states, blocks, rc_offset, true, this->config);
rc_offset += this->t * this->config.full_rounds_half;
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Full rounds 1. RCOFFSET: " << rc_offset << std::endl;
// print_buffer_from_cuda<S>(states, blocks * this->t);
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
// execute partial rounds
partial_rounds <<< number_of_singlehash_blocks, singlehash_block_size, 0, stream >>> (states, blocks, rc_offset, this->config);
rc_offset += this->config.partial_rounds;
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Partial rounds. RCOFFSET: " << rc_offset << std::endl;
//print_buffer_from_cuda<S>(states, blocks * this->t);
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
// execute half full rounds
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t, stream >>> (states, blocks, rc_offset, false, this->config);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Full rounds 2. RCOFFSET: " << rc_offset << std::endl;
//print_buffer_from_cuda<S>(states, blocks * this->t);
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
// get output
S * out_device;
cudaMalloc(&out_device, blocks * sizeof(S));
get_hash_results <<< number_of_singlehash_blocks, singlehash_block_size, 0, stream >>> (states, blocks, out_device, this->config.t);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Get hash results" << std::endl;
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
#endif
cudaMemcpyAsync(out, out_device, blocks * sizeof(S), cudaMemcpyDeviceToHost, stream);
cudaFreeAsync(out_device, stream);
cudaFreeAsync(states, stream);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaDeviceReset();
#endif
}

View File

@@ -0,0 +1,163 @@
#pragma once
#include "constants.cuh"
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#include <chrono>
template <typename S>
__host__ void print_buffer_from_cuda(S * device_ptr, size_t size, size_t t) {
S * buffer = static_cast< S * >(malloc(size * sizeof(S)));
cudaMemcpy(buffer, device_ptr, size * sizeof(S), cudaMemcpyDeviceToHost);
std::cout << "Start print" << std::endl;
for(int i = 0; i < size / t; i++) {
std::cout << "State #" << i << std::endl;
for (int j = 0; j < t; j++) {
std::cout << buffer[i * t + j] << std::endl;
}
std::cout << std::endl;
}
std::cout << std::endl;
free(buffer);
}
#endif
#ifdef DEBUG
template <typename S>
__device__ void print_scalar(S element, int data) {
printf("D# %d, T# %d: 0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
data,
threadIdx.x,
element.limbs_storage.limbs[0],
element.limbs_storage.limbs[1],
element.limbs_storage.limbs[2],
element.limbs_storage.limbs[3],
element.limbs_storage.limbs[4],
element.limbs_storage.limbs[5],
element.limbs_storage.limbs[6],
element.limbs_storage.limbs[7]
);
}
#endif
template <typename S>
struct PoseidonConfiguration {
uint32_t partial_rounds, full_rounds_half, t;
S * round_constants, * mds_matrix, * non_sparse_matrix, *sparse_matrices;
};
template <typename S>
class Poseidon {
public:
uint32_t t;
PoseidonConfiguration<S> config;
enum HashType {
ConstInputLen,
MerkleTree,
};
Poseidon(const uint32_t arity, cudaStream_t stream) {
t = arity + 1;
this->config.t = t;
this->stream = stream;
// Pre-calculate domain tags
// Domain tags will vary for different applications of Poseidon
uint32_t tree_domain_tag_value = 1;
tree_domain_tag_value = (tree_domain_tag_value << arity) - tree_domain_tag_value;
tree_domain_tag = S::from(tree_domain_tag_value);
const_input_no_pad_domain_tag = S::one();
// TO-DO: implement binary shifts for scalar type
// const_input_no_pad_domain_tag = S::one() << 64;
// const_input_no_pad_domain_tag *= S::from(arity);
this->config.full_rounds_half = FULL_ROUNDS_DEFAULT;
this->config.partial_rounds = partial_rounds_number_from_arity(arity);
uint32_t round_constants_len = t * this->config.full_rounds_half * 2 + this->config.partial_rounds;
uint32_t mds_matrix_len = t * t;
uint32_t sparse_matrices_len = (t * 2 - 1) * this->config.partial_rounds;
// All the constants are stored in a single file
S * constants = load_constants<S>(arity);
S * mds_offset = constants + round_constants_len;
S * non_sparse_offset = mds_offset + mds_matrix_len;
S * sparse_matrices_offset = non_sparse_offset + mds_matrix_len;
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
std::cout << "P: " << this->config.partial_rounds << " F: " << this->config.full_rounds_half << std::endl;
#endif
// Create streams for copying constants
cudaStream_t stream_copy_round_constants, stream_copy_mds_matrix, stream_copy_non_sparse, stream_copy_sparse_matrices;
cudaStreamCreate(&stream_copy_round_constants);
cudaStreamCreate(&stream_copy_mds_matrix);
cudaStreamCreate(&stream_copy_non_sparse);
cudaStreamCreate(&stream_copy_sparse_matrices);
// Create events for copying constants
cudaEvent_t event_copied_round_constants, event_copy_mds_matrix, event_copy_non_sparse, event_copy_sparse_matrices;
cudaEventCreateWithFlags(&event_copied_round_constants, cudaEventDisableTiming);
cudaEventCreateWithFlags(&event_copy_mds_matrix, cudaEventDisableTiming);
cudaEventCreateWithFlags(&event_copy_non_sparse, cudaEventDisableTiming);
cudaEventCreateWithFlags(&event_copy_sparse_matrices, cudaEventDisableTiming);
// Malloc memory for copying constants
cudaMallocAsync(&this->config.round_constants, sizeof(S) * round_constants_len, stream_copy_round_constants);
cudaMallocAsync(&this->config.mds_matrix, sizeof(S) * mds_matrix_len, stream_copy_mds_matrix);
cudaMallocAsync(&this->config.non_sparse_matrix, sizeof(S) * mds_matrix_len, stream_copy_non_sparse);
cudaMallocAsync(&this->config.sparse_matrices, sizeof(S) * sparse_matrices_len, stream_copy_sparse_matrices);
// Copy constants
cudaMemcpyAsync(this->config.round_constants, constants,
sizeof(S) * round_constants_len,
cudaMemcpyHostToDevice, stream_copy_round_constants
);
cudaMemcpyAsync(this->config.mds_matrix, mds_offset,
sizeof(S) * mds_matrix_len,
cudaMemcpyHostToDevice, stream_copy_mds_matrix
);
cudaMemcpyAsync(this->config.non_sparse_matrix, non_sparse_offset,
sizeof(S) * mds_matrix_len,
cudaMemcpyHostToDevice, stream_copy_non_sparse
);
cudaMemcpyAsync(this->config.sparse_matrices, sparse_matrices_offset,
sizeof(S) * sparse_matrices_len,
cudaMemcpyHostToDevice, stream_copy_sparse_matrices
);
// Record finished copying event for streams
cudaEventRecord(event_copied_round_constants, stream_copy_round_constants);
cudaEventRecord(event_copy_mds_matrix, stream_copy_mds_matrix);
cudaEventRecord(event_copy_non_sparse, stream_copy_non_sparse);
cudaEventRecord(event_copy_sparse_matrices, stream_copy_sparse_matrices);
// Main stream waits for copying to finish
cudaStreamWaitEvent(stream, event_copied_round_constants);
cudaStreamWaitEvent(stream, event_copy_mds_matrix);
cudaStreamWaitEvent(stream, event_copy_non_sparse);
cudaStreamWaitEvent(stream, event_copy_sparse_matrices);
}
~Poseidon() {
cudaFreeAsync(this->config.round_constants, this->stream);
cudaFreeAsync(this->config.mds_matrix, this->stream);
cudaFreeAsync(this->config.non_sparse_matrix, this->stream);
cudaFreeAsync(this->config.sparse_matrices, this->stream);
}
// Hash multiple preimages in parallel
void hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type, cudaStream_t stream);
private:
S tree_domain_tag, const_input_no_pad_domain_tag;
cudaStream_t stream;
};

File diff suppressed because it is too large Load Diff

View File

@@ -19,7 +19,7 @@ __global__ void vectorModMult(S *scalar_vec, E *element_vec, E *result, size_t n
}
template <typename E, typename S>
int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments) // TODO: in place so no need for third result vector
int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments, cudaStream_t stream) // TODO: in place so no need for third result vector
{
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)n_elments / MAX_THREADS_PER_BLOCK);
@@ -28,23 +28,24 @@ int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments) // TODO: in
// Allocate memory on the device for the input vectors, the output vector, and the modulus
S *d_vec_a;
E *d_vec_b, *d_result;
cudaMalloc(&d_vec_a, n_elments * sizeof(S));
cudaMalloc(&d_vec_b, n_elments * sizeof(E));
cudaMalloc(&d_result, n_elments * sizeof(E));
cudaMallocAsync(&d_vec_a, n_elments * sizeof(S), stream);
cudaMallocAsync(&d_vec_b, n_elments * sizeof(E), stream);
cudaMallocAsync(&d_result, n_elments * sizeof(E), stream);
// Copy the input vectors and the modulus from the host to the device
cudaMemcpy(d_vec_a, vec_a, n_elments * sizeof(S), cudaMemcpyHostToDevice);
cudaMemcpy(d_vec_b, vec_b, n_elments * sizeof(E), cudaMemcpyHostToDevice);
cudaMemcpyAsync(d_vec_a, vec_a, n_elments * sizeof(S), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_vec_b, vec_b, n_elments * sizeof(E), cudaMemcpyHostToDevice, stream);
// Call the kernel to perform element-wise modular multiplication
vectorModMult<<<num_blocks, threads_per_block>>>(d_vec_a, d_vec_b, d_result, n_elments);
vectorModMult<<<num_blocks, threads_per_block, 0, stream>>>(d_vec_a, d_vec_b, d_result, n_elments);
cudaMemcpy(result, d_result, n_elments * sizeof(E), cudaMemcpyDeviceToHost);
cudaMemcpyAsync(result, d_result, n_elments * sizeof(E), cudaMemcpyDeviceToHost, stream);
cudaFree(d_vec_a);
cudaFree(d_vec_b);
cudaFree(d_result);
cudaFreeAsync(d_vec_a, stream);
cudaFreeAsync(d_vec_b, stream);
cudaFreeAsync(d_result, stream);
cudaStreamSynchronize(stream);
return 0;
}
@@ -60,12 +61,12 @@ __global__ void batchVectorMult(S *scalar_vec, E *element_vec, unsigned n_scalar
}
template <typename E, typename S>
int batch_vector_mult(S *scalar_vec, E *element_vec, unsigned n_scalars, unsigned batch_size)
int batch_vector_mult(S *scalar_vec, E *element_vec, unsigned n_scalars, unsigned batch_size, cudaStream_t stream)
{
// Set the grid and block dimensions
int NUM_THREADS = MAX_THREADS_PER_BLOCK;
int NUM_BLOCKS = (n_scalars * batch_size + NUM_THREADS - 1) / NUM_THREADS;
batchVectorMult<<<NUM_BLOCKS, NUM_THREADS>>>(scalar_vec, element_vec, n_scalars, batch_size);
batchVectorMult<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(scalar_vec, element_vec, n_scalars, batch_size);
return 0;
}
@@ -83,7 +84,7 @@ __global__ void matrixVectorMult(E *matrix_elements, E *vector_elements, E *resu
}
template <typename E>
int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t dim)
int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t dim, cudaStream_t stream)
{
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)dim / MAX_THREADS_PER_BLOCK);
@@ -91,23 +92,24 @@ int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t di
// Allocate memory on the device for the input vectors, the output vector, and the modulus
E *d_matrix, *d_vector, *d_result;
cudaMalloc(&d_matrix, (dim * dim) * sizeof(E));
cudaMalloc(&d_vector, dim * sizeof(E));
cudaMalloc(&d_result, dim * sizeof(E));
cudaMallocAsync(&d_matrix, (dim * dim) * sizeof(E), stream);
cudaMallocAsync(&d_vector, dim * sizeof(E), stream);
cudaMallocAsync(&d_result, dim * sizeof(E), stream);
// Copy the input vectors and the modulus from the host to the device
cudaMemcpy(d_matrix, matrix_elements, (dim * dim) * sizeof(E), cudaMemcpyHostToDevice);
cudaMemcpy(d_vector, vector_elements, dim * sizeof(E), cudaMemcpyHostToDevice);
cudaMemcpyAsync(d_matrix, matrix_elements, (dim * dim) * sizeof(E), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_vector, vector_elements, dim * sizeof(E), cudaMemcpyHostToDevice, stream);
// Call the kernel to perform element-wise modular multiplication
matrixVectorMult<<<num_blocks, threads_per_block>>>(d_matrix, d_vector, d_result, dim);
matrixVectorMult<<<num_blocks, threads_per_block, 0, stream>>>(d_matrix, d_vector, d_result, dim);
cudaMemcpy(result, d_result, dim * sizeof(E), cudaMemcpyDeviceToHost);
cudaMemcpyAsync(result, d_result, dim * sizeof(E), cudaMemcpyDeviceToHost, stream);
cudaFree(d_matrix);
cudaFree(d_vector);
cudaFree(d_result);
cudaFreeAsync(d_matrix, stream);
cudaFreeAsync(d_vector, stream);
cudaFreeAsync(d_result, stream);
cudaStreamSynchronize(stream);
return 0;
}
#endif

View File

@@ -6,14 +6,15 @@
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" BLS12_377::scalar_t* build_domain_cuda_bls12_377(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
extern "C" BLS12_377::scalar_t* build_domain_cuda_bls12_377(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
if (inverse) {
return fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega_inv(logn));
return fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega_inv(logn), stream);
} else {
return fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega(logn));
return fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega(logn), stream);
}
}
catch (const std::runtime_error &ex)
@@ -23,11 +24,12 @@ extern "C" BLS12_377::scalar_t* build_domain_cuda_bls12_377(uint32_t domain_size
}
}
extern "C" int ntt_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
extern "C" int ntt_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, n, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -37,11 +39,12 @@ extern "C" int ntt_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t n, bool inv
}
}
extern "C" int ecntt_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
extern "C" int ecntt_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, n, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -50,11 +53,12 @@ extern "C" int ecntt_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t n, bo
}
}
extern "C" int ntt_batch_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
extern "C" int ntt_batch_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_batch_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -63,11 +67,12 @@ extern "C" int ntt_batch_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t arr_s
}
}
extern "C" int ecntt_batch_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
extern "C" int ecntt_batch_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_batch_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -76,11 +81,11 @@ extern "C" int ecntt_batch_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t
}
}
extern "C" int interpolate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
extern "C" int interpolate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
catch (const std::runtime_error &ex)
{
@@ -90,11 +95,12 @@ extern "C" int interpolate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BL
}
extern "C" int interpolate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_evaluations, BLS12_377::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0)
unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
catch (const std::runtime_error &ex)
{
@@ -103,11 +109,11 @@ extern "C" int interpolate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_o
}
}
extern "C" int interpolate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, size_t device_id = 0)
extern "C" int interpolate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
catch (const std::runtime_error &ex)
{
@@ -117,11 +123,12 @@ extern "C" int interpolate_points_cuda_bls12_377(BLS12_377::projective_t* d_out,
}
extern "C" int interpolate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_evaluations, BLS12_377::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0)
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
catch (const std::runtime_error &ex)
{
@@ -131,12 +138,13 @@ extern "C" int interpolate_points_batch_cuda_bls12_377(BLS12_377::projective_t*
}
extern "C" int evaluate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0)
unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -146,12 +154,13 @@ extern "C" int evaluate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12
}
extern "C" int evaluate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -161,12 +170,13 @@ extern "C" int evaluate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out,
}
extern "C" int evaluate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0)
unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -176,12 +186,13 @@ extern "C" int evaluate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BL
}
extern "C" int evaluate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -191,11 +202,12 @@ extern "C" int evaluate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_o
}
extern "C" int evaluate_scalars_on_coset_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_377::scalar_t *coset_powers, unsigned device_id = 0)
unsigned n, BLS12_377::scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -205,11 +217,12 @@ extern "C" int evaluate_scalars_on_coset_cuda_bls12_377(BLS12_377::scalar_t* d_o
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -219,11 +232,12 @@ extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_377(BLS12_377::scalar_
}
extern "C" int evaluate_points_on_coset_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
unsigned n, BLS12_377::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -233,11 +247,12 @@ extern "C" int evaluate_points_on_coset_cuda_bls12_377(BLS12_377::projective_t*
}
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -246,12 +261,13 @@ extern "C" int evaluate_points_on_coset_batch_cuda_bls12_377(BLS12_377::projecti
}
}
extern "C" int reverse_order_scalars_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, size_t device_id = 0)
extern "C" int reverse_order_scalars_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -261,12 +277,13 @@ extern "C" int reverse_order_scalars_cuda_bls12_377(BLS12_377::scalar_t* arr, in
}
}
extern "C" int reverse_order_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
extern "C" int reverse_order_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -276,12 +293,13 @@ extern "C" int reverse_order_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* a
}
}
extern "C" int reverse_order_points_cuda_bls12_377(BLS12_377::projective_t* arr, int n, size_t device_id = 0)
extern "C" int reverse_order_points_cuda_bls12_377(BLS12_377::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -291,12 +309,13 @@ extern "C" int reverse_order_points_cuda_bls12_377(BLS12_377::projective_t* arr,
}
}
extern "C" int reverse_order_points_batch_cuda_bls12_377(BLS12_377::projective_t* arr, int n, int batch_size, size_t device_id = 0)
extern "C" int reverse_order_points_batch_cuda_bls12_377(BLS12_377::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
}
catch (const std::runtime_error &ex)

View File

@@ -8,11 +8,11 @@
extern "C"
int msm_cuda_bls12_377(BLS12_377::projective_t *out, BLS12_377::affine_t points[],
BLS12_377::scalar_t scalars[], size_t count, size_t device_id = 0)
BLS12_377::scalar_t scalars[], size_t count, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false);
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false, false, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -23,11 +23,14 @@ int msm_cuda_bls12_377(BLS12_377::projective_t *out, BLS12_377::affine_t points[
}
extern "C" int msm_batch_cuda_bls12_377(BLS12_377::projective_t* out, BLS12_377::affine_t points[],
BLS12_377::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
BLS12_377::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
batched_large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, batch_size, msm_size, out, false);
cudaStreamCreate(&stream);
batched_large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
cudaStreamSynchronize(stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -46,11 +49,12 @@ extern "C" int msm_batch_cuda_bls12_377(BLS12_377::projective_t* out, BLS12_377:
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t device_id = 0)
int commit_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(d_scalars, d_points, count, d_out, true);
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(d_scalars, d_points, count, d_out, true, false, stream);
cudaStreamSynchronize(stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -70,11 +74,13 @@ extern "C" int msm_batch_cuda_bls12_377(BLS12_377::projective_t* out, BLS12_377:
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
int commit_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
cudaStreamCreate(&stream);
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
cudaStreamSynchronize(stream);
return 0;
}
catch (const std::runtime_error &ex)

View File

@@ -1,19 +1,23 @@
#pragma once
#include "../../utils/storage.cuh"
namespace PARAMS_BLS12_377{
struct fp_config{
static constexpr unsigned limbs_count = 8;
static constexpr unsigned omegas_count = 32;
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd, 0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb, 0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3, 0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7, 0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f, 0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
static constexpr unsigned modulus_bits_count = 253;
static constexpr storage<2*limbs_count> modulus_squared = {0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3, 0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7, 0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f, 0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
static constexpr unsigned modulus_bit_count = 253;
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b, 0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
@@ -46,6 +50,14 @@ namespace PARAMS_BLS12_377{
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
static constexpr storage_array<omegas_count, limbs_count> omega = {
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8,
omega9, omega10, omega11, omega12, omega13, omega14, omega15, omega16,
omega17, omega18, omega19, omega20, omega21, omega22, omega23, omega24,
omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
};
static constexpr storage<limbs_count> omega_inv1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
@@ -78,6 +90,14 @@ namespace PARAMS_BLS12_377{
static constexpr storage<limbs_count> omega_inv30= {0x123a81f6, 0xc03a3272, 0x115b15e8, 0x377e6d2f, 0x2d6d7206, 0xed5575e4, 0x714004f2, 0x0b1e37e4};
static constexpr storage<limbs_count> omega_inv31= {0xdde8ffc5, 0x62a29589, 0x618c5d62, 0xfb6716e8, 0x88d61f25, 0x787e561c, 0xd2b21c7e, 0x0e351761};
static constexpr storage<limbs_count> omega_inv32= {0x7aca7fbe, 0xc9fea0e9, 0xb41a8854, 0x965ff314, 0x810eea7e, 0x743415d4, 0x8275bbd1, 0x0431c01b};
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
};
static constexpr storage<limbs_count> inv1= {0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af};
static constexpr storage<limbs_count> inv2= {0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06};
static constexpr storage<limbs_count> inv3= {0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2};
@@ -110,6 +130,13 @@ namespace PARAMS_BLS12_377{
static constexpr storage<limbs_count> inv30= {0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e};
static constexpr storage<limbs_count> inv31= {0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e};
static constexpr storage<limbs_count> inv32= {0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e};
static constexpr storage_array<omegas_count, limbs_count> inv = {
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8,
inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24,
inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
};
};
struct fq_config{
@@ -118,10 +145,10 @@ namespace PARAMS_BLS12_377{
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88, 0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6, 0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510, 0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc, 0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2, 0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af, 0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4, 0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f, 0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48, 0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be, 0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
static constexpr unsigned modulus_bits_count = 377;
static constexpr storage<2*limbs_count> modulus_squared = {0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2, 0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af, 0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4, 0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f, 0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48, 0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be, 0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
static constexpr unsigned modulus_bit_count = 377;
static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488, 0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
@@ -130,26 +157,24 @@ namespace PARAMS_BLS12_377{
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
static constexpr storage<limbs_count> generator_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512, 0xbd37cb5c, 0x188282c8,
static constexpr storage<limbs_count> g1_gen_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512, 0xbd37cb5c, 0x188282c8,
0xaa9d41bb, 0x85951e2c, 0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
static constexpr storage<limbs_count> generator_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36, 0x4fb82305, 0x6d182ad4,
static constexpr storage<limbs_count> g1_gen_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36, 0x4fb82305, 0x6d182ad4,
0xca3e52d9, 0xbd7fb348, 0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
static constexpr storage<limbs_count> generator_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0xbac0326, 0x7ae3d177, 0xb4510b64,
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x24aa2b2};
static constexpr storage<limbs_count> generator_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
static constexpr storage<limbs_count> generator_y_re = {0x8b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0xce5d527};
static constexpr storage<limbs_count> generator_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x606c4a0};
static constexpr storage<limbs_count> g2_gen_x_re = {0x7c005196, 0x74e3e48f, 0xbb535402, 0x71889f52, 0x57db6b9b, 0x7ea501f5,
0x203e5031, 0xc565f071, 0xa3841d01, 0xc89630a2, 0x71c785fe, 0x018480be};
static constexpr storage<limbs_count> g2_gen_x_im = {0x6ea16afe, 0xb26bfefa, 0xbff76fe6, 0x5cf89984, 0x0799c9de, 0xe7223ece,
0x6651cecb, 0x532777ee, 0xb1b140d5, 0x70dc5a51, 0xe7004031, 0x00ea6040};
static constexpr storage<limbs_count> g2_gen_y_re = {0x09fd4ddf, 0xf0940944, 0x6d8c7c2e, 0xf2cf8888, 0xf832d204, 0xe458c282,
0x74b49a58, 0xde03ed72, 0xcbb2efb4, 0xd960736b, 0x5d446f7b, 0x00690d66};
static constexpr storage<limbs_count> g2_gen_y_im = {0x85eb8f93, 0xd9a1cdd1, 0x5e52270b, 0x4279b83f, 0xcee304c2, 0x2463b01a,
0x3d591bf1, 0x61ef11ac, 0x151a70aa, 0x9e549da3, 0xd2835518, 0x00f8169f};
};
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// TODO: correct parameters for G2 here
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x9999999a, 0x1c9ed999, 0x1ccccccd, 0x0dd39e5c, 0x3c6bf800, 0x129207b6,
0xcd5fd889, 0xdc7b4f91, 0x7460c589, 0x43bd0373, 0xdb0fd6f3, 0x010222f6};
}

View File

@@ -12,12 +12,13 @@
extern "C" int32_t vec_mod_mult_point_bls12_377(BLS12_377::projective_t *inout,
BLS12_377::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
size_t device_id,
cudaStream_t stream = 0)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_377::projective_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments);
vector_mod_mult<BLS12_377::projective_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -30,12 +31,13 @@ extern "C" int32_t vec_mod_mult_point_bls12_377(BLS12_377::projective_t *inout,
extern "C" int32_t vec_mod_mult_scalar_bls12_377(BLS12_377::scalar_t *inout,
BLS12_377::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
size_t device_id,
cudaStream_t stream = 0)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_377::scalar_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments);
vector_mod_mult<BLS12_377::scalar_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -49,12 +51,13 @@ extern "C" int32_t matrix_vec_mod_mult_bls12_377(BLS12_377::scalar_t *matrix_fla
BLS12_377::scalar_t *input,
BLS12_377::scalar_t *output,
size_t n_elments,
size_t device_id)
size_t device_id,
cudaStream_t stream = 0)
{
try
{
// TODO: device_id
matrix_mod_mult<BLS12_377::scalar_t>(matrix_flattened, input, output, n_elments);
matrix_mod_mult<BLS12_377::scalar_t>(matrix_flattened, input, output, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)

View File

@@ -6,14 +6,15 @@
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" BLS12_381::scalar_t* build_domain_cuda_bls12_381(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
extern "C" BLS12_381::scalar_t* build_domain_cuda_bls12_381(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
if (inverse) {
return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega_inv(logn));
return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega_inv(logn), stream);
} else {
return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega(logn));
return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega(logn), stream);
}
}
catch (const std::runtime_error &ex)
@@ -23,11 +24,12 @@ extern "C" BLS12_381::scalar_t* build_domain_cuda_bls12_381(uint32_t domain_size
}
}
extern "C" int ntt_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
extern "C" int ntt_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, n, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -37,11 +39,12 @@ extern "C" int ntt_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t n, bool inv
}
}
extern "C" int ecntt_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
extern "C" int ecntt_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, n, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -50,11 +53,12 @@ extern "C" int ecntt_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t n, bo
}
}
extern "C" int ntt_batch_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
extern "C" int ntt_batch_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_batch_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -63,11 +67,12 @@ extern "C" int ntt_batch_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t arr_s
}
}
extern "C" int ecntt_batch_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
extern "C" int ecntt_batch_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_batch_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -76,11 +81,11 @@ extern "C" int ecntt_batch_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t
}
}
extern "C" int interpolate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
extern "C" int interpolate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
catch (const std::runtime_error &ex)
{
@@ -90,11 +95,12 @@ extern "C" int interpolate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BL
}
extern "C" int interpolate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_evaluations, BLS12_381::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0)
unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
catch (const std::runtime_error &ex)
{
@@ -103,11 +109,11 @@ extern "C" int interpolate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_o
}
}
extern "C" int interpolate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, size_t device_id = 0)
extern "C" int interpolate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
catch (const std::runtime_error &ex)
{
@@ -117,11 +123,12 @@ extern "C" int interpolate_points_cuda_bls12_381(BLS12_381::projective_t* d_out,
}
extern "C" int interpolate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_evaluations, BLS12_381::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0)
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
catch (const std::runtime_error &ex)
{
@@ -131,12 +138,13 @@ extern "C" int interpolate_points_batch_cuda_bls12_381(BLS12_381::projective_t*
}
extern "C" int evaluate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0)
unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
BLS12_381::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -146,12 +154,13 @@ extern "C" int evaluate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12
}
extern "C" int evaluate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
BLS12_381::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -161,12 +170,13 @@ extern "C" int evaluate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out,
}
extern "C" int evaluate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0)
unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
BLS12_381::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -176,12 +186,13 @@ extern "C" int evaluate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BL
}
extern "C" int evaluate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
BLS12_381::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -191,11 +202,12 @@ extern "C" int evaluate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_o
}
extern "C" int evaluate_scalars_on_coset_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_381::scalar_t *coset_powers, unsigned device_id = 0)
unsigned n, BLS12_381::scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -205,11 +217,12 @@ extern "C" int evaluate_scalars_on_coset_cuda_bls12_381(BLS12_381::scalar_t* d_o
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -219,11 +232,12 @@ extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_381(BLS12_381::scalar_
}
extern "C" int evaluate_points_on_coset_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
unsigned n, BLS12_381::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -233,11 +247,12 @@ extern "C" int evaluate_points_on_coset_cuda_bls12_381(BLS12_381::projective_t*
}
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -246,12 +261,13 @@ extern "C" int evaluate_points_on_coset_batch_cuda_bls12_381(BLS12_381::projecti
}
}
extern "C" int reverse_order_scalars_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, size_t device_id = 0)
extern "C" int reverse_order_scalars_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -261,12 +277,13 @@ extern "C" int reverse_order_scalars_cuda_bls12_381(BLS12_381::scalar_t* arr, in
}
}
extern "C" int reverse_order_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
extern "C" int reverse_order_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -276,12 +293,13 @@ extern "C" int reverse_order_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* a
}
}
extern "C" int reverse_order_points_cuda_bls12_381(BLS12_381::projective_t* arr, int n, size_t device_id = 0)
extern "C" int reverse_order_points_cuda_bls12_381(BLS12_381::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -291,12 +309,13 @@ extern "C" int reverse_order_points_cuda_bls12_381(BLS12_381::projective_t* arr,
}
}
extern "C" int reverse_order_points_batch_cuda_bls12_381(BLS12_381::projective_t* arr, int n, int batch_size, size_t device_id = 0)
extern "C" int reverse_order_points_batch_cuda_bls12_381(BLS12_381::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
}
catch (const std::runtime_error &ex)

View File

@@ -8,12 +8,11 @@
extern "C"
int msm_cuda_bls12_381(BLS12_381::projective_t *out, BLS12_381::affine_t points[],
BLS12_381::scalar_t scalars[], size_t count, size_t device_id = 0)
BLS12_381::scalar_t scalars[], size_t count, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, count, out, false);
large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, count, out, false, false, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -24,12 +23,13 @@ int msm_cuda_bls12_381(BLS12_381::projective_t *out, BLS12_381::affine_t points[
}
extern "C" int msm_batch_cuda_bls12_381(BLS12_381::projective_t* out, BLS12_381::affine_t points[],
BLS12_381::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
BLS12_381::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
batched_large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, batch_size, msm_size, out, false);
cudaStreamCreate(&stream);
batched_large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
cudaStreamSynchronize(stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -48,11 +48,12 @@ extern "C" int msm_batch_cuda_bls12_381(BLS12_381::projective_t* out, BLS12_381:
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t device_id = 0)
int commit_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
large_msm(d_scalars, d_points, count, d_out, true);
large_msm(d_scalars, d_points, count, d_out, true, false, stream);
cudaStreamSynchronize(stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -72,11 +73,13 @@ extern "C" int msm_batch_cuda_bls12_381(BLS12_381::projective_t* out, BLS12_381:
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
int commit_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
{
cudaStreamCreate(&stream);
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
cudaStreamSynchronize(stream);
return 0;
}
catch (const std::runtime_error &ex)

View File

@@ -6,6 +6,7 @@ namespace PARAMS_BLS12_381{
struct fp_config {
// field structure size = 8 * 32 bit
static constexpr unsigned limbs_count = 8;
static constexpr unsigned omegas_count = 32;
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
@@ -15,13 +16,13 @@ namespace PARAMS_BLS12_381{
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
static constexpr storage<2*limbs_count> modulus_squared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
// 2*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
static constexpr unsigned modulus_bits_count = 255;
// m = floor(2^(2*modulus_bits_count) / modulus)
static constexpr unsigned modulus_bit_count = 255;
// m = floor(2^(2*modulus_bit_count) / modulus)
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
@@ -61,6 +62,13 @@ namespace PARAMS_BLS12_381{
static constexpr storage<limbs_count> omega30= {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
static constexpr storage<limbs_count> omega31= {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
static constexpr storage<limbs_count> omega32= {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
static constexpr storage_array<omegas_count, limbs_count> omega = {
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8,
omega9, omega10, omega11, omega12, omega13, omega14, omega15, omega16,
omega17, omega18, omega19, omega20, omega21, omega22, omega23, omega24,
omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
};
// static constexpr storage<limbs_count> omega_inv[32]={ {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753}, {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e}, {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4}, {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c}, {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee}, {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d}, {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25}, {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e}, {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508}, {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d}, {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63}, {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7}, {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1}, {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac}, {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003}, {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c}, {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7}, {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950}, {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2}, {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6}, {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5}, {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9}, {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960}, {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6}, {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf}, {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f}, {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533}, {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff}, {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287}, {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6}, {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}};
// Quick fix for linking issue
@@ -97,7 +105,13 @@ namespace PARAMS_BLS12_381{
static constexpr storage<limbs_count> omega_inv31= {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
static constexpr storage<limbs_count> omega_inv32= {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};
// static constexpr storage<limbs_count> inv[32]={ {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9}, {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e}, {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268}, {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd}, {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18}, {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5}, {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04}, {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab}, {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f}, {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9}, {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e}, {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878}, {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5}, {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c}, {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77}, {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365}, {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c}, {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57}, {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5}, {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014}, {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3}, {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583}, {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b}, {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df}, {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719}, {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736}, {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744}, {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b}, {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f}, {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751}, {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752}, {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}};
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
};
// Quick fix for linking issue
static constexpr storage<limbs_count> inv1= {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
static constexpr storage<limbs_count> inv2= {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
@@ -131,7 +145,14 @@ namespace PARAMS_BLS12_381{
static constexpr storage<limbs_count> inv30= {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
static constexpr storage<limbs_count> inv31= {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
static constexpr storage<limbs_count> inv32= {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
};
static constexpr storage_array<omegas_count, limbs_count> inv = {
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8,
inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24,
inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
};
};
struct fq_config {
// field structure size = 12 * 32 bit
@@ -148,19 +169,19 @@ namespace PARAMS_BLS12_381{
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2*limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
static constexpr storage<2*limbs_count> modulus_squared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
// 2*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
// 4*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
static constexpr unsigned modulus_bits_count = 381;
// m = floor(2^(2*modulus_bits_count) / modulus)
static constexpr unsigned modulus_bit_count = 381;
// m = floor(2^(2*modulus_bit_count) / modulus)
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
@@ -169,23 +190,22 @@ namespace PARAMS_BLS12_381{
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
static constexpr storage<limbs_count> generator_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
static constexpr storage<limbs_count> g1_gen_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
0x9774b905, 0xc3688c4f, 0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
static constexpr storage<limbs_count> generator_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
static constexpr storage<limbs_count> g1_gen_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
0xd5d00af6, 0xfcf5e095, 0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
static constexpr storage<limbs_count> generator_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0xbac0326, 0x7ae3d177, 0xb4510b64,
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x24aa2b2};
static constexpr storage<limbs_count> generator_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
static constexpr storage<limbs_count> g2_gen_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0x0bac0326, 0x7ae3d177, 0xb4510b64,
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x024aa2b2};
static constexpr storage<limbs_count> g2_gen_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
static constexpr storage<limbs_count> generator_y_re = {0x8b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0xce5d527};
static constexpr storage<limbs_count> generator_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x606c4a0};
static constexpr storage<limbs_count> g2_gen_y_re = {0x08b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0x0ce5d527};
static constexpr storage<limbs_count> g2_gen_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x0606c4a0};
};
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,

View File

@@ -0,0 +1,47 @@
#ifndef _BLS12_381_POSEIDON
#define _BLS12_381_POSEIDON
#include <cuda.h>
#include <stdexcept>
#include "../../appUtils/poseidon/poseidon.cu"
#include "curve_config.cuh"
template class Poseidon<BLS12_381::scalar_t>;
extern "C" int poseidon_multi_cuda_bls12_381(BLS12_381::scalar_t input[], BLS12_381::scalar_t* out,
size_t number_of_blocks, int arity, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
// TODO: once we get bindings to pass a stream, we should make {stream} a required parameter and use it instead of
// creating a new stream
if (stream == 0) {
cudaStreamCreate(&stream);
}
cudaEvent_t start_event, end_event;
cudaEventCreate(&start_event);
cudaEventCreate(&end_event);
cudaEventRecord(start_event, stream);
Poseidon<BLS12_381::scalar_t> poseidon(arity, stream);
poseidon.hash_blocks(input, number_of_blocks, out, Poseidon<BLS12_381::scalar_t>::HashType::MerkleTree, stream);
cudaEventRecord(end_event, stream);
cudaEventSynchronize(end_event);
#ifdef DEBUG
float elapsedTime;
cudaEventElapsedTime(&elapsedTime, start_event, end_event);
printf("Time elapsed: %f", elapsedTime);
#endif
cudaEventDestroy(start_event);
cudaEventDestroy(end_event);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif

View File

@@ -1,4 +1,5 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"
#include "ve_mod_mult.cu"
#include "poseidon.cu"

View File

@@ -11,12 +11,13 @@
extern "C" int32_t vec_mod_mult_point_bls12_381(BLS12_381::projective_t *inout,
BLS12_381::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
size_t device_id,
cudaStream_t stream = 0)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_381::projective_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments);
vector_mod_mult<BLS12_381::projective_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -29,12 +30,13 @@ extern "C" int32_t vec_mod_mult_point_bls12_381(BLS12_381::projective_t *inout,
extern "C" int32_t vec_mod_mult_scalar_bls12_381(BLS12_381::scalar_t *inout,
BLS12_381::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
size_t device_id,
cudaStream_t stream = 0)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_381::scalar_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments);
vector_mod_mult<BLS12_381::scalar_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -48,12 +50,13 @@ extern "C" int32_t matrix_vec_mod_mult_bls12_381(BLS12_381::scalar_t *matrix_fla
BLS12_381::scalar_t *input,
BLS12_381::scalar_t *output,
size_t n_elments,
size_t device_id)
size_t device_id,
cudaStream_t stream = 0)
{
try
{
// TODO: device_id
matrix_mod_mult<BLS12_381::scalar_t>(matrix_flattened, input, output, n_elments);
matrix_mod_mult<BLS12_381::scalar_t>(matrix_flattened, input, output, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)

View File

@@ -6,14 +6,15 @@
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" BN254::scalar_t* build_domain_cuda_bn254(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
extern "C" BN254::scalar_t* build_domain_cuda_bn254(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
if (inverse) {
return fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega_inv(logn));
return fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega_inv(logn), stream);
} else {
return fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega(logn));
return fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega(logn), stream);
}
}
catch (const std::runtime_error &ex)
@@ -23,11 +24,12 @@ extern "C" BN254::scalar_t* build_domain_cuda_bn254(uint32_t domain_size, uint32
}
}
extern "C" int ntt_cuda_bn254(BN254::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
extern "C" int ntt_cuda_bn254(BN254::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_template<BN254::scalar_t,BN254::scalar_t>(arr, n, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_template<BN254::scalar_t,BN254::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -37,11 +39,12 @@ extern "C" int ntt_cuda_bn254(BN254::scalar_t *arr, uint32_t n, bool inverse, si
}
}
extern "C" int ecntt_cuda_bn254(BN254::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
extern "C" int ecntt_cuda_bn254(BN254::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_template<BN254::projective_t,BN254::scalar_t>(arr, n, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_template<BN254::projective_t,BN254::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -50,11 +53,12 @@ extern "C" int ecntt_cuda_bn254(BN254::projective_t *arr, uint32_t n, bool inver
}
}
extern "C" int ntt_batch_cuda_bn254(BN254::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
extern "C" int ntt_batch_cuda_bn254(BN254::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_batch_template<BN254::scalar_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<BN254::scalar_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -63,11 +67,12 @@ extern "C" int ntt_batch_cuda_bn254(BN254::scalar_t *arr, uint32_t arr_size, uin
}
}
extern "C" int ecntt_batch_cuda_bn254(BN254::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
extern "C" int ecntt_batch_cuda_bn254(BN254::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_batch_template<BN254::projective_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<BN254::projective_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -76,11 +81,11 @@ extern "C" int ecntt_batch_cuda_bn254(BN254::projective_t *arr, uint32_t arr_siz
}
}
extern "C" int interpolate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
extern "C" int interpolate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
catch (const std::runtime_error &ex)
{
@@ -90,11 +95,12 @@ extern "C" int interpolate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::sca
}
extern "C" int interpolate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_evaluations, BN254::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0)
unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
catch (const std::runtime_error &ex)
{
@@ -103,11 +109,11 @@ extern "C" int interpolate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN25
}
}
extern "C" int interpolate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, size_t device_id = 0)
extern "C" int interpolate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
catch (const std::runtime_error &ex)
{
@@ -117,11 +123,12 @@ extern "C" int interpolate_points_cuda_bn254(BN254::projective_t* d_out, BN254::
}
extern "C" int interpolate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_evaluations, BN254::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0)
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
catch (const std::runtime_error &ex)
{
@@ -131,12 +138,13 @@ extern "C" int interpolate_points_batch_cuda_bn254(BN254::projective_t* d_out, B
}
extern "C" int evaluate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_coefficients, BN254::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0)
unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
BN254::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -146,12 +154,13 @@ extern "C" int evaluate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar
}
extern "C" int evaluate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
BN254::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -161,12 +170,13 @@ extern "C" int evaluate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::
}
extern "C" int evaluate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_coefficients, BN254::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0)
unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
BN254::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -176,12 +186,13 @@ extern "C" int evaluate_points_cuda_bn254(BN254::projective_t* d_out, BN254::pro
}
extern "C" int evaluate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
BN254::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -191,11 +202,12 @@ extern "C" int evaluate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN25
}
extern "C" int evaluate_scalars_on_coset_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_coefficients, BN254::scalar_t *d_domain, unsigned domain_size,
unsigned n, BN254::scalar_t *coset_powers, unsigned device_id = 0)
unsigned n, BN254::scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -205,11 +217,12 @@ extern "C" int evaluate_scalars_on_coset_cuda_bn254(BN254::scalar_t* d_out, BN25
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0)
unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -219,11 +232,12 @@ extern "C" int evaluate_scalars_on_coset_batch_cuda_bn254(BN254::scalar_t* d_out
}
extern "C" int evaluate_points_on_coset_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_coefficients, BN254::scalar_t *d_domain, unsigned domain_size,
unsigned n, BN254::scalar_t *coset_powers, size_t device_id = 0)
unsigned n, BN254::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -233,11 +247,12 @@ extern "C" int evaluate_points_on_coset_cuda_bn254(BN254::projective_t* d_out, B
}
extern "C" int evaluate_points_on_coset_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0)
unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -246,12 +261,13 @@ extern "C" int evaluate_points_on_coset_batch_cuda_bn254(BN254::projective_t* d_
}
}
extern "C" int reverse_order_scalars_cuda_bn254(BN254::scalar_t* arr, int n, size_t device_id = 0)
extern "C" int reverse_order_scalars_cuda_bn254(BN254::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -261,12 +277,13 @@ extern "C" int reverse_order_scalars_cuda_bn254(BN254::scalar_t* arr, int n, siz
}
}
extern "C" int reverse_order_scalars_batch_cuda_bn254(BN254::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
extern "C" int reverse_order_scalars_batch_cuda_bn254(BN254::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -276,12 +293,13 @@ extern "C" int reverse_order_scalars_batch_cuda_bn254(BN254::scalar_t* arr, int
}
}
extern "C" int reverse_order_points_cuda_bn254(BN254::projective_t* arr, int n, size_t device_id = 0)
extern "C" int reverse_order_points_cuda_bn254(BN254::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -291,12 +309,13 @@ extern "C" int reverse_order_points_cuda_bn254(BN254::projective_t* arr, int n,
}
}
extern "C" int reverse_order_points_batch_cuda_bn254(BN254::projective_t* arr, int n, int batch_size, size_t device_id = 0)
extern "C" int reverse_order_points_batch_cuda_bn254(BN254::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
}
catch (const std::runtime_error &ex)

View File

@@ -8,11 +8,11 @@
extern "C"
int msm_cuda_bn254(BN254::projective_t *out, BN254::affine_t points[],
BN254::scalar_t scalars[], size_t count, size_t device_id = 0)
BN254::scalar_t scalars[], size_t count, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, count, out, false);
large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, count, out, false, false, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -23,11 +23,13 @@ int msm_cuda_bn254(BN254::projective_t *out, BN254::affine_t points[],
}
extern "C" int msm_batch_cuda_bn254(BN254::projective_t* out, BN254::affine_t points[],
BN254::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
BN254::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
batched_large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, batch_size, msm_size, out, false);
cudaStreamCreate(&stream);
batched_large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
cudaStreamSynchronize(stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -46,11 +48,12 @@ extern "C" int msm_batch_cuda_bn254(BN254::projective_t* out, BN254::affine_t po
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t device_id = 0)
int commit_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
large_msm(d_scalars, d_points, count, d_out, true);
large_msm(d_scalars, d_points, count, d_out, true, false, stream);
cudaStreamSynchronize(stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -70,11 +73,13 @@ extern "C" int msm_batch_cuda_bn254(BN254::projective_t* out, BN254::affine_t po
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
int commit_batch_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
cudaStreamCreate(&stream);
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
cudaStreamSynchronize(stream);
return 0;
}
catch (const std::runtime_error &ex)

View File

@@ -1,10 +1,39 @@
#pragma once
#include "../../utils/storage.cuh"
namespace PARAMS_BN254{
struct fp_config{
static constexpr unsigned limbs_count = 8;
static constexpr unsigned omegas_count = 32;
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090, 0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121, 0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
static constexpr storage<2*limbs_count> modulus_wide = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_squared = {0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975, 0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
static constexpr storage<2*limbs_count> modulus_squared_2 = {0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb, 0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7, 0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
static constexpr unsigned modulus_bit_count = 254;
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520, 0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
////
static constexpr storage<limbs_count> omega1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> omega2= {0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> omega3= {0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1};
static constexpr storage<limbs_count> omega4= {0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2};
static constexpr storage<limbs_count> omega5= {0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6};
static constexpr storage<limbs_count> omega6= {0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d};
static constexpr storage<limbs_count> omega7= {0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd};
static constexpr storage<limbs_count> omega8= {0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561};
static constexpr storage<limbs_count> omega9= {0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e};
static constexpr storage<limbs_count> omega10= {0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1};
static constexpr storage<limbs_count> omega11= {0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584};
static constexpr storage<limbs_count> omega12= {0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596};
static constexpr storage<limbs_count> omega13= {0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49};
static constexpr storage<limbs_count> omega14= {0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651};
static constexpr storage<limbs_count> omega15= {0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f};
static constexpr storage<limbs_count> omega16= {0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb};
static constexpr storage<limbs_count> omega17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> omega18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> omega19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
@@ -21,7 +50,30 @@ namespace PARAMS_BN254{
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
////
static constexpr storage_array<omegas_count, limbs_count> omega = {
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8,
omega9, omega10, omega11, omega12, omega13, omega14, omega15, omega16,
omega17, omega18, omega19, omega20, omega21, omega22, omega23, omega24,
omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
};
static constexpr storage<limbs_count> omega_inv1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> omega_inv2= {0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv3= {0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711};
static constexpr storage<limbs_count> omega_inv4= {0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf};
static constexpr storage<limbs_count> omega_inv5= {0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136};
static constexpr storage<limbs_count> omega_inv6= {0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2};
static constexpr storage<limbs_count> omega_inv7= {0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053};
static constexpr storage<limbs_count> omega_inv8= {0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327};
static constexpr storage<limbs_count> omega_inv9= {0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821};
static constexpr storage<limbs_count> omega_inv10= {0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7};
static constexpr storage<limbs_count> omega_inv11= {0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21};
static constexpr storage<limbs_count> omega_inv12= {0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2};
static constexpr storage<limbs_count> omega_inv13= {0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58};
static constexpr storage<limbs_count> omega_inv14= {0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f};
static constexpr storage<limbs_count> omega_inv15= {0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766};
static constexpr storage<limbs_count> omega_inv16= {0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7};
static constexpr storage<limbs_count> omega_inv17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> omega_inv18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> omega_inv19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
@@ -38,8 +90,30 @@ namespace PARAMS_BN254{
static constexpr storage<limbs_count> omega_inv30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> omega_inv31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> omega_inv32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
////
////
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
};
static constexpr storage<limbs_count> inv1= {0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739};
static constexpr storage<limbs_count> inv2= {0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6};
static constexpr storage<limbs_count> inv3= {0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4};
static constexpr storage<limbs_count> inv4= {0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b};
static constexpr storage<limbs_count> inv5= {0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff};
static constexpr storage<limbs_count> inv6= {0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39};
static constexpr storage<limbs_count> inv7= {0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5};
static constexpr storage<limbs_count> inv8= {0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24};
static constexpr storage<limbs_count> inv9= {0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b};
static constexpr storage<limbs_count> inv10= {0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f};
static constexpr storage<limbs_count> inv11= {0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9};
static constexpr storage<limbs_count> inv12= {0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d};
static constexpr storage<limbs_count> inv13= {0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50};
static constexpr storage<limbs_count> inv14= {0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1};
static constexpr storage<limbs_count> inv15= {0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa};
static constexpr storage<limbs_count> inv16= {0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e};
static constexpr storage<limbs_count> inv17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> inv18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> inv19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
@@ -57,68 +131,12 @@ namespace PARAMS_BN254{
static constexpr storage<limbs_count> inv31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> inv32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
////
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090, 0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121, 0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
static constexpr storage<2*limbs_count> modulus_wide = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_sqared = {0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975, 0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb, 0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7, 0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
static constexpr unsigned modulus_bits_count = 254;
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520, 0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> omega2= {0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> omega3= {0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1};
static constexpr storage<limbs_count> omega4= {0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2};
static constexpr storage<limbs_count> omega5= {0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6};
static constexpr storage<limbs_count> omega6= {0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d};
static constexpr storage<limbs_count> omega7= {0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd};
static constexpr storage<limbs_count> omega8= {0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561};
static constexpr storage<limbs_count> omega9= {0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e};
static constexpr storage<limbs_count> omega10= {0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1};
static constexpr storage<limbs_count> omega11= {0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584};
static constexpr storage<limbs_count> omega12= {0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596};
static constexpr storage<limbs_count> omega13= {0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49};
static constexpr storage<limbs_count> omega14= {0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651};
static constexpr storage<limbs_count> omega15= {0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f};
static constexpr storage<limbs_count> omega16= {0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb};
static constexpr storage<limbs_count> omega_inv1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> omega_inv2= {0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv3= {0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711};
static constexpr storage<limbs_count> omega_inv4= {0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf};
static constexpr storage<limbs_count> omega_inv5= {0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136};
static constexpr storage<limbs_count> omega_inv6= {0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2};
static constexpr storage<limbs_count> omega_inv7= {0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053};
static constexpr storage<limbs_count> omega_inv8= {0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327};
static constexpr storage<limbs_count> omega_inv9= {0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821};
static constexpr storage<limbs_count> omega_inv10= {0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7};
static constexpr storage<limbs_count> omega_inv11= {0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21};
static constexpr storage<limbs_count> omega_inv12= {0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2};
static constexpr storage<limbs_count> omega_inv13= {0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58};
static constexpr storage<limbs_count> omega_inv14= {0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f};
static constexpr storage<limbs_count> omega_inv15= {0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766};
static constexpr storage<limbs_count> omega_inv16= {0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7};
static constexpr storage<limbs_count> inv1= {0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739};
static constexpr storage<limbs_count> inv2= {0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6};
static constexpr storage<limbs_count> inv3= {0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4};
static constexpr storage<limbs_count> inv4= {0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b};
static constexpr storage<limbs_count> inv5= {0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff};
static constexpr storage<limbs_count> inv6= {0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39};
static constexpr storage<limbs_count> inv7= {0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5};
static constexpr storage<limbs_count> inv8= {0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24};
static constexpr storage<limbs_count> inv9= {0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b};
static constexpr storage<limbs_count> inv10= {0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f};
static constexpr storage<limbs_count> inv11= {0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9};
static constexpr storage<limbs_count> inv12= {0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d};
static constexpr storage<limbs_count> inv13= {0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50};
static constexpr storage<limbs_count> inv14= {0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1};
static constexpr storage<limbs_count> inv15= {0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa};
static constexpr storage<limbs_count> inv16= {0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e};
static constexpr storage_array<omegas_count, limbs_count> inv = {
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8,
inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24,
inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
};
};
struct fq_config{
@@ -127,10 +145,10 @@ namespace PARAMS_BN254{
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522, 0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45, 0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
static constexpr storage<2*limbs_count> modulus_wide = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_sqared = {0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95, 0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a, 0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55, 0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
static constexpr unsigned modulus_bits_count = 254;
static constexpr storage<2*limbs_count> modulus_squared = {0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95, 0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a, 0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55, 0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
static constexpr unsigned modulus_bit_count = 254;
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17, 0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
@@ -139,17 +157,15 @@ namespace PARAMS_BN254{
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
static constexpr storage<limbs_count> generator_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_x_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_x_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_y_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_y_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> g1_gen_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> g1_gen_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> g2_gen_x_re = {0xd992f6ed, 0x46debd5c, 0xf75edadd, 0x674322d4, 0x5e5c4479, 0x426a0066, 0x121f1e76, 0x1800deef};
static constexpr storage<limbs_count> g2_gen_x_im = {0xaef312c2, 0x97e485b7, 0x35a9e712, 0xf1aa4933, 0x31fb5d25, 0x7260bfb7, 0x920d483a, 0x198e9393};
static constexpr storage<limbs_count> g2_gen_y_re = {0x66fa7daa, 0x4ce6cc01, 0x0c43d37b, 0xe3d1e769, 0x8dcb408f, 0x4aab7180, 0xdb8c6deb, 0x12c85ea5};
static constexpr storage<limbs_count> g2_gen_y_im = {0xd122975b, 0x55acdadc, 0x70b38ef3, 0xbc4b3133, 0x690c3395, 0xec9e99ad, 0x585ff075, 0x090689d0};
};
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// TODO: correct parameters for G2 here
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x24a138e5, 0x3267e6dc, 0x59dbefa3, 0xb5b4c5e5, 0x1be06ac3, 0x81be1899, 0xceb8aaae, 0x2b149d40};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x85c315d2, 0xe4a2bd06, 0xe52d1852, 0xa74fa084, 0xeed8fdf4, 0xcd2cafad, 0x3af0fed4, 0x009713b0};
}

View File

@@ -16,4 +16,4 @@ extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective
!((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) && (point1->z == BN254::g2_point_field_t::zero())) &&
!((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) && (point2->z == BN254::g2_point_field_t::zero()));
}
#endif
#endif

View File

@@ -12,14 +12,15 @@
extern "C" int32_t vec_mod_mult_point_bn254(BN254::projective_t *inout,
BN254::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
size_t device_id,
cudaStream_t stream = 0)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try
{
// TODO: device_id
vector_mod_mult<BN254::projective_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments);
vector_mod_mult<BN254::projective_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -32,14 +33,15 @@ extern "C" int32_t vec_mod_mult_point_bn254(BN254::projective_t *inout,
extern "C" int32_t vec_mod_mult_scalar_bn254(BN254::scalar_t *inout,
BN254::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
size_t device_id,
cudaStream_t stream = 0)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try
{
// TODO: device_id
vector_mod_mult<BN254::scalar_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments);
vector_mod_mult<BN254::scalar_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -53,14 +55,15 @@ extern "C" int32_t matrix_vec_mod_mult_bn254(BN254::scalar_t *matrix_flattened,
BN254::scalar_t *input,
BN254::scalar_t *output,
size_t n_elments,
size_t device_id)
size_t device_id,
cudaStream_t stream = 0)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try
{
// TODO: device_id
matrix_mod_mult<BN254::scalar_t>(matrix_flattened, input, output, n_elments);
matrix_mod_mult<BN254::scalar_t>(matrix_flattened, input, output, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)

View File

@@ -1,14 +1,20 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
namespace BN254 {
typedef Field<CURVE_NAME_U::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<CURVE_NAME_U::fq_config> point_field_t;
typedef Projective<point_field_t, scalar_field_t, CURVE_NAME_U::group_generator, CURVE_NAME_U::weierstrass_b> projective_t;
namespace ${CURVE_NAME_U} {
typedef Field<PARAMS_${CURVE_NAME_U}::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_${CURVE_NAME_U}::fq_config> point_field_t;
static constexpr point_field_t b = point_field_t{ PARAMS_${CURVE_NAME_U}::weierstrass_b };
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_${CURVE_NAME_U}::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_${CURVE_NAME_U}::weierstrass_b_g2_re },
point_field_t{ PARAMS_${CURVE_NAME_U}::weierstrass_b_g2_im }};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
}

View File

@@ -1,19 +1,20 @@
#ifndef _CURVE_NAME_U_LDE
#define _CURVE_NAME_U_LDE
#ifndef _${CURVE_NAME_U}_LDE
#define _${CURVE_NAME_U}_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" CURVE_NAME_U::scalar_t* build_domain_cuda_CURVE_NAME_L(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
extern "C" ${CURVE_NAME_U}::scalar_t* build_domain_cuda_${CURVE_NAME_L}(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
if (inverse) {
return fill_twiddle_factors_array(domain_size, CURVE_NAME_U::scalar_t::omega_inv(logn));
return fill_twiddle_factors_array(domain_size, ${CURVE_NAME_U}::scalar_t::omega_inv(logn), stream);
} else {
return fill_twiddle_factors_array(domain_size, CURVE_NAME_U::scalar_t::omega(logn));
return fill_twiddle_factors_array(domain_size, ${CURVE_NAME_U}::scalar_t::omega(logn), stream);
}
}
catch (const std::runtime_error &ex)
@@ -23,11 +24,12 @@ extern "C" CURVE_NAME_U::scalar_t* build_domain_cuda_CURVE_NAME_L(uint32_t domai
}
}
extern "C" int ntt_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
extern "C" int ntt_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_template<CURVE_NAME_U::scalar_t,CURVE_NAME_U::scalar_t>(arr, n, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_template<${CURVE_NAME_U}::scalar_t,${CURVE_NAME_U}::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -37,11 +39,12 @@ extern "C" int ntt_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t *arr, uint32_t n, bo
}
}
extern "C" int ecntt_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
extern "C" int ecntt_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_template<CURVE_NAME_U::projective_t,CURVE_NAME_U::scalar_t>(arr, n, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_template<${CURVE_NAME_U}::projective_t,${CURVE_NAME_U}::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -50,11 +53,12 @@ extern "C" int ecntt_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *arr, uint32_t
}
}
extern "C" int ntt_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
extern "C" int ntt_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_batch_template<CURVE_NAME_U::scalar_t,CURVE_NAME_U::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<${CURVE_NAME_U}::scalar_t,${CURVE_NAME_U}::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -63,11 +67,12 @@ extern "C" int ntt_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t *arr, uint32_t
}
}
extern "C" int ecntt_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
extern "C" int ecntt_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return ntt_end2end_batch_template<CURVE_NAME_U::projective_t,CURVE_NAME_U::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<${CURVE_NAME_U}::projective_t,${CURVE_NAME_U}::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
@@ -76,11 +81,11 @@ extern "C" int ecntt_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *arr, ui
}
}
extern "C" int interpolate_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_evaluations, CURVE_NAME_U::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
extern "C" int interpolate_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t *d_evaluations, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
catch (const std::runtime_error &ex)
{
@@ -89,12 +94,13 @@ extern "C" int interpolate_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_o
}
}
extern "C" int interpolate_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_evaluations, CURVE_NAME_U::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0)
extern "C" int interpolate_scalars_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_evaluations, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
catch (const std::runtime_error &ex)
{
@@ -103,11 +109,11 @@ extern "C" int interpolate_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_
}
}
extern "C" int interpolate_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_evaluations, CURVE_NAME_U::scalar_t *d_domain, unsigned n, size_t device_id = 0)
extern "C" int interpolate_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t *d_evaluations, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
catch (const std::runtime_error &ex)
{
@@ -116,12 +122,13 @@ extern "C" int interpolate_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t*
}
}
extern "C" int interpolate_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_evaluations, CURVE_NAME_U::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0)
extern "C" int interpolate_points_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t* d_evaluations, ${CURVE_NAME_U}::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
catch (const std::runtime_error &ex)
{
@@ -130,13 +137,14 @@ extern "C" int interpolate_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projecti
}
}
extern "C" int evaluate_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0)
extern "C" int evaluate_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t *d_coefficients, ${CURVE_NAME_U}::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
CURVE_NAME_U::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
${CURVE_NAME_U}::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -145,13 +153,14 @@ extern "C" int evaluate_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out,
}
}
extern "C" int evaluate_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
extern "C" int evaluate_scalars_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_coefficients, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
CURVE_NAME_U::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
${CURVE_NAME_U}::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -160,13 +169,14 @@ extern "C" int evaluate_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t*
}
}
extern "C" int evaluate_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0)
extern "C" int evaluate_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t *d_coefficients, ${CURVE_NAME_U}::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
CURVE_NAME_U::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
${CURVE_NAME_U}::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -175,13 +185,14 @@ extern "C" int evaluate_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_o
}
}
extern "C" int evaluate_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
extern "C" int evaluate_points_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t* d_coefficients, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
CURVE_NAME_U::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
${CURVE_NAME_U}::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
@@ -190,12 +201,13 @@ extern "C" int evaluate_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_
}
}
extern "C" int evaluate_scalars_on_coset_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain, unsigned domain_size,
unsigned n, CURVE_NAME_U::scalar_t *coset_powers, unsigned device_id = 0)
extern "C" int evaluate_scalars_on_coset_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t *d_coefficients, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned domain_size,
unsigned n, ${CURVE_NAME_U}::scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -204,12 +216,13 @@ extern "C" int evaluate_scalars_on_coset_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_
}
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
extern "C" int evaluate_scalars_on_coset_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_coefficients, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, ${CURVE_NAME_U}::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -218,12 +231,13 @@ extern "C" int evaluate_scalars_on_coset_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::s
}
}
extern "C" int evaluate_points_on_coset_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain, unsigned domain_size,
unsigned n, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
extern "C" int evaluate_points_on_coset_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t *d_coefficients, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned domain_size,
unsigned n, ${CURVE_NAME_U}::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -232,12 +246,13 @@ extern "C" int evaluate_points_on_coset_cuda_CURVE_NAME_L(CURVE_NAME_U::projecti
}
}
extern "C" int evaluate_points_on_coset_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
extern "C" int evaluate_points_on_coset_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t* d_coefficients, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, ${CURVE_NAME_U}::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
@@ -246,12 +261,13 @@ extern "C" int evaluate_points_on_coset_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::pr
}
}
extern "C" int reverse_order_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* arr, int n, size_t device_id = 0)
extern "C" int reverse_order_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -261,12 +277,13 @@ extern "C" int reverse_order_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* a
}
}
extern "C" int reverse_order_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
extern "C" int reverse_order_scalars_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -276,12 +293,13 @@ extern "C" int reverse_order_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scala
}
}
extern "C" int reverse_order_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* arr, int n, size_t device_id = 0)
extern "C" int reverse_order_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -291,12 +309,13 @@ extern "C" int reverse_order_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t
}
}
extern "C" int reverse_order_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* arr, int n, int batch_size, size_t device_id = 0)
extern "C" int reverse_order_points_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
}
catch (const std::runtime_error &ex)

View File

@@ -1,24 +1,17 @@
#ifndef _CURVE_NAME_U_MSM
#define _CURVE_NAME_U_MSM
#ifndef _${CURVE_NAME_U}_MSM
#define _${CURVE_NAME_U}_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
extern "C"
int msm_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *out, CURVE_NAME_U::affine_t points[],
CURVE_NAME_U::scalar_t scalars[], size_t count, size_t device_id = 0)
int msm_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *out, ${CURVE_NAME_U}::affine_t points[],
${CURVE_NAME_U}::scalar_t scalars[], size_t count, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
if (count>256){
large_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, count, out, false);
}
else{
short_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, count, out, false);
}
large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::affine_t>(scalars, points, count, out, false, false, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -28,12 +21,14 @@ int msm_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *out, CURVE_NAME_U::affine_
}
}
extern "C" int msm_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* out, CURVE_NAME_U::affine_t points[],
CURVE_NAME_U::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
extern "C" int msm_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* out, ${CURVE_NAME_U}::affine_t points[],
${CURVE_NAME_U}::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
batched_large_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, batch_size, msm_size, out, false);
cudaStreamCreate(&stream);
batched_large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
cudaStreamSynchronize(stream);
return CUDA_SUCCESS;
}
@@ -53,11 +48,12 @@ extern "C" int msm_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* out, CURV
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::scalar_t* d_scalars, CURVE_NAME_U::affine_t* d_points, size_t count, size_t device_id = 0)
int commit_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::scalar_t* d_scalars, ${CURVE_NAME_U}::affine_t* d_points, size_t count, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
large_msm(d_scalars, d_points, count, d_out, true);
large_msm(d_scalars, d_points, count, d_out, true, false, stream);
cudaStreamSynchronize(stream);
return 0;
}
catch (const std::runtime_error &ex)
@@ -77,11 +73,13 @@ extern "C" int msm_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* out, CURV
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::scalar_t* d_scalars, CURVE_NAME_U::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
int commit_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::scalar_t* d_scalars, ${CURVE_NAME_U}::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
cudaStreamCreate(&stream);
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
cudaStreamSynchronize(stream);
return 0;
}
catch (const std::runtime_error &ex)

View File

@@ -0,0 +1,66 @@
#pragma once
#include "../../utils/storage.cuh"
namespace PARAMS_${curve_name_U} {
struct fp_config {
static constexpr unsigned limbs_count = ${fp_num_limbs};
static constexpr unsigned omegas_count = ${num_omegas};
static constexpr unsigned modulus_bit_count = ${fp_modulus_bit_count};
static constexpr storage<limbs_count> modulus = {${fp_modulus}};
static constexpr storage<limbs_count> modulus_2 = {${fp_modulus_2}};
static constexpr storage<limbs_count> modulus_4 = {${fp_modulus_4}};
static constexpr storage<2*limbs_count> modulus_wide = {${fp_modulus_wide}};
static constexpr storage<2*limbs_count> modulus_squared = {${fp_modulus_squared}};
static constexpr storage<2*limbs_count> modulus_squared_2 = {${fp_modulus_squared_2}};
static constexpr storage<2*limbs_count> modulus_squared_4 = {${fp_modulus_squared_4}};
static constexpr storage<limbs_count> m = {${fp_m}};
static constexpr storage<limbs_count> one = {${fp_one}};
static constexpr storage<limbs_count> zero = {${fp_zero}};
static constexpr storage_array<omegas_count, limbs_count> omega = { {
${omega}
} };
static constexpr storage_array<omegas_count, limbs_count> omega_inv = { {
${omega_inv}
} };
static constexpr storage_array<omegas_count, limbs_count> inv = { {
${inv}
} };
};
struct fq_config {
static constexpr unsigned limbs_count = ${fq_num_limbs};
static constexpr unsigned modulus_bit_count = ${fq_modulus_bit_count};
static constexpr storage<limbs_count> modulus = {${fq_modulus}};
static constexpr storage<limbs_count> modulus_2 = {${fq_modulus_2}};
static constexpr storage<limbs_count> modulus_4 = {${fq_modulus_4}};
static constexpr storage<2*limbs_count> modulus_wide = {${fq_modulus_wide}};
static constexpr storage<2*limbs_count> modulus_squared = {${fq_modulus_squared}};
static constexpr storage<2*limbs_count> modulus_squared_2 = {${fq_modulus_squared_2}};
static constexpr storage<2*limbs_count> modulus_squared_4 = {${fq_modulus_squared_4}};
static constexpr storage<limbs_count> m = {${fq_m}};
static constexpr storage<limbs_count> one = {${fq_one}};
static constexpr storage<limbs_count> zero = {${fq_zero}};
// i^2, the square of the imaginary unit for the extension field
static constexpr uint32_t i_squared = 1;
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
static constexpr storage<limbs_count> g1_gen_x = {${fq_gen_x}};
static constexpr storage<limbs_count> g1_gen_y = {${fq_gen_y}};
static constexpr storage<limbs_count> g2_gen_x_re = {${fq_gen_x_re}};
static constexpr storage<limbs_count> g2_gen_x_im = {${fq_gen_x_im}};
static constexpr storage<limbs_count> g2_gen_y_re = {${fq_gen_y_re}};
static constexpr storage<limbs_count> g2_gen_y_im = {${fq_gen_y_im}};
};
static constexpr storage<fq_config::limbs_count> weierstrass_b = {${weier_b}};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {${weier_b_g2_re}};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {${weier_b_g2_im}};
}

View File

@@ -2,7 +2,18 @@
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
extern "C" bool eq_CURVE_NAME_L(CURVE_NAME_U::projective_t *point1, CURVE_NAME_U::projective_t *point2, size_t device_id = 0)
extern "C" bool eq_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *point1, ${CURVE_NAME_U}::projective_t *point2)
{
return (*point1 == *point2);
}
return (*point1 == *point2) &&
!((point1->x == ${CURVE_NAME_U}::point_field_t::zero()) && (point1->y == ${CURVE_NAME_U}::point_field_t::zero()) && (point1->z == ${CURVE_NAME_U}::point_field_t::zero())) &&
!((point2->x == ${CURVE_NAME_U}::point_field_t::zero()) && (point2->y == ${CURVE_NAME_U}::point_field_t::zero()) && (point2->z == ${CURVE_NAME_U}::point_field_t::zero()));
}
#if defined(G2_DEFINED)
extern "C" bool eq_g2_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t *point1, ${CURVE_NAME_U}::g2_projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == ${CURVE_NAME_U}::g2_point_field_t::zero()) && (point1->y == ${CURVE_NAME_U}::g2_point_field_t::zero()) && (point1->z == ${CURVE_NAME_U}::g2_point_field_t::zero())) &&
!((point2->x == ${CURVE_NAME_U}::g2_point_field_t::zero()) && (point2->y == ${CURVE_NAME_U}::g2_point_field_t::zero()) && (point2->z == ${CURVE_NAME_U}::g2_point_field_t::zero()));
}
#endif

View File

@@ -1,5 +1,5 @@
#ifndef _CURVE_NAME_U_VEC_MULT
#define _CURVE_NAME_U_VEC_MULT
#ifndef _${CURVE_NAME_U}_VEC_MULT
#define _${CURVE_NAME_U}_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
@@ -8,16 +8,16 @@
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
extern "C" int32_t vec_mod_mult_point_CURVE_NAME_L(CURVE_NAME_U::projective_t *inout,
CURVE_NAME_U::scalar_t *scalar_vec,
extern "C" int32_t vec_mod_mult_point_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *inout,
${CURVE_NAME_U}::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
size_t device_id,
cudaStream_t stream = 0)
{
try
{
// TODO: device_id
vector_mod_mult<CURVE_NAME_U::projective_t, CURVE_NAME_U::scalar_t>(scalar_vec, inout, inout, n_elments);
vector_mod_mult<${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -27,15 +27,16 @@ extern "C" int32_t vec_mod_mult_point_CURVE_NAME_L(CURVE_NAME_U::projective_t *i
}
}
extern "C" int32_t vec_mod_mult_scalar_CURVE_NAME_L(CURVE_NAME_U::scalar_t *inout,
CURVE_NAME_U::scalar_t *scalar_vec,
extern "C" int32_t vec_mod_mult_scalar_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t *inout,
${CURVE_NAME_U}::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
size_t device_id,
cudaStream_t stream = 0)
{
try
{
// TODO: device_id
vector_mod_mult<CURVE_NAME_U::scalar_t, CURVE_NAME_U::scalar_t>(scalar_vec, inout, inout, n_elments);
vector_mod_mult<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -45,16 +46,17 @@ extern "C" int32_t vec_mod_mult_scalar_CURVE_NAME_L(CURVE_NAME_U::scalar_t *inou
}
}
extern "C" int32_t matrix_vec_mod_mult_CURVE_NAME_L(CURVE_NAME_U::scalar_t *matrix_flattened,
CURVE_NAME_U::scalar_t *input,
CURVE_NAME_U::scalar_t *output,
extern "C" int32_t matrix_vec_mod_mult_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t *matrix_flattened,
${CURVE_NAME_U}::scalar_t *input,
${CURVE_NAME_U}::scalar_t *output,
size_t n_elments,
size_t device_id)
size_t device_id,
cudaStream_t stream = 0)
{
try
{
// TODO: device_id
matrix_mod_mult<CURVE_NAME_U::scalar_t>(matrix_flattened, input, output, n_elments);
matrix_mod_mult<${CURVE_NAME_U}::scalar_t>(matrix_flattened, input, output, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)

View File

@@ -48,11 +48,11 @@ template <typename CONFIG> class ExtensionField {
}
static constexpr HOST_DEVICE_INLINE ExtensionField generator_x() {
return ExtensionField { FF { CONFIG::generator_x_re }, FF { CONFIG::generator_x_im } };
return ExtensionField { FF { CONFIG::g2_gen_x_re }, FF { CONFIG::g2_gen_x_im } };
}
static constexpr HOST_DEVICE_INLINE ExtensionField generator_y() {
return ExtensionField { FF { CONFIG::generator_y_re }, FF { CONFIG::generator_y_im } };
return ExtensionField { FF { CONFIG::g2_gen_y_re }, FF { CONFIG::g2_gen_y_im } };
}

View File

@@ -5,6 +5,9 @@
#include "../utils/host_math.cuh"
#include <random>
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#define HOST_INLINE __host__ __forceinline__
#define DEVICE_INLINE __device__ __forceinline__
@@ -13,7 +16,7 @@
template <class CONFIG> class Field {
public:
static constexpr unsigned TLC = CONFIG::limbs_count;
static constexpr unsigned NBITS = CONFIG::modulus_bits_count;
static constexpr unsigned NBITS = CONFIG::modulus_bit_count;
static constexpr HOST_DEVICE_INLINE Field zero() {
return Field { CONFIG::zero };
@@ -23,232 +26,60 @@ template <class CONFIG> class Field {
return Field { CONFIG::one };
}
static constexpr HOST_DEVICE_INLINE Field from(uint32_t value) {
storage<TLC> scalar;
scalar.limbs[0] = value;
for (int i = 1; i < TLC; i++) {
scalar.limbs[i] = 0;
}
return Field { scalar };
}
static constexpr HOST_DEVICE_INLINE Field generator_x() {
return Field { CONFIG::generator_x };
return Field { CONFIG::g1_gen_x };
}
static constexpr HOST_DEVICE_INLINE Field generator_y() {
return Field { CONFIG::generator_y };
return Field { CONFIG::g1_gen_y };
}
static constexpr HOST_INLINE Field omega(uint32_t log_size) {
// Quick fix to linking issue, permanent fix will follow
switch (log_size) {
case 0:
return Field { CONFIG::one };
case 1:
return Field { CONFIG::omega1 };
case 2:
return Field { CONFIG::omega2 };
case 3:
return Field { CONFIG::omega3 };
case 4:
return Field { CONFIG::omega4 };
case 5:
return Field { CONFIG::omega5 };
case 6:
return Field { CONFIG::omega6 };
case 7:
return Field { CONFIG::omega7 };
case 8:
return Field { CONFIG::omega8 };
case 9:
return Field { CONFIG::omega9 };
case 10:
return Field { CONFIG::omega10 };
case 11:
return Field { CONFIG::omega11 };
case 12:
return Field { CONFIG::omega12 };
case 13:
return Field { CONFIG::omega13 };
case 14:
return Field { CONFIG::omega14 };
case 15:
return Field { CONFIG::omega15 };
case 16:
return Field { CONFIG::omega16 };
case 17:
return Field { CONFIG::omega17 };
case 18:
return Field { CONFIG::omega18 };
case 19:
return Field { CONFIG::omega19 };
case 20:
return Field { CONFIG::omega20 };
case 21:
return Field { CONFIG::omega21 };
case 22:
return Field { CONFIG::omega22 };
case 23:
return Field { CONFIG::omega23 };
case 24:
return Field { CONFIG::omega24 };
case 25:
return Field { CONFIG::omega25 };
case 26:
return Field { CONFIG::omega26 };
case 27:
return Field { CONFIG::omega27 };
case 28:
return Field { CONFIG::omega28 };
case 29:
return Field { CONFIG::omega29 };
case 30:
return Field { CONFIG::omega30 };
case 31:
return Field { CONFIG::omega31 };
case 32:
return Field { CONFIG::omega32 };
}
return Field { CONFIG::one };
// return Field { CONFIG::omega[log_size-1] };
static HOST_INLINE Field omega(uint32_t logn) {
if (logn == 0) {
return Field { CONFIG::one };
}
if (logn > CONFIG::omegas_count) {
throw std::invalid_argument( "Field: Invalid omega index" );
}
storage_array<CONFIG::omegas_count, TLC> const omega = CONFIG::omega;
return Field { omega.storages[logn-1] };
}
static HOST_INLINE Field omega_inv(uint32_t logn) {
if (logn == 0) {
return Field { CONFIG::one };
}
if (logn > CONFIG::omegas_count) {
throw std::invalid_argument( "Field: Invalid omega_inv index" );
}
storage_array<CONFIG::omegas_count, TLC> const omega_inv = CONFIG::omega_inv;
return Field { omega_inv.storages[logn-1] };
}
static constexpr HOST_INLINE Field omega_inv(uint32_t log_size) {
// Quick fix to linking issue, permanent fix will follow
switch (log_size) {
case 0:
return Field { CONFIG::one };
case 1:
return Field { CONFIG::omega_inv1 };
case 2:
return Field { CONFIG::omega_inv2 };
case 3:
return Field { CONFIG::omega_inv3 };
case 4:
return Field { CONFIG::omega_inv4 };
case 5:
return Field { CONFIG::omega_inv5 };
case 6:
return Field { CONFIG::omega_inv6 };
case 7:
return Field { CONFIG::omega_inv7 };
case 8:
return Field { CONFIG::omega_inv8 };
case 9:
return Field { CONFIG::omega_inv9 };
case 10:
return Field { CONFIG::omega_inv10 };
case 11:
return Field { CONFIG::omega_inv11 };
case 12:
return Field { CONFIG::omega_inv12 };
case 13:
return Field { CONFIG::omega_inv13 };
case 14:
return Field { CONFIG::omega_inv14 };
case 15:
return Field { CONFIG::omega_inv15 };
case 16:
return Field { CONFIG::omega_inv16 };
case 17:
return Field { CONFIG::omega_inv17 };
case 18:
return Field { CONFIG::omega_inv18 };
case 19:
return Field { CONFIG::omega_inv19 };
case 20:
return Field { CONFIG::omega_inv20 };
case 21:
return Field { CONFIG::omega_inv21 };
case 22:
return Field { CONFIG::omega_inv22 };
case 23:
return Field { CONFIG::omega_inv23 };
case 24:
return Field { CONFIG::omega_inv24 };
case 25:
return Field { CONFIG::omega_inv25 };
case 26:
return Field { CONFIG::omega_inv26 };
case 27:
return Field { CONFIG::omega_inv27 };
case 28:
return Field { CONFIG::omega_inv28 };
case 29:
return Field { CONFIG::omega_inv29 };
case 30:
return Field { CONFIG::omega_inv30 };
case 31:
return Field { CONFIG::omega_inv31 };
case 32:
return Field { CONFIG::omega_inv32 };
}
return Field { CONFIG::one };
// return Field { CONFIG::omega_inv[log_size-1] };
}
static HOST_INLINE Field inv_log_size(uint32_t logn) {
if (logn == 0) {
return Field { CONFIG::one };
}
static constexpr HOST_INLINE Field inv_log_size(uint32_t log_size) {
// Quick fix to linking issue, permanent fix will follow
switch (log_size) {
case 1:
return Field { CONFIG::inv1 };
case 2:
return Field { CONFIG::inv2 };
case 3:
return Field { CONFIG::inv3 };
case 4:
return Field { CONFIG::inv4 };
case 5:
return Field { CONFIG::inv5 };
case 6:
return Field { CONFIG::inv6 };
case 7:
return Field { CONFIG::inv7 };
case 8:
return Field { CONFIG::inv8 };
case 9:
return Field { CONFIG::inv9 };
case 10:
return Field { CONFIG::inv10 };
case 11:
return Field { CONFIG::inv11 };
case 12:
return Field { CONFIG::inv12 };
case 13:
return Field { CONFIG::inv13 };
case 14:
return Field { CONFIG::inv14 };
case 15:
return Field { CONFIG::inv15 };
case 16:
return Field { CONFIG::inv16 };
case 17:
return Field { CONFIG::inv17 };
case 18:
return Field { CONFIG::inv18 };
case 19:
return Field { CONFIG::inv19 };
case 20:
return Field { CONFIG::inv20 };
case 21:
return Field { CONFIG::inv21 };
case 22:
return Field { CONFIG::inv22 };
case 23:
return Field { CONFIG::inv23 };
case 24:
return Field { CONFIG::inv24 };
case 25:
return Field { CONFIG::inv25 };
case 26:
return Field { CONFIG::inv26 };
case 27:
return Field { CONFIG::inv27 };
case 28:
return Field { CONFIG::inv28 };
case 29:
return Field { CONFIG::inv29 };
case 30:
return Field { CONFIG::inv30 };
case 31:
return Field { CONFIG::inv31 };
case 32:
return Field { CONFIG::inv32 };
}
return Field { CONFIG::one };
// return Field { CONFIG::inv[log_size-1] };
if (logn > CONFIG::omegas_count) {
throw std::invalid_argument( "Field: Invalid inv index" );
}
storage_array<CONFIG::omegas_count, TLC> const inv = CONFIG::inv;
return Field { inv.storages[logn-1] };
}
static constexpr HOST_DEVICE_INLINE Field modulus() {
@@ -505,7 +336,6 @@ template <class CONFIG> class Field {
static HOST_INLINE Field rand_host() {
std::random_device rd;
std::mt19937_64 generator(rd());
// std::mt19937_64 generator(0);
std::uniform_int_distribution<unsigned> distribution;
Field value{};
for (unsigned i = 0; i < TLC; i++)
@@ -524,10 +354,14 @@ template <class CONFIG> class Field {
}
friend std::ostream& operator<<(std::ostream& os, const Field& xs) {
os << "{";
for (int i = 0; i < TLC; i++)
os << xs.limbs_storage.limbs[i] << ", ";
os << "}";
std::stringstream hex_string;
hex_string << std::hex << std::setfill('0');
for (int i = 0; i < TLC; i++) {
hex_string << std::setw(8) << xs.limbs_storage.limbs[i];
}
os << "0x" << hex_string.str();
return os;
}

View File

@@ -7,3 +7,7 @@ template <unsigned LIMBS_COUNT> struct __align__(LIMBS_ALIGNMENT(LIMBS_COUNT)) s
static constexpr unsigned LC = LIMBS_COUNT;
uint32_t limbs[LIMBS_COUNT];
};
template <unsigned OMEGAS_COUNT, unsigned LIMBS_COUNT> struct __align__(LIMBS_ALIGNMENT(LIMBS_COUNT)) storage_array {
storage<LIMBS_COUNT> storages[OMEGAS_COUNT];
};

View File

@@ -4,6 +4,7 @@ use ark_bls12_381::{Fq as Fq_BLS12_381, Fr as Fr_BLS12_381, G1Affine as G1Affine
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
use serde::{Serialize, Deserialize};
use std::mem::transmute;
use ark_ff::Field;
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
@@ -53,6 +54,25 @@ pub const SCALAR_LIMBS_BLS12_381: usize = 8;
pub type BaseField_BLS12_381 = Field_BLS12_381<BASE_LIMBS_BLS12_381>;
pub type ScalarField_BLS12_381 = Field_BLS12_381<SCALAR_LIMBS_BLS12_381>;
impl Serialize for ScalarField_BLS12_381 {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
self.s.serialize(serializer)
}
}
impl<'de> Deserialize<'de> for ScalarField_BLS12_381 {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
let s = <[u32; SCALAR_LIMBS_BLS12_381] as Deserialize<'de>>::deserialize(deserializer)?;
Ok(ScalarField_BLS12_381 { s })
}
}
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
match val.len() {
n if n < NUM_LIMBS => {

View File

@@ -14,6 +14,15 @@ use rustacuda_core::DevicePointer;
use rustacuda::memory::{DeviceBox, CopyDestination, DeviceCopy};
extern "C" {
fn poseidon_multi_cuda_bls12_381(
input: *const ScalarField_BLS12_381,
out: *mut ScalarField_BLS12_381,
number_of_blocks: usize,
arity: c_uint,
device_id: usize,
stream: usize // TODO: provide a real stream
) -> c_uint;
fn msm_cuda_bls12_381(
out: *mut Point_BLS12_381,
points: *const PointAffineNoInfinity_BLS12_381,
@@ -226,6 +235,28 @@ extern "C" {
) -> c_int;
}
pub fn poseidon_multi_bls12_381(input: &[ScalarField_BLS12_381], arity: usize, device_id: usize) -> Result<Vec<ScalarField_BLS12_381>, std::io::Error> {
let number_of_blocks = input.len() / arity;
let mut out = vec![ScalarField_BLS12_381::zero(); number_of_blocks];
unsafe {
let res = poseidon_multi_cuda_bls12_381(
input as *const _ as *const ScalarField_BLS12_381,
out.as_mut_slice() as *mut _ as *mut ScalarField_BLS12_381,
number_of_blocks,
arity as c_uint,
device_id,
0
);
// TO-DO: go for better expression of error types
if res != 0 {
return Err(std::io::Error::new(std::io::ErrorKind::InvalidInput, "Error executing poseidon_multi"));
}
}
Ok(out)
}
pub fn msm_bls12_381(points: &[PointAffineNoInfinity_BLS12_381], scalars: &[ScalarField_BLS12_381], device_id: usize) -> Point_BLS12_381 {
let count = points.len();
if count != scalars.len() {
@@ -732,7 +763,7 @@ pub fn clone_buffer_bls12_381<T: DeviceCopy>(buf: &mut DeviceBuffer<T>) -> Devic
return buf_cpy;
}
pub fn get_rng_bls12_381(seed: Option<u64>) -> Box<dyn RngCore> {
pub fn get_rng_bls12_381(seed: Option<u64>) -> Box<dyn RngCore> { //TODO: not curve specific
let rng: Box<dyn RngCore> = match seed {
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
None => Box::new(rand::thread_rng()),
@@ -796,7 +827,9 @@ pub fn set_up_scalars_bls12_381(test_size: usize, log_domain_size: usize, invers
#[cfg(test)]
pub(crate) mod tests_bls12_381 {
use std::fs::{File, self};
use std::ops::Add;
use std::path::PathBuf;
use ark_bls12_381::{Fr, G1Affine, G1Projective};
use ark_ec::{msm::VariableBaseMSM, AffineCurve, ProjectiveCurve};
use ark_ff::{FftField, Field, Zero};
@@ -891,7 +924,24 @@ pub(crate) mod tests_bls12_381 {
);
}
}
#[test]
fn test_poseidon() {
let arities = [2, 4, 8, 11];
let number_of_blocks = 1024;
for arity in arities {
// Generate scalars sequence [0, 1, ... arity * number_of_blocks]
let scalars: Vec<ScalarField_BLS12_381> = (0..arity * number_of_blocks).map(|i| ScalarField_BLS12_381::from_ark(Fr_BLS12_381::from(i as i32).into_repr())).collect();
let out = poseidon_multi_bls12_381(&scalars, arity, 0).unwrap();
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push(format!("test_vectors/poseidon_1024_{}", arity));
let test_vector: Vec<ScalarField_BLS12_381> = serde_cbor::from_slice(&fs::read(path).unwrap()).unwrap();
assert!(out.iter().zip(test_vector.iter()).all(|(l, r)| l.eq(r)));
}
}
#[test]
fn test_batch_msm() {

View File

@@ -1,3 +1,7 @@
use rand::RngCore;
use rand::rngs::StdRng;
use rand::SeedableRng;
pub fn from_limbs<T>(limbs: Vec<u32>, chunk_size: usize, f: fn(&[u32]) -> T) -> Vec<T> {
let points = limbs
.chunks(chunk_size)
@@ -29,6 +33,14 @@ pub fn u64_vec_to_u32_vec(arr_u64: &[u64]) -> Vec<u32> {
arr_u32
}
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> { //TOOD: this func is universal
let rng: Box<dyn RngCore> = match seed {
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
None => Box::new(rand::thread_rng()),
};
rng
}
#[cfg(test)]
mod tests {
use ark_ff::BigInteger256;

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.