mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-11 16:38:27 -05:00
Compare commits
7 Commits
rust-inter
...
mod_mult_o
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a2f221dda1 | ||
|
|
c166b6d4d5 | ||
|
|
ecc3970c12 | ||
|
|
a20f603f6f | ||
|
|
32a178bc27 | ||
|
|
8d094bd5fb | ||
|
|
08c34a5183 |
@@ -4,4 +4,4 @@ This PR...
|
||||
|
||||
## Linked Issues
|
||||
|
||||
Resolves #
|
||||
Closes #
|
||||
5
.github/workflows/build.yml
vendored
5
.github/workflows/build.yml
vendored
@@ -2,9 +2,7 @@ name: Build
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- "main"
|
||||
- "dev"
|
||||
branches: [ "main" ]
|
||||
paths:
|
||||
- "icicle/**"
|
||||
- "src/**"
|
||||
@@ -14,7 +12,6 @@ on:
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ARCH_TYPE: sm_70
|
||||
DEFAULT_STREAM: per-thread
|
||||
|
||||
jobs:
|
||||
build-linux:
|
||||
|
||||
41
Cargo.toml
41
Cargo.toml
@@ -1,9 +1,42 @@
|
||||
[workspace]
|
||||
name = "icicle"
|
||||
[package]
|
||||
name = "icicle-utils"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = [ "Ingonyama" ]
|
||||
description = "An implementation of the Ingonyama CUDA Library"
|
||||
homepage = "https://www.ingonyama.com"
|
||||
repository = "https://github.com/ingonyama-zk/icicle"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
[[bench]]
|
||||
name = "ntt"
|
||||
path = "benches/ntt.rs"
|
||||
harness = false
|
||||
|
||||
members = ["icicle-core", "bls12-381", "bls12-377", "bn254"]
|
||||
[[bench]]
|
||||
name = "msm"
|
||||
path = "benches/msm.rs"
|
||||
harness = false
|
||||
|
||||
[dependencies]
|
||||
hex = "*"
|
||||
ark-std = "0.3.0"
|
||||
ark-ff = "0.3.0"
|
||||
ark-poly = "0.3.0"
|
||||
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
|
||||
ark-bls12-381 = { version = "0.3.0", optional = true }
|
||||
|
||||
rustacuda = "0.1"
|
||||
rustacuda_core = "0.1"
|
||||
rustacuda_derive = "0.1"
|
||||
|
||||
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
|
||||
|
||||
[build-dependencies]
|
||||
cc = { version = "1.0", features = ["parallel"] }
|
||||
|
||||
[dev-dependencies]
|
||||
"criterion" = "0.4.0"
|
||||
|
||||
[features]
|
||||
default = ["bls12_381"]
|
||||
bls12_381 = ["ark-bls12-381/curve"]
|
||||
|
||||
53
README.md
53
README.md
@@ -23,8 +23,8 @@ ICICLE is a CUDA implementation of general functions widely used in ZKP. ICICLE
|
||||
- Affine: {x, y}
|
||||
- Curves
|
||||
- [BLS12-381]
|
||||
- [BLS12-377]
|
||||
- [BN254]
|
||||
|
||||
> NOTE: _Support for BN254 and BLS12-377 are planned_
|
||||
|
||||
## Build and usage
|
||||
|
||||
@@ -89,55 +89,6 @@ The flag `--test-threads=1` is needed because currently some tests might interfe
|
||||
|
||||
An example of using the Rust bindings library can be found in our [fast-danksharding implementation][FDI]
|
||||
|
||||
### Supporting Additional Curves
|
||||
|
||||
Supporting additional curves can be done as follows:
|
||||
|
||||
Create a JSON file with the curve parameters. The curve is defined by the following parameters:
|
||||
- ``curve_name`` - e.g. ``bls12_381``.
|
||||
- ``modolus_p`` - scalar field modolus (in decimal).
|
||||
- ``bit_count_p`` - number of bits needed to represent `` modolus_p`` .
|
||||
- ``limb_p`` - number of bytes needed to represent `` modolus_p`` (rounded).
|
||||
- ``ntt_size`` - log of the maximal size subgroup of the scalar field.
|
||||
- ``modolus_q`` - base field modulus (in decimal).
|
||||
- ``bit_count_q`` - number of bits needed to represent `` modolus_q`` .
|
||||
- ``limb_q`` number of bytes needed to represent `` modolus_p`` (rounded).
|
||||
- ``weierstrass_b`` - Weierstrauss constant of the curve.
|
||||
- ``gen_x`` - x-value of a generator element for the curve.
|
||||
- ``gen_y`` - y-value of a generator element for the curve.
|
||||
|
||||
Here's an example for BLS12-381.
|
||||
```
|
||||
{
|
||||
"curve_name" : "bls12_381",
|
||||
"modolus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
|
||||
"bit_count_p" : 255,
|
||||
"limb_p" : 8,
|
||||
"ntt_size" : 32,
|
||||
"modolus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
|
||||
"bit_count_q" : 381,
|
||||
"limb_q" : 12,
|
||||
"weierstrass_b" : 4,
|
||||
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
|
||||
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569
|
||||
}
|
||||
```
|
||||
|
||||
Save the parameters JSON file in ``curve_parameters``.
|
||||
|
||||
Then run the Python script ``new_curve_script.py `` from the main icicle folder:
|
||||
|
||||
```
|
||||
python3 ./curve_parameters/new_curve_script_rust.py ./curve_parameters/bls12_381.json
|
||||
```
|
||||
|
||||
The script does the following:
|
||||
- Creates a folder in ``icicle/curves`` with the curve name, which contains all of the files needed for the supported operations in cuda.
|
||||
- Adds the curve exported operations to ``icicle/curves/index.cu``.
|
||||
- Creates a file with the curve name in ``src/curves`` with the relevant objects for the curve.
|
||||
- Creates a test file with the curve name in ``src``.
|
||||
|
||||
Testing the new curve could be done by running the tests in ``tests_curve_name`` (e.g. ``tests_bls12_381``).
|
||||
## Contributions
|
||||
|
||||
Join our [Discord Server](https://discord.gg/Y4SkbDf2Ff) and find us on the icicle channel. We will be happy to work together to support your use case and talk features, bugs and design.
|
||||
|
||||
32
benches/msm.rs
Normal file
32
benches/msm.rs
Normal file
@@ -0,0 +1,32 @@
|
||||
extern crate criterion;
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
|
||||
use icicle_utils::{set_up_scalars, generate_random_points, commit_batch, get_rng};
|
||||
use rustacuda::prelude::*;
|
||||
|
||||
|
||||
const LOG_MSM_SIZES: [usize; 1] = [12];
|
||||
const BATCH_SIZES: [usize; 2] = [128, 256];
|
||||
|
||||
fn bench_msm(c: &mut Criterion) {
|
||||
for log_msm_size in LOG_MSM_SIZES {
|
||||
for batch_size in BATCH_SIZES {
|
||||
let msm_size = 1 << log_msm_size;
|
||||
let (scalars, _, _) = set_up_scalars(msm_size, 0, false);
|
||||
let batch_scalars = vec![scalars; batch_size].concat();
|
||||
let mut d_scalars = DeviceBuffer::from_slice(&batch_scalars[..]).unwrap();
|
||||
let points = generate_random_points(msm_size, get_rng(None));
|
||||
let batch_points = vec![points; batch_size].concat();
|
||||
let mut d_points = DeviceBuffer::from_slice(&batch_points[..]).unwrap();
|
||||
|
||||
c.bench_function(
|
||||
&format!("MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|
||||
|b| b.iter(|| commit_batch(&mut d_points, &mut d_scalars, batch_size))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(msm_benches, bench_msm);
|
||||
criterion_main!(msm_benches);
|
||||
40
benches/ntt.rs
Normal file
40
benches/ntt.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
extern crate criterion;
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
|
||||
use icicle_utils::{interpolate_scalars_batch, interpolate_points_batch, set_up_scalars, set_up_points};
|
||||
|
||||
|
||||
const LOG_NTT_SIZES: [usize; 1] = [15];
|
||||
const BATCH_SIZES: [usize; 2] = [8, 16];
|
||||
|
||||
fn bench_point_ntt(c: &mut Criterion) {
|
||||
for log_ntt_size in LOG_NTT_SIZES {
|
||||
for batch_size in BATCH_SIZES {
|
||||
let ntt_size = 1 << log_ntt_size;
|
||||
let (_, mut d_evals, mut d_domain) = set_up_points(ntt_size * batch_size, log_ntt_size, true);
|
||||
|
||||
c.bench_function(
|
||||
&format!("EC NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| interpolate_points_batch(&mut d_evals, &mut d_domain, batch_size))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn bench_scalar_ntt(c: &mut Criterion) {
|
||||
for log_ntt_size in LOG_NTT_SIZES {
|
||||
for batch_size in BATCH_SIZES {
|
||||
let ntt_size = 1 << log_ntt_size;
|
||||
let (_, mut d_evals, mut d_domain) = set_up_scalars(ntt_size * batch_size, log_ntt_size, true);
|
||||
|
||||
c.bench_function(
|
||||
&format!("Scalar NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(ntt_benches, bench_point_ntt, bench_scalar_ntt);
|
||||
criterion_main!(ntt_benches);
|
||||
@@ -1,34 +0,0 @@
|
||||
[package]
|
||||
name = "bls12-377"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = [ "Ingonyama" ]
|
||||
|
||||
[dependencies]
|
||||
icicle-core = { path = "../icicle-core" }
|
||||
|
||||
hex = "*"
|
||||
ark-std = "0.3.0"
|
||||
ark-ff = "0.3.0"
|
||||
ark-poly = "0.3.0"
|
||||
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
|
||||
ark-bls12-377 = "0.3.0"
|
||||
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_derive = "1.0"
|
||||
serde_cbor = "0.11.2"
|
||||
|
||||
rustacuda = "0.1"
|
||||
rustacuda_core = "0.1"
|
||||
rustacuda_derive = "0.1"
|
||||
|
||||
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
|
||||
|
||||
[build-dependencies]
|
||||
cc = { version = "1.0", features = ["parallel"] }
|
||||
|
||||
[dev-dependencies]
|
||||
"criterion" = "0.4.0"
|
||||
|
||||
[features]
|
||||
g2 = []
|
||||
@@ -1,4 +0,0 @@
|
||||
pub trait Field<const NUM_LIMBS: usize> {
|
||||
const MODOLUS: [u32;NUM_LIMBS];
|
||||
const LIMBS: usize = NUM_LIMBS;
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
pub mod field;
|
||||
pub mod scalar;
|
||||
pub mod point;
|
||||
@@ -1,106 +0,0 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
use super::scalar::{get_fixed_limbs, self};
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointT<BF: scalar::ScalarTrait> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
pub z: BF,
|
||||
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> Default for PointT<BF> {
|
||||
fn default() -> Self {
|
||||
PointT::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn zero() -> Self {
|
||||
PointT {
|
||||
x: BF::zero(),
|
||||
y: BF::one(),
|
||||
z: BF::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infinity() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinityT<BF> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
}
|
||||
|
||||
impl<BF: scalar::ScalarTrait> Default for PointAffineNoInfinityT<BF> {
|
||||
fn default() -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::zero(),
|
||||
y: BF::zero(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointAffineNoInfinityT<BF> {
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> PointT<BF> {
|
||||
PointT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BF::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
PointT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y),
|
||||
z: BF::from_limbs(z)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> PointT<BF> {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BF::base_limbs(), "length must be 3 * {}", BF::base_limbs());
|
||||
PointT {
|
||||
x: BF::from_limbs(value[..BF::base_limbs()].try_into().unwrap()),
|
||||
y: BF::from_limbs(value[BF::base_limbs()..BF::base_limbs() * 2].try_into().unwrap()),
|
||||
z: BF::from_limbs(value[BF::base_limbs() * 2..].try_into().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinityT<BF> {
|
||||
PointAffineNoInfinityT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,102 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination};
|
||||
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use super::field::{Field, self};
|
||||
|
||||
pub fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
|
||||
match val.len() {
|
||||
n if n < NUM_LIMBS => {
|
||||
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
|
||||
padded[..val.len()].copy_from_slice(&val);
|
||||
padded
|
||||
}
|
||||
n if n == NUM_LIMBS => val.try_into().unwrap(),
|
||||
_ => panic!("slice has too many elements"),
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ScalarTrait{
|
||||
fn base_limbs() -> usize;
|
||||
fn zero() -> Self;
|
||||
fn from_limbs(value: &[u32]) -> Self;
|
||||
fn one() -> Self;
|
||||
fn to_bytes_le(&self) -> Vec<u8>;
|
||||
fn limbs(&self) -> &[u32];
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy)]
|
||||
#[repr(C)]
|
||||
pub struct ScalarT<M, const NUM_LIMBS: usize> {
|
||||
pub(crate) phantom: PhantomData<M>,
|
||||
pub(crate) value : [u32; NUM_LIMBS]
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarTrait for ScalarT<M, NUM_LIMBS>
|
||||
where
|
||||
M: Field<NUM_LIMBS>,
|
||||
{
|
||||
|
||||
fn base_limbs() -> usize {
|
||||
return NUM_LIMBS;
|
||||
}
|
||||
|
||||
fn zero() -> Self {
|
||||
ScalarT {
|
||||
value: [0u32; NUM_LIMBS],
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
value: get_fixed_limbs(value),
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
ScalarT { value: s, phantom: PhantomData }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.value
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
fn limbs(&self) -> &[u32] {
|
||||
&self.value
|
||||
}
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarT<M, NUM_LIMBS> where M: field::Field<NUM_LIMBS>{
|
||||
pub fn from_limbs_le(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
Self::from_limbs(value)
|
||||
}
|
||||
|
||||
pub fn from_limbs_be(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
let mut value = value.to_vec();
|
||||
value.reverse();
|
||||
Self::from_limbs_le(&value)
|
||||
}
|
||||
|
||||
// Additional Functions
|
||||
pub fn add(&self, other:ScalarT<M, NUM_LIMBS>) -> ScalarT<M,NUM_LIMBS>{ // overload +
|
||||
return ScalarT{value: [self.value[0] + other.value[0];NUM_LIMBS], phantom: PhantomData };
|
||||
}
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination, DeviceCopy};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::basic_structs::point::{PointT, PointAffineNoInfinityT};
|
||||
use crate::basic_structs::scalar::ScalarT;
|
||||
use crate::basic_structs::field::Field;
|
||||
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct ScalarField;
|
||||
impl Field<8> for ScalarField {
|
||||
const MODOLUS: [u32; 8] = [0x0;8];
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct BaseField;
|
||||
impl Field<12> for BaseField {
|
||||
const MODOLUS: [u32; 12] = [0x0;12];
|
||||
}
|
||||
|
||||
|
||||
pub type Scalar = ScalarT<ScalarField,8>;
|
||||
impl Default for Scalar {
|
||||
fn default() -> Self {
|
||||
Self{value: [0x0;ScalarField::LIMBS], phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl DeviceCopy for Scalar{}
|
||||
|
||||
|
||||
pub type Base = ScalarT<BaseField,12>;
|
||||
impl Default for Base {
|
||||
fn default() -> Self {
|
||||
Self{value: [0x0;BaseField::LIMBS], phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl DeviceCopy for Base{}
|
||||
|
||||
pub type Point = PointT<Base>;
|
||||
pub type PointAffineNoInfinity = PointAffineNoInfinityT<Base>;
|
||||
|
||||
extern "C" {
|
||||
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
@@ -1,798 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use ark_std::UniformRand;
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda::CudaFlags;
|
||||
use rustacuda::memory::DeviceBox;
|
||||
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
|
||||
use rustacuda_core::DevicePointer;
|
||||
use std::mem::transmute;
|
||||
use crate::basic_structs::scalar::ScalarTrait;
|
||||
use crate::curve_structs::*;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
use ark_bls12_377::{Fq as Fq_BLS12_377, Fr as Fr_BLS12_377, G1Affine as G1Affine_BLS12_377, G1Projective as G1Projective_BLS12_377};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use rustacuda::memory::{CopyDestination, DeviceCopy};
|
||||
|
||||
extern "C" {
|
||||
fn msm_cuda(
|
||||
out: *mut Point,
|
||||
points: *const PointAffineNoInfinity,
|
||||
scalars: *const Scalar,
|
||||
count: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn msm_batch_cuda(
|
||||
out: *mut Point,
|
||||
points: *const PointAffineNoInfinity,
|
||||
scalars: *const Scalar,
|
||||
batch_size: usize,
|
||||
msm_size: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn commit_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_scalars: DevicePointer<Scalar>,
|
||||
d_points: DevicePointer<PointAffineNoInfinity>,
|
||||
count: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn commit_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_scalars: DevicePointer<Scalar>,
|
||||
d_points: DevicePointer<PointAffineNoInfinity>,
|
||||
count: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn build_domain_cuda(domain_size: usize, logn: usize, inverse: bool, device_id: usize) -> DevicePointer<Scalar>;
|
||||
|
||||
fn ntt_cuda(inout: *mut Scalar, n: usize, inverse: bool, device_id: usize) -> c_int;
|
||||
|
||||
fn ecntt_cuda(inout: *mut Point, n: usize, inverse: bool, device_id: usize) -> c_int;
|
||||
|
||||
fn ntt_batch_cuda(
|
||||
inout: *mut Scalar,
|
||||
arr_size: usize,
|
||||
n: usize,
|
||||
inverse: bool,
|
||||
) -> c_int;
|
||||
|
||||
fn ecntt_batch_cuda(inout: *mut Point, arr_size: usize, n: usize, inverse: bool) -> c_int;
|
||||
|
||||
fn interpolate_scalars_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_evaluations: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_scalars_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_evaluations: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_points_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_evaluations: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_points_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_evaluations: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_on_coset_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_on_coset_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_on_coset_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_on_coset_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_scalars_cuda(
|
||||
d_arr: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_scalars_batch_cuda(
|
||||
d_arr: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_points_cuda(
|
||||
d_arr: DevicePointer<Point>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_points_batch_cuda(
|
||||
d_arr: DevicePointer<Point>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn vec_mod_mult_point(
|
||||
inout: *mut Point,
|
||||
scalars: *const Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
|
||||
fn vec_mod_mult_scalar(
|
||||
inout: *mut Scalar,
|
||||
scalars: *const Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
|
||||
fn matrix_vec_mod_mult(
|
||||
matrix_flattened: *const Scalar,
|
||||
input: *const Scalar,
|
||||
output: *mut Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
}
|
||||
|
||||
pub fn msm(points: &[PointAffineNoInfinity], scalars: &[Scalar], device_id: usize) -> Point {
|
||||
let count = points.len();
|
||||
if count != scalars.len() {
|
||||
todo!("variable length")
|
||||
}
|
||||
|
||||
let mut ret = Point::zero();
|
||||
unsafe {
|
||||
msm_cuda(
|
||||
&mut ret as *mut _ as *mut Point,
|
||||
points as *const _ as *const PointAffineNoInfinity,
|
||||
scalars as *const _ as *const Scalar,
|
||||
scalars.len(),
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn msm_batch(
|
||||
points: &[PointAffineNoInfinity],
|
||||
scalars: &[Scalar],
|
||||
batch_size: usize,
|
||||
device_id: usize,
|
||||
) -> Vec<Point> {
|
||||
let count = points.len();
|
||||
if count != scalars.len() {
|
||||
todo!("variable length")
|
||||
}
|
||||
|
||||
let mut ret = vec![Point::zero(); batch_size];
|
||||
|
||||
unsafe {
|
||||
msm_batch_cuda(
|
||||
&mut ret[0] as *mut _ as *mut Point,
|
||||
points as *const _ as *const PointAffineNoInfinity,
|
||||
scalars as *const _ as *const Scalar,
|
||||
batch_size,
|
||||
count / batch_size,
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn commit(
|
||||
points: &mut DeviceBuffer<PointAffineNoInfinity>,
|
||||
scalars: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBox<Point> {
|
||||
let mut res = DeviceBox::new(&Point::zero()).unwrap();
|
||||
unsafe {
|
||||
commit_cuda(
|
||||
res.as_device_ptr(),
|
||||
scalars.as_device_ptr(),
|
||||
points.as_device_ptr(),
|
||||
scalars.len(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn commit_batch(
|
||||
points: &mut DeviceBuffer<PointAffineNoInfinity>,
|
||||
scalars: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(batch_size).unwrap() };
|
||||
unsafe {
|
||||
commit_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
scalars.as_device_ptr(),
|
||||
points.as_device_ptr(),
|
||||
scalars.len() / batch_size,
|
||||
batch_size,
|
||||
0,
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Compute an in-place NTT on the input data.
|
||||
fn ntt_internal(values: &mut [Scalar], device_id: usize, inverse: bool) -> i32 {
|
||||
let ret_code = unsafe {
|
||||
ntt_cuda(
|
||||
values as *mut _ as *mut Scalar,
|
||||
values.len(),
|
||||
inverse,
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
ret_code
|
||||
}
|
||||
|
||||
pub fn ntt(values: &mut [Scalar], device_id: usize) {
|
||||
ntt_internal(values, device_id, false);
|
||||
}
|
||||
|
||||
pub fn intt(values: &mut [Scalar], device_id: usize) {
|
||||
ntt_internal(values, device_id, true);
|
||||
}
|
||||
|
||||
/// Compute an in-place NTT on the input data.
|
||||
fn ntt_internal_batch(
|
||||
values: &mut [Scalar],
|
||||
device_id: usize,
|
||||
batch_size: usize,
|
||||
inverse: bool,
|
||||
) -> i32 {
|
||||
unsafe {
|
||||
ntt_batch_cuda(
|
||||
values as *mut _ as *mut Scalar,
|
||||
values.len(),
|
||||
batch_size,
|
||||
inverse,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ntt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
|
||||
ntt_internal_batch(values, 0, batch_size, false);
|
||||
}
|
||||
|
||||
pub fn intt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
|
||||
ntt_internal_batch(values, 0, batch_size, true);
|
||||
}
|
||||
|
||||
/// Compute an in-place ECNTT on the input data.
|
||||
fn ecntt_internal(values: &mut [Point], inverse: bool, device_id: usize) -> i32 {
|
||||
unsafe {
|
||||
ecntt_cuda(
|
||||
values as *mut _ as *mut Point,
|
||||
values.len(),
|
||||
inverse,
|
||||
device_id,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ecntt(values: &mut [Point], device_id: usize) {
|
||||
ecntt_internal(values, false, device_id);
|
||||
}
|
||||
|
||||
/// Compute an in-place iECNTT on the input data.
|
||||
pub fn iecntt(values: &mut [Point], device_id: usize) {
|
||||
ecntt_internal(values, true, device_id);
|
||||
}
|
||||
|
||||
/// Compute an in-place ECNTT on the input data.
|
||||
fn ecntt_internal_batch(
|
||||
values: &mut [Point],
|
||||
device_id: usize,
|
||||
batch_size: usize,
|
||||
inverse: bool,
|
||||
) -> i32 {
|
||||
unsafe {
|
||||
ecntt_batch_cuda(
|
||||
values as *mut _ as *mut Point,
|
||||
values.len(),
|
||||
batch_size,
|
||||
inverse,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
|
||||
ecntt_internal_batch(values, 0, batch_size, false);
|
||||
}
|
||||
|
||||
/// Compute an in-place iECNTT on the input data.
|
||||
pub fn iecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
|
||||
ecntt_internal_batch(values, 0, batch_size, true);
|
||||
}
|
||||
|
||||
pub fn build_domain(domain_size: usize, logn: usize, inverse: bool) -> DeviceBuffer<Scalar> {
|
||||
unsafe {
|
||||
DeviceBuffer::from_raw_parts(build_domain_cuda(
|
||||
domain_size,
|
||||
logn,
|
||||
inverse,
|
||||
0
|
||||
), domain_size)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn reverse_order_scalars(
|
||||
d_scalars: &mut DeviceBuffer<Scalar>,
|
||||
) {
|
||||
unsafe { reverse_order_scalars_cuda(
|
||||
d_scalars.as_device_ptr(),
|
||||
d_scalars.len(),
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_scalars_batch(
|
||||
d_scalars: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) {
|
||||
unsafe { reverse_order_scalars_batch_cuda(
|
||||
d_scalars.as_device_ptr(),
|
||||
d_scalars.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_points(
|
||||
d_points: &mut DeviceBuffer<Point>,
|
||||
) {
|
||||
unsafe { reverse_order_points_cuda(
|
||||
d_points.as_device_ptr(),
|
||||
d_points.len(),
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_points_batch(
|
||||
d_points: &mut DeviceBuffer<Point>,
|
||||
batch_size: usize,
|
||||
) {
|
||||
unsafe { reverse_order_points_batch_cuda(
|
||||
d_points.as_device_ptr(),
|
||||
d_points.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn interpolate_scalars(
|
||||
d_evaluations: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe { interpolate_scalars_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_scalars_batch(
|
||||
d_evaluations: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe { interpolate_scalars_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
batch_size,
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_points(
|
||||
d_evaluations: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe { interpolate_points_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_points_batch(
|
||||
d_evaluations: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe { interpolate_points_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
batch_size,
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_on_coset(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_on_coset_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_on_coset_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_on_coset_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_on_coset(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_on_coset_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_on_coset_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_on_coset_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn multp_vec(a: &mut [Point], b: &[Scalar], device_id: usize) {
|
||||
assert_eq!(a.len(), b.len());
|
||||
unsafe {
|
||||
vec_mod_mult_point(
|
||||
a as *mut _ as *mut Point,
|
||||
b as *const _ as *const Scalar,
|
||||
a.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mult_sc_vec(a: &mut [Scalar], b: &[Scalar], device_id: usize) {
|
||||
assert_eq!(a.len(), b.len());
|
||||
unsafe {
|
||||
vec_mod_mult_scalar(
|
||||
a as *mut _ as *mut Scalar,
|
||||
b as *const _ as *const Scalar,
|
||||
a.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Multiply a matrix by a scalar:
|
||||
// `a` - flattenned matrix;
|
||||
// `b` - vector to multiply `a` by;
|
||||
pub fn mult_matrix_by_vec(a: &[Scalar], b: &[Scalar], device_id: usize) -> Vec<Scalar> {
|
||||
let mut c = Vec::with_capacity(b.len());
|
||||
for i in 0..b.len() {
|
||||
c.push(Scalar::zero());
|
||||
}
|
||||
unsafe {
|
||||
matrix_vec_mod_mult(
|
||||
a as *const _ as *const Scalar,
|
||||
b as *const _ as *const Scalar,
|
||||
c.as_mut_slice() as *mut _ as *mut Scalar,
|
||||
b.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
c
|
||||
}
|
||||
|
||||
pub fn clone_buffer<T: DeviceCopy>(buf: &mut DeviceBuffer<T>) -> DeviceBuffer<T> {
|
||||
let mut buf_cpy = unsafe { DeviceBuffer::uninitialized(buf.len()).unwrap() };
|
||||
unsafe { buf_cpy.copy_from(buf) };
|
||||
return buf_cpy;
|
||||
}
|
||||
|
||||
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> {
|
||||
let rng: Box<dyn RngCore> = match seed {
|
||||
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
|
||||
None => Box::new(rand::thread_rng()),
|
||||
};
|
||||
rng
|
||||
}
|
||||
|
||||
fn set_up_device() {
|
||||
// Set up the context, load the module, and create a stream to run kernels in.
|
||||
rustacuda::init(CudaFlags::empty()).unwrap();
|
||||
let device = Device::get_device(0).unwrap();
|
||||
let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device).unwrap();
|
||||
}
|
||||
|
||||
pub fn generate_random_points(
|
||||
count: usize,
|
||||
mut rng: Box<dyn RngCore>,
|
||||
) -> Vec<PointAffineNoInfinity> {
|
||||
(0..count)
|
||||
.map(|_| Point::from_ark(G1Projective_BLS12_377::rand(&mut rng)).to_xy_strip_z())
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_random_points_proj(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Point> {
|
||||
(0..count)
|
||||
.map(|_| Point::from_ark(G1Projective_BLS12_377::rand(&mut rng)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_random_scalars(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Scalar> {
|
||||
(0..count)
|
||||
.map(|_| Scalar::from_ark(Fr_BLS12_377::rand(&mut rng).into_repr()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn set_up_points(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Point>, DeviceBuffer<Point>, DeviceBuffer<Scalar>) {
|
||||
set_up_device();
|
||||
|
||||
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
|
||||
|
||||
let seed = Some(0); // fix the rng to get two equal scalar
|
||||
let vector = generate_random_points_proj(test_size, get_rng(seed));
|
||||
let mut vector_mut = vector.clone();
|
||||
|
||||
let mut d_vector = DeviceBuffer::from_slice(&vector[..]).unwrap();
|
||||
(vector_mut, d_vector, d_domain)
|
||||
}
|
||||
|
||||
pub fn set_up_scalars(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Scalar>, DeviceBuffer<Scalar>, DeviceBuffer<Scalar>) {
|
||||
set_up_device();
|
||||
|
||||
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
|
||||
|
||||
let seed = Some(0); // fix the rng to get two equal scalars
|
||||
let mut vector_mut = generate_random_scalars(test_size, get_rng(seed));
|
||||
|
||||
let mut d_vector = DeviceBuffer::from_slice(&vector_mut[..]).unwrap();
|
||||
(vector_mut, d_vector, d_domain)
|
||||
}
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
pub mod test_bls12_377;
|
||||
pub mod basic_structs;
|
||||
pub mod from_cuda;
|
||||
pub mod curve_structs;
|
||||
@@ -1,816 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use ark_std::UniformRand;
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda::CudaFlags;
|
||||
use rustacuda::memory::DeviceBox;
|
||||
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
|
||||
use rustacuda_core::DevicePointer;
|
||||
use std::mem::transmute;
|
||||
pub use crate::basic_structs::scalar::ScalarTrait;
|
||||
pub use crate::curve_structs::*;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
use ark_bls12_377::{Fq as Fq_BLS12_377, Fr as Fr_BLS12_377, G1Affine as G1Affine_BLS12_377, G1Projective as G1Projective_BLS12_377};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use rustacuda::memory::{CopyDestination, DeviceCopy};
|
||||
|
||||
|
||||
impl Scalar {
|
||||
pub fn to_biginteger254(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_biginteger256(ark: BigInteger256) -> Self {
|
||||
Self{ value: u64_vec_to_u32_vec(&ark.0).try_into().unwrap(), phantom : PhantomData}
|
||||
}
|
||||
|
||||
pub fn to_biginteger256_transmute(&self) -> BigInteger256 {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_biginteger_transmute(v: BigInteger256) -> Scalar {
|
||||
Scalar{ value: unsafe{ transmute(v)}, phantom : PhantomData }
|
||||
}
|
||||
|
||||
pub fn to_ark_transmute(&self) -> Fr_BLS12_377 {
|
||||
unsafe { std::mem::transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_ark_transmute(v: &Fr_BLS12_377) -> Scalar {
|
||||
unsafe { std::mem::transmute_copy(v) }
|
||||
}
|
||||
|
||||
pub fn to_ark_mod_p(&self) -> Fr_BLS12_377 {
|
||||
Fr_BLS12_377::new(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap()))
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> Fr_BLS12_377 {
|
||||
Fr_BLS12_377::from_repr(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())).unwrap()
|
||||
}
|
||||
|
||||
pub fn from_ark(v: BigInteger256) -> Scalar {
|
||||
Self { value : u64_vec_to_u32_vec(&v.0).try_into().unwrap(), phantom: PhantomData}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl Base {
|
||||
pub fn to_ark(&self) -> BigInteger384 {
|
||||
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger384) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl Point {
|
||||
pub fn to_ark(&self) -> G1Projective_BLS12_377 {
|
||||
self.to_ark_affine().into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_BLS12_377 {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_BLS12_377::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_BLS12_377) -> Point {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point {
|
||||
x: Base::from_ark((ark.x * z_invsq).into_repr()),
|
||||
y: Base::from_ark((ark.y * z_invq3).into_repr()),
|
||||
z: Base::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PointAffineNoInfinity {
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_BLS12_377 {
|
||||
G1Affine_BLS12_377::new(Fq_BLS12_377::new(self.x.to_ark()), Fq_BLS12_377::new(self.y.to_ark()), false)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_BLS12_377 {
|
||||
G1Affine_BLS12_377::new(
|
||||
Fq_BLS12_377::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_BLS12_377::from_repr(self.y.to_ark()).unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(p: &G1Affine_BLS12_377) -> Self {
|
||||
PointAffineNoInfinity {
|
||||
x: Base::from_ark(p.x.into_repr()),
|
||||
y: Base::from_ark(p.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Point {
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity {
|
||||
x: Base::from_ark(ark_affine.x.into_repr()),
|
||||
y: Base::from_ark(ark_affine.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests_bls12_377 {
|
||||
use std::ops::Add;
|
||||
use ark_bls12_377::{Fr, G1Affine, G1Projective};
|
||||
use ark_ec::{msm::VariableBaseMSM, AffineCurve, ProjectiveCurve};
|
||||
use ark_ff::{FftField, Field, Zero, PrimeField};
|
||||
use ark_std::UniformRand;
|
||||
use rustacuda::prelude::{DeviceBuffer, CopyDestination};
|
||||
use crate::curve_structs::{Point, Scalar, Base};
|
||||
use crate::basic_structs::scalar::ScalarTrait;
|
||||
use crate::from_cuda::{generate_random_points, get_rng, generate_random_scalars, msm, msm_batch, set_up_scalars, commit, commit_batch, ntt, intt, generate_random_points_proj, ecntt, iecntt, ntt_batch, ecntt_batch, iecntt_batch, intt_batch, reverse_order_scalars_batch, interpolate_scalars_batch, set_up_points, reverse_order_points, interpolate_points, reverse_order_points_batch, interpolate_points_batch, evaluate_scalars, interpolate_scalars, reverse_order_scalars, evaluate_points, build_domain, evaluate_scalars_on_coset, evaluate_points_on_coset, mult_matrix_by_vec, mult_sc_vec, multp_vec,evaluate_scalars_batch, evaluate_points_batch, evaluate_scalars_on_coset_batch, evaluate_points_on_coset_batch};
|
||||
|
||||
fn random_points_ark_proj(nof_elements: usize) -> Vec<G1Projective> {
|
||||
let mut rng = ark_std::rand::thread_rng();
|
||||
let mut points_ga: Vec<G1Projective> = Vec::new();
|
||||
for _ in 0..nof_elements {
|
||||
let aff = G1Projective::rand(&mut rng);
|
||||
points_ga.push(aff);
|
||||
}
|
||||
points_ga
|
||||
}
|
||||
|
||||
fn ecntt_arc_naive(
|
||||
points: &Vec<G1Projective>,
|
||||
size: usize,
|
||||
inverse: bool,
|
||||
) -> Vec<G1Projective> {
|
||||
let mut result: Vec<G1Projective> = Vec::new();
|
||||
for _ in 0..size {
|
||||
result.push(G1Projective::zero());
|
||||
}
|
||||
let rou: Fr;
|
||||
if !inverse {
|
||||
rou = Fr::get_root_of_unity(size).unwrap();
|
||||
} else {
|
||||
rou = Fr::inverse(&Fr::get_root_of_unity(size).unwrap()).unwrap();
|
||||
}
|
||||
for k in 0..size {
|
||||
for l in 0..size {
|
||||
let pow: [u64; 1] = [(l * k).try_into().unwrap()];
|
||||
let mul_rou = Fr::pow(&rou, &pow);
|
||||
result[k] = result[k].add(points[l].into_affine().mul(mul_rou));
|
||||
}
|
||||
}
|
||||
if inverse {
|
||||
let size2 = size as u64;
|
||||
for k in 0..size {
|
||||
let multfactor = Fr::inverse(&Fr::from(size2)).unwrap();
|
||||
result[k] = result[k].into_affine().mul(multfactor);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
fn check_eq(points: &Vec<G1Projective>, points2: &Vec<G1Projective>) -> bool {
|
||||
let mut eq = true;
|
||||
for i in 0..points.len() {
|
||||
if points2[i].ne(&points[i]) {
|
||||
eq = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return eq;
|
||||
}
|
||||
|
||||
fn test_naive_ark_ecntt(size: usize) {
|
||||
let points = random_points_ark_proj(size);
|
||||
let result1: Vec<G1Projective> = ecntt_arc_naive(&points, size, false);
|
||||
let result2: Vec<G1Projective> = ecntt_arc_naive(&result1, size, true);
|
||||
assert!(!check_eq(&result2, &result1));
|
||||
assert!(check_eq(&result2, &points));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_msm() {
|
||||
let test_sizes = [6, 9];
|
||||
|
||||
for pow2 in test_sizes {
|
||||
let count = 1 << pow2;
|
||||
let seed = None; // set Some to provide seed
|
||||
let points = generate_random_points(count, get_rng(seed));
|
||||
let scalars = generate_random_scalars(count, get_rng(seed));
|
||||
|
||||
let msm_result = msm(&points, &scalars, 0);
|
||||
|
||||
let point_r_ark: Vec<_> = points.iter().map(|x| x.to_ark_repr()).collect();
|
||||
let scalars_r_ark: Vec<_> = scalars.iter().map(|x| x.to_ark()).collect();
|
||||
|
||||
let msm_result_ark = VariableBaseMSM::multi_scalar_mul(&point_r_ark, &scalars_r_ark);
|
||||
|
||||
assert_eq!(msm_result.to_ark_affine(), msm_result_ark);
|
||||
assert_eq!(msm_result.to_ark(), msm_result_ark);
|
||||
assert_eq!(
|
||||
msm_result.to_ark_affine(),
|
||||
Point::from_ark(msm_result_ark).to_ark_affine()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_batch_msm() {
|
||||
for batch_pow2 in [2, 4] {
|
||||
for pow2 in [4, 6] {
|
||||
let msm_size = 1 << pow2;
|
||||
let batch_size = 1 << batch_pow2;
|
||||
let seed = None; // set Some to provide seed
|
||||
let points_batch = generate_random_points(msm_size * batch_size, get_rng(seed));
|
||||
let scalars_batch = generate_random_scalars(msm_size * batch_size, get_rng(seed));
|
||||
|
||||
let point_r_ark: Vec<_> = points_batch.iter().map(|x| x.to_ark_repr()).collect();
|
||||
let scalars_r_ark: Vec<_> = scalars_batch.iter().map(|x| x.to_ark()).collect();
|
||||
|
||||
let expected: Vec<_> = point_r_ark
|
||||
.chunks(msm_size)
|
||||
.zip(scalars_r_ark.chunks(msm_size))
|
||||
.map(|p| Point::from_ark(VariableBaseMSM::multi_scalar_mul(p.0, p.1)))
|
||||
.collect();
|
||||
|
||||
let result = msm_batch(&points_batch, &scalars_batch, batch_size, 0);
|
||||
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_commit() {
|
||||
let test_size = 1 << 8;
|
||||
let seed = Some(0);
|
||||
let (mut scalars, mut d_scalars, _) = set_up_scalars(test_size, 0, false);
|
||||
let mut points = generate_random_points(test_size, get_rng(seed));
|
||||
let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();
|
||||
|
||||
let msm_result = msm(&points, &scalars, 0);
|
||||
let mut d_commit_result = commit(&mut d_points, &mut d_scalars);
|
||||
let mut h_commit_result = Point::zero();
|
||||
d_commit_result.copy_to(&mut h_commit_result).unwrap();
|
||||
|
||||
assert_eq!(msm_result, h_commit_result);
|
||||
assert_ne!(msm_result, Point::zero());
|
||||
assert_ne!(h_commit_result, Point::zero());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_batch_commit() {
|
||||
let batch_size = 4;
|
||||
let test_size = 1 << 12;
|
||||
let seed = Some(0);
|
||||
let (scalars, mut d_scalars, _) = set_up_scalars(test_size * batch_size, 0, false);
|
||||
let points = generate_random_points(test_size * batch_size, get_rng(seed));
|
||||
let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();
|
||||
|
||||
let msm_result = msm_batch(&points, &scalars, batch_size, 0);
|
||||
let mut d_commit_result = commit_batch(&mut d_points, &mut d_scalars, batch_size);
|
||||
let mut h_commit_result: Vec<Point> = (0..batch_size).map(|_| Point::zero()).collect();
|
||||
d_commit_result.copy_to(&mut h_commit_result[..]).unwrap();
|
||||
|
||||
assert_eq!(msm_result, h_commit_result);
|
||||
for h in h_commit_result {
|
||||
assert_ne!(h, Point::zero());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ntt() {
|
||||
//NTT
|
||||
let seed = None; //some value to fix the rng
|
||||
let test_size = 1 << 3;
|
||||
|
||||
let scalars = generate_random_scalars(test_size, get_rng(seed));
|
||||
|
||||
let mut ntt_result = scalars.clone();
|
||||
ntt(&mut ntt_result, 0);
|
||||
|
||||
assert_ne!(ntt_result, scalars);
|
||||
|
||||
let mut intt_result = ntt_result.clone();
|
||||
|
||||
intt(&mut intt_result, 0);
|
||||
|
||||
assert_eq!(intt_result, scalars);
|
||||
|
||||
//ECNTT
|
||||
let points_proj = generate_random_points_proj(test_size, get_rng(seed));
|
||||
|
||||
test_naive_ark_ecntt(test_size);
|
||||
|
||||
assert!(points_proj[0].to_ark().into_affine().is_on_curve());
|
||||
|
||||
//naive ark
|
||||
let points_proj_ark = points_proj
|
||||
.iter()
|
||||
.map(|p| p.to_ark())
|
||||
.collect::<Vec<G1Projective>>();
|
||||
|
||||
let ecntt_result_naive = ecntt_arc_naive(&points_proj_ark, points_proj_ark.len(), false);
|
||||
|
||||
let iecntt_result_naive = ecntt_arc_naive(&ecntt_result_naive, points_proj_ark.len(), true);
|
||||
|
||||
assert_eq!(points_proj_ark, iecntt_result_naive);
|
||||
|
||||
//ingo gpu
|
||||
let mut ecntt_result = points_proj.to_vec();
|
||||
ecntt(&mut ecntt_result, 0);
|
||||
|
||||
assert_ne!(ecntt_result, points_proj);
|
||||
|
||||
let mut iecntt_result = ecntt_result.clone();
|
||||
iecntt(&mut iecntt_result, 0);
|
||||
|
||||
assert_eq!(
|
||||
iecntt_result_naive,
|
||||
points_proj
|
||||
.iter()
|
||||
.map(|p| p.to_ark_affine())
|
||||
.collect::<Vec<G1Affine>>()
|
||||
);
|
||||
assert_eq!(
|
||||
iecntt_result
|
||||
.iter()
|
||||
.map(|p| p.to_ark_affine())
|
||||
.collect::<Vec<G1Affine>>(),
|
||||
points_proj
|
||||
.iter()
|
||||
.map(|p| p.to_ark_affine())
|
||||
.collect::<Vec<G1Affine>>()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ntt_batch() {
|
||||
//NTT
|
||||
let seed = None; //some value to fix the rng
|
||||
let test_size = 1 << 5;
|
||||
let batches = 4;
|
||||
|
||||
let scalars_batch: Vec<Scalar> =
|
||||
generate_random_scalars(test_size * batches, get_rng(seed));
|
||||
|
||||
let mut scalar_vec_of_vec: Vec<Vec<Scalar>> = Vec::new();
|
||||
|
||||
for i in 0..batches {
|
||||
scalar_vec_of_vec.push(scalars_batch[i * test_size..(i + 1) * test_size].to_vec());
|
||||
}
|
||||
|
||||
let mut ntt_result = scalars_batch.clone();
|
||||
|
||||
// do batch ntt
|
||||
ntt_batch(&mut ntt_result, test_size, 0);
|
||||
|
||||
let mut ntt_result_vec_of_vec = Vec::new();
|
||||
|
||||
// do ntt for every chunk
|
||||
for i in 0..batches {
|
||||
ntt_result_vec_of_vec.push(scalar_vec_of_vec[i].clone());
|
||||
ntt(&mut ntt_result_vec_of_vec[i], 0);
|
||||
}
|
||||
|
||||
// check that the ntt of each vec of scalars is equal to the intt of the specific batch
|
||||
for i in 0..batches {
|
||||
assert_eq!(
|
||||
ntt_result_vec_of_vec[i],
|
||||
ntt_result[i * test_size..(i + 1) * test_size]
|
||||
);
|
||||
}
|
||||
|
||||
// check that ntt output is different from input
|
||||
assert_ne!(ntt_result, scalars_batch);
|
||||
|
||||
let mut intt_result = ntt_result.clone();
|
||||
|
||||
// do batch intt
|
||||
intt_batch(&mut intt_result, test_size, 0);
|
||||
|
||||
let mut intt_result_vec_of_vec = Vec::new();
|
||||
|
||||
// do intt for every chunk
|
||||
for i in 0..batches {
|
||||
intt_result_vec_of_vec.push(ntt_result_vec_of_vec[i].clone());
|
||||
intt(&mut intt_result_vec_of_vec[i], 0);
|
||||
}
|
||||
|
||||
// check that the intt of each vec of scalars is equal to the intt of the specific batch
|
||||
for i in 0..batches {
|
||||
assert_eq!(
|
||||
intt_result_vec_of_vec[i],
|
||||
intt_result[i * test_size..(i + 1) * test_size]
|
||||
);
|
||||
}
|
||||
|
||||
assert_eq!(intt_result, scalars_batch);
|
||||
|
||||
// //ECNTT
|
||||
let points_proj = generate_random_points_proj(test_size * batches, get_rng(seed));
|
||||
|
||||
let mut points_vec_of_vec: Vec<Vec<Point>> = Vec::new();
|
||||
|
||||
for i in 0..batches {
|
||||
points_vec_of_vec.push(points_proj[i * test_size..(i + 1) * test_size].to_vec());
|
||||
}
|
||||
|
||||
let mut ntt_result_points = points_proj.clone();
|
||||
|
||||
// do batch ecintt
|
||||
ecntt_batch(&mut ntt_result_points, test_size, 0);
|
||||
|
||||
let mut ntt_result_points_vec_of_vec = Vec::new();
|
||||
|
||||
for i in 0..batches {
|
||||
ntt_result_points_vec_of_vec.push(points_vec_of_vec[i].clone());
|
||||
ecntt(&mut ntt_result_points_vec_of_vec[i], 0);
|
||||
}
|
||||
|
||||
for i in 0..batches {
|
||||
assert_eq!(
|
||||
ntt_result_points_vec_of_vec[i],
|
||||
ntt_result_points[i * test_size..(i + 1) * test_size]
|
||||
);
|
||||
}
|
||||
|
||||
assert_ne!(ntt_result_points, points_proj);
|
||||
|
||||
let mut intt_result_points = ntt_result_points.clone();
|
||||
|
||||
// do batch ecintt
|
||||
iecntt_batch(&mut intt_result_points, test_size, 0);
|
||||
|
||||
let mut intt_result_points_vec_of_vec = Vec::new();
|
||||
|
||||
// do ecintt for every chunk
|
||||
for i in 0..batches {
|
||||
intt_result_points_vec_of_vec.push(ntt_result_points_vec_of_vec[i].clone());
|
||||
iecntt(&mut intt_result_points_vec_of_vec[i], 0);
|
||||
}
|
||||
|
||||
// check that the ecintt of each vec of scalars is equal to the intt of the specific batch
|
||||
for i in 0..batches {
|
||||
assert_eq!(
|
||||
intt_result_points_vec_of_vec[i],
|
||||
intt_result_points[i * test_size..(i + 1) * test_size]
|
||||
);
|
||||
}
|
||||
|
||||
assert_eq!(intt_result_points, points_proj);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_interpolation() {
|
||||
let log_test_size = 7;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_scalars(test_size, log_test_size, true);
|
||||
|
||||
reverse_order_scalars(&mut d_evals);
|
||||
let mut d_coeffs = interpolate_scalars(&mut d_evals, &mut d_domain);
|
||||
intt(&mut evals_mut, 0);
|
||||
let mut h_coeffs: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
|
||||
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs, evals_mut);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_batch_interpolation() {
|
||||
let batch_size = 4;
|
||||
let log_test_size = 10;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_scalars(test_size * batch_size, log_test_size, true);
|
||||
|
||||
reverse_order_scalars_batch(&mut d_evals, batch_size);
|
||||
let mut d_coeffs = interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size);
|
||||
intt_batch(&mut evals_mut, test_size, 0);
|
||||
let mut h_coeffs: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
|
||||
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs, evals_mut);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_point_interpolation() {
|
||||
let log_test_size = 6;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_points(test_size, log_test_size, true);
|
||||
|
||||
reverse_order_points(&mut d_evals);
|
||||
let mut d_coeffs = interpolate_points(&mut d_evals, &mut d_domain);
|
||||
iecntt(&mut evals_mut[..], 0);
|
||||
let mut h_coeffs: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
|
||||
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs, *evals_mut);
|
||||
for h in h_coeffs.iter() {
|
||||
assert_ne!(*h, Point::zero());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_point_batch_interpolation() {
|
||||
let batch_size = 4;
|
||||
let log_test_size = 6;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_points(test_size * batch_size, log_test_size, true);
|
||||
|
||||
reverse_order_points_batch(&mut d_evals, batch_size);
|
||||
let mut d_coeffs = interpolate_points_batch(&mut d_evals, &mut d_domain, batch_size);
|
||||
iecntt_batch(&mut evals_mut[..], test_size, 0);
|
||||
let mut h_coeffs: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
|
||||
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs, *evals_mut);
|
||||
for h in h_coeffs.iter() {
|
||||
assert_ne!(*h, Point::zero());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_evaluation() {
|
||||
let log_test_domain_size = 8;
|
||||
let coeff_size = 1 << 6;
|
||||
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
|
||||
let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);
|
||||
|
||||
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
|
||||
let mut d_coeffs_domain = interpolate_scalars(&mut d_evals, &mut d_domain_inv);
|
||||
let mut h_coeffs_domain: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
|
||||
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs, h_coeffs_domain[..coeff_size]);
|
||||
for i in coeff_size.. (1 << log_test_domain_size) {
|
||||
assert_eq!(Scalar::zero(), h_coeffs_domain[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_batch_evaluation() {
|
||||
let batch_size = 6;
|
||||
let log_test_domain_size = 8;
|
||||
let domain_size = 1 << log_test_domain_size;
|
||||
let coeff_size = 1 << 6;
|
||||
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size * batch_size, log_test_domain_size, false);
|
||||
let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);
|
||||
|
||||
let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
|
||||
let mut d_coeffs_domain = interpolate_scalars_batch(&mut d_evals, &mut d_domain_inv, batch_size);
|
||||
let mut h_coeffs_domain: Vec<Scalar> = (0..domain_size * batch_size).map(|_| Scalar::zero()).collect();
|
||||
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
|
||||
|
||||
for j in 0..batch_size {
|
||||
assert_eq!(h_coeffs[j * coeff_size..(j + 1) * coeff_size], h_coeffs_domain[j * domain_size..j * domain_size + coeff_size]);
|
||||
for i in coeff_size..domain_size {
|
||||
assert_eq!(Scalar::zero(), h_coeffs_domain[j * domain_size + i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_point_evaluation() {
|
||||
let log_test_domain_size = 7;
|
||||
let coeff_size = 1 << 7;
|
||||
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_points(coeff_size, log_test_domain_size, false);
|
||||
let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);
|
||||
|
||||
let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
|
||||
let mut d_coeffs_domain = interpolate_points(&mut d_evals, &mut d_domain_inv);
|
||||
let mut h_coeffs_domain: Vec<Point> = (0..1 << log_test_domain_size).map(|_| Point::zero()).collect();
|
||||
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs[..], h_coeffs_domain[..coeff_size]);
|
||||
for i in coeff_size..(1 << log_test_domain_size) {
|
||||
assert_eq!(Point::zero(), h_coeffs_domain[i]);
|
||||
}
|
||||
for i in 0..coeff_size {
|
||||
assert_ne!(h_coeffs_domain[i], Point::zero());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_point_batch_evaluation() {
|
||||
let batch_size = 4;
|
||||
let log_test_domain_size = 6;
|
||||
let domain_size = 1 << log_test_domain_size;
|
||||
let coeff_size = 1 << 5;
|
||||
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_points(coeff_size * batch_size, log_test_domain_size, false);
|
||||
let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);
|
||||
|
||||
let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
|
||||
let mut d_coeffs_domain = interpolate_points_batch(&mut d_evals, &mut d_domain_inv, batch_size);
|
||||
let mut h_coeffs_domain: Vec<Point> = (0..domain_size * batch_size).map(|_| Point::zero()).collect();
|
||||
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
|
||||
|
||||
for j in 0..batch_size {
|
||||
assert_eq!(h_coeffs[j * coeff_size..(j + 1) * coeff_size], h_coeffs_domain[j * domain_size..(j * domain_size + coeff_size)]);
|
||||
for i in coeff_size..domain_size {
|
||||
assert_eq!(Point::zero(), h_coeffs_domain[j * domain_size + i]);
|
||||
}
|
||||
for i in j * domain_size..(j * domain_size + coeff_size) {
|
||||
assert_ne!(h_coeffs_domain[i], Point::zero());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_evaluation_on_trivial_coset() {
|
||||
// checks that the evaluations on the subgroup is the same as on the coset generated by 1
|
||||
let log_test_domain_size = 8;
|
||||
let coeff_size = 1 << 6;
|
||||
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
|
||||
let (_, _, mut d_domain_inv) = set_up_scalars(coeff_size, log_test_domain_size, true);
|
||||
let mut d_trivial_coset_powers = build_domain(1 << log_test_domain_size, 0, false);
|
||||
|
||||
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
|
||||
let mut h_coeffs: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals.copy_to(&mut h_coeffs[..]).unwrap();
|
||||
let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_trivial_coset_powers);
|
||||
let mut h_evals_coset: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs, h_evals_coset);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_evaluation_on_coset() {
|
||||
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
|
||||
let log_test_size = 8;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(test_size, log_test_size, false);
|
||||
let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
|
||||
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
|
||||
|
||||
let mut d_evals_large = evaluate_scalars(&mut d_coeffs, &mut d_large_domain);
|
||||
let mut h_evals_large: Vec<Scalar> = (0..2 * test_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
|
||||
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
|
||||
let mut h_evals: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals.copy_to(&mut h_evals[..]).unwrap();
|
||||
let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
|
||||
let mut h_evals_coset: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
|
||||
|
||||
assert_eq!(h_evals[..], h_evals_large[..test_size]);
|
||||
assert_eq!(h_evals_coset[..], h_evals_large[test_size..2 * test_size]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_batch_evaluation_on_coset() {
|
||||
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
|
||||
let batch_size = 4;
|
||||
let log_test_size = 6;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(test_size * batch_size, log_test_size, false);
|
||||
let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
|
||||
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
|
||||
|
||||
let mut d_evals_large = evaluate_scalars_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
|
||||
let mut h_evals_large: Vec<Scalar> = (0..2 * test_size * batch_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
|
||||
let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
|
||||
let mut h_evals: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals.copy_to(&mut h_evals[..]).unwrap();
|
||||
let mut d_evals_coset = evaluate_scalars_on_coset_batch(&mut d_coeffs, &mut d_domain, batch_size, &mut d_coset_powers);
|
||||
let mut h_evals_coset: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
|
||||
|
||||
for i in 0..batch_size {
|
||||
assert_eq!(h_evals_large[2 * i * test_size..(2 * i + 1) * test_size], h_evals[i * test_size..(i + 1) * test_size]);
|
||||
assert_eq!(h_evals_large[(2 * i + 1) * test_size..(2 * i + 2) * test_size], h_evals_coset[i * test_size..(i + 1) * test_size]);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_point_evaluation_on_coset() {
|
||||
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
|
||||
let log_test_size = 8;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (_, mut d_coeffs, mut d_domain) = set_up_points(test_size, log_test_size, false);
|
||||
let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
|
||||
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
|
||||
|
||||
let mut d_evals_large = evaluate_points(&mut d_coeffs, &mut d_large_domain);
|
||||
let mut h_evals_large: Vec<Point> = (0..2 * test_size).map(|_| Point::zero()).collect();
|
||||
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
|
||||
let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
|
||||
let mut h_evals: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
|
||||
d_evals.copy_to(&mut h_evals[..]).unwrap();
|
||||
let mut d_evals_coset = evaluate_points_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
|
||||
let mut h_evals_coset: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
|
||||
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
|
||||
|
||||
assert_eq!(h_evals[..], h_evals_large[..test_size]);
|
||||
assert_eq!(h_evals_coset[..], h_evals_large[test_size..2 * test_size]);
|
||||
for i in 0..test_size {
|
||||
assert_ne!(h_evals[i], Point::zero());
|
||||
assert_ne!(h_evals_coset[i], Point::zero());
|
||||
assert_ne!(h_evals_large[2 * i], Point::zero());
|
||||
assert_ne!(h_evals_large[2 * i + 1], Point::zero());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_point_batch_evaluation_on_coset() {
|
||||
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
|
||||
let batch_size = 2;
|
||||
let log_test_size = 6;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (_, mut d_coeffs, mut d_domain) = set_up_points(test_size * batch_size, log_test_size, false);
|
||||
let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
|
||||
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
|
||||
|
||||
let mut d_evals_large = evaluate_points_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
|
||||
let mut h_evals_large: Vec<Point> = (0..2 * test_size * batch_size).map(|_| Point::zero()).collect();
|
||||
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
|
||||
let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
|
||||
let mut h_evals: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
|
||||
d_evals.copy_to(&mut h_evals[..]).unwrap();
|
||||
let mut d_evals_coset = evaluate_points_on_coset_batch(&mut d_coeffs, &mut d_domain, batch_size, &mut d_coset_powers);
|
||||
let mut h_evals_coset: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
|
||||
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
|
||||
|
||||
for i in 0..batch_size {
|
||||
assert_eq!(h_evals_large[2 * i * test_size..(2 * i + 1) * test_size], h_evals[i * test_size..(i + 1) * test_size]);
|
||||
assert_eq!(h_evals_large[(2 * i + 1) * test_size..(2 * i + 2) * test_size], h_evals_coset[i * test_size..(i + 1) * test_size]);
|
||||
}
|
||||
for i in 0..test_size * batch_size {
|
||||
assert_ne!(h_evals[i], Point::zero());
|
||||
assert_ne!(h_evals_coset[i], Point::zero());
|
||||
assert_ne!(h_evals_large[2 * i], Point::zero());
|
||||
assert_ne!(h_evals_large[2 * i + 1], Point::zero());
|
||||
}
|
||||
}
|
||||
|
||||
// testing matrix multiplication by comparing the result of FFT with the naive multiplication by the DFT matrix
|
||||
#[test]
|
||||
fn test_matrix_multiplication() {
|
||||
let seed = None; // some value to fix the rng
|
||||
let test_size = 1 << 5;
|
||||
let rou = Fr::get_root_of_unity(test_size).unwrap();
|
||||
let matrix_flattened: Vec<Scalar> = (0..test_size).map(
|
||||
|row_num| { (0..test_size).map(
|
||||
|col_num| {
|
||||
let pow: [u64; 1] = [(row_num * col_num).try_into().unwrap()];
|
||||
Scalar::from_ark(Fr::pow(&rou, &pow).into_repr())
|
||||
}).collect::<Vec<Scalar>>()
|
||||
}).flatten().collect::<Vec<_>>();
|
||||
let vector: Vec<Scalar> = generate_random_scalars(test_size, get_rng(seed));
|
||||
|
||||
let result = mult_matrix_by_vec(&matrix_flattened, &vector, 0);
|
||||
let mut ntt_result = vector.clone();
|
||||
ntt(&mut ntt_result, 0);
|
||||
|
||||
// we don't use the same roots of unity as arkworks, so the results are permutations
|
||||
// of one another and the only guaranteed fixed scalars are the following ones:
|
||||
assert_eq!(result[0], ntt_result[0]);
|
||||
assert_eq!(result[test_size >> 1], ntt_result[test_size >> 1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(non_snake_case)]
|
||||
fn test_vec_scalar_mul() {
|
||||
let mut intoo = [Scalar::one(), Scalar::one(), Scalar::zero()];
|
||||
let expected = [Scalar::one(), Scalar::zero(), Scalar::zero()];
|
||||
mult_sc_vec(&mut intoo, &expected, 0);
|
||||
assert_eq!(intoo, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(non_snake_case)]
|
||||
fn test_vec_point_mul() {
|
||||
let dummy_one = Point {
|
||||
x: Base::one(),
|
||||
y: Base::one(),
|
||||
z: Base::one(),
|
||||
};
|
||||
|
||||
let mut inout = [dummy_one, dummy_one, Point::zero()];
|
||||
let scalars = [Scalar::one(), Scalar::zero(), Scalar::zero()];
|
||||
let expected = [dummy_one, Point::zero(), Point::zero()];
|
||||
multp_vec(&mut inout, &scalars, 0);
|
||||
assert_eq!(inout, expected);
|
||||
}
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
[package]
|
||||
name = "bls12-381"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = [ "Ingonyama" ]
|
||||
|
||||
[dependencies]
|
||||
icicle-core = { path = "../icicle-core" }
|
||||
|
||||
hex = "*"
|
||||
ark-std = "0.3.0"
|
||||
ark-ff = "0.3.0"
|
||||
ark-poly = "0.3.0"
|
||||
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
|
||||
ark-bls12-381 = "0.3.0"
|
||||
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_derive = "1.0"
|
||||
serde_cbor = "0.11.2"
|
||||
|
||||
rustacuda = "0.1"
|
||||
rustacuda_core = "0.1"
|
||||
rustacuda_derive = "0.1"
|
||||
|
||||
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
|
||||
|
||||
[build-dependencies]
|
||||
cc = { version = "1.0", features = ["parallel"] }
|
||||
|
||||
[dev-dependencies]
|
||||
"criterion" = "0.4.0"
|
||||
|
||||
[features]
|
||||
g2 = []
|
||||
@@ -1,36 +0,0 @@
|
||||
use std::env;
|
||||
|
||||
fn main() {
|
||||
//TODO: check cargo features selected
|
||||
//TODO: can conflict/duplicate with make ?
|
||||
|
||||
println!("cargo:rerun-if-env-changed=CXXFLAGS");
|
||||
println!("cargo:rerun-if-changed=./icicle");
|
||||
|
||||
let arch_type = env::var("ARCH_TYPE").unwrap_or(String::from("native"));
|
||||
let stream_type = env::var("DEFAULT_STREAM").unwrap_or(String::from("legacy"));
|
||||
|
||||
let mut arch = String::from("-arch=");
|
||||
arch.push_str(&arch_type);
|
||||
let mut stream = String::from("-default-stream=");
|
||||
stream.push_str(&stream_type);
|
||||
|
||||
let mut nvcc = cc::Build::new();
|
||||
|
||||
println!("Compiling icicle library using arch: {}", &arch);
|
||||
|
||||
if cfg!(feature = "g2") {
|
||||
nvcc.define("G2_DEFINED", None);
|
||||
}
|
||||
nvcc.cuda(true);
|
||||
nvcc.define("FEATURE_BLS12_381", None);
|
||||
nvcc.debug(false);
|
||||
nvcc.flag(&arch);
|
||||
nvcc.flag(&stream);
|
||||
nvcc.shared_flag(false);
|
||||
// nvcc.static_flag(true);
|
||||
nvcc.files([
|
||||
"../icicle-cuda/curves/index.cu",
|
||||
]);
|
||||
nvcc.compile("ingo_icicle"); //TODO: extension??
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
pub trait Field<const NUM_LIMBS: usize> {
|
||||
const MODOLUS: [u32;NUM_LIMBS];
|
||||
const LIMBS: usize = NUM_LIMBS;
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
pub mod field;
|
||||
pub mod scalar;
|
||||
pub mod point;
|
||||
@@ -1,106 +0,0 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
use super::scalar::{get_fixed_limbs, self};
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointT<BF: scalar::ScalarTrait> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
pub z: BF,
|
||||
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> Default for PointT<BF> {
|
||||
fn default() -> Self {
|
||||
PointT::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn zero() -> Self {
|
||||
PointT {
|
||||
x: BF::zero(),
|
||||
y: BF::one(),
|
||||
z: BF::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infinity() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinityT<BF> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
}
|
||||
|
||||
impl<BF: scalar::ScalarTrait> Default for PointAffineNoInfinityT<BF> {
|
||||
fn default() -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::zero(),
|
||||
y: BF::zero(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointAffineNoInfinityT<BF> {
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> PointT<BF> {
|
||||
PointT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BF::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
PointT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y),
|
||||
z: BF::from_limbs(z)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> PointT<BF> {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BF::base_limbs(), "length must be 3 * {}", BF::base_limbs());
|
||||
PointT {
|
||||
x: BF::from_limbs(value[..BF::base_limbs()].try_into().unwrap()),
|
||||
y: BF::from_limbs(value[BF::base_limbs()..BF::base_limbs() * 2].try_into().unwrap()),
|
||||
z: BF::from_limbs(value[BF::base_limbs() * 2..].try_into().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinityT<BF> {
|
||||
PointAffineNoInfinityT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,102 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination};
|
||||
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use super::field::{Field, self};
|
||||
|
||||
pub fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
|
||||
match val.len() {
|
||||
n if n < NUM_LIMBS => {
|
||||
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
|
||||
padded[..val.len()].copy_from_slice(&val);
|
||||
padded
|
||||
}
|
||||
n if n == NUM_LIMBS => val.try_into().unwrap(),
|
||||
_ => panic!("slice has too many elements"),
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ScalarTrait{
|
||||
fn base_limbs() -> usize;
|
||||
fn zero() -> Self;
|
||||
fn from_limbs(value: &[u32]) -> Self;
|
||||
fn one() -> Self;
|
||||
fn to_bytes_le(&self) -> Vec<u8>;
|
||||
fn limbs(&self) -> &[u32];
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy)]
|
||||
#[repr(C)]
|
||||
pub struct ScalarT<M, const NUM_LIMBS: usize> {
|
||||
pub(crate) phantom: PhantomData<M>,
|
||||
pub(crate) value : [u32; NUM_LIMBS]
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarTrait for ScalarT<M, NUM_LIMBS>
|
||||
where
|
||||
M: Field<NUM_LIMBS>,
|
||||
{
|
||||
|
||||
fn base_limbs() -> usize {
|
||||
return NUM_LIMBS;
|
||||
}
|
||||
|
||||
fn zero() -> Self {
|
||||
ScalarT {
|
||||
value: [0u32; NUM_LIMBS],
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
value: get_fixed_limbs(value),
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
ScalarT { value: s, phantom: PhantomData }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.value
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
fn limbs(&self) -> &[u32] {
|
||||
&self.value
|
||||
}
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarT<M, NUM_LIMBS> where M: field::Field<NUM_LIMBS>{
|
||||
pub fn from_limbs_le(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
Self::from_limbs(value)
|
||||
}
|
||||
|
||||
pub fn from_limbs_be(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
let mut value = value.to_vec();
|
||||
value.reverse();
|
||||
Self::from_limbs_le(&value)
|
||||
}
|
||||
|
||||
// Additional Functions
|
||||
pub fn add(&self, other:ScalarT<M, NUM_LIMBS>) -> ScalarT<M,NUM_LIMBS>{ // overload +
|
||||
return ScalarT{value: [self.value[0] + other.value[0];NUM_LIMBS], phantom: PhantomData };
|
||||
}
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination, DeviceCopy};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::basic_structs::point::{PointT, PointAffineNoInfinityT};
|
||||
use crate::basic_structs::scalar::ScalarT;
|
||||
use crate::basic_structs::field::Field;
|
||||
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct ScalarField;
|
||||
impl Field<8> for ScalarField {
|
||||
const MODOLUS: [u32; 8] = [0x0;8];
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct BaseField;
|
||||
impl Field<12> for BaseField {
|
||||
const MODOLUS: [u32; 12] = [0x0;12];
|
||||
}
|
||||
|
||||
|
||||
pub type Scalar = ScalarT<ScalarField,8>;
|
||||
impl Default for Scalar {
|
||||
fn default() -> Self {
|
||||
Self{value: [0x0;ScalarField::LIMBS], phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl DeviceCopy for Scalar{}
|
||||
|
||||
|
||||
pub type Base = ScalarT<BaseField,12>;
|
||||
impl Default for Base {
|
||||
fn default() -> Self {
|
||||
Self{value: [0x0;BaseField::LIMBS], phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl DeviceCopy for Base{}
|
||||
|
||||
pub type Point = PointT<Base>;
|
||||
pub type PointAffineNoInfinity = PointAffineNoInfinityT<Base>;
|
||||
|
||||
extern "C" {
|
||||
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
@@ -1,798 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use ark_std::UniformRand;
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda::CudaFlags;
|
||||
use rustacuda::memory::DeviceBox;
|
||||
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
|
||||
use rustacuda_core::DevicePointer;
|
||||
use std::mem::transmute;
|
||||
use crate::basic_structs::scalar::ScalarTrait;
|
||||
use crate::curve_structs::*;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
use ark_bls12_381::{Fq as Fq_BLS12_381, Fr as Fr_BLS12_381, G1Affine as G1Affine_BLS12_381, G1Projective as G1Projective_BLS12_381};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use rustacuda::memory::{CopyDestination, DeviceCopy};
|
||||
|
||||
extern "C" {
|
||||
fn msm_cuda(
|
||||
out: *mut Point,
|
||||
points: *const PointAffineNoInfinity,
|
||||
scalars: *const Scalar,
|
||||
count: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn msm_batch_cuda(
|
||||
out: *mut Point,
|
||||
points: *const PointAffineNoInfinity,
|
||||
scalars: *const Scalar,
|
||||
batch_size: usize,
|
||||
msm_size: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn commit_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_scalars: DevicePointer<Scalar>,
|
||||
d_points: DevicePointer<PointAffineNoInfinity>,
|
||||
count: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn commit_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_scalars: DevicePointer<Scalar>,
|
||||
d_points: DevicePointer<PointAffineNoInfinity>,
|
||||
count: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn build_domain_cuda(domain_size: usize, logn: usize, inverse: bool, device_id: usize) -> DevicePointer<Scalar>;
|
||||
|
||||
fn ntt_cuda(inout: *mut Scalar, n: usize, inverse: bool, device_id: usize) -> c_int;
|
||||
|
||||
fn ecntt_cuda(inout: *mut Point, n: usize, inverse: bool, device_id: usize) -> c_int;
|
||||
|
||||
fn ntt_batch_cuda(
|
||||
inout: *mut Scalar,
|
||||
arr_size: usize,
|
||||
n: usize,
|
||||
inverse: bool,
|
||||
) -> c_int;
|
||||
|
||||
fn ecntt_batch_cuda(inout: *mut Point, arr_size: usize, n: usize, inverse: bool) -> c_int;
|
||||
|
||||
fn interpolate_scalars_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_evaluations: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_scalars_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_evaluations: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_points_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_evaluations: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_points_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_evaluations: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_on_coset_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_on_coset_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_on_coset_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_on_coset_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_scalars_cuda(
|
||||
d_arr: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_scalars_batch_cuda(
|
||||
d_arr: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_points_cuda(
|
||||
d_arr: DevicePointer<Point>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_points_batch_cuda(
|
||||
d_arr: DevicePointer<Point>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn vec_mod_mult_point(
|
||||
inout: *mut Point,
|
||||
scalars: *const Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
|
||||
fn vec_mod_mult_scalar(
|
||||
inout: *mut Scalar,
|
||||
scalars: *const Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
|
||||
fn matrix_vec_mod_mult(
|
||||
matrix_flattened: *const Scalar,
|
||||
input: *const Scalar,
|
||||
output: *mut Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
}
|
||||
|
||||
pub fn msm(points: &[PointAffineNoInfinity], scalars: &[Scalar], device_id: usize) -> Point {
|
||||
let count = points.len();
|
||||
if count != scalars.len() {
|
||||
todo!("variable length")
|
||||
}
|
||||
|
||||
let mut ret = Point::zero();
|
||||
unsafe {
|
||||
msm_cuda(
|
||||
&mut ret as *mut _ as *mut Point,
|
||||
points as *const _ as *const PointAffineNoInfinity,
|
||||
scalars as *const _ as *const Scalar,
|
||||
scalars.len(),
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn msm_batch(
|
||||
points: &[PointAffineNoInfinity],
|
||||
scalars: &[Scalar],
|
||||
batch_size: usize,
|
||||
device_id: usize,
|
||||
) -> Vec<Point> {
|
||||
let count = points.len();
|
||||
if count != scalars.len() {
|
||||
todo!("variable length")
|
||||
}
|
||||
|
||||
let mut ret = vec![Point::zero(); batch_size];
|
||||
|
||||
unsafe {
|
||||
msm_batch_cuda(
|
||||
&mut ret[0] as *mut _ as *mut Point,
|
||||
points as *const _ as *const PointAffineNoInfinity,
|
||||
scalars as *const _ as *const Scalar,
|
||||
batch_size,
|
||||
count / batch_size,
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn commit(
|
||||
points: &mut DeviceBuffer<PointAffineNoInfinity>,
|
||||
scalars: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBox<Point> {
|
||||
let mut res = DeviceBox::new(&Point::zero()).unwrap();
|
||||
unsafe {
|
||||
commit_cuda(
|
||||
res.as_device_ptr(),
|
||||
scalars.as_device_ptr(),
|
||||
points.as_device_ptr(),
|
||||
scalars.len(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn commit_batch(
|
||||
points: &mut DeviceBuffer<PointAffineNoInfinity>,
|
||||
scalars: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(batch_size).unwrap() };
|
||||
unsafe {
|
||||
commit_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
scalars.as_device_ptr(),
|
||||
points.as_device_ptr(),
|
||||
scalars.len() / batch_size,
|
||||
batch_size,
|
||||
0,
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Compute an in-place NTT on the input data.
|
||||
fn ntt_internal(values: &mut [Scalar], device_id: usize, inverse: bool) -> i32 {
|
||||
let ret_code = unsafe {
|
||||
ntt_cuda(
|
||||
values as *mut _ as *mut Scalar,
|
||||
values.len(),
|
||||
inverse,
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
ret_code
|
||||
}
|
||||
|
||||
pub fn ntt(values: &mut [Scalar], device_id: usize) {
|
||||
ntt_internal(values, device_id, false);
|
||||
}
|
||||
|
||||
pub fn intt(values: &mut [Scalar], device_id: usize) {
|
||||
ntt_internal(values, device_id, true);
|
||||
}
|
||||
|
||||
/// Compute an in-place NTT on the input data.
|
||||
fn ntt_internal_batch(
|
||||
values: &mut [Scalar],
|
||||
device_id: usize,
|
||||
batch_size: usize,
|
||||
inverse: bool,
|
||||
) -> i32 {
|
||||
unsafe {
|
||||
ntt_batch_cuda(
|
||||
values as *mut _ as *mut Scalar,
|
||||
values.len(),
|
||||
batch_size,
|
||||
inverse,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ntt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
|
||||
ntt_internal_batch(values, 0, batch_size, false);
|
||||
}
|
||||
|
||||
pub fn intt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
|
||||
ntt_internal_batch(values, 0, batch_size, true);
|
||||
}
|
||||
|
||||
/// Compute an in-place ECNTT on the input data.
|
||||
fn ecntt_internal(values: &mut [Point], inverse: bool, device_id: usize) -> i32 {
|
||||
unsafe {
|
||||
ecntt_cuda(
|
||||
values as *mut _ as *mut Point,
|
||||
values.len(),
|
||||
inverse,
|
||||
device_id,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ecntt(values: &mut [Point], device_id: usize) {
|
||||
ecntt_internal(values, false, device_id);
|
||||
}
|
||||
|
||||
/// Compute an in-place iECNTT on the input data.
|
||||
pub fn iecntt(values: &mut [Point], device_id: usize) {
|
||||
ecntt_internal(values, true, device_id);
|
||||
}
|
||||
|
||||
/// Compute an in-place ECNTT on the input data.
|
||||
fn ecntt_internal_batch(
|
||||
values: &mut [Point],
|
||||
device_id: usize,
|
||||
batch_size: usize,
|
||||
inverse: bool,
|
||||
) -> i32 {
|
||||
unsafe {
|
||||
ecntt_batch_cuda(
|
||||
values as *mut _ as *mut Point,
|
||||
values.len(),
|
||||
batch_size,
|
||||
inverse,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
|
||||
ecntt_internal_batch(values, 0, batch_size, false);
|
||||
}
|
||||
|
||||
/// Compute an in-place iECNTT on the input data.
|
||||
pub fn iecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
|
||||
ecntt_internal_batch(values, 0, batch_size, true);
|
||||
}
|
||||
|
||||
pub fn build_domain(domain_size: usize, logn: usize, inverse: bool) -> DeviceBuffer<Scalar> {
|
||||
unsafe {
|
||||
DeviceBuffer::from_raw_parts(build_domain_cuda(
|
||||
domain_size,
|
||||
logn,
|
||||
inverse,
|
||||
0
|
||||
), domain_size)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn reverse_order_scalars(
|
||||
d_scalars: &mut DeviceBuffer<Scalar>,
|
||||
) {
|
||||
unsafe { reverse_order_scalars_cuda(
|
||||
d_scalars.as_device_ptr(),
|
||||
d_scalars.len(),
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_scalars_batch(
|
||||
d_scalars: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) {
|
||||
unsafe { reverse_order_scalars_batch_cuda(
|
||||
d_scalars.as_device_ptr(),
|
||||
d_scalars.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_points(
|
||||
d_points: &mut DeviceBuffer<Point>,
|
||||
) {
|
||||
unsafe { reverse_order_points_cuda(
|
||||
d_points.as_device_ptr(),
|
||||
d_points.len(),
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_points_batch(
|
||||
d_points: &mut DeviceBuffer<Point>,
|
||||
batch_size: usize,
|
||||
) {
|
||||
unsafe { reverse_order_points_batch_cuda(
|
||||
d_points.as_device_ptr(),
|
||||
d_points.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn interpolate_scalars(
|
||||
d_evaluations: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe { interpolate_scalars_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_scalars_batch(
|
||||
d_evaluations: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe { interpolate_scalars_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
batch_size,
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_points(
|
||||
d_evaluations: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe { interpolate_points_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_points_batch(
|
||||
d_evaluations: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe { interpolate_points_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
batch_size,
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_on_coset(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_on_coset_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_on_coset_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_on_coset_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_on_coset(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_on_coset_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_on_coset_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_on_coset_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn multp_vec(a: &mut [Point], b: &[Scalar], device_id: usize) {
|
||||
assert_eq!(a.len(), b.len());
|
||||
unsafe {
|
||||
vec_mod_mult_point(
|
||||
a as *mut _ as *mut Point,
|
||||
b as *const _ as *const Scalar,
|
||||
a.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mult_sc_vec(a: &mut [Scalar], b: &[Scalar], device_id: usize) {
|
||||
assert_eq!(a.len(), b.len());
|
||||
unsafe {
|
||||
vec_mod_mult_scalar(
|
||||
a as *mut _ as *mut Scalar,
|
||||
b as *const _ as *const Scalar,
|
||||
a.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Multiply a matrix by a scalar:
|
||||
// `a` - flattenned matrix;
|
||||
// `b` - vector to multiply `a` by;
|
||||
pub fn mult_matrix_by_vec(a: &[Scalar], b: &[Scalar], device_id: usize) -> Vec<Scalar> {
|
||||
let mut c = Vec::with_capacity(b.len());
|
||||
for i in 0..b.len() {
|
||||
c.push(Scalar::zero());
|
||||
}
|
||||
unsafe {
|
||||
matrix_vec_mod_mult(
|
||||
a as *const _ as *const Scalar,
|
||||
b as *const _ as *const Scalar,
|
||||
c.as_mut_slice() as *mut _ as *mut Scalar,
|
||||
b.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
c
|
||||
}
|
||||
|
||||
pub fn clone_buffer<T: DeviceCopy>(buf: &mut DeviceBuffer<T>) -> DeviceBuffer<T> {
|
||||
let mut buf_cpy = unsafe { DeviceBuffer::uninitialized(buf.len()).unwrap() };
|
||||
unsafe { buf_cpy.copy_from(buf) };
|
||||
return buf_cpy;
|
||||
}
|
||||
|
||||
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> {
|
||||
let rng: Box<dyn RngCore> = match seed {
|
||||
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
|
||||
None => Box::new(rand::thread_rng()),
|
||||
};
|
||||
rng
|
||||
}
|
||||
|
||||
fn set_up_device() {
|
||||
// Set up the context, load the module, and create a stream to run kernels in.
|
||||
rustacuda::init(CudaFlags::empty()).unwrap();
|
||||
let device = Device::get_device(0).unwrap();
|
||||
let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device).unwrap();
|
||||
}
|
||||
|
||||
pub fn generate_random_points(
|
||||
count: usize,
|
||||
mut rng: Box<dyn RngCore>,
|
||||
) -> Vec<PointAffineNoInfinity> {
|
||||
(0..count)
|
||||
.map(|_| Point::from_ark(G1Projective_BLS12_381::rand(&mut rng)).to_xy_strip_z())
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_random_points_proj(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Point> {
|
||||
(0..count)
|
||||
.map(|_| Point::from_ark(G1Projective_BLS12_381::rand(&mut rng)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_random_scalars(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Scalar> {
|
||||
(0..count)
|
||||
.map(|_| Scalar::from_ark(Fr_BLS12_381::rand(&mut rng).into_repr()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn set_up_points(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Point>, DeviceBuffer<Point>, DeviceBuffer<Scalar>) {
|
||||
set_up_device();
|
||||
|
||||
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
|
||||
|
||||
let seed = Some(0); // fix the rng to get two equal scalar
|
||||
let vector = generate_random_points_proj(test_size, get_rng(seed));
|
||||
let mut vector_mut = vector.clone();
|
||||
|
||||
let mut d_vector = DeviceBuffer::from_slice(&vector[..]).unwrap();
|
||||
(vector_mut, d_vector, d_domain)
|
||||
}
|
||||
|
||||
pub fn set_up_scalars(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Scalar>, DeviceBuffer<Scalar>, DeviceBuffer<Scalar>) {
|
||||
set_up_device();
|
||||
|
||||
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
|
||||
|
||||
let seed = Some(0); // fix the rng to get two equal scalars
|
||||
let mut vector_mut = generate_random_scalars(test_size, get_rng(seed));
|
||||
|
||||
let mut d_vector = DeviceBuffer::from_slice(&vector_mut[..]).unwrap();
|
||||
(vector_mut, d_vector, d_domain)
|
||||
}
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
pub mod test_bls12_381;
|
||||
pub mod basic_structs;
|
||||
pub mod from_cuda;
|
||||
pub mod curve_structs;
|
||||
@@ -1,816 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use ark_std::UniformRand;
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda::CudaFlags;
|
||||
use rustacuda::memory::DeviceBox;
|
||||
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
|
||||
use rustacuda_core::DevicePointer;
|
||||
use std::mem::transmute;
|
||||
pub use crate::basic_structs::scalar::ScalarTrait;
|
||||
pub use crate::curve_structs::*;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
use ark_bls12_381::{Fq as Fq_BLS12_381, Fr as Fr_BLS12_381, G1Affine as G1Affine_BLS12_381, G1Projective as G1Projective_BLS12_381};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use rustacuda::memory::{CopyDestination, DeviceCopy};
|
||||
|
||||
|
||||
impl Scalar {
|
||||
pub fn to_biginteger254(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_biginteger256(ark: BigInteger256) -> Self {
|
||||
Self{ value: u64_vec_to_u32_vec(&ark.0).try_into().unwrap(), phantom : PhantomData}
|
||||
}
|
||||
|
||||
pub fn to_biginteger256_transmute(&self) -> BigInteger256 {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_biginteger_transmute(v: BigInteger256) -> Scalar {
|
||||
Scalar{ value: unsafe{ transmute(v)}, phantom : PhantomData }
|
||||
}
|
||||
|
||||
pub fn to_ark_transmute(&self) -> Fr_BLS12_381 {
|
||||
unsafe { std::mem::transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_ark_transmute(v: &Fr_BLS12_381) -> Scalar {
|
||||
unsafe { std::mem::transmute_copy(v) }
|
||||
}
|
||||
|
||||
pub fn to_ark_mod_p(&self) -> Fr_BLS12_381 {
|
||||
Fr_BLS12_381::new(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap()))
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> Fr_BLS12_381 {
|
||||
Fr_BLS12_381::from_repr(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())).unwrap()
|
||||
}
|
||||
|
||||
pub fn from_ark(v: BigInteger256) -> Scalar {
|
||||
Self { value : u64_vec_to_u32_vec(&v.0).try_into().unwrap(), phantom: PhantomData}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl Base {
|
||||
pub fn to_ark(&self) -> BigInteger384 {
|
||||
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger384) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl Point {
|
||||
pub fn to_ark(&self) -> G1Projective_BLS12_381 {
|
||||
self.to_ark_affine().into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_BLS12_381 {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_BLS12_381::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_BLS12_381) -> Point {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point {
|
||||
x: Base::from_ark((ark.x * z_invsq).into_repr()),
|
||||
y: Base::from_ark((ark.y * z_invq3).into_repr()),
|
||||
z: Base::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PointAffineNoInfinity {
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_BLS12_381 {
|
||||
G1Affine_BLS12_381::new(Fq_BLS12_381::new(self.x.to_ark()), Fq_BLS12_381::new(self.y.to_ark()), false)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_BLS12_381 {
|
||||
G1Affine_BLS12_381::new(
|
||||
Fq_BLS12_381::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_BLS12_381::from_repr(self.y.to_ark()).unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(p: &G1Affine_BLS12_381) -> Self {
|
||||
PointAffineNoInfinity {
|
||||
x: Base::from_ark(p.x.into_repr()),
|
||||
y: Base::from_ark(p.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Point {
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity {
|
||||
x: Base::from_ark(ark_affine.x.into_repr()),
|
||||
y: Base::from_ark(ark_affine.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests_bls12_381 {
|
||||
use std::ops::Add;
|
||||
use ark_bls12_381::{Fr, G1Affine, G1Projective};
|
||||
use ark_ec::{msm::VariableBaseMSM, AffineCurve, ProjectiveCurve};
|
||||
use ark_ff::{FftField, Field, Zero, PrimeField};
|
||||
use ark_std::UniformRand;
|
||||
use rustacuda::prelude::{DeviceBuffer, CopyDestination};
|
||||
use crate::curve_structs::{Point, Scalar, Base};
|
||||
use crate::basic_structs::scalar::ScalarTrait;
|
||||
use crate::from_cuda::{generate_random_points, get_rng, generate_random_scalars, msm, msm_batch, set_up_scalars, commit, commit_batch, ntt, intt, generate_random_points_proj, ecntt, iecntt, ntt_batch, ecntt_batch, iecntt_batch, intt_batch, reverse_order_scalars_batch, interpolate_scalars_batch, set_up_points, reverse_order_points, interpolate_points, reverse_order_points_batch, interpolate_points_batch, evaluate_scalars, interpolate_scalars, reverse_order_scalars, evaluate_points, build_domain, evaluate_scalars_on_coset, evaluate_points_on_coset, mult_matrix_by_vec, mult_sc_vec, multp_vec,evaluate_scalars_batch, evaluate_points_batch, evaluate_scalars_on_coset_batch, evaluate_points_on_coset_batch};
|
||||
|
||||
fn random_points_ark_proj(nof_elements: usize) -> Vec<G1Projective> {
|
||||
let mut rng = ark_std::rand::thread_rng();
|
||||
let mut points_ga: Vec<G1Projective> = Vec::new();
|
||||
for _ in 0..nof_elements {
|
||||
let aff = G1Projective::rand(&mut rng);
|
||||
points_ga.push(aff);
|
||||
}
|
||||
points_ga
|
||||
}
|
||||
|
||||
fn ecntt_arc_naive(
|
||||
points: &Vec<G1Projective>,
|
||||
size: usize,
|
||||
inverse: bool,
|
||||
) -> Vec<G1Projective> {
|
||||
let mut result: Vec<G1Projective> = Vec::new();
|
||||
for _ in 0..size {
|
||||
result.push(G1Projective::zero());
|
||||
}
|
||||
let rou: Fr;
|
||||
if !inverse {
|
||||
rou = Fr::get_root_of_unity(size).unwrap();
|
||||
} else {
|
||||
rou = Fr::inverse(&Fr::get_root_of_unity(size).unwrap()).unwrap();
|
||||
}
|
||||
for k in 0..size {
|
||||
for l in 0..size {
|
||||
let pow: [u64; 1] = [(l * k).try_into().unwrap()];
|
||||
let mul_rou = Fr::pow(&rou, &pow);
|
||||
result[k] = result[k].add(points[l].into_affine().mul(mul_rou));
|
||||
}
|
||||
}
|
||||
if inverse {
|
||||
let size2 = size as u64;
|
||||
for k in 0..size {
|
||||
let multfactor = Fr::inverse(&Fr::from(size2)).unwrap();
|
||||
result[k] = result[k].into_affine().mul(multfactor);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
fn check_eq(points: &Vec<G1Projective>, points2: &Vec<G1Projective>) -> bool {
|
||||
let mut eq = true;
|
||||
for i in 0..points.len() {
|
||||
if points2[i].ne(&points[i]) {
|
||||
eq = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return eq;
|
||||
}
|
||||
|
||||
fn test_naive_ark_ecntt(size: usize) {
|
||||
let points = random_points_ark_proj(size);
|
||||
let result1: Vec<G1Projective> = ecntt_arc_naive(&points, size, false);
|
||||
let result2: Vec<G1Projective> = ecntt_arc_naive(&result1, size, true);
|
||||
assert!(!check_eq(&result2, &result1));
|
||||
assert!(check_eq(&result2, &points));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_msm() {
|
||||
let test_sizes = [6, 9];
|
||||
|
||||
for pow2 in test_sizes {
|
||||
let count = 1 << pow2;
|
||||
let seed = None; // set Some to provide seed
|
||||
let points = generate_random_points(count, get_rng(seed));
|
||||
let scalars = generate_random_scalars(count, get_rng(seed));
|
||||
|
||||
let msm_result = msm(&points, &scalars, 0);
|
||||
|
||||
let point_r_ark: Vec<_> = points.iter().map(|x| x.to_ark_repr()).collect();
|
||||
let scalars_r_ark: Vec<_> = scalars.iter().map(|x| x.to_ark()).collect();
|
||||
|
||||
let msm_result_ark = VariableBaseMSM::multi_scalar_mul(&point_r_ark, &scalars_r_ark);
|
||||
|
||||
assert_eq!(msm_result.to_ark_affine(), msm_result_ark);
|
||||
assert_eq!(msm_result.to_ark(), msm_result_ark);
|
||||
assert_eq!(
|
||||
msm_result.to_ark_affine(),
|
||||
Point::from_ark(msm_result_ark).to_ark_affine()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_batch_msm() {
|
||||
for batch_pow2 in [2, 4] {
|
||||
for pow2 in [4, 6] {
|
||||
let msm_size = 1 << pow2;
|
||||
let batch_size = 1 << batch_pow2;
|
||||
let seed = None; // set Some to provide seed
|
||||
let points_batch = generate_random_points(msm_size * batch_size, get_rng(seed));
|
||||
let scalars_batch = generate_random_scalars(msm_size * batch_size, get_rng(seed));
|
||||
|
||||
let point_r_ark: Vec<_> = points_batch.iter().map(|x| x.to_ark_repr()).collect();
|
||||
let scalars_r_ark: Vec<_> = scalars_batch.iter().map(|x| x.to_ark()).collect();
|
||||
|
||||
let expected: Vec<_> = point_r_ark
|
||||
.chunks(msm_size)
|
||||
.zip(scalars_r_ark.chunks(msm_size))
|
||||
.map(|p| Point::from_ark(VariableBaseMSM::multi_scalar_mul(p.0, p.1)))
|
||||
.collect();
|
||||
|
||||
let result = msm_batch(&points_batch, &scalars_batch, batch_size, 0);
|
||||
|
||||
assert_eq!(result, expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_commit() {
|
||||
let test_size = 1 << 8;
|
||||
let seed = Some(0);
|
||||
let (mut scalars, mut d_scalars, _) = set_up_scalars(test_size, 0, false);
|
||||
let mut points = generate_random_points(test_size, get_rng(seed));
|
||||
let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();
|
||||
|
||||
let msm_result = msm(&points, &scalars, 0);
|
||||
let mut d_commit_result = commit(&mut d_points, &mut d_scalars);
|
||||
let mut h_commit_result = Point::zero();
|
||||
d_commit_result.copy_to(&mut h_commit_result).unwrap();
|
||||
|
||||
assert_eq!(msm_result, h_commit_result);
|
||||
assert_ne!(msm_result, Point::zero());
|
||||
assert_ne!(h_commit_result, Point::zero());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_batch_commit() {
|
||||
let batch_size = 4;
|
||||
let test_size = 1 << 12;
|
||||
let seed = Some(0);
|
||||
let (scalars, mut d_scalars, _) = set_up_scalars(test_size * batch_size, 0, false);
|
||||
let points = generate_random_points(test_size * batch_size, get_rng(seed));
|
||||
let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();
|
||||
|
||||
let msm_result = msm_batch(&points, &scalars, batch_size, 0);
|
||||
let mut d_commit_result = commit_batch(&mut d_points, &mut d_scalars, batch_size);
|
||||
let mut h_commit_result: Vec<Point> = (0..batch_size).map(|_| Point::zero()).collect();
|
||||
d_commit_result.copy_to(&mut h_commit_result[..]).unwrap();
|
||||
|
||||
assert_eq!(msm_result, h_commit_result);
|
||||
for h in h_commit_result {
|
||||
assert_ne!(h, Point::zero());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ntt() {
|
||||
//NTT
|
||||
let seed = None; //some value to fix the rng
|
||||
let test_size = 1 << 3;
|
||||
|
||||
let scalars = generate_random_scalars(test_size, get_rng(seed));
|
||||
|
||||
let mut ntt_result = scalars.clone();
|
||||
ntt(&mut ntt_result, 0);
|
||||
|
||||
assert_ne!(ntt_result, scalars);
|
||||
|
||||
let mut intt_result = ntt_result.clone();
|
||||
|
||||
intt(&mut intt_result, 0);
|
||||
|
||||
assert_eq!(intt_result, scalars);
|
||||
|
||||
//ECNTT
|
||||
let points_proj = generate_random_points_proj(test_size, get_rng(seed));
|
||||
|
||||
test_naive_ark_ecntt(test_size);
|
||||
|
||||
assert!(points_proj[0].to_ark().into_affine().is_on_curve());
|
||||
|
||||
//naive ark
|
||||
let points_proj_ark = points_proj
|
||||
.iter()
|
||||
.map(|p| p.to_ark())
|
||||
.collect::<Vec<G1Projective>>();
|
||||
|
||||
let ecntt_result_naive = ecntt_arc_naive(&points_proj_ark, points_proj_ark.len(), false);
|
||||
|
||||
let iecntt_result_naive = ecntt_arc_naive(&ecntt_result_naive, points_proj_ark.len(), true);
|
||||
|
||||
assert_eq!(points_proj_ark, iecntt_result_naive);
|
||||
|
||||
//ingo gpu
|
||||
let mut ecntt_result = points_proj.to_vec();
|
||||
ecntt(&mut ecntt_result, 0);
|
||||
|
||||
assert_ne!(ecntt_result, points_proj);
|
||||
|
||||
let mut iecntt_result = ecntt_result.clone();
|
||||
iecntt(&mut iecntt_result, 0);
|
||||
|
||||
assert_eq!(
|
||||
iecntt_result_naive,
|
||||
points_proj
|
||||
.iter()
|
||||
.map(|p| p.to_ark_affine())
|
||||
.collect::<Vec<G1Affine>>()
|
||||
);
|
||||
assert_eq!(
|
||||
iecntt_result
|
||||
.iter()
|
||||
.map(|p| p.to_ark_affine())
|
||||
.collect::<Vec<G1Affine>>(),
|
||||
points_proj
|
||||
.iter()
|
||||
.map(|p| p.to_ark_affine())
|
||||
.collect::<Vec<G1Affine>>()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ntt_batch() {
|
||||
//NTT
|
||||
let seed = None; //some value to fix the rng
|
||||
let test_size = 1 << 5;
|
||||
let batches = 4;
|
||||
|
||||
let scalars_batch: Vec<Scalar> =
|
||||
generate_random_scalars(test_size * batches, get_rng(seed));
|
||||
|
||||
let mut scalar_vec_of_vec: Vec<Vec<Scalar>> = Vec::new();
|
||||
|
||||
for i in 0..batches {
|
||||
scalar_vec_of_vec.push(scalars_batch[i * test_size..(i + 1) * test_size].to_vec());
|
||||
}
|
||||
|
||||
let mut ntt_result = scalars_batch.clone();
|
||||
|
||||
// do batch ntt
|
||||
ntt_batch(&mut ntt_result, test_size, 0);
|
||||
|
||||
let mut ntt_result_vec_of_vec = Vec::new();
|
||||
|
||||
// do ntt for every chunk
|
||||
for i in 0..batches {
|
||||
ntt_result_vec_of_vec.push(scalar_vec_of_vec[i].clone());
|
||||
ntt(&mut ntt_result_vec_of_vec[i], 0);
|
||||
}
|
||||
|
||||
// check that the ntt of each vec of scalars is equal to the intt of the specific batch
|
||||
for i in 0..batches {
|
||||
assert_eq!(
|
||||
ntt_result_vec_of_vec[i],
|
||||
ntt_result[i * test_size..(i + 1) * test_size]
|
||||
);
|
||||
}
|
||||
|
||||
// check that ntt output is different from input
|
||||
assert_ne!(ntt_result, scalars_batch);
|
||||
|
||||
let mut intt_result = ntt_result.clone();
|
||||
|
||||
// do batch intt
|
||||
intt_batch(&mut intt_result, test_size, 0);
|
||||
|
||||
let mut intt_result_vec_of_vec = Vec::new();
|
||||
|
||||
// do intt for every chunk
|
||||
for i in 0..batches {
|
||||
intt_result_vec_of_vec.push(ntt_result_vec_of_vec[i].clone());
|
||||
intt(&mut intt_result_vec_of_vec[i], 0);
|
||||
}
|
||||
|
||||
// check that the intt of each vec of scalars is equal to the intt of the specific batch
|
||||
for i in 0..batches {
|
||||
assert_eq!(
|
||||
intt_result_vec_of_vec[i],
|
||||
intt_result[i * test_size..(i + 1) * test_size]
|
||||
);
|
||||
}
|
||||
|
||||
assert_eq!(intt_result, scalars_batch);
|
||||
|
||||
// //ECNTT
|
||||
let points_proj = generate_random_points_proj(test_size * batches, get_rng(seed));
|
||||
|
||||
let mut points_vec_of_vec: Vec<Vec<Point>> = Vec::new();
|
||||
|
||||
for i in 0..batches {
|
||||
points_vec_of_vec.push(points_proj[i * test_size..(i + 1) * test_size].to_vec());
|
||||
}
|
||||
|
||||
let mut ntt_result_points = points_proj.clone();
|
||||
|
||||
// do batch ecintt
|
||||
ecntt_batch(&mut ntt_result_points, test_size, 0);
|
||||
|
||||
let mut ntt_result_points_vec_of_vec = Vec::new();
|
||||
|
||||
for i in 0..batches {
|
||||
ntt_result_points_vec_of_vec.push(points_vec_of_vec[i].clone());
|
||||
ecntt(&mut ntt_result_points_vec_of_vec[i], 0);
|
||||
}
|
||||
|
||||
for i in 0..batches {
|
||||
assert_eq!(
|
||||
ntt_result_points_vec_of_vec[i],
|
||||
ntt_result_points[i * test_size..(i + 1) * test_size]
|
||||
);
|
||||
}
|
||||
|
||||
assert_ne!(ntt_result_points, points_proj);
|
||||
|
||||
let mut intt_result_points = ntt_result_points.clone();
|
||||
|
||||
// do batch ecintt
|
||||
iecntt_batch(&mut intt_result_points, test_size, 0);
|
||||
|
||||
let mut intt_result_points_vec_of_vec = Vec::new();
|
||||
|
||||
// do ecintt for every chunk
|
||||
for i in 0..batches {
|
||||
intt_result_points_vec_of_vec.push(ntt_result_points_vec_of_vec[i].clone());
|
||||
iecntt(&mut intt_result_points_vec_of_vec[i], 0);
|
||||
}
|
||||
|
||||
// check that the ecintt of each vec of scalars is equal to the intt of the specific batch
|
||||
for i in 0..batches {
|
||||
assert_eq!(
|
||||
intt_result_points_vec_of_vec[i],
|
||||
intt_result_points[i * test_size..(i + 1) * test_size]
|
||||
);
|
||||
}
|
||||
|
||||
assert_eq!(intt_result_points, points_proj);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_interpolation() {
|
||||
let log_test_size = 7;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_scalars(test_size, log_test_size, true);
|
||||
|
||||
reverse_order_scalars(&mut d_evals);
|
||||
let mut d_coeffs = interpolate_scalars(&mut d_evals, &mut d_domain);
|
||||
intt(&mut evals_mut, 0);
|
||||
let mut h_coeffs: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
|
||||
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs, evals_mut);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_batch_interpolation() {
|
||||
let batch_size = 4;
|
||||
let log_test_size = 10;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_scalars(test_size * batch_size, log_test_size, true);
|
||||
|
||||
reverse_order_scalars_batch(&mut d_evals, batch_size);
|
||||
let mut d_coeffs = interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size);
|
||||
intt_batch(&mut evals_mut, test_size, 0);
|
||||
let mut h_coeffs: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
|
||||
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs, evals_mut);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_point_interpolation() {
|
||||
let log_test_size = 6;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_points(test_size, log_test_size, true);
|
||||
|
||||
reverse_order_points(&mut d_evals);
|
||||
let mut d_coeffs = interpolate_points(&mut d_evals, &mut d_domain);
|
||||
iecntt(&mut evals_mut[..], 0);
|
||||
let mut h_coeffs: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
|
||||
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs, *evals_mut);
|
||||
for h in h_coeffs.iter() {
|
||||
assert_ne!(*h, Point::zero());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_point_batch_interpolation() {
|
||||
let batch_size = 4;
|
||||
let log_test_size = 6;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_points(test_size * batch_size, log_test_size, true);
|
||||
|
||||
reverse_order_points_batch(&mut d_evals, batch_size);
|
||||
let mut d_coeffs = interpolate_points_batch(&mut d_evals, &mut d_domain, batch_size);
|
||||
iecntt_batch(&mut evals_mut[..], test_size, 0);
|
||||
let mut h_coeffs: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
|
||||
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs, *evals_mut);
|
||||
for h in h_coeffs.iter() {
|
||||
assert_ne!(*h, Point::zero());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_evaluation() {
|
||||
let log_test_domain_size = 8;
|
||||
let coeff_size = 1 << 6;
|
||||
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
|
||||
let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);
|
||||
|
||||
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
|
||||
let mut d_coeffs_domain = interpolate_scalars(&mut d_evals, &mut d_domain_inv);
|
||||
let mut h_coeffs_domain: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
|
||||
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs, h_coeffs_domain[..coeff_size]);
|
||||
for i in coeff_size.. (1 << log_test_domain_size) {
|
||||
assert_eq!(Scalar::zero(), h_coeffs_domain[i]);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_batch_evaluation() {
|
||||
let batch_size = 6;
|
||||
let log_test_domain_size = 8;
|
||||
let domain_size = 1 << log_test_domain_size;
|
||||
let coeff_size = 1 << 6;
|
||||
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size * batch_size, log_test_domain_size, false);
|
||||
let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);
|
||||
|
||||
let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
|
||||
let mut d_coeffs_domain = interpolate_scalars_batch(&mut d_evals, &mut d_domain_inv, batch_size);
|
||||
let mut h_coeffs_domain: Vec<Scalar> = (0..domain_size * batch_size).map(|_| Scalar::zero()).collect();
|
||||
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
|
||||
|
||||
for j in 0..batch_size {
|
||||
assert_eq!(h_coeffs[j * coeff_size..(j + 1) * coeff_size], h_coeffs_domain[j * domain_size..j * domain_size + coeff_size]);
|
||||
for i in coeff_size..domain_size {
|
||||
assert_eq!(Scalar::zero(), h_coeffs_domain[j * domain_size + i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_point_evaluation() {
|
||||
let log_test_domain_size = 7;
|
||||
let coeff_size = 1 << 7;
|
||||
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_points(coeff_size, log_test_domain_size, false);
|
||||
let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);
|
||||
|
||||
let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
|
||||
let mut d_coeffs_domain = interpolate_points(&mut d_evals, &mut d_domain_inv);
|
||||
let mut h_coeffs_domain: Vec<Point> = (0..1 << log_test_domain_size).map(|_| Point::zero()).collect();
|
||||
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs[..], h_coeffs_domain[..coeff_size]);
|
||||
for i in coeff_size..(1 << log_test_domain_size) {
|
||||
assert_eq!(Point::zero(), h_coeffs_domain[i]);
|
||||
}
|
||||
for i in 0..coeff_size {
|
||||
assert_ne!(h_coeffs_domain[i], Point::zero());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_point_batch_evaluation() {
|
||||
let batch_size = 4;
|
||||
let log_test_domain_size = 6;
|
||||
let domain_size = 1 << log_test_domain_size;
|
||||
let coeff_size = 1 << 5;
|
||||
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_points(coeff_size * batch_size, log_test_domain_size, false);
|
||||
let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);
|
||||
|
||||
let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
|
||||
let mut d_coeffs_domain = interpolate_points_batch(&mut d_evals, &mut d_domain_inv, batch_size);
|
||||
let mut h_coeffs_domain: Vec<Point> = (0..domain_size * batch_size).map(|_| Point::zero()).collect();
|
||||
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
|
||||
|
||||
for j in 0..batch_size {
|
||||
assert_eq!(h_coeffs[j * coeff_size..(j + 1) * coeff_size], h_coeffs_domain[j * domain_size..(j * domain_size + coeff_size)]);
|
||||
for i in coeff_size..domain_size {
|
||||
assert_eq!(Point::zero(), h_coeffs_domain[j * domain_size + i]);
|
||||
}
|
||||
for i in j * domain_size..(j * domain_size + coeff_size) {
|
||||
assert_ne!(h_coeffs_domain[i], Point::zero());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_evaluation_on_trivial_coset() {
|
||||
// checks that the evaluations on the subgroup is the same as on the coset generated by 1
|
||||
let log_test_domain_size = 8;
|
||||
let coeff_size = 1 << 6;
|
||||
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
|
||||
let (_, _, mut d_domain_inv) = set_up_scalars(coeff_size, log_test_domain_size, true);
|
||||
let mut d_trivial_coset_powers = build_domain(1 << log_test_domain_size, 0, false);
|
||||
|
||||
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
|
||||
let mut h_coeffs: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals.copy_to(&mut h_coeffs[..]).unwrap();
|
||||
let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_trivial_coset_powers);
|
||||
let mut h_evals_coset: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
|
||||
|
||||
assert_eq!(h_coeffs, h_evals_coset);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_evaluation_on_coset() {
|
||||
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
|
||||
let log_test_size = 8;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(test_size, log_test_size, false);
|
||||
let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
|
||||
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
|
||||
|
||||
let mut d_evals_large = evaluate_scalars(&mut d_coeffs, &mut d_large_domain);
|
||||
let mut h_evals_large: Vec<Scalar> = (0..2 * test_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
|
||||
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
|
||||
let mut h_evals: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals.copy_to(&mut h_evals[..]).unwrap();
|
||||
let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
|
||||
let mut h_evals_coset: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
|
||||
|
||||
assert_eq!(h_evals[..], h_evals_large[..test_size]);
|
||||
assert_eq!(h_evals_coset[..], h_evals_large[test_size..2 * test_size]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_scalar_batch_evaluation_on_coset() {
|
||||
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
|
||||
let batch_size = 4;
|
||||
let log_test_size = 6;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(test_size * batch_size, log_test_size, false);
|
||||
let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
|
||||
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
|
||||
|
||||
let mut d_evals_large = evaluate_scalars_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
|
||||
let mut h_evals_large: Vec<Scalar> = (0..2 * test_size * batch_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
|
||||
let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
|
||||
let mut h_evals: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals.copy_to(&mut h_evals[..]).unwrap();
|
||||
let mut d_evals_coset = evaluate_scalars_on_coset_batch(&mut d_coeffs, &mut d_domain, batch_size, &mut d_coset_powers);
|
||||
let mut h_evals_coset: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
|
||||
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
|
||||
|
||||
for i in 0..batch_size {
|
||||
assert_eq!(h_evals_large[2 * i * test_size..(2 * i + 1) * test_size], h_evals[i * test_size..(i + 1) * test_size]);
|
||||
assert_eq!(h_evals_large[(2 * i + 1) * test_size..(2 * i + 2) * test_size], h_evals_coset[i * test_size..(i + 1) * test_size]);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_point_evaluation_on_coset() {
|
||||
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
|
||||
let log_test_size = 8;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (_, mut d_coeffs, mut d_domain) = set_up_points(test_size, log_test_size, false);
|
||||
let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
|
||||
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
|
||||
|
||||
let mut d_evals_large = evaluate_points(&mut d_coeffs, &mut d_large_domain);
|
||||
let mut h_evals_large: Vec<Point> = (0..2 * test_size).map(|_| Point::zero()).collect();
|
||||
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
|
||||
let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
|
||||
let mut h_evals: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
|
||||
d_evals.copy_to(&mut h_evals[..]).unwrap();
|
||||
let mut d_evals_coset = evaluate_points_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
|
||||
let mut h_evals_coset: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
|
||||
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
|
||||
|
||||
assert_eq!(h_evals[..], h_evals_large[..test_size]);
|
||||
assert_eq!(h_evals_coset[..], h_evals_large[test_size..2 * test_size]);
|
||||
for i in 0..test_size {
|
||||
assert_ne!(h_evals[i], Point::zero());
|
||||
assert_ne!(h_evals_coset[i], Point::zero());
|
||||
assert_ne!(h_evals_large[2 * i], Point::zero());
|
||||
assert_ne!(h_evals_large[2 * i + 1], Point::zero());
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_point_batch_evaluation_on_coset() {
|
||||
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
|
||||
let batch_size = 2;
|
||||
let log_test_size = 6;
|
||||
let test_size = 1 << log_test_size;
|
||||
let (_, mut d_coeffs, mut d_domain) = set_up_points(test_size * batch_size, log_test_size, false);
|
||||
let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
|
||||
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
|
||||
|
||||
let mut d_evals_large = evaluate_points_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
|
||||
let mut h_evals_large: Vec<Point> = (0..2 * test_size * batch_size).map(|_| Point::zero()).collect();
|
||||
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
|
||||
let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
|
||||
let mut h_evals: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
|
||||
d_evals.copy_to(&mut h_evals[..]).unwrap();
|
||||
let mut d_evals_coset = evaluate_points_on_coset_batch(&mut d_coeffs, &mut d_domain, batch_size, &mut d_coset_powers);
|
||||
let mut h_evals_coset: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
|
||||
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
|
||||
|
||||
for i in 0..batch_size {
|
||||
assert_eq!(h_evals_large[2 * i * test_size..(2 * i + 1) * test_size], h_evals[i * test_size..(i + 1) * test_size]);
|
||||
assert_eq!(h_evals_large[(2 * i + 1) * test_size..(2 * i + 2) * test_size], h_evals_coset[i * test_size..(i + 1) * test_size]);
|
||||
}
|
||||
for i in 0..test_size * batch_size {
|
||||
assert_ne!(h_evals[i], Point::zero());
|
||||
assert_ne!(h_evals_coset[i], Point::zero());
|
||||
assert_ne!(h_evals_large[2 * i], Point::zero());
|
||||
assert_ne!(h_evals_large[2 * i + 1], Point::zero());
|
||||
}
|
||||
}
|
||||
|
||||
// testing matrix multiplication by comparing the result of FFT with the naive multiplication by the DFT matrix
|
||||
#[test]
|
||||
fn test_matrix_multiplication() {
|
||||
let seed = None; // some value to fix the rng
|
||||
let test_size = 1 << 5;
|
||||
let rou = Fr::get_root_of_unity(test_size).unwrap();
|
||||
let matrix_flattened: Vec<Scalar> = (0..test_size).map(
|
||||
|row_num| { (0..test_size).map(
|
||||
|col_num| {
|
||||
let pow: [u64; 1] = [(row_num * col_num).try_into().unwrap()];
|
||||
Scalar::from_ark(Fr::pow(&rou, &pow).into_repr())
|
||||
}).collect::<Vec<Scalar>>()
|
||||
}).flatten().collect::<Vec<_>>();
|
||||
let vector: Vec<Scalar> = generate_random_scalars(test_size, get_rng(seed));
|
||||
|
||||
let result = mult_matrix_by_vec(&matrix_flattened, &vector, 0);
|
||||
let mut ntt_result = vector.clone();
|
||||
ntt(&mut ntt_result, 0);
|
||||
|
||||
// we don't use the same roots of unity as arkworks, so the results are permutations
|
||||
// of one another and the only guaranteed fixed scalars are the following ones:
|
||||
assert_eq!(result[0], ntt_result[0]);
|
||||
assert_eq!(result[test_size >> 1], ntt_result[test_size >> 1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(non_snake_case)]
|
||||
fn test_vec_scalar_mul() {
|
||||
let mut intoo = [Scalar::one(), Scalar::one(), Scalar::zero()];
|
||||
let expected = [Scalar::one(), Scalar::zero(), Scalar::zero()];
|
||||
mult_sc_vec(&mut intoo, &expected, 0);
|
||||
assert_eq!(intoo, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(non_snake_case)]
|
||||
fn test_vec_point_mul() {
|
||||
let dummy_one = Point {
|
||||
x: Base::one(),
|
||||
y: Base::one(),
|
||||
z: Base::one(),
|
||||
};
|
||||
|
||||
let mut inout = [dummy_one, dummy_one, Point::zero()];
|
||||
let scalars = [Scalar::one(), Scalar::zero(), Scalar::zero()];
|
||||
let expected = [dummy_one, Point::zero(), Point::zero()];
|
||||
multp_vec(&mut inout, &scalars, 0);
|
||||
assert_eq!(inout, expected);
|
||||
}
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
[package]
|
||||
name = "bn254"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = [ "Ingonyama" ]
|
||||
|
||||
[dependencies]
|
||||
icicle-core = { path = "../icicle-core" }
|
||||
|
||||
hex = "*"
|
||||
ark-std = "0.3.0"
|
||||
ark-ff = "0.3.0"
|
||||
ark-poly = "0.3.0"
|
||||
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
|
||||
ark-bn254 = "0.3.0"
|
||||
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_derive = "1.0"
|
||||
serde_cbor = "0.11.2"
|
||||
|
||||
rustacuda = "0.1"
|
||||
rustacuda_core = "0.1"
|
||||
rustacuda_derive = "0.1"
|
||||
|
||||
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
|
||||
|
||||
[build-dependencies]
|
||||
cc = { version = "1.0", features = ["parallel"] }
|
||||
|
||||
[dev-dependencies]
|
||||
"criterion" = "0.4.0"
|
||||
|
||||
[features]
|
||||
g2 = []
|
||||
@@ -1,36 +0,0 @@
|
||||
use std::env;
|
||||
|
||||
fn main() {
|
||||
//TODO: check cargo features selected
|
||||
//TODO: can conflict/duplicate with make ?
|
||||
|
||||
println!("cargo:rerun-if-env-changed=CXXFLAGS");
|
||||
println!("cargo:rerun-if-changed=./icicle");
|
||||
|
||||
let arch_type = env::var("ARCH_TYPE").unwrap_or(String::from("native"));
|
||||
let stream_type = env::var("DEFAULT_STREAM").unwrap_or(String::from("legacy"));
|
||||
|
||||
let mut arch = String::from("-arch=");
|
||||
arch.push_str(&arch_type);
|
||||
let mut stream = String::from("-default-stream=");
|
||||
stream.push_str(&stream_type);
|
||||
|
||||
let mut nvcc = cc::Build::new();
|
||||
|
||||
println!("Compiling icicle library using arch: {}", &arch);
|
||||
|
||||
if cfg!(feature = "g2") {
|
||||
nvcc.define("G2_DEFINED", None);
|
||||
}
|
||||
nvcc.cuda(true);
|
||||
nvcc.define("FEATURE_BN254", None);
|
||||
nvcc.debug(false);
|
||||
nvcc.flag(&arch);
|
||||
nvcc.flag(&stream);
|
||||
nvcc.shared_flag(false);
|
||||
// nvcc.static_flag(true);
|
||||
nvcc.files([
|
||||
"../icicle-cuda/curves/index.cu",
|
||||
]);
|
||||
nvcc.compile("ingo_icicle"); //TODO: extension??
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
pub trait Field<const NUM_LIMBS: usize> {
|
||||
const MODOLUS: [u32;NUM_LIMBS];
|
||||
const LIMBS: usize = NUM_LIMBS;
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
pub mod field;
|
||||
pub mod scalar;
|
||||
pub mod point;
|
||||
@@ -1,108 +0,0 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
use super::scalar::{get_fixed_limbs, self};
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointT<BF: scalar::ScalarTrait> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
pub z: BF,
|
||||
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> Default for PointT<BF> {
|
||||
fn default() -> Self {
|
||||
PointT::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn zero() -> Self {
|
||||
PointT {
|
||||
x: BF::zero(),
|
||||
y: BF::one(),
|
||||
z: BF::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infinity() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinityT<BF> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
}
|
||||
|
||||
impl<BF: scalar::ScalarTrait> Default for PointAffineNoInfinityT<BF> {
|
||||
fn default() -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::zero(),
|
||||
y: BF::zero(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointAffineNoInfinityT<BF> {
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> PointT<BF> {
|
||||
PointT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BF::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
PointT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y),
|
||||
z: BF::from_limbs(z)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> PointT<BF> {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BF::base_limbs(), "length must be 3 * {}", BF::base_limbs());
|
||||
PointT {
|
||||
x: BF::from_limbs(value[..BF::base_limbs()].try_into().unwrap()),
|
||||
y: BF::from_limbs(value[BF::base_limbs()..BF::base_limbs() * 2].try_into().unwrap()),
|
||||
z: BF::from_limbs(value[BF::base_limbs() * 2..].try_into().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinityT<BF> {
|
||||
PointAffineNoInfinityT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,102 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination};
|
||||
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use super::field::{Field, self};
|
||||
|
||||
pub fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
|
||||
match val.len() {
|
||||
n if n < NUM_LIMBS => {
|
||||
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
|
||||
padded[..val.len()].copy_from_slice(&val);
|
||||
padded
|
||||
}
|
||||
n if n == NUM_LIMBS => val.try_into().unwrap(),
|
||||
_ => panic!("slice has too many elements"),
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ScalarTrait{
|
||||
fn base_limbs() -> usize;
|
||||
fn zero() -> Self;
|
||||
fn from_limbs(value: &[u32]) -> Self;
|
||||
fn one() -> Self;
|
||||
fn to_bytes_le(&self) -> Vec<u8>;
|
||||
fn limbs(&self) -> &[u32];
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy)]
|
||||
#[repr(C)]
|
||||
pub struct ScalarT<M, const NUM_LIMBS: usize> {
|
||||
pub(crate) phantom: PhantomData<M>,
|
||||
pub(crate) value : [u32; NUM_LIMBS]
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarTrait for ScalarT<M, NUM_LIMBS>
|
||||
where
|
||||
M: Field<NUM_LIMBS>,
|
||||
{
|
||||
|
||||
fn base_limbs() -> usize {
|
||||
return NUM_LIMBS;
|
||||
}
|
||||
|
||||
fn zero() -> Self {
|
||||
ScalarT {
|
||||
value: [0u32; NUM_LIMBS],
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
value: get_fixed_limbs(value),
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
ScalarT { value: s, phantom: PhantomData }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.value
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
fn limbs(&self) -> &[u32] {
|
||||
&self.value
|
||||
}
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarT<M, NUM_LIMBS> where M: field::Field<NUM_LIMBS>{
|
||||
pub fn from_limbs_le(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
Self::from_limbs(value)
|
||||
}
|
||||
|
||||
pub fn from_limbs_be(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
let mut value = value.to_vec();
|
||||
value.reverse();
|
||||
Self::from_limbs_le(&value)
|
||||
}
|
||||
|
||||
// Additional Functions
|
||||
pub fn add(&self, other:ScalarT<M, NUM_LIMBS>) -> ScalarT<M,NUM_LIMBS>{ // overload +
|
||||
return ScalarT{value: [self.value[0] + other.value[0];NUM_LIMBS], phantom: PhantomData };
|
||||
}
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination, DeviceCopy};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::basic_structs::point::{PointT, PointAffineNoInfinityT};
|
||||
use crate::basic_structs::scalar::ScalarT;
|
||||
use crate::basic_structs::field::Field;
|
||||
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct ScalarField;
|
||||
impl Field<8> for ScalarField {
|
||||
const MODOLUS: [u32; 8] = [0x0;8];
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct BaseField;
|
||||
impl Field<8> for BaseField {
|
||||
const MODOLUS: [u32; 8] = [0x0;8];
|
||||
}
|
||||
|
||||
|
||||
pub type Scalar = ScalarT<ScalarField,8>;
|
||||
impl Default for Scalar {
|
||||
fn default() -> Self {
|
||||
Self{value: [0x0;ScalarField::LIMBS], phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl DeviceCopy for Scalar{}
|
||||
|
||||
|
||||
pub type Base = ScalarT<BaseField,8>;
|
||||
impl Default for Base {
|
||||
fn default() -> Self {
|
||||
Self{value: [0x0;BaseField::LIMBS], phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl DeviceCopy for Base{}
|
||||
|
||||
pub type Point = PointT<Base>;
|
||||
pub type PointAffineNoInfinity = PointAffineNoInfinityT<Base>;
|
||||
|
||||
extern "C" {
|
||||
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
@@ -1,797 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use ark_std::UniformRand;
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda::CudaFlags;
|
||||
use rustacuda::memory::DeviceBox;
|
||||
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
|
||||
use rustacuda_core::DevicePointer;
|
||||
use std::mem::transmute;
|
||||
use crate::basic_structs::scalar::ScalarTrait;
|
||||
use crate::curve_structs::*;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use rustacuda::memory::{CopyDestination, DeviceCopy};
|
||||
|
||||
extern "C" {
|
||||
fn msm_cuda(
|
||||
out: *mut Point,
|
||||
points: *const PointAffineNoInfinity,
|
||||
scalars: *const Scalar,
|
||||
count: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn msm_batch_cuda(
|
||||
out: *mut Point,
|
||||
points: *const PointAffineNoInfinity,
|
||||
scalars: *const Scalar,
|
||||
batch_size: usize,
|
||||
msm_size: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn commit_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_scalars: DevicePointer<Scalar>,
|
||||
d_points: DevicePointer<PointAffineNoInfinity>,
|
||||
count: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn commit_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_scalars: DevicePointer<Scalar>,
|
||||
d_points: DevicePointer<PointAffineNoInfinity>,
|
||||
count: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn build_domain_cuda(domain_size: usize, logn: usize, inverse: bool, device_id: usize) -> DevicePointer<Scalar>;
|
||||
|
||||
fn ntt_cuda(inout: *mut Scalar, n: usize, inverse: bool, device_id: usize) -> c_int;
|
||||
|
||||
fn ecntt_cuda(inout: *mut Point, n: usize, inverse: bool, device_id: usize) -> c_int;
|
||||
|
||||
fn ntt_batch_cuda(
|
||||
inout: *mut Scalar,
|
||||
arr_size: usize,
|
||||
n: usize,
|
||||
inverse: bool,
|
||||
) -> c_int;
|
||||
|
||||
fn ecntt_batch_cuda(inout: *mut Point, arr_size: usize, n: usize, inverse: bool) -> c_int;
|
||||
|
||||
fn interpolate_scalars_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_evaluations: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_scalars_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_evaluations: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_points_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_evaluations: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_points_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_evaluations: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_on_coset_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_on_coset_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_on_coset_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_on_coset_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_scalars_cuda(
|
||||
d_arr: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_scalars_batch_cuda(
|
||||
d_arr: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_points_cuda(
|
||||
d_arr: DevicePointer<Point>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_points_batch_cuda(
|
||||
d_arr: DevicePointer<Point>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn vec_mod_mult_point(
|
||||
inout: *mut Point,
|
||||
scalars: *const Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
|
||||
fn vec_mod_mult_scalar(
|
||||
inout: *mut Scalar,
|
||||
scalars: *const Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
|
||||
fn matrix_vec_mod_mult(
|
||||
matrix_flattened: *const Scalar,
|
||||
input: *const Scalar,
|
||||
output: *mut Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
}
|
||||
|
||||
pub fn msm(points: &[PointAffineNoInfinity], scalars: &[Scalar], device_id: usize) -> Point {
|
||||
let count = points.len();
|
||||
if count != scalars.len() {
|
||||
todo!("variable length")
|
||||
}
|
||||
let mut ret = Point::zero();
|
||||
unsafe {
|
||||
msm_cuda(
|
||||
&mut ret as *mut _ as *mut Point,
|
||||
points as *const _ as *const PointAffineNoInfinity,
|
||||
scalars as *const _ as *const Scalar,
|
||||
scalars.len(),
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn msm_batch(
|
||||
points: &[PointAffineNoInfinity],
|
||||
scalars: &[Scalar],
|
||||
batch_size: usize,
|
||||
device_id: usize,
|
||||
) -> Vec<Point> {
|
||||
let count = points.len();
|
||||
if count != scalars.len() {
|
||||
todo!("variable length")
|
||||
}
|
||||
|
||||
let mut ret = vec![Point::zero(); batch_size];
|
||||
|
||||
unsafe {
|
||||
msm_batch_cuda(
|
||||
&mut ret[0] as *mut _ as *mut Point,
|
||||
points as *const _ as *const PointAffineNoInfinity,
|
||||
scalars as *const _ as *const Scalar,
|
||||
batch_size,
|
||||
count / batch_size,
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn commit(
|
||||
points: &mut DeviceBuffer<PointAffineNoInfinity>,
|
||||
scalars: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBox<Point> {
|
||||
let mut res = DeviceBox::new(&Point::zero()).unwrap();
|
||||
unsafe {
|
||||
commit_cuda(
|
||||
res.as_device_ptr(),
|
||||
scalars.as_device_ptr(),
|
||||
points.as_device_ptr(),
|
||||
scalars.len(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn commit_batch(
|
||||
points: &mut DeviceBuffer<PointAffineNoInfinity>,
|
||||
scalars: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(batch_size).unwrap() };
|
||||
unsafe {
|
||||
commit_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
scalars.as_device_ptr(),
|
||||
points.as_device_ptr(),
|
||||
scalars.len() / batch_size,
|
||||
batch_size,
|
||||
0,
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Compute an in-place NTT on the input data.
|
||||
fn ntt_internal(values: &mut [Scalar], device_id: usize, inverse: bool) -> i32 {
|
||||
let ret_code = unsafe {
|
||||
ntt_cuda(
|
||||
values as *mut _ as *mut Scalar,
|
||||
values.len(),
|
||||
inverse,
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
ret_code
|
||||
}
|
||||
|
||||
pub fn ntt(values: &mut [Scalar], device_id: usize) {
|
||||
ntt_internal(values, device_id, false);
|
||||
}
|
||||
|
||||
pub fn intt(values: &mut [Scalar], device_id: usize) {
|
||||
ntt_internal(values, device_id, true);
|
||||
}
|
||||
|
||||
/// Compute an in-place NTT on the input data.
|
||||
fn ntt_internal_batch(
|
||||
values: &mut [Scalar],
|
||||
device_id: usize,
|
||||
batch_size: usize,
|
||||
inverse: bool,
|
||||
) -> i32 {
|
||||
unsafe {
|
||||
ntt_batch_cuda(
|
||||
values as *mut _ as *mut Scalar,
|
||||
values.len(),
|
||||
batch_size,
|
||||
inverse,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ntt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
|
||||
ntt_internal_batch(values, 0, batch_size, false);
|
||||
}
|
||||
|
||||
pub fn intt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
|
||||
ntt_internal_batch(values, 0, batch_size, true);
|
||||
}
|
||||
|
||||
/// Compute an in-place ECNTT on the input data.
|
||||
fn ecntt_internal(values: &mut [Point], inverse: bool, device_id: usize) -> i32 {
|
||||
unsafe {
|
||||
ecntt_cuda(
|
||||
values as *mut _ as *mut Point,
|
||||
values.len(),
|
||||
inverse,
|
||||
device_id,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ecntt(values: &mut [Point], device_id: usize) {
|
||||
ecntt_internal(values, false, device_id);
|
||||
}
|
||||
|
||||
/// Compute an in-place iECNTT on the input data.
|
||||
pub fn iecntt(values: &mut [Point], device_id: usize) {
|
||||
ecntt_internal(values, true, device_id);
|
||||
}
|
||||
|
||||
/// Compute an in-place ECNTT on the input data.
|
||||
fn ecntt_internal_batch(
|
||||
values: &mut [Point],
|
||||
device_id: usize,
|
||||
batch_size: usize,
|
||||
inverse: bool,
|
||||
) -> i32 {
|
||||
unsafe {
|
||||
ecntt_batch_cuda(
|
||||
values as *mut _ as *mut Point,
|
||||
values.len(),
|
||||
batch_size,
|
||||
inverse,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
|
||||
ecntt_internal_batch(values, 0, batch_size, false);
|
||||
}
|
||||
|
||||
/// Compute an in-place iECNTT on the input data.
|
||||
pub fn iecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
|
||||
ecntt_internal_batch(values, 0, batch_size, true);
|
||||
}
|
||||
|
||||
pub fn build_domain(domain_size: usize, logn: usize, inverse: bool) -> DeviceBuffer<Scalar> {
|
||||
unsafe {
|
||||
DeviceBuffer::from_raw_parts(build_domain_cuda(
|
||||
domain_size,
|
||||
logn,
|
||||
inverse,
|
||||
0
|
||||
), domain_size)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn reverse_order_scalars(
|
||||
d_scalars: &mut DeviceBuffer<Scalar>,
|
||||
) {
|
||||
unsafe { reverse_order_scalars_cuda(
|
||||
d_scalars.as_device_ptr(),
|
||||
d_scalars.len(),
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_scalars_batch(
|
||||
d_scalars: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) {
|
||||
unsafe { reverse_order_scalars_batch_cuda(
|
||||
d_scalars.as_device_ptr(),
|
||||
d_scalars.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_points(
|
||||
d_points: &mut DeviceBuffer<Point>,
|
||||
) {
|
||||
unsafe { reverse_order_points_cuda(
|
||||
d_points.as_device_ptr(),
|
||||
d_points.len(),
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_points_batch(
|
||||
d_points: &mut DeviceBuffer<Point>,
|
||||
batch_size: usize,
|
||||
) {
|
||||
unsafe { reverse_order_points_batch_cuda(
|
||||
d_points.as_device_ptr(),
|
||||
d_points.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn interpolate_scalars(
|
||||
d_evaluations: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe { interpolate_scalars_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_scalars_batch(
|
||||
d_evaluations: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe { interpolate_scalars_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
batch_size,
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_points(
|
||||
d_evaluations: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe { interpolate_points_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_points_batch(
|
||||
d_evaluations: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe { interpolate_points_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
batch_size,
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_on_coset(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_on_coset_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_on_coset_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_on_coset_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_on_coset(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_on_coset_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_on_coset_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_on_coset_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn multp_vec(a: &mut [Point], b: &[Scalar], device_id: usize) {
|
||||
assert_eq!(a.len(), b.len());
|
||||
unsafe {
|
||||
vec_mod_mult_point(
|
||||
a as *mut _ as *mut Point,
|
||||
b as *const _ as *const Scalar,
|
||||
a.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mult_sc_vec(a: &mut [Scalar], b: &[Scalar], device_id: usize) {
|
||||
assert_eq!(a.len(), b.len());
|
||||
unsafe {
|
||||
vec_mod_mult_scalar(
|
||||
a as *mut _ as *mut Scalar,
|
||||
b as *const _ as *const Scalar,
|
||||
a.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Multiply a matrix by a scalar:
|
||||
// `a` - flattenned matrix;
|
||||
// `b` - vector to multiply `a` by;
|
||||
pub fn mult_matrix_by_vec(a: &[Scalar], b: &[Scalar], device_id: usize) -> Vec<Scalar> {
|
||||
let mut c = Vec::with_capacity(b.len());
|
||||
for i in 0..b.len() {
|
||||
c.push(Scalar::zero());
|
||||
}
|
||||
unsafe {
|
||||
matrix_vec_mod_mult(
|
||||
a as *const _ as *const Scalar,
|
||||
b as *const _ as *const Scalar,
|
||||
c.as_mut_slice() as *mut _ as *mut Scalar,
|
||||
b.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
c
|
||||
}
|
||||
|
||||
pub fn clone_buffer<T: DeviceCopy>(buf: &mut DeviceBuffer<T>) -> DeviceBuffer<T> {
|
||||
let mut buf_cpy = unsafe { DeviceBuffer::uninitialized(buf.len()).unwrap() };
|
||||
unsafe { buf_cpy.copy_from(buf) };
|
||||
return buf_cpy;
|
||||
}
|
||||
|
||||
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> {
|
||||
let rng: Box<dyn RngCore> = match seed {
|
||||
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
|
||||
None => Box::new(rand::thread_rng()),
|
||||
};
|
||||
rng
|
||||
}
|
||||
|
||||
fn set_up_device() {
|
||||
// Set up the context, load the module, and create a stream to run kernels in.
|
||||
rustacuda::init(CudaFlags::empty()).unwrap();
|
||||
let device = Device::get_device(0).unwrap();
|
||||
let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device).unwrap();
|
||||
}
|
||||
|
||||
pub fn generate_random_points(
|
||||
count: usize,
|
||||
mut rng: Box<dyn RngCore>,
|
||||
) -> Vec<PointAffineNoInfinity> {
|
||||
(0..count)
|
||||
.map(|_| Point::from_ark(G1Projective_BN254::rand(&mut rng)).to_xy_strip_z())
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_random_points_proj(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Point> {
|
||||
(0..count)
|
||||
.map(|_| Point::from_ark(G1Projective_BN254::rand(&mut rng)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_random_scalars(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Scalar> {
|
||||
(0..count)
|
||||
.map(|_| Scalar::from_ark(Fr_BN254::rand(&mut rng).into_repr()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn set_up_points(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Point>, DeviceBuffer<Point>, DeviceBuffer<Scalar>) {
|
||||
set_up_device();
|
||||
|
||||
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
|
||||
|
||||
let seed = Some(0); // fix the rng to get two equal scalar
|
||||
let vector = generate_random_points_proj(test_size, get_rng(seed));
|
||||
let mut vector_mut = vector.clone();
|
||||
|
||||
let mut d_vector = DeviceBuffer::from_slice(&vector[..]).unwrap();
|
||||
(vector_mut, d_vector, d_domain)
|
||||
}
|
||||
|
||||
pub fn set_up_scalars(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Scalar>, DeviceBuffer<Scalar>, DeviceBuffer<Scalar>) {
|
||||
set_up_device();
|
||||
|
||||
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
|
||||
|
||||
let seed = Some(0); // fix the rng to get two equal scalars
|
||||
let mut vector_mut = generate_random_scalars(test_size, get_rng(seed));
|
||||
|
||||
let mut d_vector = DeviceBuffer::from_slice(&vector_mut[..]).unwrap();
|
||||
(vector_mut, d_vector, d_domain)
|
||||
}
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
pub mod test_bn254;
|
||||
pub mod basic_structs;
|
||||
pub mod from_cuda;
|
||||
pub mod curve_structs;
|
||||
@@ -8,27 +8,22 @@ fn main() {
|
||||
println!("cargo:rerun-if-changed=./icicle");
|
||||
|
||||
let arch_type = env::var("ARCH_TYPE").unwrap_or(String::from("native"));
|
||||
let stream_type = env::var("DEFAULT_STREAM").unwrap_or(String::from("legacy"));
|
||||
|
||||
let mut arch = String::from("-arch=");
|
||||
arch.push_str(&arch_type);
|
||||
let mut stream = String::from("-default-stream=");
|
||||
stream.push_str(&stream_type);
|
||||
|
||||
let mut nvcc = cc::Build::new();
|
||||
|
||||
println!("Compiling icicle library using arch: {}", &arch);
|
||||
|
||||
if cfg!(feature = "g2") {
|
||||
nvcc.define("G2_DEFINED", None);
|
||||
}
|
||||
nvcc.cuda(true);
|
||||
nvcc.define("FEATURE_BLS12_377", None);
|
||||
nvcc.debug(false);
|
||||
nvcc.flag(&arch);
|
||||
nvcc.flag(&stream);
|
||||
nvcc.files([
|
||||
"../icicle-cuda/curves/index.cu",
|
||||
"./icicle/appUtils/vector_manipulation/ve_mod_mult.cu",
|
||||
"./icicle/appUtils/ntt/lde.cu",
|
||||
"./icicle/appUtils/msm/msm.cu",
|
||||
"./icicle/primitives/projective.cu",
|
||||
]);
|
||||
nvcc.compile("ingo_icicle"); //TODO: extension??
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"curve_name" : "bls12_377",
|
||||
"modolus_p" : 8444461749428370424248824938781546531375899335154063827935233455917409239041,
|
||||
"bit_count_p" : 253,
|
||||
"limb_p" : 8,
|
||||
"ntt_size" : 32,
|
||||
"modolus_q" : 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177,
|
||||
"bit_count_q" : 377,
|
||||
"limb_q" : 12,
|
||||
"weierstrass_b" : 1,
|
||||
"gen_x" : 81937999373150964239938255573465948239988671502647976594219695644855304257327692006745978603320413799295628339695,
|
||||
"gen_y" : 241266749859715473739788878240585681733927191168601896383759122102112907357779751001206799952863815012735208165030
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"curve_name" : "bls12_381",
|
||||
"modolus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
|
||||
"bit_count_p" : 255,
|
||||
"limb_p" : 8,
|
||||
"ntt_size" : 32,
|
||||
"modolus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
|
||||
"bit_count_q" : 381,
|
||||
"limb_q" : 12,
|
||||
"weierstrass_b" : 4,
|
||||
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
|
||||
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
{
|
||||
"curve_name" : "bn254",
|
||||
"modolus_p" : 21888242871839275222246405745257275088548364400416034343698204186575808495617,
|
||||
"bit_count_p" : 254,
|
||||
"limb_p" : 8,
|
||||
"ntt_size" : 16,
|
||||
"modolus_q" : 21888242871839275222246405745257275088696311157297823662689037894645226208583,
|
||||
"bit_count_q" : 254,
|
||||
"limb_q" : 8,
|
||||
"weierstrass_b" : 3,
|
||||
"gen_x" : 1,
|
||||
"gen_y" : 2
|
||||
}
|
||||
@@ -1,203 +0,0 @@
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
from sympy.ntheory import isprime, primitive_root
|
||||
import subprocess
|
||||
import random
|
||||
import sys
|
||||
|
||||
data = None
|
||||
with open(sys.argv[1]) as json_file:
|
||||
data = json.load(json_file)
|
||||
|
||||
curve_name = data["curve_name"]
|
||||
modolus_p = data["modolus_p"]
|
||||
bit_count_p = data["bit_count_p"]
|
||||
limb_p = data["limb_p"]
|
||||
ntt_size = data["ntt_size"]
|
||||
modolus_q = data["modolus_q"]
|
||||
bit_count_q = data["bit_count_q"]
|
||||
limb_q = data["limb_q"]
|
||||
weierstrass_b = data["weierstrass_b"]
|
||||
gen_x = data["gen_x"]
|
||||
gen_y = data["gen_y"]
|
||||
|
||||
|
||||
def to_hex(val, length):
|
||||
x = str(hex(val))[2:]
|
||||
if len(x) % 8 != 0:
|
||||
x = "0" * (8-len(x) % 8) + x
|
||||
if len(x) != length:
|
||||
x = "0" * (length-len(x)) + x
|
||||
n = 8
|
||||
chunks = [x[i:i+n] for i in range(0, len(x), n)][::-1]
|
||||
s = ""
|
||||
for c in chunks:
|
||||
s += "0x" + c + ", "
|
||||
return s
|
||||
|
||||
|
||||
def get_root_of_unity(order: int) -> int:
|
||||
assert (modolus_p - 1) % order == 0
|
||||
return pow(5, (modolus_p - 1) // order, modolus_p)
|
||||
|
||||
def create_field_parameters_struct(modulus, modulus_bits_count,limbs,ntt,size,name):
|
||||
s = " struct "+name+"{\n"
|
||||
s += " static constexpr unsigned limbs_count = " + str(limbs)+";\n"
|
||||
s += " static constexpr storage<limbs_count> modulus = {"+to_hex(modulus,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<limbs_count> modulus_2 = {"+to_hex(modulus*2,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<limbs_count> modulus_4 = {"+to_hex(modulus*4,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<2*limbs_count> modulus_wide = {"+to_hex(modulus,8*limbs*2)[:-2]+"};\n"
|
||||
s += " static constexpr storage<2*limbs_count> modulus_sqared = {"+to_hex(modulus*modulus,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<2*limbs_count> modulus_sqared_2 = {"+to_hex(modulus*modulus*2,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<2*limbs_count> modulus_sqared_4 = {"+to_hex(modulus*modulus*2*2,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr unsigned modulus_bits_count = "+str(modulus_bits_count)+";\n"
|
||||
m = int(math.floor(int(pow(2,2*modulus_bits_count) // modulus)))
|
||||
s += " static constexpr storage<limbs_count> m = {"+ to_hex(m,8*limbs)[:-2] +"};\n"
|
||||
s += " static constexpr storage<limbs_count> one = {"+ to_hex(1,8*limbs)[:-2] +"};\n"
|
||||
s += " static constexpr storage<limbs_count> zero = {"+ to_hex(0,8*limbs)[:-2] +"};\n"
|
||||
|
||||
if ntt:
|
||||
for k in range(size):
|
||||
omega = get_root_of_unity(int(pow(2,k+1)))
|
||||
s += " static constexpr storage<limbs_count> omega"+str(k+1)+"= {"+ to_hex(omega,8*limbs)[:-2]+"};\n"
|
||||
for k in range(size):
|
||||
omega = get_root_of_unity(int(pow(2,k+1)))
|
||||
s += " static constexpr storage<limbs_count> omega_inv"+str(k+1)+"= {"+ to_hex(pow(omega, -1, modulus),8*limbs)[:-2]+"};\n"
|
||||
for k in range(size):
|
||||
s += " static constexpr storage<limbs_count> inv"+str(k+1)+"= {"+ to_hex(pow(int(pow(2,k+1)), -1, modulus),8*limbs)[:-2]+"};\n"
|
||||
s+=" };\n"
|
||||
return s
|
||||
|
||||
def create_gen():
|
||||
s = " struct group_generator {\n"
|
||||
s += " static constexpr storage<fq_config::limbs_count> generator_x = {"+to_hex(gen_x,8*limb_q)[:-2]+ "};\n"
|
||||
s += " static constexpr storage<fq_config::limbs_count> generator_y = {"+to_hex(gen_y,8*limb_q)[:-2]+ "};\n"
|
||||
s+=" };\n"
|
||||
return s
|
||||
|
||||
def get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b):
|
||||
file_content = ""
|
||||
file_content += "#pragma once\n#include \"../../utils/storage.cuh\"\n"
|
||||
file_content += "namespace PARAMS_"+curve_name.upper()+"{\n"
|
||||
file_content += create_field_parameters_struct(modolus_p,bit_count_p,limb_p,True,ntt_size,"fp_config")
|
||||
file_content += create_field_parameters_struct(modolus_q,bit_count_q,limb_q,False,0,"fq_config")
|
||||
file_content += " static constexpr unsigned weierstrass_b = " + str(weierstrass_b)+ ";\n"
|
||||
file_content += create_gen()
|
||||
file_content+="}\n"
|
||||
return file_content
|
||||
|
||||
|
||||
# Create Cuda interface
|
||||
|
||||
newpath = "./icicle-cuda/curves/"+curve_name
|
||||
if not os.path.exists(newpath):
|
||||
os.makedirs(newpath)
|
||||
|
||||
fc = get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b)
|
||||
text_file = open("./icicle-cuda/curves/"+curve_name+"/params.cuh", "w")
|
||||
n = text_file.write(fc)
|
||||
text_file.close()
|
||||
|
||||
with open("./icicle-cuda/curves/curve_template/lde.cu", "r") as lde_file:
|
||||
content = lde_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
text_file = open("./icicle-cuda/curves/"+curve_name+"/lde.cu", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
|
||||
with open("./icicle-cuda/curves/curve_template/msm.cu", "r") as msm_file:
|
||||
content = msm_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
text_file = open("./icicle-cuda/curves/"+curve_name+"/msm.cu", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
|
||||
with open("./icicle-cuda/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_file:
|
||||
content = ve_mod_mult_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
text_file = open("./icicle-cuda/curves/"+curve_name+"/ve_mod_mult.cu", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
|
||||
|
||||
namespace = '#include "params.cuh"\n'+'''namespace CURVE_NAME_U {
|
||||
typedef Field<PARAMS_CURVE_NAME_U::fp_config> scalar_field_t;\
|
||||
typedef scalar_field_t scalar_t;\
|
||||
typedef Field<PARAMS_CURVE_NAME_U::fq_config> point_field_t;
|
||||
typedef Projective<point_field_t, scalar_field_t, PARAMS_CURVE_NAME_U::group_generator, PARAMS_CURVE_NAME_U::weierstrass_b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
}'''
|
||||
|
||||
with open('./icicle-cuda/curves/'+curve_name+'/curve_config.cuh', 'w') as f:
|
||||
f.write(namespace.replace("CURVE_NAME_U",curve_name.upper()))
|
||||
|
||||
|
||||
eq = '''
|
||||
#include <cuda.h>\n
|
||||
#include "curve_config.cuh"\n
|
||||
#include "../../primitives/projective.cuh"\n
|
||||
extern "C" bool eq_CURVE_NAME_L(CURVE_NAME_U::projective_t *point1, CURVE_NAME_U::projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2);
|
||||
}'''
|
||||
|
||||
with open('./icicle-cuda/curves/'+curve_name+'/projective.cu', 'w') as f:
|
||||
f.write(eq.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
|
||||
|
||||
supported_operations = '''
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
'''
|
||||
|
||||
with open('./icicle-cuda/curves/'+curve_name+'/supported_operations.cu', 'w') as f:
|
||||
f.write(supported_operations.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
|
||||
|
||||
with open('./icicle-cuda/curves/index.cu', 'a') as f:
|
||||
f.write('\n#include "'+curve_name.lower()+'/supported_operations.cu"')
|
||||
|
||||
|
||||
|
||||
# Create Rust interface and tests
|
||||
|
||||
if limb_p == limb_q:
|
||||
with open("./src/curve_templates/curve_same_limbs.rs", "r") as curve_file:
|
||||
content = curve_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
content = content.replace("_limbs_p",str(limb_p * 8 * 4))
|
||||
content = content.replace("limbs_p",str(limb_p))
|
||||
text_file = open("./src/curves/"+curve_name+".rs", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
else:
|
||||
with open("./src/curve_templates/curve_different_limbs.rs", "r") as curve_file:
|
||||
content = curve_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
content = content.replace("_limbs_p",str(limb_p * 8 * 4))
|
||||
content = content.replace("limbs_p",str(limb_p))
|
||||
content = content.replace("_limbs_q",str(limb_q * 8 * 4))
|
||||
content = content.replace("limbs_q",str(limb_q))
|
||||
text_file = open("./src/curves/"+curve_name+".rs", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
|
||||
with open("./src/curve_templates/test.rs", "r") as test_file:
|
||||
content = test_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
text_file = open("./src/test_"+curve_name+".rs", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
|
||||
with open('./src/curves/mod.rs', 'a') as f:
|
||||
f.write('\n pub mod ' + curve_name + ';')
|
||||
|
||||
with open('./src/lib.rs', 'a') as f:
|
||||
f.write('\npub mod ' + curve_name + ';')
|
||||
@@ -1,49 +0,0 @@
|
||||
[package]
|
||||
name = "icicle-core"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = [ "Ingonyama" ]
|
||||
description = "An implementation of the Ingonyama CUDA Library"
|
||||
homepage = "https://www.ingonyama.com"
|
||||
repository = "https://github.com/ingonyama-zk/icicle"
|
||||
|
||||
[[bench]]
|
||||
name = "ntt"
|
||||
path = "benches/ntt.rs"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "msm"
|
||||
path = "benches/msm.rs"
|
||||
harness = false
|
||||
|
||||
[dependencies]
|
||||
hex = "*"
|
||||
ark-std = "0.3.0"
|
||||
ark-ff = "0.3.0"
|
||||
ark-poly = "0.3.0"
|
||||
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
|
||||
ark-bls12-381 = "0.3.0"
|
||||
ark-bls12-377 = "0.3.0"
|
||||
ark-bn254 = "0.3.0"
|
||||
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_derive = "1.0"
|
||||
serde_cbor = "0.11.2"
|
||||
|
||||
rustacuda = "0.1"
|
||||
rustacuda_core = "0.1"
|
||||
rustacuda_derive = "0.1"
|
||||
|
||||
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
|
||||
|
||||
[build-dependencies]
|
||||
cc = { version = "1.0", features = ["parallel"] }
|
||||
|
||||
[dev-dependencies]
|
||||
"criterion" = "0.4.0"
|
||||
|
||||
[features]
|
||||
default = ["bls12-381"]
|
||||
bls12-381 = ["ark-bls12-381/curve"]
|
||||
g2 = []
|
||||
@@ -1,4 +0,0 @@
|
||||
pub trait Field<const NUM_LIMBS: usize> {
|
||||
const MODOLUS: [u32;NUM_LIMBS];
|
||||
const LIMBS: usize = NUM_LIMBS;
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
pub mod field;
|
||||
pub mod scalar;
|
||||
pub mod point;
|
||||
@@ -1,108 +0,0 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
use super::scalar::{get_fixed_limbs, self};
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointT<BF: scalar::ScalarTrait> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
pub z: BF,
|
||||
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> Default for PointT<BF> {
|
||||
fn default() -> Self {
|
||||
PointT::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn zero() -> Self {
|
||||
PointT {
|
||||
x: BF::zero(),
|
||||
y: BF::one(),
|
||||
z: BF::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infinity() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinityT<BF> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
}
|
||||
|
||||
impl<BF: scalar::ScalarTrait> Default for PointAffineNoInfinityT<BF> {
|
||||
fn default() -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::zero(),
|
||||
y: BF::zero(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointAffineNoInfinityT<BF> {
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> PointT<BF> {
|
||||
PointT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BF::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
PointT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y),
|
||||
z: BF::from_limbs(z)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> PointT<BF> {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BF::base_limbs(), "length must be 3 * {}", BF::base_limbs());
|
||||
PointT {
|
||||
x: BF::from_limbs(value[..BF::base_limbs()].try_into().unwrap()),
|
||||
y: BF::from_limbs(value[BF::base_limbs()..BF::base_limbs() * 2].try_into().unwrap()),
|
||||
z: BF::from_limbs(value[BF::base_limbs() * 2..].try_into().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinityT<BF> {
|
||||
PointAffineNoInfinityT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,102 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination};
|
||||
|
||||
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use super::field::{Field, self};
|
||||
|
||||
pub fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
|
||||
match val.len() {
|
||||
n if n < NUM_LIMBS => {
|
||||
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
|
||||
padded[..val.len()].copy_from_slice(&val);
|
||||
padded
|
||||
}
|
||||
n if n == NUM_LIMBS => val.try_into().unwrap(),
|
||||
_ => panic!("slice has too many elements"),
|
||||
}
|
||||
}
|
||||
|
||||
pub trait ScalarTrait{
|
||||
fn base_limbs() -> usize;
|
||||
fn zero() -> Self;
|
||||
fn from_limbs(value: &[u32]) -> Self;
|
||||
fn one() -> Self;
|
||||
fn to_bytes_le(&self) -> Vec<u8>;
|
||||
fn limbs(&self) -> &[u32];
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy)]
|
||||
#[repr(C)]
|
||||
pub struct ScalarT<M, const NUM_LIMBS: usize> {
|
||||
pub(crate) phantom: PhantomData<M>,
|
||||
pub(crate) value : [u32; NUM_LIMBS]
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarTrait for ScalarT<M, NUM_LIMBS>
|
||||
where
|
||||
M: Field<NUM_LIMBS>,
|
||||
{
|
||||
|
||||
fn base_limbs() -> usize {
|
||||
return NUM_LIMBS;
|
||||
}
|
||||
|
||||
fn zero() -> Self {
|
||||
ScalarT {
|
||||
value: [0u32; NUM_LIMBS],
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
value: get_fixed_limbs(value),
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
ScalarT { value: s, phantom: PhantomData }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.value
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
fn limbs(&self) -> &[u32] {
|
||||
&self.value
|
||||
}
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarT<M, NUM_LIMBS> where M: field::Field<NUM_LIMBS>{
|
||||
pub fn from_limbs_le(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
Self::from_limbs(value)
|
||||
}
|
||||
|
||||
pub fn from_limbs_be(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
let mut value = value.to_vec();
|
||||
value.reverse();
|
||||
Self::from_limbs_le(&value)
|
||||
}
|
||||
|
||||
// Additional Functions
|
||||
pub fn add(&self, other:ScalarT<M, NUM_LIMBS>) -> ScalarT<M,NUM_LIMBS>{ // overload +
|
||||
return ScalarT{value: [self.value[0] + other.value[0];NUM_LIMBS], phantom: PhantomData };
|
||||
}
|
||||
}
|
||||
@@ -1,2 +0,0 @@
|
||||
pub mod utils;
|
||||
pub mod basic_structs;
|
||||
@@ -1,42 +0,0 @@
|
||||
use rand::RngCore;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::SeedableRng;
|
||||
|
||||
pub fn from_limbs<T>(limbs: Vec<u32>, chunk_size: usize, f: fn(&[u32]) -> T) -> Vec<T> {
|
||||
let points = limbs
|
||||
.chunks(chunk_size)
|
||||
.map(|lmbs| f(lmbs))
|
||||
.collect::<Vec<T>>();
|
||||
points
|
||||
}
|
||||
|
||||
pub fn u32_vec_to_u64_vec(arr_u32: &[u32]) -> Vec<u64> {
|
||||
let len = (arr_u32.len() / 2) as usize;
|
||||
let mut arr_u64 = vec![0u64; len];
|
||||
|
||||
for i in 0..len {
|
||||
arr_u64[i] = u64::from(arr_u32[i * 2]) | (u64::from(arr_u32[i * 2 + 1]) << 32);
|
||||
}
|
||||
|
||||
arr_u64
|
||||
}
|
||||
|
||||
pub fn u64_vec_to_u32_vec(arr_u64: &[u64]) -> Vec<u32> {
|
||||
let len = arr_u64.len() * 2;
|
||||
let mut arr_u32 = vec![0u32; len];
|
||||
|
||||
for i in 0..arr_u64.len() {
|
||||
arr_u32[i * 2] = arr_u64[i] as u32;
|
||||
arr_u32[i * 2 + 1] = (arr_u64[i] >> 32) as u32;
|
||||
}
|
||||
|
||||
arr_u32
|
||||
}
|
||||
|
||||
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> { //TOOD: this func is universal
|
||||
let rng: Box<dyn RngCore> = match seed {
|
||||
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
|
||||
None => Box::new(rand::thread_rng()),
|
||||
};
|
||||
rng
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
test_msm:
|
||||
mkdir -p work
|
||||
nvcc -o work/test_msm -I. tests/msm_test.cu
|
||||
work/test_msm
|
||||
@@ -1,188 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
#include "msm.cu"
|
||||
#include "../../utils/cuda_utils.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../curves/bls12_381/curve_config.cuh"
|
||||
|
||||
using namespace BLS12_381;
|
||||
|
||||
class Dummy_Scalar {
|
||||
public:
|
||||
static constexpr unsigned NBITS = 32;
|
||||
|
||||
unsigned x;
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar) {
|
||||
os << scalar.x;
|
||||
return os;
|
||||
}
|
||||
|
||||
HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width) {
|
||||
return (x>>(digit_num*digit_width))&((1<<digit_width)-1);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2) {
|
||||
return {p1.x+p2.x};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) {
|
||||
return (p1.x == p2.x);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) {
|
||||
return (p1.x == p2);
|
||||
}
|
||||
|
||||
// static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar &scalar) {
|
||||
// return {Dummy_Scalar::neg(point.x)};
|
||||
// }
|
||||
static HOST_INLINE Dummy_Scalar rand_host() {
|
||||
return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
|
||||
class Dummy_Projective {
|
||||
|
||||
public:
|
||||
Dummy_Scalar x;
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective zero() {
|
||||
return {0};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective to_affine(const Dummy_Projective &point) {
|
||||
return {point.x};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective from_affine(const Dummy_Projective &point) {
|
||||
return {point.x};
|
||||
}
|
||||
|
||||
// static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
|
||||
// return {Dummy_Scalar::neg(point.x)};
|
||||
// }
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Projective operator+(Dummy_Projective p1, const Dummy_Projective& p2) {
|
||||
return {p1.x+p2.x};
|
||||
}
|
||||
|
||||
// friend HOST_DEVICE_INLINE Dummy_Projective operator-(Dummy_Projective p1, const Dummy_Projective& p2) {
|
||||
// return p1 + neg(p2);
|
||||
// }
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Projective& point) {
|
||||
os << point.x;
|
||||
return os;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Projective operator*(Dummy_Scalar scalar, const Dummy_Projective& point) {
|
||||
Dummy_Projective res = zero();
|
||||
#ifdef CUDA_ARCH
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < Dummy_Scalar::NBITS; i++) {
|
||||
if (i > 0) {
|
||||
res = res + res;
|
||||
}
|
||||
if (scalar.get_scalar_digit(Dummy_Scalar::NBITS - i - 1, 1)) {
|
||||
res = res + point;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Projective& p1, const Dummy_Projective& p2) {
|
||||
return (p1.x == p2.x);
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool is_zero(const Dummy_Projective &point) {
|
||||
return point.x == 0;
|
||||
}
|
||||
|
||||
static HOST_INLINE Dummy_Projective rand_host() {
|
||||
return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
|
||||
//switch between dummy and real:
|
||||
|
||||
typedef scalar_t test_scalar;
|
||||
typedef projective_t test_projective;
|
||||
typedef affine_t test_affine;
|
||||
|
||||
// typedef Dummy_Scalar test_scalar;
|
||||
// typedef Dummy_Projective test_projective;
|
||||
// typedef Dummy_Projective test_affine;
|
||||
|
||||
int main()
|
||||
{
|
||||
unsigned batch_size = 4;
|
||||
unsigned msm_size = 1<<15;
|
||||
unsigned N = batch_size*msm_size;
|
||||
|
||||
test_scalar *scalars = new test_scalar[N];
|
||||
test_affine *points = new test_affine[N];
|
||||
|
||||
for (unsigned i=0;i<N;i++){
|
||||
scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
|
||||
points[i] = (i%msm_size < 10)? test_projective::to_affine(test_projective::rand_host()): points[i-10];
|
||||
// scalars[i] = test_scalar::rand_host();
|
||||
// points[i] = test_projective::to_affine(test_projective::rand_host());
|
||||
}
|
||||
std::cout<<"finished generating"<<std::endl;
|
||||
|
||||
// projective_t *short_res = (projective_t*)malloc(sizeof(projective_t));
|
||||
// test_projective *large_res = (test_projective*)malloc(sizeof(test_projective));
|
||||
test_projective large_res[batch_size];
|
||||
test_projective batched_large_res[batch_size];
|
||||
// fake_point *large_res = (fake_point*)malloc(sizeof(fake_point));
|
||||
// fake_point batched_large_res[256];
|
||||
|
||||
|
||||
// short_msm<scalar_t, projective_t, affine_t>(scalars, points, N, short_res);
|
||||
for (unsigned i=0;i<batch_size;i++){
|
||||
large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
|
||||
// std::cout<<"final result large"<<std::endl;
|
||||
// std::cout<<test_projective::to_affine(*large_res)<<std::endl;
|
||||
}
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
|
||||
// large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
|
||||
printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
|
||||
std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
|
||||
|
||||
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
|
||||
|
||||
std::cout<<"final results batched large"<<std::endl;
|
||||
bool success = true;
|
||||
for (unsigned i = 0; i < batch_size; i++)
|
||||
{
|
||||
std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
|
||||
if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
|
||||
std::cout<<"good"<<std::endl;
|
||||
}
|
||||
else{
|
||||
std::cout<<"miss"<<std::endl;
|
||||
std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
if (success){
|
||||
std::cout<<"success!"<<std::endl;
|
||||
}
|
||||
|
||||
// std::cout<<batched_large_res[0]<<std::endl;
|
||||
// std::cout<<batched_large_res[1]<<std::endl;
|
||||
// std::cout<<projective_t::to_affine(batched_large_res[0])<<std::endl;
|
||||
// std::cout<<projective_t::to_affine(batched_large_res[1])<<std::endl;
|
||||
|
||||
// std::cout<<"final result short"<<std::endl;
|
||||
// std::cout<<pr<<std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,184 +0,0 @@
|
||||
#ifndef LDE
|
||||
#define LDE
|
||||
#include <cuda.h>
|
||||
#include "ntt.cuh"
|
||||
#include "lde.cuh"
|
||||
#include "../vector_manipulation/ve_mod_mult.cuh"
|
||||
|
||||
|
||||
/**
|
||||
* Interpolate a batch of polynomials from their evaluations on the same subgroup.
|
||||
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs.
|
||||
* @param d_out The variable to write coefficients of the resulting polynomials into (the coefficients are in bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
|
||||
* @param d_evaluations Input array of evaluations of all polynomials of type E (elements).
|
||||
* @param d_domain Domain on which the polynomials are evaluated. Must be a subgroup.
|
||||
* @param n Length of `d_domain` array, also equal to the number of evaluations of each polynomial.
|
||||
* @param batch_size The size of the batch; the length of `d_evaluations` is `n` * `batch_size`.
|
||||
*/
|
||||
template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluations, S * d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaMemcpyAsync(d_out, d_evaluations, sizeof(E) * n * batch_size, cudaMemcpyDeviceToDevice, stream);
|
||||
|
||||
int NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
|
||||
int NUM_BLOCKS = batch_size * max(int((n / 2) / NUM_THREADS), 1);
|
||||
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
|
||||
{
|
||||
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, n, d_domain, n, NUM_BLOCKS, s, false);
|
||||
}
|
||||
|
||||
NUM_BLOCKS = (n * batch_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
template_normalize_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>> (d_out, n * batch_size, S::inv_log_size(logn));
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Interpolate a polynomial from its evaluations on a subgroup.
|
||||
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs.
|
||||
* @param d_out The variable to write coefficients of the resulting polynomial into (the coefficients are in bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
|
||||
* @param d_evaluations Input array of evaluations that have type E (elements).
|
||||
* @param d_domain Domain on which the polynomial is evaluated. Must be a subgroup.
|
||||
* @param n Length of `d_evaluations` and the size `d_domain` arrays (they should have equal length).
|
||||
*/
|
||||
template <typename E, typename S> int interpolate(E * d_out, E * d_evaluations, S * d_domain, unsigned n, cudaStream_t stream) {
|
||||
return interpolate_batch <E, S> (d_out, d_evaluations, d_domain, n, 1, stream);
|
||||
}
|
||||
|
||||
template < typename E > __global__ void fill_array(E * arr, E val, uint32_t n) {
|
||||
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid < n) {
|
||||
arr[tid] = val;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluate a batch of polynomials on the same coset.
|
||||
* @param d_out The evaluations of the polynomials on coset `u` * `d_domain`.
|
||||
* @param d_coefficients Input array of coefficients of all polynomials of type E (elements) to be evaluated in-place on a coset.
|
||||
* @param d_domain Domain on which the polynomials are evaluated (see `coset` flag). Must be a subgroup.
|
||||
* @param domain_size Length of `d_domain` array, on which the polynomial is computed.
|
||||
* @param n The number of coefficients, which might be different from `domain_size`.
|
||||
* @param batch_size The size of the batch; the length of `d_coefficients` is `n` * `batch_size`.
|
||||
* @param coset The flag that indicates whether to evaluate on a coset. If false, evaluate on a subgroup `d_domain`.
|
||||
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
|
||||
*/
|
||||
template <typename E, typename S>
|
||||
int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, unsigned batch_size, bool coset, S * coset_powers, cudaStream_t stream) {
|
||||
uint32_t logn = uint32_t(log(domain_size) / log(2));
|
||||
if (domain_size > n) {
|
||||
// allocate and initialize an array of stream handles to parallelize data copying across batches
|
||||
cudaStream_t *memcpy_streams = (cudaStream_t *) malloc(batch_size * sizeof(cudaStream_t));
|
||||
for (unsigned i = 0; i < batch_size; i++)
|
||||
{
|
||||
cudaStreamCreate(&(memcpy_streams[i]));
|
||||
|
||||
cudaMemcpyAsync(&d_out[i * domain_size], &d_coefficients[i * n], n * sizeof(E), cudaMemcpyDeviceToDevice, memcpy_streams[i]);
|
||||
uint32_t NUM_THREADS = MAX_THREADS_BATCH;
|
||||
uint32_t NUM_BLOCKS = (domain_size - n + NUM_THREADS - 1) / NUM_THREADS;
|
||||
fill_array <E> <<<NUM_BLOCKS, NUM_THREADS, 0, memcpy_streams[i]>>> (&d_out[i * domain_size + n], E::zero(), domain_size - n);
|
||||
|
||||
cudaStreamSynchronize(memcpy_streams[i]);
|
||||
cudaStreamDestroy(memcpy_streams[i]);
|
||||
}
|
||||
} else
|
||||
cudaMemcpyAsync(d_out, d_coefficients, sizeof(E) * domain_size * batch_size, cudaMemcpyDeviceToDevice, stream);
|
||||
|
||||
if (coset)
|
||||
batch_vector_mult(coset_powers, d_out, domain_size, batch_size, stream);
|
||||
|
||||
int NUM_THREADS = min(domain_size / 2, MAX_THREADS_BATCH);
|
||||
int chunks = max(int((domain_size / 2) / NUM_THREADS), 1);
|
||||
int NUM_BLOCKS = batch_size * chunks;
|
||||
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
|
||||
{
|
||||
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, domain_size, d_domain, domain_size, batch_size * chunks, logn - s - 1, true);
|
||||
}
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluate a polynomial on a coset.
|
||||
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs, so the order of outputs is bit-reversed.
|
||||
* @param d_out The evaluations of the polynomial on coset `u` * `d_domain`.
|
||||
* @param d_coefficients Input array of coefficients of a polynomial of type E (elements).
|
||||
* @param d_domain Domain on which the polynomial is evaluated (see `coset` flag). Must be a subgroup.
|
||||
* @param domain_size Length of `d_domain` array, on which the polynomial is computed.
|
||||
* @param n The number of coefficients, which might be different from `domain_size`.
|
||||
* @param coset The flag that indicates whether to evaluate on a coset. If false, evaluate on a subgroup `d_domain`.
|
||||
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
|
||||
*/
|
||||
template <typename E, typename S>
|
||||
int evaluate(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, bool coset, S * coset_powers, cudaStream_t stream) {
|
||||
return evaluate_batch <E, S> (d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers, stream);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream) {
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, stream);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream) {
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream) {
|
||||
S* _null = nullptr;
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream) {
|
||||
S* _null = nullptr;
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream) {
|
||||
S* _null = nullptr;
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream) {
|
||||
S* _null = nullptr;
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream) {
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream) {
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream) {
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream) {
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
|
||||
}
|
||||
#endif
|
||||
@@ -1,46 +0,0 @@
|
||||
#ifndef LDE_H
|
||||
#define LDE_H
|
||||
#pragma once
|
||||
|
||||
template <typename S>
|
||||
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream);
|
||||
|
||||
template <typename S>
|
||||
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
|
||||
|
||||
template <typename E, typename S>
|
||||
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream);
|
||||
|
||||
template <typename E, typename S>
|
||||
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream);
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream);
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream);
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream);
|
||||
|
||||
#endif
|
||||
@@ -1,51 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <cassert>
|
||||
|
||||
#include "constants/constants_2.h"
|
||||
#include "constants/constants_4.h"
|
||||
#include "constants/constants_8.h"
|
||||
#include "constants/constants_11.h"
|
||||
|
||||
uint32_t partial_rounds_number_from_arity(const uint32_t arity) {
|
||||
switch (arity) {
|
||||
case 2:
|
||||
return 55;
|
||||
case 4:
|
||||
return 56;
|
||||
case 8:
|
||||
return 57;
|
||||
case 11:
|
||||
return 57;
|
||||
default:
|
||||
throw std::invalid_argument( "unsupported arity" );
|
||||
}
|
||||
};
|
||||
|
||||
// TO-DO: change to mapping
|
||||
const uint32_t FULL_ROUNDS_DEFAULT = 4;
|
||||
|
||||
// TO-DO: for now, the constants are only generated in bls12_381
|
||||
template <typename S>
|
||||
S * load_constants(const uint32_t arity) {
|
||||
unsigned char * constants;
|
||||
switch (arity) {
|
||||
case 2:
|
||||
constants = constants_2;
|
||||
break;
|
||||
case 4:
|
||||
constants = constants_4;
|
||||
break;
|
||||
case 8:
|
||||
constants = constants_8;
|
||||
break;
|
||||
case 11:
|
||||
constants = constants_11;
|
||||
break;
|
||||
default:
|
||||
throw std::invalid_argument( "unsupported arity" );
|
||||
}
|
||||
return reinterpret_cast< S * >(constants);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,995 +0,0 @@
|
||||
unsigned char constants_2[] = {
|
||||
0xd8, 0xd3, 0x6e, 0x9d, 0x00, 0x0a, 0x32, 0xa7, 0x36, 0x8b, 0x75, 0xa2,
|
||||
0x92, 0xac, 0x1e, 0x50, 0x24, 0x4a, 0xbb, 0x1d, 0x86, 0x51, 0xbd, 0x23,
|
||||
0x7a, 0xe1, 0x3a, 0xfa, 0x4b, 0x06, 0x9f, 0x66, 0x15, 0x3f, 0x9d, 0x2b,
|
||||
0x84, 0xab, 0x72, 0x6e, 0x34, 0x27, 0xac, 0x45, 0x96, 0x7c, 0xe6, 0xee,
|
||||
0x6c, 0xa6, 0x4f, 0xc8, 0xf2, 0x7f, 0x53, 0xe4, 0x36, 0xca, 0xac, 0xfb,
|
||||
0xde, 0xa8, 0x61, 0x0a, 0xd5, 0x65, 0x81, 0x12, 0x71, 0x47, 0x23, 0x1e,
|
||||
0x30, 0x49, 0xaa, 0x1a, 0x4e, 0x2b, 0x29, 0x17, 0x5b, 0x27, 0xdf, 0x45,
|
||||
0x8a, 0x1e, 0x1b, 0xf9, 0x09, 0x9d, 0xb8, 0x24, 0xfa, 0xce, 0xe9, 0x21,
|
||||
0xd1, 0xa0, 0x12, 0x2f, 0xca, 0x56, 0x5f, 0x2f, 0x1b, 0x40, 0x6c, 0x31,
|
||||
0x90, 0x55, 0x2f, 0x1f, 0x2b, 0xd0, 0xd2, 0xd2, 0xc9, 0x24, 0x26, 0x38,
|
||||
0x05, 0x18, 0x53, 0x38, 0x1d, 0x42, 0xfc, 0x0b, 0xc8, 0xc5, 0x8b, 0x5a,
|
||||
0xf3, 0x19, 0xca, 0xff, 0xf5, 0x3b, 0xef, 0x15, 0x4e, 0xf4, 0xcc, 0xbe,
|
||||
0xe8, 0x42, 0x69, 0x68, 0xf4, 0xfc, 0xd3, 0xc3, 0xf0, 0x5d, 0x03, 0x89,
|
||||
0x4a, 0xae, 0xb0, 0x13, 0x43, 0x39, 0xaa, 0x45, 0xb2, 0x41, 0x38, 0xf8,
|
||||
0x20, 0x2d, 0xd1, 0x1f, 0x3c, 0xc4, 0xaa, 0xf1, 0x40, 0xd0, 0x26, 0xe4,
|
||||
0x81, 0x74, 0x41, 0xc1, 0xb4, 0xd0, 0x64, 0x8d, 0xf9, 0xdd, 0x9a, 0x4b,
|
||||
0x38, 0x45, 0x02, 0xcc, 0x01, 0x65, 0x25, 0x72, 0x24, 0x2b, 0xba, 0x3f,
|
||||
0x2c, 0x1a, 0xbf, 0xc6, 0x3c, 0xcf, 0xa1, 0xef, 0x9c, 0xda, 0x2e, 0x9b,
|
||||
0x14, 0xc7, 0x81, 0x65, 0x85, 0xc3, 0x24, 0x2c, 0x65, 0xc6, 0x51, 0x3a,
|
||||
0xd3, 0xc1, 0xd1, 0xd5, 0x42, 0x8f, 0x2f, 0x3c, 0x0e, 0x61, 0xaf, 0xb8,
|
||||
0xf6, 0x3c, 0x32, 0x1a, 0x9f, 0x28, 0x91, 0x9d, 0x02, 0x18, 0x97, 0x47,
|
||||
0x79, 0x21, 0xf9, 0x61, 0x40, 0x5c, 0x16, 0xa9, 0xc5, 0x6e, 0xca, 0x9f,
|
||||
0x37, 0xf1, 0x2a, 0x13, 0xf1, 0xf0, 0xf0, 0xef, 0xb4, 0x56, 0xf4, 0x08,
|
||||
0x6a, 0x47, 0x52, 0x4a, 0x32, 0xbf, 0xb3, 0xab, 0xa5, 0xdf, 0x36, 0x12,
|
||||
0x63, 0x5f, 0x2e, 0xc2, 0xf4, 0x17, 0xa4, 0x0c, 0xfd, 0xeb, 0x3d, 0xe9,
|
||||
0xc7, 0x1d, 0x97, 0x5e, 0x52, 0x61, 0x75, 0x96, 0xfb, 0x11, 0x60, 0xcd,
|
||||
0xf8, 0xca, 0xa8, 0x11, 0xdc, 0x6e, 0xcd, 0x59, 0xf3, 0x37, 0x41, 0xd6,
|
||||
0x61, 0xb3, 0x74, 0xe5, 0xa8, 0xc1, 0x51, 0xf5, 0xa2, 0x57, 0x2e, 0x32,
|
||||
0xe4, 0x0e, 0xd2, 0xed, 0x73, 0xca, 0x58, 0x7a, 0x81, 0x16, 0x9c, 0xa0,
|
||||
0xa0, 0xc0, 0xaa, 0x65, 0xe0, 0x3f, 0x43, 0xb7, 0x03, 0xb0, 0x35, 0x84,
|
||||
0x61, 0xf6, 0x60, 0x0e, 0x18, 0xb3, 0x0a, 0xc0, 0x59, 0x98, 0x57, 0x80,
|
||||
0x7e, 0x26, 0x8b, 0x26, 0x0f, 0x94, 0x44, 0xbc, 0xc9, 0x71, 0xf8, 0x19,
|
||||
0x9a, 0x3b, 0x0a, 0xea, 0x9a, 0xc0, 0x41, 0x26, 0x9b, 0x50, 0xe7, 0x5d,
|
||||
0x1b, 0x59, 0x22, 0x26, 0x79, 0x3a, 0xae, 0x39, 0x61, 0x13, 0x9c, 0x8f,
|
||||
0x8e, 0xd0, 0xbf, 0x84, 0xb8, 0xca, 0x3f, 0x71, 0x41, 0x70, 0x35, 0x88,
|
||||
0x03, 0x63, 0x0d, 0xc5, 0x1a, 0xcb, 0x63, 0x11, 0x32, 0x90, 0xb6, 0xaa,
|
||||
0xfb, 0xdc, 0xd9, 0xc3, 0xa1, 0x93, 0x41, 0xe8, 0xa1, 0xfb, 0x2d, 0x88,
|
||||
0x9e, 0xe6, 0x37, 0x21, 0xb2, 0xbe, 0xfc, 0x64, 0x18, 0x37, 0x87, 0xbc,
|
||||
0x36, 0xf2, 0xe4, 0x08, 0x5e, 0x87, 0x5f, 0x78, 0xbc, 0xbd, 0x4c, 0x91,
|
||||
0x53, 0xb5, 0xf3, 0x3c, 0xe9, 0x8c, 0x1d, 0xa7, 0x0a, 0x95, 0x90, 0x55,
|
||||
0xfd, 0xfd, 0x61, 0xd1, 0x38, 0x21, 0xca, 0x5c, 0x8f, 0xc0, 0xc9, 0x39,
|
||||
0x81, 0x8e, 0x2d, 0x7c, 0xa7, 0xab, 0x84, 0xef, 0x09, 0xd3, 0x1f, 0xb2,
|
||||
0xc8, 0xe7, 0x6a, 0x9c, 0xe5, 0x0d, 0xea, 0x15, 0x0f, 0xdf, 0x55, 0x7e,
|
||||
0x25, 0x01, 0xad, 0x36, 0xdc, 0xfe, 0x2c, 0xc2, 0xf5, 0xd1, 0x57, 0xef,
|
||||
0xf2, 0x1d, 0xdd, 0x82, 0xb0, 0x20, 0xbf, 0xfe, 0x8a, 0xa8, 0x4d, 0xb5,
|
||||
0xd2, 0x03, 0x0d, 0x49, 0x43, 0xaf, 0x4a, 0xac, 0x95, 0x64, 0x6b, 0x62,
|
||||
0x6e, 0x75, 0x84, 0x85, 0x56, 0x8f, 0x99, 0x6d, 0xfa, 0xb4, 0x37, 0x30,
|
||||
0xc4, 0x06, 0x82, 0x32, 0xf0, 0x86, 0x6e, 0x5f, 0xde, 0x62, 0xa3, 0x61,
|
||||
0xdc, 0x17, 0x37, 0x5c, 0xc8, 0x9b, 0x78, 0x6a, 0xf1, 0xa2, 0x77, 0x76,
|
||||
0x44, 0x93, 0xbe, 0x6b, 0x71, 0x39, 0x0a, 0x35, 0x86, 0xa3, 0x4c, 0x84,
|
||||
0x0f, 0xb1, 0xbf, 0x51, 0x88, 0x18, 0x88, 0x57, 0x09, 0x97, 0x55, 0xdf,
|
||||
0x29, 0xe6, 0xff, 0xaa, 0xaf, 0x7b, 0x27, 0x29, 0xca, 0xf5, 0x11, 0x64,
|
||||
0xa2, 0x2e, 0xb9, 0x99, 0xc5, 0xc4, 0x56, 0x1b, 0x03, 0x0c, 0xf1, 0x7e,
|
||||
0x9b, 0xf1, 0x8b, 0x57, 0xc7, 0x4c, 0x4a, 0x05, 0x84, 0x78, 0x67, 0x3c,
|
||||
0x82, 0xee, 0xe4, 0x55, 0xfb, 0xf2, 0x2e, 0xcb, 0x3a, 0x64, 0x88, 0x44,
|
||||
0x15, 0x1b, 0x23, 0xaa, 0xe9, 0x9a, 0x04, 0xbd, 0xb4, 0xbd, 0x34, 0x28,
|
||||
0x84, 0x34, 0x55, 0x13, 0x2c, 0xbd, 0x43, 0x1c, 0x3b, 0xaf, 0x1d, 0x95,
|
||||
0x28, 0x18, 0x5f, 0xe7, 0x33, 0xa2, 0x4c, 0x58, 0xca, 0x42, 0xbe, 0x9e,
|
||||
0x8e, 0x72, 0xae, 0xf1, 0x08, 0x40, 0x8f, 0x55, 0x61, 0x68, 0xa3, 0x2e,
|
||||
0xff, 0x75, 0xe7, 0x38, 0x44, 0x68, 0x4a, 0x40, 0x05, 0x3f, 0x64, 0xf2,
|
||||
0xf3, 0xd7, 0x8d, 0xd4, 0x3d, 0x69, 0x2e, 0xc9, 0x94, 0x3f, 0xc8, 0x75,
|
||||
0xa1, 0xa1, 0xe5, 0x0b, 0x26, 0xec, 0x36, 0xe9, 0x29, 0x67, 0x4b, 0xc9,
|
||||
0x2b, 0x0f, 0x4b, 0xa0, 0x56, 0xaf, 0x8b, 0x81, 0xea, 0x11, 0xf5, 0x42,
|
||||
0xd3, 0xf2, 0x6e, 0x91, 0xf3, 0x35, 0x60, 0xe6, 0xa0, 0x80, 0x09, 0x45,
|
||||
0xfe, 0x29, 0x4c, 0xde, 0x96, 0x76, 0x6e, 0x27, 0xfc, 0x64, 0xd3, 0xf7,
|
||||
0xb4, 0xbf, 0xfa, 0x8c, 0x13, 0x68, 0x52, 0xf7, 0x9c, 0x86, 0x74, 0xe1,
|
||||
0x8a, 0x01, 0x97, 0x73, 0x69, 0x29, 0x21, 0x0d, 0xae, 0xcf, 0xa7, 0x83,
|
||||
0xf2, 0x8b, 0x93, 0x8d, 0xef, 0xf2, 0x7c, 0xc1, 0xfd, 0x50, 0xca, 0x95,
|
||||
0x53, 0x77, 0x46, 0xa8, 0xe0, 0xb9, 0x6f, 0x4e, 0xe5, 0x55, 0x35, 0x2b,
|
||||
0x6b, 0x67, 0x5e, 0x4e, 0x24, 0x51, 0x85, 0xf3, 0x19, 0x3d, 0x4c, 0x5c,
|
||||
0x94, 0x7c, 0xb4, 0xe5, 0x49, 0xe8, 0xdf, 0xdd, 0x34, 0x4c, 0x64, 0x13,
|
||||
0x6e, 0x67, 0x6a, 0xc6, 0x5e, 0x82, 0x1f, 0xdc, 0x0e, 0xf6, 0x15, 0x2a,
|
||||
0x6f, 0xdd, 0x3a, 0x5c, 0x7d, 0x20, 0xbf, 0xd5, 0x89, 0xa1, 0x25, 0x2f,
|
||||
0x59, 0xe7, 0xca, 0xa2, 0xb4, 0xde, 0x72, 0x2c, 0xe8, 0xe6, 0xc5, 0x3d,
|
||||
0x93, 0xa5, 0xe0, 0x47, 0x7d, 0xe5, 0x65, 0x58, 0x59, 0xec, 0x62, 0x79,
|
||||
0xc5, 0x69, 0x21, 0xfb, 0x12, 0x45, 0xe7, 0xb3, 0xa0, 0x5c, 0xba, 0xfb,
|
||||
0x70, 0x38, 0x8b, 0x80, 0x95, 0x90, 0x72, 0x85, 0xf8, 0x61, 0xb3, 0x6f,
|
||||
0x5f, 0x9d, 0x2d, 0x36, 0x9f, 0xe0, 0xeb, 0xc2, 0xd2, 0xcd, 0x33, 0x5a,
|
||||
0x26, 0x78, 0xa7, 0x7f, 0x24, 0x52, 0x52, 0x3a, 0xe6, 0xf6, 0xf4, 0xa0,
|
||||
0x9c, 0x52, 0x1d, 0xd5, 0x26, 0x5d, 0x9a, 0x7b, 0x9f, 0xba, 0x63, 0x6a,
|
||||
0xda, 0xb9, 0xed, 0xec, 0x37, 0x8b, 0x24, 0x76, 0xcf, 0x1d, 0xa0, 0x3e,
|
||||
0x1e, 0xc7, 0x60, 0x73, 0xc5, 0x5b, 0x7f, 0x93, 0x84, 0x62, 0x9b, 0xe8,
|
||||
0x28, 0x07, 0xac, 0x77, 0xe7, 0xb3, 0x7d, 0x6f, 0x51, 0x91, 0xc7, 0xf3,
|
||||
0x4d, 0x17, 0xeb, 0xe7, 0xc5, 0x31, 0x1e, 0x2d, 0x75, 0x2e, 0x30, 0xd8,
|
||||
0xe8, 0x75, 0x4c, 0x37, 0x7a, 0xd6, 0x5c, 0x75, 0x1d, 0xc0, 0xb4, 0x99,
|
||||
0xa2, 0x49, 0xe0, 0x72, 0xe2, 0xb3, 0x30, 0xed, 0x8b, 0xa7, 0x7e, 0x07,
|
||||
0x79, 0x36, 0x77, 0xee, 0x15, 0x71, 0x1f, 0xe0, 0x0a, 0x98, 0x0a, 0xee,
|
||||
0xcf, 0x0c, 0x59, 0xcc, 0xc7, 0x48, 0x50, 0xd3, 0xea, 0x41, 0xe1, 0x66,
|
||||
0xd4, 0x3b, 0x24, 0xe9, 0x63, 0x4c, 0x16, 0xec, 0x51, 0x8e, 0x06, 0xc2,
|
||||
0x11, 0x53, 0x58, 0x35, 0xd0, 0xd1, 0x77, 0x43, 0x59, 0x7f, 0xdb, 0x35,
|
||||
0xe6, 0xea, 0x04, 0x1b, 0x69, 0x2e, 0x03, 0x2e, 0x5e, 0xa9, 0x67, 0xc7,
|
||||
0x24, 0x52, 0xef, 0x5e, 0x1d, 0x8c, 0xe8, 0xa3, 0xa4, 0x8e, 0xc4, 0xcb,
|
||||
0x5d, 0x8a, 0x57, 0x31, 0xdf, 0x3c, 0x38, 0xdf, 0xe6, 0xaf, 0x21, 0x77,
|
||||
0x49, 0x02, 0xbc, 0x32, 0xde, 0x1e, 0x9f, 0x6a, 0x95, 0x9f, 0x94, 0x3b,
|
||||
0x84, 0xdc, 0xea, 0x0b, 0x09, 0x76, 0x2f, 0x93, 0x70, 0x12, 0x8c, 0xb6,
|
||||
0xd0, 0x20, 0xc3, 0xe2, 0x94, 0x8a, 0xb6, 0x2f, 0x9a, 0x03, 0xef, 0x5b,
|
||||
0xc0, 0x47, 0xbf, 0xd0, 0xa7, 0x90, 0xe6, 0x13, 0xac, 0xc9, 0x2e, 0x10,
|
||||
0xef, 0x10, 0xd1, 0x81, 0x65, 0x5d, 0xfa, 0x50, 0x65, 0xc0, 0xd6, 0x59,
|
||||
0x3a, 0xe0, 0x5c, 0x94, 0xbd, 0xf8, 0xc6, 0x25, 0x85, 0x61, 0x2f, 0xa5,
|
||||
0x5c, 0x0d, 0x7e, 0xe1, 0xa8, 0x04, 0x3b, 0x1f, 0x61, 0x34, 0x4b, 0x30,
|
||||
0xf3, 0x84, 0x8e, 0x89, 0xb1, 0x58, 0xe2, 0x48, 0xf4, 0x79, 0x7f, 0x5f,
|
||||
0x95, 0x1d, 0xe7, 0x71, 0x47, 0x5d, 0x43, 0x69, 0xd4, 0x7b, 0xe6, 0x87,
|
||||
0x9e, 0x11, 0x12, 0x2a, 0x4f, 0xf7, 0x0c, 0xfb, 0x3c, 0x0b, 0x1d, 0xe7,
|
||||
0xa3, 0x0b, 0xdf, 0xc7, 0xd1, 0x35, 0xdb, 0x7d, 0x58, 0x7b, 0x46, 0x40,
|
||||
0x3e, 0xf6, 0xc1, 0xb6, 0x22, 0x99, 0x13, 0xd0, 0xd9, 0x3f, 0x28, 0xc5,
|
||||
0xef, 0xeb, 0x6a, 0xda, 0xf5, 0xfb, 0x2d, 0x9d, 0x3c, 0x23, 0x23, 0x7d,
|
||||
0x1f, 0x81, 0x55, 0xaf, 0xd4, 0xec, 0x7b, 0x09, 0x79, 0xe1, 0x90, 0xde,
|
||||
0xe3, 0xff, 0x9a, 0x13, 0x2b, 0x4e, 0x70, 0x5c, 0x63, 0x72, 0x88, 0xfa,
|
||||
0x74, 0x4f, 0xb7, 0xd1, 0x33, 0x3b, 0x8a, 0xec, 0x2e, 0x9b, 0x77, 0x0b,
|
||||
0x8c, 0x3a, 0x91, 0x2c, 0x63, 0x3c, 0x03, 0x40, 0x1e, 0x78, 0x83, 0x4c,
|
||||
0xcc, 0x0a, 0x3b, 0x99, 0x8d, 0x10, 0x54, 0x79, 0x3e, 0x85, 0x9d, 0xab,
|
||||
0x2f, 0xd6, 0x9b, 0xab, 0x63, 0x85, 0x7a, 0x80, 0xe2, 0x43, 0xc0, 0x31,
|
||||
0xa9, 0x77, 0x9a, 0x12, 0xf6, 0xcb, 0x8d, 0xfb, 0x65, 0xed, 0xb7, 0x11,
|
||||
0xff, 0x5c, 0xe0, 0x8f, 0x16, 0xc6, 0x9b, 0x36, 0x56, 0x2b, 0x8a, 0xe1,
|
||||
0x9b, 0xe1, 0xfc, 0x01, 0x3f, 0xa4, 0x49, 0x5d, 0x59, 0x19, 0xbd, 0xbe,
|
||||
0x17, 0x49, 0xe5, 0xa1, 0xa7, 0xf7, 0x26, 0x19, 0xa4, 0x0f, 0xd3, 0x5b,
|
||||
0x74, 0xa9, 0xfe, 0x53, 0x88, 0x51, 0xa8, 0x9c, 0x3f, 0xde, 0xbd, 0x19,
|
||||
0xa0, 0x40, 0x31, 0x50, 0x1f, 0x8b, 0x92, 0x97, 0xb2, 0x1c, 0xc7, 0xb0,
|
||||
0xdd, 0xd5, 0xae, 0x88, 0x92, 0x00, 0x4a, 0xd7, 0xb7, 0xf8, 0x02, 0xaa,
|
||||
0x25, 0xbb, 0x05, 0x89, 0x78, 0xda, 0x9c, 0x00, 0xb5, 0x48, 0x2c, 0x0d,
|
||||
0xf3, 0xfa, 0xfc, 0x4e, 0x6f, 0x3d, 0x96, 0x74, 0x92, 0xb5, 0x16, 0x01,
|
||||
0x88, 0xb2, 0x4a, 0x9c, 0x43, 0x35, 0x75, 0xef, 0x3d, 0x6e, 0xd0, 0x92,
|
||||
0xc0, 0x24, 0xf6, 0xd6, 0xc0, 0x01, 0xef, 0x23, 0xb0, 0x6e, 0x27, 0x21,
|
||||
0x5e, 0xa1, 0x8c, 0x0f, 0x69, 0xbc, 0x09, 0x47, 0x2c, 0x13, 0x5d, 0xba,
|
||||
0x32, 0x3c, 0x37, 0x62, 0x3a, 0xdf, 0x38, 0x5a, 0x17, 0xe2, 0xfc, 0xe3,
|
||||
0x8e, 0xe2, 0xd6, 0x6d, 0x50, 0x1b, 0xd1, 0xcc, 0x4b, 0x9d, 0x66, 0x0a,
|
||||
0x90, 0x85, 0x01, 0x3b, 0xa2, 0x77, 0xd4, 0x95, 0x90, 0x63, 0x49, 0x5e,
|
||||
0x27, 0xe7, 0xab, 0xc5, 0xf1, 0xf9, 0xa8, 0xf2, 0x40, 0xb1, 0x14, 0x35,
|
||||
0x4d, 0x69, 0x4c, 0x51, 0x3b, 0x9b, 0x10, 0x50, 0x70, 0x34, 0xf4, 0xbe,
|
||||
0x14, 0x88, 0xb5, 0x40, 0x1a, 0x68, 0x74, 0x40, 0x4c, 0xa3, 0xa7, 0x0d,
|
||||
0x32, 0x64, 0xaa, 0xef, 0xf5, 0x7b, 0x1a, 0x60, 0x1d, 0xfc, 0x33, 0xf2,
|
||||
0x50, 0xc6, 0x39, 0x28, 0x53, 0xe7, 0x98, 0xbf, 0xbd, 0x1e, 0xac, 0x80,
|
||||
0x35, 0x5d, 0x7a, 0x18, 0x96, 0x8f, 0xb1, 0x41, 0xc2, 0xcb, 0x7d, 0xd0,
|
||||
0x75, 0xd4, 0xc2, 0x11, 0x78, 0xd8, 0xa1, 0x98, 0x53, 0x1c, 0x59, 0x72,
|
||||
0xac, 0xc1, 0x37, 0x0f, 0x42, 0x13, 0x0b, 0x98, 0xf9, 0x6e, 0x6f, 0x36,
|
||||
0x53, 0x8d, 0x66, 0x46, 0x65, 0xf0, 0x27, 0xd3, 0xe3, 0xf0, 0x10, 0x5d,
|
||||
0x1b, 0xae, 0x8d, 0x49, 0xec, 0xe6, 0x40, 0xfc, 0xfa, 0xbe, 0x55, 0x60,
|
||||
0x4b, 0xfe, 0xd0, 0xca, 0x6a, 0x45, 0xd0, 0xd5, 0xe1, 0x5f, 0x20, 0x67,
|
||||
0x09, 0x4e, 0x6d, 0x59, 0xef, 0xba, 0xec, 0x57, 0x41, 0xfa, 0x62, 0x1c,
|
||||
0x54, 0xa4, 0x74, 0x46, 0xd1, 0x91, 0x48, 0xc9, 0xa6, 0x07, 0x01, 0xd1,
|
||||
0x43, 0xa0, 0xe7, 0x7f, 0x35, 0xa0, 0x6f, 0xe4, 0x57, 0xb0, 0xb8, 0x99,
|
||||
0x7c, 0x93, 0x4a, 0x0d, 0x4b, 0x0a, 0xd6, 0x24, 0xb2, 0x27, 0xd1, 0xa8,
|
||||
0x2e, 0x5b, 0x3c, 0xcc, 0x17, 0xb2, 0x8a, 0x70, 0x93, 0x2b, 0x00, 0x96,
|
||||
0x2d, 0x90, 0x4d, 0x67, 0x62, 0xb8, 0xc6, 0xd1, 0x46, 0xda, 0x3b, 0x6d,
|
||||
0xdf, 0xd6, 0x03, 0xf2, 0x01, 0xa2, 0x89, 0x6c, 0x50, 0xd5, 0xf0, 0xb1,
|
||||
0xd2, 0x24, 0xdd, 0x02, 0x42, 0xde, 0x1d, 0x5b, 0x00, 0xe0, 0x5f, 0x5f,
|
||||
0x31, 0xf8, 0x59, 0x9d, 0xc8, 0xa4, 0x70, 0x4d, 0x49, 0x54, 0xc3, 0x94,
|
||||
0xbc, 0x58, 0x2e, 0x03, 0x02, 0xba, 0x43, 0x2b, 0xfd, 0x0f, 0x9c, 0x0f,
|
||||
0x91, 0x28, 0xf4, 0x3b, 0xe7, 0xb1, 0x3b, 0x69, 0xbd, 0x6a, 0x8f, 0x20,
|
||||
0xab, 0x8f, 0xd2, 0x5a, 0xf4, 0x00, 0x92, 0xcd, 0x45, 0xd5, 0x96, 0x37,
|
||||
0x31, 0x0e, 0xfd, 0x75, 0xda, 0xa4, 0x0c, 0x57, 0xcf, 0x7b, 0x1b, 0xf5,
|
||||
0xa9, 0xcd, 0xff, 0xaf, 0xe8, 0x54, 0x52, 0x8a, 0x9e, 0x03, 0x97, 0x5e,
|
||||
0x62, 0x3f, 0x09, 0x6d, 0x54, 0x61, 0x7d, 0xfc, 0x7a, 0x33, 0x85, 0x38,
|
||||
0x9a, 0x67, 0x4d, 0xb2, 0x24, 0xa7, 0x7d, 0x33, 0xff, 0x3d, 0xe5, 0x7f,
|
||||
0x7d, 0x09, 0x60, 0x87, 0xa6, 0xe4, 0x96, 0x2d, 0x3d, 0x1a, 0xa4, 0x3d,
|
||||
0x2e, 0x49, 0xcd, 0xb3, 0x62, 0x45, 0xa9, 0x84, 0xb3, 0xd8, 0xa5, 0x94,
|
||||
0x07, 0xf0, 0x67, 0x39, 0xbc, 0x85, 0x9d, 0x3f, 0x14, 0xd2, 0x53, 0x83,
|
||||
0x2e, 0x85, 0x89, 0x69, 0xc7, 0xe7, 0x88, 0xbf, 0x3e, 0x1d, 0x40, 0x53,
|
||||
0x95, 0xc8, 0x78, 0x03, 0x87, 0x80, 0x93, 0x9c, 0x88, 0x32, 0x70, 0x2e,
|
||||
0x91, 0x7b, 0x8f, 0x2b, 0x83, 0xd7, 0x32, 0x88, 0x5c, 0x94, 0x65, 0x4b,
|
||||
0x1a, 0x31, 0xe1, 0x16, 0x25, 0x03, 0x6e, 0xfd, 0x91, 0x7c, 0x33, 0x81,
|
||||
0xcd, 0x36, 0xbb, 0xf5, 0xd2, 0x7a, 0x65, 0x29, 0x34, 0xd7, 0x0e, 0x58,
|
||||
0x75, 0xef, 0xda, 0x5e, 0xc0, 0x38, 0x16, 0x02, 0xff, 0x42, 0x1a, 0xad,
|
||||
0xc5, 0x17, 0x61, 0x89, 0x83, 0xe1, 0xc0, 0x2c, 0x5f, 0xae, 0xaa, 0x4e,
|
||||
0x0f, 0x4c, 0xd6, 0xe6, 0x14, 0xf2, 0xe9, 0x06, 0xc0, 0x16, 0x05, 0x9d,
|
||||
0xd4, 0xa3, 0x32, 0x69, 0xa8, 0x8f, 0x51, 0x8c, 0x23, 0xfe, 0x66, 0x8b,
|
||||
0x79, 0x79, 0xc2, 0x6c, 0xe8, 0xff, 0x1f, 0x24, 0xf9, 0x7e, 0xe1, 0x17,
|
||||
0x23, 0x65, 0xf1, 0x53, 0x8e, 0x74, 0xd5, 0xb8, 0xc8, 0x95, 0x65, 0x00,
|
||||
0xf3, 0x5f, 0x88, 0x99, 0x77, 0x8f, 0x71, 0xe5, 0xac, 0xee, 0x85, 0x4a,
|
||||
0x22, 0x8b, 0x3b, 0xb9, 0xa6, 0x71, 0x54, 0x0a, 0x03, 0x60, 0x21, 0x82,
|
||||
0x2f, 0xd6, 0x20, 0x91, 0x5d, 0xd9, 0x33, 0x5e, 0x54, 0x48, 0xf7, 0xfb,
|
||||
0x6b, 0xd1, 0xef, 0x89, 0x0e, 0xd6, 0x4a, 0x18, 0x7d, 0x89, 0x19, 0x45,
|
||||
0xae, 0x60, 0x2c, 0x91, 0x0a, 0x2e, 0x9c, 0xae, 0x8b, 0xd4, 0xd8, 0x03,
|
||||
0x3d, 0x33, 0xc1, 0x31, 0x68, 0x7e, 0xed, 0xa8, 0xe3, 0xa8, 0x13, 0x65,
|
||||
0x64, 0xc6, 0x5c, 0x5d, 0x60, 0xef, 0xfa, 0xf1, 0x2d, 0x33, 0x32, 0xcb,
|
||||
0xc5, 0x4d, 0x0b, 0x48, 0xec, 0x84, 0x39, 0x92, 0x9a, 0xdc, 0x60, 0x0b,
|
||||
0x0a, 0x66, 0xd9, 0xaa, 0x04, 0x53, 0x84, 0xe4, 0xeb, 0x71, 0x40, 0x82,
|
||||
0xc6, 0x6c, 0xb8, 0xc0, 0xd1, 0x44, 0x9b, 0xb2, 0xb8, 0xa1, 0x5b, 0x14,
|
||||
0x3d, 0xc7, 0x13, 0x1b, 0xee, 0x19, 0xb0, 0x60, 0xc5, 0x0a, 0xc6, 0x40,
|
||||
0x7f, 0x0c, 0x2e, 0x75, 0x3c, 0x6f, 0x49, 0xba, 0x38, 0x0d, 0x03, 0x54,
|
||||
0x04, 0xa3, 0x51, 0x0e, 0xaf, 0x3e, 0x34, 0xcb, 0x13, 0x22, 0x15, 0xfa,
|
||||
0xc1, 0x16, 0x9e, 0x58, 0x78, 0x47, 0x9c, 0xad, 0x06, 0xcd, 0xf4, 0x10,
|
||||
0xee, 0x0d, 0x78, 0x64, 0x8d, 0x70, 0x45, 0x55, 0xc2, 0x76, 0x45, 0x40,
|
||||
0xf0, 0xe2, 0xfc, 0x0e, 0x8a, 0xc4, 0x78, 0x56, 0x7f, 0x8f, 0x05, 0x16,
|
||||
0x31, 0xd3, 0x42, 0xd7, 0xfe, 0xbb, 0xaa, 0x89, 0x3c, 0x53, 0xd8, 0xa6,
|
||||
0x95, 0x43, 0x72, 0xa2, 0x00, 0x15, 0xe7, 0x83, 0xea, 0x9e, 0xad, 0x69,
|
||||
0x93, 0xc5, 0x9d, 0xfc, 0xc2, 0x8d, 0x3c, 0x58, 0xd8, 0x70, 0xd7, 0xcb,
|
||||
0x65, 0x53, 0xc0, 0x9e, 0x83, 0x7e, 0x02, 0xbf, 0x6a, 0x90, 0x2f, 0xa2,
|
||||
0xbe, 0x5d, 0x68, 0xeb, 0xbd, 0xad, 0xaf, 0xee, 0x91, 0x8a, 0xeb, 0x4e,
|
||||
0x98, 0xc9, 0xed, 0x24, 0x0d, 0x0d, 0x8d, 0xdf, 0x16, 0xf6, 0x8e, 0x10,
|
||||
0x59, 0x38, 0xa4, 0xd1, 0x0c, 0x09, 0x61, 0x06, 0x63, 0xcc, 0x5a, 0x84,
|
||||
0x63, 0x1d, 0x90, 0xc8, 0xfb, 0x7b, 0x49, 0xcb, 0xcf, 0x5b, 0x34, 0x47,
|
||||
0x96, 0x4e, 0xb1, 0x13, 0x66, 0xd5, 0x1f, 0x18, 0x30, 0xdd, 0xd7, 0x9d,
|
||||
0xb8, 0x98, 0x41, 0xf2, 0x13, 0x1d, 0xc0, 0xbc, 0xf9, 0x03, 0x37, 0x48,
|
||||
0x06, 0x89, 0xcf, 0xa8, 0x2e, 0x68, 0x3f, 0x16, 0xf3, 0x41, 0x22, 0xfc,
|
||||
0x11, 0x2d, 0xb2, 0x29, 0x5c, 0x56, 0x0f, 0xf9, 0x84, 0x22, 0xee, 0x59,
|
||||
0xb3, 0x95, 0x0b, 0xc7, 0xf0, 0x3a, 0xe4, 0x08, 0xce, 0x91, 0xdc, 0xa4,
|
||||
0x1e, 0xad, 0x5e, 0x30, 0x9a, 0x0e, 0xe3, 0xb2, 0x81, 0x13, 0x97, 0x57,
|
||||
0x31, 0x8f, 0x9c, 0xe1, 0x92, 0xa3, 0xab, 0x69, 0x94, 0x02, 0x5c, 0x68,
|
||||
0xe9, 0x41, 0x1a, 0xc7, 0x17, 0xde, 0xaf, 0x77, 0xc5, 0x44, 0x2d, 0x6a,
|
||||
0x2f, 0xd0, 0x16, 0x41, 0x40, 0x74, 0x57, 0x01, 0xd0, 0xaa, 0xe6, 0x70,
|
||||
0x13, 0xf5, 0x3f, 0x0a, 0x39, 0x4d, 0xb1, 0x82, 0x87, 0x9b, 0xd6, 0x2b,
|
||||
0xbd, 0xec, 0xa7, 0xcb, 0xdb, 0x57, 0x4e, 0x49, 0xa5, 0x8a, 0xa4, 0xbe,
|
||||
0x83, 0xe6, 0x7c, 0x8b, 0x79, 0x4a, 0x7f, 0x39, 0x23, 0x07, 0xa8, 0xa9,
|
||||
0x4e, 0xbd, 0xc3, 0xbb, 0xcf, 0xbc, 0xe2, 0x48, 0xa3, 0x60, 0xc0, 0x2c,
|
||||
0x8d, 0x60, 0x26, 0x49, 0x07, 0x92, 0x78, 0xdb, 0x99, 0x94, 0x0e, 0x02,
|
||||
0x27, 0x08, 0x9c, 0xc8, 0x23, 0x03, 0xfb, 0x6d, 0x18, 0x89, 0xe7, 0x3c,
|
||||
0x08, 0xe5, 0x93, 0x31, 0xaa, 0x06, 0xa9, 0x86, 0x70, 0x40, 0x9e, 0x08,
|
||||
0x5d, 0x7d, 0x8d, 0xa8, 0xee, 0xa0, 0x31, 0x49, 0x35, 0x99, 0x78, 0xf3,
|
||||
0x97, 0x77, 0x05, 0x5f, 0xb2, 0xc8, 0xdc, 0xd8, 0xec, 0x9d, 0x48, 0xb5,
|
||||
0xa4, 0x13, 0x01, 0x45, 0xe9, 0xe3, 0x84, 0x1c, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x01, 0x00, 0x00, 0x40, 0xff, 0xff, 0xff, 0x3f,
|
||||
0xff, 0xc4, 0xfe, 0x3f, 0x02, 0x3b, 0xce, 0xfe, 0x03, 0x62, 0x39, 0x07,
|
||||
0x06, 0x62, 0x6b, 0x26, 0xf6, 0x1d, 0x36, 0x5f, 0x7e, 0x3d, 0xf2, 0x56,
|
||||
0x34, 0x33, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xcc, 0x65, 0x6a, 0x65, 0x66,
|
||||
0x9b, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51,
|
||||
0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45, 0x01, 0x00, 0x00, 0x40,
|
||||
0xff, 0xff, 0xff, 0x3f, 0xff, 0xc4, 0xfe, 0x3f, 0x02, 0x3b, 0xce, 0xfe,
|
||||
0x03, 0x62, 0x39, 0x07, 0x06, 0x62, 0x6b, 0x26, 0xf6, 0x1d, 0x36, 0x5f,
|
||||
0x7e, 0x3d, 0xf2, 0x56, 0x34, 0x33, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xcc,
|
||||
0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c,
|
||||
0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45,
|
||||
0x01, 0x00, 0x00, 0x80, 0x54, 0x55, 0x55, 0xd5, 0xa9, 0x4c, 0xa9, 0x2a,
|
||||
0xad, 0x08, 0x1e, 0x1b, 0xaf, 0xde, 0x06, 0x08, 0x5c, 0x89, 0x05, 0x80,
|
||||
0x11, 0x93, 0x58, 0x4d, 0xc5, 0x60, 0x9b, 0x60, 0x34, 0x33, 0x33, 0x33,
|
||||
0xcc, 0xcc, 0xcc, 0xcc, 0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32,
|
||||
0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c,
|
||||
0x98, 0x97, 0x8e, 0x45, 0x01, 0x00, 0x00, 0x80, 0x54, 0x55, 0x55, 0xd5,
|
||||
0xa9, 0x4c, 0xa9, 0x2a, 0xad, 0x08, 0x1e, 0x1b, 0xaf, 0xde, 0x06, 0x08,
|
||||
0x5c, 0x89, 0x05, 0x80, 0x11, 0x93, 0x58, 0x4d, 0xc5, 0x60, 0x9b, 0x60,
|
||||
0x25, 0x49, 0x92, 0x24, 0xdb, 0xb6, 0x6d, 0xdb, 0x48, 0x1a, 0x24, 0x49,
|
||||
0xdc, 0x2e, 0x36, 0xaa, 0x4a, 0x62, 0x77, 0x70, 0x4b, 0x62, 0xc7, 0x57,
|
||||
0x82, 0x91, 0x51, 0xe7, 0x60, 0x54, 0x1f, 0x21, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x3e, 0x2c, 0xe4, 0xf5, 0x63, 0xa7, 0xf2, 0x0e,
|
||||
0x51, 0x85, 0x27, 0x20, 0x86, 0xec, 0x5b, 0x36, 0xe4, 0xea, 0xf3, 0x0c,
|
||||
0x61, 0xaa, 0xc6, 0x06, 0x74, 0xa3, 0xcc, 0xc1, 0x24, 0x1c, 0x32, 0x69,
|
||||
0x95, 0x82, 0xd1, 0xb0, 0xf9, 0x5d, 0x48, 0x7e, 0x90, 0xf2, 0x18, 0x4e,
|
||||
0xad, 0x05, 0x11, 0x3b, 0xc9, 0xfa, 0xfe, 0xf4, 0xe4, 0x3a, 0xfc, 0xd1,
|
||||
0xb8, 0x4b, 0xee, 0xab, 0x1c, 0x80, 0x90, 0x0d, 0x01, 0x00, 0x00, 0x40,
|
||||
0xff, 0xff, 0xff, 0x3f, 0xff, 0xc4, 0xfe, 0x3f, 0x02, 0x3b, 0xce, 0xfe,
|
||||
0x03, 0x62, 0x39, 0x07, 0x06, 0x62, 0x6b, 0x26, 0xf6, 0x1d, 0x36, 0x5f,
|
||||
0x7e, 0x3d, 0xf2, 0x56, 0xa5, 0xd2, 0xd6, 0x97, 0x8d, 0x8a, 0xf9, 0x2a,
|
||||
0x81, 0x39, 0xfd, 0xef, 0xe5, 0xd1, 0x41, 0x7d, 0xb2, 0xa6, 0x20, 0x46,
|
||||
0x9a, 0xc4, 0x02, 0xcc, 0x8e, 0x38, 0xd8, 0xa2, 0x2d, 0x8f, 0xa7, 0x5e,
|
||||
0xc2, 0x91, 0x1b, 0xf6, 0x2c, 0xb3, 0x21, 0xed, 0x91, 0x8c, 0x3e, 0xb5,
|
||||
0x8e, 0x10, 0x26, 0x1f, 0x86, 0x4d, 0xae, 0x08, 0x65, 0x72, 0x2b, 0x5d,
|
||||
0x7c, 0x64, 0x10, 0xfa, 0xc1, 0xa6, 0x20, 0x55, 0x34, 0x33, 0x33, 0x33,
|
||||
0xcc, 0xcc, 0xcc, 0xcc, 0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32,
|
||||
0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c,
|
||||
0x98, 0x97, 0x8e, 0x45, 0xc2, 0x91, 0x1b, 0xf6, 0x2c, 0xb3, 0x21, 0xed,
|
||||
0x91, 0x8c, 0x3e, 0xb5, 0x8e, 0x10, 0x26, 0x1f, 0x86, 0x4d, 0xae, 0x08,
|
||||
0x65, 0x72, 0x2b, 0x5d, 0x7c, 0x64, 0x10, 0xfa, 0xc1, 0xa6, 0x20, 0x55,
|
||||
0x54, 0x24, 0x25, 0x3c, 0xb1, 0x71, 0x3e, 0xfe, 0x4e, 0xcd, 0x04, 0x70,
|
||||
0xb5, 0xcd, 0x65, 0x35, 0x9b, 0xb9, 0x69, 0x70, 0xfc, 0x9e, 0xc1, 0xa4,
|
||||
0x2c, 0xe6, 0xb5, 0xd6, 0x38, 0x75, 0x07, 0x52, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0xa2, 0x92, 0xa6, 0xd6, 0xd4, 0x73, 0x2f, 0x8d,
|
||||
0x27, 0x96, 0x8a, 0xc6, 0xc2, 0xbe, 0x74, 0x03, 0x5f, 0x90, 0x8e, 0xac,
|
||||
0x46, 0x55, 0x68, 0x45, 0x37, 0x1a, 0xb9, 0xf9, 0x97, 0x93, 0xd3, 0x59,
|
||||
0x64, 0xe7, 0x4f, 0xee, 0x6e, 0x02, 0x20, 0x8d, 0xbb, 0xe5, 0x84, 0x23,
|
||||
0xf2, 0x41, 0x5f, 0x9f, 0xb2, 0xcf, 0xe4, 0x7d, 0xa9, 0x3a, 0xde, 0xdf,
|
||||
0xd5, 0xb6, 0x90, 0xd5, 0x24, 0xa0, 0xe0, 0x47, 0xd4, 0xfc, 0x91, 0xd8,
|
||||
0xff, 0x9b, 0x40, 0xeb, 0x00, 0x2b, 0x35, 0x8c, 0x86, 0x3c, 0x71, 0xa0,
|
||||
0x84, 0xfc, 0x18, 0xf1, 0x16, 0x08, 0x99, 0xe5, 0x0c, 0x47, 0x83, 0xcb,
|
||||
0x7e, 0x7f, 0x96, 0x58, 0x72, 0x40, 0x10, 0x59, 0x28, 0x31, 0x2f, 0x0c,
|
||||
0x1c, 0xa1, 0x19, 0x7f, 0x78, 0x5a, 0x5b, 0x8b, 0x40, 0xe2, 0x52, 0xa3,
|
||||
0xda, 0xea, 0xec, 0x8f, 0x71, 0xbc, 0x5d, 0x9a, 0xb2, 0xe8, 0x86, 0x6b,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0xf6, 0xd4, 0x61, 0x0e,
|
||||
0xe7, 0x6c, 0x28, 0x1a, 0x40, 0x1b, 0x06, 0x94, 0x15, 0xb8, 0x36, 0x75,
|
||||
0xd6, 0x24, 0xc8, 0x9b, 0x0d, 0xde, 0x46, 0xc4, 0x4d, 0x88, 0x8c, 0xb5,
|
||||
0x17, 0xfd, 0x3a, 0x6b, 0x8b, 0xfc, 0x1a, 0xd8, 0x20, 0x32, 0xfa, 0x3d,
|
||||
0xe4, 0xac, 0xa3, 0xc4, 0x8c, 0xce, 0xac, 0xbf, 0x42, 0x77, 0x63, 0x24,
|
||||
0x8d, 0x33, 0x7f, 0xc1, 0x04, 0x61, 0x88, 0x6e, 0xae, 0x24, 0x52, 0x2f,
|
||||
0xc6, 0xe7, 0x50, 0x37, 0x91, 0x5c, 0xb7, 0x6a, 0x69, 0xeb, 0xfc, 0x51,
|
||||
0x70, 0x80, 0xa5, 0x77, 0x83, 0xe5, 0x3e, 0xba, 0x99, 0xfa, 0x5b, 0xfa,
|
||||
0x44, 0xc2, 0x17, 0xe5, 0x7b, 0xab, 0xfe, 0x58, 0x07, 0x0a, 0x3e, 0x04,
|
||||
0x21, 0x6c, 0x74, 0x03, 0x24, 0x15, 0x15, 0x1a, 0x63, 0xa3, 0xf5, 0xaf,
|
||||
0x38, 0xf9, 0xf0, 0x3f, 0x1c, 0xf2, 0x2f, 0xba, 0x9e, 0x7d, 0xac, 0x24,
|
||||
0x16, 0xc3, 0x93, 0x1e, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x7c, 0x21, 0x29, 0xeb, 0xdd, 0xd9, 0xc2, 0xc2, 0x87, 0x30, 0x82, 0xe0,
|
||||
0xe9, 0xa7, 0x35, 0x64, 0xc9, 0x67, 0xa9, 0xed, 0xd0, 0xc5, 0x91, 0x3b,
|
||||
0xc9, 0xf2, 0xd4, 0xd9, 0xc5, 0x89, 0x91, 0x4f, 0xc3, 0xd3, 0xbe, 0x3f,
|
||||
0xb1, 0x31, 0x98, 0x25, 0x25, 0x83, 0x24, 0xcd, 0x54, 0x99, 0xdb, 0x6f,
|
||||
0xa7, 0x2d, 0x31, 0xc4, 0x53, 0xe1, 0x69, 0xa6, 0x35, 0xd5, 0x8d, 0x11,
|
||||
0x70, 0xfa, 0x26, 0x1e, 0x28, 0xbd, 0xfe, 0x69, 0x57, 0x63, 0x6c, 0x33,
|
||||
0xe6, 0xb6, 0x10, 0x41, 0xb8, 0xbe, 0x1f, 0xf6, 0x3a, 0xbe, 0xb5, 0x6a,
|
||||
0x57, 0x66, 0xd0, 0xe4, 0x2a, 0x6b, 0xc3, 0xaa, 0x4f, 0xf2, 0xba, 0x5b,
|
||||
0xd7, 0xbe, 0xb3, 0xcc, 0x01, 0x81, 0x30, 0xdc, 0x9a, 0xd6, 0xab, 0x6a,
|
||||
0x87, 0x56, 0x69, 0x23, 0xef, 0x37, 0xac, 0xbc, 0xaf, 0xec, 0x35, 0xea,
|
||||
0x74, 0xc9, 0xbf, 0x9d, 0x06, 0x76, 0xbc, 0x1d, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0xcb, 0xf6, 0x96, 0x28, 0xa7, 0x00, 0x6b, 0x60,
|
||||
0x4b, 0xd1, 0x5a, 0x13, 0x03, 0xa3, 0xda, 0x7d, 0xb6, 0x2d, 0xaa, 0x2b,
|
||||
0x12, 0xf8, 0x5b, 0x0c, 0x81, 0xb4, 0x61, 0x51, 0x55, 0x40, 0x5c, 0x5e,
|
||||
0xb1, 0x5a, 0x71, 0xf9, 0x06, 0x13, 0xed, 0x2b, 0x89, 0xc0, 0x01, 0x55,
|
||||
0xa8, 0xaa, 0x45, 0x36, 0xc8, 0x84, 0xfc, 0x78, 0x69, 0x40, 0x55, 0x89,
|
||||
0x88, 0xbf, 0x02, 0xd3, 0xa7, 0xd9, 0xc5, 0x48, 0x6a, 0x73, 0xaa, 0x9b,
|
||||
0x54, 0x06, 0x6c, 0x88, 0x2b, 0xef, 0x5a, 0x3f, 0x70, 0xbd, 0xb8, 0x38,
|
||||
0x75, 0x9d, 0xe1, 0xf2, 0x8a, 0x8a, 0x69, 0x26, 0x6f, 0x07, 0xf1, 0x5b,
|
||||
0x00, 0x0a, 0x28, 0x41, 0x3a, 0xb1, 0xf8, 0x8d, 0xaa, 0x7f, 0xd5, 0x90,
|
||||
0x34, 0xdf, 0xc9, 0xa0, 0x59, 0x13, 0x36, 0x53, 0x28, 0x15, 0xed, 0x06,
|
||||
0x7c, 0x1c, 0x2d, 0x43, 0xab, 0x50, 0x3c, 0xc9, 0x4b, 0x14, 0xda, 0x36,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x7a, 0xc0, 0x5b, 0x5c,
|
||||
0x19, 0x50, 0x23, 0xa3, 0x8b, 0x3d, 0x80, 0x09, 0x63, 0x38, 0x5e, 0x65,
|
||||
0x16, 0xd0, 0xa3, 0xa3, 0xe7, 0x28, 0x5a, 0x5f, 0xa7, 0x0a, 0x4f, 0x0c,
|
||||
0x34, 0x95, 0x26, 0x70, 0x85, 0x11, 0x97, 0x36, 0xa2, 0x76, 0x64, 0xd9,
|
||||
0x7a, 0x8e, 0xa6, 0xa1, 0x51, 0x9a, 0x89, 0xda, 0x38, 0x46, 0xd0, 0x68,
|
||||
0x30, 0xde, 0x70, 0xf8, 0x88, 0x7d, 0xa9, 0x19, 0x45, 0x62, 0xb0, 0x6f,
|
||||
0xf3, 0xc4, 0xd7, 0xfd, 0x95, 0xb9, 0xd1, 0x1c, 0x7e, 0xb5, 0x58, 0xa6,
|
||||
0x63, 0xaf, 0xcb, 0x4b, 0x52, 0x83, 0x85, 0x0e, 0xed, 0x33, 0xfa, 0xb3,
|
||||
0x61, 0x90, 0x61, 0x68, 0xc2, 0xba, 0x54, 0x5d, 0x23, 0xc8, 0xfb, 0x0e,
|
||||
0x7d, 0x7a, 0x8c, 0xa9, 0x09, 0x2a, 0x21, 0x4e, 0x23, 0x02, 0xf2, 0x0d,
|
||||
0x84, 0xd1, 0xab, 0x8a, 0x13, 0x8c, 0x9a, 0x6a, 0x20, 0xd2, 0x09, 0x82,
|
||||
0xfb, 0x1e, 0xe6, 0x17, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0xd1, 0x0f, 0xb4, 0xbd, 0x0e, 0x68, 0x31, 0x1a, 0x30, 0xc8, 0x92, 0xd8,
|
||||
0xaa, 0xcd, 0x04, 0x05, 0x87, 0x5c, 0x49, 0x68, 0xd2, 0xba, 0x3e, 0xb2,
|
||||
0x09, 0xa3, 0xb0, 0x8a, 0x59, 0xcf, 0xd2, 0x4b, 0xeb, 0xf7, 0xaa, 0x35,
|
||||
0xf6, 0x48, 0x57, 0x40, 0x6b, 0x0e, 0x26, 0x37, 0x2a, 0x91, 0x0c, 0xea,
|
||||
0x3f, 0x26, 0x97, 0xc8, 0xc4, 0x4a, 0x18, 0xd3, 0x06, 0x12, 0x93, 0xa5,
|
||||
0xad, 0x9a, 0x69, 0x6a, 0xc1, 0x4d, 0x2c, 0x31, 0x45, 0x03, 0x3a, 0x2e,
|
||||
0x24, 0xd5, 0xd0, 0xdf, 0xeb, 0xdb, 0xdf, 0xd0, 0x6f, 0x3d, 0x14, 0xa8,
|
||||
0x7a, 0x5f, 0x53, 0xe3, 0x9e, 0xb8, 0x68, 0x4d, 0x91, 0xe2, 0x0a, 0x48,
|
||||
0x94, 0x73, 0xd9, 0x84, 0x9a, 0x55, 0x3e, 0x42, 0x99, 0xe7, 0x1e, 0x73,
|
||||
0x64, 0xd4, 0x8d, 0x26, 0xf0, 0x16, 0x36, 0x08, 0x7b, 0xde, 0x8c, 0xe9,
|
||||
0x0a, 0x27, 0x3a, 0x90, 0x6f, 0x6c, 0x90, 0x66, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0xd1, 0x81, 0xc0, 0x8c, 0xc5, 0x6d, 0x18, 0x32,
|
||||
0xd7, 0x82, 0x81, 0xb4, 0x0f, 0x0b, 0x34, 0x91, 0xe1, 0xec, 0x57, 0xf2,
|
||||
0x17, 0xca, 0x56, 0x15, 0x1f, 0x7d, 0xa7, 0x27, 0x3a, 0xaf, 0xc3, 0x24,
|
||||
0x0e, 0x26, 0x24, 0x5e, 0x31, 0x77, 0x45, 0xfb, 0x9c, 0x71, 0xaf, 0x19,
|
||||
0x73, 0xd0, 0x33, 0x0b, 0x22, 0xd8, 0xde, 0xd0, 0x42, 0x79, 0xc0, 0x40,
|
||||
0x23, 0x44, 0x1e, 0xa3, 0xdf, 0x65, 0x48, 0x50, 0x96, 0xdc, 0xc5, 0x98,
|
||||
0x9b, 0x13, 0xa8, 0x29, 0x6e, 0x79, 0x02, 0xef, 0x50, 0xd0, 0xdf, 0x71,
|
||||
0x29, 0xd3, 0xa4, 0x3a, 0xa4, 0x13, 0xc9, 0x0f, 0xa3, 0xff, 0x73, 0x25,
|
||||
0xb0, 0xb8, 0xe0, 0x07, 0x02, 0xfd, 0xb5, 0x6f, 0xb4, 0x95, 0xc5, 0x49,
|
||||
0x8b, 0x13, 0xb4, 0xb9, 0x4f, 0xba, 0xd3, 0x83, 0xc2, 0x8c, 0x7a, 0xbe,
|
||||
0x9a, 0x0b, 0x7d, 0x03, 0x75, 0xf6, 0xbd, 0x84, 0xbd, 0xc0, 0xd4, 0x33,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x60, 0x31, 0xd7, 0xab,
|
||||
0x48, 0x21, 0x3b, 0x89, 0xc4, 0x3f, 0xe9, 0x30, 0xf6, 0xbc, 0x55, 0xf0,
|
||||
0xcf, 0x49, 0x4d, 0xf3, 0x3f, 0x66, 0xbe, 0x39, 0x3c, 0xcd, 0x53, 0x36,
|
||||
0xc6, 0xd6, 0x04, 0x62, 0x37, 0x64, 0x6e, 0x86, 0x83, 0x2f, 0x1a, 0xe3,
|
||||
0xd8, 0xcb, 0x6b, 0xcc, 0x18, 0x8c, 0xbc, 0x89, 0x97, 0x69, 0xa7, 0xe9,
|
||||
0x61, 0x1f, 0xe6, 0x92, 0x3e, 0x34, 0x7b, 0xfa, 0xee, 0x9d, 0xcb, 0x03,
|
||||
0x26, 0x8e, 0xd5, 0xc7, 0x11, 0xfb, 0x18, 0xc6, 0xe0, 0xd0, 0x7e, 0xb7,
|
||||
0x77, 0x2c, 0x6e, 0xc0, 0x48, 0x33, 0x34, 0x11, 0x1c, 0x7d, 0x55, 0xa5,
|
||||
0xca, 0xb3, 0x2d, 0xc6, 0x06, 0x59, 0x9b, 0x27, 0x8a, 0x1a, 0xd6, 0xa4,
|
||||
0x5c, 0x48, 0x9f, 0x72, 0x20, 0x69, 0xdc, 0xbd, 0xf0, 0xba, 0x39, 0x4c,
|
||||
0x70, 0xa5, 0x78, 0xb5, 0x87, 0x9c, 0x00, 0xe0, 0xc8, 0xf1, 0x8c, 0x03,
|
||||
0x3a, 0x2c, 0x1c, 0x2e, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x83, 0x09, 0xd7, 0x49, 0xb5, 0x30, 0x90, 0x05, 0x98, 0x2a, 0x2f, 0x01,
|
||||
0x25, 0x9f, 0x29, 0xf4, 0xa1, 0x30, 0x62, 0x62, 0x05, 0xbb, 0xa6, 0xda,
|
||||
0x2f, 0x82, 0x41, 0xad, 0x2f, 0x4a, 0x49, 0x2f, 0x06, 0x35, 0xd8, 0x2f,
|
||||
0x0c, 0xfa, 0xa5, 0x8c, 0x8e, 0xe7, 0x8a, 0x31, 0x83, 0x67, 0xf4, 0x34,
|
||||
0xa2, 0xa2, 0x88, 0x6c, 0x71, 0xc7, 0xf1, 0x4c, 0xca, 0xba, 0x0d, 0x57,
|
||||
0xc8, 0xef, 0x8f, 0x42, 0x9b, 0x2d, 0x86, 0x4a, 0x6a, 0x2c, 0xe7, 0x42,
|
||||
0x56, 0xe5, 0x36, 0x46, 0xf6, 0xa6, 0x25, 0x4c, 0x83, 0xc1, 0x46, 0x19,
|
||||
0x22, 0xf9, 0xcd, 0x19, 0x31, 0x55, 0xaa, 0x2b, 0xa5, 0x59, 0x78, 0x70,
|
||||
0x90, 0x84, 0x93, 0x55, 0xb8, 0x46, 0x4d, 0x54, 0xaa, 0x13, 0x1e, 0x5f,
|
||||
0x72, 0x9a, 0xb5, 0x05, 0x48, 0x28, 0x3d, 0x78, 0xaf, 0xd3, 0x25, 0x46,
|
||||
0x9b, 0xd1, 0x06, 0x60, 0x74, 0x34, 0x4e, 0x38, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x79, 0xc6, 0x8a, 0xe3, 0xae, 0xf7, 0xaf, 0x3a,
|
||||
0xb6, 0xe4, 0xd3, 0xdd, 0x69, 0x6a, 0x24, 0x45, 0x6e, 0x16, 0x97, 0x1c,
|
||||
0x3e, 0xb8, 0x34, 0x46, 0xf3, 0xd2, 0x73, 0x3f, 0x83, 0x89, 0xd2, 0x0c,
|
||||
0x5a, 0x95, 0x79, 0x68, 0x8d, 0x99, 0x66, 0xad, 0xfc, 0x2d, 0x15, 0xfb,
|
||||
0x2b, 0xce, 0x6d, 0x15, 0x95, 0x82, 0x9d, 0xae, 0x31, 0x31, 0x3b, 0xd5,
|
||||
0x7a, 0xe3, 0x66, 0x40, 0x34, 0x8b, 0xc0, 0x2f, 0x94, 0x52, 0x55, 0x33,
|
||||
0xce, 0x37, 0x27, 0xe3, 0x35, 0x3f, 0x63, 0x58, 0x7f, 0x92, 0x2a, 0x4e,
|
||||
0xbd, 0x43, 0x10, 0x6e, 0xc6, 0xc3, 0x86, 0x31, 0xd8, 0xb8, 0xe0, 0x39,
|
||||
0x48, 0xf1, 0xa0, 0x49, 0xec, 0x14, 0x25, 0x1b, 0xf1, 0x2d, 0x6f, 0x1a,
|
||||
0x96, 0xb2, 0x0c, 0x08, 0x86, 0x9b, 0x9f, 0xfa, 0xe5, 0x1a, 0x00, 0xb6,
|
||||
0x54, 0x35, 0xcd, 0x4a, 0xb2, 0x93, 0x6e, 0x09, 0xb4, 0xb1, 0x61, 0x4c,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0xdb, 0x04, 0x30, 0x03,
|
||||
0xcb, 0xf5, 0x5e, 0xe4, 0xbc, 0x0e, 0xcf, 0x6f, 0x40, 0x4d, 0xfa, 0x18,
|
||||
0x32, 0x71, 0x77, 0x86, 0x99, 0x1d, 0xb7, 0xb6, 0x34, 0x8a, 0x42, 0x0e,
|
||||
0x08, 0x86, 0x14, 0x50, 0x2e, 0x67, 0x22, 0x47, 0x1e, 0x39, 0xfb, 0x7d,
|
||||
0x35, 0xc4, 0x12, 0x18, 0x0e, 0x30, 0x86, 0x19, 0xfb, 0x76, 0x16, 0x19,
|
||||
0x6d, 0x8c, 0x75, 0x95, 0x23, 0x69, 0x48, 0x28, 0xfd, 0x9b, 0x0b, 0x64,
|
||||
0x91, 0xe6, 0x92, 0xd3, 0x1a, 0x8d, 0x5f, 0x08, 0xa9, 0xee, 0x34, 0x8b,
|
||||
0xe2, 0x71, 0x86, 0x8c, 0xf8, 0xa8, 0x27, 0x08, 0xd1, 0x00, 0x12, 0x77,
|
||||
0xce, 0x0b, 0xae, 0x05, 0x38, 0x38, 0x3b, 0x6f, 0xc8, 0xda, 0x82, 0x9c,
|
||||
0x50, 0x72, 0x45, 0xaf, 0xad, 0x71, 0x4a, 0x6b, 0x19, 0x6b, 0x7c, 0x1e,
|
||||
0x6e, 0xe8, 0x87, 0xaa, 0x9b, 0xe5, 0x38, 0x9a, 0x22, 0x19, 0xd2, 0x9a,
|
||||
0x94, 0x15, 0x70, 0x4c, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0xd6, 0xbc, 0xf5, 0x83, 0xf1, 0xfe, 0xde, 0xf5, 0xd8, 0xae, 0xf8, 0xb3,
|
||||
0x54, 0x9d, 0x69, 0x55, 0x8b, 0x2b, 0xa9, 0x5e, 0x78, 0xaf, 0x24, 0x05,
|
||||
0x58, 0x70, 0x69, 0xcd, 0x88, 0xc4, 0x0f, 0x4f, 0x68, 0xc6, 0x43, 0x2f,
|
||||
0xa6, 0x92, 0xea, 0x6e, 0xb9, 0x77, 0x74, 0xae, 0x8c, 0xfd, 0x9f, 0x79,
|
||||
0xc4, 0xe1, 0x7a, 0x07, 0x6c, 0x38, 0x40, 0xdd, 0xf9, 0x1c, 0x6d, 0x19,
|
||||
0xc8, 0xf1, 0xe0, 0x18, 0xc2, 0xa5, 0xf2, 0x5f, 0xde, 0x70, 0x37, 0x1c,
|
||||
0x82, 0x56, 0x5e, 0xde, 0x09, 0x70, 0x48, 0xad, 0xb8, 0x73, 0xe7, 0x90,
|
||||
0x36, 0x88, 0x4d, 0x68, 0x32, 0x0b, 0x1d, 0x77, 0x71, 0x9a, 0x21, 0x1c,
|
||||
0x12, 0x3a, 0x4e, 0x82, 0x34, 0xc7, 0xfa, 0xa9, 0x2b, 0x10, 0xa1, 0x6b,
|
||||
0x9b, 0x11, 0xdb, 0x82, 0x42, 0x91, 0x02, 0x88, 0xe4, 0xba, 0x5f, 0x57,
|
||||
0xd9, 0xac, 0x30, 0x98, 0x05, 0xa8, 0x2c, 0x4d, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0xc5, 0xb5, 0x00, 0xde, 0x7e, 0x5c, 0xfe, 0x18,
|
||||
0xec, 0xaa, 0x55, 0x85, 0xcb, 0x66, 0x55, 0x52, 0xad, 0xcb, 0x74, 0x28,
|
||||
0x93, 0x85, 0xd8, 0x94, 0x97, 0x4d, 0x64, 0x6e, 0xd8, 0xfe, 0x99, 0x3a,
|
||||
0x57, 0x1d, 0x51, 0xc1, 0xea, 0xdd, 0xd2, 0x38, 0xdd, 0x3d, 0xba, 0x2a,
|
||||
0x71, 0x96, 0xa2, 0x97, 0xa6, 0x00, 0xc9, 0xc6, 0x65, 0xbb, 0xc6, 0x27,
|
||||
0xd6, 0x04, 0x49, 0x3a, 0x5c, 0xb3, 0xb0, 0x05, 0x05, 0xea, 0x70, 0x92,
|
||||
0xe7, 0xf2, 0x43, 0x67, 0x49, 0x6b, 0x96, 0x0f, 0x95, 0x7a, 0x15, 0x3a,
|
||||
0x4f, 0x0f, 0xf5, 0xf6, 0xf1, 0x51, 0xe7, 0x12, 0x83, 0x8d, 0x2a, 0xad,
|
||||
0xb3, 0x29, 0x0b, 0x66, 0x20, 0x30, 0xf5, 0x8f, 0xb9, 0x81, 0x61, 0x2d,
|
||||
0xb4, 0x44, 0xe7, 0x89, 0x89, 0x1b, 0x4b, 0xb8, 0x90, 0x2d, 0x54, 0xa7,
|
||||
0x8e, 0x58, 0x6c, 0x3c, 0xe1, 0x31, 0xba, 0x3e, 0xbf, 0xe2, 0xb4, 0x5c,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x72, 0xdc, 0xc0, 0xee,
|
||||
0x8c, 0x48, 0x9a, 0x30, 0x4b, 0x5f, 0xfc, 0x54, 0xe7, 0x0b, 0xc7, 0x1d,
|
||||
0xfc, 0x66, 0x27, 0x13, 0x6d, 0x4d, 0x0c, 0x15, 0x76, 0x71, 0x51, 0xf3,
|
||||
0x25, 0xfd, 0x2c, 0x37, 0xf7, 0xc0, 0xce, 0xcc, 0xa3, 0x90, 0x0b, 0xbd,
|
||||
0x5e, 0x6e, 0x09, 0x8a, 0xde, 0x6f, 0x9c, 0x6d, 0xdc, 0xf1, 0xeb, 0x6b,
|
||||
0xd0, 0x3d, 0x38, 0x8c, 0xd0, 0xbf, 0xaf, 0xbf, 0xe2, 0xb3, 0x95, 0x4f,
|
||||
0x4e, 0x11, 0x43, 0xe0, 0x90, 0x60, 0xca, 0x61, 0xc5, 0xab, 0xfc, 0xd5,
|
||||
0x19, 0x02, 0xe6, 0xee, 0x32, 0xd3, 0x93, 0xc1, 0x6c, 0x69, 0x07, 0xba,
|
||||
0x37, 0x7a, 0x55, 0xed, 0xb3, 0xda, 0xbf, 0x3a, 0x56, 0x67, 0xbb, 0x94,
|
||||
0xc3, 0x70, 0x3e, 0xf6, 0x35, 0xdf, 0x5d, 0xf6, 0xec, 0x4c, 0x76, 0x88,
|
||||
0x22, 0xcb, 0xb8, 0x87, 0x6f, 0x73, 0x48, 0xc5, 0xae, 0x2f, 0x89, 0x18,
|
||||
0xab, 0x89, 0x0c, 0x4e, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x62, 0x6e, 0xaf, 0x1e, 0xc0, 0xc8, 0xa0, 0x0f, 0x41, 0x9c, 0x9a, 0x41,
|
||||
0x06, 0xeb, 0x2e, 0xe5, 0x1a, 0xf4, 0x0d, 0x48, 0x40, 0x95, 0x0e, 0xb0,
|
||||
0xbb, 0xc3, 0x0c, 0x66, 0x7a, 0xd9, 0xb4, 0x0c, 0x5b, 0x03, 0x93, 0xa6,
|
||||
0xa8, 0x9a, 0xea, 0x96, 0x44, 0xcb, 0x12, 0xae, 0x40, 0x60, 0x03, 0xfc,
|
||||
0xb7, 0x1b, 0x2d, 0xa0, 0x12, 0xd3, 0x30, 0x74, 0x66, 0xcc, 0xa4, 0xfa,
|
||||
0xca, 0x5b, 0x20, 0x25, 0x7f, 0x66, 0x6d, 0xad, 0x3a, 0x65, 0x13, 0xc3,
|
||||
0x51, 0x00, 0x54, 0x5c, 0x61, 0x0c, 0x76, 0xb7, 0x8b, 0xe6, 0x97, 0xb1,
|
||||
0x94, 0x78, 0x4d, 0x2c, 0x33, 0xc7, 0xf0, 0x09, 0xba, 0x2d, 0xf5, 0x60,
|
||||
0x6d, 0x86, 0x75, 0x71, 0xed, 0xc9, 0x73, 0x2d, 0x73, 0x2d, 0x8a, 0xfb,
|
||||
0x53, 0xa3, 0x1a, 0xc0, 0xe6, 0x8c, 0xd0, 0x6f, 0x98, 0xfc, 0x00, 0xfe,
|
||||
0x8e, 0xd9, 0x2a, 0x39, 0x5c, 0xef, 0x79, 0x1b, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x1b, 0x0e, 0xb3, 0x6e, 0xec, 0x41, 0x47, 0x9c,
|
||||
0x86, 0x65, 0xba, 0x43, 0xfd, 0xef, 0xb4, 0x42, 0xca, 0x96, 0x39, 0xec,
|
||||
0x62, 0x22, 0x73, 0xf7, 0xed, 0xd3, 0x27, 0xef, 0x57, 0x7f, 0x9b, 0x61,
|
||||
0x98, 0x00, 0x63, 0xbb, 0x4f, 0x68, 0x42, 0x1e, 0xe3, 0xea, 0x08, 0xf3,
|
||||
0xa4, 0xe2, 0x9e, 0x71, 0x0a, 0x45, 0x90, 0x7f, 0x93, 0x18, 0x1d, 0x1e,
|
||||
0xf1, 0x47, 0x79, 0x91, 0xe4, 0x73, 0xbf, 0x5f, 0x7e, 0xcb, 0x83, 0xde,
|
||||
0xde, 0x88, 0xa7, 0xe0, 0x65, 0x01, 0x5b, 0x94, 0x64, 0xa8, 0x12, 0xb3,
|
||||
0xde, 0x22, 0x82, 0x62, 0x5d, 0x30, 0x11, 0xc7, 0x7a, 0xa8, 0xa0, 0xd6,
|
||||
0xab, 0x70, 0x88, 0x59, 0x92, 0x3f, 0xf2, 0x6b, 0x25, 0xbb, 0x11, 0xb8,
|
||||
0xda, 0x29, 0x5e, 0xde, 0x29, 0xf0, 0x50, 0x8d, 0xf2, 0x76, 0x05, 0xc0,
|
||||
0xb8, 0x9f, 0xa1, 0xe7, 0x7e, 0x85, 0x07, 0xfa, 0xda, 0xed, 0x97, 0x0a,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x0e, 0xa5, 0x09, 0x60,
|
||||
0x5e, 0xeb, 0xb2, 0x0a, 0xda, 0x15, 0x84, 0xad, 0xd7, 0xf2, 0x0e, 0x1a,
|
||||
0xce, 0x93, 0xea, 0xfa, 0x1d, 0xf5, 0xfb, 0x2d, 0x45, 0x02, 0x26, 0x89,
|
||||
0xf5, 0x4a, 0x1f, 0x4a, 0x4b, 0x39, 0xfb, 0xdd, 0x38, 0xef, 0x39, 0x55,
|
||||
0x55, 0xdf, 0xc4, 0x5e, 0x23, 0x83, 0x02, 0x48, 0x60, 0xf7, 0x1b, 0xcc,
|
||||
0xe8, 0x6f, 0x86, 0xdf, 0x0e, 0xcf, 0xa1, 0x3d, 0x6e, 0xd4, 0xb4, 0x20,
|
||||
0x5d, 0xee, 0xf2, 0xe7, 0x78, 0xb8, 0x40, 0xf7, 0xad, 0xe2, 0x81, 0xe7,
|
||||
0xfa, 0x26, 0x92, 0x0f, 0x24, 0x87, 0xc9, 0x44, 0x5f, 0x90, 0x9d, 0x25,
|
||||
0xaa, 0xb7, 0x95, 0x76, 0x89, 0x53, 0xd5, 0x5f, 0xe8, 0xa6, 0x59, 0xba,
|
||||
0x77, 0xf1, 0x05, 0xe8, 0x2e, 0x5d, 0x24, 0x31, 0x59, 0xf8, 0x13, 0x02,
|
||||
0xfe, 0x9e, 0x22, 0x7b, 0xa7, 0xb1, 0xa1, 0xba, 0xb0, 0xb7, 0xbd, 0xd1,
|
||||
0x12, 0x38, 0x99, 0x61, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0xb2, 0xcc, 0xab, 0x2f, 0x02, 0xe8, 0x24, 0xdd, 0x52, 0xf7, 0xe4, 0x98,
|
||||
0x90, 0x56, 0x66, 0x7c, 0xef, 0x7c, 0x42, 0xf3, 0x4a, 0x71, 0x47, 0x18,
|
||||
0x17, 0x8c, 0x37, 0xac, 0x03, 0xde, 0xf7, 0x55, 0x75, 0xd4, 0x7a, 0x60,
|
||||
0x4e, 0xc3, 0xd0, 0xc4, 0xd4, 0x29, 0xd6, 0xc2, 0x5a, 0x10, 0x42, 0x98,
|
||||
0x1f, 0xcd, 0x91, 0xe2, 0xe5, 0xc3, 0x62, 0x26, 0x01, 0xda, 0x4b, 0xde,
|
||||
0xf2, 0x37, 0xab, 0x37, 0x30, 0xaf, 0x44, 0x39, 0xb0, 0xa0, 0xa1, 0x84,
|
||||
0xbb, 0x01, 0x51, 0x77, 0x94, 0x9f, 0x1f, 0x67, 0x92, 0x39, 0x00, 0x69,
|
||||
0x7a, 0x68, 0x79, 0xf4, 0x8e, 0x28, 0xde, 0xe0, 0x4f, 0x0a, 0x4d, 0x00,
|
||||
0xf0, 0xc3, 0xf8, 0x2a, 0xd3, 0x5e, 0xc2, 0x92, 0x9d, 0x1f, 0x7e, 0x77,
|
||||
0x88, 0x25, 0x9e, 0xd7, 0xaa, 0x95, 0xbd, 0x5a, 0x85, 0x61, 0x54, 0x41,
|
||||
0xad, 0x8f, 0x8f, 0x02, 0x6b, 0xd5, 0x8d, 0x40, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0xc0, 0xb2, 0x14, 0x11, 0x7c, 0xd7, 0xd4, 0x99,
|
||||
0xa2, 0xd1, 0x3f, 0xf2, 0xf9, 0x9c, 0x7e, 0xd9, 0x72, 0x9b, 0x8b, 0x73,
|
||||
0xeb, 0x6c, 0x2b, 0xc0, 0x16, 0x3b, 0x8d, 0xa3, 0x36, 0x95, 0xfa, 0x44,
|
||||
0xd9, 0xd8, 0x58, 0xfd, 0x23, 0x67, 0x7f, 0xa2, 0xc4, 0x67, 0x69, 0xbb,
|
||||
0x18, 0x52, 0xc8, 0x38, 0xef, 0xad, 0x36, 0x79, 0x0e, 0x43, 0x17, 0x87,
|
||||
0x3d, 0x1e, 0x6e, 0xf7, 0x06, 0xa5, 0xc2, 0x10, 0x55, 0x73, 0x3a, 0x04,
|
||||
0x3a, 0x32, 0x33, 0xde, 0x21, 0x54, 0xbf, 0xde, 0xd0, 0x5f, 0x2d, 0xe8,
|
||||
0x3a, 0x6f, 0x9b, 0xcb, 0x59, 0x32, 0x95, 0xb7, 0x63, 0xea, 0x6a, 0x07,
|
||||
0x64, 0xa7, 0x6f, 0x3d, 0x55, 0x2a, 0x89, 0x52, 0xda, 0x87, 0xbf, 0xaa,
|
||||
0xd4, 0xbf, 0x97, 0xc0, 0xea, 0xfc, 0xc3, 0x2f, 0x2f, 0xcf, 0x8f, 0xf5,
|
||||
0x7d, 0xfe, 0x0f, 0xf3, 0x13, 0x23, 0x91, 0x76, 0xa8, 0xc5, 0x61, 0x5a,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x5c, 0xbf, 0x5f, 0xe9,
|
||||
0x15, 0x89, 0xfb, 0xdb, 0xaf, 0x98, 0x7b, 0x9c, 0x9d, 0x4f, 0x11, 0xba,
|
||||
0xaf, 0x71, 0x71, 0xc8, 0x09, 0x4e, 0xaa, 0xbe, 0x20, 0x14, 0x24, 0xc8,
|
||||
0x5d, 0xa1, 0x18, 0x3b, 0xf6, 0x48, 0xd9, 0x1a, 0x75, 0x26, 0x54, 0xcf,
|
||||
0xe7, 0xbe, 0xab, 0x24, 0x8f, 0x0c, 0x9c, 0xca, 0x23, 0x33, 0xb6, 0xd6,
|
||||
0x42, 0x65, 0x37, 0x5f, 0x35, 0xe9, 0x06, 0xe3, 0x0f, 0xb6, 0x73, 0x1e,
|
||||
0x4f, 0x5e, 0x94, 0x44, 0x6e, 0xdf, 0xe1, 0x2a, 0x17, 0x97, 0x9a, 0xa1,
|
||||
0x19, 0x1b, 0x3f, 0xa3, 0x25, 0x7a, 0xf1, 0x51, 0xfa, 0xdd, 0x5f, 0x80,
|
||||
0x35, 0xcc, 0x90, 0x2b, 0x8e, 0xa7, 0x5b, 0x4d, 0x9d, 0x0f, 0x13, 0xc7,
|
||||
0xa6, 0x87, 0x8a, 0xce, 0xe4, 0x45, 0xf9, 0xdc, 0xbf, 0xe8, 0xbc, 0xc1,
|
||||
0x5b, 0xfa, 0x51, 0x81, 0xa9, 0x7b, 0x26, 0xa9, 0xdd, 0xa8, 0xdf, 0xcf,
|
||||
0x29, 0xb3, 0xbe, 0x14, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x4d, 0x90, 0x6a, 0xf7, 0xb0, 0xff, 0x6a, 0xac, 0xaf, 0x33, 0xf0, 0x2d,
|
||||
0x6c, 0x58, 0x3e, 0x8d, 0x45, 0x5f, 0x8b, 0xd9, 0x08, 0x69, 0x0e, 0x8d,
|
||||
0x4f, 0x07, 0x87, 0x5b, 0xdb, 0x17, 0xb9, 0x4e, 0xcb, 0x48, 0x41, 0x0c,
|
||||
0x88, 0xa8, 0x23, 0x79, 0x4a, 0x9d, 0x9a, 0x31, 0x72, 0xe3, 0x1b, 0xd4,
|
||||
0x4d, 0xc5, 0xb1, 0x90, 0xdb, 0x9b, 0xef, 0x03, 0xff, 0x11, 0x42, 0x29,
|
||||
0xb6, 0xc6, 0xc2, 0x1e, 0x70, 0x80, 0xaf, 0xea, 0x0a, 0xd3, 0x2d, 0xb9,
|
||||
0xea, 0x6e, 0xbd, 0x5c, 0xac, 0x97, 0xa9, 0x08, 0x7d, 0x51, 0xb3, 0x72,
|
||||
0x62, 0x70, 0x4f, 0x19, 0xa9, 0xee, 0xe4, 0xc7, 0x06, 0x3f, 0xe3, 0x3d,
|
||||
0x1e, 0x02, 0x0e, 0xb5, 0x57, 0x3f, 0x14, 0x23, 0xda, 0x5b, 0xf3, 0xf3,
|
||||
0xf6, 0x62, 0x4e, 0x33, 0x72, 0x41, 0x4d, 0xb1, 0x87, 0xa6, 0xee, 0x68,
|
||||
0x62, 0xb1, 0x71, 0x7e, 0xb8, 0xa9, 0xa7, 0x6f, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0xa9, 0x53, 0x42, 0x6f, 0xe4, 0x79, 0xbd, 0xc7,
|
||||
0x37, 0x51, 0xc9, 0xe6, 0xfe, 0x9b, 0x63, 0x03, 0x25, 0xdb, 0xb6, 0xf9,
|
||||
0xdb, 0x80, 0x91, 0x01, 0x3f, 0xdd, 0xee, 0xe9, 0x9e, 0x60, 0xa6, 0x37,
|
||||
0x43, 0x16, 0x9b, 0x92, 0x8b, 0xf9, 0xd9, 0x21, 0xa7, 0xf9, 0x05, 0x21,
|
||||
0x00, 0xaa, 0x35, 0x9c, 0x08, 0xa5, 0x66, 0xba, 0xcb, 0xe3, 0x45, 0xfd,
|
||||
0x8e, 0xbb, 0x5e, 0x97, 0x2a, 0xf9, 0x99, 0x6c, 0x8f, 0x92, 0xe8, 0x7d,
|
||||
0x4f, 0x6d, 0x9c, 0x6d, 0xae, 0x17, 0xe1, 0x16, 0xde, 0x03, 0x35, 0x01,
|
||||
0x76, 0xe6, 0x5a, 0x3b, 0x45, 0xb5, 0x43, 0x21, 0x59, 0xa9, 0x87, 0x38,
|
||||
0x2f, 0x07, 0x94, 0x60, 0x26, 0xb9, 0xd2, 0xb5, 0xe3, 0x3b, 0x57, 0xa0,
|
||||
0xb0, 0xb6, 0xfe, 0x10, 0x2e, 0xb6, 0xbd, 0xdf, 0x24, 0xe1, 0xc3, 0x8c,
|
||||
0xbb, 0x08, 0x62, 0x84, 0x7e, 0x37, 0x25, 0x5c, 0xd1, 0x65, 0xf2, 0x45,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x4f, 0x72, 0x7c, 0xfb,
|
||||
0xb3, 0x7d, 0x0e, 0xa5, 0x2f, 0x39, 0x76, 0x80, 0xff, 0x08, 0xac, 0xe7,
|
||||
0x21, 0x18, 0x3f, 0x03, 0xab, 0xb2, 0xde, 0x9c, 0x83, 0x47, 0xa7, 0x90,
|
||||
0x1e, 0x2c, 0xa3, 0x11, 0x14, 0x4e, 0x27, 0x52, 0x70, 0x56, 0xa6, 0x87,
|
||||
0x1a, 0x45, 0x52, 0xf4, 0x9f, 0xed, 0x73, 0xb2, 0xa2, 0x01, 0x55, 0xbd,
|
||||
0x6a, 0x32, 0xd4, 0x15, 0x2e, 0x7e, 0x05, 0xbb, 0xb5, 0x27, 0x46, 0x58,
|
||||
0x49, 0x2b, 0xab, 0x60, 0x30, 0xbb, 0x6d, 0x8a, 0xd8, 0x85, 0x78, 0x25,
|
||||
0x19, 0x8a, 0x9d, 0xc9, 0x3e, 0xf2, 0xb0, 0x65, 0x1b, 0xe2, 0xfd, 0x30,
|
||||
0x66, 0x4b, 0xb5, 0x6b, 0xfd, 0x7e, 0xe3, 0x1c, 0x31, 0x08, 0xa6, 0xb2,
|
||||
0xda, 0x48, 0x8d, 0xeb, 0xb0, 0xac, 0xf0, 0xf5, 0x6e, 0xe5, 0x4b, 0xc7,
|
||||
0xf9, 0xfb, 0xca, 0xbd, 0x8c, 0x4e, 0x28, 0xa1, 0xe9, 0xf6, 0x7f, 0x12,
|
||||
0xd3, 0x5d, 0x88, 0x0b, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x8b, 0x4e, 0xfd, 0x3f, 0x25, 0xc1, 0xc0, 0x59, 0x5a, 0xd5, 0x46, 0xa0,
|
||||
0x79, 0x62, 0x4e, 0x29, 0xc6, 0x1f, 0x75, 0x8a, 0x8c, 0x82, 0x5e, 0x15,
|
||||
0x72, 0xd5, 0x58, 0x66, 0x81, 0x74, 0x9c, 0x40, 0x1d, 0x98, 0x69, 0x66,
|
||||
0xf6, 0xcb, 0x81, 0x16, 0xd4, 0xa4, 0xbc, 0x13, 0xd6, 0x85, 0x54, 0x5d,
|
||||
0xb3, 0x1b, 0x28, 0xa3, 0x56, 0x36, 0x46, 0xf6, 0xc2, 0x98, 0x24, 0xbb,
|
||||
0x35, 0xfe, 0xa2, 0x6a, 0x0b, 0xb7, 0x27, 0x6c, 0xb4, 0xda, 0x41, 0x1b,
|
||||
0x37, 0x2f, 0x76, 0x99, 0x63, 0x28, 0x79, 0xc0, 0xc5, 0x38, 0x5d, 0xf2,
|
||||
0x16, 0x23, 0x40, 0xa5, 0xb7, 0x36, 0x5a, 0xd1, 0x91, 0x4a, 0xbe, 0x6f,
|
||||
0x76, 0x8e, 0x06, 0x8f, 0x7c, 0x29, 0x10, 0x51, 0x6a, 0x42, 0xde, 0x5b,
|
||||
0xed, 0x80, 0x38, 0xf4, 0xf3, 0xf2, 0x5c, 0x76, 0xf2, 0x78, 0xbd, 0x0d,
|
||||
0x0c, 0xe8, 0x78, 0x68, 0xfc, 0x7c, 0x9c, 0x55, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x65, 0xfe, 0xff, 0xbf, 0x05, 0x2f, 0x3c, 0x1e,
|
||||
0x02, 0xfa, 0x35, 0x81, 0xef, 0x42, 0xb9, 0xa8, 0x5f, 0x6d, 0x50, 0xaf,
|
||||
0x80, 0x74, 0xb5, 0x76, 0xad, 0xd0, 0x5a, 0xc0, 0x44, 0xb0, 0x49, 0x2a,
|
||||
0x1c, 0x35, 0x33, 0x93, 0x5f, 0x00, 0x08, 0x49, 0x94, 0xb7, 0xab, 0x66,
|
||||
0xd3, 0xd7, 0xcc, 0xfe, 0x68, 0x9e, 0xf0, 0xae, 0x7e, 0x26, 0x1d, 0x4a,
|
||||
0x85, 0xf7, 0x0c, 0xaa, 0xd6, 0x4f, 0x0c, 0x6a, 0xde, 0xd6, 0x8b, 0xb8,
|
||||
0xc0, 0xbe, 0x0b, 0xab, 0x9a, 0x3f, 0xe4, 0x8a, 0x1c, 0x1b, 0x81, 0x1b,
|
||||
0x8d, 0x6e, 0xeb, 0xa3, 0xac, 0x44, 0x9a, 0x51, 0x29, 0x50, 0x2d, 0x93,
|
||||
0xa0, 0x23, 0xb8, 0x30, 0x33, 0x15, 0xe0, 0x11, 0x8b, 0x6e, 0xe3, 0x6f,
|
||||
0x62, 0xc1, 0xba, 0x8b, 0x25, 0xac, 0xd3, 0x13, 0x47, 0xee, 0xc5, 0x56,
|
||||
0x62, 0x9e, 0xbd, 0x5e, 0x03, 0xdb, 0x78, 0xde, 0x5e, 0xf9, 0xb9, 0x4e,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x00, 0xd0,
|
||||
0x87, 0x1a, 0x49, 0x23, 0x54, 0x58, 0x48, 0x00, 0x80, 0x87, 0x16, 0x01,
|
||||
0xa0, 0xbd, 0x8c, 0x8b, 0x01, 0x1a, 0xdc, 0xfc, 0x0f, 0xe2, 0x98, 0xa5,
|
||||
0x10, 0x78, 0x25, 0x6f, 0x9b, 0x99, 0x99, 0x59, 0x82, 0xef, 0x3b, 0xc0,
|
||||
0xa7, 0xd6, 0x7c, 0xb6, 0x5e, 0xe0, 0xd5, 0x9b, 0x31, 0x32, 0xf2, 0xf9,
|
||||
0xde, 0x89, 0xa6, 0x89, 0x90, 0x6d, 0xa8, 0x10, 0xb0, 0x71, 0x7a, 0x1e,
|
||||
0x85, 0xb0, 0x81, 0x18, 0x2a, 0x71, 0xb1, 0x7b, 0xbd, 0x29, 0xdc, 0x08,
|
||||
0x0e, 0xc6, 0xc5, 0xfd, 0x4d, 0xce, 0x87, 0x98, 0xd2, 0xe6, 0x0f, 0x11,
|
||||
0x2f, 0x7e, 0xf0, 0xe0, 0xd8, 0x47, 0x1d, 0x04, 0xc6, 0x9b, 0xde, 0x35,
|
||||
0x20, 0x57, 0x3e, 0xd4, 0x57, 0xb8, 0x20, 0x9d, 0x95, 0xae, 0xb6, 0x21,
|
||||
0x82, 0xac, 0xde, 0xdd, 0x9b, 0x60, 0x94, 0xe9, 0x4b, 0xb4, 0x59, 0xef,
|
||||
0x06, 0xa7, 0xe4, 0x65, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x00, 0x00, 0x00, 0x30, 0xc2, 0x35, 0xb2, 0xc0, 0xe8, 0x0e, 0x8a, 0xaf,
|
||||
0x28, 0x09, 0x1e, 0x7e, 0x9e, 0x37, 0x96, 0x87, 0x92, 0x36, 0x29, 0xd7,
|
||||
0x70, 0xd5, 0x84, 0xef, 0xb7, 0x50, 0x42, 0x00, 0xcd, 0xcc, 0xcc, 0x04,
|
||||
0x5a, 0x14, 0xea, 0x45, 0xb8, 0x82, 0x05, 0x94, 0x10, 0x12, 0xd3, 0x2e,
|
||||
0x7d, 0x94, 0x1b, 0x04, 0x9a, 0x3c, 0x6d, 0xa5, 0x6c, 0xbb, 0xbd, 0x4e,
|
||||
0x34, 0xe1, 0xe0, 0x16, 0xcb, 0x55, 0x94, 0x0a, 0x7a, 0x18, 0xa0, 0xdd,
|
||||
0x7b, 0x74, 0xd0, 0x54, 0x3c, 0xb9, 0xcd, 0x54, 0xd0, 0x97, 0xfd, 0xfa,
|
||||
0xd5, 0x19, 0xe9, 0x34, 0x4e, 0x6c, 0xa1, 0xea, 0xf3, 0x7f, 0x31, 0x5e,
|
||||
0xf8, 0x88, 0xbe, 0xec, 0x02, 0xf0, 0xce, 0x75, 0xdb, 0x51, 0xfa, 0x68,
|
||||
0x41, 0x49, 0x10, 0x03, 0xab, 0xff, 0x7f, 0x3a, 0xf5, 0x29, 0xa9, 0x1c,
|
||||
0xc3, 0x3d, 0x5f, 0x4d, 0x66, 0x9b, 0x65, 0x04, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x00, 0x24, 0x7a, 0x63, 0x86, 0x07,
|
||||
0x09, 0xeb, 0x71, 0x66, 0xa4, 0x43, 0x09, 0xeb, 0xbc, 0x2d, 0x8c, 0x2c,
|
||||
0xae, 0xeb, 0x7e, 0xde, 0xbf, 0x45, 0x0a, 0x22, 0x91, 0x27, 0x00, 0x00,
|
||||
0x67, 0x66, 0x66, 0x76, 0xa0, 0x3c, 0xc5, 0xd4, 0xd2, 0xcf, 0xd3, 0x0d,
|
||||
0x97, 0x92, 0x52, 0x7c, 0xf8, 0x5c, 0x73, 0x4d, 0xc5, 0x31, 0x62, 0x40,
|
||||
0x76, 0xe7, 0xb5, 0xfb, 0xaa, 0x68, 0x8e, 0x45, 0xc2, 0xf6, 0xff, 0x3b,
|
||||
0xf3, 0x53, 0xd0, 0x30, 0x9b, 0x29, 0x1d, 0x86, 0x25, 0xbd, 0x0f, 0xf9,
|
||||
0x53, 0x91, 0x10, 0x4c, 0xf5, 0x4a, 0xf3, 0x13, 0x04, 0xf4, 0x79, 0x12,
|
||||
0xc1, 0x4a, 0x58, 0x21, 0x3c, 0x74, 0xe0, 0x5d, 0x22, 0x2e, 0x60, 0x2a,
|
||||
0xca, 0x3d, 0x5f, 0xc2, 0xcb, 0xf2, 0x47, 0x71, 0xd0, 0xd6, 0x63, 0xca,
|
||||
0xf3, 0xd0, 0xe8, 0x39, 0x99, 0x3b, 0xff, 0x06, 0x3e, 0xe5, 0xcd, 0x6b,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x00, 0xac,
|
||||
0x99, 0x37, 0xb3, 0x60, 0xfa, 0x50, 0xcb, 0x16, 0x8f, 0x95, 0x6b, 0xcc,
|
||||
0x70, 0x69, 0x74, 0x2e, 0x07, 0xd5, 0x81, 0x98, 0xee, 0x16, 0x88, 0x9b,
|
||||
0x17, 0x00, 0x00, 0x00, 0x34, 0x33, 0x33, 0xa9, 0x30, 0x56, 0xe3, 0x6e,
|
||||
0x00, 0xe2, 0x23, 0x99, 0x51, 0x52, 0x76, 0xe2, 0x4c, 0x35, 0xcc, 0xf4,
|
||||
0xbe, 0x8b, 0x0f, 0xdf, 0xbf, 0xaa, 0x2a, 0xbb, 0x59, 0x1f, 0xbe, 0x5c,
|
||||
0xc0, 0x27, 0xda, 0x2d, 0x45, 0x2e, 0x95, 0x8f, 0xb7, 0x50, 0x74, 0xd9,
|
||||
0xec, 0x14, 0xb8, 0x4c, 0x0d, 0x7b, 0xc9, 0xe0, 0x02, 0xd1, 0x9e, 0x56,
|
||||
0xc9, 0x27, 0xf7, 0xa8, 0xa6, 0xa1, 0xdc, 0x38, 0x3f, 0xff, 0xf9, 0x30,
|
||||
0xfa, 0x8c, 0xc8, 0xab, 0x60, 0x86, 0xf0, 0x52, 0xc4, 0x01, 0xc2, 0x9c,
|
||||
0xb3, 0x42, 0x51, 0x52, 0x54, 0x35, 0x83, 0x05, 0xb2, 0xd5, 0xc6, 0x9b,
|
||||
0x7a, 0x43, 0xa6, 0x6c, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x00, 0x00, 0x00, 0xa5, 0xd1, 0xee, 0x7c, 0xd0, 0xba, 0xcb, 0x9d, 0x26,
|
||||
0x7f, 0xd1, 0xed, 0xba, 0xb3, 0x89, 0xf0, 0x52, 0xc2, 0x4f, 0x0c, 0xf0,
|
||||
0x23, 0xdd, 0x15, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x99, 0x99, 0xe5,
|
||||
0x87, 0xd2, 0xdc, 0x99, 0xaf, 0xaf, 0x6f, 0xca, 0x7f, 0x10, 0x1c, 0x8b,
|
||||
0x6e, 0xb8, 0xd8, 0xc5, 0x9e, 0xd0, 0x03, 0xce, 0x57, 0x95, 0xbd, 0xcc,
|
||||
0xba, 0x0f, 0x5f, 0x2e, 0xbf, 0x91, 0x50, 0x10, 0xfb, 0xe2, 0x60, 0x0a,
|
||||
0x57, 0xb1, 0xc7, 0xdb, 0xc3, 0x18, 0x08, 0x02, 0x48, 0xc8, 0xea, 0xa0,
|
||||
0x46, 0x32, 0xaa, 0x31, 0x46, 0x05, 0xbb, 0xbc, 0x03, 0x8d, 0xee, 0x36,
|
||||
0xff, 0xa5, 0x22, 0x99, 0x0d, 0xd2, 0x0a, 0x51, 0x1a, 0x45, 0x34, 0x44,
|
||||
0x68, 0x5d, 0x72, 0xde, 0x20, 0x7d, 0xa1, 0xa1, 0x77, 0xce, 0xd3, 0xc1,
|
||||
0x91, 0xa1, 0x29, 0x1e, 0xc6, 0xe3, 0x35, 0x14, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x00, 0x83, 0x1e, 0x97, 0x8a, 0xf6,
|
||||
0x06, 0xe2, 0xcb, 0xfe, 0xa5, 0x10, 0x1f, 0x59, 0x8b, 0xd6, 0x2f, 0x75,
|
||||
0xeb, 0x3d, 0xff, 0xdf, 0x6e, 0x67, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xcd, 0xcc, 0x4c, 0xbc, 0x97, 0xb3, 0x89, 0x2c, 0x3e, 0x8b, 0x8b, 0xaf,
|
||||
0x9b, 0x36, 0xe5, 0x38, 0x1c, 0x8d, 0x76, 0x2b, 0xf5, 0x49, 0xe6, 0x15,
|
||||
0x6c, 0x21, 0xaf, 0x6e, 0xdd, 0x87, 0x2f, 0x17, 0xee, 0x4e, 0x3a, 0xda,
|
||||
0x8f, 0x49, 0x3f, 0xd1, 0xcc, 0xc1, 0xd6, 0x95, 0x9e, 0x09, 0x52, 0x40,
|
||||
0xab, 0xac, 0x43, 0xc0, 0x04, 0x7a, 0x13, 0x51, 0x40, 0xae, 0xd1, 0x27,
|
||||
0x84, 0xee, 0xf6, 0x4e, 0x8e, 0xd8, 0x03, 0x5c, 0x37, 0x93, 0x2c, 0x43,
|
||||
0x49, 0xac, 0x48, 0xd9, 0xa0, 0x82, 0x6b, 0x9f, 0x73, 0x22, 0xc6, 0x35,
|
||||
0x95, 0x41, 0x89, 0x43, 0xb1, 0x1f, 0x04, 0xfa, 0x25, 0x24, 0x59, 0x31,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x40, 0xe4,
|
||||
0x16, 0xab, 0x32, 0x30, 0xf2, 0x5a, 0x04, 0x1d, 0x4c, 0x5c, 0xea, 0xfe,
|
||||
0x39, 0xbb, 0xe4, 0x26, 0x4f, 0x5e, 0x35, 0xa6, 0x03, 0x05, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x67, 0x66, 0x66, 0xdb, 0x44, 0x0e, 0xbd, 0xc1,
|
||||
0x9b, 0x8c, 0x59, 0xb7, 0xb9, 0xc3, 0xb0, 0x7d, 0x2a, 0x27, 0x26, 0x19,
|
||||
0x9e, 0xa2, 0x35, 0xe4, 0x38, 0x45, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45,
|
||||
0xbb, 0xda, 0xd7, 0xf5, 0xdc, 0xf5, 0xee, 0x05, 0x0b, 0x14, 0x99, 0x1d,
|
||||
0x2c, 0x77, 0x0b, 0xa8, 0xe0, 0x4c, 0x30, 0xd5, 0xe7, 0x25, 0xa5, 0x00,
|
||||
0x81, 0x5f, 0xbf, 0x8a, 0x0c, 0x7c, 0xdd, 0x18, 0xb0, 0x6c, 0xf4, 0x8f,
|
||||
0x59, 0x92, 0xf6, 0x79, 0xf6, 0x8d, 0x23, 0xf9, 0xaa, 0xc2, 0x33, 0xf1,
|
||||
0x15, 0xa6, 0x48, 0x92, 0x1c, 0xd8, 0x44, 0x2d, 0xe3, 0xdf, 0x33, 0x4c,
|
||||
0xab, 0x74, 0x9b, 0x1a, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x00, 0x00, 0xc0, 0x5a, 0x43, 0x4f, 0x54, 0x90, 0x5b, 0x8f, 0x29, 0xf1,
|
||||
0xbe, 0xb3, 0x8e, 0xbf, 0x2b, 0x63, 0x9b, 0xa4, 0x6d, 0x29, 0xe3, 0xfd,
|
||||
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x33, 0x93, 0x51,
|
||||
0xa5, 0xa4, 0x2f, 0xc2, 0xf4, 0x64, 0x4f, 0xb1, 0x7e, 0xa0, 0x7e, 0xc7,
|
||||
0x62, 0x51, 0xe8, 0xcd, 0xbb, 0xdf, 0x39, 0x36, 0x36, 0x64, 0xe4, 0xba,
|
||||
0x75, 0x1f, 0xbe, 0x5c, 0x26, 0xa3, 0x44, 0xc5, 0xda, 0xd1, 0xc0, 0x1f,
|
||||
0x86, 0x3b, 0x67, 0x70, 0xe2, 0x82, 0x02, 0x91, 0x2b, 0x40, 0xb0, 0x46,
|
||||
0xe2, 0xcf, 0xe7, 0x51, 0x2b, 0x63, 0x28, 0xac, 0x8c, 0x0d, 0x7e, 0x2f,
|
||||
0xce, 0x8f, 0x89, 0x09, 0xf4, 0x60, 0xb2, 0x30, 0xa1, 0xfd, 0x80, 0xf7,
|
||||
0xf8, 0x2d, 0xa8, 0xa8, 0x0f, 0x4d, 0x4e, 0xaa, 0x92, 0x83, 0x95, 0xa1,
|
||||
0x6b, 0x2c, 0xb2, 0x3e, 0x6f, 0x3e, 0x33, 0x5c, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0xd0, 0xb0, 0xb1, 0x11, 0x5d, 0x5f,
|
||||
0x5d, 0xc3, 0x95, 0x1a, 0x6e, 0xc2, 0xfe, 0xd5, 0xed, 0x59, 0xd2, 0x9f,
|
||||
0x2b, 0xf7, 0xc8, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x9a, 0x99, 0x59, 0xbf, 0xae, 0x61, 0x76, 0x5e, 0x8d, 0xfd, 0x9d, 0xc7,
|
||||
0xb2, 0xa0, 0x04, 0x7f, 0xbb, 0x90, 0x38, 0xa3, 0x7e, 0x5e, 0x2c, 0xdf,
|
||||
0x1c, 0x32, 0x72, 0xdd, 0xba, 0x0f, 0x5f, 0x2e, 0x1a, 0xc0, 0x6a, 0xe1,
|
||||
0x44, 0x05, 0x40, 0x61, 0xb6, 0x93, 0x2b, 0x00, 0x16, 0x8a, 0x70, 0x5b,
|
||||
0xbe, 0x39, 0x1f, 0x89, 0xbd, 0x8f, 0x95, 0xd9, 0xad, 0x4a, 0x11, 0xe6,
|
||||
0x30, 0xc0, 0xdb, 0x53, 0x2a, 0xb9, 0x1b, 0x91, 0x35, 0x4b, 0x6b, 0xe3,
|
||||
0x17, 0xdf, 0x05, 0x6f, 0xfd, 0xd9, 0x0a, 0xc7, 0x4d, 0x05, 0x6d, 0x5e,
|
||||
0x2d, 0xc9, 0x42, 0x0f, 0x43, 0x96, 0x20, 0xf6, 0x13, 0xd5, 0x35, 0x2d,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x30, 0x32,
|
||||
0x38, 0x7a, 0x48, 0x1a, 0x77, 0x0f, 0x63, 0xf3, 0x88, 0x03, 0x96, 0xed,
|
||||
0xc5, 0x2c, 0x20, 0xd4, 0xa5, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xcd, 0xcc, 0x84, 0x26, 0x24, 0x60, 0xdd, 0x3b,
|
||||
0x8b, 0xb9, 0x0a, 0xa7, 0x81, 0xbc, 0x27, 0xda, 0x6d, 0xbc, 0x60, 0x76,
|
||||
0xd7, 0xe7, 0xa3, 0x70, 0x0e, 0x19, 0xb9, 0x6e, 0xdd, 0x87, 0x2f, 0x17,
|
||||
0xa4, 0x2d, 0x51, 0x76, 0xad, 0x02, 0x00, 0xd3, 0xd3, 0x9a, 0x21, 0x0c,
|
||||
0x03, 0x85, 0xab, 0x50, 0x0b, 0xbf, 0x2d, 0x26, 0x8b, 0x88, 0x87, 0xdd,
|
||||
0xb0, 0xc1, 0xe5, 0x0f, 0x6a, 0xf8, 0xca, 0x23, 0x3a, 0xb0, 0xb8, 0xed,
|
||||
0x03, 0xb6, 0x19, 0x17, 0xb2, 0xd7, 0xaf, 0x25, 0x0f, 0xe8, 0x5d, 0x96,
|
||||
0x88, 0x6b, 0xd7, 0x7f, 0xe0, 0xb0, 0x41, 0x57, 0xa1, 0x14, 0x1d, 0x55,
|
||||
0x14, 0x63, 0xdb, 0x2a, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x00, 0x00, 0x64, 0x7f, 0x44, 0x75, 0xc4, 0x97, 0x37, 0x63, 0x51, 0x19,
|
||||
0x64, 0xd6, 0x26, 0x62, 0xe4, 0x5c, 0xc4, 0xac, 0xa2, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x66, 0xf6, 0x93,
|
||||
0xf5, 0xd8, 0x74, 0xee, 0xd7, 0xdd, 0xc1, 0x95, 0x0c, 0x6d, 0x23, 0x58,
|
||||
0x1b, 0x30, 0xd1, 0x7c, 0xdd, 0x80, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c,
|
||||
0x98, 0x97, 0x8e, 0x45, 0xd3, 0x87, 0x93, 0x2f, 0xa7, 0x92, 0xea, 0x68,
|
||||
0xc2, 0x30, 0x3c, 0x9d, 0xfc, 0xe5, 0x9a, 0xe5, 0x63, 0x86, 0xfe, 0xd6,
|
||||
0x66, 0x52, 0x9b, 0xc9, 0x7e, 0x85, 0xd7, 0x18, 0x5e, 0x6c, 0x19, 0x6d,
|
||||
0x74, 0x08, 0x36, 0xf3, 0xae, 0xf9, 0x4c, 0xa7, 0xa7, 0x41, 0xb5, 0x25,
|
||||
0x15, 0x62, 0xff, 0xae, 0x96, 0x82, 0xd3, 0x78, 0xf7, 0x79, 0x28, 0x91,
|
||||
0xd6, 0xb7, 0xed, 0x30, 0xea, 0x73, 0x77, 0x25, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0xac, 0xba, 0x36, 0xd5, 0x4b, 0x79,
|
||||
0xf3, 0x04, 0xf5, 0x8c, 0x51, 0xd7, 0x9c, 0xc4, 0x42, 0x49, 0x0f, 0x61,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x34, 0x33, 0x09, 0xd5, 0x70, 0xac, 0x80, 0x04, 0x8f, 0xd9, 0xd8, 0xe8,
|
||||
0xa6, 0xb4, 0x00, 0x28, 0x46, 0x98, 0xca, 0xc7, 0xd2, 0xac, 0x94, 0xc2,
|
||||
0x39, 0x64, 0xe4, 0xba, 0x75, 0x1f, 0xbe, 0x5c, 0xcf, 0x90, 0x68, 0xf1,
|
||||
0x6e, 0x4c, 0xca, 0x87, 0x8f, 0x21, 0x48, 0xa0, 0x16, 0x8b, 0x99, 0xd9,
|
||||
0x87, 0x24, 0x9b, 0x40, 0x22, 0xa8, 0x2f, 0x78, 0xa6, 0xa1, 0x66, 0xe9,
|
||||
0x3d, 0x18, 0x20, 0x3d, 0x8f, 0xe7, 0x2a, 0x62, 0x9d, 0x7a, 0x59, 0x9f,
|
||||
0x19, 0xad, 0x4c, 0x22, 0xf7, 0xd8, 0xff, 0x2f, 0xd1, 0x08, 0x62, 0xfa,
|
||||
0xea, 0x32, 0xde, 0xeb, 0x34, 0xb7, 0x2f, 0xce, 0x59, 0xae, 0x58, 0x5e,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0xb5, 0x26,
|
||||
0x62, 0x47, 0xcf, 0x65, 0x84, 0xa1, 0x99, 0xdc, 0x4d, 0x33, 0x06, 0x56,
|
||||
0x18, 0xe9, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x9a, 0x99, 0x05, 0xdc, 0x12, 0x36, 0xab, 0x88,
|
||||
0x30, 0xba, 0x5f, 0xa0, 0xfb, 0x47, 0x07, 0x26, 0x35, 0x41, 0x2f, 0x9d,
|
||||
0x69, 0x56, 0x4a, 0xe1, 0x1c, 0x32, 0x72, 0xdd, 0xba, 0x0f, 0x5f, 0x2e,
|
||||
0x8a, 0xde, 0x53, 0x36, 0x9b, 0x28, 0xd7, 0xc2, 0x9a, 0xe9, 0x54, 0x8f,
|
||||
0x12, 0x82, 0x62, 0x7c, 0x80, 0x95, 0xb6, 0xe4, 0xb6, 0x95, 0x68, 0x3a,
|
||||
0x12, 0x91, 0xc6, 0x53, 0x8d, 0xb2, 0x5f, 0x00, 0x98, 0x26, 0x75, 0x9a,
|
||||
0x48, 0x00, 0x22, 0x78, 0xd9, 0x15, 0xe9, 0x48, 0x9c, 0x41, 0x43, 0x00,
|
||||
0x86, 0x64, 0xcf, 0xef, 0x41, 0xe0, 0x64, 0xc8, 0x06, 0xbd, 0xf3, 0x38,
|
||||
0x14, 0x4d, 0xb4, 0x29, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x00, 0x00, 0x03, 0xc6, 0x05, 0x53, 0x9b, 0xa2, 0x22, 0xa8, 0x0c, 0xfa,
|
||||
0x63, 0x9c, 0xc2, 0x5e, 0x8d, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xcd, 0x4c, 0x74, 0x4a,
|
||||
0xab, 0x56, 0x10, 0x49, 0x78, 0xb6, 0xd1, 0x04, 0x5c, 0xb0, 0x7e, 0x50,
|
||||
0x06, 0xcf, 0xb9, 0xce, 0x34, 0x2b, 0xa5, 0x70, 0x0e, 0x19, 0xb9, 0x6e,
|
||||
0xdd, 0x87, 0x2f, 0x17, 0xa3, 0xd0, 0xde, 0x7e, 0x48, 0x51, 0x64, 0x73,
|
||||
0x21, 0x48, 0x05, 0xf8, 0xb9, 0xaf, 0xa6, 0xc6, 0xfa, 0xf1, 0xaa, 0xea,
|
||||
0xdc, 0x98, 0xbb, 0xb0, 0x67, 0xa5, 0xd0, 0x60, 0x25, 0x56, 0x02, 0x04,
|
||||
0x13, 0x70, 0x85, 0x7a, 0x7e, 0xf8, 0xf8, 0x6c, 0x77, 0xe5, 0x5b, 0x32,
|
||||
0x93, 0xdd, 0x77, 0x1c, 0xa8, 0x36, 0x1c, 0x19, 0x15, 0xd6, 0x65, 0x8a,
|
||||
0x9f, 0x83, 0x4a, 0xdf, 0x0f, 0x3c, 0x5c, 0x71, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x40, 0x58, 0x89, 0xea, 0xfc, 0xd4, 0xce,
|
||||
0xe7, 0xab, 0x47, 0x2a, 0x12, 0x95, 0x93, 0x9d, 0x14, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x67, 0x66, 0xa3, 0xab, 0x8c, 0xef, 0x89, 0x25, 0x47, 0x52, 0x2b, 0x9e,
|
||||
0xa5, 0x3b, 0xf4, 0xbe, 0xea, 0xe7, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51,
|
||||
0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45, 0xce, 0xc3, 0x34, 0xb3,
|
||||
0x70, 0x4e, 0x4d, 0xbe, 0xc5, 0x5a, 0x0c, 0x13, 0x5b, 0xee, 0x07, 0xbf,
|
||||
0x95, 0x12, 0x7e, 0xed, 0x4d, 0xa4, 0x8e, 0xc1, 0x70, 0x34, 0x9e, 0x6a,
|
||||
0x3b, 0x06, 0xa6, 0x65, 0x0f, 0x2c, 0x93, 0x88, 0xf7, 0xef, 0xc9, 0x9c,
|
||||
0x0d, 0xd3, 0xa8, 0x1f, 0x8b, 0xd7, 0x35, 0xe3, 0xcb, 0xa0, 0x72, 0x39,
|
||||
0x0b, 0xa8, 0x5a, 0x6f, 0x95, 0xeb, 0x29, 0x88, 0x52, 0x3e, 0x01, 0x12,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0xc0, 0x8a, 0x2f,
|
||||
0x82, 0xda, 0x05, 0x96, 0x7e, 0x40, 0x9e, 0xef, 0x9e, 0xdb, 0x4c, 0x0c,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x34, 0x93, 0x07, 0x17, 0x22, 0x7d, 0xc9, 0xe4,
|
||||
0xa3, 0x4b, 0x66, 0x4e, 0x3b, 0x84, 0x67, 0x34, 0x04, 0xe0, 0xe7, 0x3a,
|
||||
0xd3, 0xac, 0x94, 0xc2, 0x39, 0x64, 0xe4, 0xba, 0x75, 0x1f, 0xbe, 0x5c,
|
||||
0x0d, 0x88, 0x5f, 0xf8, 0xac, 0x4e, 0xb4, 0x30, 0x5c, 0xc6, 0xff, 0x6c,
|
||||
0x72, 0xb2, 0xcf, 0x0b, 0x70, 0xaf, 0x96, 0x28, 0xa9, 0x31, 0xaf, 0xbc,
|
||||
0x71, 0x72, 0xf3, 0xf2, 0x77, 0x8e, 0xac, 0x18, 0x59, 0xc0, 0x96, 0x6f,
|
||||
0xa6, 0xde, 0x5d, 0x3b, 0x9f, 0x66, 0x1f, 0x89, 0xf0, 0x8b, 0xe8, 0x62,
|
||||
0xe1, 0x64, 0x16, 0x30, 0xad, 0xff, 0xfd, 0xd9, 0x7f, 0xd8, 0xb0, 0x25,
|
||||
0x9b, 0x02, 0xa4, 0x00, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x00, 0xd0, 0x69, 0x1e, 0xe4, 0x15, 0x78, 0x19, 0x0a, 0xda, 0x1f, 0xdf,
|
||||
0xc2, 0x56, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x59, 0xe1, 0xb5,
|
||||
0xf5, 0x60, 0x05, 0x8f, 0x81, 0xda, 0xfd, 0xe9, 0x29, 0x5a, 0x76, 0x21,
|
||||
0x02, 0xf0, 0x73, 0x9d, 0x69, 0x56, 0x4a, 0xe1, 0x1c, 0x32, 0x72, 0xdd,
|
||||
0xba, 0x0f, 0x5f, 0x2e, 0xfd, 0xdf, 0x81, 0x19, 0x3c, 0xad, 0xc6, 0x8b,
|
||||
0x58, 0xfe, 0x51, 0x28, 0xb5, 0xf8, 0xdd, 0x10, 0x20, 0x61, 0x30, 0xb3,
|
||||
0xb5, 0xf5, 0x76, 0x71, 0xa8, 0xc3, 0x57, 0xe4, 0x0c, 0xda, 0x70, 0x11,
|
||||
0x1e, 0xbd, 0x07, 0x1a, 0xc6, 0xa1, 0x7e, 0x7c, 0xf2, 0x1c, 0xd0, 0x90,
|
||||
0x6d, 0x8f, 0x21, 0x11, 0xde, 0x84, 0x40, 0xb2, 0x19, 0x13, 0x42, 0x9c,
|
||||
0xf6, 0xc4, 0x72, 0xf5, 0x9e, 0x74, 0xbe, 0x50, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x30, 0x32, 0x71, 0x23, 0x2e, 0xdd, 0x8b,
|
||||
0xf5, 0xda, 0xc1, 0xf5, 0x60, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xcd, 0x04, 0x3e, 0x2b, 0xd5, 0x66, 0x7c, 0x0c, 0x14, 0xe9, 0xcd, 0x52,
|
||||
0x02, 0x82, 0xbf, 0x10, 0x01, 0xf8, 0xb9, 0xce, 0x34, 0x2b, 0xa5, 0x70,
|
||||
0x0e, 0x19, 0xb9, 0x6e, 0xdd, 0x87, 0x2f, 0x17, 0xc0, 0x5b, 0xf8, 0x94,
|
||||
0x48, 0x19, 0xd1, 0x27, 0xe0, 0x36, 0x2a, 0x8e, 0xe7, 0x18, 0x70, 0x0f,
|
||||
0xe1, 0x8e, 0xf4, 0x48, 0x72, 0x5c, 0x8e, 0x9c, 0xfb, 0xb9, 0x62, 0xb7,
|
||||
0xa6, 0x15, 0x84, 0x5e, 0x6f, 0x6b, 0x14, 0xb3, 0x43, 0x24, 0xad, 0xe3,
|
||||
0x9e, 0x76, 0x84, 0x7a, 0x92, 0x8d, 0x04, 0x40, 0x51, 0x73, 0x6b, 0x18,
|
||||
0xc1, 0x91, 0x25, 0x20, 0x40, 0x67, 0x02, 0x0e, 0x72, 0x76, 0x27, 0x6b,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0xa4, 0xd8, 0x1d,
|
||||
0xa2, 0x5e, 0xa7, 0x37, 0x70, 0xad, 0xd1, 0x9c, 0x02, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x67, 0x76, 0x60, 0xee, 0x6c, 0x59, 0x24, 0x5c,
|
||||
0x74, 0x80, 0x2a, 0x4d, 0x98, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c,
|
||||
0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45,
|
||||
0x22, 0x62, 0xe0, 0xd7, 0x4f, 0x07, 0xfe, 0x3a, 0x15, 0x87, 0xb6, 0x74,
|
||||
0x37, 0x21, 0xa4, 0x0a, 0x98, 0x5e, 0x28, 0x12, 0x77, 0xf1, 0x1f, 0x50,
|
||||
0xd9, 0x8f, 0xf9, 0x40, 0x41, 0x6f, 0x0f, 0x4b, 0x87, 0x7d, 0x5e, 0x14,
|
||||
0x57, 0xf5, 0xe8, 0x74, 0xc4, 0xd4, 0x27, 0xfc, 0x86, 0xd9, 0x56, 0x66,
|
||||
0x17, 0xd6, 0x97, 0xe5, 0x69, 0x2a, 0x6b, 0x03, 0x85, 0x38, 0xda, 0x70,
|
||||
0xb2, 0xf6, 0xdd, 0x52, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x00, 0xac, 0x7f, 0x7a, 0x7f, 0xcc, 0x6b, 0x89, 0xc3, 0x0c, 0x8f, 0x01,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x69, 0x13, 0xea,
|
||||
0xec, 0xb4, 0x88, 0x9c, 0xa2, 0x9b, 0x58, 0x31, 0xcf, 0x1c, 0xfe, 0x42,
|
||||
0x04, 0xe0, 0xe7, 0x3a, 0xd3, 0xac, 0x94, 0xc2, 0x39, 0x64, 0xe4, 0xba,
|
||||
0x75, 0x1f, 0xbe, 0x5c, 0x4c, 0x07, 0x42, 0x14, 0x93, 0xe2, 0x83, 0x93,
|
||||
0xa3, 0xa9, 0x72, 0x37, 0xa9, 0xb2, 0x58, 0xa7, 0x6a, 0x94, 0x53, 0xb5,
|
||||
0xeb, 0xad, 0x3b, 0x56, 0x46, 0x6b, 0x61, 0xa3, 0x8c, 0x14, 0x56, 0x32,
|
||||
0xd4, 0x10, 0xf3, 0xf6, 0x64, 0x82, 0xef, 0xb0, 0x24, 0xd2, 0xc2, 0x79,
|
||||
0xde, 0xb0, 0x42, 0x9e, 0x65, 0x05, 0x88, 0x74, 0x2f, 0x8b, 0x58, 0xd5,
|
||||
0x42, 0x1a, 0x06, 0xa3, 0xd4, 0xd1, 0xfc, 0x12, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x00, 0xc5, 0x1c, 0xd9, 0xfc, 0x35, 0x0a, 0xad,
|
||||
0x17, 0xee, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x9a, 0x25, 0x9e, 0xe6, 0xbc, 0xaf, 0xb1, 0x88, 0x37, 0xd7, 0x97, 0x99,
|
||||
0x67, 0x0e, 0x7f, 0x21, 0x02, 0xf0, 0x73, 0x9d, 0x69, 0x56, 0x4a, 0xe1,
|
||||
0x1c, 0x32, 0x72, 0xdd, 0xba, 0x0f, 0x5f, 0x2e, 0x5d, 0x53, 0x09, 0xab,
|
||||
0xc2, 0x58, 0x42, 0x94, 0x15, 0x71, 0x15, 0x1b, 0xfa, 0xfb, 0xcd, 0xc6,
|
||||
0xbc, 0xa4, 0x00, 0x49, 0x74, 0xd3, 0x9c, 0x04, 0x81, 0xd7, 0x09, 0x28,
|
||||
0x7d, 0x37, 0x37, 0x58, 0x8a, 0x94, 0x1a, 0x79, 0x8a, 0x7e, 0x94, 0x29,
|
||||
0xbd, 0xab, 0x65, 0x5f, 0xc6, 0x94, 0xed, 0xdd, 0x94, 0xc1, 0xf1, 0xd8,
|
||||
0xed, 0x88, 0x80, 0x47, 0x31, 0x52, 0xda, 0x94, 0x30, 0x60, 0xac, 0x0a,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x83, 0x86, 0xfc,
|
||||
0xaf, 0x41, 0xb9, 0x0e, 0x8e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x4d, 0xac, 0x2d, 0x08, 0x88, 0xfd, 0x14, 0x1e,
|
||||
0x24, 0x78, 0xcc, 0xcc, 0x33, 0x87, 0xbf, 0x10, 0x01, 0xf8, 0xb9, 0xce,
|
||||
0x34, 0x2b, 0xa5, 0x70, 0x0e, 0x19, 0xb9, 0x6e, 0xdd, 0x87, 0x2f, 0x17,
|
||||
0xbb, 0xc3, 0xc0, 0xea, 0xbd, 0x9b, 0xd6, 0x22, 0x75, 0x96, 0x6c, 0xca,
|
||||
0xc1, 0x2b, 0x45, 0x76, 0x2e, 0x4e, 0x2b, 0xf7, 0xfa, 0x9e, 0x64, 0x2d,
|
||||
0x4b, 0xd8, 0xd9, 0x06, 0x24, 0xa3, 0xce, 0x49, 0x17, 0xd6, 0x83, 0xe8,
|
||||
0x15, 0xc0, 0xbf, 0x4f, 0xf8, 0x85, 0x9d, 0xaf, 0xb6, 0x85, 0x62, 0x31,
|
||||
0x89, 0x06, 0x10, 0xd3, 0x0a, 0xf1, 0xd5, 0x03, 0x50, 0x2c, 0x1e, 0xf4,
|
||||
0xa8, 0xb6, 0x91, 0x2e, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x40, 0x0c, 0xab, 0x80, 0x53, 0x26, 0xc2, 0x54, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6b, 0xbb, 0x49,
|
||||
0x36, 0x72, 0x46, 0x68, 0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32,
|
||||
0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c,
|
||||
0x98, 0x97, 0x8e, 0x45, 0x80, 0x00, 0x64, 0x53, 0xae, 0xef, 0x99, 0x13,
|
||||
0xc9, 0x1c, 0x53, 0x45, 0x0c, 0xdc, 0x97, 0xd5, 0x80, 0x4b, 0x56, 0x86,
|
||||
0xa3, 0xbc, 0x78, 0xc4, 0xc2, 0x0c, 0xf7, 0xaf, 0xfd, 0x42, 0xe9, 0x1b,
|
||||
0x5f, 0x2f, 0xa6, 0x4f, 0xa5, 0x53, 0xb5, 0xec, 0x36, 0x50, 0xc6, 0xd1,
|
||||
0x3f, 0xdf, 0xaf, 0x63, 0x9f, 0x25, 0x1d, 0x40, 0xd2, 0xb5, 0x83, 0x24,
|
||||
0x67, 0x57, 0x4d, 0x0c, 0x7b, 0x8c, 0x90, 0x3d, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0xc0, 0xfa, 0x49, 0xea, 0x2a, 0x92, 0x32, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x94, 0x9d, 0x8c, 0xc4, 0x08, 0x6c, 0x2a, 0x66, 0x32, 0xe3, 0x31, 0x33,
|
||||
0xcf, 0x1c, 0xfe, 0x42, 0x04, 0xe0, 0xe7, 0x3a, 0xd3, 0xac, 0x94, 0xc2,
|
||||
0x39, 0x64, 0xe4, 0xba, 0x75, 0x1f, 0xbe, 0x5c, 0x57, 0x7e, 0xfd, 0x4b,
|
||||
0xb2, 0xe8, 0x7d, 0x9b, 0x0e, 0x3c, 0x51, 0xa9, 0x15, 0xcb, 0x9e, 0xfa,
|
||||
0x45, 0x73, 0x63, 0x75, 0xac, 0xb6, 0x70, 0xff, 0xcb, 0x0a, 0xb6, 0xd9,
|
||||
0xd9, 0xf6, 0x7d, 0x5e, 0x31, 0x38, 0x8e, 0xff, 0x0c, 0xba, 0x37, 0xee,
|
||||
0x71, 0x85, 0x91, 0xa1, 0xc8, 0x13, 0x11, 0xc4, 0x7c, 0x0f, 0x6e, 0xca,
|
||||
0x37, 0x32, 0xdb, 0x2a, 0x5a, 0xf3, 0x69, 0x45, 0x0f, 0x15, 0xe4, 0x1e,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0xd0, 0xb2, 0x9d, 0x4f,
|
||||
0x2c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x5a, 0x83, 0xa6, 0xa4, 0x69, 0x0f, 0x33, 0x33,
|
||||
0x99, 0xf1, 0x98, 0x99, 0x67, 0x0e, 0x7f, 0x21, 0x02, 0xf0, 0x73, 0x9d,
|
||||
0x69, 0x56, 0x4a, 0xe1, 0x1c, 0x32, 0x72, 0xdd, 0xba, 0x0f, 0x5f, 0x2e,
|
||||
0xa0, 0xbc, 0x92, 0xcb, 0x0d, 0xff, 0xd0, 0xea, 0x29, 0x6b, 0x52, 0x84,
|
||||
0x98, 0xe7, 0xf8, 0xf0, 0xfb, 0x66, 0xbe, 0x18, 0xab, 0x4c, 0x1b, 0x71,
|
||||
0x16, 0xec, 0xb7, 0x81, 0x69, 0x77, 0xeb, 0x57, 0x47, 0xad, 0x0d, 0x3a,
|
||||
0x20, 0x42, 0x92, 0xf1, 0xc1, 0xd3, 0xef, 0x25, 0xf5, 0x26, 0x55, 0xc3,
|
||||
0x98, 0x49, 0x24, 0xec, 0xbe, 0x30, 0x09, 0xa8, 0x07, 0x14, 0x8f, 0xce,
|
||||
0x6f, 0xdd, 0xa9, 0x71, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0x30, 0xc2, 0xb2, 0x00, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0xed, 0x68, 0x0c,
|
||||
0x84, 0x99, 0x99, 0x99, 0xcc, 0x78, 0xcc, 0xcc, 0x33, 0x87, 0xbf, 0x10,
|
||||
0x01, 0xf8, 0xb9, 0xce, 0x34, 0x2b, 0xa5, 0x70, 0x0e, 0x19, 0xb9, 0x6e,
|
||||
0xdd, 0x87, 0x2f, 0x17, 0x88, 0x7c, 0xf4, 0x83, 0xfd, 0x16, 0xf9, 0xf0,
|
||||
0x1a, 0xcc, 0xc4, 0xea, 0xf5, 0xba, 0x03, 0x19, 0x4a, 0x1b, 0x5d, 0x52,
|
||||
0xe2, 0xfd, 0x78, 0xb1, 0x3d, 0xd3, 0x03, 0x25, 0x35, 0x11, 0x34, 0x47,
|
||||
0x4d, 0xd6, 0x17, 0x27, 0xbf, 0xde, 0x87, 0x2c, 0xde, 0x46, 0x07, 0xcd,
|
||||
0x2f, 0xa1, 0x31, 0x26, 0xe5, 0x8d, 0x87, 0xe1, 0xda, 0xac, 0xfe, 0x37,
|
||||
0x97, 0x79, 0x72, 0xc7, 0x3f, 0x28, 0x4a, 0x5e, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0xe4, 0xc5, 0xbd, 0x0a, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0xf7, 0xec, 0x75, 0x26, 0xcc, 0xcc, 0xcc, 0xcc, 0x65, 0x6a, 0x65, 0x66,
|
||||
0x9b, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51,
|
||||
0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45, 0xb3, 0xc4, 0x59, 0xb6,
|
||||
0x35, 0x71, 0xaf, 0xe7, 0x73, 0x4c, 0x03, 0xe2, 0x69, 0x50, 0x19, 0x36,
|
||||
0x65, 0x43, 0xd5, 0x33, 0x7f, 0x31, 0xf1, 0x0e, 0x89, 0xa3, 0x4f, 0x55,
|
||||
0xdd, 0xf3, 0x67, 0x57, 0xf1, 0xcb, 0xe8, 0x3c, 0x7f, 0x68, 0x6a, 0x29,
|
||||
0xe5, 0x60, 0x35, 0x3e, 0x72, 0x40, 0x3c, 0x8b, 0x39, 0x01, 0xa5, 0x9e,
|
||||
0x73, 0x99, 0x7f, 0x87, 0x18, 0xb4, 0x68, 0xc6, 0xd2, 0x7b, 0xa9, 0x71,
|
||||
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
|
||||
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
|
||||
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0xac, 0x68, 0x06, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0xca, 0xff, 0x91, 0x99, 0x65, 0x66, 0x66, 0x66,
|
||||
0x32, 0xe3, 0x31, 0x33, 0xcf, 0x1c, 0xfe, 0x42, 0x04, 0xe0, 0xe7, 0x3a,
|
||||
0xd3, 0xac, 0x94, 0xc2, 0x39, 0x64, 0xe4, 0xba, 0x75, 0x1f, 0xbe, 0x5c,
|
||||
0x71, 0x02, 0x27, 0x40, 0x1d, 0xe1, 0x11, 0xee, 0x3b, 0xc9, 0x13, 0xe2,
|
||||
0x90, 0x29, 0xf8, 0x0b, 0x70, 0x4b, 0x7b, 0xfa, 0xd2, 0xd6, 0xf5, 0x8f,
|
||||
0x45, 0xff, 0xc8, 0xc4, 0xc9, 0x4c, 0xf9, 0x5e, 0x0e, 0x66, 0x85, 0x5f,
|
||||
0x88, 0xff, 0xd3, 0xb6, 0xd8, 0x0a, 0x67, 0xd6, 0x91, 0x18, 0x5b, 0xe5,
|
||||
0xc9, 0xa9, 0xd9, 0x55, 0xb4, 0x30, 0xde, 0x83, 0xe5, 0x10, 0x9a, 0x05,
|
||||
0x55, 0x53, 0xde, 0x55, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
|
||||
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
|
||||
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
|
||||
0xd5, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0xc8, 0xcc, 0xcc,
|
||||
0x32, 0x33, 0x33, 0x33, 0x99, 0xf1, 0x98, 0x99, 0x67, 0x0e, 0x7f, 0x21,
|
||||
0x02, 0xf0, 0x73, 0x9d, 0x69, 0x56, 0x4a, 0xe1, 0x1c, 0x32, 0x72, 0xdd,
|
||||
0xba, 0x0f, 0x5f, 0x2e, 0x01, 0x00, 0x00, 0xc0, 0xa9, 0xaa, 0xaa, 0x6a,
|
||||
0x54, 0xd4, 0x53, 0x15, 0x58, 0xd6, 0x6d, 0x37, 0x5a, 0x5b, 0xd4, 0x08,
|
||||
0xb2, 0xb0, 0x9f, 0xd9, 0x2c, 0x08, 0x7b, 0x3b, 0x0c, 0x84, 0x44, 0x6a,
|
||||
0x08, 0x75, 0x50, 0x67, 0x4d, 0xe0, 0x04, 0xae, 0x38, 0x92, 0x4a, 0x99,
|
||||
0x8b, 0x1e, 0xbb, 0x5f, 0x89, 0x70, 0x45, 0x82, 0x02, 0xe8, 0xe3, 0xe9,
|
||||
0xea, 0x60, 0x2a, 0xd0, 0xa1, 0xe3, 0x59, 0x47, 0x01, 0x00, 0x00, 0x00,
|
||||
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
|
||||
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
|
||||
0x37, 0x1a, 0x49, 0x4d, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
0x65, 0x66, 0x66, 0xe6, 0x98, 0x99, 0x99, 0x19, 0xcc, 0xa6, 0xcb, 0x4c,
|
||||
0x35, 0x59, 0x9e, 0xba, 0x03, 0xe4, 0x8a, 0xd3, 0x38, 0x17, 0x42, 0x8a,
|
||||
0xb2, 0xd7, 0x87, 0x03, 0x87, 0x5b, 0x26, 0x51, 0x01, 0x00, 0x00, 0x40,
|
||||
0xff, 0xff, 0xff, 0x3f, 0xff, 0xc4, 0xfe, 0x3f, 0x02, 0x3b, 0xce, 0xfe,
|
||||
0x03, 0x62, 0x39, 0x07, 0x06, 0x62, 0x6b, 0x26, 0xf6, 0x1d, 0x36, 0x5f,
|
||||
0x7e, 0x3d, 0xf2, 0x56, 0x34, 0x33, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xcc,
|
||||
0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c,
|
||||
0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45
|
||||
};
|
||||
unsigned int constants_2_len = 11904;
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,271 +0,0 @@
|
||||
#include "poseidon.cuh"
|
||||
|
||||
template <typename S>
|
||||
__global__ void prepare_poseidon_states(S * inp, S * states, size_t number_of_states, S domain_tag, const PoseidonConfiguration<S> config) {
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
int state_number = idx / config.t;
|
||||
if (state_number >= number_of_states) {
|
||||
return;
|
||||
}
|
||||
int element_number = idx % config.t;
|
||||
|
||||
S prepared_element;
|
||||
|
||||
// Domain separation
|
||||
if (element_number == 0) {
|
||||
prepared_element = domain_tag;
|
||||
} else {
|
||||
prepared_element = inp[state_number * (config.t - 1) + element_number - 1];
|
||||
}
|
||||
|
||||
// Add pre-round constant
|
||||
prepared_element = prepared_element + config.round_constants[element_number];
|
||||
|
||||
// Store element in state
|
||||
states[idx] = prepared_element;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__device__ __forceinline__ S sbox_alpha_five(S element) {
|
||||
S result = S::sqr(element);
|
||||
result = S::sqr(result);
|
||||
return result * element;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__device__ S vecs_mul_matrix(S element, S * matrix, int element_number, int vec_number, int size, S * shared_states) {
|
||||
shared_states[threadIdx.x] = element;
|
||||
__syncthreads();
|
||||
|
||||
element = S::zero();
|
||||
for (int i = 0; i < size; i++) {
|
||||
element = element + (shared_states[vec_number * size + i] * matrix[i * size + element_number]);
|
||||
}
|
||||
__syncthreads();
|
||||
return element;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__device__ S full_round(S element,
|
||||
size_t rc_offset,
|
||||
int local_state_number,
|
||||
int element_number,
|
||||
bool multiply_by_mds,
|
||||
bool add_round_constant,
|
||||
S * shared_states,
|
||||
const PoseidonConfiguration<S> config) {
|
||||
element = sbox_alpha_five(element);
|
||||
if (add_round_constant) {
|
||||
element = element + config.round_constants[rc_offset + element_number];
|
||||
}
|
||||
|
||||
// Multiply all the states by mds matrix
|
||||
S * matrix = multiply_by_mds ? config.mds_matrix : config.non_sparse_matrix;
|
||||
return vecs_mul_matrix(element, matrix, element_number, local_state_number, config.t, shared_states);
|
||||
}
|
||||
|
||||
// Execute full rounds
|
||||
template <typename S>
|
||||
__global__ void full_rounds(S * states, size_t number_of_states, size_t rc_offset, bool first_half, const PoseidonConfiguration<S> config) {
|
||||
extern __shared__ S shared_states[];
|
||||
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
int state_number = idx / config.t;
|
||||
if (state_number >= number_of_states) {
|
||||
return;
|
||||
}
|
||||
int local_state_number = threadIdx.x / config.t;
|
||||
int element_number = idx % config.t;
|
||||
|
||||
for (int i = 0; i < config.full_rounds_half - 1; i++) {
|
||||
states[idx] = full_round(states[idx],
|
||||
rc_offset,
|
||||
local_state_number,
|
||||
element_number,
|
||||
true,
|
||||
true,
|
||||
shared_states,
|
||||
config);
|
||||
rc_offset += config.t;
|
||||
}
|
||||
|
||||
states[idx] = full_round(states[idx],
|
||||
rc_offset,
|
||||
local_state_number,
|
||||
element_number,
|
||||
!first_half,
|
||||
first_half,
|
||||
shared_states,
|
||||
config);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__device__ S partial_round(S * state,
|
||||
size_t rc_offset,
|
||||
int round_number,
|
||||
const PoseidonConfiguration<S> config) {
|
||||
S element = state[0];
|
||||
element = sbox_alpha_five(element);
|
||||
element = element + config.round_constants[rc_offset];
|
||||
|
||||
S * sparse_matrix = &config.sparse_matrices[(config.t * 2 - 1) * round_number];
|
||||
|
||||
state[0] = element * sparse_matrix[0];
|
||||
for (int i = 1; i < config.t; i++) {
|
||||
state[0] = state[0] + (state[i] * sparse_matrix[i]);
|
||||
}
|
||||
|
||||
for (int i = 1; i < config.t; i++) {
|
||||
state[i] = state[i] + (element * sparse_matrix[config.t + i - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
// Execute partial rounds
|
||||
template <typename S>
|
||||
__global__ void partial_rounds(S * states, size_t number_of_states, size_t rc_offset, const PoseidonConfiguration<S> config) {
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) {
|
||||
return;
|
||||
}
|
||||
|
||||
S * state = &states[idx * config.t];
|
||||
|
||||
for (int i = 0; i < config.partial_rounds; i++) {
|
||||
partial_round(state, rc_offset, i, config);
|
||||
rc_offset++;
|
||||
}
|
||||
}
|
||||
|
||||
// These function is just doing copy from the states to the output
|
||||
template <typename S>
|
||||
__global__ void get_hash_results(S * states, size_t number_of_states, S * out, int t) {
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) {
|
||||
return;
|
||||
}
|
||||
|
||||
out[idx] = states[idx * t + 1];
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__host__ void Poseidon<S>::hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type) {
|
||||
// Used in matrix multiplication
|
||||
|
||||
S * states, * inp_device;
|
||||
|
||||
// allocate memory for {blocks} states of {t} scalars each
|
||||
cudaMalloc(&states, blocks * this->t * sizeof(S));
|
||||
|
||||
// Move input to cuda
|
||||
cudaMalloc(&inp_device, blocks * (this->t - 1) * sizeof(S));
|
||||
cudaMemcpy(inp_device, inp, blocks * (this->t - 1) * sizeof(S), cudaMemcpyHostToDevice);
|
||||
|
||||
size_t rc_offset = 0;
|
||||
|
||||
// The logic behind this is that 1 thread only works on 1 element
|
||||
// We have {t} elements in each state, and {blocks} states total
|
||||
int number_of_threads = (256 / this->t) * this->t;
|
||||
int hashes_per_block = number_of_threads / this->t;
|
||||
int total_number_of_threads = blocks * this->t;
|
||||
int number_of_blocks = total_number_of_threads / number_of_threads +
|
||||
static_cast<bool>(total_number_of_threads % number_of_threads);
|
||||
|
||||
// The partial rounds operates on the whole state, so we define
|
||||
// the parallelism params for processing a single hash preimage per thread
|
||||
int singlehash_block_size = 128;
|
||||
int number_of_singlehash_blocks = blocks / singlehash_block_size + static_cast<bool>(blocks % singlehash_block_size);
|
||||
|
||||
// Pick the domain_tag accordinaly
|
||||
S domain_tag;
|
||||
switch (hash_type) {
|
||||
case HashType::ConstInputLen:
|
||||
domain_tag = this->const_input_no_pad_domain_tag;
|
||||
break;
|
||||
|
||||
case HashType::MerkleTree:
|
||||
domain_tag = this->tree_domain_tag;
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
|
||||
// Domain separation and adding pre-round constants
|
||||
prepare_poseidon_states <<< number_of_blocks, number_of_threads >>> (inp_device, states, blocks, domain_tag, this->config);
|
||||
rc_offset += this->t;
|
||||
cudaFree(inp_device);
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "Domain separation: " << rc_offset << std::endl;
|
||||
print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
|
||||
// execute half full rounds
|
||||
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t >>> (states, blocks, rc_offset, true, this->config);
|
||||
rc_offset += this->t * this->config.full_rounds_half;
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "Full rounds 1. RCOFFSET: " << rc_offset << std::endl;
|
||||
print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
|
||||
// execute partial rounds
|
||||
partial_rounds <<< number_of_singlehash_blocks, singlehash_block_size >>> (states, blocks, rc_offset, this->config);
|
||||
rc_offset += this->config.partial_rounds;
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "Partial rounds. RCOFFSET: " << rc_offset << std::endl;
|
||||
print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
|
||||
// execute half full rounds
|
||||
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t >>> (states, blocks, rc_offset, false, this->config);
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "Full rounds 2. RCOFFSET: " << rc_offset << std::endl;
|
||||
print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
|
||||
// get output
|
||||
S * out_device;
|
||||
cudaMalloc(&out_device, blocks * sizeof(S));
|
||||
get_hash_results <<< number_of_singlehash_blocks, singlehash_block_size >>> (states, blocks, out_device, this->config.t);
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceSynchronize();
|
||||
std::cout << "Get hash results" << std::endl;
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
#endif
|
||||
cudaMemcpy(out, out_device, blocks * sizeof(S), cudaMemcpyDeviceToHost);
|
||||
cudaFree(out_device);
|
||||
cudaFree(states);
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceReset();
|
||||
#endif
|
||||
}
|
||||
@@ -1,133 +0,0 @@
|
||||
#pragma once
|
||||
#include "constants.cuh"
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <chrono>
|
||||
|
||||
#define ARITY 3
|
||||
|
||||
template <typename S>
|
||||
__host__ void print_buffer_from_cuda(S * device_ptr, size_t size) {
|
||||
S * buffer = static_cast< S * >(malloc(size * sizeof(S)));
|
||||
cudaMemcpy(buffer, device_ptr, size * sizeof(S), cudaMemcpyDeviceToHost);
|
||||
|
||||
std::cout << "Start print" << std::endl;
|
||||
for(int i = 0; i < size / ARITY; i++) {
|
||||
std::cout << "State #" << i << std::endl;
|
||||
for (int j = 0; j < ARITY; j++) {
|
||||
std::cout << buffer[i * ARITY + j] << std::endl;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
free(buffer);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
template <typename S>
|
||||
__device__ void print_scalar(S element, int data) {
|
||||
printf("D# %d, T# %d: 0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
|
||||
data,
|
||||
threadIdx.x,
|
||||
element.limbs_storage.limbs[0],
|
||||
element.limbs_storage.limbs[1],
|
||||
element.limbs_storage.limbs[2],
|
||||
element.limbs_storage.limbs[3],
|
||||
element.limbs_storage.limbs[4],
|
||||
element.limbs_storage.limbs[5],
|
||||
element.limbs_storage.limbs[6],
|
||||
element.limbs_storage.limbs[7]
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename S>
|
||||
struct PoseidonConfiguration {
|
||||
uint32_t partial_rounds, full_rounds_half, t;
|
||||
S * round_constants, * mds_matrix, * non_sparse_matrix, *sparse_matrices;
|
||||
};
|
||||
|
||||
template <typename S>
|
||||
class Poseidon {
|
||||
public:
|
||||
uint32_t t;
|
||||
PoseidonConfiguration<S> config;
|
||||
|
||||
enum HashType {
|
||||
ConstInputLen,
|
||||
MerkleTree,
|
||||
};
|
||||
|
||||
Poseidon(const uint32_t arity) {
|
||||
t = arity + 1;
|
||||
this->config.t = t;
|
||||
|
||||
// Pre-calculate domain tags
|
||||
// Domain tags will vary for different applications of Poseidon
|
||||
uint32_t tree_domain_tag_value = 1;
|
||||
tree_domain_tag_value = (tree_domain_tag_value << arity) - tree_domain_tag_value;
|
||||
tree_domain_tag = S::from(tree_domain_tag_value);
|
||||
|
||||
const_input_no_pad_domain_tag = S::one();
|
||||
|
||||
// TO-DO: implement binary shifts for scalar type
|
||||
// const_input_no_pad_domain_tag = S::one() << 64;
|
||||
// const_input_no_pad_domain_tag *= S::from(arity);
|
||||
|
||||
this->config.full_rounds_half = FULL_ROUNDS_DEFAULT;
|
||||
this->config.partial_rounds = partial_rounds_number_from_arity(arity);
|
||||
|
||||
uint32_t round_constants_len = t * this->config.full_rounds_half * 2 + this->config.partial_rounds;
|
||||
uint32_t mds_matrix_len = t * t;
|
||||
uint32_t sparse_matrices_len = (t * 2 - 1) * this->config.partial_rounds;
|
||||
|
||||
// All the constants are stored in a single file
|
||||
S * constants = load_constants<S>(arity);
|
||||
|
||||
S * mds_offset = constants + round_constants_len;
|
||||
S * non_sparse_offset = mds_offset + mds_matrix_len;
|
||||
S * sparse_matrices_offset = non_sparse_offset + mds_matrix_len;
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
std::cout << "P: " << this->config.partial_rounds << " F: " << this->config.full_rounds_half << std::endl;
|
||||
#endif
|
||||
|
||||
// Allocate the memory for constants
|
||||
cudaMalloc(&this->config.round_constants, sizeof(S) * round_constants_len);
|
||||
cudaMalloc(&this->config.mds_matrix, sizeof(S) * mds_matrix_len);
|
||||
cudaMalloc(&this->config.non_sparse_matrix, sizeof(S) * mds_matrix_len);
|
||||
cudaMalloc(&this->config.sparse_matrices, sizeof(S) * sparse_matrices_len);
|
||||
|
||||
// Copy the constants to device
|
||||
cudaMemcpy(this->config.round_constants, constants,
|
||||
sizeof(S) * round_constants_len,
|
||||
cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(this->config.mds_matrix, mds_offset,
|
||||
sizeof(S) * mds_matrix_len,
|
||||
cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(this->config.non_sparse_matrix, non_sparse_offset,
|
||||
sizeof(S) * mds_matrix_len,
|
||||
cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(this->config.sparse_matrices, sparse_matrices_offset,
|
||||
sizeof(S) * sparse_matrices_len,
|
||||
cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
~Poseidon() {
|
||||
cudaFree(this->config.round_constants);
|
||||
cudaFree(this->config.mds_matrix);
|
||||
cudaFree(this->config.non_sparse_matrix);
|
||||
cudaFree(this->config.sparse_matrices);
|
||||
}
|
||||
|
||||
// Hash multiple preimages in parallel
|
||||
void hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type);
|
||||
|
||||
private:
|
||||
S tree_domain_tag, const_input_no_pad_domain_tag;
|
||||
};
|
||||
@@ -1,48 +0,0 @@
|
||||
#define DEBUG
|
||||
|
||||
#include "../../curves/bls12_381/curve_config.cuh"
|
||||
#include "../../curves/bls12_381/poseidon.cu"
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
const int arity = 2;
|
||||
const int t = arity + 1;
|
||||
|
||||
Poseidon<BLS12_381::scalar_t> poseidon(arity);
|
||||
|
||||
int number_of_blocks = 4;
|
||||
|
||||
BLS12_381::scalar_t input = BLS12_381::scalar_t::zero();
|
||||
BLS12_381::scalar_t * in_ptr = static_cast< BLS12_381::scalar_t * >(malloc(number_of_blocks * arity * sizeof(BLS12_381::scalar_t)));
|
||||
for (uint32_t i = 0; i < number_of_blocks * arity; i++) {
|
||||
// std::cout << input << std::endl;
|
||||
in_ptr[i] = input;
|
||||
input = input + BLS12_381::scalar_t::one();
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
BLS12_381::scalar_t * out_ptr = static_cast< BLS12_381::scalar_t * >(malloc(number_of_blocks * sizeof(BLS12_381::scalar_t)));
|
||||
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
|
||||
poseidon.hash_blocks(in_ptr, number_of_blocks, out_ptr, Poseidon<BLS12_381::scalar_t>::HashType::MerkleTree);
|
||||
|
||||
#ifdef DEBUG
|
||||
for (int i = 0; i < number_of_blocks; i++) {
|
||||
std::cout << out_ptr[i] << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
|
||||
free(in_ptr);
|
||||
free(out_ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,155 +0,0 @@
|
||||
#pragma once
|
||||
#include "../../utils/storage.cuh"
|
||||
namespace PARAMS{
|
||||
struct fp_config{
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd, 0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb, 0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3, 0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7, 0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f, 0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
|
||||
static constexpr unsigned modulus_bits_count = 253;
|
||||
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b, 0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> omega4= {0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765, 0x970dec00, 0x23ed1347, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega5= {0x0405f600, 0xfa8e7081, 0xf8a89660, 0x38b1c291, 0x6bda5fce, 0xefab9005, 0x92a3c754, 0x0b6b0756};
|
||||
static constexpr storage<limbs_count> omega6= {0xaf0a50c8, 0xc5b2c78e, 0x4636deb3, 0x72e32a34, 0xb6f97778, 0x3d775d15, 0x2b16be6e, 0x0c4c070d};
|
||||
static constexpr storage<limbs_count> omega7= {0x7a1ade2c, 0x3f5a4e73, 0x0120d1db, 0x71e5bca1, 0x3b2866fd, 0xbcb44162, 0x89c38db1, 0x06ed1a90};
|
||||
static constexpr storage<limbs_count> omega8= {0xbd2cd25e, 0x61c5510e, 0x2b0d531c, 0xe2d70111, 0x94c3bd4b, 0x738f9894, 0x53182695, 0x0b1e0f1d};
|
||||
static constexpr storage<limbs_count> omega9= {0x8cb9508c, 0xcfb2f75e, 0xf491e401, 0x4c14f244, 0x23c16afb, 0xc8f5265f, 0x70f3ff2a, 0x0cda7e27};
|
||||
static constexpr storage<limbs_count> omega10= {0x0bdc32ee, 0xca77feb9, 0xd957f5a9, 0xf36ddfd4, 0x61ba14c4, 0x491c58f5, 0x93e8f339, 0x0618d3c9};
|
||||
static constexpr storage<limbs_count> omega11= {0x2d89d82f, 0x68c3242e, 0x832a3729, 0xf9559645, 0xbceb62cc, 0x5c803c5e, 0x99ffa2f8, 0x1177cf5d};
|
||||
static constexpr storage<limbs_count> omega12= {0x6932851a, 0xb6ed40f2, 0x1e0da12e, 0x79cbe7fb, 0x2a7d8f87, 0x8d408575, 0x7505d049, 0x11867341};
|
||||
static constexpr storage<limbs_count> omega13= {0x07146cbf, 0x8cf7d87a, 0x109c4d23, 0x14ac37dc, 0x883e9660, 0x082d15f0, 0xad9ea9b8, 0x003719b1};
|
||||
static constexpr storage<limbs_count> omega14= {0xfd0aee77, 0x2260e0dd, 0x1e33b6db, 0xc0cbbc3f, 0xfe7e1b36, 0xc8bf6747, 0x4cb802c1, 0x129e4fd5};
|
||||
static constexpr storage<limbs_count> omega15= {0x8ac75741, 0x22f6fca2, 0xdd37b519, 0x8101b557, 0x1036226a, 0xf493bb8a, 0xfce05c2c, 0x06dbad6c};
|
||||
static constexpr storage<limbs_count> omega16= {0x56733f8b, 0x7d246c24, 0xff70b46a, 0xbc3c4112, 0x6f13530b, 0x2c159b40, 0xc55d287b, 0x0c13137a};
|
||||
static constexpr storage<limbs_count> omega17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> omega18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> omega19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
static constexpr storage<limbs_count> omega20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
|
||||
static constexpr storage<limbs_count> omega21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
|
||||
static constexpr storage<limbs_count> omega22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
|
||||
static constexpr storage<limbs_count> omega23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
|
||||
static constexpr storage<limbs_count> omega24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
|
||||
static constexpr storage<limbs_count> omega25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
|
||||
static constexpr storage<limbs_count> omega26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
|
||||
static constexpr storage<limbs_count> omega27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
|
||||
static constexpr storage<limbs_count> omega28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
|
||||
static constexpr storage<limbs_count> omega29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
|
||||
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
static constexpr storage<limbs_count> omega_inv1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> omega_inv4= {0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99, 0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> omega_inv5= {0x29f04fbb, 0x401766f3, 0x0a4b98b2, 0x7e4e5f63, 0x9fbc28da, 0x35887f12, 0xdabe3b97, 0x045cb225};
|
||||
static constexpr storage<limbs_count> omega_inv6= {0xac4ce534, 0xf3883827, 0x7c4940f0, 0x9f9a114f, 0x32cc3182, 0xe48527ee, 0x2877f4c2, 0x02d4450c};
|
||||
static constexpr storage<limbs_count> omega_inv7= {0x4afbf0bb, 0xd2533833, 0x1d646d56, 0x20987ba6, 0xb8ae7d61, 0xf2c34c11, 0xb53ae995, 0x09962e74};
|
||||
static constexpr storage<limbs_count> omega_inv8= {0x34f5271a, 0xd6aeb755, 0x493bb125, 0xc0e24cfd, 0x35cf1879, 0xc9d2a1ad, 0x19000e58, 0x0f3570fa};
|
||||
static constexpr storage<limbs_count> omega_inv9= {0xbec3ee61, 0x2601423e, 0xb5252af1, 0x94f5ab4b, 0x205d09ca, 0xa1184628, 0x82a1fba2, 0x0e305e1e};
|
||||
static constexpr storage<limbs_count> omega_inv10= {0x7e3320f2, 0x3cbad3a7, 0x4269c624, 0x7866653a, 0xa2fc13a2, 0xaf6d742d, 0xfe24db2a, 0x03ed8246};
|
||||
static constexpr storage<limbs_count> omega_inv11= {0x30cff7d3, 0xcb6ab09e, 0xd88db7e6, 0x29949e69, 0x24db3cd4, 0xb9117dc6, 0xca8d11b5, 0x01b2aadd};
|
||||
static constexpr storage<limbs_count> omega_inv12= {0x433b851c, 0x1c8fbc5d, 0x545e622f, 0x0ccc3b8c, 0x5c624e0f, 0x0fba9df2, 0x0496ddf9, 0x02d54c5d};
|
||||
static constexpr storage<limbs_count> omega_inv13= {0x0a176838, 0x2ddbbfdd, 0xc4c77f0f, 0xb7a1e4f3, 0x41cad032, 0x645b4383, 0xbfb123c4, 0x0f3fe2e3};
|
||||
static constexpr storage<limbs_count> omega_inv14= {0x9ff30538, 0x1d6d50fe, 0x8576b6fa, 0xca07f2d2, 0x720da6d2, 0x587839fa, 0xe9ebd753, 0x0038d5aa};
|
||||
static constexpr storage<limbs_count> omega_inv15= {0x8e30fb24, 0xaeac713d, 0x21906459, 0xd004e9e3, 0xa60b0a33, 0x2fc54303, 0x14e545a6, 0x039063f8};
|
||||
static constexpr storage<limbs_count> omega_inv16= {0x74d36c47, 0x112559bd, 0x4154b77a, 0x87db7016, 0x3843df80, 0x9e779ae5, 0x297077d0, 0x024424f2};
|
||||
static constexpr storage<limbs_count> omega_inv17= {0x65953c15, 0xd649ae5e, 0x56accc60, 0x879fe571, 0xa3ba1e39, 0xba914f52, 0xd6ea78a2, 0x01b74920};
|
||||
static constexpr storage<limbs_count> omega_inv18= {0x3d8a82b4, 0x319dea45, 0x8fc703de, 0x49468894, 0xc6b00817, 0x703f710f, 0xe862bc53, 0x007762fd};
|
||||
static constexpr storage<limbs_count> omega_inv19= {0x5bae083f, 0x4f433336, 0x27612fe3, 0x485e079c, 0x7f8f0a07, 0xf83b6572, 0xca91a4d4, 0x06bdcaaf};
|
||||
static constexpr storage<limbs_count> omega_inv20= {0xb2fb63eb, 0x4a0bf5e7, 0x996004d9, 0x6f64f8ec, 0x67519c5e, 0x0fecd781, 0x1cab2760, 0x04475eb3};
|
||||
static constexpr storage<limbs_count> omega_inv21= {0xcd83d14f, 0xadbd6ce4, 0x750b194a, 0xc664d3bc, 0x89c9f437, 0x3034dfed, 0xcc2e643b, 0x03d502b8};
|
||||
static constexpr storage<limbs_count> omega_inv22= {0x2272320b, 0xf89478a9, 0xd2e658b7, 0x3adac024, 0x94b25831, 0xf38d840f, 0x37dc6c4c, 0x04540b1f};
|
||||
static constexpr storage<limbs_count> omega_inv23= {0xa6d411fe, 0x19d969b1, 0xf544a648, 0x973f00f7, 0xc9ed9f93, 0xb18f166c, 0xe7f21124, 0x02fba68e};
|
||||
static constexpr storage<limbs_count> omega_inv24= {0x94921227, 0x78b96b20, 0x23b35b65, 0x07cd90db, 0xc843f1c3, 0x111f4fd9, 0xff729f23, 0x0ec4b820};
|
||||
static constexpr storage<limbs_count> omega_inv25= {0x4879d823, 0x53eb200b, 0x93095f4a, 0x1971fac3, 0x86989a58, 0x8467ffe6, 0x306ed29d, 0x0af20231};
|
||||
static constexpr storage<limbs_count> omega_inv26= {0xd4793454, 0x71c907bd, 0x7700defb, 0xc11aa47e, 0xbac11769, 0xf03e0873, 0x97419136, 0x0353190d};
|
||||
static constexpr storage<limbs_count> omega_inv27= {0xa81a701c, 0x61a3deb6, 0x91bbbecf, 0xd8a4eda1, 0x6feb65df, 0x3f5339b1, 0x8b5421f2, 0x108adc5b};
|
||||
static constexpr storage<limbs_count> omega_inv28= {0xe7bf5a41, 0x7d6c573a, 0xfa83b1f7, 0x8038b697, 0xa6718ce9, 0x2a988bee, 0x1239b708, 0x0846f362};
|
||||
static constexpr storage<limbs_count> omega_inv29= {0xe3373548, 0x89a068a4, 0x78a6c4e5, 0xf31284cf, 0x6e9396d6, 0x9eed5c8d, 0x7e4342f9, 0x01643c65};
|
||||
static constexpr storage<limbs_count> omega_inv30= {0x123a81f6, 0xc03a3272, 0x115b15e8, 0x377e6d2f, 0x2d6d7206, 0xed5575e4, 0x714004f2, 0x0b1e37e4};
|
||||
static constexpr storage<limbs_count> omega_inv31= {0xdde8ffc5, 0x62a29589, 0x618c5d62, 0xfb6716e8, 0x88d61f25, 0x787e561c, 0xd2b21c7e, 0x0e351761};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x7aca7fbe, 0xc9fea0e9, 0xb41a8854, 0x965ff314, 0x810eea7e, 0x743415d4, 0x8275bbd1, 0x0431c01b};
|
||||
static constexpr storage<limbs_count> inv1= {0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af};
|
||||
static constexpr storage<limbs_count> inv2= {0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06};
|
||||
static constexpr storage<limbs_count> inv3= {0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2};
|
||||
static constexpr storage<limbs_count> inv4= {0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e, 0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08};
|
||||
static constexpr storage<limbs_count> inv5= {0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346, 0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33};
|
||||
static constexpr storage<limbs_count> inv6= {0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22, 0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9};
|
||||
static constexpr storage<limbs_count> inv7= {0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210, 0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93};
|
||||
static constexpr storage<limbs_count> inv8= {0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87, 0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9};
|
||||
static constexpr storage<limbs_count> inv9= {0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3, 0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab};
|
||||
static constexpr storage<limbs_count> inv10= {0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61, 0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85};
|
||||
static constexpr storage<limbs_count> inv11= {0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af, 0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1};
|
||||
static constexpr storage<limbs_count> inv12= {0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57, 0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8};
|
||||
static constexpr storage<limbs_count> inv13= {0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab, 0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003};
|
||||
static constexpr storage<limbs_count> inv14= {0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054, 0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1};
|
||||
static constexpr storage<limbs_count> inv15= {0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9, 0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007};
|
||||
static constexpr storage<limbs_count> inv16= {0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54, 0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3};
|
||||
static constexpr storage<limbs_count> inv17= {0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29, 0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08};
|
||||
static constexpr storage<limbs_count> inv18= {0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094, 0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3};
|
||||
static constexpr storage<limbs_count> inv19= {0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9, 0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309};
|
||||
static constexpr storage<limbs_count> inv20= {0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164, 0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433};
|
||||
static constexpr storage<limbs_count> inv21= {0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31, 0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9};
|
||||
static constexpr storage<limbs_count> inv22= {0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198, 0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513};
|
||||
static constexpr storage<limbs_count> inv23= {0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b, 0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539};
|
||||
static constexpr storage<limbs_count> inv24= {0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5, 0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b};
|
||||
static constexpr storage<limbs_count> inv25= {0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651, 0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555};
|
||||
static constexpr storage<limbs_count> inv26= {0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8, 0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559};
|
||||
static constexpr storage<limbs_count> inv27= {0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3, 0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c};
|
||||
static constexpr storage<limbs_count> inv28= {0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9, 0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d};
|
||||
static constexpr storage<limbs_count> inv29= {0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4, 0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv30= {0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv31= {0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv32= {0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e};
|
||||
};
|
||||
|
||||
struct fq_config{
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88, 0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6, 0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510, 0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc, 0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2, 0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af, 0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4, 0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f, 0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48, 0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be, 0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
|
||||
static constexpr unsigned modulus_bits_count = 377;
|
||||
static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488, 0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// i^2, the square of the imaginary unit for the extension field
|
||||
static constexpr uint32_t i_squared = 1;
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> generator_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512, 0xbd37cb5c, 0x188282c8,
|
||||
0xaa9d41bb, 0x85951e2c, 0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
|
||||
static constexpr storage<limbs_count> generator_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36, 0x4fb82305, 0x6d182ad4,
|
||||
0xca3e52d9, 0xbd7fb348, 0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
|
||||
static constexpr storage<limbs_count> generator_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0xbac0326, 0x7ae3d177, 0xb4510b64,
|
||||
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x24aa2b2};
|
||||
static constexpr storage<limbs_count> generator_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
|
||||
0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
|
||||
static constexpr storage<limbs_count> generator_y_re = {0x8b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
|
||||
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0xce5d527};
|
||||
static constexpr storage<limbs_count> generator_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
|
||||
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x606c4a0};
|
||||
};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// TODO: correct parameters for G2 here
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
}
|
||||
@@ -1,193 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../utils/storage.cuh"
|
||||
|
||||
namespace PARAMS{
|
||||
struct fp_config {
|
||||
// field structure size = 8 * 32 bit
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
static constexpr unsigned modulus_bits_count = 255;
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
|
||||
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// static constexpr storage<limbs_count> omega[32]= { {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000}, {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d}, {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e}, {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb}, {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac}, {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802}, {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59}, {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667}, {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098}, {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b}, {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0}, {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8}, {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8}, {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911}, {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd}, {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333}, {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db}, {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83}, {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f}, {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5}, {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3}, {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd}, {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc}, {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd}, {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580}, {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d}, {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d}, {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f}, {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b}, {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72}, {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> omega1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega2= {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega3= {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d};
|
||||
static constexpr storage<limbs_count> omega4= {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e};
|
||||
static constexpr storage<limbs_count> omega5= {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb};
|
||||
static constexpr storage<limbs_count> omega6= {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac};
|
||||
static constexpr storage<limbs_count> omega7= {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802};
|
||||
static constexpr storage<limbs_count> omega8= {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59};
|
||||
static constexpr storage<limbs_count> omega9= {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667};
|
||||
static constexpr storage<limbs_count> omega10= {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098};
|
||||
static constexpr storage<limbs_count> omega11= {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b};
|
||||
static constexpr storage<limbs_count> omega12= {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0};
|
||||
static constexpr storage<limbs_count> omega13= {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8};
|
||||
static constexpr storage<limbs_count> omega14= {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8};
|
||||
static constexpr storage<limbs_count> omega15= {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911};
|
||||
static constexpr storage<limbs_count> omega16= {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd};
|
||||
static constexpr storage<limbs_count> omega17= {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333};
|
||||
static constexpr storage<limbs_count> omega18= {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db};
|
||||
static constexpr storage<limbs_count> omega19= {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83};
|
||||
static constexpr storage<limbs_count> omega20= {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f};
|
||||
static constexpr storage<limbs_count> omega21= {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5};
|
||||
static constexpr storage<limbs_count> omega22= {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3};
|
||||
static constexpr storage<limbs_count> omega23= {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd};
|
||||
static constexpr storage<limbs_count> omega24= {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc};
|
||||
static constexpr storage<limbs_count> omega25= {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd};
|
||||
static constexpr storage<limbs_count> omega26= {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580};
|
||||
static constexpr storage<limbs_count> omega27= {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d};
|
||||
static constexpr storage<limbs_count> omega28= {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d};
|
||||
static constexpr storage<limbs_count> omega29= {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f};
|
||||
static constexpr storage<limbs_count> omega30= {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
|
||||
static constexpr storage<limbs_count> omega31= {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
|
||||
static constexpr storage<limbs_count> omega32= {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
|
||||
|
||||
// static constexpr storage<limbs_count> omega_inv[32]={ {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753}, {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e}, {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4}, {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c}, {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee}, {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d}, {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25}, {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e}, {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508}, {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d}, {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63}, {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7}, {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1}, {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac}, {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003}, {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c}, {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7}, {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950}, {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2}, {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6}, {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5}, {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9}, {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960}, {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6}, {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf}, {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f}, {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533}, {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff}, {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287}, {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6}, {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> omega_inv1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e};
|
||||
static constexpr storage<limbs_count> omega_inv4= {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4};
|
||||
static constexpr storage<limbs_count> omega_inv5= {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c};
|
||||
static constexpr storage<limbs_count> omega_inv6= {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee};
|
||||
static constexpr storage<limbs_count> omega_inv7= {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d};
|
||||
static constexpr storage<limbs_count> omega_inv8= {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25};
|
||||
static constexpr storage<limbs_count> omega_inv9= {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e};
|
||||
static constexpr storage<limbs_count> omega_inv10= {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508};
|
||||
static constexpr storage<limbs_count> omega_inv11= {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d};
|
||||
static constexpr storage<limbs_count> omega_inv12= {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63};
|
||||
static constexpr storage<limbs_count> omega_inv13= {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7};
|
||||
static constexpr storage<limbs_count> omega_inv14= {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1};
|
||||
static constexpr storage<limbs_count> omega_inv15= {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac};
|
||||
static constexpr storage<limbs_count> omega_inv16= {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003};
|
||||
static constexpr storage<limbs_count> omega_inv17= {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c};
|
||||
static constexpr storage<limbs_count> omega_inv18= {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7};
|
||||
static constexpr storage<limbs_count> omega_inv19= {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950};
|
||||
static constexpr storage<limbs_count> omega_inv20= {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2};
|
||||
static constexpr storage<limbs_count> omega_inv21= {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6};
|
||||
static constexpr storage<limbs_count> omega_inv22= {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5};
|
||||
static constexpr storage<limbs_count> omega_inv23= {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9};
|
||||
static constexpr storage<limbs_count> omega_inv24= {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960};
|
||||
static constexpr storage<limbs_count> omega_inv25= {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6};
|
||||
static constexpr storage<limbs_count> omega_inv26= {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf};
|
||||
static constexpr storage<limbs_count> omega_inv27= {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f};
|
||||
static constexpr storage<limbs_count> omega_inv28= {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533};
|
||||
static constexpr storage<limbs_count> omega_inv29= {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff};
|
||||
static constexpr storage<limbs_count> omega_inv30= {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287};
|
||||
static constexpr storage<limbs_count> omega_inv31= {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};
|
||||
|
||||
// static constexpr storage<limbs_count> inv[32]={ {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9}, {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e}, {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268}, {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd}, {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18}, {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5}, {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04}, {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab}, {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f}, {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9}, {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e}, {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878}, {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5}, {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c}, {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77}, {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365}, {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c}, {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57}, {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5}, {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014}, {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3}, {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583}, {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b}, {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df}, {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719}, {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736}, {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744}, {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b}, {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f}, {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751}, {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752}, {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> inv1= {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
|
||||
static constexpr storage<limbs_count> inv2= {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
|
||||
static constexpr storage<limbs_count> inv3= {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268};
|
||||
static constexpr storage<limbs_count> inv4= {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd};
|
||||
static constexpr storage<limbs_count> inv5= {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18};
|
||||
static constexpr storage<limbs_count> inv6= {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5};
|
||||
static constexpr storage<limbs_count> inv7= {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04};
|
||||
static constexpr storage<limbs_count> inv8= {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab};
|
||||
static constexpr storage<limbs_count> inv9= {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f};
|
||||
static constexpr storage<limbs_count> inv10= {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9};
|
||||
static constexpr storage<limbs_count> inv11= {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e};
|
||||
static constexpr storage<limbs_count> inv12= {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878};
|
||||
static constexpr storage<limbs_count> inv13= {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5};
|
||||
static constexpr storage<limbs_count> inv14= {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c};
|
||||
static constexpr storage<limbs_count> inv15= {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77};
|
||||
static constexpr storage<limbs_count> inv16= {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365};
|
||||
static constexpr storage<limbs_count> inv17= {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c};
|
||||
static constexpr storage<limbs_count> inv18= {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57};
|
||||
static constexpr storage<limbs_count> inv19= {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5};
|
||||
static constexpr storage<limbs_count> inv20= {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014};
|
||||
static constexpr storage<limbs_count> inv21= {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3};
|
||||
static constexpr storage<limbs_count> inv22= {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583};
|
||||
static constexpr storage<limbs_count> inv23= {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b};
|
||||
static constexpr storage<limbs_count> inv24= {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df};
|
||||
static constexpr storage<limbs_count> inv25= {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719};
|
||||
static constexpr storage<limbs_count> inv26= {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736};
|
||||
static constexpr storage<limbs_count> inv27= {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744};
|
||||
static constexpr storage<limbs_count> inv28= {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b};
|
||||
static constexpr storage<limbs_count> inv29= {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f};
|
||||
static constexpr storage<limbs_count> inv30= {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
|
||||
static constexpr storage<limbs_count> inv31= {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
|
||||
static constexpr storage<limbs_count> inv32= {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
|
||||
};
|
||||
|
||||
struct fq_config {
|
||||
// field structure size = 12 * 32 bit
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
// 4*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
static constexpr unsigned modulus_bits_count = 381;
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// i^2, the square of the imaginary unit for the extension field
|
||||
static constexpr uint32_t i_squared = 1;
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> generator_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
|
||||
0x9774b905, 0xc3688c4f, 0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
|
||||
static constexpr storage<limbs_count> generator_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
|
||||
0xd5d00af6, 0xfcf5e095, 0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
|
||||
static constexpr storage<limbs_count> generator_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0xbac0326, 0x7ae3d177, 0xb4510b64,
|
||||
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x24aa2b2};
|
||||
static constexpr storage<limbs_count> generator_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
|
||||
0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
|
||||
static constexpr storage<limbs_count> generator_y_re = {0x8b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
|
||||
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0xce5d527};
|
||||
static constexpr storage<limbs_count> generator_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
|
||||
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x606c4a0};
|
||||
};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
}
|
||||
@@ -1,155 +0,0 @@
|
||||
#pragma once
|
||||
#include "../../utils/storage.cuh"
|
||||
namespace PARAMS{
|
||||
struct fp_config{
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
|
||||
////
|
||||
static constexpr storage<limbs_count> omega17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> omega18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> omega19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
static constexpr storage<limbs_count> omega20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
|
||||
static constexpr storage<limbs_count> omega21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
|
||||
static constexpr storage<limbs_count> omega22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
|
||||
static constexpr storage<limbs_count> omega23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
|
||||
static constexpr storage<limbs_count> omega24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
|
||||
static constexpr storage<limbs_count> omega25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
|
||||
static constexpr storage<limbs_count> omega26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
|
||||
static constexpr storage<limbs_count> omega27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
|
||||
static constexpr storage<limbs_count> omega28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
|
||||
static constexpr storage<limbs_count> omega29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
|
||||
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
////
|
||||
static constexpr storage<limbs_count> omega_inv17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> omega_inv18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> omega_inv19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
static constexpr storage<limbs_count> omega_inv20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
|
||||
static constexpr storage<limbs_count> omega_inv21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
|
||||
static constexpr storage<limbs_count> omega_inv22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
|
||||
static constexpr storage<limbs_count> omega_inv23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
|
||||
static constexpr storage<limbs_count> omega_inv24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
|
||||
static constexpr storage<limbs_count> omega_inv25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
|
||||
static constexpr storage<limbs_count> omega_inv26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
|
||||
static constexpr storage<limbs_count> omega_inv27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
|
||||
static constexpr storage<limbs_count> omega_inv28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
|
||||
static constexpr storage<limbs_count> omega_inv29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
|
||||
static constexpr storage<limbs_count> omega_inv30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> omega_inv31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
////
|
||||
////
|
||||
static constexpr storage<limbs_count> inv17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> inv18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> inv19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
static constexpr storage<limbs_count> inv20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
|
||||
static constexpr storage<limbs_count> inv21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
|
||||
static constexpr storage<limbs_count> inv22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
|
||||
static constexpr storage<limbs_count> inv23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
|
||||
static constexpr storage<limbs_count> inv24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
|
||||
static constexpr storage<limbs_count> inv25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
|
||||
static constexpr storage<limbs_count> inv26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
|
||||
static constexpr storage<limbs_count> inv27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
|
||||
static constexpr storage<limbs_count> inv28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
|
||||
static constexpr storage<limbs_count> inv29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
|
||||
static constexpr storage<limbs_count> inv30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> inv31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> inv32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
|
||||
////
|
||||
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090, 0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121, 0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975, 0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb, 0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7, 0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr unsigned modulus_bits_count = 254;
|
||||
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520, 0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> omega2= {0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> omega3= {0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1};
|
||||
static constexpr storage<limbs_count> omega4= {0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2};
|
||||
static constexpr storage<limbs_count> omega5= {0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6};
|
||||
static constexpr storage<limbs_count> omega6= {0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d};
|
||||
static constexpr storage<limbs_count> omega7= {0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd};
|
||||
static constexpr storage<limbs_count> omega8= {0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561};
|
||||
static constexpr storage<limbs_count> omega9= {0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e};
|
||||
static constexpr storage<limbs_count> omega10= {0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1};
|
||||
static constexpr storage<limbs_count> omega11= {0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584};
|
||||
static constexpr storage<limbs_count> omega12= {0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596};
|
||||
static constexpr storage<limbs_count> omega13= {0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49};
|
||||
static constexpr storage<limbs_count> omega14= {0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651};
|
||||
static constexpr storage<limbs_count> omega15= {0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f};
|
||||
static constexpr storage<limbs_count> omega16= {0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb};
|
||||
static constexpr storage<limbs_count> omega_inv1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711};
|
||||
static constexpr storage<limbs_count> omega_inv4= {0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf};
|
||||
static constexpr storage<limbs_count> omega_inv5= {0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136};
|
||||
static constexpr storage<limbs_count> omega_inv6= {0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2};
|
||||
static constexpr storage<limbs_count> omega_inv7= {0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053};
|
||||
static constexpr storage<limbs_count> omega_inv8= {0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327};
|
||||
static constexpr storage<limbs_count> omega_inv9= {0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821};
|
||||
static constexpr storage<limbs_count> omega_inv10= {0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7};
|
||||
static constexpr storage<limbs_count> omega_inv11= {0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21};
|
||||
static constexpr storage<limbs_count> omega_inv12= {0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2};
|
||||
static constexpr storage<limbs_count> omega_inv13= {0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58};
|
||||
static constexpr storage<limbs_count> omega_inv14= {0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f};
|
||||
static constexpr storage<limbs_count> omega_inv15= {0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766};
|
||||
static constexpr storage<limbs_count> omega_inv16= {0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7};
|
||||
static constexpr storage<limbs_count> inv1= {0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739};
|
||||
static constexpr storage<limbs_count> inv2= {0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6};
|
||||
static constexpr storage<limbs_count> inv3= {0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4};
|
||||
static constexpr storage<limbs_count> inv4= {0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b};
|
||||
static constexpr storage<limbs_count> inv5= {0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff};
|
||||
static constexpr storage<limbs_count> inv6= {0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39};
|
||||
static constexpr storage<limbs_count> inv7= {0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5};
|
||||
static constexpr storage<limbs_count> inv8= {0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24};
|
||||
static constexpr storage<limbs_count> inv9= {0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b};
|
||||
static constexpr storage<limbs_count> inv10= {0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f};
|
||||
static constexpr storage<limbs_count> inv11= {0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9};
|
||||
static constexpr storage<limbs_count> inv12= {0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d};
|
||||
static constexpr storage<limbs_count> inv13= {0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50};
|
||||
static constexpr storage<limbs_count> inv14= {0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1};
|
||||
static constexpr storage<limbs_count> inv15= {0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa};
|
||||
static constexpr storage<limbs_count> inv16= {0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e};
|
||||
};
|
||||
|
||||
struct fq_config{
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr storage<limbs_count> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522, 0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45, 0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95, 0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a, 0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55, 0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr unsigned modulus_bits_count = 254;
|
||||
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17, 0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// i^2, the square of the imaginary unit for the extension field
|
||||
static constexpr uint32_t i_squared = 1;
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> generator_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_x_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_x_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_y_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_y_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// TODO: correct parameters for G2 here
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "../primitives/field.cuh"
|
||||
#include "../primitives/projective.cuh"
|
||||
|
||||
#if defined(FEATURE_BLS12_381)
|
||||
#include "bls12_381/params.cuh"
|
||||
#elif defined(FEATURE_BLS12_377)
|
||||
#include "bls12_377/params.cuh"
|
||||
#elif defined(FEATURE_BN254)
|
||||
#include "bn254/params.cuh"
|
||||
#else
|
||||
# error "no FEATURE"
|
||||
#endif
|
||||
|
||||
typedef Field<PARAMS::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{ PARAMS::weierstrass_b };
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS::weierstrass_b_g2_re },
|
||||
point_field_t{ PARAMS::weierstrass_b_g2_im }};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
@@ -1,5 +0,0 @@
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
#include "poseidon.cu"
|
||||
@@ -1,327 +0,0 @@
|
||||
#ifndef _LDE
|
||||
#define _LDE
|
||||
#include <cuda.h>
|
||||
#include "../appUtils/ntt/lde.cu"
|
||||
#include "../appUtils/ntt/ntt.cuh"
|
||||
#include "../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "curve_config.cuh"
|
||||
|
||||
extern "C" scalar_t* build_domain_cuda(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
if (inverse) {
|
||||
return fill_twiddle_factors_array(domain_size, scalar_t::omega_inv(logn), stream);
|
||||
} else {
|
||||
return fill_twiddle_factors_array(domain_size, scalar_t::omega(logn), stream);
|
||||
}
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_cuda(scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_template<scalar_t,scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ecntt_cuda(projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_template<projective_t,scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_batch_cuda(scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_batch_template<scalar_t,scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ecntt_batch_cuda(projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_batch_template<projective_t,scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_scalars_cuda(scalar_t* d_out, scalar_t *d_evaluations, scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n,
|
||||
unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_points_cuda(projective_t* d_out, projective_t *d_evaluations, scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_points_batch_cuda(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_cuda(scalar_t* d_out, scalar_t *d_coefficients, scalar_t *d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_cuda(projective_t* d_out, projective_t *d_coefficients, scalar_t *d_domain,
|
||||
unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_batch_cuda(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_on_coset_cuda(scalar_t* d_out, scalar_t *d_coefficients, scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_on_coset_batch_cuda(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_on_coset_cuda(projective_t* d_out, projective_t *d_coefficients, scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_on_coset_batch_cuda(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_scalars_cuda(scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaStreamCreate(&stream);
|
||||
reverse_order(arr, n, logn, stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_scalars_batch_cuda(scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaStreamCreate(&stream);
|
||||
reverse_order_batch(arr, n, logn, batch_size, stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_points_cuda(projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaStreamCreate(&stream);
|
||||
reverse_order(arr, n, logn, stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_points_batch_cuda(projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaStreamCreate(&stream);
|
||||
reverse_order_batch(arr, n, logn, batch_size, stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -1,92 +0,0 @@
|
||||
#ifndef _MSM
|
||||
#define _MSM
|
||||
#include "../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
|
||||
|
||||
extern "C"
|
||||
int msm_cuda(projective_t *out, affine_t points[],
|
||||
scalar_t scalars[], size_t count, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<scalar_t, projective_t, affine_t>(scalars, points, count, out, false, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int msm_batch_cuda(projective_t* out, affine_t points[],
|
||||
scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<scalar_t, projective_t, affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda(projective_t* d_out, scalar_t* d_scalars, affine_t* d_points, size_t count, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm(d_scalars, d_points, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda(projective_t* d_out, scalar_t* d_scalars, affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,26 +0,0 @@
|
||||
#ifndef _POSEIDON
|
||||
#define _POSEIDON
|
||||
#include <cuda.h>
|
||||
#include <stdexcept>
|
||||
#include "../appUtils/poseidon/poseidon.cu"
|
||||
#include "curve_config.cuh"
|
||||
|
||||
template class Poseidon<scalar_t>;
|
||||
|
||||
extern "C" int poseidon_multi_cuda(scalar_t input[], scalar_t* out,
|
||||
size_t number_of_blocks, int arity, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
Poseidon<scalar_t> poseidon(arity);
|
||||
poseidon.hash_blocks(input, number_of_blocks, out, Poseidon<scalar_t>::HashType::MerkleTree);
|
||||
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -1,19 +0,0 @@
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include "../primitives/projective.cuh"
|
||||
|
||||
extern "C" bool eq(projective_t *point1, projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == point_field_t::zero()) && (point1->y == point_field_t::zero()) && (point1->z == point_field_t::zero())) &&
|
||||
!((point2->x == point_field_t::zero()) && (point2->y == point_field_t::zero()) && (point2->z == point_field_t::zero()));
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
extern "C" bool eq_g2(g2_projective_t *point1, g2_projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == g2_point_field_t::zero()) && (point1->y == g2_point_field_t::zero()) && (point1->z == g2_point_field_t::zero())) &&
|
||||
!((point2->x == g2_point_field_t::zero()) && (point2->y == g2_point_field_t::zero()) && (point2->z == g2_point_field_t::zero()));
|
||||
}
|
||||
#endif
|
||||
@@ -1,149 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "field.cuh"
|
||||
|
||||
#define HOST_INLINE __host__ __forceinline__
|
||||
#define DEVICE_INLINE __device__ __forceinline__
|
||||
#define HOST_DEVICE_INLINE __host__ __device__ __forceinline__
|
||||
|
||||
template <typename CONFIG> class ExtensionField {
|
||||
private:
|
||||
typedef typename Field<CONFIG>::Wide FWide;
|
||||
|
||||
struct ExtensionWide {
|
||||
FWide real;
|
||||
FWide imaginary;
|
||||
|
||||
ExtensionField HOST_DEVICE_INLINE get_lower() {
|
||||
return ExtensionField { real.get_lower(), imaginary.get_lower() };
|
||||
}
|
||||
|
||||
ExtensionField HOST_DEVICE_INLINE get_higher_with_slack() {
|
||||
return ExtensionField { real.get_higher_with_slack(), imaginary.get_higher_with_slack() };
|
||||
}
|
||||
};
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionWide operator+(ExtensionWide xs, const ExtensionWide& ys) {
|
||||
return ExtensionField { xs.real + ys.real, xs.imaginary + ys.imaginary };
|
||||
}
|
||||
|
||||
// an incomplete impl that assumes that xs > ys
|
||||
friend HOST_DEVICE_INLINE ExtensionWide operator-(ExtensionWide xs, const ExtensionWide& ys) {
|
||||
return ExtensionField { xs.real - ys.real, xs.imaginary - ys.imaginary };
|
||||
}
|
||||
|
||||
public:
|
||||
typedef Field<CONFIG> FF;
|
||||
static constexpr unsigned TLC = 2 * CONFIG::limbs_count;
|
||||
|
||||
FF real;
|
||||
FF imaginary;
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField zero() {
|
||||
return ExtensionField { FF::zero(), FF::zero() };
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField one() {
|
||||
return ExtensionField { FF::one(), FF::zero() };
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField generator_x() {
|
||||
return ExtensionField { FF { CONFIG::generator_x_re }, FF { CONFIG::generator_x_im } };
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField generator_y() {
|
||||
return ExtensionField { FF { CONFIG::generator_y_re }, FF { CONFIG::generator_y_im } };
|
||||
}
|
||||
|
||||
|
||||
static HOST_INLINE ExtensionField rand_host() {
|
||||
return ExtensionField { FF::rand_host(), FF::rand_host() };
|
||||
}
|
||||
|
||||
template <unsigned REDUCTION_SIZE = 1> static constexpr HOST_DEVICE_INLINE ExtensionField reduce(const ExtensionField &xs) {
|
||||
return ExtensionField { FF::reduce<REDUCTION_SIZE>(&xs.real), FF::reduce<REDUCTION_SIZE>(&xs.imaginary) };
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const ExtensionField& xs) {
|
||||
os << "{ Real: " << xs.real << " }; { Imaginary: " << xs.imaginary << " }";
|
||||
return os;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const ExtensionField& ys) {
|
||||
return ExtensionField { xs.real + ys.real, xs.imaginary + ys.imaginary };
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const ExtensionField& ys) {
|
||||
return ExtensionField { xs.real - ys.real, xs.imaginary - ys.imaginary };
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const ExtensionField& ys) {
|
||||
FWide real_prod = FF::mul_wide(xs.real * ys.real);
|
||||
FWide imaginary_prod = FF::mul_wide(xs.imaginary * ys.imaginary);
|
||||
FWide prod_of_sums = FF::mul_wide(xs.real + xs.imaginary, ys.real + ys.imaginary);
|
||||
FWide i_sq_times_im = FF::mul_unsigned<CONFIG::i_squared>(imaginary_prod);
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
|
||||
return ExtensionField { real_prod + i_sq_times_im, prod_of_sums - real_prod - imaginary_prod };
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator*(const ExtensionField& xs, const ExtensionField& ys) {
|
||||
FF real_prod = xs.real * ys.real;
|
||||
FF imaginary_prod = xs.imaginary * ys.imaginary;
|
||||
FF prod_of_sums = (xs.real + xs.imaginary) * (ys.real + ys.imaginary);
|
||||
FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
|
||||
return ExtensionField { real_prod + i_sq_times_im, prod_of_sums - real_prod - imaginary_prod };
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const ExtensionField& xs, const ExtensionField& ys) {
|
||||
return (xs.real == ys.real) && (xs.imaginary == ys.imaginary);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator!=(const ExtensionField& xs, const ExtensionField& ys) {
|
||||
return !(xs == ys);
|
||||
}
|
||||
|
||||
template <const ExtensionField& mutliplier>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField mul_const(const ExtensionField &xs) {
|
||||
constexpr uint32_t mul_real = mutliplier.real.limbs_storage.limbs[0];
|
||||
constexpr uint32_t mul_imaginary = mutliplier.imaginary.limbs_storage.limbs[0];
|
||||
FF real_prod = FF::template mul_unsigned<mul_real>(xs.real);
|
||||
FF imaginary_prod = FF::template mul_unsigned<mul_imaginary>(xs.imaginary);
|
||||
FF re_im = FF::template mul_unsigned<mul_real>(xs.imaginary);
|
||||
FF im_re = FF::template mul_unsigned<mul_imaginary>(xs.real);
|
||||
FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
|
||||
return ExtensionField { real_prod + i_sq_times_im, re_im + im_re };
|
||||
}
|
||||
|
||||
template <uint32_t mutliplier, unsigned REDUCTION_SIZE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField &xs) {
|
||||
return { FF::template mul_unsigned<mutliplier>(xs.real), FF::template mul_unsigned<mutliplier>(xs.imaginary) };
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide sqr_wide(const ExtensionField& xs) {
|
||||
// TODO: change to a more efficient squaring
|
||||
return mul_wide<MODULUS_MULTIPLE>(xs, xs);
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField sqr(const ExtensionField& xs) {
|
||||
// TODO: change to a more efficient squaring
|
||||
return xs * xs;
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField neg(const ExtensionField& xs) {
|
||||
return ExtensionField { FF::neg(xs.real), FF::neg(xs.imaginary) };
|
||||
}
|
||||
|
||||
// inverse assumes that xs is nonzero
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField inverse(const ExtensionField& xs) {
|
||||
ExtensionField xs_conjugate = { xs.real, FF::neg(xs.imaginary) };
|
||||
// TODO: wide here
|
||||
FF xs_norm_squared = FF::sqr(xs.real) + FF::sqr(xs.imaginary);
|
||||
return xs_conjugate * ExtensionField { FF::inverse(xs_norm_squared), FF::zero() };
|
||||
}
|
||||
};
|
||||
@@ -1,49 +0,0 @@
|
||||
#include <cuda.h>
|
||||
#include "../curves/bls12_381/curve_config.cuh"
|
||||
#include "../curves/bls12_377/curve_config.cuh"
|
||||
#include "../curves/bn254/curve_config.cuh"
|
||||
#include "projective.cuh"
|
||||
|
||||
extern "C" bool eq_bls12_381(BLS12_381::projective_t *point1, BLS12_381::projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_381::point_field_t::zero()) && (point1->y == BLS12_381::point_field_t::zero()) && (point1->z == BLS12_381::point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_381::point_field_t::zero()) && (point2->y == BLS12_381::point_field_t::zero()) && (point2->z == BLS12_381::point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" bool eq_bls12_377(BLS12_377::projective_t *point1, BLS12_377::projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_377::point_field_t::zero()) && (point1->y == BLS12_377::point_field_t::zero()) && (point1->z == BLS12_377::point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_377::point_field_t::zero()) && (point2->y == BLS12_377::point_field_t::zero()) && (point2->z == BLS12_377::point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" bool eq_bn254(BN254::projective_t *point1, BN254::projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BN254::point_field_t::zero()) && (point1->y == BN254::point_field_t::zero()) && (point1->z == BN254::point_field_t::zero())) &&
|
||||
!((point2->x == BN254::point_field_t::zero()) && (point2->y == BN254::point_field_t::zero()) && (point2->z == BN254::point_field_t::zero()));
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t *point1, BLS12_381::g2_projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_381::g2_point_field_t::zero()) && (point1->y == BLS12_381::g2_point_field_t::zero()) && (point1->z == BLS12_381::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_381::g2_point_field_t::zero()) && (point2->y == BLS12_381::g2_point_field_t::zero()) && (point2->z == BLS12_381::g2_point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t *point1, BLS12_377::g2_projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_377::g2_point_field_t::zero()) && (point1->y == BLS12_377::g2_point_field_t::zero()) && (point1->z == BLS12_377::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_377::g2_point_field_t::zero()) && (point2->y == BLS12_377::g2_point_field_t::zero()) && (point2->z == BLS12_377::g2_point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) && (point1->z == BN254::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) && (point2->z == BN254::g2_point_field_t::zero()));
|
||||
}
|
||||
#endif
|
||||
@@ -1,133 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "affine.cuh"
|
||||
|
||||
template <typename FF, class SCALAR_FF, const FF& B_VALUE>
|
||||
class Projective {
|
||||
friend Affine<FF>;
|
||||
|
||||
public:
|
||||
FF x;
|
||||
FF y;
|
||||
FF z;
|
||||
|
||||
static HOST_DEVICE_INLINE Projective zero() {
|
||||
return {FF::zero(), FF::one(), FF::zero()};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Affine<FF> to_affine(const Projective &point) {
|
||||
FF denom = FF::inverse(point.z);
|
||||
return {point.x * denom, point.y * denom};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Projective from_affine(const Affine<FF> &point) {
|
||||
return {point.x, point.y, FF::one()};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Projective generator() {
|
||||
return {FF::generator_x(), FF::generator_y(), FF::one()};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Projective neg(const Projective &point) {
|
||||
return {point.x, FF::neg(point.y), point.z};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Projective& p2) {
|
||||
const FF X1 = p1.x; // < 2
|
||||
const FF Y1 = p1.y; // < 2
|
||||
const FF Z1 = p1.z; // < 2
|
||||
const FF X2 = p2.x; // < 2
|
||||
const FF Y2 = p2.y; // < 2
|
||||
const FF Z2 = p2.z; // < 2
|
||||
const FF t00 = X1 * X2; // t00 ← X1 · X2 < 2
|
||||
const FF t01 = Y1 * Y2; // t01 ← Y1 · Y2 < 2
|
||||
const FF t02 = Z1 * Z2; // t02 ← Z1 · Z2 < 2
|
||||
const FF t03 = X1 + Y1; // t03 ← X1 + Y1 < 4
|
||||
const FF t04 = X2 + Y2; // t04 ← X2 + Y2 < 4
|
||||
const FF t05 = t03 * t04; // t03 ← t03 · t04 < 3
|
||||
const FF t06 = t00 + t01; // t06 ← t00 + t01 < 4
|
||||
const FF t07 = t05 - t06; // t05 ← t05 − t06 < 2
|
||||
const FF t08 = Y1 + Z1; // t08 ← Y1 + Z1 < 4
|
||||
const FF t09 = Y2 + Z2; // t09 ← Y2 + Z2 < 4
|
||||
const FF t10 = t08 * t09; // t10 ← t08 · t09 < 3
|
||||
const FF t11 = t01 + t02; // t11 ← t01 + t02 < 4
|
||||
const FF t12 = t10 - t11; // t12 ← t10 − t11 < 2
|
||||
const FF t13 = X1 + Z1; // t13 ← X1 + Z1 < 4
|
||||
const FF t14 = X2 + Z2; // t14 ← X2 + Z2 < 4
|
||||
const FF t15 = t13 * t14; // t15 ← t13 · t14 < 3
|
||||
const FF t16 = t00 + t02; // t16 ← t00 + t02 < 4
|
||||
const FF t17 = t15 - t16; // t17 ← t15 − t16 < 2
|
||||
const FF t18 = t00 + t00; // t18 ← t00 + t00 < 2
|
||||
const FF t19 = t18 + t00; // t19 ← t18 + t00 < 2
|
||||
const FF t20 = FF::template mul_unsigned<3>(
|
||||
FF::template mul_const<B_VALUE>(t02)); // t20 ← b3 · t02 < 2
|
||||
const FF t21 = t01 + t20; // t21 ← t01 + t20 < 2
|
||||
const FF t22 = t01 - t20; // t22 ← t01 − t20 < 2
|
||||
const FF t23 = FF::template mul_unsigned<3>(
|
||||
FF::template mul_const<B_VALUE>(t17)); // t23 ← b3 · t17 < 2
|
||||
const FF t24 = t12 * t23; // t24 ← t12 · t23 < 2
|
||||
const FF t25 = t07 * t22; // t25 ← t07 · t22 < 2
|
||||
const FF X3 = t25 - t24; // X3 ← t25 − t24 < 2
|
||||
const FF t27 = t23 * t19; // t27 ← t23 · t19 < 2
|
||||
const FF t28 = t22 * t21; // t28 ← t22 · t21 < 2
|
||||
const FF Y3 = t28 + t27; // Y3 ← t28 + t27 < 2
|
||||
const FF t30 = t19 * t07; // t30 ← t19 · t07 < 2
|
||||
const FF t31 = t21 * t12; // t31 ← t21 · t12 < 2
|
||||
const FF Z3 = t31 + t30; // Z3 ← t31 + t30 < 2
|
||||
return {X3, Y3, Z3};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator-(Projective p1, const Projective& p2) {
|
||||
return p1 + neg(p2);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Affine<FF>& p2) {
|
||||
// TODO: change the implementation to a more efficient mixed adder later on
|
||||
return p1 + from_affine(p2);
|
||||
}
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Projective& point) {
|
||||
os << "Point { x: " << point.x << "; y: " << point.y << "; z: " << point.z << " }";
|
||||
return os;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator-(Projective p1, const Affine<FF>& p2) {
|
||||
return p1 + Affine<FF>::neg(p2);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator*(SCALAR_FF scalar, const Projective& point) {
|
||||
Projective res = zero();
|
||||
#ifdef __CUDA_ARCH__
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < SCALAR_FF::NBITS; i++) {
|
||||
if (i > 0) {
|
||||
res = res + res;
|
||||
}
|
||||
if (scalar.get_scalar_digit(SCALAR_FF::NBITS - i - 1, 1)) {
|
||||
res = res + point;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Projective& p1, const Projective& p2) {
|
||||
return (p1.x * p2.z == p2.x * p1.z) && (p1.y * p2.z == p2.y * p1.z);
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool is_zero(const Projective &point) {
|
||||
return point.x == FF::zero() && point.y != FF::zero() && point.z == FF::zero();
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool is_on_curve(const Projective &point) {
|
||||
if (is_zero(point))
|
||||
return true;
|
||||
bool eq_holds = (FF::template mul_const<B_VALUE>(FF::sqr(point.z) * point.z) + FF::sqr(point.x) * point.x == point.z * FF::sqr(point.y));
|
||||
return point.z != FF::zero() && eq_holds;
|
||||
}
|
||||
|
||||
static HOST_INLINE Projective rand_host() {
|
||||
SCALAR_FF rand_scalar = SCALAR_FF::rand_host();
|
||||
return rand_scalar * generator();
|
||||
}
|
||||
};
|
||||
@@ -22,6 +22,10 @@ FetchContent_Declare(
|
||||
URL https://github.com/google/googletest/archive/refs/tags/v1.13.0.zip
|
||||
)
|
||||
# For Windows: Prevent overriding the parent project's compiler/linker settings
|
||||
|
||||
# boosting lib
|
||||
include_directories("/home/miner/include/boost_1_80_0")
|
||||
|
||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||
FetchContent_MakeAvailable(googletest)
|
||||
|
||||
@@ -1,9 +1,3 @@
|
||||
#ifndef MSM
|
||||
#define MSM
|
||||
#pragma once
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "../../primitives/affine.cuh"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <cub/device/device_radix_sort.cuh>
|
||||
@@ -12,6 +6,7 @@
|
||||
#include "../../utils/cuda_utils.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../curves/curve_config.cuh"
|
||||
#include "msm.cuh"
|
||||
|
||||
|
||||
@@ -83,16 +78,16 @@ __global__ void split_scalars_kernel(unsigned *buckets_indices, unsigned *point_
|
||||
|
||||
//this kernel adds up the points in each bucket
|
||||
template <typename P, typename A>
|
||||
// __global__ void accumulate_buckets_kernel(P *__restrict__ buckets, unsigned *__restrict__ bucket_offsets,
|
||||
// unsigned *__restrict__ bucket_sizes, unsigned *__restrict__ single_bucket_indices, unsigned *__restrict__ point_indices, A *__restrict__ points, unsigned nof_buckets, unsigned batch_size, unsigned msm_idx_shift){
|
||||
__global__ void accumulate_buckets_kernel(P *buckets, unsigned *bucket_offsets, unsigned *bucket_sizes, unsigned *single_bucket_indices, unsigned *point_indices, A *points, unsigned nof_buckets, unsigned *nof_buckets_to_compute, unsigned msm_idx_shift){
|
||||
__global__ void accumulate_buckets_kernel(P *__restrict__ buckets, unsigned *__restrict__ bucket_offsets,
|
||||
unsigned *__restrict__ bucket_sizes, unsigned *__restrict__ single_bucket_indices, unsigned *__restrict__ point_indices, A *__restrict__ points, unsigned nof_buckets, unsigned batch_size, unsigned msm_idx_shift){
|
||||
|
||||
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid >= *nof_buckets_to_compute){
|
||||
return;
|
||||
}
|
||||
unsigned msm_index = single_bucket_indices[tid]>>msm_idx_shift;
|
||||
unsigned bucket_index = msm_index * nof_buckets + (single_bucket_indices[tid]&((1<<msm_idx_shift)-1));
|
||||
unsigned bucket_size = bucket_sizes[tid];
|
||||
if (tid>=nof_buckets*batch_size || bucket_size == 0){ //if the bucket is empty we don't need to continue
|
||||
return;
|
||||
}
|
||||
unsigned bucket_offset = bucket_offsets[tid];
|
||||
for (unsigned i = 0; i < bucket_sizes[tid]; i++) //add the relevant points starting from the relevant offset up to the bucket size
|
||||
{
|
||||
@@ -106,7 +101,7 @@ template <typename P>
|
||||
__global__ void big_triangle_sum_kernel(P* buckets, P* final_sums, unsigned nof_bms, unsigned c){
|
||||
|
||||
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid >= nof_bms) return;
|
||||
if (tid>nof_bms) return;
|
||||
P line_sum = buckets[(tid+1)*(1<<c)-1];
|
||||
final_sums[tid] = line_sum;
|
||||
for (unsigned i = (1<<c)-2; i >0; i--)
|
||||
@@ -152,16 +147,16 @@ __global__ void final_accumulation_kernel(P* final_sums, P* final_results, unsig
|
||||
|
||||
//this function computes msm using the bucket method
|
||||
template <typename S, typename P, typename A>
|
||||
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, cudaStream_t stream) {
|
||||
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device) {
|
||||
|
||||
S *d_scalars;
|
||||
A *d_points;
|
||||
if (!on_device) {
|
||||
//copy scalars and point to gpu
|
||||
cudaMallocAsync(&d_scalars, sizeof(S) * size, stream);
|
||||
cudaMallocAsync(&d_points, sizeof(A) * size, stream);
|
||||
cudaMemcpyAsync(d_scalars, scalars, sizeof(S) * size, cudaMemcpyHostToDevice, stream);
|
||||
cudaMemcpyAsync(d_points, points, sizeof(A) * size, cudaMemcpyHostToDevice, stream);
|
||||
cudaMalloc(&d_scalars, sizeof(S) * size);
|
||||
cudaMalloc(&d_points, sizeof(A) * size);
|
||||
cudaMemcpy(d_scalars, scalars, sizeof(S) * size, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_points, points, sizeof(A) * size, cudaMemcpyHostToDevice);
|
||||
}
|
||||
else {
|
||||
d_scalars = scalars;
|
||||
@@ -178,140 +173,134 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
|
||||
nof_bms++;
|
||||
}
|
||||
unsigned nof_buckets = nof_bms<<c;
|
||||
cudaMallocAsync(&buckets, sizeof(P) * nof_buckets, stream);
|
||||
cudaMalloc(&buckets, sizeof(P) * nof_buckets);
|
||||
|
||||
// launch the bucket initialization kernel with maximum threads
|
||||
unsigned NUM_THREADS = 1 << 10;
|
||||
unsigned NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
initialize_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, nof_buckets);
|
||||
initialize_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, nof_buckets);
|
||||
|
||||
unsigned *bucket_indices;
|
||||
unsigned *point_indices;
|
||||
cudaMallocAsync(&bucket_indices, sizeof(unsigned) * size * (nof_bms+1), stream);
|
||||
cudaMallocAsync(&point_indices, sizeof(unsigned) * size * (nof_bms+1), stream);
|
||||
cudaMalloc(&bucket_indices, sizeof(unsigned) * size * (nof_bms+1));
|
||||
cudaMalloc(&point_indices, sizeof(unsigned) * size * (nof_bms+1));
|
||||
|
||||
//split scalars into digits
|
||||
NUM_THREADS = 1 << 10;
|
||||
NUM_BLOCKS = (size * (nof_bms+1) + NUM_THREADS - 1) / NUM_THREADS;
|
||||
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(bucket_indices + size, point_indices + size, d_scalars, size, msm_log_size,
|
||||
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(bucket_indices + size, point_indices + size, d_scalars, size, msm_log_size,
|
||||
nof_bms, bm_bitsize, c); //+size - leaving the first bm free for the out of place sort later
|
||||
|
||||
|
||||
//sort indices - the indices are sorted from smallest to largest in order to group together the points that belong to each bucket
|
||||
unsigned *sort_indices_temp_storage{};
|
||||
size_t sort_indices_temp_storage_bytes;
|
||||
// The second to last parameter is the default value supplied explicitly to allow passing the stream
|
||||
// See https://nvlabs.github.io/cub/structcub_1_1_device_radix_sort.html#a65e82152de448c6373ed9563aaf8af7e for more info
|
||||
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + size, bucket_indices,
|
||||
point_indices + size, point_indices, size, 0, sizeof(unsigned) * 8, stream);
|
||||
cudaMallocAsync(&sort_indices_temp_storage, sort_indices_temp_storage_bytes, stream);
|
||||
point_indices + size, point_indices, size);
|
||||
cudaMalloc(&sort_indices_temp_storage, sort_indices_temp_storage_bytes);
|
||||
for (unsigned i = 0; i < nof_bms; i++) {
|
||||
unsigned offset_out = i * size;
|
||||
unsigned offset_in = offset_out + size;
|
||||
// The second to last parameter is the default value supplied explicitly to allow passing the stream
|
||||
// See https://nvlabs.github.io/cub/structcub_1_1_device_radix_sort.html#a65e82152de448c6373ed9563aaf8af7e for more info
|
||||
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + offset_in, bucket_indices + offset_out,
|
||||
point_indices + offset_in, point_indices + offset_out, size, 0, sizeof(unsigned) * 8, stream);
|
||||
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + offset_in,
|
||||
bucket_indices + offset_out, point_indices + offset_in, point_indices + offset_out, size);
|
||||
}
|
||||
cudaFreeAsync(sort_indices_temp_storage, stream);
|
||||
cudaFree(sort_indices_temp_storage);
|
||||
|
||||
//find bucket_sizes
|
||||
unsigned *single_bucket_indices;
|
||||
unsigned *bucket_sizes;
|
||||
unsigned *nof_buckets_to_compute;
|
||||
cudaMallocAsync(&single_bucket_indices, sizeof(unsigned)*nof_buckets, stream);
|
||||
cudaMallocAsync(&bucket_sizes, sizeof(unsigned)*nof_buckets, stream);
|
||||
cudaMallocAsync(&nof_buckets_to_compute, sizeof(unsigned), stream);
|
||||
cudaMalloc(&single_bucket_indices, sizeof(unsigned)*nof_buckets);
|
||||
cudaMalloc(&bucket_sizes, sizeof(unsigned)*nof_buckets);
|
||||
cudaMalloc(&nof_buckets_to_compute, sizeof(unsigned));
|
||||
unsigned *encode_temp_storage{};
|
||||
size_t encode_temp_storage_bytes = 0;
|
||||
cub::DeviceRunLengthEncode::Encode(encode_temp_storage, encode_temp_storage_bytes, bucket_indices, single_bucket_indices, bucket_sizes,
|
||||
nof_buckets_to_compute, nof_bms*size, stream);
|
||||
cudaMallocAsync(&encode_temp_storage, encode_temp_storage_bytes, stream);
|
||||
nof_buckets_to_compute, nof_bms*size);
|
||||
cudaMalloc(&encode_temp_storage, encode_temp_storage_bytes);
|
||||
cub::DeviceRunLengthEncode::Encode(encode_temp_storage, encode_temp_storage_bytes, bucket_indices, single_bucket_indices, bucket_sizes,
|
||||
nof_buckets_to_compute, nof_bms*size, stream);
|
||||
cudaFreeAsync(encode_temp_storage, stream);
|
||||
nof_buckets_to_compute, nof_bms*size);
|
||||
cudaFree(encode_temp_storage);
|
||||
|
||||
//get offsets - where does each new bucket begin
|
||||
unsigned* bucket_offsets;
|
||||
cudaMallocAsync(&bucket_offsets, sizeof(unsigned)*nof_buckets, stream);
|
||||
cudaMalloc(&bucket_offsets, sizeof(unsigned)*nof_buckets);
|
||||
unsigned* offsets_temp_storage{};
|
||||
size_t offsets_temp_storage_bytes = 0;
|
||||
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, nof_buckets, stream);
|
||||
cudaMallocAsync(&offsets_temp_storage, offsets_temp_storage_bytes, stream);
|
||||
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, nof_buckets, stream);
|
||||
cudaFreeAsync(offsets_temp_storage, stream);
|
||||
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, nof_buckets);
|
||||
cudaMalloc(&offsets_temp_storage, offsets_temp_storage_bytes);
|
||||
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, nof_buckets);
|
||||
cudaFree(offsets_temp_storage);
|
||||
|
||||
//launch the accumulation kernel with maximum threads
|
||||
NUM_THREADS = 1 << 8;
|
||||
NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, point_indices,
|
||||
d_points, nof_buckets, nof_buckets_to_compute, c+bm_bitsize);
|
||||
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, point_indices,
|
||||
d_points, nof_buckets, 1, c+bm_bitsize);
|
||||
|
||||
#ifdef SSM_SUM
|
||||
//sum each bucket
|
||||
NUM_THREADS = 1 << 10;
|
||||
NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
ssm_buckets_kernel<fake_point, fake_scalar><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, single_bucket_indices, nof_buckets, c);
|
||||
ssm_buckets_kernel<fake_point, fake_scalar><<<NUM_BLOCKS, NUM_THREADS>>>(buckets, single_bucket_indices, nof_buckets, c);
|
||||
|
||||
//sum each bucket module
|
||||
P* final_results;
|
||||
cudaMallocAsync(&final_results, sizeof(P) * nof_bms, stream);
|
||||
cudaMalloc(&final_results, sizeof(P) * nof_bms);
|
||||
NUM_THREADS = 1<<c;
|
||||
NUM_BLOCKS = nof_bms;
|
||||
sum_reduction_kernel<<<NUM_BLOCKS,NUM_THREADS, 0, stream>>>(buckets, final_results);
|
||||
sum_reduction_kernel<<<NUM_BLOCKS,NUM_THREADS>>>(buckets, final_results);
|
||||
#endif
|
||||
|
||||
#ifdef BIG_TRIANGLE
|
||||
P* final_results;
|
||||
cudaMallocAsync(&final_results, sizeof(P) * nof_bms, stream);
|
||||
cudaMalloc(&final_results, sizeof(P) * nof_bms);
|
||||
//launch the bucket module sum kernel - a thread for each bucket module
|
||||
NUM_THREADS = nof_bms;
|
||||
NUM_BLOCKS = 1;
|
||||
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, final_results, nof_bms, c);
|
||||
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, final_results, nof_bms, c);
|
||||
#endif
|
||||
|
||||
P* d_final_result;
|
||||
if (!on_device)
|
||||
cudaMallocAsync(&d_final_result, sizeof(P), stream);
|
||||
cudaMalloc(&d_final_result, sizeof(P));
|
||||
|
||||
//launch the double and add kernel, a single thread
|
||||
final_accumulation_kernel<P, S><<<1,1,0,stream>>>(final_results, on_device ? final_result : d_final_result, 1, nof_bms, c);
|
||||
final_accumulation_kernel<P, S><<<1,1>>>(final_results, on_device ? final_result : d_final_result, 1, nof_bms, c);
|
||||
|
||||
//copy final result to host
|
||||
cudaStreamSynchronize(stream);
|
||||
cudaDeviceSynchronize();
|
||||
if (!on_device)
|
||||
cudaMemcpyAsync(final_result, d_final_result, sizeof(P), cudaMemcpyDeviceToHost, stream);
|
||||
cudaMemcpy(final_result, d_final_result, sizeof(P), cudaMemcpyDeviceToHost);
|
||||
|
||||
//free memory
|
||||
if (!on_device) {
|
||||
cudaFreeAsync(d_points, stream);
|
||||
cudaFreeAsync(d_scalars, stream);
|
||||
cudaFreeAsync(d_final_result, stream);
|
||||
cudaFree(d_points);
|
||||
cudaFree(d_scalars);
|
||||
cudaFree(d_final_result);
|
||||
}
|
||||
cudaFreeAsync(buckets, stream);
|
||||
cudaFreeAsync(bucket_indices, stream);
|
||||
cudaFreeAsync(point_indices, stream);
|
||||
cudaFreeAsync(single_bucket_indices, stream);
|
||||
cudaFreeAsync(bucket_sizes, stream);
|
||||
cudaFreeAsync(nof_buckets_to_compute, stream);
|
||||
cudaFreeAsync(bucket_offsets, stream);
|
||||
cudaFreeAsync(final_results, stream);
|
||||
|
||||
cudaStreamSynchronize(stream);
|
||||
cudaFree(buckets);
|
||||
cudaFree(bucket_indices);
|
||||
cudaFree(point_indices);
|
||||
cudaFree(single_bucket_indices);
|
||||
cudaFree(bucket_sizes);
|
||||
cudaFree(nof_buckets_to_compute);
|
||||
cudaFree(bucket_offsets);
|
||||
cudaFree(final_results);
|
||||
}
|
||||
|
||||
//this function computes msm using the bucket method
|
||||
template <typename S, typename P, typename A>
|
||||
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device, cudaStream_t stream){
|
||||
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device){
|
||||
|
||||
unsigned total_size = batch_size * msm_size;
|
||||
S *d_scalars;
|
||||
A *d_points;
|
||||
if (!on_device) {
|
||||
//copy scalars and point to gpu
|
||||
cudaMallocAsync(&d_scalars, sizeof(S) * total_size, stream);
|
||||
cudaMallocAsync(&d_points, sizeof(A) * total_size, stream);
|
||||
cudaMemcpyAsync(d_scalars, scalars, sizeof(S) * total_size, cudaMemcpyHostToDevice, stream);
|
||||
cudaMemcpyAsync(d_points, points, sizeof(A) * total_size, cudaMemcpyHostToDevice, stream);
|
||||
cudaMalloc(&d_scalars, sizeof(S) * total_size);
|
||||
cudaMalloc(&d_points, sizeof(A) * total_size);
|
||||
cudaMemcpy(d_scalars, scalars, sizeof(S) * total_size, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_points, points, sizeof(A) * total_size, cudaMemcpyHostToDevice);
|
||||
}
|
||||
else {
|
||||
d_scalars = scalars;
|
||||
@@ -328,129 +317,131 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
|
||||
unsigned bm_bitsize = ceil(log2(nof_bms));
|
||||
unsigned nof_buckets = (nof_bms<<c);
|
||||
unsigned total_nof_buckets = nof_buckets*batch_size;
|
||||
cudaMallocAsync(&buckets, sizeof(P) * total_nof_buckets, stream);
|
||||
cudaMalloc(&buckets, sizeof(P) * total_nof_buckets);
|
||||
|
||||
//lanch the bucket initialization kernel with maximum threads
|
||||
unsigned NUM_THREADS = 1 << 10;
|
||||
unsigned NUM_BLOCKS = (total_nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
initialize_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, total_nof_buckets);
|
||||
initialize_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, total_nof_buckets);
|
||||
|
||||
unsigned *bucket_indices;
|
||||
unsigned *point_indices;
|
||||
cudaMallocAsync(&bucket_indices, sizeof(unsigned) * (total_size * nof_bms + msm_size), stream);
|
||||
cudaMallocAsync(&point_indices, sizeof(unsigned) * (total_size * nof_bms + msm_size), stream);
|
||||
cudaMalloc(&bucket_indices, sizeof(unsigned) * (total_size * nof_bms + msm_size));
|
||||
cudaMalloc(&point_indices, sizeof(unsigned) * (total_size * nof_bms + msm_size));
|
||||
|
||||
//split scalars into digits
|
||||
NUM_THREADS = 1 << 8;
|
||||
NUM_BLOCKS = (total_size * nof_bms + msm_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(bucket_indices + msm_size, point_indices + msm_size, d_scalars, total_size,
|
||||
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(bucket_indices + msm_size, point_indices + msm_size, d_scalars, total_size,
|
||||
msm_log_size, nof_bms, bm_bitsize, c); //+size - leaving the first bm free for the out of place sort later
|
||||
|
||||
//sort indices - the indices are sorted from smallest to largest in order to group together the points that belong to each bucket
|
||||
unsigned *sorted_bucket_indices;
|
||||
unsigned *sorted_point_indices;
|
||||
cudaMallocAsync(&sorted_bucket_indices, sizeof(unsigned) * (total_size * nof_bms), stream);
|
||||
cudaMallocAsync(&sorted_point_indices, sizeof(unsigned) * (total_size * nof_bms), stream);
|
||||
cudaMalloc(&sorted_bucket_indices, sizeof(unsigned) * (total_size * nof_bms));
|
||||
cudaMalloc(&sorted_point_indices, sizeof(unsigned) * (total_size * nof_bms));
|
||||
|
||||
unsigned *sort_indices_temp_storage{};
|
||||
size_t sort_indices_temp_storage_bytes;
|
||||
// The second to last parameter is the default value supplied explicitly to allow passing the stream
|
||||
// See https://nvlabs.github.io/cub/structcub_1_1_device_radix_sort.html#a65e82152de448c6373ed9563aaf8af7e for more info
|
||||
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + msm_size, sorted_bucket_indices,
|
||||
point_indices + msm_size, sorted_point_indices, total_size * nof_bms, 0, sizeof(unsigned)*8, stream);
|
||||
cudaMallocAsync(&sort_indices_temp_storage, sort_indices_temp_storage_bytes, stream);
|
||||
// The second to last parameter is the default value supplied explicitly to allow passing the stream
|
||||
// See https://nvlabs.github.io/cub/structcub_1_1_device_radix_sort.html#a65e82152de448c6373ed9563aaf8af7e for more info
|
||||
point_indices + msm_size, sorted_point_indices, total_size * nof_bms);
|
||||
cudaMalloc(&sort_indices_temp_storage, sort_indices_temp_storage_bytes);
|
||||
// for (unsigned i = 0; i < nof_bms*batch_size; i++) {
|
||||
// unsigned offset_out = i * msm_size;
|
||||
// unsigned offset_in = offset_out + msm_size;
|
||||
// cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + offset_in,
|
||||
// bucket_indices + offset_out, point_indices + offset_in, point_indices + offset_out, msm_size);
|
||||
// }
|
||||
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + msm_size, sorted_bucket_indices,
|
||||
point_indices + msm_size, sorted_point_indices, total_size * nof_bms, 0, sizeof(unsigned)*8, stream);
|
||||
cudaFreeAsync(sort_indices_temp_storage, stream);
|
||||
point_indices + msm_size, sorted_point_indices, total_size * nof_bms);
|
||||
cudaFree(sort_indices_temp_storage);
|
||||
|
||||
//find bucket_sizes
|
||||
unsigned *single_bucket_indices;
|
||||
unsigned *bucket_sizes;
|
||||
unsigned *total_nof_buckets_to_compute;
|
||||
cudaMallocAsync(&single_bucket_indices, sizeof(unsigned)*total_nof_buckets, stream);
|
||||
cudaMallocAsync(&bucket_sizes, sizeof(unsigned)*total_nof_buckets, stream);
|
||||
cudaMallocAsync(&total_nof_buckets_to_compute, sizeof(unsigned), stream);
|
||||
cudaMalloc(&single_bucket_indices, sizeof(unsigned)*total_nof_buckets);
|
||||
cudaMalloc(&bucket_sizes, sizeof(unsigned)*total_nof_buckets);
|
||||
cudaMalloc(&total_nof_buckets_to_compute, sizeof(unsigned));
|
||||
unsigned *encode_temp_storage{};
|
||||
size_t encode_temp_storage_bytes = 0;
|
||||
cub::DeviceRunLengthEncode::Encode(encode_temp_storage, encode_temp_storage_bytes, sorted_bucket_indices, single_bucket_indices, bucket_sizes,
|
||||
total_nof_buckets_to_compute, nof_bms*total_size, stream);
|
||||
cudaMallocAsync(&encode_temp_storage, encode_temp_storage_bytes, stream);
|
||||
total_nof_buckets_to_compute, nof_bms*total_size);
|
||||
cudaMalloc(&encode_temp_storage, encode_temp_storage_bytes);
|
||||
cub::DeviceRunLengthEncode::Encode(encode_temp_storage, encode_temp_storage_bytes, sorted_bucket_indices, single_bucket_indices, bucket_sizes,
|
||||
total_nof_buckets_to_compute, nof_bms*total_size, stream);
|
||||
cudaFreeAsync(encode_temp_storage, stream);
|
||||
total_nof_buckets_to_compute, nof_bms*total_size);
|
||||
cudaFree(encode_temp_storage);
|
||||
|
||||
//get offsets - where does each new bucket begin
|
||||
unsigned* bucket_offsets;
|
||||
cudaMallocAsync(&bucket_offsets, sizeof(unsigned)*total_nof_buckets, stream);
|
||||
cudaMalloc(&bucket_offsets, sizeof(unsigned)*total_nof_buckets);
|
||||
unsigned* offsets_temp_storage{};
|
||||
size_t offsets_temp_storage_bytes = 0;
|
||||
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, total_nof_buckets, stream);
|
||||
cudaMallocAsync(&offsets_temp_storage, offsets_temp_storage_bytes, stream);
|
||||
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, total_nof_buckets, stream);
|
||||
cudaFreeAsync(offsets_temp_storage, stream);
|
||||
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, total_nof_buckets);
|
||||
cudaMalloc(&offsets_temp_storage, offsets_temp_storage_bytes);
|
||||
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, total_nof_buckets);
|
||||
cudaFree(offsets_temp_storage);
|
||||
|
||||
//launch the accumulation kernel with maximum threads
|
||||
NUM_THREADS = 1 << 8;
|
||||
NUM_BLOCKS = (total_nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, sorted_point_indices,
|
||||
d_points, nof_buckets, total_nof_buckets_to_compute, c+bm_bitsize);
|
||||
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, sorted_point_indices,
|
||||
d_points, nof_buckets, batch_size, c+bm_bitsize);
|
||||
|
||||
#ifdef SSM_SUM
|
||||
//sum each bucket
|
||||
NUM_THREADS = 1 << 10;
|
||||
NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
ssm_buckets_kernel<P, S><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, single_bucket_indices, nof_buckets, c);
|
||||
ssm_buckets_kernel<P, S><<<NUM_BLOCKS, NUM_THREADS>>>(buckets, single_bucket_indices, nof_buckets, c);
|
||||
|
||||
//sum each bucket module
|
||||
P* final_results;
|
||||
cudaMallocAsync(&final_results, sizeof(P) * nof_bms, stream);
|
||||
cudaMalloc(&final_results, sizeof(P) * nof_bms);
|
||||
NUM_THREADS = 1<<c;
|
||||
NUM_BLOCKS = nof_bms;
|
||||
sum_reduction_kernel<<<NUM_BLOCKS,NUM_THREADS, 0, stream>>>(buckets, final_results);
|
||||
sum_reduction_kernel<<<NUM_BLOCKS,NUM_THREADS>>>(buckets, final_results);
|
||||
#endif
|
||||
|
||||
#ifdef BIG_TRIANGLE
|
||||
P* bm_sums;
|
||||
cudaMallocAsync(&bm_sums, sizeof(P) * nof_bms * batch_size, stream);
|
||||
cudaMalloc(&bm_sums, sizeof(P) * nof_bms * batch_size);
|
||||
//launch the bucket module sum kernel - a thread for each bucket module
|
||||
NUM_THREADS = 1<<8;
|
||||
NUM_BLOCKS = (nof_bms*batch_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, bm_sums, nof_bms*batch_size, c);
|
||||
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bm_sums, nof_bms*batch_size, c);
|
||||
#endif
|
||||
|
||||
P* d_final_results;
|
||||
if (!on_device)
|
||||
cudaMallocAsync(&d_final_results, sizeof(P)*batch_size, stream);
|
||||
cudaMalloc(&d_final_results, sizeof(P)*batch_size);
|
||||
|
||||
//launch the double and add kernel, a single thread for each msm
|
||||
NUM_THREADS = 1<<8;
|
||||
NUM_BLOCKS = (batch_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
final_accumulation_kernel<P, S><<<NUM_BLOCKS,NUM_THREADS, 0, stream>>>(bm_sums, on_device ? final_results : d_final_results, batch_size, nof_bms, c);
|
||||
final_accumulation_kernel<P, S><<<NUM_BLOCKS,NUM_THREADS>>>(bm_sums, on_device ? final_results : d_final_results, batch_size, nof_bms, c);
|
||||
|
||||
//copy final result to host
|
||||
cudaDeviceSynchronize();
|
||||
if (!on_device)
|
||||
cudaMemcpyAsync(final_results, d_final_results, sizeof(P)*batch_size, cudaMemcpyDeviceToHost, stream);
|
||||
cudaMemcpy(final_results, d_final_results, sizeof(P)*batch_size, cudaMemcpyDeviceToHost);
|
||||
|
||||
//free memory
|
||||
if (!on_device) {
|
||||
cudaFreeAsync(d_points, stream);
|
||||
cudaFreeAsync(d_scalars, stream);
|
||||
cudaFreeAsync(d_final_results, stream);
|
||||
cudaFree(d_points);
|
||||
cudaFree(d_scalars);
|
||||
cudaFree(d_final_results);
|
||||
}
|
||||
cudaFreeAsync(buckets, stream);
|
||||
cudaFreeAsync(bucket_indices, stream);
|
||||
cudaFreeAsync(point_indices, stream);
|
||||
cudaFreeAsync(sorted_bucket_indices, stream);
|
||||
cudaFreeAsync(sorted_point_indices, stream);
|
||||
cudaFreeAsync(single_bucket_indices, stream);
|
||||
cudaFreeAsync(bucket_sizes, stream);
|
||||
cudaFreeAsync(total_nof_buckets_to_compute, stream);
|
||||
cudaFreeAsync(bucket_offsets, stream);
|
||||
cudaFreeAsync(bm_sums, stream);
|
||||
cudaFree(buckets);
|
||||
cudaFree(bucket_indices);
|
||||
cudaFree(point_indices);
|
||||
cudaFree(sorted_bucket_indices);
|
||||
cudaFree(sorted_point_indices);
|
||||
cudaFree(single_bucket_indices);
|
||||
cudaFree(bucket_sizes);
|
||||
cudaFree(total_nof_buckets_to_compute);
|
||||
cudaFree(bucket_offsets);
|
||||
cudaFree(bm_sums);
|
||||
|
||||
cudaStreamSynchronize(stream);
|
||||
}
|
||||
|
||||
|
||||
@@ -465,44 +456,45 @@ __global__ void to_proj_kernel(A* affine_points, P* proj_points, unsigned N){
|
||||
|
||||
//the function computes msm using ssm
|
||||
template <typename S, typename P, typename A>
|
||||
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, cudaStream_t stream){ //works up to 2^8
|
||||
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, bool on_device){ //works up to 2^8
|
||||
|
||||
S *scalars;
|
||||
A *a_points;
|
||||
P *p_points;
|
||||
P *results;
|
||||
|
||||
cudaMallocAsync(&scalars, sizeof(S) * size, stream);
|
||||
cudaMallocAsync(&a_points, sizeof(A) * size, stream);
|
||||
cudaMallocAsync(&p_points, sizeof(P) * size, stream);
|
||||
cudaMallocAsync(&results, sizeof(P) * size, stream);
|
||||
cudaMalloc(&scalars, sizeof(S) * size);
|
||||
cudaMalloc(&a_points, sizeof(A) * size);
|
||||
cudaMalloc(&p_points, sizeof(P) * size);
|
||||
cudaMalloc(&results, sizeof(P) * size);
|
||||
|
||||
//copy inputs to device
|
||||
cudaMemcpyAsync(scalars, h_scalars, sizeof(S) * size, cudaMemcpyHostToDevice, stream);
|
||||
cudaMemcpyAsync(a_points, h_points, sizeof(A) * size, cudaMemcpyHostToDevice, stream);
|
||||
cudaMemcpy(scalars, h_scalars, sizeof(S) * size, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(a_points, h_points, sizeof(A) * size, cudaMemcpyHostToDevice);
|
||||
|
||||
//convert to projective representation and multiply each point by its scalar using single scalar multiplication
|
||||
unsigned NUM_THREADS = size;
|
||||
to_proj_kernel<<<1,NUM_THREADS, 0, stream>>>(a_points, p_points, size);
|
||||
ssm_kernel<<<1,NUM_THREADS, 0, stream>>>(scalars, p_points, results, size);
|
||||
to_proj_kernel<<<1,NUM_THREADS>>>(a_points, p_points, size);
|
||||
ssm_kernel<<<1,NUM_THREADS>>>(scalars, p_points, results, size);
|
||||
|
||||
P *final_result;
|
||||
cudaMallocAsync(&final_result, sizeof(P), stream);
|
||||
cudaMalloc(&final_result, sizeof(P));
|
||||
|
||||
//assuming msm size is a power of 2
|
||||
//sum all the ssm results
|
||||
NUM_THREADS = size;
|
||||
sum_reduction_kernel<<<1,NUM_THREADS, 0, stream>>>(results, final_result);
|
||||
sum_reduction_kernel<<<1,NUM_THREADS>>>(results, final_result);
|
||||
|
||||
//copy result to host
|
||||
cudaStreamSynchronize(stream);
|
||||
cudaMemcpyAsync(h_final_result, final_result, sizeof(P), cudaMemcpyDeviceToHost, stream);
|
||||
cudaDeviceSynchronize();
|
||||
cudaMemcpy(h_final_result, final_result, sizeof(P), cudaMemcpyDeviceToHost);
|
||||
|
||||
//free memory
|
||||
cudaFreeAsync(scalars, stream);
|
||||
cudaFreeAsync(a_points, stream);
|
||||
cudaFreeAsync(p_points, stream);
|
||||
cudaFreeAsync(results, stream);
|
||||
cudaFreeAsync(final_result, stream);
|
||||
cudaFree(scalars);
|
||||
cudaFree(a_points);
|
||||
cudaFree(p_points);
|
||||
cudaFree(results);
|
||||
cudaFree(final_result);
|
||||
|
||||
}
|
||||
|
||||
@@ -510,12 +502,12 @@ void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, cuda
|
||||
template <typename A, typename S, typename P>
|
||||
void reference_msm(S* scalars, A* a_points, unsigned size){
|
||||
|
||||
P *points = new P[size];
|
||||
// P points[size];
|
||||
P points[size];
|
||||
for (unsigned i = 0; i < size ; i++)
|
||||
{
|
||||
points[i] = P::from_affine(a_points[i]);
|
||||
}
|
||||
|
||||
|
||||
P res = P::zero();
|
||||
|
||||
@@ -530,29 +522,110 @@ void reference_msm(S* scalars, A* a_points, unsigned size){
|
||||
}
|
||||
|
||||
unsigned get_optimal_c(const unsigned size) {
|
||||
if (size < 17)
|
||||
return 1;
|
||||
// return 15;
|
||||
return ceil(log2(size))-4;
|
||||
return 10;
|
||||
}
|
||||
|
||||
//this function is used to compute msms of size larger than 256
|
||||
template <typename S, typename P, typename A>
|
||||
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, cudaStream_t stream){
|
||||
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device){
|
||||
unsigned c = get_optimal_c(size);
|
||||
// unsigned c = 6;
|
||||
// unsigned bitsize = 32;
|
||||
unsigned bitsize = 255;
|
||||
bucket_method_msm(bitsize, c, scalars, points, size, result, on_device, stream);
|
||||
bucket_method_msm(bitsize, c, scalars, points, size, result, on_device);
|
||||
}
|
||||
|
||||
// this function is used to compute a batches of msms of size larger than 256
|
||||
template <typename S, typename P, typename A>
|
||||
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream){
|
||||
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device){
|
||||
unsigned c = get_optimal_c(msm_size);
|
||||
// unsigned c = 6;
|
||||
// unsigned bitsize = 32;
|
||||
unsigned bitsize = 255;
|
||||
batched_bucket_method_msm(bitsize, c, scalars, points, batch_size, msm_size, result, on_device, stream);
|
||||
batched_bucket_method_msm(bitsize, c, scalars, points, batch_size, msm_size, result, on_device);
|
||||
}
|
||||
#endif
|
||||
|
||||
extern "C"
|
||||
int msm_cuda(projective_t *out, affine_t points[],
|
||||
scalar_t scalars[], size_t count, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (count>256){
|
||||
large_msm<scalar_t, projective_t, affine_t>(scalars, points, count, out, false);
|
||||
}
|
||||
else{
|
||||
short_msm<scalar_t, projective_t, affine_t>(scalars, points, count, out, false);
|
||||
}
|
||||
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int msm_batch_cuda(projective_t* out, affine_t points[],
|
||||
scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
batched_large_msm<scalar_t, projective_t, affine_t>(scalars, points, batch_size, msm_size, out, false);
|
||||
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda(projective_t* d_out, scalar_t* d_scalars, affine_t* d_points, size_t count, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm(d_scalars, d_points, count, d_out, true);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda(projective_t* d_out, scalar_t* d_scalars, affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -1,22 +1,22 @@
|
||||
#ifndef MSM_H
|
||||
#define MSM_H
|
||||
#pragma once
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../primitives/affine.cuh"
|
||||
#include "../../curves/curve_config.cuh"
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, cudaStream_t stream);
|
||||
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device, cudaStream_t stream);
|
||||
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream);
|
||||
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, cudaStream_t stream);
|
||||
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, cudaStream_t stream);
|
||||
|
||||
template <typename A, typename S, typename P>
|
||||
void reference_msm(S* scalars, A* a_points, unsigned size);
|
||||
#endif
|
||||
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, bool on_device);
|
||||
463
icicle/appUtils/ntt/lde.cu
Normal file
463
icicle/appUtils/ntt/lde.cu
Normal file
@@ -0,0 +1,463 @@
|
||||
#include <cuda.h>
|
||||
#include "ntt.cuh"
|
||||
#include "../vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "lde.cuh"
|
||||
|
||||
|
||||
/**
|
||||
* Interpolate a batch of polynomials from their evaluations on the same subgroup.
|
||||
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs.
|
||||
* @param d_out The variable to write coefficients of the resulting polynomials into (the coefficients are in bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
|
||||
* @param d_evaluations Input array of evaluations of all polynomials of type E (elements).
|
||||
* @param d_domain Domain on which the polynomials are evaluated. Must be a subgroup.
|
||||
* @param n Length of `d_domain` array, also equal to the number of evaluations of each polynomial.
|
||||
* @param batch_size The size of the batch; the length of `d_evaluations` is `n` * `batch_size`.
|
||||
*/
|
||||
template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluations, S * d_domain, unsigned n, unsigned batch_size) {
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaMemcpy(d_out, d_evaluations, sizeof(E) * n * batch_size, cudaMemcpyDeviceToDevice);
|
||||
|
||||
int NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
|
||||
int NUM_BLOCKS = batch_size * max(int((n / 2) / NUM_THREADS), 1);
|
||||
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
|
||||
{
|
||||
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>>(d_out, n, d_domain, n, NUM_BLOCKS, s, false);
|
||||
}
|
||||
|
||||
NUM_BLOCKS = (n * batch_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
template_normalize_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>> (d_out, n * batch_size, scalar_t::inv_log_size(logn));
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Interpolate a polynomial from its evaluations on a subgroup.
|
||||
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs.
|
||||
* @param d_out The variable to write coefficients of the resulting polynomial into (the coefficients are in bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
|
||||
* @param d_evaluations Input array of evaluations that have type E (elements).
|
||||
* @param d_domain Domain on which the polynomial is evaluated. Must be a subgroup.
|
||||
* @param n Length of `d_evaluations` and the size `d_domain` arrays (they should have equal length).
|
||||
*/
|
||||
template <typename E, typename S> int interpolate(E * d_out, E * d_evaluations, S * d_domain, unsigned n) {
|
||||
return interpolate_batch <E, S> (d_out, d_evaluations, d_domain, n, 1);
|
||||
}
|
||||
|
||||
template < typename E > __global__ void fill_array(E * arr, E val, uint32_t n) {
|
||||
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid < n) {
|
||||
arr[tid] = val;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluate a batch of polynomials on the same coset.
|
||||
* @param d_out The evaluations of the polynomials on coset `u` * `d_domain`.
|
||||
* @param d_coefficients Input array of coefficients of all polynomials of type E (elements) to be evaluated in-place on a coset.
|
||||
* @param d_domain Domain on which the polynomials are evaluated (see `coset` flag). Must be a subgroup.
|
||||
* @param domain_size Length of `d_domain` array, on which the polynomial is computed.
|
||||
* @param n The number of coefficients, which might be different from `domain_size`.
|
||||
* @param batch_size The size of the batch; the length of `d_coefficients` is `n` * `batch_size`.
|
||||
* @param coset The flag that indicates whether to evaluate on a coset. If false, evaluate on a subgroup `d_domain`.
|
||||
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
|
||||
*/
|
||||
template <typename E, typename S>
|
||||
int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, unsigned batch_size, bool coset, S * coset_powers) {
|
||||
uint32_t logn = uint32_t(log(domain_size) / log(2));
|
||||
if (domain_size > n) {
|
||||
// allocate and initialize an array of stream handles to parallelize data copying across batches
|
||||
cudaStream_t *memcpy_streams = (cudaStream_t *) malloc(batch_size * sizeof(cudaStream_t));
|
||||
for (int i = 0; i < batch_size; i++)
|
||||
{
|
||||
cudaStreamCreate(&(memcpy_streams[i]));
|
||||
|
||||
cudaMemcpyAsync(&d_out[i * domain_size], &d_coefficients[i * n], n * sizeof(E), cudaMemcpyDeviceToDevice, memcpy_streams[i]);
|
||||
int NUM_THREADS = MAX_THREADS_BATCH;
|
||||
int NUM_BLOCKS = (domain_size - n + NUM_THREADS - 1) / NUM_THREADS;
|
||||
fill_array <E> <<<NUM_BLOCKS, NUM_THREADS, 0, memcpy_streams[i]>>> (&d_out[i * domain_size + n], E::zero(), domain_size - n);
|
||||
|
||||
cudaStreamSynchronize(memcpy_streams[i]);
|
||||
cudaStreamDestroy(memcpy_streams[i]);
|
||||
}
|
||||
} else
|
||||
cudaMemcpy(d_out, d_coefficients, sizeof(E) * domain_size * batch_size, cudaMemcpyDeviceToDevice);
|
||||
|
||||
if (coset)
|
||||
batch_vector_mult(coset_powers, d_out, domain_size, batch_size);
|
||||
|
||||
int NUM_THREADS = min(domain_size / 2, MAX_THREADS_BATCH);
|
||||
int chunks = max(int((domain_size / 2) / NUM_THREADS), 1);
|
||||
int NUM_BLOCKS = batch_size * chunks;
|
||||
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
|
||||
{
|
||||
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>>(d_out, domain_size, d_domain, domain_size, batch_size * chunks, logn - s - 1, true);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluate a polynomial on a coset.
|
||||
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs, so the order of outputs is bit-reversed.
|
||||
* @param d_out The evaluations of the polynomial on coset `u` * `d_domain`.
|
||||
* @param d_coefficients Input array of coefficients of a polynomial of type E (elements).
|
||||
* @param d_domain Domain on which the polynomial is evaluated (see `coset` flag). Must be a subgroup.
|
||||
* @param domain_size Length of `d_domain` array, on which the polynomial is computed.
|
||||
* @param n The number of coefficients, which might be different from `domain_size`.
|
||||
* @param coset The flag that indicates whether to evaluate on a coset. If false, evaluate on a subgroup `d_domain`.
|
||||
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
|
||||
*/
|
||||
template <typename E, typename S>
|
||||
int evaluate(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, bool coset, S * coset_powers) {
|
||||
return evaluate_batch <E, S> (d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers);
|
||||
}
|
||||
|
||||
int interpolate_scalars(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n) {
|
||||
return interpolate(d_out, d_evaluations, d_domain, n);
|
||||
}
|
||||
|
||||
int interpolate_scalars_batch(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n, unsigned batch_size) {
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
|
||||
}
|
||||
|
||||
int interpolate_points(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain, unsigned n) {
|
||||
return interpolate(d_out, d_evaluations, d_domain, n);
|
||||
}
|
||||
|
||||
int interpolate_points_batch(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain, unsigned n, unsigned batch_size) {
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
|
||||
}
|
||||
|
||||
int evaluate_scalars(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n) {
|
||||
scalar_t* _null = nullptr;
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
|
||||
}
|
||||
|
||||
int evaluate_scalars_batch(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size) {
|
||||
scalar_t* _null = nullptr;
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
|
||||
}
|
||||
|
||||
int evaluate_points(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n) {
|
||||
scalar_t* _null = nullptr;
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
|
||||
}
|
||||
|
||||
int evaluate_points_batch(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned batch_size) {
|
||||
scalar_t* _null = nullptr;
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
|
||||
}
|
||||
|
||||
int evaluate_scalars_on_coset(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain,
|
||||
unsigned domain_size, unsigned n, scalar_t* coset_powers) {
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
|
||||
}
|
||||
|
||||
int evaluate_scalars_on_coset_batch(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t* coset_powers) {
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
|
||||
}
|
||||
|
||||
int evaluate_points_on_coset(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain,
|
||||
unsigned domain_size, unsigned n, scalar_t* coset_powers) {
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
|
||||
}
|
||||
|
||||
int evaluate_points_on_coset_batch(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t* coset_powers) {
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
|
||||
}
|
||||
|
||||
extern "C" scalar_t* build_domain_cuda(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (inverse) {
|
||||
return fill_twiddle_factors_array(domain_size, scalar_t::omega_inv(logn));
|
||||
} else {
|
||||
return fill_twiddle_factors_array(domain_size, scalar_t::omega(logn));
|
||||
}
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_cuda(scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return ntt_end2end(arr, n, inverse); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ecntt_cuda(projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return ecntt_end2end(arr, n, inverse); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_batch_cuda(scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return ntt_end2end_batch(arr, arr_size, batch_size, inverse); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ecntt_batch_cuda(projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return ecntt_end2end_batch(arr, arr_size, batch_size, inverse); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_scalars_cuda(scalar_t* d_out, scalar_t *d_evaluations, scalar_t *d_domain, unsigned n, unsigned device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return interpolate_scalars(d_out, d_evaluations, d_domain, n); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n,
|
||||
unsigned batch_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return interpolate_scalars_batch(d_out, d_evaluations, d_domain, n, batch_size); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_points_cuda(projective_t* d_out, projective_t *d_evaluations, scalar_t *d_domain, unsigned n, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return interpolate_points(d_out, d_evaluations, d_domain, n); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_points_batch_cuda(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return interpolate_points_batch(d_out, d_evaluations, d_domain, n, batch_size); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_cuda(scalar_t* d_out, scalar_t *d_coefficients, scalar_t *d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return evaluate_scalars(d_out, d_coefficients, d_domain, domain_size, n); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return evaluate_scalars_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_cuda(projective_t* d_out, projective_t *d_coefficients, scalar_t *d_domain,
|
||||
unsigned domain_size, unsigned n, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return evaluate_points(d_out, d_coefficients, d_domain, domain_size, n); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_batch_cuda(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return evaluate_points_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_on_coset_cuda(scalar_t* d_out, scalar_t *d_coefficients, scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, scalar_t *coset_powers, unsigned device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return evaluate_scalars_on_coset(d_out, d_coefficients, d_domain, domain_size, n, coset_powers); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_on_coset_batch_cuda(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t *coset_powers, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return evaluate_scalars_on_coset_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, coset_powers); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_on_coset_cuda(projective_t* d_out, projective_t *d_coefficients, scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, scalar_t *coset_powers, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return evaluate_points_on_coset(d_out, d_coefficients, d_domain, domain_size, n, coset_powers); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_on_coset_batch_cuda(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t *coset_powers, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return evaluate_points_on_coset_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, coset_powers); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_scalars_cuda(scalar_t* arr, int n, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
reverse_order(arr, n, logn);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_scalars_batch_cuda(scalar_t* arr, int n, int batch_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
reverse_order_batch(arr, n, logn, batch_size);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_points_cuda(projective_t* arr, int n, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
reverse_order(arr, n, logn);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_points_batch_cuda(projective_t* arr, int n, int batch_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
reverse_order_batch(arr, n, logn, batch_size);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
31
icicle/appUtils/ntt/lde.cuh
Normal file
31
icicle/appUtils/ntt/lde.cuh
Normal file
@@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
|
||||
|
||||
int interpolate_scalars(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n);
|
||||
|
||||
int interpolate_scalars_batch(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n, unsigned batch_size);
|
||||
|
||||
int interpolate_points(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain, unsigned n);
|
||||
|
||||
int interpolate_points_batch(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain, unsigned n, unsigned batch_size);
|
||||
|
||||
int evaluate_scalars(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n);
|
||||
|
||||
int evaluate_scalars_batch(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size);
|
||||
|
||||
int evaluate_points(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n);
|
||||
|
||||
int evaluate_points_batch(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned batch_size);
|
||||
|
||||
int evaluate_scalars_on_coset(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain,
|
||||
unsigned domain_size, unsigned n, scalar_t* coset_powers);
|
||||
|
||||
int evaluate_scalars_on_coset_batch(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t* coset_powers);
|
||||
|
||||
int evaluate_points_on_coset(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain,
|
||||
unsigned domain_size, unsigned n, scalar_t* coset_powers);
|
||||
|
||||
int evaluate_points_on_coset_batch(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t* coset_powers);
|
||||
54
icicle/appUtils/ntt/ntt.cu
Normal file
54
icicle/appUtils/ntt/ntt.cu
Normal file
@@ -0,0 +1,54 @@
|
||||
#include <cuda.h>
|
||||
#include "ntt.cuh"
|
||||
|
||||
extern "C" int ntt_cuda(scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return ntt_end2end(arr, n, inverse); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ecntt_cuda(projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return ecntt_end2end(arr, n, inverse); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_batch_cuda(scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return ntt_end2end_batch(arr, arr_size, batch_size, inverse); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ecntt_batch_cuda(projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return ecntt_end2end_batch(arr, arr_size, batch_size, inverse); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -1,10 +1,22 @@
|
||||
#ifndef NTT
|
||||
#define NTT
|
||||
#pragma once
|
||||
#include "../../curves/curve_config.cuh"
|
||||
|
||||
const uint32_t MAX_NUM_THREADS = 1024;
|
||||
const uint32_t MAX_THREADS_BATCH = 256;
|
||||
|
||||
/**
|
||||
* Copy twiddle factors array to device (returns a pointer to the device allocated array).
|
||||
* @param twiddles input empty array.
|
||||
* @param n_twiddles length of twiddle factors.
|
||||
*/
|
||||
scalar_t * copy_twiddle_factors_to_device(scalar_t * twiddles, uint32_t n_twiddles) {
|
||||
size_t size_twiddles = n_twiddles * sizeof(scalar_t);
|
||||
scalar_t * d_twiddles;
|
||||
cudaMalloc( & d_twiddles, size_twiddles);
|
||||
cudaMemcpy(d_twiddles, twiddles, size_twiddles, cudaMemcpyHostToDevice);
|
||||
return d_twiddles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the twiddle factors.
|
||||
* Outputs: d_twiddles[i] = omega^i.
|
||||
@@ -12,11 +24,8 @@ const uint32_t MAX_THREADS_BATCH = 256;
|
||||
* @param n_twiddles number of twiddle factors.
|
||||
* @param omega multiplying factor.
|
||||
*/
|
||||
template < typename S > __global__ void twiddle_factors_kernel(S * d_twiddles, uint32_t n_twiddles, S omega) {
|
||||
for (uint32_t i = 0; i < n_twiddles; i++) {
|
||||
d_twiddles[i] = S::zero();
|
||||
}
|
||||
d_twiddles[0] = S::one();
|
||||
__global__ void twiddle_factors_kernel(scalar_t * d_twiddles, uint32_t n_twiddles, scalar_t omega) {
|
||||
d_twiddles[0] = scalar_t::one();
|
||||
for (uint32_t i = 0; i < n_twiddles - 1; i++) {
|
||||
d_twiddles[i + 1] = omega * d_twiddles[i];
|
||||
}
|
||||
@@ -28,12 +37,11 @@ const uint32_t MAX_THREADS_BATCH = 256;
|
||||
* @param n_twiddles number of twiddle factors.
|
||||
* @param omega multiplying factor.
|
||||
*/
|
||||
template < typename S > S * fill_twiddle_factors_array(uint32_t n_twiddles, S omega, cudaStream_t stream) {
|
||||
size_t size_twiddles = n_twiddles * sizeof(S);
|
||||
S * d_twiddles;
|
||||
cudaMallocAsync(& d_twiddles, size_twiddles, stream);
|
||||
twiddle_factors_kernel<S> <<< 1, 1, 0, stream>>> (d_twiddles, n_twiddles, omega);
|
||||
cudaStreamSynchronize(stream);
|
||||
scalar_t * fill_twiddle_factors_array(uint32_t n_twiddles, scalar_t omega) {
|
||||
size_t size_twiddles = n_twiddles * sizeof(scalar_t);
|
||||
scalar_t * d_twiddles;
|
||||
cudaMalloc( & d_twiddles, size_twiddles);
|
||||
twiddle_factors_kernel <<< 1, 1 >>> (d_twiddles, n_twiddles, omega);
|
||||
return d_twiddles;
|
||||
}
|
||||
|
||||
@@ -47,7 +55,7 @@ const uint32_t MAX_THREADS_BATCH = 256;
|
||||
*/
|
||||
__device__ __host__ uint32_t reverseBits(uint32_t num, uint32_t logn) {
|
||||
unsigned int reverse_num = 0;
|
||||
for (uint32_t i = 0; i < logn; i++) {
|
||||
for (int i = 0; i < logn; i++) {
|
||||
if ((num & (1 << i))) reverse_num |= 1 << ((logn - 1) - i);
|
||||
}
|
||||
return reverse_num;
|
||||
@@ -90,14 +98,14 @@ template < typename T > __global__ void reverse_order_kernel(T* arr, T* arr_reve
|
||||
* @param logn log(n).
|
||||
* @param batch_size the size of the batch.
|
||||
*/
|
||||
template < typename T > void reverse_order_batch(T* arr, uint32_t n, uint32_t logn, uint32_t batch_size, cudaStream_t stream) {
|
||||
template < typename T > void reverse_order_batch(T* arr, uint32_t n, uint32_t logn, uint32_t batch_size) {
|
||||
T* arr_reversed;
|
||||
cudaMallocAsync(&arr_reversed, n * batch_size * sizeof(T), stream);
|
||||
cudaMalloc(&arr_reversed, n * batch_size * sizeof(T));
|
||||
int number_of_threads = MAX_THREADS_BATCH;
|
||||
int number_of_blocks = (n * batch_size + number_of_threads - 1) / number_of_threads;
|
||||
reverse_order_kernel <<<number_of_blocks, number_of_threads, 0, stream>>> (arr, arr_reversed, n, logn, batch_size);
|
||||
cudaMemcpyAsync(arr, arr_reversed, n * batch_size * sizeof(T), cudaMemcpyDeviceToDevice, stream);
|
||||
cudaFreeAsync(arr_reversed, stream);
|
||||
reverse_order_kernel <<<number_of_blocks, number_of_threads>>> (arr, arr_reversed, n, logn, batch_size);
|
||||
cudaMemcpy(arr, arr_reversed, n * batch_size * sizeof(T), cudaMemcpyDeviceToDevice);
|
||||
cudaFree(arr_reversed);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -108,8 +116,8 @@ template < typename T > void reverse_order_batch(T* arr, uint32_t n, uint32_t lo
|
||||
* @param n length of `arr`.
|
||||
* @param logn log(n).
|
||||
*/
|
||||
template < typename T > void reverse_order(T* arr, uint32_t n, uint32_t logn, cudaStream_t stream) {
|
||||
reverse_order_batch(arr, n, logn, 1, stream);
|
||||
template < typename T > void reverse_order(T* arr, uint32_t n, uint32_t logn) {
|
||||
reverse_order_batch(arr, n, logn, 1);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -156,15 +164,14 @@ template < typename E, typename S > __global__ void template_normalize_kernel(E
|
||||
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
|
||||
* @param n_twiddles length of d_twiddles.
|
||||
*/
|
||||
template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr, uint32_t n, uint32_t logn, S * d_twiddles, uint32_t n_twiddles, cudaStream_t stream) {
|
||||
template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr, uint32_t n, uint32_t logn, S * d_twiddles, uint32_t n_twiddles) {
|
||||
uint32_t m = 2;
|
||||
// TODO: optimize with separate streams for each iteration
|
||||
for (uint32_t s = 0; s < logn; s++) {
|
||||
for (uint32_t i = 0; i < n; i += m) {
|
||||
uint32_t shifted_m = m >> 1;
|
||||
uint32_t number_of_threads = MAX_NUM_THREADS ^ ((shifted_m ^ MAX_NUM_THREADS) & -(shifted_m < MAX_NUM_THREADS));
|
||||
uint32_t number_of_blocks = shifted_m / MAX_NUM_THREADS + 1;
|
||||
template_butterfly_kernel < E, S > <<< number_of_threads, number_of_blocks, 0, stream >>> (d_arr, d_twiddles, n, n_twiddles, m, i, m >> 1);
|
||||
int shifted_m = m >> 1;
|
||||
int number_of_threads = MAX_NUM_THREADS ^ ((shifted_m ^ MAX_NUM_THREADS) & -(shifted_m < MAX_NUM_THREADS));
|
||||
int number_of_blocks = shifted_m / MAX_NUM_THREADS + 1;
|
||||
template_butterfly_kernel < E, S > <<< number_of_threads, number_of_blocks >>> (d_arr, d_twiddles, n, n_twiddles, m, i, m >> 1);
|
||||
}
|
||||
m <<= 1;
|
||||
}
|
||||
@@ -179,47 +186,96 @@ template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr
|
||||
* @param n_twiddles length of d_twiddles.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_twiddles, uint32_t n_twiddles, bool inverse, cudaStream_t stream) {
|
||||
template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_twiddles, uint32_t n_twiddles, bool inverse) {
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
size_t size_E = n * sizeof(E);
|
||||
E * arrReversed = template_reverse_order < E > (arr, n, logn);
|
||||
E * d_arrReversed;
|
||||
cudaMallocAsync( & d_arrReversed, size_E, stream);
|
||||
cudaMemcpyAsync(d_arrReversed, arrReversed, size_E, cudaMemcpyHostToDevice, stream);
|
||||
template_ntt_on_device_memory < E, S > (d_arrReversed, n, logn, d_twiddles, n_twiddles, stream);
|
||||
cudaMalloc( & d_arrReversed, size_E);
|
||||
cudaMemcpy(d_arrReversed, arrReversed, size_E, cudaMemcpyHostToDevice);
|
||||
template_ntt_on_device_memory < E, S > (d_arrReversed, n, logn, d_twiddles, n_twiddles);
|
||||
if (inverse) {
|
||||
int NUM_THREADS = MAX_NUM_THREADS;
|
||||
int NUM_BLOCKS = (n + NUM_THREADS - 1) / NUM_THREADS;
|
||||
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS, 0, stream >>> (d_arrReversed, n, S::inv_log_size(logn));
|
||||
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arrReversed, n, S::inv_log_size(logn));
|
||||
}
|
||||
cudaMemcpyAsync(arrReversed, d_arrReversed, size_E, cudaMemcpyDeviceToHost, stream);
|
||||
cudaFreeAsync(d_arrReversed, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
cudaMemcpy(arrReversed, d_arrReversed, size_E, cudaMemcpyDeviceToHost);
|
||||
cudaFree(d_arrReversed);
|
||||
return arrReversed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cooley-Tukey Elliptic Curve NTT.
|
||||
* NOTE! this function assumes that d_twiddles are located in the device memory.
|
||||
* @param arr input array of type projective_t.
|
||||
* @param n length of d_arr.
|
||||
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
|
||||
* @param n_twiddles length of d_twiddles.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
projective_t * ecntt(projective_t * arr, uint32_t n, scalar_t * d_twiddles, uint32_t n_twiddles, bool inverse) {
|
||||
return ntt_template < projective_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* @param arr input array of type E (element).
|
||||
* NOTE! this function assumes that d_twiddles are located in the device memory.
|
||||
* @param arr input array of type scalar_t.
|
||||
* @param n length of d_arr.
|
||||
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
|
||||
* @param n_twiddles length of d_twiddles.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
scalar_t * ntt(scalar_t * arr, uint32_t n, scalar_t * d_twiddles, uint32_t n_twiddles, bool inverse) {
|
||||
return ntt_template < scalar_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* @param arr input array of type scalar_t.
|
||||
* @param n length of d_arr.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
template<typename E,typename S> uint32_t ntt_end2end_template(E * arr, uint32_t n, bool inverse, cudaStream_t stream) {
|
||||
extern "C" uint32_t ntt_end2end(scalar_t * arr, uint32_t n, bool inverse) {
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
uint32_t n_twiddles = n;
|
||||
S * twiddles = new S[n_twiddles];
|
||||
S * d_twiddles;
|
||||
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as scalar_t::omega() is of that order.
|
||||
scalar_t * d_twiddles;
|
||||
if (inverse){
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn), stream);
|
||||
} else{
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn), stream);
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
|
||||
} else {
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
|
||||
}
|
||||
E * result = ntt_template < E, S > (arr, n, d_twiddles, n_twiddles, inverse, stream);
|
||||
scalar_t * result = ntt_template < scalar_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
|
||||
for(int i = 0; i < n; i++){
|
||||
arr[i] = result[i];
|
||||
}
|
||||
cudaFreeAsync(d_twiddles, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
cudaFree(d_twiddles);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* @param arr input array of type projective_t.
|
||||
* @param n length of d_arr.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
extern "C" uint32_t ecntt_end2end(projective_t * arr, uint32_t n, bool inverse) {
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
uint32_t n_twiddles = n;
|
||||
scalar_t * twiddles = new scalar_t[n_twiddles];
|
||||
scalar_t * d_twiddles;
|
||||
if (inverse){
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
|
||||
} else{
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
|
||||
}
|
||||
projective_t * result = ntt_template < projective_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
|
||||
for(int i = 0; i < n; i++){
|
||||
arr[i] = result[i];
|
||||
}
|
||||
cudaFree(d_twiddles);
|
||||
return 0; // TODO add
|
||||
}
|
||||
|
||||
@@ -233,14 +289,14 @@ template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_
|
||||
* @param logn log(n).
|
||||
* @param task log(n).
|
||||
*/
|
||||
template < typename T > __device__ __host__ void reverseOrder_batch(T * arr, uint32_t n, uint32_t logn, uint32_t task) {
|
||||
template < typename T > __device__ __host__ void reverseOrder_batch(T * arr, uint32_t n, uint32_t logn, uint32_t task) {
|
||||
for (uint32_t i = 0; i < n; i++) {
|
||||
uint32_t reversed = reverseBits(i, logn);
|
||||
if (reversed > i) {
|
||||
T tmp = arr[task * n + i];
|
||||
arr[task * n + i] = arr[task * n + reversed];
|
||||
arr[task * n + reversed] = tmp;
|
||||
}
|
||||
uint32_t reversed = reverseBits(i, logn);
|
||||
if (reversed > i) {
|
||||
T tmp = arr[task * n + i];
|
||||
arr[task * n + i] = arr[task * n + reversed];
|
||||
arr[task * n + reversed] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -331,55 +387,101 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//TODO: batch ntt and ecntt can be unified into batch_template
|
||||
/**
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* This is a bached version - meaning it assumes than the input array
|
||||
* consists of N arrays of size n. The function performs n-size NTT on each small array.
|
||||
* @param arr input array of type BLS12_381::scalar_t.
|
||||
* @param arr input array of type scalar_t.
|
||||
* @param arr_size number of total elements = n * N.
|
||||
* @param n size of batch.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
template <typename E, typename S> uint32_t ntt_end2end_batch_template(E * arr, uint32_t arr_size, uint32_t n, bool inverse, cudaStream_t stream) {
|
||||
extern "C" uint32_t ntt_end2end_batch(scalar_t * arr, uint32_t arr_size, uint32_t n, bool inverse) {
|
||||
int batches = int(arr_size / n);
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as BLS12_381::scalar_t::omega() is of that order.
|
||||
size_t size_E = arr_size * sizeof(E);
|
||||
S * d_twiddles;
|
||||
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as scalar_t::omega() is of that order.
|
||||
size_t size_E = arr_size * sizeof(scalar_t);
|
||||
scalar_t * d_twiddles;
|
||||
if (inverse){
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn), stream);
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
|
||||
} else{
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn), stream);
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
|
||||
}
|
||||
E * d_arr;
|
||||
cudaMallocAsync( & d_arr, size_E, stream);
|
||||
cudaMemcpyAsync(d_arr, arr, size_E, cudaMemcpyHostToDevice, stream);
|
||||
scalar_t * d_arr;
|
||||
cudaMalloc( & d_arr, size_E);
|
||||
cudaMemcpy(d_arr, arr, size_E, cudaMemcpyHostToDevice);
|
||||
int NUM_THREADS = MAX_THREADS_BATCH;
|
||||
int NUM_BLOCKS = (batches + NUM_THREADS - 1) / NUM_THREADS;
|
||||
ntt_template_kernel_rev_ord<E, S><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_arr, n, logn, batches);
|
||||
ntt_template_kernel_rev_ord<scalar_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
|
||||
|
||||
NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
|
||||
int chunks = max(int((n / 2) / NUM_THREADS), 1);
|
||||
int total_tasks = batches * chunks;
|
||||
NUM_BLOCKS = total_tasks;
|
||||
|
||||
//TODO: this loop also can be unrolled
|
||||
for (uint32_t s = 0; s < logn; s++)
|
||||
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
|
||||
{
|
||||
ntt_template_kernel<E, S><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
|
||||
cudaStreamSynchronize(stream);
|
||||
ntt_template_kernel<scalar_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
|
||||
}
|
||||
|
||||
if (inverse == true)
|
||||
{
|
||||
NUM_THREADS = MAX_NUM_THREADS;
|
||||
NUM_BLOCKS = (arr_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
template_normalize_kernel < scalar_t, scalar_t > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arr, arr_size, scalar_t::inv_log_size(logn));
|
||||
}
|
||||
cudaMemcpy(arr, d_arr, size_E, cudaMemcpyDeviceToHost);
|
||||
cudaFree(d_arr);
|
||||
cudaFree(d_twiddles);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* This is a bached version - meaning it assumes than the input array
|
||||
* consists of N arrays of size n. The function performs n-size NTT on each small array.
|
||||
* @param arr input array of type scalar_t.
|
||||
* @param arr_size number of total elements = n * N.
|
||||
* @param n size of batch.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
extern "C" uint32_t ecntt_end2end_batch(projective_t * arr, uint32_t arr_size, uint32_t n, bool inverse) {
|
||||
int batches = int(arr_size / n);
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as scalar_t::omega() is of that order.
|
||||
size_t size_E = arr_size * sizeof(projective_t);
|
||||
scalar_t * d_twiddles;
|
||||
if (inverse){
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
|
||||
} else{
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
|
||||
}
|
||||
projective_t * d_arr;
|
||||
cudaMalloc( & d_arr, size_E);
|
||||
cudaMemcpy(d_arr, arr, size_E, cudaMemcpyHostToDevice);
|
||||
int NUM_THREADS = MAX_THREADS_BATCH;
|
||||
int NUM_BLOCKS = (batches + NUM_THREADS - 1) / NUM_THREADS;
|
||||
ntt_template_kernel_rev_ord<projective_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
|
||||
|
||||
NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
|
||||
int chunks = max(int((n / 2) / NUM_THREADS), 1);
|
||||
int total_tasks = batches * chunks;
|
||||
NUM_BLOCKS = total_tasks;
|
||||
|
||||
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
|
||||
{
|
||||
ntt_template_kernel<projective_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
|
||||
}
|
||||
if (inverse == true)
|
||||
{
|
||||
NUM_THREADS = MAX_NUM_THREADS;
|
||||
NUM_BLOCKS = (arr_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS, 0, stream>>> (d_arr, arr_size, S::inv_log_size(logn));
|
||||
template_normalize_kernel < projective_t, scalar_t > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arr, arr_size, scalar_t::inv_log_size(logn));
|
||||
}
|
||||
cudaMemcpyAsync(arr, d_arr, size_E, cudaMemcpyDeviceToHost, stream);
|
||||
cudaFreeAsync(d_arr, stream);
|
||||
cudaFreeAsync(d_twiddles, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
cudaMemcpy(arr, d_arr, size_E, cudaMemcpyDeviceToHost);
|
||||
cudaFree(d_arr);
|
||||
cudaFree(d_twiddles);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
@@ -1,26 +1,21 @@
|
||||
#ifndef _VEC_MULT
|
||||
#define _VEC_MULT
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "../primitives/field.cuh"
|
||||
#include "../utils/storage.cuh"
|
||||
#include "../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../curves/curve_config.cuh"
|
||||
#include "ve_mod_mult.cuh"
|
||||
|
||||
|
||||
extern "C" int32_t vec_mod_mult_point(projective_t *inout,
|
||||
scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
size_t device_id)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
// TODO: device_id
|
||||
vector_mod_mult<projective_t, scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
vector_mod_mult<projective_t, scalar_t>(scalar_vec, inout, inout, n_elments);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -33,15 +28,12 @@ extern "C" int32_t vec_mod_mult_point(projective_t *inout,
|
||||
extern "C" int32_t vec_mod_mult_scalar(scalar_t *inout,
|
||||
scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
size_t device_id)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
// TODO: device_id
|
||||
vector_mod_mult<scalar_t, scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
vector_mod_mult<scalar_t, scalar_t>(scalar_vec, inout, inout, n_elments);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -55,15 +47,12 @@ extern "C" int32_t matrix_vec_mod_mult(scalar_t *matrix_flattened,
|
||||
scalar_t *input,
|
||||
scalar_t *output,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
size_t device_id)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
// TODO: device_id
|
||||
matrix_mod_mult<scalar_t>(matrix_flattened, input, output, n_elments, stream);
|
||||
matrix_mod_mult<scalar_t>(matrix_flattened, input, output, n_elments);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -72,4 +61,3 @@ extern "C" int32_t matrix_vec_mod_mult(scalar_t *matrix_flattened,
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -1,5 +1,3 @@
|
||||
#ifndef VEC_MULT
|
||||
#define VEC_MULT
|
||||
#pragma once
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
@@ -19,7 +17,7 @@ __global__ void vectorModMult(S *scalar_vec, E *element_vec, E *result, size_t n
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments, cudaStream_t stream) // TODO: in place so no need for third result vector
|
||||
int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments) // TODO: in place so no need for third result vector
|
||||
{
|
||||
// Set the grid and block dimensions
|
||||
int num_blocks = (int)ceil((float)n_elments / MAX_THREADS_PER_BLOCK);
|
||||
@@ -28,24 +26,23 @@ int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments, cudaStream_
|
||||
// Allocate memory on the device for the input vectors, the output vector, and the modulus
|
||||
S *d_vec_a;
|
||||
E *d_vec_b, *d_result;
|
||||
cudaMallocAsync(&d_vec_a, n_elments * sizeof(S), stream);
|
||||
cudaMallocAsync(&d_vec_b, n_elments * sizeof(E), stream);
|
||||
cudaMallocAsync(&d_result, n_elments * sizeof(E), stream);
|
||||
cudaMalloc(&d_vec_a, n_elments * sizeof(S));
|
||||
cudaMalloc(&d_vec_b, n_elments * sizeof(E));
|
||||
cudaMalloc(&d_result, n_elments * sizeof(E));
|
||||
|
||||
// Copy the input vectors and the modulus from the host to the device
|
||||
cudaMemcpyAsync(d_vec_a, vec_a, n_elments * sizeof(S), cudaMemcpyHostToDevice, stream);
|
||||
cudaMemcpyAsync(d_vec_b, vec_b, n_elments * sizeof(E), cudaMemcpyHostToDevice, stream);
|
||||
cudaMemcpy(d_vec_a, vec_a, n_elments * sizeof(S), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_vec_b, vec_b, n_elments * sizeof(E), cudaMemcpyHostToDevice);
|
||||
|
||||
// Call the kernel to perform element-wise modular multiplication
|
||||
vectorModMult<<<num_blocks, threads_per_block, 0, stream>>>(d_vec_a, d_vec_b, d_result, n_elments);
|
||||
vectorModMult<<<num_blocks, threads_per_block>>>(d_vec_a, d_vec_b, d_result, n_elments);
|
||||
|
||||
cudaMemcpyAsync(result, d_result, n_elments * sizeof(E), cudaMemcpyDeviceToHost, stream);
|
||||
cudaMemcpy(result, d_result, n_elments * sizeof(E), cudaMemcpyDeviceToHost);
|
||||
|
||||
cudaFreeAsync(d_vec_a, stream);
|
||||
cudaFreeAsync(d_vec_b, stream);
|
||||
cudaFreeAsync(d_result, stream);
|
||||
cudaFree(d_vec_a);
|
||||
cudaFree(d_vec_b);
|
||||
cudaFree(d_result);
|
||||
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -61,12 +58,12 @@ __global__ void batchVectorMult(S *scalar_vec, E *element_vec, unsigned n_scalar
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int batch_vector_mult(S *scalar_vec, E *element_vec, unsigned n_scalars, unsigned batch_size, cudaStream_t stream)
|
||||
int batch_vector_mult(S *scalar_vec, E *element_vec, unsigned n_scalars, unsigned batch_size)
|
||||
{
|
||||
// Set the grid and block dimensions
|
||||
int NUM_THREADS = MAX_THREADS_PER_BLOCK;
|
||||
int NUM_BLOCKS = (n_scalars * batch_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
batchVectorMult<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(scalar_vec, element_vec, n_scalars, batch_size);
|
||||
batchVectorMult<<<NUM_BLOCKS, NUM_THREADS>>>(scalar_vec, element_vec, n_scalars, batch_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -84,7 +81,7 @@ __global__ void matrixVectorMult(E *matrix_elements, E *vector_elements, E *resu
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t dim, cudaStream_t stream)
|
||||
int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t dim)
|
||||
{
|
||||
// Set the grid and block dimensions
|
||||
int num_blocks = (int)ceil((float)dim / MAX_THREADS_PER_BLOCK);
|
||||
@@ -92,24 +89,22 @@ int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t di
|
||||
|
||||
// Allocate memory on the device for the input vectors, the output vector, and the modulus
|
||||
E *d_matrix, *d_vector, *d_result;
|
||||
cudaMallocAsync(&d_matrix, (dim * dim) * sizeof(E), stream);
|
||||
cudaMallocAsync(&d_vector, dim * sizeof(E), stream);
|
||||
cudaMallocAsync(&d_result, dim * sizeof(E), stream);
|
||||
cudaMalloc(&d_matrix, (dim * dim) * sizeof(E));
|
||||
cudaMalloc(&d_vector, dim * sizeof(E));
|
||||
cudaMalloc(&d_result, dim * sizeof(E));
|
||||
|
||||
// Copy the input vectors and the modulus from the host to the device
|
||||
cudaMemcpyAsync(d_matrix, matrix_elements, (dim * dim) * sizeof(E), cudaMemcpyHostToDevice, stream);
|
||||
cudaMemcpyAsync(d_vector, vector_elements, dim * sizeof(E), cudaMemcpyHostToDevice, stream);
|
||||
cudaMemcpy(d_matrix, matrix_elements, (dim * dim) * sizeof(E), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_vector, vector_elements, dim * sizeof(E), cudaMemcpyHostToDevice);
|
||||
|
||||
// Call the kernel to perform element-wise modular multiplication
|
||||
matrixVectorMult<<<num_blocks, threads_per_block, 0, stream>>>(d_matrix, d_vector, d_result, dim);
|
||||
matrixVectorMult<<<num_blocks, threads_per_block>>>(d_matrix, d_vector, d_result, dim);
|
||||
|
||||
cudaMemcpyAsync(result, d_result, dim * sizeof(E), cudaMemcpyDeviceToHost, stream);
|
||||
cudaMemcpy(result, d_result, dim * sizeof(E), cudaMemcpyDeviceToHost);
|
||||
|
||||
cudaFreeAsync(d_matrix, stream);
|
||||
cudaFreeAsync(d_vector, stream);
|
||||
cudaFreeAsync(d_result, stream);
|
||||
cudaFree(d_matrix);
|
||||
cudaFree(d_vector);
|
||||
cudaFree(d_result);
|
||||
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
176
icicle/curves/bls12_381.cuh
Normal file
176
icicle/curves/bls12_381.cuh
Normal file
@@ -0,0 +1,176 @@
|
||||
#pragma once
|
||||
|
||||
#include "../utils/storage.cuh"
|
||||
|
||||
|
||||
struct fp_config {
|
||||
// field structure size = 8 * 32 bit
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
static constexpr unsigned modulus_bits_count = 255;
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
|
||||
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// static constexpr storage<limbs_count> omega[32]= { {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000}, {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d}, {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e}, {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb}, {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac}, {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802}, {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59}, {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667}, {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098}, {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b}, {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0}, {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8}, {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8}, {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911}, {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd}, {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333}, {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db}, {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83}, {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f}, {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5}, {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3}, {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd}, {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc}, {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd}, {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580}, {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d}, {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d}, {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f}, {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b}, {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72}, {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> omega1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega2= {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega3= {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d};
|
||||
static constexpr storage<limbs_count> omega4= {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e};
|
||||
static constexpr storage<limbs_count> omega5= {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb};
|
||||
static constexpr storage<limbs_count> omega6= {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac};
|
||||
static constexpr storage<limbs_count> omega7= {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802};
|
||||
static constexpr storage<limbs_count> omega8= {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59};
|
||||
static constexpr storage<limbs_count> omega9= {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667};
|
||||
static constexpr storage<limbs_count> omega10= {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098};
|
||||
static constexpr storage<limbs_count> omega11= {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b};
|
||||
static constexpr storage<limbs_count> omega12= {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0};
|
||||
static constexpr storage<limbs_count> omega13= {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8};
|
||||
static constexpr storage<limbs_count> omega14= {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8};
|
||||
static constexpr storage<limbs_count> omega15= {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911};
|
||||
static constexpr storage<limbs_count> omega16= {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd};
|
||||
static constexpr storage<limbs_count> omega17= {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333};
|
||||
static constexpr storage<limbs_count> omega18= {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db};
|
||||
static constexpr storage<limbs_count> omega19= {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83};
|
||||
static constexpr storage<limbs_count> omega20= {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f};
|
||||
static constexpr storage<limbs_count> omega21= {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5};
|
||||
static constexpr storage<limbs_count> omega22= {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3};
|
||||
static constexpr storage<limbs_count> omega23= {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd};
|
||||
static constexpr storage<limbs_count> omega24= {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc};
|
||||
static constexpr storage<limbs_count> omega25= {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd};
|
||||
static constexpr storage<limbs_count> omega26= {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580};
|
||||
static constexpr storage<limbs_count> omega27= {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d};
|
||||
static constexpr storage<limbs_count> omega28= {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d};
|
||||
static constexpr storage<limbs_count> omega29= {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f};
|
||||
static constexpr storage<limbs_count> omega30= {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
|
||||
static constexpr storage<limbs_count> omega31= {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
|
||||
static constexpr storage<limbs_count> omega32= {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
|
||||
|
||||
// static constexpr storage<limbs_count> omega_inv[32]={ {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753}, {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e}, {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4}, {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c}, {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee}, {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d}, {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25}, {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e}, {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508}, {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d}, {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63}, {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7}, {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1}, {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac}, {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003}, {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c}, {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7}, {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950}, {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2}, {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6}, {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5}, {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9}, {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960}, {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6}, {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf}, {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f}, {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533}, {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff}, {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287}, {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6}, {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> omega_inv1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e};
|
||||
static constexpr storage<limbs_count> omega_inv4= {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4};
|
||||
static constexpr storage<limbs_count> omega_inv5= {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c};
|
||||
static constexpr storage<limbs_count> omega_inv6= {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee};
|
||||
static constexpr storage<limbs_count> omega_inv7= {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d};
|
||||
static constexpr storage<limbs_count> omega_inv8= {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25};
|
||||
static constexpr storage<limbs_count> omega_inv9= {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e};
|
||||
static constexpr storage<limbs_count> omega_inv10= {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508};
|
||||
static constexpr storage<limbs_count> omega_inv11= {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d};
|
||||
static constexpr storage<limbs_count> omega_inv12= {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63};
|
||||
static constexpr storage<limbs_count> omega_inv13= {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7};
|
||||
static constexpr storage<limbs_count> omega_inv14= {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1};
|
||||
static constexpr storage<limbs_count> omega_inv15= {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac};
|
||||
static constexpr storage<limbs_count> omega_inv16= {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003};
|
||||
static constexpr storage<limbs_count> omega_inv17= {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c};
|
||||
static constexpr storage<limbs_count> omega_inv18= {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7};
|
||||
static constexpr storage<limbs_count> omega_inv19= {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950};
|
||||
static constexpr storage<limbs_count> omega_inv20= {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2};
|
||||
static constexpr storage<limbs_count> omega_inv21= {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6};
|
||||
static constexpr storage<limbs_count> omega_inv22= {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5};
|
||||
static constexpr storage<limbs_count> omega_inv23= {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9};
|
||||
static constexpr storage<limbs_count> omega_inv24= {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960};
|
||||
static constexpr storage<limbs_count> omega_inv25= {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6};
|
||||
static constexpr storage<limbs_count> omega_inv26= {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf};
|
||||
static constexpr storage<limbs_count> omega_inv27= {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f};
|
||||
static constexpr storage<limbs_count> omega_inv28= {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533};
|
||||
static constexpr storage<limbs_count> omega_inv29= {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff};
|
||||
static constexpr storage<limbs_count> omega_inv30= {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287};
|
||||
static constexpr storage<limbs_count> omega_inv31= {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};
|
||||
|
||||
// static constexpr storage<limbs_count> inv[32]={ {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9}, {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e}, {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268}, {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd}, {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18}, {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5}, {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04}, {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab}, {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f}, {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9}, {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e}, {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878}, {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5}, {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c}, {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77}, {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365}, {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c}, {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57}, {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5}, {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014}, {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3}, {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583}, {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b}, {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df}, {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719}, {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736}, {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744}, {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b}, {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f}, {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751}, {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752}, {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> inv1= {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
|
||||
static constexpr storage<limbs_count> inv2= {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
|
||||
static constexpr storage<limbs_count> inv3= {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268};
|
||||
static constexpr storage<limbs_count> inv4= {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd};
|
||||
static constexpr storage<limbs_count> inv5= {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18};
|
||||
static constexpr storage<limbs_count> inv6= {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5};
|
||||
static constexpr storage<limbs_count> inv7= {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04};
|
||||
static constexpr storage<limbs_count> inv8= {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab};
|
||||
static constexpr storage<limbs_count> inv9= {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f};
|
||||
static constexpr storage<limbs_count> inv10= {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9};
|
||||
static constexpr storage<limbs_count> inv11= {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e};
|
||||
static constexpr storage<limbs_count> inv12= {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878};
|
||||
static constexpr storage<limbs_count> inv13= {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5};
|
||||
static constexpr storage<limbs_count> inv14= {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c};
|
||||
static constexpr storage<limbs_count> inv15= {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77};
|
||||
static constexpr storage<limbs_count> inv16= {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365};
|
||||
static constexpr storage<limbs_count> inv17= {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c};
|
||||
static constexpr storage<limbs_count> inv18= {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57};
|
||||
static constexpr storage<limbs_count> inv19= {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5};
|
||||
static constexpr storage<limbs_count> inv20= {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014};
|
||||
static constexpr storage<limbs_count> inv21= {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3};
|
||||
static constexpr storage<limbs_count> inv22= {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583};
|
||||
static constexpr storage<limbs_count> inv23= {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b};
|
||||
static constexpr storage<limbs_count> inv24= {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df};
|
||||
static constexpr storage<limbs_count> inv25= {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719};
|
||||
static constexpr storage<limbs_count> inv26= {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736};
|
||||
static constexpr storage<limbs_count> inv27= {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744};
|
||||
static constexpr storage<limbs_count> inv28= {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b};
|
||||
static constexpr storage<limbs_count> inv29= {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f};
|
||||
static constexpr storage<limbs_count> inv30= {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
|
||||
static constexpr storage<limbs_count> inv31= {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
|
||||
static constexpr storage<limbs_count> inv32= {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
|
||||
};
|
||||
|
||||
struct fq_config {
|
||||
// field structure size = 12 * 32 bit
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
// 4*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
static constexpr unsigned modulus_bits_count = 381;
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
};
|
||||
|
||||
struct group_generator {
|
||||
static constexpr storage<fq_config::limbs_count> generator_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
|
||||
0x9774b905, 0xc3688c4f, 0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
|
||||
static constexpr storage<fq_config::limbs_count> generator_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
|
||||
0xd5d00af6, 0xfcf5e095, 0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
|
||||
};
|
||||
|
||||
static constexpr unsigned weierstrass_b = 4;
|
||||
14
icicle/curves/curve_config.cuh
Normal file
14
icicle/curves/curve_config.cuh
Normal file
@@ -0,0 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include "../primitives/field.cuh"
|
||||
#include "../primitives/projective.cuh"
|
||||
|
||||
#include "bls12_381.cuh"
|
||||
// #include "bn254.cuh"
|
||||
|
||||
|
||||
typedef Field<fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<fq_config> point_field_t;
|
||||
typedef Projective<point_field_t, scalar_field_t, group_generator, weierstrass_b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
86
icicle/curves/curve_template.cuh
Normal file
86
icicle/curves/curve_template.cuh
Normal file
@@ -0,0 +1,86 @@
|
||||
#pragma once
|
||||
|
||||
#include "../utils/storage.cuh"
|
||||
|
||||
|
||||
// y^2 = weierstrass_a * x^3 + weierstrass_b
|
||||
static constexpr unsigned weierstrass_b = 4;
|
||||
|
||||
// a generator of the elliptic curve group
|
||||
struct group_generator {
|
||||
static constexpr storage<fq_config::limbs_count> generator_x = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> generator_y = {0x4abe706c, 0x5ea93e35, 0x00e1de5d, 0x6346b8ed, 0x92848344, 0xda9dd85e,
|
||||
0xc9926b26, 0xc760f988, 0xf3763e9b, 0xb33cffc3, 0xd40d6212, 0x0a989bad};
|
||||
};
|
||||
|
||||
/// SCALAR FIELD
|
||||
struct fp_config {
|
||||
// field structure size = 8 * 32 bit
|
||||
static constexpr unsigned limbs_count = 8; // array size of 32bit int fo form a field element
|
||||
static constexpr unsigned modulus_bits_count = 255; // field bit size
|
||||
// field modulus split into array, ordered in Little-Endian
|
||||
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513 -> 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
|
||||
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// modulus^2
|
||||
static constexpr storage<2 * limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2 * limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// Scalar specific
|
||||
static constexpr storage<limbs_count> omega = {0xa5d36306, 0xe206da11, 0x378fbf96, 0x0ad1347b, 0xe0f8245f, 0xfc3e8acf, 0xa0f704f4, 0x564c0a11};
|
||||
static constexpr storage<limbs_count> omega_inv = {3629396834, 2518295853, 1679307267, 1346818424, 3118225798, 1256349690, 3322524792, 958081110};
|
||||
static constexpr storage<limbs_count> inv_2 = {2147483649,2147483647,2147429887,2849952257,80800770,429714436,2496577188,972477353};
|
||||
static constexpr storage<limbs_count> inv_4 = {1073741825,1073741823,1073661183,4274928386,121201155,644571654,1597382134,1458716030};
|
||||
static constexpr storage<limbs_count> inv_256 = {16777217,16777215,4244528547,1315563102,26752557,3943079472,3597918154,1937357227};
|
||||
static constexpr storage<limbs_count> inv_512 = {8388609,8388607,4269694161,1360250160,94177049,2401254172,2148052617,1941155967};
|
||||
static constexpr storage<limbs_count> inv_4096 = {1048577,1074790399,3217972249,3546834984,2300657127,1589027946,3026903920,1944479864};
|
||||
};
|
||||
|
||||
|
||||
/// BASE FIELD
|
||||
struct fq_config {
|
||||
// field structure size = 12 * 32 bit
|
||||
static constexpr unsigned limbs_count = 12; // array size of 32bit int fo form a field element
|
||||
static constexpr unsigned modulus_bits_count = 381; // field bit size
|
||||
// field modulus split into array, ordered in Little-Endian
|
||||
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787 -> 0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// modulus^2
|
||||
static constexpr storage<2 * limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2 * limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
// 4*modulus^2
|
||||
static constexpr storage<2 * limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
};
|
||||
@@ -5,9 +5,6 @@
|
||||
#include "../utils/host_math.cuh"
|
||||
#include <random>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#define HOST_INLINE __host__ __forceinline__
|
||||
#define DEVICE_INLINE __device__ __forceinline__
|
||||
@@ -26,23 +23,6 @@ template <class CONFIG> class Field {
|
||||
return Field { CONFIG::one };
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE Field from(uint32_t value) {
|
||||
storage<TLC> scalar;
|
||||
scalar.limbs[0] = value;
|
||||
for (int i = 1; i < TLC; i++) {
|
||||
scalar.limbs[i] = 0;
|
||||
}
|
||||
return Field { scalar };
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE Field generator_x() {
|
||||
return Field { CONFIG::generator_x };
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE Field generator_y() {
|
||||
return Field { CONFIG::generator_y };
|
||||
}
|
||||
|
||||
static constexpr HOST_INLINE Field omega(uint32_t log_size) {
|
||||
// Quick fix to linking issue, permanent fix will follow
|
||||
switch (log_size) {
|
||||
@@ -113,7 +93,6 @@ template <class CONFIG> class Field {
|
||||
case 32:
|
||||
return Field { CONFIG::omega32 };
|
||||
}
|
||||
return Field { CONFIG::one };
|
||||
// return Field { CONFIG::omega[log_size-1] };
|
||||
}
|
||||
|
||||
@@ -187,7 +166,6 @@ template <class CONFIG> class Field {
|
||||
case 32:
|
||||
return Field { CONFIG::omega_inv32 };
|
||||
}
|
||||
return Field { CONFIG::one };
|
||||
// return Field { CONFIG::omega_inv[log_size-1] };
|
||||
}
|
||||
|
||||
@@ -259,7 +237,6 @@ template <class CONFIG> class Field {
|
||||
case 32:
|
||||
return Field { CONFIG::inv32 };
|
||||
}
|
||||
return Field { CONFIG::one };
|
||||
// return Field { CONFIG::inv[log_size-1] };
|
||||
}
|
||||
|
||||
@@ -267,13 +244,14 @@ template <class CONFIG> class Field {
|
||||
return Field { CONFIG::modulus };
|
||||
}
|
||||
|
||||
|
||||
// private:
|
||||
typedef storage<TLC> ff_storage;
|
||||
typedef storage<2*TLC> ff_wide_storage;
|
||||
|
||||
static constexpr unsigned slack_bits = 32 * TLC - NBITS;
|
||||
|
||||
struct Wide {
|
||||
struct wide {
|
||||
ff_wide_storage limbs_storage;
|
||||
|
||||
Field HOST_DEVICE_INLINE get_lower() {
|
||||
@@ -302,15 +280,15 @@ template <class CONFIG> class Field {
|
||||
}
|
||||
};
|
||||
|
||||
friend HOST_DEVICE_INLINE Wide operator+(Wide xs, const Wide& ys) {
|
||||
Wide rs = {};
|
||||
friend HOST_DEVICE_INLINE wide operator+(wide xs, const wide& ys) {
|
||||
wide rs = {};
|
||||
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
// an incomplete impl that assumes that xs > ys
|
||||
friend HOST_DEVICE_INLINE Wide operator-(Wide xs, const Wide& ys) {
|
||||
Wide rs = {};
|
||||
friend HOST_DEVICE_INLINE wide operator-(wide xs, const wide& ys) {
|
||||
wide rs = {};
|
||||
sub_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
@@ -359,9 +337,7 @@ template <class CONFIG> class Field {
|
||||
const uint32_t *y = ys.limbs;
|
||||
uint32_t *r = rs.limbs;
|
||||
r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
|
||||
#ifdef __CUDA_ARCH__
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (unsigned i = 1; i < (CARRY_OUT ? TLC : TLC - 1); i++)
|
||||
r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
|
||||
if (!CARRY_OUT) {
|
||||
@@ -377,9 +353,7 @@ template <class CONFIG> class Field {
|
||||
const uint32_t *y = ys.limbs;
|
||||
uint32_t *r = rs.limbs;
|
||||
r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
|
||||
#ifdef __CUDA_ARCH__
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (unsigned i = 1; i < (CARRY_OUT ? 2 * TLC : 2 * TLC - 1); i++)
|
||||
r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
|
||||
if (!CARRY_OUT) {
|
||||
@@ -413,6 +387,14 @@ template <class CONFIG> class Field {
|
||||
return CARRY_OUT ? carry : 0;
|
||||
}
|
||||
|
||||
static constexpr HOST_INLINE uint32_t sub_limbs_partial_host(uint32_t* x, uint32_t* y, uint32_t* r, uint32_t num_limbs) {
|
||||
uint32_t carry = 0;
|
||||
host_math::carry_chain<2 * TLC, false, true> chain;
|
||||
for (unsigned i = 0; i < num_limbs; i++)
|
||||
r[i] = chain.sub(x[i], y[i], carry);
|
||||
return carry;
|
||||
}
|
||||
|
||||
template <bool CARRY_OUT, typename T> static constexpr HOST_DEVICE_INLINE uint32_t add_limbs(const T &xs, const T &ys, T &rs) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
return add_sub_limbs_device<false, CARRY_OUT>(xs, ys, rs);
|
||||
@@ -437,17 +419,41 @@ template <class CONFIG> class Field {
|
||||
}
|
||||
}
|
||||
|
||||
static DEVICE_INLINE void mul_n_msb(uint32_t *acc, const uint32_t *a, uint32_t bi, size_t n = TLC, size_t start_i = 0) {
|
||||
#pragma unroll
|
||||
for (size_t i = start_i; i < n; i += 2) {
|
||||
acc[i] = ptx::mul_lo(a[i], bi);
|
||||
acc[i + 1] = ptx::mul_hi(a[i], bi);
|
||||
}
|
||||
}
|
||||
|
||||
static DEVICE_INLINE void cmad_n(uint32_t *acc, const uint32_t *a, uint32_t bi, size_t n = TLC) {
|
||||
// multiply scalar by vector
|
||||
// acc = acc + bi*A[::2]
|
||||
acc[0] = ptx::mad_lo_cc(a[0], bi, acc[0]);
|
||||
acc[1] = ptx::madc_hi_cc(a[0], bi, acc[1]);
|
||||
#pragma unroll
|
||||
#pragma unroll
|
||||
for (size_t i = 2; i < n; i += 2) {
|
||||
acc[i] = ptx::madc_lo_cc(a[i], bi, acc[i]);
|
||||
acc[i + 1] = ptx::madc_hi_cc(a[i], bi, acc[i + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
static DEVICE_INLINE void cmad_n_msb(uint32_t *acc, const uint32_t *a, uint32_t bi, size_t n = TLC, size_t a_start_idx=0) {
|
||||
// multiply scalar by vector
|
||||
// acc = acc + bi*A[::2]
|
||||
acc[a_start_idx] = ptx::mad_lo_cc(a[a_start_idx], bi, acc[a_start_idx]);
|
||||
acc[a_start_idx + 1] = ptx::madc_hi_cc(a[a_start_idx], bi, acc[a_start_idx + 1]);
|
||||
#pragma unroll
|
||||
for (size_t i = a_start_idx + 2; i < n; i += 2) {
|
||||
acc[i] = ptx::madc_lo_cc(a[i], bi, acc[i]);
|
||||
acc[i + 1] = ptx::madc_hi_cc(a[i], bi, acc[i + 1]);
|
||||
}
|
||||
}
|
||||
|
||||
static DEVICE_INLINE void mad_row(uint32_t *odd, uint32_t *even, const uint32_t *a, uint32_t bi, size_t n = TLC) {
|
||||
// odd = odd + bi*A
|
||||
// even = even + bi*A
|
||||
cmad_n(odd, a + 1, bi, n - 2);
|
||||
odd[n - 2] = ptx::madc_lo_cc(a[n - 1], bi, 0);
|
||||
odd[n - 1] = ptx::madc_hi(a[n - 1], bi, 0);
|
||||
@@ -455,6 +461,16 @@ template <class CONFIG> class Field {
|
||||
odd[n - 1] = ptx::addc(odd[n - 1], 0);
|
||||
}
|
||||
|
||||
static DEVICE_INLINE void mad_row_msb(uint32_t *odd, uint32_t *even, const uint32_t *a, uint32_t bi, size_t n = TLC, size_t a_start_idx = 0) {
|
||||
// odd = odd + bi*A
|
||||
// even = even + bi*A
|
||||
cmad_n_msb(odd, a + 1, bi, n - 2, a_start_idx - 1);
|
||||
odd[n - 2] = ptx::madc_lo_cc(a[n - 1], bi, 0);
|
||||
odd[n - 1] = ptx::madc_hi(a[n - 1], bi, 0);
|
||||
cmad_n_msb(even, a, bi, n, a_start_idx);
|
||||
odd[n - 1] = ptx::addc(odd[n - 1], 0);
|
||||
}
|
||||
|
||||
static DEVICE_INLINE void multiply_raw_device(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
|
||||
const uint32_t *a = as.limbs;
|
||||
const uint32_t *b = bs.limbs;
|
||||
@@ -476,13 +492,289 @@ template <class CONFIG> class Field {
|
||||
even[i + 1] = ptx::addc(even[i + 1], 0);
|
||||
}
|
||||
|
||||
static DEVICE_INLINE void mult_no_carry(uint32_t a, uint32_t b, uint32_t *r) {
|
||||
r[0] = ptx::mul_lo(a, b);
|
||||
r[1] = ptx::mul_hi(a, b);
|
||||
}
|
||||
|
||||
static DEVICE_INLINE void ingo_multiply_raw_device(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
|
||||
const uint32_t *a = as.limbs;
|
||||
const uint32_t *b = bs.limbs;
|
||||
uint32_t *r = rs.limbs;
|
||||
uint32_t i, j;
|
||||
uint32_t *even = rs.limbs;
|
||||
__align__(8) uint32_t odd[2 * TLC];
|
||||
for (uint32_t i = 0; i < 2 * TLC; i++)
|
||||
{
|
||||
even[i] = 0;
|
||||
odd[i] = 0;
|
||||
}
|
||||
// first row special case, no carry in no carry out. split to non parts, even and odd.
|
||||
for (i = 0; i < TLC - 1; i+=2 )
|
||||
{
|
||||
mult_no_carry(b[0], a[i], &even[i]);
|
||||
mult_no_carry(b[0], a[i + 1], &odd[i]);
|
||||
}
|
||||
|
||||
// doing two rows at one loop
|
||||
for (i = 1; i < TLC - 1; i+=2)
|
||||
{
|
||||
// odd bi's
|
||||
// multiply accumulate even part of new row with odd part prev row (needs a carry)
|
||||
// // j = 0, no carry in, only carry out
|
||||
odd[i - 1] = ptx::mad_lo_cc(a[0], b[i], odd[i - 1]);
|
||||
odd[i] = ptx::madc_hi_cc(a[0], b[i], odd[i]);
|
||||
// for loop carry in carry out
|
||||
for (j = 2; j < TLC; j+=2) // 2, 4, 6
|
||||
{
|
||||
odd[i + j - 1] = ptx::madc_lo_cc(a[j], b[i], odd[i + j - 1]);
|
||||
odd[i + j] = ptx::madc_hi_cc(a[j], b[i], odd[i + j]);
|
||||
}
|
||||
odd[i + j - 1] = ptx::addc(odd[i + j - 1], 0); // handling last carry
|
||||
|
||||
// multiply accumulate odd part of new row with even part prev row (doesnt need a carry)
|
||||
// j = 1, no carry in, only carry out
|
||||
even[i + 1] = ptx::mad_lo_cc(a[1], b[i], even[i + 1]);
|
||||
even[i + 2] = ptx::madc_hi_cc(a[1], b[i], even[i + 2]);
|
||||
// for loop carry in carry out
|
||||
for (j = 3; j < TLC; j+=2)
|
||||
{
|
||||
even[i + j] = ptx::madc_lo_cc(a[j], b[i], even[i + j]);
|
||||
even[i + j + 1] = ptx::madc_hi_cc(a[j], b[i], even[i + j + 1]);
|
||||
}
|
||||
|
||||
// even bi's
|
||||
// multiply accumulate even part of new row with even part of prev row // needs a carry
|
||||
// j = 0, no carry in, only carry out
|
||||
even[i + 1] = ptx::mad_lo_cc(a[0], b[i + 1], even[i + 1]);
|
||||
even[i + 2] = ptx::madc_hi_cc(a[0], b[i + 1], even[i + 2]);
|
||||
// for loop, carry in, carry out.
|
||||
for (j = 2; j < TLC; j+=2)
|
||||
{
|
||||
even[i + j + 1] = ptx::madc_lo_cc(a[j], b[i + 1], even[i + j + 1]);
|
||||
even[i + j + 2] = ptx::madc_hi_cc(a[j], b[i + 1], even[i + j + 2]);
|
||||
}
|
||||
even[i + j + 1] = ptx::addc(even[i + j + 1], 0); // handling last carry
|
||||
|
||||
// multiply accumulate odd part of new row with odd part of prev row
|
||||
// j = 1, no carry in, only carry out
|
||||
odd[i + 1] = ptx::mad_lo_cc(a[1], b[i + 1], odd[i + 1]);
|
||||
odd[i + 2] = ptx::madc_hi_cc(a[1], b[i + 1], odd[i + 2]);
|
||||
// for loop, carry in, carry out.
|
||||
for (j = 3; j < TLC; j+=2)
|
||||
{
|
||||
odd[i + j] = ptx::madc_lo_cc(a[j], b[i + 1], odd[i + j]);
|
||||
odd[i + j + 1] = ptx::madc_hi_cc(a[j], b[i + 1], odd[i + j + 1]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
odd[i - 1] = ptx::mad_lo_cc(a[0], b[i], odd[i - 1]);
|
||||
odd[i] = ptx::madc_hi_cc(a[0], b[i], odd[i]);
|
||||
// for loop carry in carry out
|
||||
for (j = 2; j < TLC; j+=2)
|
||||
{
|
||||
odd[i + j - 1] = ptx::madc_lo_cc(a[j], b[i], odd[i + j - 1]);
|
||||
odd[i + j] = ptx::madc_hi_cc(a[j], b[i], odd[i + j]);
|
||||
}
|
||||
odd[i + j - 1] = ptx::addc(odd[i + j - 1], 0); // handling last carry
|
||||
|
||||
// multiply accumulate odd part of new row with even part prev row
|
||||
// j = 1, no carry in, only carry out
|
||||
even[i + 1] = ptx::mad_lo_cc(a[1], b[i], even[i + 1]);
|
||||
even[i + 2] = ptx::madc_hi_cc(a[1], b[i], even[i + 2]);
|
||||
// for loop carry in carry out
|
||||
for (j = 3; j < TLC; j+=2)
|
||||
{
|
||||
even[i + j] = ptx::madc_lo_cc(a[j], b[i], even[i + j]);
|
||||
even[i + j + 1] = ptx::madc_hi_cc(a[j], b[i], even[i + j + 1]);
|
||||
}
|
||||
|
||||
// add even and odd parts
|
||||
even[1] = ptx::add_cc(even[1], odd[0]);
|
||||
for (i = 1; i < 2 * TLC - 2; i++)
|
||||
even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
|
||||
even[i + 1] = ptx::addc(even[i + 1], 0);
|
||||
}
|
||||
|
||||
|
||||
static DEVICE_INLINE void ingo_msb_multiply_raw_device(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
|
||||
const uint32_t *a = as.limbs;
|
||||
const uint32_t *b = bs.limbs;
|
||||
uint32_t *r = rs.limbs;
|
||||
uint32_t i, j;
|
||||
uint32_t *even = rs.limbs;
|
||||
__align__(8) uint32_t odd[2 * TLC];
|
||||
for (uint32_t i = 0; i < 2 * TLC; i++)
|
||||
{
|
||||
even[i] = 0;
|
||||
odd[i] = 0;
|
||||
}
|
||||
// only last element from first row.
|
||||
mult_no_carry(b[0], a[TLC - 1], &odd[TLC - 2]);
|
||||
|
||||
// doing two rows at one loop
|
||||
#pragma unroll
|
||||
for (i = 1; i < TLC - 1; i+=2)
|
||||
{
|
||||
const uint32_t first_active_j = TLC - 1 - i;
|
||||
const uint32_t first_active_j_odd = first_active_j + (1 - (first_active_j % 2));
|
||||
const uint32_t first_active_j_even = first_active_j + first_active_j % 2 ;
|
||||
// odd bi's
|
||||
// multiply accumulate even part of new row with odd part prev row (needs a carry)
|
||||
// j = 0, no carry in, only carry out
|
||||
odd[first_active_j_even + i - 1] = ptx::mad_lo_cc(a[first_active_j_even], b[i], odd[first_active_j_even + i - 1]);
|
||||
odd[first_active_j_even + i] = ptx::madc_hi_cc(a[first_active_j_even], b[i], odd[first_active_j_even + i]);
|
||||
// for loop carry in carry out
|
||||
#pragma unroll
|
||||
for (j = first_active_j_even + 2; j < TLC; j+=2)
|
||||
{
|
||||
odd[i + j - 1] = ptx::madc_lo_cc(a[j], b[i], odd[i + j - 1]);
|
||||
odd[i + j] = ptx::madc_hi_cc(a[j], b[i], odd[i + j]);
|
||||
}
|
||||
odd[i + j - 1] = ptx::addc(odd[i + j - 1], 0); // handling last carry
|
||||
|
||||
// multiply accumulate odd part of new row with even part prev row (doesnt need a carry)
|
||||
// j = 1, no carry in, only carry out
|
||||
even[i + first_active_j_odd] = ptx::mad_lo_cc(a[first_active_j_odd], b[i], even[i + first_active_j_odd]);
|
||||
even[i + first_active_j_odd + 1] = ptx::madc_hi_cc(a[first_active_j_odd], b[i], even[i + first_active_j_odd + 1]);
|
||||
// for loop carry in carry out
|
||||
#pragma unroll
|
||||
for (j = first_active_j_odd + 2; j < TLC; j+=2)
|
||||
{
|
||||
even[i + j] = ptx::madc_lo_cc(a[j], b[i], even[i + j]);
|
||||
even[i + j + 1] = ptx::madc_hi_cc(a[j], b[i], even[i + j + 1]);
|
||||
}
|
||||
|
||||
// even bi's
|
||||
uint32_t const first_active_j1 = TLC - 1 - (i + 1) ;
|
||||
uint32_t const first_active_j_odd1 = first_active_j1 + (1 - (first_active_j1 % 2));
|
||||
uint32_t const first_active_j_even1 = first_active_j1 + first_active_j1 % 2;
|
||||
// multiply accumulate even part of new row with even part of prev row // needs a carry
|
||||
// j = 0, no carry in, only carry out
|
||||
even[first_active_j_even1 + i + 1] = ptx::mad_lo_cc(a[first_active_j_even1], b[i + 1], even[first_active_j_even1 + i + 1]);
|
||||
even[first_active_j_even1 + i + 2] = ptx::madc_hi_cc(a[first_active_j_even1], b[i + 1], even[first_active_j_even1 + i + 2]);
|
||||
// for loop, carry in, carry out.
|
||||
#pragma unroll
|
||||
for (j = first_active_j_even1 + 2; j < TLC; j+=2)
|
||||
{
|
||||
even[i + j + 1] = ptx::madc_lo_cc(a[j], b[i + 1], even[i + j + 1]);
|
||||
even[i + j + 2] = ptx::madc_hi_cc(a[j], b[i + 1], even[i + j + 2]);
|
||||
}
|
||||
even[i + j + 1] = ptx::addc(even[i + j + 1], 0); // handling last carry
|
||||
|
||||
// multiply accumulate odd part of new row with odd part of prev row
|
||||
// j = 1, no carry in, only carry out
|
||||
odd[first_active_j_odd1 + i] = ptx::mad_lo_cc(a[first_active_j_odd1], b[i + 1], odd[first_active_j_odd1 + i]);
|
||||
odd[first_active_j_odd1+ i + 1] = ptx::madc_hi_cc(a[first_active_j_odd1], b[i + 1], odd[first_active_j_odd1 + i + 1]);
|
||||
// for loop, carry in, carry out.
|
||||
#pragma unroll
|
||||
for (j = first_active_j_odd1 + 2; j < TLC; j+=2)
|
||||
{
|
||||
odd[i + j] = ptx::madc_lo_cc(a[j], b[i + 1], odd[i + j]);
|
||||
odd[i + j + 1] = ptx::madc_hi_cc(a[j], b[i + 1], odd[i + j + 1]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// last round, i = TLC - 1
|
||||
odd[i - 1] = ptx::mad_lo_cc(a[0], b[i], odd[i - 1]);
|
||||
odd[i] = ptx::madc_hi_cc(a[0], b[i], odd[i]);
|
||||
// for loop carry in carry out
|
||||
#pragma unroll
|
||||
for (j = 2; j < TLC; j+=2)
|
||||
{
|
||||
odd[i + j - 1] = ptx::madc_lo_cc(a[j], b[i], odd[i + j - 1]);
|
||||
odd[i + j] = ptx::madc_hi_cc(a[j], b[i], odd[i + j]);
|
||||
}
|
||||
odd[i + j - 1] = ptx::addc(odd[i + j - 1], 0); // handling last carry
|
||||
|
||||
// multiply accumulate odd part of new row with even part prev row
|
||||
// j = 1, no carry in, only carry out
|
||||
even[i + 1] = ptx::mad_lo_cc(a[1], b[i], even[i + 1]);
|
||||
even[i + 2] = ptx::madc_hi_cc(a[1], b[i], even[i + 2]);
|
||||
// for loop carry in carry out
|
||||
#pragma unroll
|
||||
for (j = 3; j < TLC; j+=2)
|
||||
{
|
||||
even[i + j] = ptx::madc_lo_cc(a[j], b[i], even[i + j]);
|
||||
even[i + j + 1] = ptx::madc_hi_cc(a[j], b[i], even[i + j + 1]);
|
||||
}
|
||||
|
||||
// add even and odd parts
|
||||
even[1] = ptx::add_cc(even[1], odd[0]);
|
||||
#pragma unroll
|
||||
for (i = 1; i < 2 * TLC - 2; i++)
|
||||
even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
|
||||
even[i + 1] = ptx::addc(even[i + 1], 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static DEVICE_INLINE void multiply_lsb_raw_device(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
|
||||
// r = a * b is correcrt for the first TLC + 1 digits. (not computing from TLC + 1 to 2*TLC - 2).
|
||||
const uint32_t *a = as.limbs;
|
||||
const uint32_t *b = bs.limbs;
|
||||
uint32_t *even = rs.limbs;
|
||||
__align__(8) uint32_t odd[2 * TLC - 2];
|
||||
mul_n(even, a, b[0]);
|
||||
mul_n(odd, a + 1, b[0]);
|
||||
mad_row(&even[2], &odd[0], a, b[1]);
|
||||
size_t i;
|
||||
#pragma unroll
|
||||
for (i = 2; i < TLC - 1; i += 2) {
|
||||
mad_row(&odd[i], &even[i], a, b[i], TLC - i + 2);
|
||||
mad_row(&even[i + 2], &odd[i], a, b[i + 1], TLC - i + 2);
|
||||
}
|
||||
|
||||
// merge |even| and |odd|
|
||||
even[1] = ptx::add_cc(even[1], odd[0]);
|
||||
for (i = 1; i < TLC + 1; i++)
|
||||
even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
|
||||
even[i + 1] = ptx::addc(even[i + 1], 0);
|
||||
}
|
||||
|
||||
static DEVICE_INLINE void multiply_msb_raw_device(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
|
||||
const uint32_t *a = as.limbs;
|
||||
const uint32_t *b = bs.limbs;
|
||||
uint32_t *even = rs.limbs;
|
||||
__align__(8) uint32_t odd[2 * TLC - 2];
|
||||
for (int i=0; i<2*TLC - 1; i++)
|
||||
{
|
||||
even[i] = 0;
|
||||
odd[i] = 0;
|
||||
}
|
||||
uint32_t min_indexes_sum = TLC - 1;
|
||||
// only diagonal
|
||||
mul_n_msb(even, a, b[0], TLC, min_indexes_sum);
|
||||
mul_n_msb(odd, a + 1, b[0], TLC, min_indexes_sum - 1);
|
||||
mad_row_msb(&even[2], &odd[0], a, b[1], TLC, min_indexes_sum - 1);
|
||||
size_t i;
|
||||
#pragma unroll
|
||||
for (i = 2; i < TLC - 1; i += 2) {
|
||||
mad_row(&odd[i], &even[i], a, b[i]);
|
||||
mad_row(&even[i + 2], &odd[i], a, b[i + 1]);
|
||||
}
|
||||
// merge |even| and |odd|
|
||||
even[1] = ptx::add_cc(even[1], odd[0]);
|
||||
for (i = 1; i < 2 * TLC - 2; i++)
|
||||
even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
|
||||
even[i + 1] = ptx::addc(even[i + 1], 0);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
static HOST_INLINE void multiply_raw_host(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
|
||||
const uint32_t *a = as.limbs;
|
||||
const uint32_t *b = bs.limbs;
|
||||
uint32_t *r = rs.limbs;
|
||||
for (unsigned i = 0; i < TLC; i++) {
|
||||
uint32_t carry = 0;
|
||||
for (unsigned j = 0; j < TLC; j++)
|
||||
for (unsigned j = 0; j < TLC; j++)
|
||||
r[j + i] = host_math::madc_cc(a[j], b[i], r[j + i], carry);
|
||||
r[TLC + i] = carry;
|
||||
}
|
||||
@@ -496,6 +788,22 @@ template <class CONFIG> class Field {
|
||||
#endif
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE void multiply_raw_lsb(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
return multiply_lsb_raw_device(as, bs, rs);
|
||||
#else
|
||||
return multiply_raw_host(as, bs, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE void multiply_raw_msb(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
return ingo_msb_multiply_raw_device(as, bs, rs);
|
||||
#else
|
||||
return multiply_raw_host(as, bs, rs);
|
||||
#endif
|
||||
}
|
||||
|
||||
public:
|
||||
ff_storage limbs_storage;
|
||||
|
||||
@@ -507,6 +815,8 @@ template <class CONFIG> class Field {
|
||||
const uint32_t limb_lsb_idx = (digit_num*digit_width) / 32;
|
||||
const uint32_t shift_bits = (digit_num*digit_width) % 32;
|
||||
unsigned rv = limbs_storage.limbs[limb_lsb_idx] >> shift_bits;
|
||||
// printf("get_scalar_func digit %u rv %u\n",digit_num,rv);
|
||||
// if (shift_bits + digit_width > 32) {
|
||||
if ((shift_bits + digit_width > 32) && (limb_lsb_idx+1 < TLC)) {
|
||||
rv += limbs_storage.limbs[limb_lsb_idx + 1] << (32 - shift_bits);
|
||||
}
|
||||
@@ -535,24 +845,20 @@ template <class CONFIG> class Field {
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const Field& xs) {
|
||||
std::stringstream hex_string;
|
||||
hex_string << std::hex << std::setfill('0');
|
||||
|
||||
for (int i = 0; i < TLC; i++) {
|
||||
hex_string << std::setw(8) << xs.limbs_storage.limbs[i];
|
||||
}
|
||||
|
||||
os << "0x" << hex_string.str();
|
||||
os << "{";
|
||||
for (int i = 0; i < TLC; i++)
|
||||
os << xs.limbs_storage.limbs[i] << ", ";
|
||||
os << "}";
|
||||
return os;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator+(Field xs, const Field& ys) {
|
||||
friend HOST_DEVICE_INLINE Field operator+(Field xs, const Field& ys) {
|
||||
Field rs = {};
|
||||
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return reduce<1>(rs);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator-(Field xs, const Field& ys) {
|
||||
friend HOST_DEVICE_INLINE Field operator-(Field xs, const Field& ys) {
|
||||
Field rs = {};
|
||||
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
if (carry == 0)
|
||||
@@ -563,26 +869,53 @@ template <class CONFIG> class Field {
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE Wide mul_wide(const Field& xs, const Field& ys) {
|
||||
Wide rs = {};
|
||||
static constexpr HOST_DEVICE_INLINE wide mul_wide(const Field& xs, const Field& ys) {
|
||||
wide rs = {};
|
||||
multiply_raw(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
static constexpr DEVICE_INLINE uint32_t sub_limbs_partial_device(uint32_t *x, uint32_t *y, uint32_t *r, uint32_t num_limbs) {
|
||||
r[0] = ptx::sub_cc(x[0], y[0]);
|
||||
#pragma unroll
|
||||
for (unsigned i = 1; i < num_limbs; i++)
|
||||
r[i] = ptx::subc_cc(x[i], y[i]);
|
||||
return ptx::subc(0, 0);
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE uint32_t sub_limbs_partial(uint32_t *x, uint32_t *y, uint32_t *r, uint32_t num_limbs) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
return sub_limbs_partial_device(x, y, r, num_limbs);
|
||||
#else
|
||||
return sub_limbs_partial_host(x, y, r, num_limbs);
|
||||
#endif
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator*(const Field& xs, const Field& ys) {
|
||||
Wide xy = mul_wide(xs, ys);
|
||||
Field xy_hi = xy.get_higher_with_slack();
|
||||
Wide l = {};
|
||||
multiply_raw(xy_hi.limbs_storage, get_m(), l.limbs_storage);
|
||||
//printf("operator* called \n");
|
||||
wide xy = mul_wide(xs, ys); // full mult
|
||||
Field xy_hi = xy.get_higher_with_slack(); // xy << slack_bits
|
||||
wide l = {};
|
||||
multiply_raw_msb(xy_hi.limbs_storage, get_m(), l.limbs_storage); // MSB mult
|
||||
Field l_hi = l.get_higher_with_slack();
|
||||
Wide lp = {};
|
||||
multiply_raw(l_hi.limbs_storage, get_modulus(), lp.limbs_storage);
|
||||
Wide r_wide = xy - lp;
|
||||
Wide r_wide_reduced = {};
|
||||
uint32_t reduced = sub_limbs<true>(r_wide.limbs_storage, modulus_wide(), r_wide_reduced.limbs_storage);
|
||||
r_wide = reduced ? r_wide : r_wide_reduced;
|
||||
wide lp = {};
|
||||
multiply_raw_lsb(l_hi.limbs_storage, get_modulus(), lp.limbs_storage); // LSB mult
|
||||
wide r_wide = xy - lp;
|
||||
wide r_wide_reduced = {};
|
||||
// uint32_t reduced = sub_limbs<true>(r_wide.limbs_storage, modulus_wide(), r_wide_reduced.limbs_storage);
|
||||
// r_wide = reduced ? r_wide : r_wide_reduced;
|
||||
for (unsigned i = 0; i < TLC + 1; i++)
|
||||
{
|
||||
uint32_t carry = sub_limbs_partial(r_wide.limbs_storage.limbs, modulus_wide().limbs, r_wide_reduced.limbs_storage.limbs, TLC + 1);
|
||||
if (carry == 0) // continue to reduce
|
||||
r_wide = r_wide_reduced;
|
||||
else // done
|
||||
break;
|
||||
}
|
||||
|
||||
// number of wrap around is bounded by TLC + 1 times.
|
||||
Field r = r_wide.get_lower();
|
||||
return reduce<1>(r);
|
||||
return (r);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Field& xs, const Field& ys) {
|
||||
@@ -606,24 +939,22 @@ template <class CONFIG> class Field {
|
||||
return !(xs == ys);
|
||||
}
|
||||
|
||||
template <const Field& multiplier, class T> static constexpr HOST_DEVICE_INLINE T mul_const(const T &xs) {
|
||||
return mul_unsigned<multiplier.limbs_storage.limbs[0], T>(xs);
|
||||
}
|
||||
|
||||
template <uint32_t mutliplier, class T, unsigned REDUCTION_SIZE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE T mul_unsigned(const T &xs) {
|
||||
T rs = {};
|
||||
T temp = xs;
|
||||
template <unsigned REDUCTION_SIZE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE Field mul(const unsigned scalar, const Field &xs) {
|
||||
Field rs = {};
|
||||
Field temp = xs;
|
||||
unsigned l = scalar;
|
||||
bool is_zero = true;
|
||||
#ifdef __CUDA_ARCH__
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (unsigned i = 0; i < 32; i++) {
|
||||
if (mutliplier & (1 << i)) {
|
||||
if (l & 1) {
|
||||
rs = is_zero ? temp : (rs + temp);
|
||||
is_zero = false;
|
||||
}
|
||||
if (mutliplier & ((1 << (31 - i) - 1) << (i + 1)))
|
||||
l >>= 1;
|
||||
if (l == 0)
|
||||
break;
|
||||
temp = temp + temp;
|
||||
}
|
||||
@@ -631,7 +962,7 @@ template <class CONFIG> class Field {
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE Wide sqr_wide(const Field& xs) {
|
||||
static constexpr HOST_DEVICE_INLINE wide sqr_wide(const Field& xs) {
|
||||
// TODO: change to a more efficient squaring
|
||||
return mul_wide<MODULUS_MULTIPLE>(xs, xs);
|
||||
}
|
||||
8
icicle/primitives/projective.cu
Normal file
8
icicle/primitives/projective.cu
Normal file
@@ -0,0 +1,8 @@
|
||||
#include <cuda.h>
|
||||
#include "../curves/curve_config.cuh"
|
||||
#include "projective.cuh"
|
||||
|
||||
extern "C" bool eq(projective_t *point1, projective_t *point2, size_t device_id = 0)
|
||||
{
|
||||
return (*point1 == *point2);
|
||||
}
|
||||
131
icicle/primitives/projective.cuh
Normal file
131
icicle/primitives/projective.cuh
Normal file
@@ -0,0 +1,131 @@
|
||||
#pragma once
|
||||
|
||||
#include "affine.cuh"
|
||||
|
||||
template <class FF, class SCALAR_FF, class GEN, unsigned B_VALUE>
|
||||
class Projective {
|
||||
friend Affine<FF>;
|
||||
|
||||
public:
|
||||
FF x;
|
||||
FF y;
|
||||
FF z;
|
||||
|
||||
static HOST_DEVICE_INLINE Projective generator() {
|
||||
return { FF { GEN::generator_x }, FF { GEN::generator_y }, FF::one()};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Projective zero() {
|
||||
return {FF::zero(), FF::one(), FF::zero()};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Affine<FF> to_affine(const Projective &point) {
|
||||
FF denom = FF::inverse(point.z);
|
||||
return {point.x * denom, point.y * denom};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Projective from_affine(const Affine<FF> &point) {
|
||||
return {point.x, point.y, FF::one()};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Projective neg(const Projective &point) {
|
||||
return {point.x, FF::neg(point.y), point.z};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Projective& p2) {
|
||||
const FF X1 = p1.x; // < 2
|
||||
const FF Y1 = p1.y; // < 2
|
||||
const FF Z1 = p1.z; // < 2
|
||||
const FF X2 = p2.x; // < 2
|
||||
const FF Y2 = p2.y; // < 2
|
||||
const FF Z2 = p2.z; // < 2
|
||||
const FF t00 = X1 * X2; // t00 ← X1 · X2 < 2
|
||||
const FF t01 = Y1 * Y2; // t01 ← Y1 · Y2 < 2
|
||||
const FF t02 = Z1 * Z2; // t02 ← Z1 · Z2 < 2
|
||||
const FF t03 = X1 + Y1; // t03 ← X1 + Y1 < 4
|
||||
const FF t04 = X2 + Y2; // t04 ← X2 + Y2 < 4
|
||||
const FF t05 = t03 * t04; // t03 ← t03 · t04 < 3
|
||||
const FF t06 = t00 + t01; // t06 ← t00 + t01 < 4
|
||||
const FF t07 = t05 - t06; // t05 ← t05 − t06 < 2
|
||||
const FF t08 = Y1 + Z1; // t08 ← Y1 + Z1 < 4
|
||||
const FF t09 = Y2 + Z2; // t09 ← Y2 + Z2 < 4
|
||||
const FF t10 = t08 * t09; // t10 ← t08 · t09 < 3
|
||||
const FF t11 = t01 + t02; // t11 ← t01 + t02 < 4
|
||||
const FF t12 = t10 - t11; // t12 ← t10 − t11 < 2
|
||||
const FF t13 = X1 + Z1; // t13 ← X1 + Z1 < 4
|
||||
const FF t14 = X2 + Z2; // t14 ← X2 + Z2 < 4
|
||||
const FF t15 = t13 * t14; // t15 ← t13 · t14 < 3
|
||||
const FF t16 = t00 + t02; // t16 ← t00 + t02 < 4
|
||||
const FF t17 = t15 - t16; // t17 ← t15 − t16 < 2
|
||||
const FF t18 = t00 + t00; // t18 ← t00 + t00 < 2
|
||||
const FF t19 = t18 + t00; // t19 ← t18 + t00 < 2
|
||||
const FF t20 = FF::mul(3 * B_VALUE, t02); // t20 ← b3 · t02 < 2
|
||||
const FF t21 = t01 + t20; // t21 ← t01 + t20 < 2
|
||||
const FF t22 = t01 - t20; // t22 ← t01 − t20 < 2
|
||||
const FF t23 = FF::mul(3 * B_VALUE, t17); // t23 ← b3 · t17 < 2
|
||||
const FF t24 = t12 * t23; // t24 ← t12 · t23 < 2
|
||||
const FF t25 = t07 * t22; // t25 ← t07 · t22 < 2
|
||||
const FF X3 = t25 - t24; // X3 ← t25 − t24 < 2
|
||||
const FF t27 = t23 * t19; // t27 ← t23 · t19 < 2
|
||||
const FF t28 = t22 * t21; // t28 ← t22 · t21 < 2
|
||||
const FF Y3 = t28 + t27; // Y3 ← t28 + t27 < 2
|
||||
const FF t30 = t19 * t07; // t30 ← t19 · t07 < 2
|
||||
const FF t31 = t21 * t12; // t31 ← t21 · t12 < 2
|
||||
const FF Z3 = t31 + t30; // Z3 ← t31 + t30 < 2
|
||||
return {X3, Y3, Z3};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator-(Projective p1, const Projective& p2) {
|
||||
return p1 + neg(p2);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Affine<FF>& p2) {
|
||||
// TODO: change the implementation to a more efficient mixed adder later on
|
||||
return p1 + from_affine(p2);
|
||||
}
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Projective& point) {
|
||||
os << "x: " << point.x << "; y: " << point.y << "; z: " << point.z;
|
||||
return os;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator-(Projective p1, const Affine<FF>& p2) {
|
||||
return p1 + Affine<FF>::neg(p2);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator*(SCALAR_FF scalar, const Projective& point) {
|
||||
Projective res = zero();
|
||||
#ifdef CUDA_ARCH
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < SCALAR_FF::NBITS; i++) {
|
||||
if (i > 0) {
|
||||
res = res + res;
|
||||
}
|
||||
if (scalar.get_scalar_digit(SCALAR_FF::NBITS - i - 1, 1)) {
|
||||
res = res + point;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Projective& p1, const Projective& p2) {
|
||||
return (p1.x * p2.z == p2.x * p1.z) && (p1.y * p2.z == p2.y * p1.z);
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool is_zero(const Projective &point) {
|
||||
return point.x == FF::zero() && point.y != FF::zero() && point.z == FF::zero();
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool is_on_curve(const Projective &point) {
|
||||
if (is_zero(point))
|
||||
return true;
|
||||
bool eq_holds = (FF::mul(B_VALUE, FF::sqr(point.z) * point.z) + FF::sqr(point.x) * point.x == point.z * FF::sqr(point.y));
|
||||
return point.z != FF::zero() && eq_holds;
|
||||
}
|
||||
|
||||
static HOST_INLINE Projective rand_host() {
|
||||
SCALAR_FF rand_scalar = SCALAR_FF::rand_host();
|
||||
return rand_scalar * generator();
|
||||
}
|
||||
};
|
||||
@@ -1,8 +1,9 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "test_kernels.cuh"
|
||||
|
||||
#include <iostream>
|
||||
#include <boost/multiprecision/cpp_int.hpp>
|
||||
namespace mp = boost::multiprecision;
|
||||
|
||||
template <class T>
|
||||
int device_populate_random(T* d_elements, unsigned n) {
|
||||
@@ -20,92 +21,90 @@ int device_set(T* d_elements, T el, unsigned n) {
|
||||
return cudaMemcpy(d_elements, h_elements, sizeof(T) * n, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
mp::int1024_t convert_to_boost_mp(uint32_t *a, uint32_t length)
|
||||
{
|
||||
mp::int1024_t res = 0;
|
||||
for (uint32_t i = 0; i < length; i++)
|
||||
{
|
||||
res += (mp::int1024_t)(a[i]) << 32 * i;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
class PrimitivesTest : public ::testing::Test {
|
||||
protected:
|
||||
static const unsigned n = 1 << 5;
|
||||
static const unsigned n = 1 << 10;
|
||||
|
||||
proj *points1{};
|
||||
proj *points2{};
|
||||
g2_proj *g2_points1{};
|
||||
g2_proj *g2_points2{};
|
||||
scalar_field *scalars1{};
|
||||
scalar_field *scalars2{};
|
||||
proj *zero_points{};
|
||||
g2_proj *g2_zero_points{};
|
||||
scalar_field *zero_scalars{};
|
||||
scalar_field *one_scalars{};
|
||||
affine *aff_points{};
|
||||
g2_affine *g2_aff_points{};
|
||||
proj *res_points1{};
|
||||
proj *res_points2{};
|
||||
g2_proj *g2_res_points1{};
|
||||
g2_proj *g2_res_points2{};
|
||||
scalar_field *res_scalars1{};
|
||||
scalar_field *res_scalars2{};
|
||||
scalar_field::wide *res_scalars_wide{};
|
||||
scalar_field::wide *res_scalars_wide_full{};
|
||||
|
||||
PrimitivesTest() {
|
||||
assert(!cudaDeviceReset());
|
||||
assert(!cudaMallocManaged(&points1, n * sizeof(proj)));
|
||||
assert(!cudaMallocManaged(&points2, n * sizeof(proj)));
|
||||
assert(!cudaMallocManaged(&g2_points1, n * sizeof(g2_proj)));
|
||||
assert(!cudaMallocManaged(&g2_points2, n * sizeof(g2_proj)));
|
||||
assert(!cudaMallocManaged(&scalars1, n * sizeof(scalar_field)));
|
||||
assert(!cudaMallocManaged(&scalars2, n * sizeof(scalar_field)));
|
||||
assert(!cudaMallocManaged(&zero_points, n * sizeof(proj)));
|
||||
assert(!cudaMallocManaged(&g2_zero_points, n * sizeof(g2_proj)));
|
||||
assert(!cudaMallocManaged(&zero_scalars, n * sizeof(scalar_field)));
|
||||
assert(!cudaMallocManaged(&one_scalars, n * sizeof(scalar_field)));
|
||||
assert(!cudaMallocManaged(&aff_points, n * sizeof(affine)));
|
||||
assert(!cudaMallocManaged(&g2_aff_points, n * sizeof(g2_affine)));
|
||||
assert(!cudaMallocManaged(&res_points1, n * sizeof(proj)));
|
||||
assert(!cudaMallocManaged(&res_points2, n * sizeof(proj)));
|
||||
assert(!cudaMallocManaged(&g2_res_points1, n * sizeof(g2_proj)));
|
||||
assert(!cudaMallocManaged(&g2_res_points2, n * sizeof(g2_proj)));
|
||||
assert(!cudaMallocManaged(&res_scalars1, n * sizeof(scalar_field)));
|
||||
assert(!cudaMallocManaged(&res_scalars2, n * sizeof(scalar_field)));
|
||||
assert(!cudaMallocManaged(&res_scalars_wide, n * sizeof(scalar_field::wide)));
|
||||
assert(!cudaMallocManaged(&res_scalars_wide_full, n * sizeof(scalar_field::wide)));
|
||||
|
||||
}
|
||||
|
||||
~PrimitivesTest() override {
|
||||
cudaFree(points1);
|
||||
cudaFree(points2);
|
||||
cudaFree(g2_points1);
|
||||
cudaFree(g2_points2);
|
||||
cudaFree(scalars1);
|
||||
cudaFree(scalars2);
|
||||
cudaFree(zero_points);
|
||||
cudaFree(g2_zero_points);
|
||||
cudaFree(zero_scalars);
|
||||
cudaFree(one_scalars);
|
||||
cudaFree(aff_points);
|
||||
cudaFree(g2_aff_points);
|
||||
cudaFree(res_points1);
|
||||
cudaFree(res_points2);
|
||||
cudaFree(g2_res_points1);
|
||||
cudaFree(g2_res_points2);
|
||||
cudaFree(res_scalars1);
|
||||
cudaFree(res_scalars2);
|
||||
|
||||
cudaFree(res_scalars_wide);
|
||||
cudaFree(res_scalars_wide_full);
|
||||
|
||||
cudaDeviceReset();
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
ASSERT_EQ(device_populate_random<proj>(points1, n), cudaSuccess);
|
||||
ASSERT_EQ(device_populate_random<proj>(points2, n), cudaSuccess);
|
||||
ASSERT_EQ(device_populate_random<g2_proj>(g2_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(device_populate_random<g2_proj>(g2_points2, n), cudaSuccess);
|
||||
ASSERT_EQ(device_populate_random<scalar_field>(scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(device_populate_random<scalar_field>(scalars2, n), cudaSuccess);
|
||||
ASSERT_EQ(device_set<proj>(zero_points, proj::zero(), n), cudaSuccess);
|
||||
ASSERT_EQ(device_set<g2_proj>(g2_zero_points, g2_proj::zero(), n), cudaSuccess);
|
||||
ASSERT_EQ(device_set<scalar_field>(zero_scalars, scalar_field::zero(), n), cudaSuccess);
|
||||
ASSERT_EQ(device_set<scalar_field>(one_scalars, scalar_field::one(), n), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(aff_points, 0, n * sizeof(affine)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(g2_aff_points, 0, n * sizeof(g2_affine)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(res_points1, 0, n * sizeof(proj)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(res_points2, 0, n * sizeof(proj)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(g2_res_points1, 0, n * sizeof(g2_proj)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(g2_res_points2, 0, n * sizeof(g2_proj)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(res_scalars1, 0, n * sizeof(scalar_field)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(res_scalars2, 0, n * sizeof(scalar_field)), cudaSuccess);
|
||||
|
||||
ASSERT_EQ(cudaMemset(res_scalars_wide, 0, n * sizeof(scalar_field::wide)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(res_scalars_wide_full, 0, n * sizeof(scalar_field::wide)), cudaSuccess);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -279,103 +278,188 @@ TEST_F(PrimitivesTest, ECMixedAdditionOfNegatedPointEqSubtraction) {
|
||||
ASSERT_EQ(res_points1[i], points1[i] + res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECRandomPointsAreOnCurve) {
|
||||
for (unsigned i = 0; i < 2; i++)
|
||||
ASSERT_PRED1(g2_proj::is_on_curve, g2_points1[i]);
|
||||
TEST_F(PrimitivesTest, MP_LSB_MULT) {
|
||||
// LSB multiply, check correctness of first TLC + 1 digits result.
|
||||
ASSERT_EQ(mp_lsb_mult(scalars1, scalars2, res_scalars_wide), cudaSuccess);
|
||||
std::cout << "first GPU lsb mult output = 0x";
|
||||
for (int i=0; i<2*scalar_field::TLC; i++)
|
||||
{
|
||||
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i];
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
|
||||
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
|
||||
std::cout << "first GPU full mult output = 0x";
|
||||
for (int i=0; i<2*scalar_field::TLC; i++)
|
||||
{
|
||||
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i];
|
||||
}
|
||||
std::cout << std::endl;
|
||||
for (int j = 0; j < n; j++)
|
||||
{
|
||||
for (int i=0; i<scalar_field::TLC + 1; i++)
|
||||
{
|
||||
ASSERT_EQ(res_scalars_wide_full[j].limbs_storage.limbs[i], res_scalars_wide[j].limbs_storage.limbs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECPointAdditionSubtractionCancel) {
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_sub(g2_res_points1, g2_points2, g2_res_points2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_res_points2[i]);
|
||||
TEST_F(PrimitivesTest, MP_MSB_MULT) {
|
||||
// MSB multiply, take n msb bits of multiplication, assert that the error is up to 1.
|
||||
ASSERT_EQ(mp_msb_mult(scalars1, scalars2, res_scalars_wide), cudaSuccess);
|
||||
std::cout << "first GPU msb mult output = 0x";
|
||||
for (int i=2*scalar_field::TLC - 1; i >=0 ; i--)
|
||||
{
|
||||
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
|
||||
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
|
||||
std::cout << "first GPU full mult output = 0x";
|
||||
for (int i=2*scalar_field::TLC - 1; i >=0 ; i--)
|
||||
{
|
||||
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i] << " ";
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
|
||||
for (int i=0; i < 2*scalar_field::TLC - 1; i++)
|
||||
{
|
||||
if (res_scalars_wide_full[0].limbs_storage.limbs[i] == res_scalars_wide[0].limbs_storage.limbs[i])
|
||||
std::cout << "matched word idx = " << i << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECPointZeroAddition) {
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_zero_points, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_res_points1[i]);
|
||||
TEST_F(PrimitivesTest, INGO_MP_MULT) {
|
||||
// MSB multiply, take n msb bits of multiplication, assert that the error is up to 1.
|
||||
ASSERT_EQ(ingo_mp_mult(scalars1, scalars2, res_scalars_wide), cudaSuccess);
|
||||
std::cout << "INGO = 0x";
|
||||
for (int i=0; i < 2*scalar_field::TLC ; i++)
|
||||
{
|
||||
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
|
||||
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
|
||||
std::cout << "ZKSYNC = 0x";
|
||||
for (int i=0; i < 2*scalar_field::TLC ; i++)
|
||||
{
|
||||
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i] << " ";
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
|
||||
for (int i=0; i < 2*scalar_field::TLC - 1; i++)
|
||||
{
|
||||
if (res_scalars_wide_full[0].limbs_storage.limbs[i] == res_scalars_wide[0].limbs_storage.limbs[i])
|
||||
std::cout << "matched word idx = " << i << std::endl;
|
||||
}
|
||||
for (int j=0; j<n; j++)
|
||||
{
|
||||
for (int i=0; i < 2*scalar_field::TLC - 1; i++)
|
||||
{
|
||||
ASSERT_EQ(res_scalars_wide_full[j].limbs_storage.limbs[i], res_scalars_wide[j].limbs_storage.limbs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECPointAdditionHostDeviceEq) {
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i] + g2_points2[i], g2_res_points1[i]);
|
||||
|
||||
TEST_F(PrimitivesTest, INGO_MP_MSB_MULT) {
|
||||
// MSB multiply, take n msb bits of multiplication, assert that the error is up to 1.
|
||||
ASSERT_EQ(ingo_mp_msb_mult(scalars1, scalars2, res_scalars_wide, n), cudaSuccess);
|
||||
std::cout << "INGO MSB = 0x";
|
||||
for (int i=2*scalar_field::TLC - 1; i >= 0 ; i--)
|
||||
{
|
||||
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
|
||||
std::cout << "ZKSYNC = 0x";
|
||||
for (int i=2*scalar_field::TLC - 1; i >= 0 ; i--)
|
||||
{
|
||||
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i] << " ";
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
|
||||
|
||||
// for (int i=scalar_field::TLC; i < 2*scalar_field::TLC - 1; i++)
|
||||
// {
|
||||
// ASSERT_EQ(in_bound, true);
|
||||
// }
|
||||
// for (int j=0; j<n; j++)
|
||||
// {
|
||||
// for (int i=0; i < 2*scalar_field::TLC - 1; i++)
|
||||
// {
|
||||
// ASSERT_EQ(res_scalars_wide_full[j].limbs_storage.limbs[i], res_scalars_wide[j].limbs_storage.limbs[i]);
|
||||
// }
|
||||
// }
|
||||
// mp testing
|
||||
mp::int1024_t scalar_1_mp = 0;
|
||||
mp::int1024_t scalar_2_mp = 0;
|
||||
mp::int1024_t res_mp = 0;
|
||||
mp::int1024_t res_gpu = 0;
|
||||
uint32_t num_limbs = scalar_field::TLC;
|
||||
|
||||
for (int j=0; j<n; j++)
|
||||
{
|
||||
uint32_t* scalar1_limbs = scalars1[j].limbs_storage.limbs;
|
||||
uint32_t* scalar2_limbs = scalars2[j].limbs_storage.limbs;
|
||||
scalar_1_mp = convert_to_boost_mp(scalar1_limbs, num_limbs);
|
||||
scalar_2_mp = convert_to_boost_mp(scalar2_limbs, num_limbs);
|
||||
res_mp = scalar_1_mp * scalar_2_mp;
|
||||
res_mp = res_mp >> (num_limbs * 32);
|
||||
res_gpu = convert_to_boost_mp(&(res_scalars_wide[j]).limbs_storage.limbs[num_limbs], num_limbs);
|
||||
std::cout << "res mp = " << res_mp << std::endl;
|
||||
std::cout << "res gpu = " << res_gpu << std::endl;
|
||||
std::cout << "error = " << res_mp - res_gpu << std::endl;
|
||||
bool upper_bound = res_gpu <= res_mp;
|
||||
bool lower_bound = res_gpu > (res_mp - num_limbs);
|
||||
bool in_bound = upper_bound && lower_bound;
|
||||
|
||||
|
||||
ASSERT_EQ(in_bound, true);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationHostDeviceEq) {
|
||||
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(scalars1[i] * g2_points1[i], g2_res_points1[i]);
|
||||
TEST_F(PrimitivesTest, INGO_MP_MOD_MULT) {
|
||||
std::cout << " taking num limbs " << std::endl;
|
||||
uint32_t num_limbs = scalar_field::TLC;
|
||||
std::cout << " calling gpu... = " << std::endl;
|
||||
ASSERT_EQ(ingo_mp_mod_mult(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
std::cout << " gpu call done " << std::endl;
|
||||
// mp testing
|
||||
mp::int1024_t scalar_1_mp = 0;
|
||||
mp::int1024_t scalar_2_mp = 0;
|
||||
mp::int1024_t res_mp = 0;
|
||||
mp::int1024_t res_gpu = 0;
|
||||
mp::int1024_t p = convert_to_boost_mp(scalar_field::get_modulus().limbs, num_limbs);
|
||||
std::cout << " p = " << p << std::endl;
|
||||
|
||||
|
||||
for (int j=0; j<n; j++)
|
||||
{
|
||||
uint32_t* scalar1_limbs = scalars1[j].limbs_storage.limbs;
|
||||
uint32_t* scalar2_limbs = scalars2[j].limbs_storage.limbs;
|
||||
scalar_1_mp = convert_to_boost_mp(scalar1_limbs, num_limbs);
|
||||
scalar_2_mp = convert_to_boost_mp(scalar2_limbs, num_limbs);
|
||||
// std::cout << " s1 = " << scalar_1_mp << std::endl;
|
||||
// std::cout << " s2 = " << scalar_2_mp << std::endl;
|
||||
res_mp = (scalar_1_mp * scalar_2_mp) % p;
|
||||
res_gpu = convert_to_boost_mp((res_scalars1[j]).limbs_storage.limbs, num_limbs);
|
||||
std::cout << "res mp = " << res_mp << std::endl;
|
||||
std::cout << "res gpu = " << res_gpu << std::endl;
|
||||
std::cout << "error = " << res_mp - res_gpu << std::endl;
|
||||
ASSERT_EQ(res_gpu, res_mp);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByOne) {
|
||||
ASSERT_EQ(vec_mul(one_scalars, points1, res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByMinusOne) {
|
||||
ASSERT_EQ(vec_neg(one_scalars, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_neg(g2_points1, g2_res_points2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_res_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByTwo) {
|
||||
ASSERT_EQ(vec_add(one_scalars, one_scalars, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ((one_scalars[i] + one_scalars[i]) * g2_points1[i], g2_res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationInverseCancel) {
|
||||
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(field_vec_inv(scalars1, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, g2_res_points1, g2_res_points2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverMultiplication) {
|
||||
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars2, g2_res_points1, g2_res_points2, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_res_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverAddition) {
|
||||
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars2, g2_points1, g2_res_points2, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(res_scalars1[i] * g2_points1[i], g2_res_points1[i] + g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECProjectiveToAffine) {
|
||||
ASSERT_EQ(point_vec_to_affine(g2_points1, g2_aff_points, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_proj::from_affine(g2_aff_points[i]));
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECMixedPointAddition) {
|
||||
ASSERT_EQ(point_vec_to_affine(g2_points2, g2_aff_points, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_aff_points, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_res_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECMixedAdditionOfNegatedPointEqSubtraction) {
|
||||
ASSERT_EQ(point_vec_to_affine(g2_points2, g2_aff_points, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_sub(g2_points1, g2_aff_points, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_neg(g2_points2, g2_res_points2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_res_points1[i], g2_points1[i] + g2_res_points2[i]);
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
@@ -3,17 +3,12 @@
|
||||
// TODO: change the curve depending on env variable
|
||||
#include "../curves/bls12_381.cuh"
|
||||
#include "projective.cuh"
|
||||
#include "extension_field.cuh"
|
||||
#include "field.cuh"
|
||||
|
||||
typedef Field<fp_config> scalar_field;
|
||||
typedef Field<fq_config> base_field;
|
||||
typedef Affine<base_field> affine;
|
||||
static constexpr base_field b = base_field{ weierstrass_b };
|
||||
typedef Projective<base_field, scalar_field, b> proj;
|
||||
typedef ExtensionField<fq_config> base_extension_field;
|
||||
typedef Affine<base_extension_field> g2_affine;
|
||||
static constexpr base_extension_field b2 = base_extension_field{ base_field {b_re}, base_field {b_im}};
|
||||
typedef Projective<base_extension_field, scalar_field, b2> g2_proj;
|
||||
typedef Projective<base_field, scalar_field, group_generator, weierstrass_b> proj;
|
||||
|
||||
|
||||
template <class T1, class T2>
|
||||
@@ -98,16 +93,99 @@ int field_vec_sqr(const scalar_field *x, scalar_field *result, const unsigned co
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class P, class A>
|
||||
__global__ void to_affine_points_kernel(const P *x, A *result, const unsigned count) {
|
||||
__global__ void to_affine_points_kernel(const proj *x, affine *result, const unsigned count) {
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count)
|
||||
return;
|
||||
result[gid] = P::to_affine(x[gid]);
|
||||
result[gid] = proj::to_affine(x[gid]);
|
||||
}
|
||||
|
||||
template <class P, class A> int point_vec_to_affine(const P *x, A *result, const unsigned count) {
|
||||
to_affine_points_kernel<P, A><<<(count - 1) / 32 + 1, 32>>>(x, result, count);
|
||||
int point_vec_to_affine(const proj *x, affine *result, const unsigned count) {
|
||||
to_affine_points_kernel<<<(count - 1) / 32 + 1, 32>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
|
||||
__global__ void mp_mult_kernel(const scalar_field *x, const scalar_field *y, scalar_field::wide *result) {
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
scalar_field::multiply_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
|
||||
}
|
||||
|
||||
|
||||
int mp_mult(const scalar_field *x, scalar_field *y, scalar_field::wide *result)
|
||||
{
|
||||
mp_mult_kernel<<<1, 32>>>(x, y, result);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
|
||||
|
||||
__global__ void mp_lsb_mult_kernel(const scalar_field *x, const scalar_field *y, scalar_field::wide *result) {
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
scalar_field::multiply_lsb_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
|
||||
}
|
||||
|
||||
|
||||
int mp_lsb_mult(const scalar_field *x, scalar_field *y, scalar_field::wide *result)
|
||||
{
|
||||
mp_lsb_mult_kernel<<<1, 32>>>(x, y, result);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
__global__ void mp_msb_mult_kernel(const scalar_field *x, const scalar_field *y, scalar_field::wide *result) {
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
scalar_field::multiply_msb_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
|
||||
}
|
||||
|
||||
|
||||
int mp_msb_mult(const scalar_field *x, scalar_field *y, scalar_field::wide *result)
|
||||
{
|
||||
mp_msb_mult_kernel<<<1, 1>>>(x, y, result);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
|
||||
__global__ void ingo_mp_mult_kernel(const scalar_field *x, const scalar_field *y, scalar_field::wide *result) {
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
scalar_field::ingo_multiply_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
|
||||
}
|
||||
|
||||
|
||||
int ingo_mp_mult(const scalar_field *x, scalar_field *y, scalar_field::wide *result)
|
||||
{
|
||||
ingo_mp_mult_kernel<<<1, 32>>>(x, y, result);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
|
||||
__global__ void ingo_mp_msb_mult_kernel(const scalar_field *x, const scalar_field *y, scalar_field::wide *result) {
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
scalar_field::ingo_msb_multiply_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
|
||||
}
|
||||
|
||||
|
||||
int ingo_mp_msb_mult(const scalar_field *x, scalar_field *y, scalar_field::wide *result, const unsigned n)
|
||||
{
|
||||
ingo_mp_msb_mult_kernel<<<1, n>>>(x, y, result);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
|
||||
__global__ void ingo_mp_mod_mult_kernel(const scalar_field *x, const scalar_field *y, scalar_field *result) {
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
result[gid] = x[gid] * y[gid];
|
||||
}
|
||||
|
||||
|
||||
int ingo_mp_mod_mult(const scalar_field *x, scalar_field *y, scalar_field *result, const unsigned n)
|
||||
{
|
||||
ingo_mp_mod_mult_kernel<<<1, n>>>(x, y, result);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
336
src/field.rs
Normal file
336
src/field.rs
Normal file
@@ -0,0 +1,336 @@
|
||||
use std::ffi::c_uint;
|
||||
use std::mem::transmute;
|
||||
|
||||
use ark_bls12_381::{Fq, G1Affine, G1Projective};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
#[derive(Debug, PartialEq, Copy, Clone)]
|
||||
#[repr(C)]
|
||||
pub struct Field<const NUM_LIMBS: usize> {
|
||||
pub s: [u32; NUM_LIMBS],
|
||||
}
|
||||
|
||||
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field<NUM_LIMBS> {}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Default for Field<NUM_LIMBS> {
|
||||
fn default() -> Self {
|
||||
Field::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Field<NUM_LIMBS> {
|
||||
pub fn zero() -> Self {
|
||||
Field {
|
||||
s: [0u32; NUM_LIMBS],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
Field { s }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.s
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
pub const BASE_LIMBS: usize = 12;
|
||||
pub const SCALAR_LIMBS: usize = 8;
|
||||
|
||||
#[cfg(feature = "bn254")]
|
||||
pub const BASE_LIMBS: usize = 8;
|
||||
#[cfg(feature = "bn254")]
|
||||
pub const SCALAR_LIMBS: usize = 8;
|
||||
|
||||
pub type BaseField = Field<BASE_LIMBS>;
|
||||
pub type ScalarField = Field<SCALAR_LIMBS>;
|
||||
|
||||
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
|
||||
match val.len() {
|
||||
n if n < NUM_LIMBS => {
|
||||
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
|
||||
padded[..val.len()].copy_from_slice(&val);
|
||||
padded
|
||||
}
|
||||
n if n == NUM_LIMBS => val.try_into().unwrap(),
|
||||
_ => panic!("slice has too many elements"),
|
||||
}
|
||||
}
|
||||
|
||||
impl BaseField {
|
||||
pub fn limbs(&self) -> [u32; BASE_LIMBS] {
|
||||
self.s
|
||||
}
|
||||
|
||||
pub fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
s: get_fixed_limbs(value),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger384 {
|
||||
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger384) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarField {
|
||||
pub fn limbs(&self) -> [u32; SCALAR_LIMBS] {
|
||||
self.s
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger256) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
|
||||
pub fn to_ark_transmute(&self) -> BigInteger256 {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_ark_transmute(v: BigInteger256) -> ScalarField {
|
||||
unsafe { transmute(v) }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct Point {
|
||||
pub x: BaseField,
|
||||
pub y: BaseField,
|
||||
pub z: BaseField,
|
||||
}
|
||||
|
||||
impl Default for Point {
|
||||
fn default() -> Self {
|
||||
Point::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl Point {
|
||||
pub fn zero() -> Self {
|
||||
Point {
|
||||
x: BaseField::zero(),
|
||||
y: BaseField::one(),
|
||||
z: BaseField::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infinity() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Projective {
|
||||
//TODO: generic conversion
|
||||
self.to_ark_affine().into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective) -> Point {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point {
|
||||
x: BaseField::from_ark((ark.x * z_invsq).into_repr()),
|
||||
y: BaseField::from_ark((ark.y * z_invq3).into_repr()),
|
||||
z: BaseField::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinity {
|
||||
pub x: BaseField,
|
||||
pub y: BaseField,
|
||||
}
|
||||
|
||||
impl Default for PointAffineNoInfinity {
|
||||
fn default() -> Self {
|
||||
PointAffineNoInfinity {
|
||||
x: BaseField::zero(),
|
||||
y: BaseField::zero(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PointAffineNoInfinity {
|
||||
// TODO: generics
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinity {
|
||||
x: BaseField {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> Point {
|
||||
Point {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BaseField::one(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine {
|
||||
G1Affine::new(Fq::new(self.x.to_ark()), Fq::new(self.y.to_ark()), false)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine {
|
||||
G1Affine::new(
|
||||
Fq::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq::from_repr(self.y.to_ark()).unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(p: &G1Affine) -> Self {
|
||||
PointAffineNoInfinity {
|
||||
x: BaseField::from_ark(p.x.into_repr()),
|
||||
y: BaseField::from_ark(p.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Point {
|
||||
// TODO: generics
|
||||
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
Point {
|
||||
x: BaseField {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
z: BaseField {
|
||||
s: get_fixed_limbs(z),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> Point {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BASE_LIMBS, "length must be 3 * {}", BASE_LIMBS);
|
||||
Point {
|
||||
x: BaseField {
|
||||
s: value[..BASE_LIMBS].try_into().unwrap(),
|
||||
},
|
||||
y: BaseField {
|
||||
s: value[BASE_LIMBS..BASE_LIMBS * 2].try_into().unwrap(),
|
||||
},
|
||||
z: BaseField {
|
||||
s: value[BASE_LIMBS * 2..].try_into().unwrap(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity {
|
||||
x: BaseField::from_ark(ark_affine.x.into_repr()),
|
||||
y: BaseField::from_ark(ark_affine.y.into_repr()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity {
|
||||
PointAffineNoInfinity {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarField {
|
||||
pub fn from_limbs(value: &[u32]) -> ScalarField {
|
||||
ScalarField {
|
||||
s: get_fixed_limbs(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ark_bls12_381::Fr;
|
||||
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, field::{Point, ScalarField}};
|
||||
|
||||
#[test]
|
||||
fn test_ark_scalar_convert() {
|
||||
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
|
||||
let scalar = ScalarField::from_limbs(&limbs);
|
||||
assert_eq!(
|
||||
scalar.to_ark(),
|
||||
scalar.to_ark_transmute(),
|
||||
"{:08X?} {:08X?}",
|
||||
scalar.to_ark(),
|
||||
scalar.to_ark_transmute()
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(non_snake_case)]
|
||||
fn test_point_equality() {
|
||||
let left = Point::zero();
|
||||
let right = Point::zero();
|
||||
assert_eq!(left, right);
|
||||
let right = Point::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
|
||||
assert_eq!(left, right);
|
||||
let right = Point::from_limbs(
|
||||
&[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
&[0; 12],
|
||||
&[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
);
|
||||
assert!(left != right);
|
||||
}
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
84
src/utils.rs
Normal file
84
src/utils.rs
Normal file
@@ -0,0 +1,84 @@
|
||||
pub fn from_limbs<T>(limbs: Vec<u32>, chunk_size: usize, f: fn(&[u32]) -> T) -> Vec<T> {
|
||||
let points = limbs
|
||||
.chunks(chunk_size)
|
||||
.map(|lmbs| f(lmbs))
|
||||
.collect::<Vec<T>>();
|
||||
points
|
||||
}
|
||||
|
||||
pub fn u32_vec_to_u64_vec(arr_u32: &[u32]) -> Vec<u64> {
|
||||
let len = (arr_u32.len() / 2) as usize;
|
||||
let mut arr_u64 = vec![0u64; len];
|
||||
|
||||
for i in 0..len {
|
||||
arr_u64[i] = u64::from(arr_u32[i * 2]) | (u64::from(arr_u32[i * 2 + 1]) << 32);
|
||||
}
|
||||
|
||||
arr_u64
|
||||
}
|
||||
|
||||
pub fn u64_vec_to_u32_vec(arr_u64: &[u64]) -> Vec<u32> {
|
||||
let len = arr_u64.len() * 2;
|
||||
let mut arr_u32 = vec![0u32; len];
|
||||
|
||||
for i in 0..arr_u64.len() {
|
||||
arr_u32[i * 2] = arr_u64[i] as u32;
|
||||
arr_u32[i * 2 + 1] = (arr_u64[i] >> 32) as u32;
|
||||
}
|
||||
|
||||
arr_u32
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ark_ff::BigInteger256;
|
||||
|
||||
use crate::field::ScalarField;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_u32_vec_to_u64_vec() {
|
||||
let arr_u32 = [1, 0x0fffffff, 3, 0x2fffffff, 5, 0x4fffffff, 7, 0x6fffffff];
|
||||
|
||||
let s = ScalarField::from_ark_transmute(BigInteger256::new(
|
||||
u32_vec_to_u64_vec(&arr_u32).try_into().unwrap(),
|
||||
))
|
||||
.limbs();
|
||||
|
||||
assert_eq!(arr_u32, s);
|
||||
|
||||
let arr_u64_expected = [
|
||||
0x0FFFFFFF00000001,
|
||||
0x2FFFFFFF00000003,
|
||||
0x4FFFFFFF00000005,
|
||||
0x6FFFFFFF00000007,
|
||||
];
|
||||
|
||||
assert_eq!(
|
||||
u32_vec_to_u64_vec(&arr_u32),
|
||||
arr_u64_expected,
|
||||
"{:016X?}",
|
||||
u32_vec_to_u64_vec(&arr_u32)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_u64_vec_to_u32_vec() {
|
||||
let arr_u64 = [
|
||||
0x2FFFFFFF00000001,
|
||||
0x4FFFFFFF00000003,
|
||||
0x6FFFFFFF00000005,
|
||||
0x8FFFFFFF00000007,
|
||||
];
|
||||
|
||||
let arr_u32_expected = [1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7, 0x8fffffff];
|
||||
|
||||
assert_eq!(
|
||||
u64_vec_to_u32_vec(&arr_u64),
|
||||
arr_u32_expected,
|
||||
"{:016X?}",
|
||||
u64_vec_to_u32_vec(&arr_u64)
|
||||
);
|
||||
}
|
||||
}
|
||||
32
test_vectors/B0.csv
Normal file
32
test_vectors/B0.csv
Normal file
@@ -0,0 +1,32 @@
|
||||
0e1015f11a2295fca33b59c866fdc6edef1dd73dc4bc3a6af17c58767101d7c16be8dfdd958a2e937a15d4ea9d70d67813b59a9b95edf4945609e788b212fe95667e001b59078a09a077982e132be58d0f469f481ef483be3c02aa9d9d2d0f44
|
||||
077afd81860c0b18b53602c32718f13b201b613551a451df725e6111e8d4a05bf30b30f76648feb841c6af08631901860b0d15ae8e47f8fc331b311b9bac27fa441cc931f3e8494a6e00f8e057a900672be351a707e503b1e36d39a825a5d146
|
||||
15e48ca8037c5d6f7a5899f01e730fdefa83cfd79df05a356f43079e0ad3f335706a05602c6235e4df1d534fb582d39c024d8d036eeb3914f603bf15f68283a9f4291bb4dacab609432579ecdd77da53f25ce08ee531ea7362046145ea314519
|
||||
18df179320a2ccdbaa1c1b9e4645ef982dc69dcbbb8fbb2568478bdbef48379a3132a9e35c4b68bd83e26f9318835a0a104046222b49e948ab2ea1af24751cfe30ef4e5925f89ba879e13e03f508c4bef0e8cb9cb281c15a329660e3f9a619d6
|
||||
02252df8a50a5b2f5ae88572841a08580269257794fa7d1d7f2f4bc3440965588074b3a8512a8f265ec6b624a0ba64ba188bc88ff51cb97cdce03eb1b307a5df01ff71919ddc4d6fc206f144ecd766c6b8a7b09160b5f80e5782929dfa4013de
|
||||
04038a1dc80c7e4510abbdd4e47e8dcd20f7a815868f67de979f9420e141896fc3b94bf72010ccc86e47583588baff2206d3ecd127ac43640dedd8c030fb28dea75a1276df341d59ce1ec58a1fd544f06800d8b7fe1290260dde8e5102378a76
|
||||
0086af32657ce4ffa584cb6618effc8c72acf4dfae11d5b375fa1f721f273ecf400ce0ac9637e704a12e453bbf7ccc8316b6a4a920a8eea0013fadaef1c58c3ac90f73b628a723d66130252b248595d4abec26de21e4f1da8160a5655dd328f7
|
||||
128750a071c300d37adebb8376d857cbee81ec1c9b0ad18321d970b02e3b15fc82aafbfbba1af75566679bd1ec7cb6c71825cbebc5764e1c0fa7b0a3b3e9ac7033c9ed4b1ef93725b3d9caf637df8fb27cb45a1bc87aedc965c8ad750949d511
|
||||
142827e92db3706779d5e653ae1a42dc969da1644dd406b8c87eeb3105cc1c2bad31f177b4888d1b41b39257468ef84a0834c37d8d5de3ef059508445535be057720c8f505618d72f9365c38e0f617a4a860afbea08e5c4f6c10ccb260e234d9
|
||||
0acb2a26095113da74009be42bc4d1f3b9778b8a103893a14814e1b2e4a1e673afdf1ad3adaa764eca7f84ad93b78a1a0927ce95549917cc3fca1e9eb1c3c92493f9fabd4f590cf0001417548eb0fcfcb64c75a556140781e0ca5a4adbc5d55e
|
||||
100aecf9189b4696c2c0cd3f6d55e539e5c5bcd86ebae0f54b159adf4d6cf0345c72d8d850f01275e9ebb5de45fb95530e7af021cbfaaf66797cd0fbcb91da22123f81943e93d77a98e87d0f71f7bae5e575247bea1898ab3fe5d7de5751565d
|
||||
0d5ef2ccb6a3ef5868578a567abce351690d3901da5c862ffb44608646c9ef201ab837aa61da48739512f81fdc5ee09508f51323f0b7ab5f9fd71d2742284876e97ba9de2a197c66ffcd7f414a60cb18275c78f2820b5f04031da4fde3ce4311
|
||||
1090f9f8f7b4f8ffe0d3123bad29c7017e73b57b11ac1bf22a154aa1df64478007d9cb226fc90a9a2d82537bdf31e2d413a8e89f5ac091c8759ea6d04b6ea0815ef933f6341c709fd0e2e5f7111640743059a426b47a4ef67910f5178e32f8d3
|
||||
0cf7ef3d29867dbc46350cbc3079dc2c87332132f12476b6ac7d3f9c28a3e52eeed50196b617c97cc2fd33fcd300cb381704ef5ea2f2000480cf3917834be0ea88e987c0e977493c8a538a3066782169355cea9cd52d0d6a61004dabc0b5a4b8
|
||||
0b0c9947984b5e1c18ea269f2b95cf7fe77d8a85e266834b906d845446ac571caeabdb454eefd727abac85cbc3ef7361029de52ea0854f65b6090215f0bf5c0bdddf7009de35d7950c4b98de9796cea78669995ab736fb43a13184d572a18263
|
||||
113428126cf7983a57e1e1e04295a2af45fad2cb93a336e83bed47befd4d9a2e722e729e066bd832a5967f3047d7640301af0cef733fcd53e5fc9ce50ae5dfc28ee90fd745a06dca5ebff3fe20e6d6e32dca3cf3c1a1711a47f91b1d18d4aab9
|
||||
097d0380411120ab4244d8a253c464bdaaf9577a41d44ee1bf84990eba070d0ddae15b1e0811e773a1cf7f9e3e7ec8230d84418cf6972b565cf446ea05cf7adec3d1e0b13041df4ea4896a99ed3a6db02c211869175957c00451b6feacdd5478
|
||||
17680474d1d5bb309200042beef6f78e85e31e83d52f30d6c5e2d4412ddea30c79770b0a28b2178157c38ee09ae06cb902a77ca9ab57d0ddcd08e06efd5d360795bdf11c8ffc2816bd3f0198a84d2953d90092e55c49415cfd3275d708dafb08
|
||||
15530a4602967f1a021d5353aabf19b98d6dac70e7aa4ce23295ead2cd2a4c776595954e033704f7227b3063d36180d613989f8ff9a2bda4e859c52e7cb4e8b58dce871b0cc05a8b69b06176b197aaa602aa0a96dd10b2892c9a56d284b7ac5f
|
||||
0d93e873c284d9dec23f1f6be48a90a02ce94ceb41cd3587f13519675a55bb284dd80690d130c33418aac63085a5c6c216fd6edfa3b80684754dddc478856ede410de72758663a4eee41007f6bbafe24f78ecd76a45895bafcc70f2693e626af
|
||||
0459c4536669a7d08d62500e4d3891ff5f1570fd2b7aa3382274a155dc84d83b44b3095118ab8718b80723cbbe01b400046a83cc7f87eea547bcd581fb6528131c74b8f481b51acaa6ef77f88c48530ab91f84e0bc7023c1d53274f578cffbda
|
||||
0dc5946f18316425962f146862983b1ff0846f5aaf331d9d10cd8f8a9d6d5a9cba03ef367e2697a660bddd6fa1344a7406e6d05b35d4170e3338f0d2f51d5ad24d4b38f60c4e74fd59f7de718b44718c04ceb6e56d340ffd96aee3ee803f4312
|
||||
09e598929faf934dee02811854ea57511333cb2e5b78b2c963fa970243cc7df3486d5df1136160494e0ccc966826a9f707141e435c68de0db9a382750e6fdeb09c50330e02b301acbfc938140ddf0bb0d3d45ee19250089e250372cfe2cc6499
|
||||
07453078fa613dbc35babedb9083060adc2591d9574d3d021fb8dc9c313243a9efac6d98ebc2e108cebe20c73aba43990e32b4b0b9134ec9ce759298c1b1c558a06591c98ae99316d2ac9639d4a1c00edf609e7b75f0d9cf9c75bf5596f9f819
|
||||
131375ab69ff8399d1df1708ddd8a5fe5e2ecbae142a6872bbc51e00d72b69a83e7bfe56f240ee5bb7ac7b2424341e5d117ff1576516c5a4f5dc90180795ddf195dbf0cfc0167c2bfcbbc7b2776865d1a7629b4c19c5b55f8a70034d398c12cb
|
||||
16bd528d0b13fd9446d99bfa77c378810d721c5e906f57eea11881f430ccf4cae2d473a0947d9499f81e2e996a03e19f044c16bb47b2cf21be406086ebe492ac5e84890767610ce9e69df12c0292aa2cf390ce3acd6d4a91b480334c9e494f05
|
||||
11cf5f735c95fa58474020d7472cd4c6da864270c252572062c8d0fef2c3dd8e53e64df814ff0f03959ed1951131c51615fc7b4d087e693dea371f0006c7466d7af8f55a6bc1448f45b9d18d1e1893e44d70dbbec5a6003c92bc30e9d9db54aa
|
||||
0712ef99cdeaef41a7a9b3f64434cef70404b9493f3540b6369b01a4ebae1f053ee0c9e35775c16a886dc97b8f6e06a51358240b83b4ada0aa70745e5fc7d0de44736f28c591ff1f7eb40e343bca2399f47799d97e738b5e8574f90fa8b60cf6
|
||||
0224d643a94213611f5dcc61e8c5d45f83cfe03fb73f4bfd1cded142d6937991f1d676ba35b097b2c30a776336ed16b90f2a283dfbfd69b77367fb1d2b37dcf8d3af2a5fb8614acf792b5ec4c926f0216d84dcbc625bdcc830abe09ade99ec7d
|
||||
0f247cb4a90667b3033e4f55f958190d8512467c24b20d64d52e8480d109a86ac7856d8b95322a58ea893d52b20a8b0912b87cab80963e2f9912272628e86f4d41b1a74fd4202d4850fc867237152ee6ab62329fc77967af68b01e5e370f306d
|
||||
0f5d1dfecae6761a2338591bb7c00ae460f2d6c7cd8ad133c0c372fc518be90c72f0d4ff5fadc6fc047a241f69e1f17f11fc68e7268e175a7101bff60b148cb11ca5dad679b9282b81540bee60b6e930de895006da7c422db849b8a0f5983544
|
||||
18ee8e9af278f6437ccdd230d06625591144fdc0ea7223a9e074cffef1c12733fbaf61c75309d1c36193d73f1fd9cd750f3a41223fa895ec6f7bb4017af73bedeadd29b73b02a8131de71289d7cffac9696ce0bc183a70dcc88297d9df413cbe
|
||||
|
32
test_vectors/B1.csv
Normal file
32
test_vectors/B1.csv
Normal file
@@ -0,0 +1,32 @@
|
||||
0e1015f11a2295fca33b59c866fdc6edef1dd73dc4bc3a6af17c58767101d7c16be8dfdd958a2e937a15d4ea9d70d67813b59a9b95edf4945609e788b212fe95667e001b59078a09a077982e132be58d0f469f481ef483be3c02aa9d9d2d0f44
|
||||
129668c6910934b170e7150632683e0854a14d82a0bc978550abfe641e7fb606c5b7f56af254bdb5de4782547a645d7e0f26dc9642eb22787e6dd2ce0d202da77f982193d9457a3f29e191b12b33d8ee8c4d1839e596d67eaeed58ad0ecd8d62
|
||||
0aaa35b52a529690ce8060721eb917a497291cb344c47404da294e1d56cc72f40b7fc2800aace508149957172196f0400e8dfe58f99b56cd2ed998aa6fc0d5e5259f27457c6ff2d8bdbf3df78badf0a666abb6e4603fb7b4157b93be135c688a
|
||||
12b8dc3954a418bbf4e026668ebdde2f2b4fda1e8eb61f659bc6c5d7246c78d84c2ea5b7deaec1123afd1b7e276885de096a72f66a468512129ae5cc2b037d930cca329a889368f0e55874b084099168c7e37a5ec92aa6972dfe57dc50b18ae7
|
||||
089fac77961bcbd9d8c3c03c522a47eef1dcdd0237c8b99dde2f403c17d7026722ea68709d59e57a30d5b4836ded2c74042ba4269a4522e1753d0f1c1c4551783cc3281af54597dfa8fe4d23acedaeed3d2aebb74744698324a0af5e0569ec35
|
||||
18e737c73aa6332065912ad5b1e1ad98b8971cbb87568e35a499219b317e24a65190891813b90f98717eefa0b881006f0fd71aa199908f3059c9e1731ec2714a87f34a1b5be07742b30b69c32791b591eabfaf48b8f9253d5bc07bc0ecb93dda
|
||||
001e996ab8ccbcdbd953f61465b1d0e9d428868cd48db26fb4d2e4838ff3beb58e67c92744bab7b35720b48be3205e4709148c2b3ae732455a2e5f1bd43fa62dc368af1febbd248a91e60f171a4d5f3bce6dff6726eb46181a7a3b856b1918e0
|
||||
00b2e3aca8ef819cb6d1024454e32a0570b5403d9fb349da91e5880e4fc605a6dd940089ffde4724680aced0dd3dd1c3093d0d43c80fdf76bca9c8dc3378a85c4ab55c3f4b9cf266d48baacdd3b2c06204429e5f0952b24f932fcfcd95888793
|
||||
0b4fb1e2f5fa53da60eee06542439755e8b8aebc017d8842ad2b8cd7e482ada24c948e48157178783402b9dee0696fd2157f99955da642540a5ed8ed7522f3256c9f146cc7722ef26d544ad07fec9fd24390e6a48f08af56b81ab2a1ec829c65
|
||||
0ae3a4428f07a5e61750ea2d96be4758d7a5f0841e659a0b70152c5990ed025e07c41c191431bc2c26c2e05aee61ea3313838ed1d54260d16bdbc624ba5e86c6547365822844c5fb6496571abdbe0f4bb6d37183be72c387f1f6b60d3f5db4de
|
||||
0d61b4e5e2db6ca78a92d1337be76d7a8c2bd1bb1f6e9e70100b67dcd615180919d83b823aa185fb3fd9e0d6386b125b0a215fb556ae6b25c19637a6ddb0adad4d7ecd9db490451ba8aa9794c64389dab3b1f732881b035b574d60bc93e5c489
|
||||
175ad2db474b40a6ee13bfd575e84d47e6a2e4aa23e8ea890bbb196ca491ab8cbf6bedab4bc724b2004cd71270c8c8130d1606b8885a50542f2cc7d468aeda1882bb891c507d9f162cd2579bee205d08faf8e2d2eb7040e718586610ef1efa2a
|
||||
05e6426510e323ba7237a52e5ea48a58049629dad4cee09f6f647c629b7de7cedf15c8d8b79597dab4867fd2cb2a3cf31284584486e5f6a5dcfe87bca3dda3faf493a5ac399dfdbbde70aaa6fd8ca2f0283a5f73d5dca867dae66f7866adf5f5
|
||||
173e5a6a4f3dd2a40b7ee754b180eb16cc93376d5f2db012ab23de9639a44ecf2782b394c4c662fe84e92b6f59f645bd0575cea7dc237ade9a5d2b3d08e426815e1e765761ea343090ec892e78f04631d61795f2f909878871e8d19dac665281
|
||||
13c0658ce79d60e242fd59432f683145027f90f9f9cc6425f5eb0b1ea50b0b69fa0f46f1ef9f7e92959c63e3852c891a05dbb25c8eab8fe7abdd3a2d9b7abb0f4072769992251f6bf36070291a35a3ceb24ddd72c9e50f6ed06bfa6b443a423f
|
||||
0b53f4f76e4dc283c3c0ab0736c6fefbb6f6246a50296f26d2453fa665ed06a4b701cfc4aad4f794a6418780507de9df03ed0ff7d770219c876750fe70f467c861428d9a15f2de7fd862aba09649f166ebd360fda6082a0af0fa5118ec4baf56
|
||||
097d0380411120ab4244d8a253c464bdaaf9577a41d44ee1bf84990eba070d0ddae15b1e0811e773a1cf7f9e3e7ec8230c7cd05d42e8bb43ee2760cc3d7c31f8a0a56ad3c3433370c2a7680709768873f28ae79599faa83fb5ad490153225633
|
||||
040e8b0db7cd95d7990e957f2eec6584f00bcab13238e96a03d7afb6f19e01ebaf80ae952ad71b5a1739b16fde6d802c0997a2fb001320fb8a8de3cbe18f80cd80a613102264bb406491d29f682f2d0d4e56f14d6f6471b8c69f1a2ec2132152
|
||||
1763079809252588415af9279bc47ce8feec45faaf86a3b7f2df704a94c59a4c99f923abe138649fdf524fc3d6df51510adb43529dfe25a37bf629addd09ed13f2d07b6f5b67adae8c00a93081477e9b55430b0e11fcd5f8442c686044164c4b
|
||||
03577a2e8806686ebc0724e2142ad561568f681fd01978f9b293163d226d3b69e2ab0788ff80ccfbc1320277ddef3c8e13cac16c79c697f37437d2b01b27a7e662e6508fcf40c2bbd47b3e521f5e5b93a98a8c0c90a536af207aea692747d84c
|
||||
19862d425df05395388d26afd4d32f751bd81c624ef2cbab3235146554dca50c0a33a0ee420b825578c1f0b5b5642241102d1add599928fa20086533c5e58e3326812f5cd2663d785cfabde01b9825eee46148b27ca1c702f2ba4790980e2aae
|
||||
048cf1dcaa344232ef3de973611d6f1e2680c6c992cb4f388b1e3090df904e2cc89aa3e4964ddea0dd6bc6bdb838c2870a2239e17dfa5fd1e819c08df193b860313b1eff08079f3a36cafe5d3a815dd496deefacf0cdad1bdf18d27e79173020
|
||||
010bee161f1536868715c5dd76dbbacaf3fab591c7a65e0f1b2b5373cef8d7672ad82a06297eebc2192b8aaede0a94960848be4d8fdbf1b63dea32a1e1a6b6286a71ea39d01730e5f5a204544f8c1107bc95757333588a0add90ac0ac0773505
|
||||
0d208b8967424d332e9bb567eefdada6fcacbe80f640b281383f58a7029e8a630a666d7332e3ac2a9fd5df8676381c3701999ad0cebba84b0ebc4e35ebec64e7a661850e9b344cde7cad066847c8a204d15828da0b58e230586a8b9a55c602cd
|
||||
01afe9d23f881d55a5eaeef03748874a05fde34ab12a8e38fd0c05e576c3e88d240957f0b1983874ed2e38c5fd51ef7f12a9bc86b2febcb726901e044e6338a833670e6e2ffeebcf8a8b991b9ce4be22df47d2ba13a518780bf7e7f294357ca8
|
||||
05bdbc96b636f90fa9d92ea8920d12b2637a45a95b3d3fc04ad6274e8d7388294c858d574d445c61f2e1519ce688210601207a866e44ac51df42e038652236f615ca1b3c43307ea634044ad5cf7e57fc3d1bcf1e65b79b77b1a8ee1de569ed84
|
||||
00050c8d35e077c9a601d4f5c75bc82749b86e86cb51474ad7bb0d0a69753d249c5a136b4922877fd061049d6eb511380de2b58a51923a955c717cc2934c5c14c609f429a7616ca1146859535c5376c344b3514044c88c3743fc61f8004606d7
|
||||
15632e4a830760ecea054c14abbb9071c6a7595a7872f5bb7c6a7f246850b7c544090d4d3d33330bdf2b45a765cf7c9b1028964c48a94ff40e2e039e3cbda61e7ecb948b364d917c35274e23c2502294b03b9daf360aeda538a92a28865ff11f
|
||||
16db3aab206e4d35351e75d1d13001c9c89e6a8c1d8eecad993800080ef3cd73d2aec3826de69634b7e0700ced99c0e30f41b281e5d5d9450278bde3be2bd26c5d417513b1b1f7d8ff19546d31b1fdd20778033f29c40476b2c8ef7e0e1c02af
|
||||
006921c2ddf97dfb1e27e89bcd2386a4b78837338e10b14c58fa4e959c1590988e23a3090eb3495b57728d1bb6272dec00977cad35ef9d5c27757b538ce5a917b2087ee6c942ae07620c45688a57f771ab5d8f745f1e059983e5c94a76ca14d3
|
||||
00ed29c7dc7a5a5202f18b21dde0704519b096be4b39ee9b0b5d31c89e8832bb907ec200cef9976decb12d92c1f827921846bd76a2e22b311720f7a1c8a2731bc580d7a6f14a28f9e83d5b0fbc6be779221241f271f1f9e31c1d384e72ad18dc
|
||||
0f9b236d1dd30b5dd6b11c0416fe4db6c861302986316f8ca2a59a2dfa65e9c8fc8128270d8b686b6293c9cd5173fbdf1467181da2942072b18027c9d10bbdce287ad3db3936678e972996830ac48b14b1e2d7a4292d706a8be84577737f8d66
|
||||
|
32
test_vectors/C.csv
Normal file
32
test_vectors/C.csv
Normal file
File diff suppressed because one or more lines are too long
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user