mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-12 17:07:59 -05:00
Compare commits
47 Commits
rust-inter
...
msm-perfor
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e80c42a0dd | ||
|
|
3c8a1e6f31 | ||
|
|
995af2b290 | ||
|
|
b591301f64 | ||
|
|
b791381641 | ||
|
|
00019166d1 | ||
|
|
4c0791eb98 | ||
|
|
dd4fa8e5f2 | ||
|
|
659c883de2 | ||
|
|
8cc6e32fea | ||
|
|
bfcfa3807c | ||
|
|
df0bdb82fb | ||
|
|
12eb53250c | ||
|
|
d273bf6ffc | ||
|
|
61ddc32310 | ||
|
|
ac5047d0e4 | ||
|
|
ecb36fefeb | ||
|
|
1785e793f1 | ||
|
|
b39b529463 | ||
|
|
86bee3af42 | ||
|
|
dbd5cd4cbb | ||
|
|
a767c93564 | ||
|
|
655f014dc2 | ||
|
|
18c7cad89c | ||
|
|
1e9f628235 | ||
|
|
2e45ed1bd4 | ||
|
|
e828d1da2a | ||
|
|
e3f089f0f3 | ||
|
|
233927668c | ||
|
|
1866df60f1 | ||
|
|
ccc8892a52 | ||
|
|
67e4ee2864 | ||
|
|
6c5fe47e55 | ||
|
|
ed9de3d1e9 | ||
|
|
cb61755c8b | ||
|
|
34a556ac85 | ||
|
|
d01e0dbfb1 | ||
|
|
6aa6fe0c1c | ||
|
|
26f2f5c76c | ||
|
|
434ab70305 | ||
|
|
a64df640de | ||
|
|
1b2b9f2826 | ||
|
|
0a36a545bf | ||
|
|
407273dee3 | ||
|
|
f55bd30e13 | ||
|
|
071c24ce5a | ||
|
|
08c34a5183 |
27
.github/workflows/build.yml
vendored
27
.github/workflows/build.yml
vendored
@@ -1,20 +1,31 @@
|
||||
name: Build
|
||||
|
||||
on:
|
||||
on:
|
||||
pull_request:
|
||||
types:
|
||||
- ready_for_review
|
||||
- opened
|
||||
branches:
|
||||
- "main"
|
||||
- "dev"
|
||||
- main
|
||||
- dev
|
||||
paths:
|
||||
- "icicle/**"
|
||||
- "src/**"
|
||||
- "Cargo.toml"
|
||||
- "build.rs"
|
||||
- icicle/**
|
||||
- src/**
|
||||
- Cargo.toml
|
||||
- build.rs
|
||||
push:
|
||||
branches-ignore:
|
||||
- main
|
||||
- dev
|
||||
paths:
|
||||
- icicle/**
|
||||
- src/**
|
||||
- Cargo.toml
|
||||
- build.rs
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ARCH_TYPE: sm_70
|
||||
DEFAULT_STREAM: per-thread
|
||||
|
||||
jobs:
|
||||
build-linux:
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -5,6 +5,8 @@
|
||||
*.cubin
|
||||
*.bin
|
||||
*.fatbin
|
||||
*.nsys-rep
|
||||
*.ncu-rep
|
||||
**/target
|
||||
**/.vscode
|
||||
**/.*lock*csv#
|
||||
|
||||
48
Cargo.toml
48
Cargo.toml
@@ -1,9 +1,49 @@
|
||||
[workspace]
|
||||
name = "icicle"
|
||||
[package]
|
||||
name = "icicle-utils"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = [ "Ingonyama" ]
|
||||
description = "An implementation of the Ingonyama CUDA Library"
|
||||
homepage = "https://www.ingonyama.com"
|
||||
repository = "https://github.com/ingonyama-zk/icicle"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
[[bench]]
|
||||
name = "ntt"
|
||||
path = "benches/ntt.rs"
|
||||
harness = false
|
||||
|
||||
members = ["icicle-core", "bls12-381", "bls12-377", "bn254"]
|
||||
[[bench]]
|
||||
name = "msm"
|
||||
path = "benches/msm.rs"
|
||||
harness = false
|
||||
|
||||
[dependencies]
|
||||
hex = "*"
|
||||
ark-std = "0.3.0"
|
||||
ark-ff = "0.3.0"
|
||||
ark-poly = "0.3.0"
|
||||
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
|
||||
ark-bls12-381 = "0.3.0"
|
||||
ark-bls12-377 = "0.3.0"
|
||||
ark-bn254 = "0.3.0"
|
||||
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_derive = "1.0"
|
||||
serde_cbor = "0.11.2"
|
||||
|
||||
rustacuda = "0.1"
|
||||
rustacuda_core = "0.1"
|
||||
rustacuda_derive = "0.1"
|
||||
|
||||
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
|
||||
|
||||
[build-dependencies]
|
||||
cc = { version = "1.0", features = ["parallel"] }
|
||||
|
||||
[dev-dependencies]
|
||||
"criterion" = "0.4.0"
|
||||
|
||||
[features]
|
||||
default = ["bls12_381"]
|
||||
bls12_381 = ["ark-bls12-381/curve"]
|
||||
g2 = []
|
||||
|
||||
54
README.md
54
README.md
@@ -4,6 +4,13 @@
|
||||
|
||||

|
||||
|
||||
|
||||
<div align="center">
|
||||

|
||||

|
||||

|
||||
</div>
|
||||
|
||||
## Background
|
||||
|
||||
Zero Knowledge Proofs (ZKPs) are considered one of the greatest achievements of modern cryptography. Accordingly, ZKPs are expected to disrupt a number of industries and will usher in an era of trustless and privacy preserving services and infrastructure.
|
||||
@@ -36,7 +43,7 @@ ICICLE is a CUDA implementation of general functions widely used in ZKP. ICICLE
|
||||
|
||||
```sh
|
||||
mkdir -p build
|
||||
nvcc -o build/<ENTER_DIR_NAME> ./icicle/appUtils/ntt/ntt.cu ./icicle/appUtils/msm/msm.cu ./icicle/appUtils/vector_manipulation/ve_mod_mult.cu ./icicle/primitives/projective.cu -lib -arch=native
|
||||
nvcc -o build/<binary_name> ./icicle/curves/index.cu -lib -arch=native
|
||||
```
|
||||
|
||||
### Testing the CUDA code
|
||||
@@ -95,52 +102,64 @@ Supporting additional curves can be done as follows:
|
||||
|
||||
Create a JSON file with the curve parameters. The curve is defined by the following parameters:
|
||||
- ``curve_name`` - e.g. ``bls12_381``.
|
||||
- ``modolus_p`` - scalar field modolus (in decimal).
|
||||
- ``bit_count_p`` - number of bits needed to represent `` modolus_p`` .
|
||||
- ``limb_p`` - number of bytes needed to represent `` modolus_p`` (rounded).
|
||||
- ``modulus_p`` - scalar field modulus (in decimal).
|
||||
- ``bit_count_p`` - number of bits needed to represent ``modulus_p``.
|
||||
- ``limb_p`` - number of 32-bit limbs needed to represent ``modulus_p`` (rounded up).
|
||||
- ``ntt_size`` - log of the maximal size subgroup of the scalar field.
|
||||
- ``modolus_q`` - base field modulus (in decimal).
|
||||
- ``bit_count_q`` - number of bits needed to represent `` modolus_q`` .
|
||||
- ``limb_q`` number of bytes needed to represent `` modolus_p`` (rounded).
|
||||
- ``modulus_q`` - base field modulus (in decimal).
|
||||
- ``bit_count_q`` - number of bits needed to represent ``modulus_q``.
|
||||
- ``limb_q`` - number of 32-bit limbs needed to represent ``modulus_q`` (rounded up).
|
||||
- ``weierstrass_b`` - Weierstrass constant of the curve.
|
||||
- ``weierstrass_b_g2_re`` - real part of the Weierstrass constant of the g2 curve.
|
||||
- ``weierstrass_b_g2_im`` - imaginary part of the Weierstrass constant of the g2 curve.
|
||||
- ``gen_x`` - x-value of a generator element for the curve.
|
||||
- ``gen_y`` - y-value of a generator element for the curve.
|
||||
- ``gen_x_re`` - real x-value of a generator element for the g2 curve.
|
||||
- ``gen_x_im`` - imaginary x-value of a generator element for the g2 curve.
|
||||
- ``gen_y_re`` - real y-value of a generator element for the g2 curve.
|
||||
- ``gen_y_im`` - imaginary y-value of a generator element for the g2 curve.
|
||||
|
||||
Here's an example for BLS12-381.
|
||||
```
|
||||
{
|
||||
"curve_name" : "bls12_381",
|
||||
"modolus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
|
||||
"modulus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
|
||||
"bit_count_p" : 255,
|
||||
"limb_p" : 8,
|
||||
"ntt_size" : 32,
|
||||
"modolus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
|
||||
"modulus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
|
||||
"bit_count_q" : 381,
|
||||
"limb_q" : 12,
|
||||
"weierstrass_b" : 4,
|
||||
"weierstrass_b_g2_re":4,
|
||||
"weierstrass_b_g2_im":4,
|
||||
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
|
||||
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569
|
||||
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569,
|
||||
"gen_x_re" : 352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160,
|
||||
"gen_x_im" : 3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758,
|
||||
"gen_y_re" : 1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905,
|
||||
"gen_y_im" : 927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582
|
||||
}
|
||||
```
|
||||
|
||||
Save the parameters JSON file in ``curve_parameters``.
|
||||
Save the parameters JSON file under the ``curve_parameters`` directory.
|
||||
|
||||
Then run the Python script ``new_curve_script.py `` from the main icicle folder:
|
||||
Then run the Python script ``new_curve_script.py`` from the root folder:
|
||||
|
||||
```
|
||||
python3 ./curve_parameters/new_curve_script_rust.py ./curve_parameters/bls12_381.json
|
||||
python3 ./curve_parameters/new_curve_script.py ./curve_parameters/bls12_381.json
|
||||
```
|
||||
|
||||
The script does the following:
|
||||
- Creates a folder in ``icicle/curves`` with the curve name, which contains all of the files needed for the supported operations in cuda.
|
||||
- Adds the curve exported operations to ``icicle/curves/index.cu``.
|
||||
- Adds the curve's exported operations to ``icicle/curves/index.cu``.
|
||||
- Creates a file with the curve name in ``src/curves`` with the relevant objects for the curve.
|
||||
- Creates a test file with the curve name in ``src``.
|
||||
|
||||
Testing the new curve can be done by running the tests in ``tests_curve_name`` (e.g. ``tests_bls12_381``).
|
||||
## Contributions
|
||||
|
||||
Join our [Discord Server](https://discord.gg/Y4SkbDf2Ff) and find us on the icicle channel. We will be happy to work together to support your use case and talk features, bugs and design.
|
||||
Join our [Discord Server][DISCORD] and find us on the icicle channel. We will be happy to work together to support your use case and talk features, bugs and design.
|
||||
|
||||
### Hall of Fame
|
||||
|
||||
@@ -153,13 +172,16 @@ ICICLE is distributed under the terms of the MIT License.
|
||||
See [LICENSE-MIT][LMIT] for details.
|
||||
|
||||
<!-- Begin Links -->
|
||||
[BLS12-381]: ./icicle/curves/bls12_381.cuh
|
||||
[BLS12-381]: ./icicle/curves/bls12_381/supported_operations.cu
|
||||
[BLS12-377]: ./icicle/curves/bls12_377/supported_operations.cu
|
||||
[BN254]: ./icicle/curves/bn254/supported_operations.cu
|
||||
[NVCC]: https://docs.nvidia.com/cuda/#installation-guides
|
||||
[CRV_TEMPLATE]: ./icicle/curves/curve_template.cuh
|
||||
[CRV_CONFIG]: ./icicle/curves/curve_config.cuh
|
||||
[B_SCRIPT]: ./build.rs
|
||||
[FDI]: https://github.com/ingonyama-zk/fast-danksharding
|
||||
[LMIT]: ./LICENSE
|
||||
[DISCORD]: https://discord.gg/Y4SkbDf2Ff
|
||||
[googletest]: https://github.com/google/googletest/
|
||||
|
||||
<!-- End Links -->
|
||||
|
||||
52
benches/msm.rs
Normal file
52
benches/msm.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
extern crate criterion;
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
|
||||
use icicle_utils::test_bls12_381::{
|
||||
commit_batch_bls12_381, generate_random_points_bls12_381, set_up_scalars_bls12_381,
|
||||
};
|
||||
use icicle_utils::utils::*;
|
||||
#[cfg(feature = "g2")]
|
||||
use icicle_utils::{commit_batch_g2, field::ExtensionField};
|
||||
|
||||
use rustacuda::prelude::*;
|
||||
|
||||
const LOG_MSM_SIZES: [usize; 1] = [12];
|
||||
const BATCH_SIZES: [usize; 2] = [128, 256];
|
||||
|
||||
fn bench_msm(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("MSM");
|
||||
for log_msm_size in LOG_MSM_SIZES {
|
||||
for batch_size in BATCH_SIZES {
|
||||
let msm_size = 1 << log_msm_size;
|
||||
let (scalars, _, _) = set_up_scalars_bls12_381(msm_size, 0, false);
|
||||
let batch_scalars = vec![scalars; batch_size].concat();
|
||||
let mut d_scalars = DeviceBuffer::from_slice(&batch_scalars[..]).unwrap();
|
||||
|
||||
let points = generate_random_points_bls12_381(msm_size, get_rng(None));
|
||||
let batch_points = vec![points; batch_size].concat();
|
||||
let mut d_points = DeviceBuffer::from_slice(&batch_points[..]).unwrap();
|
||||
|
||||
#[cfg(feature = "g2")]
|
||||
let g2_points = generate_random_points::<ExtensionField>(msm_size, get_rng(None));
|
||||
#[cfg(feature = "g2")]
|
||||
let g2_batch_points = vec![g2_points; batch_size].concat();
|
||||
#[cfg(feature = "g2")]
|
||||
let mut d_g2_points = DeviceBuffer::from_slice(&g2_batch_points[..]).unwrap();
|
||||
|
||||
group.sample_size(30).bench_function(
|
||||
&format!("MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|
||||
|b| b.iter(|| commit_batch_bls12_381(&mut d_points, &mut d_scalars, batch_size)),
|
||||
);
|
||||
|
||||
#[cfg(feature = "g2")]
|
||||
group.sample_size(10).bench_function(
|
||||
&format!("G2 MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|
||||
|b| b.iter(|| commit_batch_g2(&mut d_g2_points, &mut d_scalars, batch_size))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(msm_benches, bench_msm);
|
||||
criterion_main!(msm_benches);
|
||||
34
benches/ntt.rs
Normal file
34
benches/ntt.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
extern crate criterion;
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
|
||||
use icicle_utils::test_bls12_381::{interpolate_scalars_batch_bls12_381, interpolate_points_batch_bls12_381, set_up_scalars_bls12_381, set_up_points_bls12_381};
|
||||
|
||||
|
||||
const LOG_NTT_SIZES: [usize; 1] = [15];
|
||||
const BATCH_SIZES: [usize; 2] = [8, 16];
|
||||
|
||||
fn bench_ntt(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("NTT");
|
||||
for log_ntt_size in LOG_NTT_SIZES {
|
||||
for batch_size in BATCH_SIZES {
|
||||
let ntt_size = 1 << log_ntt_size;
|
||||
let (_, mut d_evals, mut d_domain) = set_up_scalars_bls12_381(ntt_size * batch_size, log_ntt_size, true);
|
||||
let (_, mut d_points_evals, _) = set_up_points_bls12_381(ntt_size * batch_size, log_ntt_size, true);
|
||||
|
||||
group.sample_size(100).bench_function(
|
||||
&format!("Scalar NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| interpolate_scalars_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size))
|
||||
);
|
||||
|
||||
group.sample_size(10).bench_function(
|
||||
&format!("EC NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| interpolate_points_batch_bls12_381(&mut d_points_evals, &mut d_domain, batch_size))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(ntt_benches, bench_ntt);
|
||||
criterion_main!(ntt_benches);
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
[package]
|
||||
name = "bls12-377"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = [ "Ingonyama" ]
|
||||
|
||||
[dependencies]
|
||||
icicle-core = { path = "../icicle-core" }
|
||||
|
||||
hex = "*"
|
||||
ark-std = "0.3.0"
|
||||
ark-ff = "0.3.0"
|
||||
ark-poly = "0.3.0"
|
||||
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
|
||||
ark-bls12-377 = "0.3.0"
|
||||
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_derive = "1.0"
|
||||
serde_cbor = "0.11.2"
|
||||
|
||||
rustacuda = "0.1"
|
||||
rustacuda_core = "0.1"
|
||||
rustacuda_derive = "0.1"
|
||||
|
||||
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
|
||||
|
||||
[build-dependencies]
|
||||
cc = { version = "1.0", features = ["parallel"] }
|
||||
|
||||
[dev-dependencies]
|
||||
"criterion" = "0.4.0"
|
||||
|
||||
[features]
|
||||
g2 = []
|
||||
@@ -1,4 +0,0 @@
|
||||
/// Compile-time description of a field whose elements are stored as `NUM_LIMBS` `u32` limbs.
pub trait Field<const NUM_LIMBS: usize> {
    /// The field modulus as `u32` limbs (the identifier keeps its existing spelling).
    /// NOTE(review): the limb order is not established by this trait — confirm with implementors.
    const MODOLUS: [u32; NUM_LIMBS];

    /// Number of limbs per element; defaults to the const generic parameter.
    const LIMBS: usize = NUM_LIMBS;
}
|
||||
@@ -1,3 +0,0 @@
|
||||
pub mod field;
|
||||
pub mod scalar;
|
||||
pub mod point;
|
||||
@@ -1,106 +0,0 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
use super::scalar::{get_fixed_limbs, self};
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointT<BF: scalar::ScalarTrait> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
pub z: BF,
|
||||
}
|
||||
|
||||
/// The default point is the same value as [`PointT::zero`].
impl<BF> Default for PointT<BF>
where
    BF: DeviceCopy + scalar::ScalarTrait,
{
    fn default() -> Self {
        Self::zero()
    }
}
|
||||
|
||||
impl<BF> PointT<BF>
where
    BF: DeviceCopy + scalar::ScalarTrait,
{
    /// Returns the point `(0, 1, 0)`; identical to [`PointT::infinity`].
    pub fn zero() -> Self {
        Self::infinity()
    }

    /// Returns the point with `x = 0`, `y = 1`, `z = 0`.
    pub fn infinity() -> Self {
        Self {
            x: BF::zero(),
            y: BF::one(),
            z: BF::zero(),
        }
    }
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinityT<BF> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
}
|
||||
|
||||
/// The default affine point has both coordinates set to `BF::zero()`.
impl<BF> Default for PointAffineNoInfinityT<BF>
where
    BF: scalar::ScalarTrait,
{
    fn default() -> Self {
        Self {
            x: BF::zero(),
            y: BF::zero(),
        }
    }
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointAffineNoInfinityT<BF> {
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> PointT<BF> {
|
||||
PointT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BF::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
PointT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y),
|
||||
z: BF::from_limbs(z)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> PointT<BF> {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BF::base_limbs(), "length must be 3 * {}", BF::base_limbs());
|
||||
PointT {
|
||||
x: BF::from_limbs(value[..BF::base_limbs()].try_into().unwrap()),
|
||||
y: BF::from_limbs(value[BF::base_limbs()..BF::base_limbs() * 2].try_into().unwrap()),
|
||||
z: BF::from_limbs(value[BF::base_limbs() * 2..].try_into().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinityT<BF> {
|
||||
PointAffineNoInfinityT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,102 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination};
|
||||
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use super::field::{Field, self};
|
||||
|
||||
/// Copies `val` into a fixed-size array of `NUM_LIMBS` limbs.
///
/// A shorter slice is copied into the front of the array and the remaining limbs are zero.
///
/// # Panics
/// Panics with "slice has too many elements" when `val.len() > NUM_LIMBS`.
pub fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    if val.len() > NUM_LIMBS {
        panic!("slice has too many elements");
    }

    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..val.len()].copy_from_slice(val);
    limbs
}
|
||||
|
||||
/// Common interface of limb-based field elements used by the point types.
pub trait ScalarTrait {
    /// Number of `u32` limbs in one element.
    fn base_limbs() -> usize;

    /// The element representing zero.
    fn zero() -> Self;

    /// Builds an element from `u32` limbs.
    fn from_limbs(value: &[u32]) -> Self;

    /// The element representing one.
    fn one() -> Self;

    /// Serialises the element to bytes, little-endian.
    fn to_bytes_le(&self) -> Vec<u8>;

    /// Borrows the element's limbs.
    fn limbs(&self) -> &[u32];
}
|
||||
|
||||
/// A field element stored as `NUM_LIMBS` `u32` limbs and tagged with the field marker `M`.
///
/// `#[repr(C)]` keeps the declared field order; `phantom` is zero-sized and only carries `M`.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct ScalarT<M, const NUM_LIMBS: usize> {
    /// Zero-sized marker tying the element to its field type.
    pub(crate) phantom: PhantomData<M>,
    /// The limbs of the element.
    pub(crate) value: [u32; NUM_LIMBS],
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarTrait for ScalarT<M, NUM_LIMBS>
|
||||
where
|
||||
M: Field<NUM_LIMBS>,
|
||||
{
|
||||
|
||||
fn base_limbs() -> usize {
|
||||
return NUM_LIMBS;
|
||||
}
|
||||
|
||||
fn zero() -> Self {
|
||||
ScalarT {
|
||||
value: [0u32; NUM_LIMBS],
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
value: get_fixed_limbs(value),
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
ScalarT { value: s, phantom: PhantomData }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.value
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
fn limbs(&self) -> &[u32] {
|
||||
&self.value
|
||||
}
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarT<M, NUM_LIMBS> where M: field::Field<NUM_LIMBS>{
|
||||
pub fn from_limbs_le(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
Self::from_limbs(value)
|
||||
}
|
||||
|
||||
pub fn from_limbs_be(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
let mut value = value.to_vec();
|
||||
value.reverse();
|
||||
Self::from_limbs_le(&value)
|
||||
}
|
||||
|
||||
// Additional Functions
|
||||
pub fn add(&self, other:ScalarT<M, NUM_LIMBS>) -> ScalarT<M,NUM_LIMBS>{ // overload +
|
||||
return ScalarT{value: [self.value[0] + other.value[0];NUM_LIMBS], phantom: PhantomData };
|
||||
}
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination, DeviceCopy};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::basic_structs::point::{PointT, PointAffineNoInfinityT};
|
||||
use crate::basic_structs::scalar::ScalarT;
|
||||
use crate::basic_structs::field::Field;
|
||||
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct ScalarField;
|
||||
impl Field<8> for ScalarField {
|
||||
const MODOLUS: [u32; 8] = [0x0;8];
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct BaseField;
|
||||
impl Field<12> for BaseField {
|
||||
const MODOLUS: [u32; 12] = [0x0;12];
|
||||
}
|
||||
|
||||
|
||||
pub type Scalar = ScalarT<ScalarField,8>;
|
||||
impl Default for Scalar {
|
||||
fn default() -> Self {
|
||||
Self{value: [0x0;ScalarField::LIMBS], phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl DeviceCopy for Scalar{}
|
||||
|
||||
|
||||
pub type Base = ScalarT<BaseField,12>;
|
||||
impl Default for Base {
|
||||
fn default() -> Self {
|
||||
Self{value: [0x0;BaseField::LIMBS], phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl DeviceCopy for Base{}
|
||||
|
||||
pub type Point = PointT<Base>;
|
||||
pub type PointAffineNoInfinity = PointAffineNoInfinityT<Base>;
|
||||
|
||||
extern "C" {
|
||||
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
@@ -1,798 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use ark_std::UniformRand;
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda::CudaFlags;
|
||||
use rustacuda::memory::DeviceBox;
|
||||
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
|
||||
use rustacuda_core::DevicePointer;
|
||||
use std::mem::transmute;
|
||||
use crate::basic_structs::scalar::ScalarTrait;
|
||||
use crate::curve_structs::*;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
use ark_bls12_377::{Fq as Fq_BLS12_377, Fr as Fr_BLS12_377, G1Affine as G1Affine_BLS12_377, G1Projective as G1Projective_BLS12_377};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use rustacuda::memory::{CopyDestination, DeviceCopy};
|
||||
|
||||
extern "C" {
|
||||
fn msm_cuda(
|
||||
out: *mut Point,
|
||||
points: *const PointAffineNoInfinity,
|
||||
scalars: *const Scalar,
|
||||
count: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn msm_batch_cuda(
|
||||
out: *mut Point,
|
||||
points: *const PointAffineNoInfinity,
|
||||
scalars: *const Scalar,
|
||||
batch_size: usize,
|
||||
msm_size: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn commit_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_scalars: DevicePointer<Scalar>,
|
||||
d_points: DevicePointer<PointAffineNoInfinity>,
|
||||
count: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn commit_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_scalars: DevicePointer<Scalar>,
|
||||
d_points: DevicePointer<PointAffineNoInfinity>,
|
||||
count: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn build_domain_cuda(domain_size: usize, logn: usize, inverse: bool, device_id: usize) -> DevicePointer<Scalar>;
|
||||
|
||||
fn ntt_cuda(inout: *mut Scalar, n: usize, inverse: bool, device_id: usize) -> c_int;
|
||||
|
||||
fn ecntt_cuda(inout: *mut Point, n: usize, inverse: bool, device_id: usize) -> c_int;
|
||||
|
||||
fn ntt_batch_cuda(
|
||||
inout: *mut Scalar,
|
||||
arr_size: usize,
|
||||
n: usize,
|
||||
inverse: bool,
|
||||
) -> c_int;
|
||||
|
||||
fn ecntt_batch_cuda(inout: *mut Point, arr_size: usize, n: usize, inverse: bool) -> c_int;
|
||||
|
||||
fn interpolate_scalars_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_evaluations: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_scalars_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_evaluations: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_points_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_evaluations: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_points_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_evaluations: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_on_coset_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_on_coset_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_on_coset_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_on_coset_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_scalars_cuda(
|
||||
d_arr: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_scalars_batch_cuda(
|
||||
d_arr: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_points_cuda(
|
||||
d_arr: DevicePointer<Point>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_points_batch_cuda(
|
||||
d_arr: DevicePointer<Point>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn vec_mod_mult_point(
|
||||
inout: *mut Point,
|
||||
scalars: *const Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
|
||||
fn vec_mod_mult_scalar(
|
||||
inout: *mut Scalar,
|
||||
scalars: *const Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
|
||||
fn matrix_vec_mod_mult(
|
||||
matrix_flattened: *const Scalar,
|
||||
input: *const Scalar,
|
||||
output: *mut Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
}
|
||||
|
||||
pub fn msm(points: &[PointAffineNoInfinity], scalars: &[Scalar], device_id: usize) -> Point {
|
||||
let count = points.len();
|
||||
if count != scalars.len() {
|
||||
todo!("variable length")
|
||||
}
|
||||
|
||||
let mut ret = Point::zero();
|
||||
unsafe {
|
||||
msm_cuda(
|
||||
&mut ret as *mut _ as *mut Point,
|
||||
points as *const _ as *const PointAffineNoInfinity,
|
||||
scalars as *const _ as *const Scalar,
|
||||
scalars.len(),
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn msm_batch(
|
||||
points: &[PointAffineNoInfinity],
|
||||
scalars: &[Scalar],
|
||||
batch_size: usize,
|
||||
device_id: usize,
|
||||
) -> Vec<Point> {
|
||||
let count = points.len();
|
||||
if count != scalars.len() {
|
||||
todo!("variable length")
|
||||
}
|
||||
|
||||
let mut ret = vec![Point::zero(); batch_size];
|
||||
|
||||
unsafe {
|
||||
msm_batch_cuda(
|
||||
&mut ret[0] as *mut _ as *mut Point,
|
||||
points as *const _ as *const PointAffineNoInfinity,
|
||||
scalars as *const _ as *const Scalar,
|
||||
batch_size,
|
||||
count / batch_size,
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn commit(
|
||||
points: &mut DeviceBuffer<PointAffineNoInfinity>,
|
||||
scalars: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBox<Point> {
|
||||
let mut res = DeviceBox::new(&Point::zero()).unwrap();
|
||||
unsafe {
|
||||
commit_cuda(
|
||||
res.as_device_ptr(),
|
||||
scalars.as_device_ptr(),
|
||||
points.as_device_ptr(),
|
||||
scalars.len(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn commit_batch(
|
||||
points: &mut DeviceBuffer<PointAffineNoInfinity>,
|
||||
scalars: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(batch_size).unwrap() };
|
||||
unsafe {
|
||||
commit_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
scalars.as_device_ptr(),
|
||||
points.as_device_ptr(),
|
||||
scalars.len() / batch_size,
|
||||
batch_size,
|
||||
0,
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Compute an in-place NTT on the input data.
|
||||
fn ntt_internal(values: &mut [Scalar], device_id: usize, inverse: bool) -> i32 {
|
||||
let ret_code = unsafe {
|
||||
ntt_cuda(
|
||||
values as *mut _ as *mut Scalar,
|
||||
values.len(),
|
||||
inverse,
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
ret_code
|
||||
}
|
||||
|
||||
pub fn ntt(values: &mut [Scalar], device_id: usize) {
|
||||
ntt_internal(values, device_id, false);
|
||||
}
|
||||
|
||||
pub fn intt(values: &mut [Scalar], device_id: usize) {
|
||||
ntt_internal(values, device_id, true);
|
||||
}
|
||||
|
||||
/// Compute an in-place NTT on the input data.
|
||||
fn ntt_internal_batch(
|
||||
values: &mut [Scalar],
|
||||
device_id: usize,
|
||||
batch_size: usize,
|
||||
inverse: bool,
|
||||
) -> i32 {
|
||||
unsafe {
|
||||
ntt_batch_cuda(
|
||||
values as *mut _ as *mut Scalar,
|
||||
values.len(),
|
||||
batch_size,
|
||||
inverse,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ntt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
|
||||
ntt_internal_batch(values, 0, batch_size, false);
|
||||
}
|
||||
|
||||
pub fn intt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
|
||||
ntt_internal_batch(values, 0, batch_size, true);
|
||||
}
|
||||
|
||||
/// Compute an in-place ECNTT on the input data.
|
||||
fn ecntt_internal(values: &mut [Point], inverse: bool, device_id: usize) -> i32 {
|
||||
unsafe {
|
||||
ecntt_cuda(
|
||||
values as *mut _ as *mut Point,
|
||||
values.len(),
|
||||
inverse,
|
||||
device_id,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ecntt(values: &mut [Point], device_id: usize) {
|
||||
ecntt_internal(values, false, device_id);
|
||||
}
|
||||
|
||||
/// Compute an in-place iECNTT on the input data.
|
||||
pub fn iecntt(values: &mut [Point], device_id: usize) {
|
||||
ecntt_internal(values, true, device_id);
|
||||
}
|
||||
|
||||
/// Compute an in-place ECNTT on the input data.
|
||||
fn ecntt_internal_batch(
|
||||
values: &mut [Point],
|
||||
device_id: usize,
|
||||
batch_size: usize,
|
||||
inverse: bool,
|
||||
) -> i32 {
|
||||
unsafe {
|
||||
ecntt_batch_cuda(
|
||||
values as *mut _ as *mut Point,
|
||||
values.len(),
|
||||
batch_size,
|
||||
inverse,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
|
||||
ecntt_internal_batch(values, 0, batch_size, false);
|
||||
}
|
||||
|
||||
/// Compute an in-place iECNTT on the input data.
|
||||
pub fn iecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
|
||||
ecntt_internal_batch(values, 0, batch_size, true);
|
||||
}
|
||||
|
||||
pub fn build_domain(domain_size: usize, logn: usize, inverse: bool) -> DeviceBuffer<Scalar> {
|
||||
unsafe {
|
||||
DeviceBuffer::from_raw_parts(build_domain_cuda(
|
||||
domain_size,
|
||||
logn,
|
||||
inverse,
|
||||
0
|
||||
), domain_size)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn reverse_order_scalars(
|
||||
d_scalars: &mut DeviceBuffer<Scalar>,
|
||||
) {
|
||||
unsafe { reverse_order_scalars_cuda(
|
||||
d_scalars.as_device_ptr(),
|
||||
d_scalars.len(),
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_scalars_batch(
|
||||
d_scalars: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) {
|
||||
unsafe { reverse_order_scalars_batch_cuda(
|
||||
d_scalars.as_device_ptr(),
|
||||
d_scalars.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_points(
|
||||
d_points: &mut DeviceBuffer<Point>,
|
||||
) {
|
||||
unsafe { reverse_order_points_cuda(
|
||||
d_points.as_device_ptr(),
|
||||
d_points.len(),
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_points_batch(
|
||||
d_points: &mut DeviceBuffer<Point>,
|
||||
batch_size: usize,
|
||||
) {
|
||||
unsafe { reverse_order_points_batch_cuda(
|
||||
d_points.as_device_ptr(),
|
||||
d_points.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn interpolate_scalars(
|
||||
d_evaluations: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe { interpolate_scalars_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_scalars_batch(
|
||||
d_evaluations: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe { interpolate_scalars_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
batch_size,
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_points(
|
||||
d_evaluations: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe { interpolate_points_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_points_batch(
|
||||
d_evaluations: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe { interpolate_points_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
batch_size,
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_on_coset(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_on_coset_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_on_coset_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_on_coset_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_on_coset(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_on_coset_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_on_coset_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_on_coset_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn multp_vec(a: &mut [Point], b: &[Scalar], device_id: usize) {
|
||||
assert_eq!(a.len(), b.len());
|
||||
unsafe {
|
||||
vec_mod_mult_point(
|
||||
a as *mut _ as *mut Point,
|
||||
b as *const _ as *const Scalar,
|
||||
a.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mult_sc_vec(a: &mut [Scalar], b: &[Scalar], device_id: usize) {
|
||||
assert_eq!(a.len(), b.len());
|
||||
unsafe {
|
||||
vec_mod_mult_scalar(
|
||||
a as *mut _ as *mut Scalar,
|
||||
b as *const _ as *const Scalar,
|
||||
a.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Multiply a matrix by a scalar:
|
||||
// `a` - flattenned matrix;
|
||||
// `b` - vector to multiply `a` by;
|
||||
pub fn mult_matrix_by_vec(a: &[Scalar], b: &[Scalar], device_id: usize) -> Vec<Scalar> {
|
||||
let mut c = Vec::with_capacity(b.len());
|
||||
for i in 0..b.len() {
|
||||
c.push(Scalar::zero());
|
||||
}
|
||||
unsafe {
|
||||
matrix_vec_mod_mult(
|
||||
a as *const _ as *const Scalar,
|
||||
b as *const _ as *const Scalar,
|
||||
c.as_mut_slice() as *mut _ as *mut Scalar,
|
||||
b.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
c
|
||||
}
|
||||
|
||||
pub fn clone_buffer<T: DeviceCopy>(buf: &mut DeviceBuffer<T>) -> DeviceBuffer<T> {
|
||||
let mut buf_cpy = unsafe { DeviceBuffer::uninitialized(buf.len()).unwrap() };
|
||||
unsafe { buf_cpy.copy_from(buf) };
|
||||
return buf_cpy;
|
||||
}
|
||||
|
||||
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> {
|
||||
let rng: Box<dyn RngCore> = match seed {
|
||||
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
|
||||
None => Box::new(rand::thread_rng()),
|
||||
};
|
||||
rng
|
||||
}
|
||||
|
||||
fn set_up_device() {
|
||||
// Set up the context, load the module, and create a stream to run kernels in.
|
||||
rustacuda::init(CudaFlags::empty()).unwrap();
|
||||
let device = Device::get_device(0).unwrap();
|
||||
let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device).unwrap();
|
||||
}
|
||||
|
||||
pub fn generate_random_points(
|
||||
count: usize,
|
||||
mut rng: Box<dyn RngCore>,
|
||||
) -> Vec<PointAffineNoInfinity> {
|
||||
(0..count)
|
||||
.map(|_| Point::from_ark(G1Projective_BLS12_377::rand(&mut rng)).to_xy_strip_z())
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_random_points_proj(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Point> {
|
||||
(0..count)
|
||||
.map(|_| Point::from_ark(G1Projective_BLS12_377::rand(&mut rng)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_random_scalars(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Scalar> {
|
||||
(0..count)
|
||||
.map(|_| Scalar::from_ark(Fr_BLS12_377::rand(&mut rng).into_repr()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn set_up_points(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Point>, DeviceBuffer<Point>, DeviceBuffer<Scalar>) {
|
||||
set_up_device();
|
||||
|
||||
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
|
||||
|
||||
let seed = Some(0); // fix the rng to get two equal scalar
|
||||
let vector = generate_random_points_proj(test_size, get_rng(seed));
|
||||
let mut vector_mut = vector.clone();
|
||||
|
||||
let mut d_vector = DeviceBuffer::from_slice(&vector[..]).unwrap();
|
||||
(vector_mut, d_vector, d_domain)
|
||||
}
|
||||
|
||||
pub fn set_up_scalars(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Scalar>, DeviceBuffer<Scalar>, DeviceBuffer<Scalar>) {
|
||||
set_up_device();
|
||||
|
||||
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
|
||||
|
||||
let seed = Some(0); // fix the rng to get two equal scalars
|
||||
let mut vector_mut = generate_random_scalars(test_size, get_rng(seed));
|
||||
|
||||
let mut d_vector = DeviceBuffer::from_slice(&vector_mut[..]).unwrap();
|
||||
(vector_mut, d_vector, d_domain)
|
||||
}
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
pub mod test_bls12_377;
|
||||
pub mod basic_structs;
|
||||
pub mod from_cuda;
|
||||
pub mod curve_structs;
|
||||
@@ -1,816 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use ark_std::UniformRand;
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda::CudaFlags;
|
||||
use rustacuda::memory::DeviceBox;
|
||||
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
|
||||
use rustacuda_core::DevicePointer;
|
||||
use std::mem::transmute;
|
||||
pub use crate::basic_structs::scalar::ScalarTrait;
|
||||
pub use crate::curve_structs::*;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
use ark_bls12_377::{Fq as Fq_BLS12_377, Fr as Fr_BLS12_377, G1Affine as G1Affine_BLS12_377, G1Projective as G1Projective_BLS12_377};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use rustacuda::memory::{CopyDestination, DeviceCopy};
|
||||
|
||||
|
||||
impl Scalar {
|
||||
pub fn to_biginteger254(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_biginteger256(ark: BigInteger256) -> Self {
|
||||
Self{ value: u64_vec_to_u32_vec(&ark.0).try_into().unwrap(), phantom : PhantomData}
|
||||
}
|
||||
|
||||
pub fn to_biginteger256_transmute(&self) -> BigInteger256 {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_biginteger_transmute(v: BigInteger256) -> Scalar {
|
||||
Scalar{ value: unsafe{ transmute(v)}, phantom : PhantomData }
|
||||
}
|
||||
|
||||
pub fn to_ark_transmute(&self) -> Fr_BLS12_377 {
|
||||
unsafe { std::mem::transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_ark_transmute(v: &Fr_BLS12_377) -> Scalar {
|
||||
unsafe { std::mem::transmute_copy(v) }
|
||||
}
|
||||
|
||||
pub fn to_ark_mod_p(&self) -> Fr_BLS12_377 {
|
||||
Fr_BLS12_377::new(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap()))
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> Fr_BLS12_377 {
|
||||
Fr_BLS12_377::from_repr(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())).unwrap()
|
||||
}
|
||||
|
||||
pub fn from_ark(v: BigInteger256) -> Scalar {
|
||||
Self { value : u64_vec_to_u32_vec(&v.0).try_into().unwrap(), phantom: PhantomData}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl Base {
|
||||
pub fn to_ark(&self) -> BigInteger384 {
|
||||
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger384) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl Point {
|
||||
pub fn to_ark(&self) -> G1Projective_BLS12_377 {
|
||||
self.to_ark_affine().into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_BLS12_377 {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_BLS12_377::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_BLS12_377) -> Point {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point {
|
||||
x: Base::from_ark((ark.x * z_invsq).into_repr()),
|
||||
y: Base::from_ark((ark.y * z_invq3).into_repr()),
|
||||
z: Base::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PointAffineNoInfinity {
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_BLS12_377 {
|
||||
G1Affine_BLS12_377::new(Fq_BLS12_377::new(self.x.to_ark()), Fq_BLS12_377::new(self.y.to_ark()), false)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_BLS12_377 {
|
||||
G1Affine_BLS12_377::new(
|
||||
Fq_BLS12_377::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_BLS12_377::from_repr(self.y.to_ark()).unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(p: &G1Affine_BLS12_377) -> Self {
|
||||
PointAffineNoInfinity {
|
||||
x: Base::from_ark(p.x.into_repr()),
|
||||
y: Base::from_ark(p.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Point {
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity {
|
||||
x: Base::from_ark(ark_affine.x.into_repr()),
|
||||
y: Base::from_ark(ark_affine.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests_bls12_377 {
|
||||
use std::ops::Add;
|
||||
use ark_bls12_377::{Fr, G1Affine, G1Projective};
|
||||
use ark_ec::{msm::VariableBaseMSM, AffineCurve, ProjectiveCurve};
|
||||
use ark_ff::{FftField, Field, Zero, PrimeField};
|
||||
use ark_std::UniformRand;
|
||||
use rustacuda::prelude::{DeviceBuffer, CopyDestination};
|
||||
use crate::curve_structs::{Point, Scalar, Base};
|
||||
use crate::basic_structs::scalar::ScalarTrait;
|
||||
use crate::from_cuda::{generate_random_points, get_rng, generate_random_scalars, msm, msm_batch, set_up_scalars, commit, commit_batch, ntt, intt, generate_random_points_proj, ecntt, iecntt, ntt_batch, ecntt_batch, iecntt_batch, intt_batch, reverse_order_scalars_batch, interpolate_scalars_batch, set_up_points, reverse_order_points, interpolate_points, reverse_order_points_batch, interpolate_points_batch, evaluate_scalars, interpolate_scalars, reverse_order_scalars, evaluate_points, build_domain, evaluate_scalars_on_coset, evaluate_points_on_coset, mult_matrix_by_vec, mult_sc_vec, multp_vec,evaluate_scalars_batch, evaluate_points_batch, evaluate_scalars_on_coset_batch, evaluate_points_on_coset_batch};
|
||||
|
||||
fn random_points_ark_proj(nof_elements: usize) -> Vec<G1Projective> {
|
||||
let mut rng = ark_std::rand::thread_rng();
|
||||
let mut points_ga: Vec<G1Projective> = Vec::new();
|
||||
for _ in 0..nof_elements {
|
||||
let aff = G1Projective::rand(&mut rng);
|
||||
points_ga.push(aff);
|
||||
}
|
||||
points_ga
|
||||
}
|
||||
|
||||
fn ecntt_arc_naive(
|
||||
points: &Vec<G1Projective>,
|
||||
size: usize,
|
||||
inverse: bool,
|
||||
) -> Vec<G1Projective> {
|
||||
let mut result: Vec<G1Projective> = Vec::new();
|
||||
for _ in 0..size {
|
||||
result.push(G1Projective::zero());
|
||||
}
|
||||
let rou: Fr;
|
||||
if !inverse {
|
||||
rou = Fr::get_root_of_unity(size).unwrap();
|
||||
} else {
|
||||
rou = Fr::inverse(&Fr::get_root_of_unity(size).unwrap()).unwrap();
|
||||
}
|
||||
for k in 0..size {
|
||||
for l in 0..size {
|
||||
let pow: [u64; 1] = [(l * k).try_into().unwrap()];
|
||||
let mul_rou = Fr::pow(&rou, &pow);
|
||||
result[k] = result[k].add(points[l].into_affine().mul(mul_rou));
|
||||
}
|
||||
}
|
||||
if inverse {
|
||||
let size2 = size as u64;
|
||||
for k in 0..size {
|
||||
let multfactor = Fr::inverse(&Fr::from(size2)).unwrap();
|
||||
result[k] = result[k].into_affine().mul(multfactor);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Returns `true` when both vectors have the same length and every pair of
/// corresponding points compares equal.
///
/// Previously only the first `points.len()` entries were compared: a longer
/// `points2` passed silently and a shorter one panicked on indexing.
fn check_eq(points: &Vec<G1Projective>, points2: &Vec<G1Projective>) -> bool {
    points.len() == points2.len()
        && points
            .iter()
            .zip(points2.iter())
            .all(|(expected, actual)| !actual.ne(expected))
}
|
||||
|
||||
fn test_naive_ark_ecntt(size: usize) {
|
||||
let points = random_points_ark_proj(size);
|
||||
let result1: Vec<G1Projective> = ecntt_arc_naive(&points, size, false);
|
||||
let result2: Vec<G1Projective> = ecntt_arc_naive(&result1, size, true);
|
||||
assert!(!check_eq(&result2, &result1));
|
||||
assert!(check_eq(&result2, &points));
|
||||
}
|
||||
|
||||
/// Compares the GPU `msm` against ark's `VariableBaseMSM` for two sizes.
#[test]
fn test_msm() {
    for pow2 in [6, 9] {
        let count = 1 << pow2;
        let seed = None; // set Some to provide seed
        let points = generate_random_points(count, get_rng(seed));
        let scalars = generate_random_scalars(count, get_rng(seed));

        let gpu_result = msm(&points, &scalars, 0);

        // Reference computation on the same inputs, converted to ark types.
        let ark_points: Vec<_> = points.iter().map(|p| p.to_ark_repr()).collect();
        let ark_scalars: Vec<_> = scalars.iter().map(|s| s.to_ark()).collect();
        let ark_result = VariableBaseMSM::multi_scalar_mul(&ark_points, &ark_scalars);

        assert_eq!(gpu_result.to_ark_affine(), ark_result);
        assert_eq!(gpu_result.to_ark(), ark_result);
        assert_eq!(
            gpu_result.to_ark_affine(),
            Point::from_ark(ark_result).to_ark_affine()
        );
    }
}
|
||||
|
||||
/// Compares `msm_batch` against ark's `VariableBaseMSM` run chunk by chunk,
/// for several batch counts and per-MSM sizes.
#[test]
fn test_batch_msm() {
    for batch_pow2 in [2, 4] {
        for pow2 in [4, 6] {
            let msm_size = 1 << pow2;
            let batch_size = 1 << batch_pow2;
            let total = msm_size * batch_size;
            let seed = None; // set Some to provide seed
            let points_batch = generate_random_points(total, get_rng(seed));
            let scalars_batch = generate_random_scalars(total, get_rng(seed));

            let ark_points: Vec<_> = points_batch.iter().map(|p| p.to_ark_repr()).collect();
            let ark_scalars: Vec<_> = scalars_batch.iter().map(|s| s.to_ark()).collect();

            // One reference MSM per chunk of `msm_size` inputs.
            let expected: Vec<_> = ark_points
                .chunks(msm_size)
                .zip(ark_scalars.chunks(msm_size))
                .map(|(pts, scs)| Point::from_ark(VariableBaseMSM::multi_scalar_mul(pts, scs)))
                .collect();

            let result = msm_batch(&points_batch, &scalars_batch, batch_size, 0);

            assert_eq!(result, expected);
        }
    }
}
|
||||
|
||||
/// Checks that `commit` on device buffers matches `msm` on the same host
/// data and that neither result is the zero point.
#[test]
fn test_commit() {
    let test_size = 1 << 8;
    let seed = Some(0);
    let (scalars, mut d_scalars, _) = set_up_scalars(test_size, 0, false);
    let points = generate_random_points(test_size, get_rng(seed));
    let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();

    let msm_result = msm(&points, &scalars, 0);

    // Run the device-side commit and copy its result back to the host.
    let mut d_commit_result = commit(&mut d_points, &mut d_scalars);
    let mut h_commit_result = Point::zero();
    d_commit_result.copy_to(&mut h_commit_result).unwrap();

    assert_eq!(msm_result, h_commit_result);
    assert_ne!(msm_result, Point::zero());
    assert_ne!(h_commit_result, Point::zero());
}
|
||||
|
||||
/// Checks that `commit_batch` on device buffers matches `msm_batch` on the
/// same host data and that no batch result is the zero point.
#[test]
fn test_batch_commit() {
    let batch_size = 4;
    let test_size = 1 << 12;
    let total = test_size * batch_size;
    let seed = Some(0);
    let (scalars, mut d_scalars, _) = set_up_scalars(total, 0, false);
    let points = generate_random_points(total, get_rng(seed));
    let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();

    let msm_result = msm_batch(&points, &scalars, batch_size, 0);

    // Run the device-side batched commit and copy its results to the host.
    let mut d_commit_result = commit_batch(&mut d_points, &mut d_scalars, batch_size);
    let mut h_commit_result: Vec<Point> = vec![Point::zero(); batch_size];
    d_commit_result.copy_to(&mut h_commit_result[..]).unwrap();

    assert_eq!(msm_result, h_commit_result);
    for batch_entry in h_commit_result {
        assert_ne!(batch_entry, Point::zero());
    }
}
|
||||
|
||||
/// Round-trip checks for the scalar NTT and the point ECNTT, with the ECNTT
/// also compared against the naive ark reference.
#[test]
fn test_ntt() {
    // Scalar NTT: forward must change the data, inverse must restore it.
    let seed = None; // use Some(value) to fix the rng
    let test_size = 1 << 3;

    let scalars = generate_random_scalars(test_size, get_rng(seed));

    let mut ntt_result = scalars.clone();
    ntt(&mut ntt_result, 0);
    assert_ne!(ntt_result, scalars);

    let mut intt_result = ntt_result.clone();
    intt(&mut intt_result, 0);
    assert_eq!(intt_result, scalars);

    // ECNTT
    let points_proj = generate_random_points_proj(test_size, get_rng(seed));

    test_naive_ark_ecntt(test_size);

    assert!(points_proj[0].to_ark().into_affine().is_on_curve());

    // Naive ark reference: forward then inverse must give back the input.
    let points_proj_ark: Vec<G1Projective> = points_proj.iter().map(|p| p.to_ark()).collect();
    let n_points = points_proj_ark.len();
    let ecntt_result_naive = ecntt_arc_naive(&points_proj_ark, n_points, false);
    let iecntt_result_naive = ecntt_arc_naive(&ecntt_result_naive, n_points, true);
    assert_eq!(points_proj_ark, iecntt_result_naive);

    // GPU implementation.
    let mut ecntt_result = points_proj.to_vec();
    ecntt(&mut ecntt_result, 0);
    assert_ne!(ecntt_result, points_proj);

    let mut iecntt_result = ecntt_result.clone();
    iecntt(&mut iecntt_result, 0);

    let as_ark_affine =
        |pts: &[Point]| -> Vec<G1Affine> { pts.iter().map(|p| p.to_ark_affine()).collect() };

    assert_eq!(iecntt_result_naive, as_ark_affine(&points_proj));
    assert_eq!(as_ark_affine(&iecntt_result), as_ark_affine(&points_proj));
}
|
||||
|
||||
/// Checks that the batched (EC)NTT entry points agree with running the
/// single-vector versions chunk by chunk, and that forward followed by
/// inverse gives back the input.
#[test]
fn test_ntt_batch() {
    // ---------------------------------------------------------------- NTT
    let seed = None; // use Some(value) to fix the rng
    let test_size = 1 << 5;
    let batches = 4;

    let scalars_batch: Vec<Scalar> =
        generate_random_scalars(test_size * batches, get_rng(seed));

    // Host-side copy of every chunk, used for the per-chunk transforms.
    let scalar_vec_of_vec: Vec<Vec<Scalar>> = scalars_batch
        .chunks(test_size)
        .map(|chunk| chunk.to_vec())
        .collect();

    // Batched forward NTT.
    let mut ntt_result = scalars_batch.clone();
    ntt_batch(&mut ntt_result, test_size, 0);

    // Forward NTT on every chunk separately.
    let ntt_result_vec_of_vec: Vec<Vec<Scalar>> = scalar_vec_of_vec
        .iter()
        .map(|chunk| {
            let mut transformed = chunk.clone();
            ntt(&mut transformed, 0);
            transformed
        })
        .collect();

    // Each per-chunk NTT must equal the matching slice of the batched NTT.
    for (single, batched) in ntt_result_vec_of_vec.iter().zip(ntt_result.chunks(test_size)) {
        assert_eq!(single.as_slice(), batched);
    }

    // The NTT output must differ from its input.
    assert_ne!(ntt_result, scalars_batch);

    // Batched inverse NTT.
    let mut intt_result = ntt_result.clone();
    intt_batch(&mut intt_result, test_size, 0);

    // Inverse NTT on every chunk separately.
    let intt_result_vec_of_vec: Vec<Vec<Scalar>> = ntt_result_vec_of_vec
        .iter()
        .map(|chunk| {
            let mut restored = chunk.clone();
            intt(&mut restored, 0);
            restored
        })
        .collect();

    // Each per-chunk INTT must equal the matching slice of the batched INTT.
    for (single, batched) in intt_result_vec_of_vec.iter().zip(intt_result.chunks(test_size)) {
        assert_eq!(single.as_slice(), batched);
    }

    assert_eq!(intt_result, scalars_batch);

    // -------------------------------------------------------------- ECNTT
    let points_proj = generate_random_points_proj(test_size * batches, get_rng(seed));

    let points_vec_of_vec: Vec<Vec<Point>> = points_proj
        .chunks(test_size)
        .map(|chunk| chunk.to_vec())
        .collect();

    // Batched forward ECNTT.
    let mut ntt_result_points = points_proj.clone();
    ecntt_batch(&mut ntt_result_points, test_size, 0);

    // Forward ECNTT on every chunk separately.
    let ntt_result_points_vec_of_vec: Vec<Vec<Point>> = points_vec_of_vec
        .iter()
        .map(|chunk| {
            let mut transformed = chunk.clone();
            ecntt(&mut transformed, 0);
            transformed
        })
        .collect();

    for (single, batched) in ntt_result_points_vec_of_vec
        .iter()
        .zip(ntt_result_points.chunks(test_size))
    {
        assert_eq!(single.as_slice(), batched);
    }

    assert_ne!(ntt_result_points, points_proj);

    // Batched inverse ECNTT.
    let mut intt_result_points = ntt_result_points.clone();
    iecntt_batch(&mut intt_result_points, test_size, 0);

    // Inverse ECNTT on every chunk separately.
    let intt_result_points_vec_of_vec: Vec<Vec<Point>> = ntt_result_points_vec_of_vec
        .iter()
        .map(|chunk| {
            let mut restored = chunk.clone();
            iecntt(&mut restored, 0);
            restored
        })
        .collect();

    // Each per-chunk inverse ECNTT must equal the matching slice of the
    // batched inverse ECNTT.
    for (single, batched) in intt_result_points_vec_of_vec
        .iter()
        .zip(intt_result_points.chunks(test_size))
    {
        assert_eq!(single.as_slice(), batched);
    }

    assert_eq!(intt_result_points, points_proj);
}
|
||||
|
||||
/// Checks that `interpolate_scalars` (after `reverse_order_scalars`) yields the
/// same coefficients as running `intt` on the host copy of the evaluations.
#[test]
fn test_scalar_interpolation() {
    let log_size = 7;
    let size = 1 << log_size;
    let (mut host_evals, mut dev_evals, mut dev_domain) = set_up_scalars(size, log_size, true);

    // Path under test: reorder, then interpolate.
    reverse_order_scalars(&mut dev_evals);
    let mut dev_coeffs = interpolate_scalars(&mut dev_evals, &mut dev_domain);

    // Reference path on the host vector.
    intt(&mut host_evals, 0);

    // Zero-filled destination for the copy-back.
    let mut host_coeffs: Vec<Scalar> = vec![Scalar::zero(); size];
    dev_coeffs.copy_to(&mut host_coeffs[..]).unwrap();

    assert_eq!(host_coeffs, host_evals);
}
|
||||
|
||||
/// Batched counterpart of the scalar interpolation test:
/// `interpolate_scalars_batch` must agree with `intt_batch`.
#[test]
fn test_scalar_batch_interpolation() {
    let batches = 4;
    let log_size = 10;
    let size = 1 << log_size;
    let total = size * batches;
    let (mut host_evals, mut dev_evals, mut dev_domain) = set_up_scalars(total, log_size, true);

    // Path under test.
    reverse_order_scalars_batch(&mut dev_evals, batches);
    let mut dev_coeffs = interpolate_scalars_batch(&mut dev_evals, &mut dev_domain, batches);

    // Reference path on the host vector.
    intt_batch(&mut host_evals, size, 0);

    let mut host_coeffs: Vec<Scalar> = vec![Scalar::zero(); total];
    dev_coeffs.copy_to(&mut host_coeffs[..]).unwrap();

    assert_eq!(host_coeffs, host_evals);
}
|
||||
|
||||
/// Checks that `interpolate_points` (after `reverse_order_points`) matches
/// `iecntt` on the host copy, and that no resulting coefficient is `Point::zero()`.
#[test]
fn test_point_interpolation() {
    let log_size = 6;
    let size = 1 << log_size;
    let (mut host_evals, mut dev_evals, mut dev_domain) = set_up_points(size, log_size, true);

    // Path under test.
    reverse_order_points(&mut dev_evals);
    let mut dev_coeffs = interpolate_points(&mut dev_evals, &mut dev_domain);

    // Reference path on the host points.
    iecntt(&mut host_evals[..], 0);

    let mut host_coeffs: Vec<Point> = vec![Point::zero(); size];
    dev_coeffs.copy_to(&mut host_coeffs[..]).unwrap();

    assert_eq!(host_coeffs, *host_evals);

    // Guard against a trivially-passing all-zero result.
    for coeff in &host_coeffs {
        assert_ne!(*coeff, Point::zero());
    }
}
|
||||
|
||||
/// Batched counterpart of the point interpolation test:
/// `interpolate_points_batch` must agree with `iecntt_batch`.
#[test]
fn test_point_batch_interpolation() {
    let batches = 4;
    let log_size = 6;
    let size = 1 << log_size;
    let total = size * batches;
    let (mut host_evals, mut dev_evals, mut dev_domain) = set_up_points(total, log_size, true);

    // Path under test.
    reverse_order_points_batch(&mut dev_evals, batches);
    let mut dev_coeffs = interpolate_points_batch(&mut dev_evals, &mut dev_domain, batches);

    // Reference path on the host points.
    iecntt_batch(&mut host_evals[..], size, 0);

    let mut host_coeffs: Vec<Point> = vec![Point::zero(); total];
    dev_coeffs.copy_to(&mut host_coeffs[..]).unwrap();

    assert_eq!(host_coeffs, *host_evals);

    // Guard against a trivially-passing all-zero result.
    for coeff in &host_coeffs {
        assert_ne!(*coeff, Point::zero());
    }
}
|
||||
|
||||
/// Round trip: `evaluate_scalars` followed by `interpolate_scalars` must return
/// the original coefficients, with the rest of the domain-sized output zero.
#[test]
fn test_scalar_evaluation() {
    let log_test_domain_size = 8;
    let domain_size = 1 << log_test_domain_size;
    let coeff_size = 1 << 6;
    let (host_coeffs, mut dev_coeffs, mut dev_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
    // Only the third tuple element (built with the flag set to `true`) is needed here.
    let (_, _, mut dev_domain_inv) = set_up_scalars(0, log_test_domain_size, true);

    let mut dev_evals = evaluate_scalars(&mut dev_coeffs, &mut dev_domain);
    let mut dev_recovered = interpolate_scalars(&mut dev_evals, &mut dev_domain_inv);

    let mut recovered: Vec<Scalar> = vec![Scalar::zero(); domain_size];
    dev_recovered.copy_to(&mut recovered[..]).unwrap();

    // Leading entries reproduce the input coefficients ...
    assert_eq!(host_coeffs, recovered[..coeff_size]);
    // ... and everything past them must be zero.
    for padding in &recovered[coeff_size..domain_size] {
        assert_eq!(Scalar::zero(), *padding);
    }
}
|
||||
|
||||
/// Batched round trip: `evaluate_scalars_batch` then `interpolate_scalars_batch`
/// must return each batch's coefficients followed by zeros up to the domain size.
#[test]
fn test_scalar_batch_evaluation() {
    let batches = 6;
    let log_test_domain_size = 8;
    let domain_size = 1 << log_test_domain_size;
    let coeff_size = 1 << 6;
    let (host_coeffs, mut dev_coeffs, mut dev_domain) =
        set_up_scalars(coeff_size * batches, log_test_domain_size, false);
    let (_, _, mut dev_domain_inv) = set_up_scalars(0, log_test_domain_size, true);

    let mut dev_evals = evaluate_scalars_batch(&mut dev_coeffs, &mut dev_domain, batches);
    let mut dev_recovered = interpolate_scalars_batch(&mut dev_evals, &mut dev_domain_inv, batches);

    let mut recovered: Vec<Scalar> = vec![Scalar::zero(); domain_size * batches];
    dev_recovered.copy_to(&mut recovered[..]).unwrap();

    for batch in 0..batches {
        // Inputs are packed `coeff_size` apart, outputs `domain_size` apart.
        let coeff_start = batch * coeff_size;
        let domain_start = batch * domain_size;

        assert_eq!(
            host_coeffs[coeff_start..coeff_start + coeff_size],
            recovered[domain_start..domain_start + coeff_size]
        );
        for padding in &recovered[domain_start + coeff_size..domain_start + domain_size] {
            assert_eq!(Scalar::zero(), *padding);
        }
    }
}
|
||||
|
||||
/// Round trip for points: `evaluate_points` followed by `interpolate_points`
/// must return the original coefficients.
#[test]
fn test_point_evaluation() {
    let log_test_domain_size = 7;
    let domain_size = 1 << log_test_domain_size;
    let coeff_size = 1 << 7;
    let (host_coeffs, mut dev_coeffs, mut dev_domain) = set_up_points(coeff_size, log_test_domain_size, false);
    let (_, _, mut dev_domain_inv) = set_up_points(0, log_test_domain_size, true);

    let mut dev_evals = evaluate_points(&mut dev_coeffs, &mut dev_domain);
    let mut dev_recovered = interpolate_points(&mut dev_evals, &mut dev_domain_inv);

    let mut recovered: Vec<Point> = vec![Point::zero(); domain_size];
    dev_recovered.copy_to(&mut recovered[..]).unwrap();

    assert_eq!(host_coeffs[..], recovered[..coeff_size]);

    // With the sizes above the coefficient count equals the domain size,
    // so this padding range is empty.
    for padding in &recovered[coeff_size..domain_size] {
        assert_eq!(Point::zero(), *padding);
    }
    // Guard against a trivially-passing all-zero result.
    for coeff in &recovered[..coeff_size] {
        assert_ne!(*coeff, Point::zero());
    }
}
|
||||
|
||||
/// Batched round trip for points: `evaluate_points_batch` then
/// `interpolate_points_batch` must return each batch's coefficients followed
/// by `Point::zero()` padding up to the domain size.
#[test]
fn test_point_batch_evaluation() {
    let batches = 4;
    let log_test_domain_size = 6;
    let domain_size = 1 << log_test_domain_size;
    let coeff_size = 1 << 5;
    let (host_coeffs, mut dev_coeffs, mut dev_domain) =
        set_up_points(coeff_size * batches, log_test_domain_size, false);
    let (_, _, mut dev_domain_inv) = set_up_points(0, log_test_domain_size, true);

    let mut dev_evals = evaluate_points_batch(&mut dev_coeffs, &mut dev_domain, batches);
    let mut dev_recovered = interpolate_points_batch(&mut dev_evals, &mut dev_domain_inv, batches);

    let mut recovered: Vec<Point> = vec![Point::zero(); domain_size * batches];
    dev_recovered.copy_to(&mut recovered[..]).unwrap();

    for batch in 0..batches {
        // Inputs are packed `coeff_size` apart, outputs `domain_size` apart.
        let coeff_start = batch * coeff_size;
        let domain_start = batch * domain_size;

        assert_eq!(
            host_coeffs[coeff_start..coeff_start + coeff_size],
            recovered[domain_start..domain_start + coeff_size]
        );
        for padding in &recovered[domain_start + coeff_size..domain_start + domain_size] {
            assert_eq!(Point::zero(), *padding);
        }
        // Guard against a trivially-passing all-zero result.
        for coeff in &recovered[domain_start..domain_start + coeff_size] {
            assert_ne!(*coeff, Point::zero());
        }
    }
}
|
||||
|
||||
/// Checks that `evaluate_scalars` and `evaluate_scalars_on_coset` return the
/// same values when the coset powers come from `build_domain(domain_size, 0, false)`.
#[test]
fn test_scalar_evaluation_on_trivial_coset() {
    // checks that the evaluations on the subgroup is the same as on the coset generated by 1
    let log_test_domain_size = 8;
    let domain_size = 1 << log_test_domain_size;
    let coeff_size = 1 << 6;
    let (_, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
    // The unused inverse-domain set-up from the earlier version was dropped:
    // nothing in this test interpolates.
    let mut d_trivial_coset_powers = build_domain(domain_size, 0, false);

    // Plain evaluation on the subgroup.
    let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
    let mut h_evals: Vec<Scalar> = vec![Scalar::zero(); domain_size];
    d_evals.copy_to(&mut h_evals[..]).unwrap();

    // Evaluation through the coset entry point.
    let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_trivial_coset_powers);
    let mut h_evals_coset: Vec<Scalar> = vec![Scalar::zero(); domain_size];
    d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();

    assert_eq!(h_evals, h_evals_coset);
}
|
||||
|
||||
/// Evaluating on a subgroup and on its coset must together reproduce the
/// evaluation on the subgroup of twice the size.
#[test]
fn test_scalar_evaluation_on_coset() {
    // checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
    let log_size = 8;
    let size = 1 << log_size;
    let (_, mut dev_coeffs, mut dev_domain) = set_up_scalars(size, log_size, false);
    let (_, _, mut dev_large_domain) = set_up_scalars(0, log_size + 1, false);
    let mut dev_coset_powers = build_domain(size, log_size + 1, false);

    // Reference: evaluation over the doubled domain.
    let mut dev_evals_large = evaluate_scalars(&mut dev_coeffs, &mut dev_large_domain);
    let mut evals_large: Vec<Scalar> = vec![Scalar::zero(); 2 * size];
    dev_evals_large.copy_to(&mut evals_large[..]).unwrap();

    // Evaluation over the original subgroup.
    let mut dev_evals = evaluate_scalars(&mut dev_coeffs, &mut dev_domain);
    let mut evals_subgroup: Vec<Scalar> = vec![Scalar::zero(); size];
    dev_evals.copy_to(&mut evals_subgroup[..]).unwrap();

    // Evaluation over the coset.
    let mut dev_evals_coset = evaluate_scalars_on_coset(&mut dev_coeffs, &mut dev_domain, &mut dev_coset_powers);
    let mut evals_coset: Vec<Scalar> = vec![Scalar::zero(); size];
    dev_evals_coset.copy_to(&mut evals_coset[..]).unwrap();

    // First half of the large result is the subgroup, second half the coset.
    assert_eq!(evals_subgroup[..], evals_large[..size]);
    assert_eq!(evals_coset[..], evals_large[size..2 * size]);
}
|
||||
|
||||
/// Batched version of the subgroup-plus-coset check for scalars.
#[test]
fn test_scalar_batch_evaluation_on_coset() {
    // checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
    let batches = 4;
    let log_size = 6;
    let size = 1 << log_size;
    let total = size * batches;
    let (_, mut dev_coeffs, mut dev_domain) = set_up_scalars(total, log_size, false);
    let (_, _, mut dev_large_domain) = set_up_scalars(0, log_size + 1, false);
    let mut dev_coset_powers = build_domain(size, log_size + 1, false);

    // Reference: evaluation over the doubled domain.
    let mut dev_evals_large = evaluate_scalars_batch(&mut dev_coeffs, &mut dev_large_domain, batches);
    let mut evals_large: Vec<Scalar> = vec![Scalar::zero(); 2 * total];
    dev_evals_large.copy_to(&mut evals_large[..]).unwrap();

    // Evaluation over the original subgroup.
    let mut dev_evals = evaluate_scalars_batch(&mut dev_coeffs, &mut dev_domain, batches);
    let mut evals_subgroup: Vec<Scalar> = vec![Scalar::zero(); total];
    dev_evals.copy_to(&mut evals_subgroup[..]).unwrap();

    // Evaluation over the coset.
    let mut dev_evals_coset =
        evaluate_scalars_on_coset_batch(&mut dev_coeffs, &mut dev_domain, batches, &mut dev_coset_powers);
    let mut evals_coset: Vec<Scalar> = vec![Scalar::zero(); total];
    dev_evals_coset.copy_to(&mut evals_coset[..]).unwrap();

    for batch in 0..batches {
        // Each batch owns `size` entries in the small results and `2 * size` in the large one.
        let small = batch * size..(batch + 1) * size;
        let large_first_half = 2 * batch * size..(2 * batch + 1) * size;
        let large_second_half = (2 * batch + 1) * size..(2 * batch + 2) * size;

        assert_eq!(evals_large[large_first_half], evals_subgroup[small.clone()]);
        assert_eq!(evals_large[large_second_half], evals_coset[small]);
    }
}
|
||||
|
||||
/// Point version of the subgroup-plus-coset check; also asserts that no
/// evaluation is `Point::zero()`.
#[test]
fn test_point_evaluation_on_coset() {
    // checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
    let log_size = 8;
    let size = 1 << log_size;
    let (_, mut dev_coeffs, mut dev_domain) = set_up_points(size, log_size, false);
    let (_, _, mut dev_large_domain) = set_up_points(0, log_size + 1, false);
    let mut dev_coset_powers = build_domain(size, log_size + 1, false);

    // Reference: evaluation over the doubled domain.
    let mut dev_evals_large = evaluate_points(&mut dev_coeffs, &mut dev_large_domain);
    let mut evals_large: Vec<Point> = vec![Point::zero(); 2 * size];
    dev_evals_large.copy_to(&mut evals_large[..]).unwrap();

    // Evaluation over the original subgroup.
    let mut dev_evals = evaluate_points(&mut dev_coeffs, &mut dev_domain);
    let mut evals_subgroup: Vec<Point> = vec![Point::zero(); size];
    dev_evals.copy_to(&mut evals_subgroup[..]).unwrap();

    // Evaluation over the coset.
    let mut dev_evals_coset = evaluate_points_on_coset(&mut dev_coeffs, &mut dev_domain, &mut dev_coset_powers);
    let mut evals_coset: Vec<Point> = vec![Point::zero(); size];
    dev_evals_coset.copy_to(&mut evals_coset[..]).unwrap();

    // First half of the large result is the subgroup, second half the coset.
    assert_eq!(evals_subgroup[..], evals_large[..size]);
    assert_eq!(evals_coset[..], evals_large[size..2 * size]);

    // Guard against a trivially-passing all-zero result.
    for idx in 0..size {
        assert_ne!(evals_subgroup[idx], Point::zero());
        assert_ne!(evals_coset[idx], Point::zero());
        assert_ne!(evals_large[2 * idx], Point::zero());
        assert_ne!(evals_large[2 * idx + 1], Point::zero());
    }
}
|
||||
|
||||
/// Batched point version of the subgroup-plus-coset check; also asserts that
/// no evaluation is `Point::zero()`.
#[test]
fn test_point_batch_evaluation_on_coset() {
    // checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
    let batches = 2;
    let log_size = 6;
    let size = 1 << log_size;
    let total = size * batches;
    let (_, mut dev_coeffs, mut dev_domain) = set_up_points(total, log_size, false);
    let (_, _, mut dev_large_domain) = set_up_points(0, log_size + 1, false);
    let mut dev_coset_powers = build_domain(size, log_size + 1, false);

    // Reference: evaluation over the doubled domain.
    let mut dev_evals_large = evaluate_points_batch(&mut dev_coeffs, &mut dev_large_domain, batches);
    let mut evals_large: Vec<Point> = vec![Point::zero(); 2 * total];
    dev_evals_large.copy_to(&mut evals_large[..]).unwrap();

    // Evaluation over the original subgroup.
    let mut dev_evals = evaluate_points_batch(&mut dev_coeffs, &mut dev_domain, batches);
    let mut evals_subgroup: Vec<Point> = vec![Point::zero(); total];
    dev_evals.copy_to(&mut evals_subgroup[..]).unwrap();

    // Evaluation over the coset.
    let mut dev_evals_coset =
        evaluate_points_on_coset_batch(&mut dev_coeffs, &mut dev_domain, batches, &mut dev_coset_powers);
    let mut evals_coset: Vec<Point> = vec![Point::zero(); total];
    dev_evals_coset.copy_to(&mut evals_coset[..]).unwrap();

    for batch in 0..batches {
        // Each batch owns `size` entries in the small results and `2 * size` in the large one.
        let small = batch * size..(batch + 1) * size;
        let large_first_half = 2 * batch * size..(2 * batch + 1) * size;
        let large_second_half = (2 * batch + 1) * size..(2 * batch + 2) * size;

        assert_eq!(evals_large[large_first_half], evals_subgroup[small.clone()]);
        assert_eq!(evals_large[large_second_half], evals_coset[small]);
    }

    // Guard against a trivially-passing all-zero result.
    for idx in 0..total {
        assert_ne!(evals_subgroup[idx], Point::zero());
        assert_ne!(evals_coset[idx], Point::zero());
        assert_ne!(evals_large[2 * idx], Point::zero());
        assert_ne!(evals_large[2 * idx + 1], Point::zero());
    }
}
|
||||
|
||||
// Matrix multiplication is tested by comparing the result of `ntt` with the naive
// product of the input vector and the DFT matrix built from powers of a root of unity.
#[test]
fn test_matrix_multiplication() {
    let seed = None; // some value to fix the rng
    let size = 1 << 5;
    let rou = Fr::get_root_of_unity(size).unwrap();

    // Row-major DFT matrix: entry (row, col) is rou^(row * col).
    let mut dft_matrix: Vec<Scalar> = Vec::new();
    for row in 0..size {
        for col in 0..size {
            let exponent: [u64; 1] = [(row * col).try_into().unwrap()];
            dft_matrix.push(Scalar::from_ark(Fr::pow(&rou, &exponent).into_repr()));
        }
    }

    let input: Vec<Scalar> = generate_random_scalars(size, get_rng(seed));

    let product = mult_matrix_by_vec(&dft_matrix, &input, 0);
    let mut transformed = input.clone();
    ntt(&mut transformed, 0);

    // we don't use the same roots of unity as arkworks, so the results are permutations
    // of one another and the only guaranteed fixed scalars are the following ones:
    let midpoint = size >> 1;
    assert_eq!(product[0], transformed[0]);
    assert_eq!(product[midpoint], transformed[midpoint]);
}
|
||||
|
||||
/// Element-wise scalar multiplication: `[1, 1, 0] * [1, 0, 0]` must give `[1, 0, 0]`.
#[test]
#[allow(non_snake_case)]
fn test_vec_scalar_mul() {
    // The multiplier vector doubles as the expected product for this input.
    let multipliers = [Scalar::one(), Scalar::zero(), Scalar::zero()];
    let mut inout = [Scalar::one(), Scalar::one(), Scalar::zero()];

    mult_sc_vec(&mut inout, &multipliers, 0);

    assert_eq!(inout, multipliers);
}
|
||||
|
||||
/// Element-wise point-by-scalar multiplication: multiplying by one keeps the
/// point, multiplying by zero gives `Point::zero()`.
#[test]
#[allow(non_snake_case)]
fn test_vec_point_mul() {
    // Stand-in point with every coordinate set to one.
    let unit = Base::one();
    let dummy_one = Point { x: unit, y: unit, z: unit };

    let scalars = [Scalar::one(), Scalar::zero(), Scalar::zero()];
    let expected = [dummy_one, Point::zero(), Point::zero()];
    let mut inout = [dummy_one, dummy_one, Point::zero()];

    multp_vec(&mut inout, &scalars, 0);

    assert_eq!(inout, expected);
}
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
[package]
|
||||
name = "bls12-381"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = [ "Ingonyama" ]
|
||||
|
||||
[dependencies]
|
||||
icicle-core = { path = "../icicle-core" }
|
||||
|
||||
hex = "*"
|
||||
ark-std = "0.3.0"
|
||||
ark-ff = "0.3.0"
|
||||
ark-poly = "0.3.0"
|
||||
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
|
||||
ark-bls12-381 = "0.3.0"
|
||||
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_derive = "1.0"
|
||||
serde_cbor = "0.11.2"
|
||||
|
||||
rustacuda = "0.1"
|
||||
rustacuda_core = "0.1"
|
||||
rustacuda_derive = "0.1"
|
||||
|
||||
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
|
||||
|
||||
[build-dependencies]
|
||||
cc = { version = "1.0", features = ["parallel"] }
|
||||
|
||||
[dev-dependencies]
|
||||
"criterion" = "0.4.0"
|
||||
|
||||
[features]
|
||||
g2 = []
|
||||
@@ -1,36 +0,0 @@
|
||||
use std::env;
|
||||
|
||||
fn main() {
|
||||
//TODO: check cargo features selected
|
||||
//TODO: can conflict/duplicate with make ?
|
||||
|
||||
println!("cargo:rerun-if-env-changed=CXXFLAGS");
|
||||
println!("cargo:rerun-if-changed=./icicle");
|
||||
|
||||
let arch_type = env::var("ARCH_TYPE").unwrap_or(String::from("native"));
|
||||
let stream_type = env::var("DEFAULT_STREAM").unwrap_or(String::from("legacy"));
|
||||
|
||||
let mut arch = String::from("-arch=");
|
||||
arch.push_str(&arch_type);
|
||||
let mut stream = String::from("-default-stream=");
|
||||
stream.push_str(&stream_type);
|
||||
|
||||
let mut nvcc = cc::Build::new();
|
||||
|
||||
println!("Compiling icicle library using arch: {}", &arch);
|
||||
|
||||
if cfg!(feature = "g2") {
|
||||
nvcc.define("G2_DEFINED", None);
|
||||
}
|
||||
nvcc.cuda(true);
|
||||
nvcc.define("FEATURE_BLS12_381", None);
|
||||
nvcc.debug(false);
|
||||
nvcc.flag(&arch);
|
||||
nvcc.flag(&stream);
|
||||
nvcc.shared_flag(false);
|
||||
// nvcc.static_flag(true);
|
||||
nvcc.files([
|
||||
"../icicle-cuda/curves/index.cu",
|
||||
]);
|
||||
nvcc.compile("ingo_icicle"); //TODO: extension??
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
/// Compile-time description of a field whose elements are stored as
/// `NUM_LIMBS` 32-bit limbs.
pub trait Field<const NUM_LIMBS: usize> {
    /// Field modulus as `u32` limbs. The spelling `MODOLUS` is kept because
    /// implementors refer to the constant by this name.
    /// NOTE(review): limb order is not established here - presumably little-endian; confirm.
    const MODOLUS: [u32; NUM_LIMBS];

    /// Number of limbs per element; defaults to the const generic parameter.
    const LIMBS: usize = NUM_LIMBS;
}
|
||||
@@ -1,3 +0,0 @@
|
||||
pub mod field;
|
||||
pub mod scalar;
|
||||
pub mod point;
|
||||
@@ -1,106 +0,0 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
use super::scalar::{get_fixed_limbs, self};
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointT<BF: scalar::ScalarTrait> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
pub z: BF,
|
||||
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> Default for PointT<BF> {
|
||||
fn default() -> Self {
|
||||
PointT::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn zero() -> Self {
|
||||
PointT {
|
||||
x: BF::zero(),
|
||||
y: BF::one(),
|
||||
z: BF::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infinity() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinityT<BF> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
}
|
||||
|
||||
impl<BF: scalar::ScalarTrait> Default for PointAffineNoInfinityT<BF> {
|
||||
fn default() -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::zero(),
|
||||
y: BF::zero(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointAffineNoInfinityT<BF> {
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> PointT<BF> {
|
||||
PointT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BF::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
PointT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y),
|
||||
z: BF::from_limbs(z)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> PointT<BF> {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BF::base_limbs(), "length must be 3 * {}", BF::base_limbs());
|
||||
PointT {
|
||||
x: BF::from_limbs(value[..BF::base_limbs()].try_into().unwrap()),
|
||||
y: BF::from_limbs(value[BF::base_limbs()..BF::base_limbs() * 2].try_into().unwrap()),
|
||||
z: BF::from_limbs(value[BF::base_limbs() * 2..].try_into().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinityT<BF> {
|
||||
PointAffineNoInfinityT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,102 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination};
|
||||
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use super::field::{Field, self};
|
||||
|
||||
/// Copies `val` into a fixed-size array of `NUM_LIMBS` limbs, zero-filling any
/// positions past the end of `val`.
///
/// # Panics
/// Panics with "slice has too many elements" when `val` is longer than `NUM_LIMBS`.
pub fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    let given = val.len();
    if given > NUM_LIMBS {
        panic!("slice has too many elements");
    }

    // Start from all zeros so a short input is padded automatically.
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..given].copy_from_slice(val);
    limbs
}
|
||||
|
||||
/// Operations shared by limb-based field element types.
pub trait ScalarTrait {
    /// Number of `u32` limbs in one element.
    fn base_limbs() -> usize;

    /// The zero element.
    fn zero() -> Self;

    /// Builds an element from `u32` limbs.
    fn from_limbs(value: &[u32]) -> Self;

    /// The one element.
    fn one() -> Self;

    /// Serialises the element to bytes, little-endian.
    fn to_bytes_le(&self) -> Vec<u8>;

    /// Borrows the element's limbs.
    fn limbs(&self) -> &[u32];
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy)]
|
||||
#[repr(C)]
|
||||
pub struct ScalarT<M, const NUM_LIMBS: usize> {
|
||||
pub(crate) phantom: PhantomData<M>,
|
||||
pub(crate) value : [u32; NUM_LIMBS]
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarTrait for ScalarT<M, NUM_LIMBS>
|
||||
where
|
||||
M: Field<NUM_LIMBS>,
|
||||
{
|
||||
|
||||
fn base_limbs() -> usize {
|
||||
return NUM_LIMBS;
|
||||
}
|
||||
|
||||
fn zero() -> Self {
|
||||
ScalarT {
|
||||
value: [0u32; NUM_LIMBS],
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
value: get_fixed_limbs(value),
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
ScalarT { value: s, phantom: PhantomData }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.value
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
fn limbs(&self) -> &[u32] {
|
||||
&self.value
|
||||
}
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarT<M, NUM_LIMBS> where M: field::Field<NUM_LIMBS>{
|
||||
pub fn from_limbs_le(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
Self::from_limbs(value)
|
||||
}
|
||||
|
||||
pub fn from_limbs_be(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
let mut value = value.to_vec();
|
||||
value.reverse();
|
||||
Self::from_limbs_le(&value)
|
||||
}
|
||||
|
||||
// Additional Functions
|
||||
pub fn add(&self, other:ScalarT<M, NUM_LIMBS>) -> ScalarT<M,NUM_LIMBS>{ // overload +
|
||||
return ScalarT{value: [self.value[0] + other.value[0];NUM_LIMBS], phantom: PhantomData };
|
||||
}
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination, DeviceCopy};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::basic_structs::point::{PointT, PointAffineNoInfinityT};
|
||||
use crate::basic_structs::scalar::ScalarT;
|
||||
use crate::basic_structs::field::Field;
|
||||
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct ScalarField;
|
||||
impl Field<8> for ScalarField {
|
||||
const MODOLUS: [u32; 8] = [0x0;8];
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct BaseField;
|
||||
impl Field<12> for BaseField {
|
||||
const MODOLUS: [u32; 12] = [0x0;12];
|
||||
}
|
||||
|
||||
|
||||
pub type Scalar = ScalarT<ScalarField,8>;
|
||||
impl Default for Scalar {
|
||||
fn default() -> Self {
|
||||
Self{value: [0x0;ScalarField::LIMBS], phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl DeviceCopy for Scalar{}
|
||||
|
||||
|
||||
pub type Base = ScalarT<BaseField,12>;
|
||||
impl Default for Base {
|
||||
fn default() -> Self {
|
||||
Self{value: [0x0;BaseField::LIMBS], phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl DeviceCopy for Base{}
|
||||
|
||||
pub type Point = PointT<Base>;
|
||||
pub type PointAffineNoInfinity = PointAffineNoInfinityT<Base>;
|
||||
|
||||
extern "C" {
|
||||
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
@@ -1,798 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use ark_std::UniformRand;
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda::CudaFlags;
|
||||
use rustacuda::memory::DeviceBox;
|
||||
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
|
||||
use rustacuda_core::DevicePointer;
|
||||
use std::mem::transmute;
|
||||
use crate::basic_structs::scalar::ScalarTrait;
|
||||
use crate::curve_structs::*;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
use ark_bls12_381::{Fq as Fq_BLS12_381, Fr as Fr_BLS12_381, G1Affine as G1Affine_BLS12_381, G1Projective as G1Projective_BLS12_381};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use rustacuda::memory::{CopyDestination, DeviceCopy};
|
||||
|
||||
extern "C" {
|
||||
fn msm_cuda(
|
||||
out: *mut Point,
|
||||
points: *const PointAffineNoInfinity,
|
||||
scalars: *const Scalar,
|
||||
count: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn msm_batch_cuda(
|
||||
out: *mut Point,
|
||||
points: *const PointAffineNoInfinity,
|
||||
scalars: *const Scalar,
|
||||
batch_size: usize,
|
||||
msm_size: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn commit_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_scalars: DevicePointer<Scalar>,
|
||||
d_points: DevicePointer<PointAffineNoInfinity>,
|
||||
count: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn commit_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_scalars: DevicePointer<Scalar>,
|
||||
d_points: DevicePointer<PointAffineNoInfinity>,
|
||||
count: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn build_domain_cuda(domain_size: usize, logn: usize, inverse: bool, device_id: usize) -> DevicePointer<Scalar>;
|
||||
|
||||
fn ntt_cuda(inout: *mut Scalar, n: usize, inverse: bool, device_id: usize) -> c_int;
|
||||
|
||||
fn ecntt_cuda(inout: *mut Point, n: usize, inverse: bool, device_id: usize) -> c_int;
|
||||
|
||||
fn ntt_batch_cuda(
|
||||
inout: *mut Scalar,
|
||||
arr_size: usize,
|
||||
n: usize,
|
||||
inverse: bool,
|
||||
) -> c_int;
|
||||
|
||||
fn ecntt_batch_cuda(inout: *mut Point, arr_size: usize, n: usize, inverse: bool) -> c_int;
|
||||
|
||||
fn interpolate_scalars_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_evaluations: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_scalars_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_evaluations: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_points_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_evaluations: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_points_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_evaluations: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_on_coset_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_on_coset_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_on_coset_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_on_coset_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_scalars_cuda(
|
||||
d_arr: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_scalars_batch_cuda(
|
||||
d_arr: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_points_cuda(
|
||||
d_arr: DevicePointer<Point>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_points_batch_cuda(
|
||||
d_arr: DevicePointer<Point>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn vec_mod_mult_point(
|
||||
inout: *mut Point,
|
||||
scalars: *const Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
|
||||
fn vec_mod_mult_scalar(
|
||||
inout: *mut Scalar,
|
||||
scalars: *const Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
|
||||
fn matrix_vec_mod_mult(
|
||||
matrix_flattened: *const Scalar,
|
||||
input: *const Scalar,
|
||||
output: *mut Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
}
|
||||
|
||||
pub fn msm(points: &[PointAffineNoInfinity], scalars: &[Scalar], device_id: usize) -> Point {
|
||||
let count = points.len();
|
||||
if count != scalars.len() {
|
||||
todo!("variable length")
|
||||
}
|
||||
|
||||
let mut ret = Point::zero();
|
||||
unsafe {
|
||||
msm_cuda(
|
||||
&mut ret as *mut _ as *mut Point,
|
||||
points as *const _ as *const PointAffineNoInfinity,
|
||||
scalars as *const _ as *const Scalar,
|
||||
scalars.len(),
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn msm_batch(
|
||||
points: &[PointAffineNoInfinity],
|
||||
scalars: &[Scalar],
|
||||
batch_size: usize,
|
||||
device_id: usize,
|
||||
) -> Vec<Point> {
|
||||
let count = points.len();
|
||||
if count != scalars.len() {
|
||||
todo!("variable length")
|
||||
}
|
||||
|
||||
let mut ret = vec![Point::zero(); batch_size];
|
||||
|
||||
unsafe {
|
||||
msm_batch_cuda(
|
||||
&mut ret[0] as *mut _ as *mut Point,
|
||||
points as *const _ as *const PointAffineNoInfinity,
|
||||
scalars as *const _ as *const Scalar,
|
||||
batch_size,
|
||||
count / batch_size,
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn commit(
|
||||
points: &mut DeviceBuffer<PointAffineNoInfinity>,
|
||||
scalars: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBox<Point> {
|
||||
let mut res = DeviceBox::new(&Point::zero()).unwrap();
|
||||
unsafe {
|
||||
commit_cuda(
|
||||
res.as_device_ptr(),
|
||||
scalars.as_device_ptr(),
|
||||
points.as_device_ptr(),
|
||||
scalars.len(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn commit_batch(
|
||||
points: &mut DeviceBuffer<PointAffineNoInfinity>,
|
||||
scalars: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(batch_size).unwrap() };
|
||||
unsafe {
|
||||
commit_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
scalars.as_device_ptr(),
|
||||
points.as_device_ptr(),
|
||||
scalars.len() / batch_size,
|
||||
batch_size,
|
||||
0,
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Compute an in-place NTT on the input data.
|
||||
fn ntt_internal(values: &mut [Scalar], device_id: usize, inverse: bool) -> i32 {
|
||||
let ret_code = unsafe {
|
||||
ntt_cuda(
|
||||
values as *mut _ as *mut Scalar,
|
||||
values.len(),
|
||||
inverse,
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
ret_code
|
||||
}
|
||||
|
||||
pub fn ntt(values: &mut [Scalar], device_id: usize) {
|
||||
ntt_internal(values, device_id, false);
|
||||
}
|
||||
|
||||
pub fn intt(values: &mut [Scalar], device_id: usize) {
|
||||
ntt_internal(values, device_id, true);
|
||||
}
|
||||
|
||||
/// Compute an in-place NTT on the input data.
|
||||
fn ntt_internal_batch(
|
||||
values: &mut [Scalar],
|
||||
device_id: usize,
|
||||
batch_size: usize,
|
||||
inverse: bool,
|
||||
) -> i32 {
|
||||
unsafe {
|
||||
ntt_batch_cuda(
|
||||
values as *mut _ as *mut Scalar,
|
||||
values.len(),
|
||||
batch_size,
|
||||
inverse,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ntt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
|
||||
ntt_internal_batch(values, 0, batch_size, false);
|
||||
}
|
||||
|
||||
pub fn intt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
|
||||
ntt_internal_batch(values, 0, batch_size, true);
|
||||
}
|
||||
|
||||
/// Compute an in-place ECNTT on the input data.
|
||||
fn ecntt_internal(values: &mut [Point], inverse: bool, device_id: usize) -> i32 {
|
||||
unsafe {
|
||||
ecntt_cuda(
|
||||
values as *mut _ as *mut Point,
|
||||
values.len(),
|
||||
inverse,
|
||||
device_id,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ecntt(values: &mut [Point], device_id: usize) {
|
||||
ecntt_internal(values, false, device_id);
|
||||
}
|
||||
|
||||
/// Compute an in-place iECNTT on the input data.
|
||||
pub fn iecntt(values: &mut [Point], device_id: usize) {
|
||||
ecntt_internal(values, true, device_id);
|
||||
}
|
||||
|
||||
/// Compute an in-place ECNTT on the input data.
|
||||
fn ecntt_internal_batch(
|
||||
values: &mut [Point],
|
||||
device_id: usize,
|
||||
batch_size: usize,
|
||||
inverse: bool,
|
||||
) -> i32 {
|
||||
unsafe {
|
||||
ecntt_batch_cuda(
|
||||
values as *mut _ as *mut Point,
|
||||
values.len(),
|
||||
batch_size,
|
||||
inverse,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
|
||||
ecntt_internal_batch(values, 0, batch_size, false);
|
||||
}
|
||||
|
||||
/// Compute an in-place iECNTT on the input data.
|
||||
pub fn iecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
|
||||
ecntt_internal_batch(values, 0, batch_size, true);
|
||||
}
|
||||
|
||||
pub fn build_domain(domain_size: usize, logn: usize, inverse: bool) -> DeviceBuffer<Scalar> {
|
||||
unsafe {
|
||||
DeviceBuffer::from_raw_parts(build_domain_cuda(
|
||||
domain_size,
|
||||
logn,
|
||||
inverse,
|
||||
0
|
||||
), domain_size)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn reverse_order_scalars(
|
||||
d_scalars: &mut DeviceBuffer<Scalar>,
|
||||
) {
|
||||
unsafe { reverse_order_scalars_cuda(
|
||||
d_scalars.as_device_ptr(),
|
||||
d_scalars.len(),
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_scalars_batch(
|
||||
d_scalars: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) {
|
||||
unsafe { reverse_order_scalars_batch_cuda(
|
||||
d_scalars.as_device_ptr(),
|
||||
d_scalars.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_points(
|
||||
d_points: &mut DeviceBuffer<Point>,
|
||||
) {
|
||||
unsafe { reverse_order_points_cuda(
|
||||
d_points.as_device_ptr(),
|
||||
d_points.len(),
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_points_batch(
|
||||
d_points: &mut DeviceBuffer<Point>,
|
||||
batch_size: usize,
|
||||
) {
|
||||
unsafe { reverse_order_points_batch_cuda(
|
||||
d_points.as_device_ptr(),
|
||||
d_points.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn interpolate_scalars(
|
||||
d_evaluations: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe { interpolate_scalars_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_scalars_batch(
|
||||
d_evaluations: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe { interpolate_scalars_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
batch_size,
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_points(
|
||||
d_evaluations: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe { interpolate_points_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_points_batch(
|
||||
d_evaluations: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe { interpolate_points_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
batch_size,
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_on_coset(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_on_coset_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_on_coset_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_on_coset_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_on_coset(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_on_coset_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_on_coset_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_on_coset_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn multp_vec(a: &mut [Point], b: &[Scalar], device_id: usize) {
|
||||
assert_eq!(a.len(), b.len());
|
||||
unsafe {
|
||||
vec_mod_mult_point(
|
||||
a as *mut _ as *mut Point,
|
||||
b as *const _ as *const Scalar,
|
||||
a.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mult_sc_vec(a: &mut [Scalar], b: &[Scalar], device_id: usize) {
|
||||
assert_eq!(a.len(), b.len());
|
||||
unsafe {
|
||||
vec_mod_mult_scalar(
|
||||
a as *mut _ as *mut Scalar,
|
||||
b as *const _ as *const Scalar,
|
||||
a.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Multiply a matrix by a scalar:
|
||||
// `a` - flattenned matrix;
|
||||
// `b` - vector to multiply `a` by;
|
||||
pub fn mult_matrix_by_vec(a: &[Scalar], b: &[Scalar], device_id: usize) -> Vec<Scalar> {
|
||||
let mut c = Vec::with_capacity(b.len());
|
||||
for i in 0..b.len() {
|
||||
c.push(Scalar::zero());
|
||||
}
|
||||
unsafe {
|
||||
matrix_vec_mod_mult(
|
||||
a as *const _ as *const Scalar,
|
||||
b as *const _ as *const Scalar,
|
||||
c.as_mut_slice() as *mut _ as *mut Scalar,
|
||||
b.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
c
|
||||
}
|
||||
|
||||
pub fn clone_buffer<T: DeviceCopy>(buf: &mut DeviceBuffer<T>) -> DeviceBuffer<T> {
|
||||
let mut buf_cpy = unsafe { DeviceBuffer::uninitialized(buf.len()).unwrap() };
|
||||
unsafe { buf_cpy.copy_from(buf) };
|
||||
return buf_cpy;
|
||||
}
|
||||
|
||||
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> {
|
||||
let rng: Box<dyn RngCore> = match seed {
|
||||
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
|
||||
None => Box::new(rand::thread_rng()),
|
||||
};
|
||||
rng
|
||||
}
|
||||
|
||||
fn set_up_device() {
|
||||
// Set up the context, load the module, and create a stream to run kernels in.
|
||||
rustacuda::init(CudaFlags::empty()).unwrap();
|
||||
let device = Device::get_device(0).unwrap();
|
||||
let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device).unwrap();
|
||||
}
|
||||
|
||||
pub fn generate_random_points(
|
||||
count: usize,
|
||||
mut rng: Box<dyn RngCore>,
|
||||
) -> Vec<PointAffineNoInfinity> {
|
||||
(0..count)
|
||||
.map(|_| Point::from_ark(G1Projective_BLS12_381::rand(&mut rng)).to_xy_strip_z())
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_random_points_proj(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Point> {
|
||||
(0..count)
|
||||
.map(|_| Point::from_ark(G1Projective_BLS12_381::rand(&mut rng)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_random_scalars(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Scalar> {
|
||||
(0..count)
|
||||
.map(|_| Scalar::from_ark(Fr_BLS12_381::rand(&mut rng).into_repr()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn set_up_points(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Point>, DeviceBuffer<Point>, DeviceBuffer<Scalar>) {
|
||||
set_up_device();
|
||||
|
||||
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
|
||||
|
||||
let seed = Some(0); // fix the rng to get two equal scalar
|
||||
let vector = generate_random_points_proj(test_size, get_rng(seed));
|
||||
let mut vector_mut = vector.clone();
|
||||
|
||||
let mut d_vector = DeviceBuffer::from_slice(&vector[..]).unwrap();
|
||||
(vector_mut, d_vector, d_domain)
|
||||
}
|
||||
|
||||
pub fn set_up_scalars(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Scalar>, DeviceBuffer<Scalar>, DeviceBuffer<Scalar>) {
|
||||
set_up_device();
|
||||
|
||||
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
|
||||
|
||||
let seed = Some(0); // fix the rng to get two equal scalars
|
||||
let mut vector_mut = generate_random_scalars(test_size, get_rng(seed));
|
||||
|
||||
let mut d_vector = DeviceBuffer::from_slice(&vector_mut[..]).unwrap();
|
||||
(vector_mut, d_vector, d_domain)
|
||||
}
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
pub mod test_bls12_381;
|
||||
pub mod basic_structs;
|
||||
pub mod from_cuda;
|
||||
pub mod curve_structs;
|
||||
@@ -1,816 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use ark_std::UniformRand;
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda::CudaFlags;
|
||||
use rustacuda::memory::DeviceBox;
|
||||
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
|
||||
use rustacuda_core::DevicePointer;
|
||||
use std::mem::transmute;
|
||||
pub use crate::basic_structs::scalar::ScalarTrait;
|
||||
pub use crate::curve_structs::*;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
use ark_bls12_381::{Fq as Fq_BLS12_381, Fr as Fr_BLS12_381, G1Affine as G1Affine_BLS12_381, G1Projective as G1Projective_BLS12_381};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use rustacuda::memory::{CopyDestination, DeviceCopy};
|
||||
|
||||
|
||||
impl Scalar {
|
||||
pub fn to_biginteger254(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_biginteger256(ark: BigInteger256) -> Self {
|
||||
Self{ value: u64_vec_to_u32_vec(&ark.0).try_into().unwrap(), phantom : PhantomData}
|
||||
}
|
||||
|
||||
pub fn to_biginteger256_transmute(&self) -> BigInteger256 {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_biginteger_transmute(v: BigInteger256) -> Scalar {
|
||||
Scalar{ value: unsafe{ transmute(v)}, phantom : PhantomData }
|
||||
}
|
||||
|
||||
pub fn to_ark_transmute(&self) -> Fr_BLS12_381 {
|
||||
unsafe { std::mem::transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_ark_transmute(v: &Fr_BLS12_381) -> Scalar {
|
||||
unsafe { std::mem::transmute_copy(v) }
|
||||
}
|
||||
|
||||
pub fn to_ark_mod_p(&self) -> Fr_BLS12_381 {
|
||||
Fr_BLS12_381::new(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap()))
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> Fr_BLS12_381 {
|
||||
Fr_BLS12_381::from_repr(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())).unwrap()
|
||||
}
|
||||
|
||||
pub fn from_ark(v: BigInteger256) -> Scalar {
|
||||
Self { value : u64_vec_to_u32_vec(&v.0).try_into().unwrap(), phantom: PhantomData}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl Base {
|
||||
pub fn to_ark(&self) -> BigInteger384 {
|
||||
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger384) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl Point {
|
||||
pub fn to_ark(&self) -> G1Projective_BLS12_381 {
|
||||
self.to_ark_affine().into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_BLS12_381 {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_BLS12_381::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_BLS12_381) -> Point {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point {
|
||||
x: Base::from_ark((ark.x * z_invsq).into_repr()),
|
||||
y: Base::from_ark((ark.y * z_invq3).into_repr()),
|
||||
z: Base::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PointAffineNoInfinity {
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_BLS12_381 {
|
||||
G1Affine_BLS12_381::new(Fq_BLS12_381::new(self.x.to_ark()), Fq_BLS12_381::new(self.y.to_ark()), false)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_BLS12_381 {
|
||||
G1Affine_BLS12_381::new(
|
||||
Fq_BLS12_381::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_BLS12_381::from_repr(self.y.to_ark()).unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(p: &G1Affine_BLS12_381) -> Self {
|
||||
PointAffineNoInfinity {
|
||||
x: Base::from_ark(p.x.into_repr()),
|
||||
y: Base::from_ark(p.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Point {
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity {
|
||||
x: Base::from_ark(ark_affine.x.into_repr()),
|
||||
y: Base::from_ark(ark_affine.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests_bls12_381 {
|
||||
use std::ops::Add;
|
||||
use ark_bls12_381::{Fr, G1Affine, G1Projective};
|
||||
use ark_ec::{msm::VariableBaseMSM, AffineCurve, ProjectiveCurve};
|
||||
use ark_ff::{FftField, Field, Zero, PrimeField};
|
||||
use ark_std::UniformRand;
|
||||
use rustacuda::prelude::{DeviceBuffer, CopyDestination};
|
||||
use crate::curve_structs::{Point, Scalar, Base};
|
||||
use crate::basic_structs::scalar::ScalarTrait;
|
||||
use crate::from_cuda::{generate_random_points, get_rng, generate_random_scalars, msm, msm_batch, set_up_scalars, commit, commit_batch, ntt, intt, generate_random_points_proj, ecntt, iecntt, ntt_batch, ecntt_batch, iecntt_batch, intt_batch, reverse_order_scalars_batch, interpolate_scalars_batch, set_up_points, reverse_order_points, interpolate_points, reverse_order_points_batch, interpolate_points_batch, evaluate_scalars, interpolate_scalars, reverse_order_scalars, evaluate_points, build_domain, evaluate_scalars_on_coset, evaluate_points_on_coset, mult_matrix_by_vec, mult_sc_vec, multp_vec,evaluate_scalars_batch, evaluate_points_batch, evaluate_scalars_on_coset_batch, evaluate_points_on_coset_batch};
|
||||
|
||||
fn random_points_ark_proj(nof_elements: usize) -> Vec<G1Projective> {
|
||||
let mut rng = ark_std::rand::thread_rng();
|
||||
let mut points_ga: Vec<G1Projective> = Vec::new();
|
||||
for _ in 0..nof_elements {
|
||||
let aff = G1Projective::rand(&mut rng);
|
||||
points_ga.push(aff);
|
||||
}
|
||||
points_ga
|
||||
}
|
||||
|
||||
fn ecntt_arc_naive(
|
||||
points: &Vec<G1Projective>,
|
||||
size: usize,
|
||||
inverse: bool,
|
||||
) -> Vec<G1Projective> {
|
||||
let mut result: Vec<G1Projective> = Vec::new();
|
||||
for _ in 0..size {
|
||||
result.push(G1Projective::zero());
|
||||
}
|
||||
let rou: Fr;
|
||||
if !inverse {
|
||||
rou = Fr::get_root_of_unity(size).unwrap();
|
||||
} else {
|
||||
rou = Fr::inverse(&Fr::get_root_of_unity(size).unwrap()).unwrap();
|
||||
}
|
||||
for k in 0..size {
|
||||
for l in 0..size {
|
||||
let pow: [u64; 1] = [(l * k).try_into().unwrap()];
|
||||
let mul_rou = Fr::pow(&rou, &pow);
|
||||
result[k] = result[k].add(points[l].into_affine().mul(mul_rou));
|
||||
}
|
||||
}
|
||||
if inverse {
|
||||
let size2 = size as u64;
|
||||
for k in 0..size {
|
||||
let multfactor = Fr::inverse(&Fr::from(size2)).unwrap();
|
||||
result[k] = result[k].into_affine().mul(multfactor);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Returns `true` when both vectors have the same length and every pair of
/// corresponding points compares equal.
///
/// Previously the lengths were never compared: a longer `points2` was
/// accepted as soon as its prefix matched, and a shorter `points2` panicked
/// on an out-of-bounds index.
fn check_eq(points: &Vec<G1Projective>, points2: &Vec<G1Projective>) -> bool {
    points.len() == points2.len()
        && points
            .iter()
            .zip(points2.iter())
            .all(|(lhs, rhs)| !rhs.ne(lhs))
}
|
||||
|
||||
fn test_naive_ark_ecntt(size: usize) {
|
||||
let points = random_points_ark_proj(size);
|
||||
let result1: Vec<G1Projective> = ecntt_arc_naive(&points, size, false);
|
||||
let result2: Vec<G1Projective> = ecntt_arc_naive(&result1, size, true);
|
||||
assert!(!check_eq(&result2, &result1));
|
||||
assert!(check_eq(&result2, &points));
|
||||
}
|
||||
|
||||
/// Compares `msm` against arkworks' `VariableBaseMSM` for 2^6 and 2^9 random
/// inputs.
#[test]
fn test_msm() {
    for pow2 in [6, 9] {
        let count = 1 << pow2;
        let seed = None; // set Some to provide seed
        let points = generate_random_points(count, get_rng(seed));
        let scalars = generate_random_scalars(count, get_rng(seed));

        let gpu_result = msm(&points, &scalars, 0);

        // Reference computation on the arkworks side.
        let ark_points: Vec<_> = points.iter().map(|p| p.to_ark_repr()).collect();
        let ark_scalars: Vec<_> = scalars.iter().map(|s| s.to_ark()).collect();
        let ark_result = VariableBaseMSM::multi_scalar_mul(&ark_points, &ark_scalars);

        assert_eq!(gpu_result.to_ark_affine(), ark_result);
        assert_eq!(gpu_result.to_ark(), ark_result);
        assert_eq!(
            gpu_result.to_ark_affine(),
            Point::from_ark(ark_result).to_ark_affine()
        );
    }
}
|
||||
|
||||
/// Compares `msm_batch` against per-chunk arkworks `VariableBaseMSM` results
/// for several batch counts and per-entry sizes.
#[test]
fn test_batch_msm() {
    for batch_pow2 in [2, 4] {
        for pow2 in [4, 6] {
            let msm_size = 1 << pow2;
            let batch_size = 1 << batch_pow2;
            let seed = None; // set Some to provide seed
            let points_batch = generate_random_points(msm_size * batch_size, get_rng(seed));
            let scalars_batch = generate_random_scalars(msm_size * batch_size, get_rng(seed));

            let ark_points: Vec<_> = points_batch.iter().map(|p| p.to_ark_repr()).collect();
            let ark_scalars: Vec<_> = scalars_batch.iter().map(|s| s.to_ark()).collect();

            // One reference MSM per chunk of `msm_size` inputs.
            let expected: Vec<_> = ark_points
                .chunks(msm_size)
                .zip(ark_scalars.chunks(msm_size))
                .map(|(bases, coeffs)| {
                    Point::from_ark(VariableBaseMSM::multi_scalar_mul(bases, coeffs))
                })
                .collect();

            let result = msm_batch(&points_batch, &scalars_batch, batch_size, 0);

            assert_eq!(result, expected);
        }
    }
}
|
||||
|
||||
/// Checks that `commit` on device buffers matches `msm` on the same host
/// data, and that neither result is `Point::zero()`.
#[test]
fn test_commit() {
    let test_size = 1 << 8;
    let seed = Some(0);
    let (scalars, mut d_scalars, _) = set_up_scalars(test_size, 0, false);
    let points = generate_random_points(test_size, get_rng(seed));
    let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();

    let host_result = msm(&points, &scalars, 0);

    // Run the device-side path and copy its single point back to the host.
    let mut d_commit_result = commit(&mut d_points, &mut d_scalars);
    let mut device_result = Point::zero();
    d_commit_result.copy_to(&mut device_result).unwrap();

    assert_eq!(host_result, device_result);
    assert_ne!(host_result, Point::zero());
    assert_ne!(device_result, Point::zero());
}
|
||||
|
||||
/// Checks that `commit_batch` on device buffers matches `msm_batch` on the
/// same host data, and that no result is `Point::zero()`.
#[test]
fn test_batch_commit() {
    let batch_size = 4;
    let test_size = 1 << 12;
    let seed = Some(0);
    let total = test_size * batch_size;
    let (scalars, mut d_scalars, _) = set_up_scalars(total, 0, false);
    let points = generate_random_points(total, get_rng(seed));
    let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();

    let host_results = msm_batch(&points, &scalars, batch_size, 0);

    // Run the device-side path and copy its results back to the host.
    let mut d_commit_result = commit_batch(&mut d_points, &mut d_scalars, batch_size);
    let mut device_results: Vec<Point> = vec![Point::zero(); batch_size];
    d_commit_result.copy_to(&mut device_results[..]).unwrap();

    assert_eq!(host_results, device_results);
    for entry in device_results {
        assert_ne!(entry, Point::zero());
    }
}
|
||||
|
||||
/// Round-trip checks for `ntt`/`intt` on scalars and `ecntt`/`iecntt` on
/// points, with the naive arkworks ECNTT as a reference.
#[test]
fn test_ntt() {
    //NTT
    let seed = None; //some value to fix the rng
    let test_size = 1 << 3;

    let scalars = generate_random_scalars(test_size, get_rng(seed));

    let mut ntt_result = scalars.clone();
    ntt(&mut ntt_result, 0);
    assert_ne!(ntt_result, scalars);

    let mut intt_result = ntt_result.clone();
    intt(&mut intt_result, 0);
    assert_eq!(intt_result, scalars);

    //ECNTT
    let points_proj = generate_random_points_proj(test_size, get_rng(seed));

    test_naive_ark_ecntt(test_size);

    assert!(points_proj[0].to_ark().into_affine().is_on_curve());

    //naive ark
    let points_proj_ark: Vec<G1Projective> = points_proj.iter().map(|p| p.to_ark()).collect();
    let naive_len = points_proj_ark.len();

    let ecntt_result_naive = ecntt_arc_naive(&points_proj_ark, naive_len, false);
    let iecntt_result_naive = ecntt_arc_naive(&ecntt_result_naive, naive_len, true);

    assert_eq!(points_proj_ark, iecntt_result_naive);

    //ingo gpu
    let mut ecntt_result = points_proj.to_vec();
    ecntt(&mut ecntt_result, 0);
    assert_ne!(ecntt_result, points_proj);

    let mut iecntt_result = ecntt_result.clone();
    iecntt(&mut iecntt_result, 0);

    // Affine form of the original input, shared by both comparisons below.
    let input_affine: Vec<G1Affine> = points_proj.iter().map(|p| p.to_ark_affine()).collect();
    assert_eq!(iecntt_result_naive, input_affine);

    let round_trip_affine: Vec<G1Affine> =
        iecntt_result.iter().map(|p| p.to_ark_affine()).collect();
    assert_eq!(round_trip_affine, input_affine);
}
|
||||
|
||||
#[test]
fn test_ntt_batch() {
    let seed = None; // put a value here to pin the rng
    let test_size = 1 << 5;
    let batches = 4;

    // ---------------- scalar NTT ----------------
    let scalars_batch: Vec<Scalar> = generate_random_scalars(test_size * batches, get_rng(seed));

    // One owned vector per batch entry.
    let scalar_vec_of_vec: Vec<Vec<Scalar>> = scalars_batch
        .chunks(test_size)
        .map(|chunk| chunk.to_vec())
        .collect();

    // Forward transform of the whole batch in a single call.
    let mut ntt_result = scalars_batch.clone();
    ntt_batch(&mut ntt_result, test_size, 0);

    // Forward transform, one chunk at a time.
    let mut ntt_result_vec_of_vec: Vec<Vec<Scalar>> = Vec::with_capacity(batches);
    for chunk in &scalar_vec_of_vec {
        let mut single = chunk.clone();
        ntt(&mut single, 0);
        ntt_result_vec_of_vec.push(single);
    }

    // The per-chunk ntt must match the corresponding slice of the batched ntt.
    for (single, batched) in ntt_result_vec_of_vec.iter().zip(ntt_result.chunks(test_size)) {
        assert_eq!(single.as_slice(), batched);
    }

    // The transform must not be the identity on this input.
    assert_ne!(ntt_result, scalars_batch);

    // Inverse transform of the whole batch in a single call.
    let mut intt_result = ntt_result.clone();
    intt_batch(&mut intt_result, test_size, 0);

    // Inverse transform, one chunk at a time.
    let mut intt_result_vec_of_vec: Vec<Vec<Scalar>> = Vec::with_capacity(batches);
    for chunk in &ntt_result_vec_of_vec {
        let mut single = chunk.clone();
        intt(&mut single, 0);
        intt_result_vec_of_vec.push(single);
    }

    // The per-chunk intt must match the corresponding slice of the batched intt.
    for (single, batched) in intt_result_vec_of_vec.iter().zip(intt_result.chunks(test_size)) {
        assert_eq!(single.as_slice(), batched);
    }

    assert_eq!(intt_result, scalars_batch);

    // ---------------- ECNTT ----------------
    let points_proj = generate_random_points_proj(test_size * batches, get_rng(seed));

    let points_vec_of_vec: Vec<Vec<Point>> = points_proj
        .chunks(test_size)
        .map(|chunk| chunk.to_vec())
        .collect();

    // Batched forward ecntt.
    let mut ntt_result_points = points_proj.clone();
    ecntt_batch(&mut ntt_result_points, test_size, 0);

    // Forward ecntt, one chunk at a time.
    let mut ntt_result_points_vec_of_vec: Vec<Vec<Point>> = Vec::with_capacity(batches);
    for chunk in &points_vec_of_vec {
        let mut single = chunk.clone();
        ecntt(&mut single, 0);
        ntt_result_points_vec_of_vec.push(single);
    }

    for (single, batched) in ntt_result_points_vec_of_vec
        .iter()
        .zip(ntt_result_points.chunks(test_size))
    {
        assert_eq!(single.as_slice(), batched);
    }

    assert_ne!(ntt_result_points, points_proj);

    // Batched inverse ecntt.
    let mut intt_result_points = ntt_result_points.clone();
    iecntt_batch(&mut intt_result_points, test_size, 0);

    // Inverse ecntt, one chunk at a time.
    let mut intt_result_points_vec_of_vec: Vec<Vec<Point>> = Vec::with_capacity(batches);
    for chunk in &ntt_result_points_vec_of_vec {
        let mut single = chunk.clone();
        iecntt(&mut single, 0);
        intt_result_points_vec_of_vec.push(single);
    }

    // The per-chunk inverse must match the corresponding slice of the batched inverse.
    for (single, batched) in intt_result_points_vec_of_vec
        .iter()
        .zip(intt_result_points.chunks(test_size))
    {
        assert_eq!(single.as_slice(), batched);
    }

    assert_eq!(intt_result_points, points_proj);
}
|
||||
|
||||
#[test]
fn test_scalar_interpolation() {
    let log_test_size = 7;
    let test_size = 1 << log_test_size;
    let (mut host_evals, mut d_evals, mut d_domain) = set_up_scalars(test_size, log_test_size, true);

    // Device path: reorder the evaluations, then interpolate against the domain.
    reverse_order_scalars(&mut d_evals);
    let mut d_coeffs = interpolate_scalars(&mut d_evals, &mut d_domain);

    // Host-side reference: inverse NTT of the same evaluations.
    intt(&mut host_evals, 0);

    let mut h_coeffs: Vec<Scalar> = vec![Scalar::zero(); test_size];
    d_coeffs.copy_to(h_coeffs.as_mut_slice()).unwrap();

    assert_eq!(h_coeffs, host_evals);
}
|
||||
|
||||
#[test]
fn test_scalar_batch_interpolation() {
    let batch_size = 4;
    let log_test_size = 10;
    let test_size = 1 << log_test_size;
    let total = test_size * batch_size;
    let (mut host_evals, mut d_evals, mut d_domain) = set_up_scalars(total, log_test_size, true);

    // Device path: reorder every batch entry, then interpolate the whole batch.
    reverse_order_scalars_batch(&mut d_evals, batch_size);
    let mut d_coeffs = interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size);

    // Host-side reference: batched inverse NTT of the same evaluations.
    intt_batch(&mut host_evals, test_size, 0);

    let mut h_coeffs: Vec<Scalar> = vec![Scalar::zero(); total];
    d_coeffs.copy_to(h_coeffs.as_mut_slice()).unwrap();

    assert_eq!(h_coeffs, host_evals);
}
|
||||
|
||||
#[test]
fn test_point_interpolation() {
    let log_test_size = 6;
    let test_size = 1 << log_test_size;
    let (mut host_evals, mut d_evals, mut d_domain) = set_up_points(test_size, log_test_size, true);

    // Device path: reorder the evaluations, then interpolate against the domain.
    reverse_order_points(&mut d_evals);
    let mut d_coeffs = interpolate_points(&mut d_evals, &mut d_domain);

    // Host-side reference: inverse ECNTT of the same evaluations.
    iecntt(&mut host_evals[..], 0);

    let mut h_coeffs: Vec<Point> = vec![Point::zero(); test_size];
    d_coeffs.copy_to(h_coeffs.as_mut_slice()).unwrap();

    assert_eq!(h_coeffs, *host_evals);

    // Guard against a trivially passing comparison of all-zero buffers.
    for coeff in &h_coeffs {
        assert_ne!(*coeff, Point::zero());
    }
}
|
||||
|
||||
#[test]
fn test_point_batch_interpolation() {
    let batch_size = 4;
    let log_test_size = 6;
    let test_size = 1 << log_test_size;
    let total = test_size * batch_size;
    let (mut host_evals, mut d_evals, mut d_domain) = set_up_points(total, log_test_size, true);

    // Device path: reorder every batch entry, then interpolate the whole batch.
    reverse_order_points_batch(&mut d_evals, batch_size);
    let mut d_coeffs = interpolate_points_batch(&mut d_evals, &mut d_domain, batch_size);

    // Host-side reference: batched inverse ECNTT of the same evaluations.
    iecntt_batch(&mut host_evals[..], test_size, 0);

    let mut h_coeffs: Vec<Point> = vec![Point::zero(); total];
    d_coeffs.copy_to(h_coeffs.as_mut_slice()).unwrap();

    assert_eq!(h_coeffs, *host_evals);

    // Guard against a trivially passing comparison of all-zero buffers.
    for coeff in &h_coeffs {
        assert_ne!(*coeff, Point::zero());
    }
}
|
||||
|
||||
#[test]
fn test_scalar_evaluation() {
    let log_test_domain_size = 8;
    let domain_size: usize = 1 << log_test_domain_size;
    let coeff_size = 1 << 6;
    let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
    let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);

    // Evaluate on the domain, then interpolate back with the inverse domain.
    let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
    let mut d_coeffs_domain = interpolate_scalars(&mut d_evals, &mut d_domain_inv);

    let mut h_coeffs_domain: Vec<Scalar> = vec![Scalar::zero(); domain_size];
    d_coeffs_domain.copy_to(h_coeffs_domain.as_mut_slice()).unwrap();

    // The leading entries reproduce the input coefficients ...
    assert_eq!(h_coeffs, h_coeffs_domain[..coeff_size]);

    // ... and everything past them must be zero.
    for padding in &h_coeffs_domain[coeff_size..] {
        assert_eq!(Scalar::zero(), *padding);
    }
}
|
||||
|
||||
#[test]
fn test_scalar_batch_evaluation() {
    let batch_size = 6;
    let log_test_domain_size = 8;
    let domain_size = 1 << log_test_domain_size;
    let coeff_size = 1 << 6;
    let (h_coeffs, mut d_coeffs, mut d_domain) =
        set_up_scalars(coeff_size * batch_size, log_test_domain_size, false);
    let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);

    // Evaluate every batch entry on the domain, then interpolate back with the inverse domain.
    let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
    let mut d_coeffs_domain = interpolate_scalars_batch(&mut d_evals, &mut d_domain_inv, batch_size);

    let mut h_coeffs_domain: Vec<Scalar> = vec![Scalar::zero(); domain_size * batch_size];
    d_coeffs_domain.copy_to(h_coeffs_domain.as_mut_slice()).unwrap();

    for j in 0..batch_size {
        // Split this entry's domain-sized chunk into recovered coefficients and padding.
        let domain_chunk = &h_coeffs_domain[j * domain_size..(j + 1) * domain_size];
        let (recovered, padding) = domain_chunk.split_at(coeff_size);

        assert_eq!(h_coeffs[j * coeff_size..(j + 1) * coeff_size], *recovered);
        for value in padding {
            assert_eq!(Scalar::zero(), *value);
        }
    }
}
|
||||
|
||||
#[test]
fn test_point_evaluation() {
    let log_test_domain_size = 7;
    let domain_size: usize = 1 << log_test_domain_size;
    let coeff_size = 1 << 7;
    let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_points(coeff_size, log_test_domain_size, false);
    let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);

    // Evaluate on the domain, then interpolate back with the inverse domain.
    let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
    let mut d_coeffs_domain = interpolate_points(&mut d_evals, &mut d_domain_inv);

    let mut h_coeffs_domain: Vec<Point> = vec![Point::zero(); domain_size];
    d_coeffs_domain.copy_to(h_coeffs_domain.as_mut_slice()).unwrap();

    let (recovered, padding) = h_coeffs_domain.split_at(coeff_size);

    // The leading entries reproduce the input coefficients ...
    assert_eq!(h_coeffs[..], *recovered);

    // ... anything past them must be the zero point ...
    for point in padding {
        assert_eq!(Point::zero(), *point);
    }

    // ... and the recovered coefficients themselves must be non-zero.
    for point in recovered {
        assert_ne!(*point, Point::zero());
    }
}
|
||||
|
||||
#[test]
fn test_point_batch_evaluation() {
    let batch_size = 4;
    let log_test_domain_size = 6;
    let domain_size = 1 << log_test_domain_size;
    let coeff_size = 1 << 5;
    let (h_coeffs, mut d_coeffs, mut d_domain) =
        set_up_points(coeff_size * batch_size, log_test_domain_size, false);
    let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);

    // Evaluate every batch entry on the domain, then interpolate back with the inverse domain.
    let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
    let mut d_coeffs_domain = interpolate_points_batch(&mut d_evals, &mut d_domain_inv, batch_size);

    let mut h_coeffs_domain: Vec<Point> = vec![Point::zero(); domain_size * batch_size];
    d_coeffs_domain.copy_to(h_coeffs_domain.as_mut_slice()).unwrap();

    for j in 0..batch_size {
        // Split this entry's domain-sized chunk into recovered coefficients and padding.
        let domain_chunk = &h_coeffs_domain[j * domain_size..(j + 1) * domain_size];
        let (recovered, padding) = domain_chunk.split_at(coeff_size);

        assert_eq!(h_coeffs[j * coeff_size..(j + 1) * coeff_size], *recovered);
        for point in padding {
            assert_eq!(Point::zero(), *point);
        }
        for point in recovered {
            assert_ne!(*point, Point::zero());
        }
    }
}
|
||||
|
||||
#[test]
fn test_scalar_evaluation_on_trivial_coset() {
    // Evaluating on the subgroup and on the coset generated by 1 must give the same values.
    let log_test_domain_size = 8;
    let domain_size: usize = 1 << log_test_domain_size;
    let coeff_size = 1 << 6;
    let (_, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
    // Set up as in the original test; the returned inverse domain is not used afterwards.
    let (_, _, _unused_inverse_domain) = set_up_scalars(coeff_size, log_test_domain_size, true);
    let mut d_trivial_coset_powers = build_domain(1 << log_test_domain_size, 0, false);

    // Plain evaluation on the subgroup.
    let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
    let mut h_evals: Vec<Scalar> = vec![Scalar::zero(); domain_size];
    d_evals.copy_to(h_evals.as_mut_slice()).unwrap();

    // Evaluation on the trivial coset.
    let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_trivial_coset_powers);
    let mut h_evals_coset: Vec<Scalar> = vec![Scalar::zero(); domain_size];
    d_evals_coset.copy_to(h_evals_coset.as_mut_slice()).unwrap();

    assert_eq!(h_evals, h_evals_coset);
}
|
||||
|
||||
#[test]
fn test_scalar_evaluation_on_coset() {
    // Evaluating on a subgroup plus its coset must equal evaluating on the 2x larger subgroup.
    let log_test_size = 8;
    let test_size = 1 << log_test_size;
    let (_, mut d_coeffs, mut d_domain) = set_up_scalars(test_size, log_test_size, false);
    let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
    let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);

    // Reference: evaluation on the doubled domain.
    let mut d_evals_large = evaluate_scalars(&mut d_coeffs, &mut d_large_domain);
    let mut h_evals_large: Vec<Scalar> = vec![Scalar::zero(); 2 * test_size];
    d_evals_large.copy_to(h_evals_large.as_mut_slice()).unwrap();

    // Evaluation on the subgroup.
    let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
    let mut h_evals: Vec<Scalar> = vec![Scalar::zero(); test_size];
    d_evals.copy_to(h_evals.as_mut_slice()).unwrap();

    // Evaluation on the coset.
    let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
    let mut h_evals_coset: Vec<Scalar> = vec![Scalar::zero(); test_size];
    d_evals_coset.copy_to(h_evals_coset.as_mut_slice()).unwrap();

    // First half of the large result is the subgroup, second half is the coset.
    let (large_subgroup, large_coset) = h_evals_large.split_at(test_size);
    assert_eq!(h_evals[..], *large_subgroup);
    assert_eq!(h_evals_coset[..], *large_coset);
}
|
||||
|
||||
#[test]
fn test_scalar_batch_evaluation_on_coset() {
    // Evaluating on a subgroup plus its coset must equal evaluating on the 2x larger subgroup.
    let batch_size = 4;
    let log_test_size = 6;
    let test_size = 1 << log_test_size;
    let total = test_size * batch_size;
    let (_, mut d_coeffs, mut d_domain) = set_up_scalars(total, log_test_size, false);
    let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
    let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);

    // Reference: batched evaluation on the doubled domain.
    let mut d_evals_large = evaluate_scalars_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
    let mut h_evals_large: Vec<Scalar> = vec![Scalar::zero(); 2 * total];
    d_evals_large.copy_to(h_evals_large.as_mut_slice()).unwrap();

    // Batched evaluation on the subgroup.
    let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
    let mut h_evals: Vec<Scalar> = vec![Scalar::zero(); total];
    d_evals.copy_to(h_evals.as_mut_slice()).unwrap();

    // Batched evaluation on the coset.
    let mut d_evals_coset =
        evaluate_scalars_on_coset_batch(&mut d_coeffs, &mut d_domain, batch_size, &mut d_coset_powers);
    let mut h_evals_coset: Vec<Scalar> = vec![Scalar::zero(); total];
    d_evals_coset.copy_to(h_evals_coset.as_mut_slice()).unwrap();

    for i in 0..batch_size {
        // Each entry occupies 2 * test_size slots in the large result: subgroup half, then coset half.
        let small = i * test_size..(i + 1) * test_size;
        let large_start = 2 * i * test_size;
        let large_mid = large_start + test_size;
        let large_end = large_mid + test_size;

        assert_eq!(h_evals_large[large_start..large_mid], h_evals[small.clone()]);
        assert_eq!(h_evals_large[large_mid..large_end], h_evals_coset[small]);
    }
}
|
||||
|
||||
#[test]
fn test_point_evaluation_on_coset() {
    // Evaluating on a subgroup plus its coset must equal evaluating on the 2x larger subgroup.
    let log_test_size = 8;
    let test_size = 1 << log_test_size;
    let (_, mut d_coeffs, mut d_domain) = set_up_points(test_size, log_test_size, false);
    let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
    let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);

    // Reference: evaluation on the doubled domain.
    let mut d_evals_large = evaluate_points(&mut d_coeffs, &mut d_large_domain);
    let mut h_evals_large: Vec<Point> = vec![Point::zero(); 2 * test_size];
    d_evals_large.copy_to(h_evals_large.as_mut_slice()).unwrap();

    // Evaluation on the subgroup.
    let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
    let mut h_evals: Vec<Point> = vec![Point::zero(); test_size];
    d_evals.copy_to(h_evals.as_mut_slice()).unwrap();

    // Evaluation on the coset.
    let mut d_evals_coset = evaluate_points_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
    let mut h_evals_coset: Vec<Point> = vec![Point::zero(); test_size];
    d_evals_coset.copy_to(h_evals_coset.as_mut_slice()).unwrap();

    // First half of the large result is the subgroup, second half is the coset.
    let (large_subgroup, large_coset) = h_evals_large.split_at(test_size);
    assert_eq!(h_evals[..], *large_subgroup);
    assert_eq!(h_evals_coset[..], *large_coset);

    // No entry of any of the three result buffers may be the zero point.
    for point in h_evals.iter().chain(h_evals_coset.iter()).chain(h_evals_large.iter()) {
        assert_ne!(*point, Point::zero());
    }
}
|
||||
|
||||
#[test]
fn test_point_batch_evaluation_on_coset() {
    // Evaluating on a subgroup plus its coset must equal evaluating on the 2x larger subgroup.
    let batch_size = 2;
    let log_test_size = 6;
    let test_size = 1 << log_test_size;
    let total = test_size * batch_size;
    let (_, mut d_coeffs, mut d_domain) = set_up_points(total, log_test_size, false);
    let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
    let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);

    // Reference: batched evaluation on the doubled domain.
    let mut d_evals_large = evaluate_points_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
    let mut h_evals_large: Vec<Point> = vec![Point::zero(); 2 * total];
    d_evals_large.copy_to(h_evals_large.as_mut_slice()).unwrap();

    // Batched evaluation on the subgroup.
    let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
    let mut h_evals: Vec<Point> = vec![Point::zero(); total];
    d_evals.copy_to(h_evals.as_mut_slice()).unwrap();

    // Batched evaluation on the coset.
    let mut d_evals_coset =
        evaluate_points_on_coset_batch(&mut d_coeffs, &mut d_domain, batch_size, &mut d_coset_powers);
    let mut h_evals_coset: Vec<Point> = vec![Point::zero(); total];
    d_evals_coset.copy_to(h_evals_coset.as_mut_slice()).unwrap();

    for i in 0..batch_size {
        // Each entry occupies 2 * test_size slots in the large result: subgroup half, then coset half.
        let small = i * test_size..(i + 1) * test_size;
        let large_start = 2 * i * test_size;
        let large_mid = large_start + test_size;
        let large_end = large_mid + test_size;

        assert_eq!(h_evals_large[large_start..large_mid], h_evals[small.clone()]);
        assert_eq!(h_evals_large[large_mid..large_end], h_evals_coset[small]);
    }

    // No entry of any of the three result buffers may be the zero point.
    for point in h_evals.iter().chain(h_evals_coset.iter()).chain(h_evals_large.iter()) {
        assert_ne!(*point, Point::zero());
    }
}
|
||||
|
||||
// Checks matrix multiplication by comparing the naive product with the DFT matrix
// against the result of the NTT on the same vector.
#[test]
fn test_matrix_multiplication() {
    let seed = None; // put a value here to pin the rng
    let test_size = 1 << 5;
    let rou = Fr::get_root_of_unity(test_size).unwrap();

    // Row-major DFT matrix: entry (row, col) is rou^(row * col).
    let mut matrix_flattened: Vec<Scalar> = Vec::with_capacity(test_size * test_size);
    for row_num in 0..test_size {
        for col_num in 0..test_size {
            let exponent: [u64; 1] = [(row_num * col_num).try_into().unwrap()];
            matrix_flattened.push(Scalar::from_ark(Fr::pow(&rou, &exponent).into_repr()));
        }
    }

    let vector: Vec<Scalar> = generate_random_scalars(test_size, get_rng(seed));

    let result = mult_matrix_by_vec(&matrix_flattened, &vector, 0);

    let mut ntt_result = vector.clone();
    ntt(&mut ntt_result, 0);

    // we don't use the same roots of unity as arkworks, so the results are permutations
    // of one another and the only guaranteed fixed scalars are the following ones:
    let half = test_size >> 1;
    assert_eq!(result[0], ntt_result[0]);
    assert_eq!(result[half], ntt_result[half]);
}
|
||||
|
||||
#[test]
#[allow(non_snake_case)]
fn test_vec_scalar_mul() {
    // Element-wise product of [1, 1, 0] with [1, 0, 0]; the product equals the second operand.
    let mut inout = [Scalar::one(), Scalar::one(), Scalar::zero()];
    let expected = [Scalar::one(), Scalar::zero(), Scalar::zero()];

    mult_sc_vec(&mut inout, &expected, 0);

    assert_eq!(inout, expected);
}
|
||||
|
||||
#[test]
#[allow(non_snake_case)]
fn test_vec_point_mul() {
    // A stand-in point with every coordinate set to one.
    let dummy_one = Point {
        x: Base::one(),
        y: Base::one(),
        z: Base::one(),
    };

    let scalars = [Scalar::one(), Scalar::zero(), Scalar::zero()];
    let expected = [dummy_one, Point::zero(), Point::zero()];

    // Multiply [P, P, 0] element-wise by [1, 0, 0]; only the first slot should survive.
    let mut inout = [dummy_one, dummy_one, Point::zero()];
    multp_vec(&mut inout, &scalars, 0);

    assert_eq!(inout, expected);
}
|
||||
}
|
||||
@@ -1,34 +0,0 @@
|
||||
[package]
|
||||
name = "bn254"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = [ "Ingonyama" ]
|
||||
|
||||
[dependencies]
|
||||
icicle-core = { path = "../icicle-core" }
|
||||
|
||||
hex = "*"
|
||||
ark-std = "0.3.0"
|
||||
ark-ff = "0.3.0"
|
||||
ark-poly = "0.3.0"
|
||||
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
|
||||
ark-bn254 = "0.3.0"
|
||||
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_derive = "1.0"
|
||||
serde_cbor = "0.11.2"
|
||||
|
||||
rustacuda = "0.1"
|
||||
rustacuda_core = "0.1"
|
||||
rustacuda_derive = "0.1"
|
||||
|
||||
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
|
||||
|
||||
[build-dependencies]
|
||||
cc = { version = "1.0", features = ["parallel"] }
|
||||
|
||||
[dev-dependencies]
|
||||
"criterion" = "0.4.0"
|
||||
|
||||
[features]
|
||||
g2 = []
|
||||
@@ -1,36 +0,0 @@
|
||||
use std::env;
|
||||
|
||||
fn main() {
|
||||
//TODO: check cargo features selected
|
||||
//TODO: can conflict/duplicate with make ?
|
||||
|
||||
println!("cargo:rerun-if-env-changed=CXXFLAGS");
|
||||
println!("cargo:rerun-if-changed=./icicle");
|
||||
|
||||
let arch_type = env::var("ARCH_TYPE").unwrap_or(String::from("native"));
|
||||
let stream_type = env::var("DEFAULT_STREAM").unwrap_or(String::from("legacy"));
|
||||
|
||||
let mut arch = String::from("-arch=");
|
||||
arch.push_str(&arch_type);
|
||||
let mut stream = String::from("-default-stream=");
|
||||
stream.push_str(&stream_type);
|
||||
|
||||
let mut nvcc = cc::Build::new();
|
||||
|
||||
println!("Compiling icicle library using arch: {}", &arch);
|
||||
|
||||
if cfg!(feature = "g2") {
|
||||
nvcc.define("G2_DEFINED", None);
|
||||
}
|
||||
nvcc.cuda(true);
|
||||
nvcc.define("FEATURE_BN254", None);
|
||||
nvcc.debug(false);
|
||||
nvcc.flag(&arch);
|
||||
nvcc.flag(&stream);
|
||||
nvcc.shared_flag(false);
|
||||
// nvcc.static_flag(true);
|
||||
nvcc.files([
|
||||
"../icicle-cuda/curves/index.cu",
|
||||
]);
|
||||
nvcc.compile("ingo_icicle"); //TODO: extension??
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
/// Compile-time description of a field whose elements are stored as `NUM_LIMBS` 32-bit limbs.
pub trait Field<const NUM_LIMBS: usize> {
    /// Limbs of the field modulus, supplied by each implementor.
    const MODOLUS: [u32; NUM_LIMBS];

    /// Number of limbs per element; defaults to the `NUM_LIMBS` const parameter.
    const LIMBS: usize = NUM_LIMBS;
}
|
||||
@@ -1,3 +0,0 @@
|
||||
pub mod field;
|
||||
pub mod scalar;
|
||||
pub mod point;
|
||||
@@ -1,108 +0,0 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
use super::scalar::{get_fixed_limbs, self};
|
||||
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointT<BF: scalar::ScalarTrait> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
pub z: BF,
|
||||
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> Default for PointT<BF> {
    /// The default point is the zero point (see `PointT::zero`).
    fn default() -> Self {
        Self::zero()
    }
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> PointT<BF> {
    /// Returns the point with coordinates `(0, 1, 0)`.
    pub fn zero() -> Self {
        let x = BF::zero();
        let y = BF::one();
        let z = BF::zero();
        Self { x, y, z }
    }

    /// Alias for [`PointT::zero`].
    pub fn infinity() -> Self {
        Self::zero()
    }
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinityT<BF> {
|
||||
pub x: BF,
|
||||
pub y: BF,
|
||||
}
|
||||
|
||||
impl<BF: scalar::ScalarTrait> Default for PointAffineNoInfinityT<BF> {
    /// The default value has both coordinates set to `BF::zero()`.
    fn default() -> Self {
        Self {
            x: BF::zero(),
            y: BF::zero(),
        }
    }
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointAffineNoInfinityT<BF> {
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> PointT<BF> {
|
||||
PointT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BF::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
PointT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y),
|
||||
z: BF::from_limbs(z)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> PointT<BF> {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BF::base_limbs(), "length must be 3 * {}", BF::base_limbs());
|
||||
PointT {
|
||||
x: BF::from_limbs(value[..BF::base_limbs()].try_into().unwrap()),
|
||||
y: BF::from_limbs(value[BF::base_limbs()..BF::base_limbs() * 2].try_into().unwrap()),
|
||||
z: BF::from_limbs(value[BF::base_limbs() * 2..].try_into().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinityT<BF> {
|
||||
PointAffineNoInfinityT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,102 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination};
|
||||
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use super::field::{Field, self};
|
||||
|
||||
/// Copies `val` into a fixed-size array of `NUM_LIMBS` limbs.
///
/// Shorter inputs are padded with zeros at the end; an input of exactly `NUM_LIMBS`
/// limbs is copied as is.
///
/// # Panics
/// Panics with "slice has too many elements" when `val` holds more than `NUM_LIMBS` limbs.
pub fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    if val.len() > NUM_LIMBS {
        panic!("slice has too many elements");
    }

    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..val.len()].copy_from_slice(val);
    limbs
}
|
||||
|
||||
/// Common interface of limb-based field elements used as scalars and point coordinates.
pub trait ScalarTrait {
    /// Number of u32 limbs in one element.
    fn base_limbs() -> usize;

    /// The additive identity.
    fn zero() -> Self;

    /// The multiplicative identity.
    fn one() -> Self;

    /// Builds an element from a slice of u32 limbs.
    fn from_limbs(value: &[u32]) -> Self;

    /// Borrows the element's limbs.
    fn limbs(&self) -> &[u32];

    /// Serializes the element to bytes, little-endian.
    fn to_bytes_le(&self) -> Vec<u8>;
}
|
||||
|
||||
/// A field element stored as `NUM_LIMBS` u32 limbs, tagged with the field marker type `M`.
///
/// `phantom` carries no data; it only ties the value to `M` at the type level.
#[derive(Clone, Copy, Debug, PartialEq)]
#[repr(C)]
pub struct ScalarT<M, const NUM_LIMBS: usize> {
    pub(crate) phantom: PhantomData<M>,
    pub(crate) value: [u32; NUM_LIMBS],
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarTrait for ScalarT<M, NUM_LIMBS>
|
||||
where
|
||||
M: Field<NUM_LIMBS>,
|
||||
{
|
||||
|
||||
fn base_limbs() -> usize {
|
||||
return NUM_LIMBS;
|
||||
}
|
||||
|
||||
fn zero() -> Self {
|
||||
ScalarT {
|
||||
value: [0u32; NUM_LIMBS],
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
value: get_fixed_limbs(value),
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
ScalarT { value: s, phantom: PhantomData }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.value
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
fn limbs(&self) -> &[u32] {
|
||||
&self.value
|
||||
}
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarT<M, NUM_LIMBS> where M: field::Field<NUM_LIMBS>{
|
||||
pub fn from_limbs_le(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
Self::from_limbs(value)
|
||||
}
|
||||
|
||||
pub fn from_limbs_be(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
let mut value = value.to_vec();
|
||||
value.reverse();
|
||||
Self::from_limbs_le(&value)
|
||||
}
|
||||
|
||||
// Additional Functions
|
||||
pub fn add(&self, other:ScalarT<M, NUM_LIMBS>) -> ScalarT<M,NUM_LIMBS>{ // overload +
|
||||
return ScalarT{value: [self.value[0] + other.value[0];NUM_LIMBS], phantom: PhantomData };
|
||||
}
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination, DeviceCopy};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::basic_structs::point::{PointT, PointAffineNoInfinityT};
|
||||
use crate::basic_structs::scalar::ScalarT;
|
||||
use crate::basic_structs::field::Field;
|
||||
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct ScalarField;
|
||||
impl Field<8> for ScalarField {
|
||||
const MODOLUS: [u32; 8] = [0x0;8];
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct BaseField;
|
||||
impl Field<8> for BaseField {
|
||||
const MODOLUS: [u32; 8] = [0x0;8];
|
||||
}
|
||||
|
||||
|
||||
pub type Scalar = ScalarT<ScalarField,8>;
|
||||
impl Default for Scalar {
|
||||
fn default() -> Self {
|
||||
Self{value: [0x0;ScalarField::LIMBS], phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl DeviceCopy for Scalar{}
|
||||
|
||||
|
||||
pub type Base = ScalarT<BaseField,8>;
|
||||
impl Default for Base {
|
||||
fn default() -> Self {
|
||||
Self{value: [0x0;BaseField::LIMBS], phantom: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl DeviceCopy for Base{}
|
||||
|
||||
pub type Point = PointT<Base>;
|
||||
pub type PointAffineNoInfinity = PointAffineNoInfinityT<Base>;
|
||||
|
||||
extern "C" {
|
||||
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
@@ -1,797 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use ark_std::UniformRand;
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda::CudaFlags;
|
||||
use rustacuda::memory::DeviceBox;
|
||||
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
|
||||
use rustacuda_core::DevicePointer;
|
||||
use std::mem::transmute;
|
||||
use crate::basic_structs::scalar::ScalarTrait;
|
||||
use crate::curve_structs::*;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use rustacuda::memory::{CopyDestination, DeviceCopy};
|
||||
|
||||
extern "C" {
|
||||
fn msm_cuda(
|
||||
out: *mut Point,
|
||||
points: *const PointAffineNoInfinity,
|
||||
scalars: *const Scalar,
|
||||
count: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn msm_batch_cuda(
|
||||
out: *mut Point,
|
||||
points: *const PointAffineNoInfinity,
|
||||
scalars: *const Scalar,
|
||||
batch_size: usize,
|
||||
msm_size: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn commit_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_scalars: DevicePointer<Scalar>,
|
||||
d_points: DevicePointer<PointAffineNoInfinity>,
|
||||
count: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn commit_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_scalars: DevicePointer<Scalar>,
|
||||
d_points: DevicePointer<PointAffineNoInfinity>,
|
||||
count: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize,
|
||||
) -> c_uint;
|
||||
|
||||
fn build_domain_cuda(domain_size: usize, logn: usize, inverse: bool, device_id: usize) -> DevicePointer<Scalar>;
|
||||
|
||||
fn ntt_cuda(inout: *mut Scalar, n: usize, inverse: bool, device_id: usize) -> c_int;
|
||||
|
||||
fn ecntt_cuda(inout: *mut Point, n: usize, inverse: bool, device_id: usize) -> c_int;
|
||||
|
||||
fn ntt_batch_cuda(
|
||||
inout: *mut Scalar,
|
||||
arr_size: usize,
|
||||
n: usize,
|
||||
inverse: bool,
|
||||
) -> c_int;
|
||||
|
||||
fn ecntt_batch_cuda(inout: *mut Point, arr_size: usize, n: usize, inverse: bool) -> c_int;
|
||||
|
||||
fn interpolate_scalars_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_evaluations: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_scalars_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_evaluations: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_points_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_evaluations: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn interpolate_points_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_evaluations: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_on_coset_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_scalars_on_coset_batch_cuda(
|
||||
d_out: DevicePointer<Scalar>,
|
||||
d_coefficients: DevicePointer<Scalar>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_on_coset_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn evaluate_points_on_coset_batch_cuda(
|
||||
d_out: DevicePointer<Point>,
|
||||
d_coefficients: DevicePointer<Point>,
|
||||
d_domain: DevicePointer<Scalar>,
|
||||
domain_size: usize,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
coset_powers: DevicePointer<Scalar>,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_scalars_cuda(
|
||||
d_arr: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_scalars_batch_cuda(
|
||||
d_arr: DevicePointer<Scalar>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_points_cuda(
|
||||
d_arr: DevicePointer<Point>,
|
||||
n: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn reverse_order_points_batch_cuda(
|
||||
d_arr: DevicePointer<Point>,
|
||||
n: usize,
|
||||
batch_size: usize,
|
||||
device_id: usize
|
||||
) -> c_int;
|
||||
|
||||
fn vec_mod_mult_point(
|
||||
inout: *mut Point,
|
||||
scalars: *const Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
|
||||
fn vec_mod_mult_scalar(
|
||||
inout: *mut Scalar,
|
||||
scalars: *const Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
|
||||
fn matrix_vec_mod_mult(
|
||||
matrix_flattened: *const Scalar,
|
||||
input: *const Scalar,
|
||||
output: *mut Scalar,
|
||||
n_elements: usize,
|
||||
device_id: usize,
|
||||
) -> c_int;
|
||||
}
|
||||
|
||||
pub fn msm(points: &[PointAffineNoInfinity], scalars: &[Scalar], device_id: usize) -> Point {
|
||||
let count = points.len();
|
||||
if count != scalars.len() {
|
||||
todo!("variable length")
|
||||
}
|
||||
let mut ret = Point::zero();
|
||||
unsafe {
|
||||
msm_cuda(
|
||||
&mut ret as *mut _ as *mut Point,
|
||||
points as *const _ as *const PointAffineNoInfinity,
|
||||
scalars as *const _ as *const Scalar,
|
||||
scalars.len(),
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn msm_batch(
|
||||
points: &[PointAffineNoInfinity],
|
||||
scalars: &[Scalar],
|
||||
batch_size: usize,
|
||||
device_id: usize,
|
||||
) -> Vec<Point> {
|
||||
let count = points.len();
|
||||
if count != scalars.len() {
|
||||
todo!("variable length")
|
||||
}
|
||||
|
||||
let mut ret = vec![Point::zero(); batch_size];
|
||||
|
||||
unsafe {
|
||||
msm_batch_cuda(
|
||||
&mut ret[0] as *mut _ as *mut Point,
|
||||
points as *const _ as *const PointAffineNoInfinity,
|
||||
scalars as *const _ as *const Scalar,
|
||||
batch_size,
|
||||
count / batch_size,
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
pub fn commit(
|
||||
points: &mut DeviceBuffer<PointAffineNoInfinity>,
|
||||
scalars: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBox<Point> {
|
||||
let mut res = DeviceBox::new(&Point::zero()).unwrap();
|
||||
unsafe {
|
||||
commit_cuda(
|
||||
res.as_device_ptr(),
|
||||
scalars.as_device_ptr(),
|
||||
points.as_device_ptr(),
|
||||
scalars.len(),
|
||||
0,
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn commit_batch(
|
||||
points: &mut DeviceBuffer<PointAffineNoInfinity>,
|
||||
scalars: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(batch_size).unwrap() };
|
||||
unsafe {
|
||||
commit_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
scalars.as_device_ptr(),
|
||||
points.as_device_ptr(),
|
||||
scalars.len() / batch_size,
|
||||
batch_size,
|
||||
0,
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/// Compute an in-place NTT on the input data.
|
||||
fn ntt_internal(values: &mut [Scalar], device_id: usize, inverse: bool) -> i32 {
|
||||
let ret_code = unsafe {
|
||||
ntt_cuda(
|
||||
values as *mut _ as *mut Scalar,
|
||||
values.len(),
|
||||
inverse,
|
||||
device_id,
|
||||
)
|
||||
};
|
||||
ret_code
|
||||
}
|
||||
|
||||
pub fn ntt(values: &mut [Scalar], device_id: usize) {
|
||||
ntt_internal(values, device_id, false);
|
||||
}
|
||||
|
||||
pub fn intt(values: &mut [Scalar], device_id: usize) {
|
||||
ntt_internal(values, device_id, true);
|
||||
}
|
||||
|
||||
/// Compute an in-place NTT on the input data.
|
||||
fn ntt_internal_batch(
|
||||
values: &mut [Scalar],
|
||||
device_id: usize,
|
||||
batch_size: usize,
|
||||
inverse: bool,
|
||||
) -> i32 {
|
||||
unsafe {
|
||||
ntt_batch_cuda(
|
||||
values as *mut _ as *mut Scalar,
|
||||
values.len(),
|
||||
batch_size,
|
||||
inverse,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ntt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
|
||||
ntt_internal_batch(values, 0, batch_size, false);
|
||||
}
|
||||
|
||||
pub fn intt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
|
||||
ntt_internal_batch(values, 0, batch_size, true);
|
||||
}
|
||||
|
||||
/// Compute an in-place ECNTT on the input data.
|
||||
fn ecntt_internal(values: &mut [Point], inverse: bool, device_id: usize) -> i32 {
|
||||
unsafe {
|
||||
ecntt_cuda(
|
||||
values as *mut _ as *mut Point,
|
||||
values.len(),
|
||||
inverse,
|
||||
device_id,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ecntt(values: &mut [Point], device_id: usize) {
|
||||
ecntt_internal(values, false, device_id);
|
||||
}
|
||||
|
||||
/// Compute an in-place iECNTT on the input data.
|
||||
pub fn iecntt(values: &mut [Point], device_id: usize) {
|
||||
ecntt_internal(values, true, device_id);
|
||||
}
|
||||
|
||||
/// Compute an in-place ECNTT on the input data.
|
||||
fn ecntt_internal_batch(
|
||||
values: &mut [Point],
|
||||
device_id: usize,
|
||||
batch_size: usize,
|
||||
inverse: bool,
|
||||
) -> i32 {
|
||||
unsafe {
|
||||
ecntt_batch_cuda(
|
||||
values as *mut _ as *mut Point,
|
||||
values.len(),
|
||||
batch_size,
|
||||
inverse,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn ecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
|
||||
ecntt_internal_batch(values, 0, batch_size, false);
|
||||
}
|
||||
|
||||
/// Compute an in-place iECNTT on the input data.
|
||||
pub fn iecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
|
||||
ecntt_internal_batch(values, 0, batch_size, true);
|
||||
}
|
||||
|
||||
pub fn build_domain(domain_size: usize, logn: usize, inverse: bool) -> DeviceBuffer<Scalar> {
|
||||
unsafe {
|
||||
DeviceBuffer::from_raw_parts(build_domain_cuda(
|
||||
domain_size,
|
||||
logn,
|
||||
inverse,
|
||||
0
|
||||
), domain_size)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn reverse_order_scalars(
|
||||
d_scalars: &mut DeviceBuffer<Scalar>,
|
||||
) {
|
||||
unsafe { reverse_order_scalars_cuda(
|
||||
d_scalars.as_device_ptr(),
|
||||
d_scalars.len(),
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_scalars_batch(
|
||||
d_scalars: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) {
|
||||
unsafe { reverse_order_scalars_batch_cuda(
|
||||
d_scalars.as_device_ptr(),
|
||||
d_scalars.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_points(
|
||||
d_points: &mut DeviceBuffer<Point>,
|
||||
) {
|
||||
unsafe { reverse_order_points_cuda(
|
||||
d_points.as_device_ptr(),
|
||||
d_points.len(),
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn reverse_order_points_batch(
|
||||
d_points: &mut DeviceBuffer<Point>,
|
||||
batch_size: usize,
|
||||
) {
|
||||
unsafe { reverse_order_points_batch_cuda(
|
||||
d_points.as_device_ptr(),
|
||||
d_points.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
); }
|
||||
}
|
||||
|
||||
pub fn interpolate_scalars(
|
||||
d_evaluations: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe { interpolate_scalars_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_scalars_batch(
|
||||
d_evaluations: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe { interpolate_scalars_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
batch_size,
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_points(
|
||||
d_evaluations: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe { interpolate_points_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn interpolate_points_batch(
|
||||
d_evaluations: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe { interpolate_points_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_evaluations.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
batch_size,
|
||||
0
|
||||
) };
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_on_coset(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_on_coset_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_scalars_on_coset_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Scalar>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Scalar> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_scalars_on_coset_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_on_coset(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_on_coset_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len(),
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn evaluate_points_on_coset_batch(
|
||||
d_coefficients: &mut DeviceBuffer<Point>,
|
||||
d_domain: &mut DeviceBuffer<Scalar>,
|
||||
batch_size: usize,
|
||||
coset_powers: &mut DeviceBuffer<Scalar>,
|
||||
) -> DeviceBuffer<Point> {
|
||||
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
|
||||
unsafe {
|
||||
evaluate_points_on_coset_batch_cuda(
|
||||
res.as_device_ptr(),
|
||||
d_coefficients.as_device_ptr(),
|
||||
d_domain.as_device_ptr(),
|
||||
d_domain.len(),
|
||||
d_coefficients.len() / batch_size,
|
||||
batch_size,
|
||||
coset_powers.as_device_ptr(),
|
||||
0
|
||||
);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
pub fn multp_vec(a: &mut [Point], b: &[Scalar], device_id: usize) {
|
||||
assert_eq!(a.len(), b.len());
|
||||
unsafe {
|
||||
vec_mod_mult_point(
|
||||
a as *mut _ as *mut Point,
|
||||
b as *const _ as *const Scalar,
|
||||
a.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mult_sc_vec(a: &mut [Scalar], b: &[Scalar], device_id: usize) {
|
||||
assert_eq!(a.len(), b.len());
|
||||
unsafe {
|
||||
vec_mod_mult_scalar(
|
||||
a as *mut _ as *mut Scalar,
|
||||
b as *const _ as *const Scalar,
|
||||
a.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Multiply a matrix by a scalar:
|
||||
// `a` - flattenned matrix;
|
||||
// `b` - vector to multiply `a` by;
|
||||
pub fn mult_matrix_by_vec(a: &[Scalar], b: &[Scalar], device_id: usize) -> Vec<Scalar> {
|
||||
let mut c = Vec::with_capacity(b.len());
|
||||
for i in 0..b.len() {
|
||||
c.push(Scalar::zero());
|
||||
}
|
||||
unsafe {
|
||||
matrix_vec_mod_mult(
|
||||
a as *const _ as *const Scalar,
|
||||
b as *const _ as *const Scalar,
|
||||
c.as_mut_slice() as *mut _ as *mut Scalar,
|
||||
b.len(),
|
||||
device_id,
|
||||
);
|
||||
}
|
||||
c
|
||||
}
|
||||
|
||||
pub fn clone_buffer<T: DeviceCopy>(buf: &mut DeviceBuffer<T>) -> DeviceBuffer<T> {
|
||||
let mut buf_cpy = unsafe { DeviceBuffer::uninitialized(buf.len()).unwrap() };
|
||||
unsafe { buf_cpy.copy_from(buf) };
|
||||
return buf_cpy;
|
||||
}
|
||||
|
||||
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> {
|
||||
let rng: Box<dyn RngCore> = match seed {
|
||||
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
|
||||
None => Box::new(rand::thread_rng()),
|
||||
};
|
||||
rng
|
||||
}
|
||||
|
||||
fn set_up_device() {
|
||||
// Set up the context, load the module, and create a stream to run kernels in.
|
||||
rustacuda::init(CudaFlags::empty()).unwrap();
|
||||
let device = Device::get_device(0).unwrap();
|
||||
let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device).unwrap();
|
||||
}
|
||||
|
||||
pub fn generate_random_points(
|
||||
count: usize,
|
||||
mut rng: Box<dyn RngCore>,
|
||||
) -> Vec<PointAffineNoInfinity> {
|
||||
(0..count)
|
||||
.map(|_| Point::from_ark(G1Projective_BN254::rand(&mut rng)).to_xy_strip_z())
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_random_points_proj(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Point> {
|
||||
(0..count)
|
||||
.map(|_| Point::from_ark(G1Projective_BN254::rand(&mut rng)))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn generate_random_scalars(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Scalar> {
|
||||
(0..count)
|
||||
.map(|_| Scalar::from_ark(Fr_BN254::rand(&mut rng).into_repr()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn set_up_points(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Point>, DeviceBuffer<Point>, DeviceBuffer<Scalar>) {
|
||||
set_up_device();
|
||||
|
||||
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
|
||||
|
||||
let seed = Some(0); // fix the rng to get two equal scalar
|
||||
let vector = generate_random_points_proj(test_size, get_rng(seed));
|
||||
let mut vector_mut = vector.clone();
|
||||
|
||||
let mut d_vector = DeviceBuffer::from_slice(&vector[..]).unwrap();
|
||||
(vector_mut, d_vector, d_domain)
|
||||
}
|
||||
|
||||
pub fn set_up_scalars(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Scalar>, DeviceBuffer<Scalar>, DeviceBuffer<Scalar>) {
|
||||
set_up_device();
|
||||
|
||||
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
|
||||
|
||||
let seed = Some(0); // fix the rng to get two equal scalars
|
||||
let mut vector_mut = generate_random_scalars(test_size, get_rng(seed));
|
||||
|
||||
let mut d_vector = DeviceBuffer::from_slice(&vector_mut[..]).unwrap();
|
||||
(vector_mut, d_vector, d_domain)
|
||||
}
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
pub mod test_bn254;
|
||||
pub mod basic_structs;
|
||||
pub mod from_cuda;
|
||||
pub mod curve_structs;
|
||||
@@ -1,816 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use ark_std::UniformRand;
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda::CudaFlags;
|
||||
use rustacuda::memory::DeviceBox;
|
||||
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
|
||||
use rustacuda_core::DevicePointer;
|
||||
use std::mem::transmute;
|
||||
pub use crate::basic_structs::scalar::ScalarTrait;
|
||||
pub use crate::curve_structs::*;
|
||||
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use rustacuda::memory::{CopyDestination, DeviceCopy};
|
||||
|
||||
|
||||
impl Scalar {
|
||||
pub fn to_biginteger254(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_biginteger256(ark: BigInteger256) -> Self {
|
||||
Self{ value: u64_vec_to_u32_vec(&ark.0).try_into().unwrap(), phantom : PhantomData}
|
||||
}
|
||||
|
||||
pub fn to_biginteger256_transmute(&self) -> BigInteger256 {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_biginteger_transmute(v: BigInteger256) -> Scalar {
|
||||
Scalar{ value: unsafe{ transmute(v)}, phantom : PhantomData }
|
||||
}
|
||||
|
||||
pub fn to_ark_transmute(&self) -> Fr_BN254 {
|
||||
unsafe { std::mem::transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_ark_transmute(v: &Fr_BN254) -> Scalar {
|
||||
unsafe { std::mem::transmute_copy(v) }
|
||||
}
|
||||
|
||||
pub fn to_ark_mod_p(&self) -> Fr_BN254 {
|
||||
Fr_BN254::new(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap()))
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> Fr_BN254 {
|
||||
Fr_BN254::from_repr(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())).unwrap()
|
||||
}
|
||||
|
||||
pub fn from_ark(v: BigInteger256) -> Scalar {
|
||||
Self { value : u64_vec_to_u32_vec(&v.0).try_into().unwrap(), phantom: PhantomData}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
impl Base {
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger256) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
impl Point {
|
||||
pub fn to_ark(&self) -> G1Projective_BN254 {
|
||||
self.to_ark_affine().into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_BN254 {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_BN254::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_BN254::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_BN254::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_BN254::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_BN254) -> Point {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point {
|
||||
x: Base::from_ark((ark.x * z_invsq).into_repr()),
|
||||
y: Base::from_ark((ark.y * z_invq3).into_repr()),
|
||||
z: Base::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PointAffineNoInfinity {
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_BN254 {
|
||||
G1Affine_BN254::new(Fq_BN254::new(self.x.to_ark()), Fq_BN254::new(self.y.to_ark()), false)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_BN254 {
|
||||
G1Affine_BN254::new(
|
||||
Fq_BN254::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_BN254::from_repr(self.y.to_ark()).unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(p: &G1Affine_BN254) -> Self {
|
||||
PointAffineNoInfinity {
|
||||
x: Base::from_ark(p.x.into_repr()),
|
||||
y: Base::from_ark(p.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Point {
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity {
|
||||
x: Base::from_ark(ark_affine.x.into_repr()),
|
||||
y: Base::from_ark(ark_affine.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests_bn254 {
|
||||
use std::ops::Add;
|
||||
use ark_bn254::{Fr, G1Affine, G1Projective};
|
||||
use ark_ec::{msm::VariableBaseMSM, AffineCurve, ProjectiveCurve};
|
||||
use ark_ff::{FftField, Field, Zero, PrimeField};
|
||||
use ark_std::UniformRand;
|
||||
use rustacuda::prelude::{DeviceBuffer, CopyDestination};
|
||||
use crate::curve_structs::{Point, Scalar, Base};
|
||||
use crate::basic_structs::scalar::ScalarTrait;
|
||||
use crate::from_cuda::{generate_random_points, get_rng, generate_random_scalars, msm, msm_batch, set_up_scalars, commit, commit_batch, ntt, intt, generate_random_points_proj, ecntt, iecntt, ntt_batch, ecntt_batch, iecntt_batch, intt_batch, reverse_order_scalars_batch, interpolate_scalars_batch, set_up_points, reverse_order_points, interpolate_points, reverse_order_points_batch, interpolate_points_batch, evaluate_scalars, interpolate_scalars, reverse_order_scalars, evaluate_points, build_domain, evaluate_scalars_on_coset, evaluate_points_on_coset, mult_matrix_by_vec, mult_sc_vec, multp_vec,evaluate_scalars_batch, evaluate_points_batch, evaluate_scalars_on_coset_batch, evaluate_points_on_coset_batch};
|
||||
|
||||
fn random_points_ark_proj(nof_elements: usize) -> Vec<G1Projective> {
|
||||
let mut rng = ark_std::rand::thread_rng();
|
||||
let mut points_ga: Vec<G1Projective> = Vec::new();
|
||||
for _ in 0..nof_elements {
|
||||
let aff = G1Projective::rand(&mut rng);
|
||||
points_ga.push(aff);
|
||||
}
|
||||
points_ga
|
||||
}
|
||||
|
||||
fn ecntt_arc_naive(
|
||||
points: &Vec<G1Projective>,
|
||||
size: usize,
|
||||
inverse: bool,
|
||||
) -> Vec<G1Projective> {
|
||||
let mut result: Vec<G1Projective> = Vec::new();
|
||||
for _ in 0..size {
|
||||
result.push(G1Projective::zero());
|
||||
}
|
||||
let rou: Fr;
|
||||
if !inverse {
|
||||
rou = Fr::get_root_of_unity(size).unwrap();
|
||||
} else {
|
||||
rou = Fr::inverse(&Fr::get_root_of_unity(size).unwrap()).unwrap();
|
||||
}
|
||||
for k in 0..size {
|
||||
for l in 0..size {
|
||||
let pow: [u64; 1] = [(l * k).try_into().unwrap()];
|
||||
let mul_rou = Fr::pow(&rou, &pow);
|
||||
result[k] = result[k].add(points[l].into_affine().mul(mul_rou));
|
||||
}
|
||||
}
|
||||
if inverse {
|
||||
let size2 = size as u64;
|
||||
for k in 0..size {
|
||||
let multfactor = Fr::inverse(&Fr::from(size2)).unwrap();
|
||||
result[k] = result[k].into_affine().mul(multfactor);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
fn check_eq(points: &Vec<G1Projective>, points2: &Vec<G1Projective>) -> bool {
|
||||
let mut eq = true;
|
||||
for i in 0..points.len() {
|
||||
if points2[i].ne(&points[i]) {
|
||||
eq = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return eq;
|
||||
}
|
||||
|
||||
fn test_naive_ark_ecntt(size: usize) {
|
||||
let points = random_points_ark_proj(size);
|
||||
let result1: Vec<G1Projective> = ecntt_arc_naive(&points, size, false);
|
||||
let result2: Vec<G1Projective> = ecntt_arc_naive(&result1, size, true);
|
||||
assert!(!check_eq(&result2, &result1));
|
||||
assert!(check_eq(&result2, &points));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_msm() {
|
||||
let test_sizes = [6, 9];
|
||||
|
||||
for pow2 in test_sizes {
|
||||
let count = 1 << pow2;
|
||||
let seed = None; // set Some to provide seed
|
||||
let points = generate_random_points(count, get_rng(seed));
|
||||
let scalars = generate_random_scalars(count, get_rng(seed));
|
||||
|
||||
let msm_result = msm(&points, &scalars, 0);
|
||||
|
||||
let point_r_ark: Vec<_> = points.iter().map(|x| x.to_ark_repr()).collect();
|
||||
let scalars_r_ark: Vec<_> = scalars.iter().map(|x| x.to_ark()).collect();
|
||||
|
||||
let msm_result_ark = VariableBaseMSM::multi_scalar_mul(&point_r_ark, &scalars_r_ark);
|
||||
|
||||
assert_eq!(msm_result.to_ark_affine(), msm_result_ark);
|
||||
assert_eq!(msm_result.to_ark(), msm_result_ark);
|
||||
assert_eq!(
|
||||
msm_result.to_ark_affine(),
|
||||
Point::from_ark(msm_result_ark).to_ark_affine()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Compares `msm_batch` against one arkworks MSM per chunk, for several batch and MSM sizes.
#[test]
fn test_batch_msm() {
    for batch_pow2 in [2, 4] {
        for pow2 in [4, 6] {
            let msm_size = 1 << pow2;
            let batch_size = 1 << batch_pow2;
            let seed = None; // set Some to provide seed
            let points_batch = generate_random_points(msm_size * batch_size, get_rng(seed));
            let scalars_batch = generate_random_scalars(msm_size * batch_size, get_rng(seed));

            let ark_points: Vec<_> = points_batch
                .iter()
                .map(|point| point.to_ark_repr())
                .collect();
            let ark_scalars: Vec<_> = scalars_batch
                .iter()
                .map(|scalar| scalar.to_ark())
                .collect();

            // one reference MSM per `msm_size`-long chunk of the batch
            let mut expected = Vec::new();
            for (point_chunk, scalar_chunk) in ark_points
                .chunks(msm_size)
                .zip(ark_scalars.chunks(msm_size))
            {
                let reference = VariableBaseMSM::multi_scalar_mul(point_chunk, scalar_chunk);
                expected.push(Point::from_ark(reference));
            }

            let result = msm_batch(&points_batch, &scalars_batch, batch_size, 0);

            assert_eq!(result, expected);
        }
    }
}
|
||||
|
||||
/// Checks that `commit` on device buffers yields the same point as `msm` on the host copies
/// of the same inputs, and that the result is not the zero point.
#[test]
fn test_commit() {
    let test_size = 1 << 8;
    let seed = Some(0);
    // the host-side inputs are only read, so they are not declared `mut`
    let (scalars, mut d_scalars, _) = set_up_scalars(test_size, 0, false);
    let points = generate_random_points(test_size, get_rng(seed));
    let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();

    let msm_result = msm(&points, &scalars, 0);
    let mut d_commit_result = commit(&mut d_points, &mut d_scalars);
    let mut h_commit_result = Point::zero();
    d_commit_result.copy_to(&mut h_commit_result).unwrap();

    assert_eq!(msm_result, h_commit_result);
    assert_ne!(msm_result, Point::zero());
    assert_ne!(h_commit_result, Point::zero());
}
|
||||
|
||||
/// Checks that `commit_batch` on device buffers matches `msm_batch` on the host copies of the
/// same inputs, and that no commitment in the batch is the zero point.
#[test]
fn test_batch_commit() {
    let batch_size = 4;
    let test_size = 1 << 12;
    let seed = Some(0);
    let (scalars, mut d_scalars, _) = set_up_scalars(test_size * batch_size, 0, false);
    let points = generate_random_points(test_size * batch_size, get_rng(seed));
    let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();

    let msm_result = msm_batch(&points, &scalars, batch_size, 0);
    let mut d_commit_result = commit_batch(&mut d_points, &mut d_scalars, batch_size);

    // host buffer with one slot per commitment in the batch
    let mut h_commit_result: Vec<Point> = vec![Point::zero(); batch_size];
    d_commit_result
        .copy_to(&mut h_commit_result[..])
        .unwrap();

    assert_eq!(msm_result, h_commit_result);
    for commitment in h_commit_result {
        assert_ne!(commitment, Point::zero());
    }
}
|
||||
|
||||
/// Round-trip test for the scalar NTT (`ntt`/`intt`) and the EC-NTT (`ecntt`/`iecntt`), with
/// the naive arkworks EC-NTT used as a reference for the point case.
#[test]
fn test_ntt() {
    // ---- scalar NTT ----
    let seed = None; // some value to fix the rng
    let test_size = 1 << 3;

    let scalars = generate_random_scalars(test_size, get_rng(seed));

    let mut ntt_result = scalars.clone();
    ntt(&mut ntt_result, 0);
    assert_ne!(ntt_result, scalars);

    let mut intt_result = ntt_result.clone();
    intt(&mut intt_result, 0);
    assert_eq!(intt_result, scalars);

    // ---- EC-NTT ----
    let points_proj = generate_random_points_proj(test_size, get_rng(seed));

    test_naive_ark_ecntt(test_size);

    assert!(points_proj[0]
        .to_ark()
        .into_affine()
        .is_on_curve());

    // naive arkworks reference: forward then inverse must give back the input
    let points_proj_ark: Vec<G1Projective> = points_proj
        .iter()
        .map(|point| point.to_ark())
        .collect();
    let ark_len = points_proj_ark.len();

    let ecntt_result_naive = ecntt_arc_naive(&points_proj_ark, ark_len, false);
    let iecntt_result_naive = ecntt_arc_naive(&ecntt_result_naive, ark_len, true);

    assert_eq!(points_proj_ark, iecntt_result_naive);

    // ingo gpu
    let mut ecntt_result = points_proj.to_vec();
    ecntt(&mut ecntt_result, 0);
    assert_ne!(ecntt_result, points_proj);

    let mut iecntt_result = ecntt_result.clone();
    iecntt(&mut iecntt_result, 0);

    // affine form of the original points, shared by the two final comparisons
    let expected_affine: Vec<G1Affine> = points_proj
        .iter()
        .map(|point| point.to_ark_affine())
        .collect();
    assert_eq!(iecntt_result_naive, expected_affine);

    let iecntt_affine: Vec<G1Affine> = iecntt_result
        .iter()
        .map(|point| point.to_ark_affine())
        .collect();
    assert_eq!(iecntt_affine, expected_affine);
}
|
||||
|
||||
/// Checks that the batched transforms (`ntt_batch`, `intt_batch`, `ecntt_batch`,
/// `iecntt_batch`) agree with running the single-vector transform on every `test_size`-long
/// chunk, and that the inverse transforms restore the original input.
#[test]
fn test_ntt_batch() {
    // ---- scalar NTT ----
    let seed = None; // some value to fix the rng
    let test_size = 1 << 5;
    let batches = 4;

    let scalars_batch: Vec<Scalar> =
        generate_random_scalars(test_size * batches, get_rng(seed));

    let mut ntt_result = scalars_batch.clone();

    // do batch ntt
    ntt_batch(&mut ntt_result, test_size, 0);

    // do ntt for every chunk (each chunk is copied straight out of the input batch)
    let mut ntt_result_vec_of_vec: Vec<Vec<Scalar>> = scalars_batch
        .chunks(test_size)
        .map(|chunk| chunk.to_vec())
        .collect();
    for chunk in ntt_result_vec_of_vec.iter_mut() {
        ntt(chunk, 0);
    }

    // check that the ntt of each chunk is equal to the matching slice of the batch ntt
    for i in 0..batches {
        assert_eq!(
            ntt_result_vec_of_vec[i],
            ntt_result[i * test_size..(i + 1) * test_size]
        );
    }

    // check that ntt output is different from input
    assert_ne!(ntt_result, scalars_batch);

    let mut intt_result = ntt_result.clone();

    // do batch intt
    intt_batch(&mut intt_result, test_size, 0);

    // do intt for every chunk
    let mut intt_result_vec_of_vec = ntt_result_vec_of_vec.clone();
    for chunk in intt_result_vec_of_vec.iter_mut() {
        intt(chunk, 0);
    }

    // check that the intt of each chunk is equal to the matching slice of the batch intt
    for i in 0..batches {
        assert_eq!(
            intt_result_vec_of_vec[i],
            intt_result[i * test_size..(i + 1) * test_size]
        );
    }

    assert_eq!(intt_result, scalars_batch);

    // ---- EC-NTT ----
    let points_proj = generate_random_points_proj(test_size * batches, get_rng(seed));

    let mut ntt_result_points = points_proj.clone();

    // do batch ecntt
    ecntt_batch(&mut ntt_result_points, test_size, 0);

    // do ecntt for every chunk
    let mut ntt_result_points_vec_of_vec: Vec<Vec<Point>> = points_proj
        .chunks(test_size)
        .map(|chunk| chunk.to_vec())
        .collect();
    for chunk in ntt_result_points_vec_of_vec.iter_mut() {
        ecntt(chunk, 0);
    }

    // check that the ecntt of each chunk is equal to the matching slice of the batch ecntt
    for i in 0..batches {
        assert_eq!(
            ntt_result_points_vec_of_vec[i],
            ntt_result_points[i * test_size..(i + 1) * test_size]
        );
    }

    assert_ne!(ntt_result_points, points_proj);

    let mut intt_result_points = ntt_result_points.clone();

    // do batch iecntt
    iecntt_batch(&mut intt_result_points, test_size, 0);

    // do iecntt for every chunk
    let mut intt_result_points_vec_of_vec = ntt_result_points_vec_of_vec.clone();
    for chunk in intt_result_points_vec_of_vec.iter_mut() {
        iecntt(chunk, 0);
    }

    // check that the iecntt of each chunk of points is equal to the matching slice of the batch iecntt
    for i in 0..batches {
        assert_eq!(
            intt_result_points_vec_of_vec[i],
            intt_result_points[i * test_size..(i + 1) * test_size]
        );
    }

    assert_eq!(intt_result_points, points_proj);
}
|
||||
|
||||
/// Checks that `interpolate_scalars` on reversed-order device evaluations gives the same
/// coefficients as `intt` on the host copy of the evaluations.
#[test]
fn test_scalar_interpolation() {
    let log_test_size = 7;
    let test_size = 1 << log_test_size;
    let (mut evals_mut, mut d_evals, mut d_domain) =
        set_up_scalars(test_size, log_test_size, true);

    // device path
    reverse_order_scalars(&mut d_evals);
    let mut d_coeffs = interpolate_scalars(&mut d_evals, &mut d_domain);

    // host reference
    intt(&mut evals_mut, 0);

    let mut h_coeffs: Vec<Scalar> = vec![Scalar::zero(); test_size];
    d_coeffs
        .copy_to(&mut h_coeffs[..])
        .unwrap();

    assert_eq!(h_coeffs, evals_mut);
}
|
||||
|
||||
/// Batched variant of the scalar interpolation check: `interpolate_scalars_batch` on the
/// device must match `intt_batch` on the host copy of the evaluations.
#[test]
fn test_scalar_batch_interpolation() {
    let batch_size = 4;
    let log_test_size = 10;
    let test_size = 1 << log_test_size;
    let (mut evals_mut, mut d_evals, mut d_domain) =
        set_up_scalars(test_size * batch_size, log_test_size, true);

    // device path
    reverse_order_scalars_batch(&mut d_evals, batch_size);
    let mut d_coeffs = interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size);

    // host reference
    intt_batch(&mut evals_mut, test_size, 0);

    let mut h_coeffs: Vec<Scalar> = vec![Scalar::zero(); test_size * batch_size];
    d_coeffs
        .copy_to(&mut h_coeffs[..])
        .unwrap();

    assert_eq!(h_coeffs, evals_mut);
}
|
||||
|
||||
/// Checks that `interpolate_points` on reversed-order device evaluations matches `iecntt` on
/// the host copy, and that no resulting coefficient is the zero point.
#[test]
fn test_point_interpolation() {
    let log_test_size = 6;
    let test_size = 1 << log_test_size;
    let (mut evals_mut, mut d_evals, mut d_domain) =
        set_up_points(test_size, log_test_size, true);

    // device path
    reverse_order_points(&mut d_evals);
    let mut d_coeffs = interpolate_points(&mut d_evals, &mut d_domain);

    // host reference
    iecntt(&mut evals_mut[..], 0);

    let mut h_coeffs: Vec<Point> = vec![Point::zero(); test_size];
    d_coeffs
        .copy_to(&mut h_coeffs[..])
        .unwrap();

    assert_eq!(h_coeffs, *evals_mut);
    for coeff in &h_coeffs {
        assert_ne!(*coeff, Point::zero());
    }
}
|
||||
|
||||
/// Batched variant of the point interpolation check: `interpolate_points_batch` on the device
/// must match `iecntt_batch` on the host copy, with no zero-point coefficients.
#[test]
fn test_point_batch_interpolation() {
    let batch_size = 4;
    let log_test_size = 6;
    let test_size = 1 << log_test_size;
    let (mut evals_mut, mut d_evals, mut d_domain) =
        set_up_points(test_size * batch_size, log_test_size, true);

    // device path
    reverse_order_points_batch(&mut d_evals, batch_size);
    let mut d_coeffs = interpolate_points_batch(&mut d_evals, &mut d_domain, batch_size);

    // host reference
    iecntt_batch(&mut evals_mut[..], test_size, 0);

    let mut h_coeffs: Vec<Point> = vec![Point::zero(); test_size * batch_size];
    d_coeffs
        .copy_to(&mut h_coeffs[..])
        .unwrap();

    assert_eq!(h_coeffs, *evals_mut);
    for coeff in &h_coeffs {
        assert_ne!(*coeff, Point::zero());
    }
}
|
||||
|
||||
/// Evaluates 2^6 scalar coefficients on a 2^8 domain, interpolates the result back, and
/// checks that the original coefficients are recovered followed by zeros.
#[test]
fn test_scalar_evaluation() {
    let log_test_domain_size = 8;
    let coeff_size = 1 << 6;
    let domain_size = 1 << log_test_domain_size;
    let (h_coeffs, mut d_coeffs, mut d_domain) =
        set_up_scalars(coeff_size, log_test_domain_size, false);
    let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);

    let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
    let mut d_coeffs_domain = interpolate_scalars(&mut d_evals, &mut d_domain_inv);

    let mut h_coeffs_domain: Vec<Scalar> = vec![Scalar::zero(); domain_size];
    d_coeffs_domain
        .copy_to(&mut h_coeffs_domain[..])
        .unwrap();

    // the leading entries are the original coefficients ...
    assert_eq!(h_coeffs, h_coeffs_domain[..coeff_size]);
    // ... and everything past them must be zero
    for idx in coeff_size..domain_size {
        assert_eq!(Scalar::zero(), h_coeffs_domain[idx]);
    }
}
|
||||
|
||||
/// Batched variant of the scalar evaluation round trip: each of the 6 polynomials must come
/// back as its original coefficients followed by zeros.
#[test]
fn test_scalar_batch_evaluation() {
    let batch_size = 6;
    let log_test_domain_size = 8;
    let domain_size = 1 << log_test_domain_size;
    let coeff_size = 1 << 6;
    let (h_coeffs, mut d_coeffs, mut d_domain) =
        set_up_scalars(coeff_size * batch_size, log_test_domain_size, false);
    let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);

    let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
    let mut d_coeffs_domain =
        interpolate_scalars_batch(&mut d_evals, &mut d_domain_inv, batch_size);

    let mut h_coeffs_domain: Vec<Scalar> = vec![Scalar::zero(); domain_size * batch_size];
    d_coeffs_domain
        .copy_to(&mut h_coeffs_domain[..])
        .unwrap();

    for j in 0..batch_size {
        // start of polynomial `j` in the input and in the domain-sized output
        let coeff_start = j * coeff_size;
        let domain_start = j * domain_size;

        assert_eq!(
            h_coeffs[coeff_start..(j + 1) * coeff_size],
            h_coeffs_domain[domain_start..domain_start + coeff_size]
        );
        for i in coeff_size..domain_size {
            assert_eq!(Scalar::zero(), h_coeffs_domain[domain_start + i]);
        }
    }
}
|
||||
|
||||
/// Evaluates point coefficients on the domain, interpolates the result back, and checks that
/// the original coefficients are recovered, are non-zero, and are followed by zero points.
#[test]
fn test_point_evaluation() {
    let log_test_domain_size = 7;
    let coeff_size = 1 << 7;
    let domain_size = 1 << log_test_domain_size;
    let (h_coeffs, mut d_coeffs, mut d_domain) =
        set_up_points(coeff_size, log_test_domain_size, false);
    let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);

    let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
    let mut d_coeffs_domain = interpolate_points(&mut d_evals, &mut d_domain_inv);

    let mut h_coeffs_domain: Vec<Point> = vec![Point::zero(); domain_size];
    d_coeffs_domain
        .copy_to(&mut h_coeffs_domain[..])
        .unwrap();

    assert_eq!(h_coeffs[..], h_coeffs_domain[..coeff_size]);
    // entries past the coefficients must be the zero point
    for idx in coeff_size..domain_size {
        assert_eq!(Point::zero(), h_coeffs_domain[idx]);
    }
    // the recovered coefficients themselves must not be the zero point
    for idx in 0..coeff_size {
        assert_ne!(h_coeffs_domain[idx], Point::zero());
    }
}
|
||||
|
||||
/// Batched variant of the point evaluation round trip: every polynomial in the batch must come
/// back as its original non-zero coefficients followed by zero points.
#[test]
fn test_point_batch_evaluation() {
    let batch_size = 4;
    let log_test_domain_size = 6;
    let domain_size = 1 << log_test_domain_size;
    let coeff_size = 1 << 5;
    let (h_coeffs, mut d_coeffs, mut d_domain) =
        set_up_points(coeff_size * batch_size, log_test_domain_size, false);
    let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);

    let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
    let mut d_coeffs_domain =
        interpolate_points_batch(&mut d_evals, &mut d_domain_inv, batch_size);

    let mut h_coeffs_domain: Vec<Point> = vec![Point::zero(); domain_size * batch_size];
    d_coeffs_domain
        .copy_to(&mut h_coeffs_domain[..])
        .unwrap();

    for j in 0..batch_size {
        // start of polynomial `j` in the input and in the domain-sized output
        let coeff_start = j * coeff_size;
        let domain_start = j * domain_size;

        assert_eq!(
            h_coeffs[coeff_start..(j + 1) * coeff_size],
            h_coeffs_domain[domain_start..(domain_start + coeff_size)]
        );
        for i in coeff_size..domain_size {
            assert_eq!(Point::zero(), h_coeffs_domain[domain_start + i]);
        }
        for i in domain_start..(domain_start + coeff_size) {
            assert_ne!(h_coeffs_domain[i], Point::zero());
        }
    }
}
|
||||
|
||||
/// Checks that evaluating on the subgroup and evaluating on the coset generated by 1 give the
/// same result.
///
/// The inverse domain that used to be built here was never used and has been dropped, and the
/// host buffer receiving the subgroup evaluations is now named `h_evals` (it was `h_coeffs`).
#[test]
fn test_scalar_evaluation_on_trivial_coset() {
    // checks that the evaluations on the subgroup is the same as on the coset generated by 1
    let log_test_domain_size = 8;
    let coeff_size = 1 << 6;
    let (_, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
    let mut d_trivial_coset_powers = build_domain(1 << log_test_domain_size, 0, false);

    // plain evaluation on the subgroup
    let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
    let mut h_evals: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
    d_evals.copy_to(&mut h_evals[..]).unwrap();

    // evaluation on the coset described by `d_trivial_coset_powers`
    let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_trivial_coset_powers);
    let mut h_evals_coset: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
    d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();

    assert_eq!(h_evals, h_evals_coset);
}
|
||||
|
||||
/// Checks that the subgroup evaluations followed by the coset evaluations equal the
/// evaluations on a subgroup twice as large.
#[test]
fn test_scalar_evaluation_on_coset() {
    // checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
    let log_test_size = 8;
    let test_size = 1 << log_test_size;
    let (_, mut d_coeffs, mut d_domain) = set_up_scalars(test_size, log_test_size, false);
    let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
    let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);

    // evaluation on the 2x larger domain
    let mut d_evals_large = evaluate_scalars(&mut d_coeffs, &mut d_large_domain);
    let mut h_evals_large: Vec<Scalar> = vec![Scalar::zero(); 2 * test_size];
    d_evals_large
        .copy_to(&mut h_evals_large[..])
        .unwrap();

    // evaluation on the subgroup
    let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
    let mut h_evals: Vec<Scalar> = vec![Scalar::zero(); test_size];
    d_evals
        .copy_to(&mut h_evals[..])
        .unwrap();

    // evaluation on the coset
    let mut d_evals_coset =
        evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
    let mut h_evals_coset: Vec<Scalar> = vec![Scalar::zero(); test_size];
    d_evals_coset
        .copy_to(&mut h_evals_coset[..])
        .unwrap();

    assert_eq!(h_evals[..], h_evals_large[..test_size]);
    assert_eq!(h_evals_coset[..], h_evals_large[test_size..2 * test_size]);
}
|
||||
|
||||
/// Batched variant of the scalar coset check: for every polynomial in the batch, subgroup and
/// coset evaluations must match the two halves of the evaluation on the 2x larger domain.
#[test]
fn test_scalar_batch_evaluation_on_coset() {
    // checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
    let batch_size = 4;
    let log_test_size = 6;
    let test_size = 1 << log_test_size;
    let (_, mut d_coeffs, mut d_domain) =
        set_up_scalars(test_size * batch_size, log_test_size, false);
    let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
    let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);

    // evaluation on the 2x larger domain
    let mut d_evals_large = evaluate_scalars_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
    let mut h_evals_large: Vec<Scalar> = vec![Scalar::zero(); 2 * test_size * batch_size];
    d_evals_large
        .copy_to(&mut h_evals_large[..])
        .unwrap();

    // evaluation on the subgroup
    let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
    let mut h_evals: Vec<Scalar> = vec![Scalar::zero(); test_size * batch_size];
    d_evals
        .copy_to(&mut h_evals[..])
        .unwrap();

    // evaluation on the coset
    let mut d_evals_coset = evaluate_scalars_on_coset_batch(
        &mut d_coeffs,
        &mut d_domain,
        batch_size,
        &mut d_coset_powers,
    );
    let mut h_evals_coset: Vec<Scalar> = vec![Scalar::zero(); test_size * batch_size];
    d_evals_coset
        .copy_to(&mut h_evals_coset[..])
        .unwrap();

    for i in 0..batch_size {
        // polynomial `i` occupies one chunk in the small outputs and two in the large one
        let small = i * test_size..(i + 1) * test_size;
        assert_eq!(
            h_evals_large[2 * i * test_size..(2 * i + 1) * test_size],
            h_evals[small.clone()]
        );
        assert_eq!(
            h_evals_large[(2 * i + 1) * test_size..(2 * i + 2) * test_size],
            h_evals_coset[small]
        );
    }
}
|
||||
|
||||
/// Point variant of the coset check: subgroup and coset evaluations must match the two halves
/// of the evaluation on the 2x larger domain, and no evaluation may be the zero point.
#[test]
fn test_point_evaluation_on_coset() {
    // checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
    let log_test_size = 8;
    let test_size = 1 << log_test_size;
    let (_, mut d_coeffs, mut d_domain) = set_up_points(test_size, log_test_size, false);
    let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
    let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);

    // evaluation on the 2x larger domain
    let mut d_evals_large = evaluate_points(&mut d_coeffs, &mut d_large_domain);
    let mut h_evals_large: Vec<Point> = vec![Point::zero(); 2 * test_size];
    d_evals_large
        .copy_to(&mut h_evals_large[..])
        .unwrap();

    // evaluation on the subgroup
    let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
    let mut h_evals: Vec<Point> = vec![Point::zero(); test_size];
    d_evals
        .copy_to(&mut h_evals[..])
        .unwrap();

    // evaluation on the coset
    let mut d_evals_coset =
        evaluate_points_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
    let mut h_evals_coset: Vec<Point> = vec![Point::zero(); test_size];
    d_evals_coset
        .copy_to(&mut h_evals_coset[..])
        .unwrap();

    assert_eq!(h_evals[..], h_evals_large[..test_size]);
    assert_eq!(h_evals_coset[..], h_evals_large[test_size..2 * test_size]);

    for idx in 0..test_size {
        assert_ne!(h_evals[idx], Point::zero());
        assert_ne!(h_evals_coset[idx], Point::zero());
        assert_ne!(h_evals_large[2 * idx], Point::zero());
        assert_ne!(h_evals_large[2 * idx + 1], Point::zero());
    }
}
|
||||
|
||||
/// Batched point variant of the coset check: per polynomial, subgroup and coset evaluations
/// must match the two halves of the 2x larger evaluation, with no zero-point results.
#[test]
fn test_point_batch_evaluation_on_coset() {
    // checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
    let batch_size = 2;
    let log_test_size = 6;
    let test_size = 1 << log_test_size;
    let (_, mut d_coeffs, mut d_domain) =
        set_up_points(test_size * batch_size, log_test_size, false);
    let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
    let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);

    // evaluation on the 2x larger domain
    let mut d_evals_large = evaluate_points_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
    let mut h_evals_large: Vec<Point> = vec![Point::zero(); 2 * test_size * batch_size];
    d_evals_large
        .copy_to(&mut h_evals_large[..])
        .unwrap();

    // evaluation on the subgroup
    let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
    let mut h_evals: Vec<Point> = vec![Point::zero(); test_size * batch_size];
    d_evals
        .copy_to(&mut h_evals[..])
        .unwrap();

    // evaluation on the coset
    let mut d_evals_coset = evaluate_points_on_coset_batch(
        &mut d_coeffs,
        &mut d_domain,
        batch_size,
        &mut d_coset_powers,
    );
    let mut h_evals_coset: Vec<Point> = vec![Point::zero(); test_size * batch_size];
    d_evals_coset
        .copy_to(&mut h_evals_coset[..])
        .unwrap();

    for i in 0..batch_size {
        // polynomial `i` occupies one chunk in the small outputs and two in the large one
        let small = i * test_size..(i + 1) * test_size;
        assert_eq!(
            h_evals_large[2 * i * test_size..(2 * i + 1) * test_size],
            h_evals[small.clone()]
        );
        assert_eq!(
            h_evals_large[(2 * i + 1) * test_size..(2 * i + 2) * test_size],
            h_evals_coset[small]
        );
    }

    for idx in 0..test_size * batch_size {
        assert_ne!(h_evals[idx], Point::zero());
        assert_ne!(h_evals_coset[idx], Point::zero());
        assert_ne!(h_evals_large[2 * idx], Point::zero());
        assert_ne!(h_evals_large[2 * idx + 1], Point::zero());
    }
}
|
||||
|
||||
// testing matrix multiplication by comparing the result of FFT with the naive multiplication by the DFT matrix
#[test]
fn test_matrix_multiplication() {
    let seed = None; // some value to fix the rng
    let test_size = 1 << 5;
    let rou = Fr::get_root_of_unity(test_size).unwrap();

    // row-major DFT matrix: entry (row, col) is rou^(row * col)
    let mut matrix_flattened: Vec<Scalar> = Vec::with_capacity(test_size * test_size);
    for row_num in 0..test_size {
        for col_num in 0..test_size {
            let pow: [u64; 1] = [(row_num * col_num).try_into().unwrap()];
            matrix_flattened.push(Scalar::from_ark(Fr::pow(&rou, &pow).into_repr()));
        }
    }

    let vector: Vec<Scalar> = generate_random_scalars(test_size, get_rng(seed));

    let result = mult_matrix_by_vec(&matrix_flattened, &vector, 0);

    let mut ntt_result = vector.clone();
    ntt(&mut ntt_result, 0);

    // we don't use the same roots of unity as arkworks, so the results are permutations
    // of one another and the only guaranteed fixed scalars are the following ones:
    let middle = test_size >> 1;
    assert_eq!(result[0], ntt_result[0]);
    assert_eq!(result[middle], ntt_result[middle]);
}
|
||||
|
||||
/// Runs `mult_sc_vec` on `[1, 1, 0]` with `[1, 0, 0]` as the second operand and expects the
/// in/out array to end up equal to `[1, 0, 0]`.
#[test]
#[allow(non_snake_case)]
fn test_vec_scalar_mul() {
    // `expected` doubles as the second operand of the call
    let expected = [Scalar::one(), Scalar::zero(), Scalar::zero()];
    let mut inout = [Scalar::one(), Scalar::one(), Scalar::zero()];

    mult_sc_vec(&mut inout, &expected, 0);

    assert_eq!(inout, expected);
}
|
||||
|
||||
/// Runs `multp_vec` on `[P, P, 0]` with scalars `[1, 0, 0]` and expects `[P, 0, 0]`, where `P`
/// is a placeholder point whose coordinates are all `Base::one()`.
#[test]
#[allow(non_snake_case)]
fn test_vec_point_mul() {
    let dummy_one = Point {
        x: Base::one(),
        y: Base::one(),
        z: Base::one(),
    };

    let scalars = [Scalar::one(), Scalar::zero(), Scalar::zero()];
    let expected = [dummy_one, Point::zero(), Point::zero()];
    let mut inout = [dummy_one, dummy_one, Point::zero()];

    multp_vec(&mut inout, &scalars, 0);

    assert_eq!(inout, expected);
}
|
||||
}
|
||||
@@ -23,12 +23,11 @@ fn main() {
|
||||
nvcc.define("G2_DEFINED", None);
|
||||
}
|
||||
nvcc.cuda(true);
|
||||
nvcc.define("FEATURE_BLS12_377", None);
|
||||
nvcc.debug(false);
|
||||
nvcc.flag(&arch);
|
||||
nvcc.flag(&stream);
|
||||
nvcc.files([
|
||||
"../icicle-cuda/curves/index.cu",
|
||||
"./icicle/curves/index.cu",
|
||||
]);
|
||||
nvcc.compile("ingo_icicle"); //TODO: extension??
|
||||
}
|
||||
@@ -1,13 +1,20 @@
|
||||
{
|
||||
"curve_name" : "bls12_377",
|
||||
"modolus_p" : 8444461749428370424248824938781546531375899335154063827935233455917409239041,
|
||||
"modulus_p" : 8444461749428370424248824938781546531375899335154063827935233455917409239041,
|
||||
"bit_count_p" : 253,
|
||||
"limb_p" : 8,
|
||||
"ntt_size" : 32,
|
||||
"modolus_q" : 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177,
|
||||
"modulus_q" : 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177,
|
||||
"bit_count_q" : 377,
|
||||
"limb_q" : 12,
|
||||
"root_of_unity" : 5928890464389279575069867463136436689218492512582288454256978381122364252082,
|
||||
"weierstrass_b" : 1,
|
||||
"gen_x" : 81937999373150964239938255573465948239988671502647976594219695644855304257327692006745978603320413799295628339695,
|
||||
"gen_y" : 241266749859715473739788878240585681733927191168601896383759122102112907357779751001206799952863815012735208165030
|
||||
"weierstrass_b_g2_re" : 0,
|
||||
"weierstrass_b_g2_im" : 155198655607781456406391640216936120121836107652948796323930557600032281009004493664981332883744016074664192874906,
|
||||
"g1_gen_x" : 81937999373150964239938255573465948239988671502647976594219695644855304257327692006745978603320413799295628339695,
|
||||
"g1_gen_y" : 241266749859715473739788878240585681733927191168601896383759122102112907357779751001206799952863815012735208165030,
|
||||
"g2_gen_x_re" : 233578398248691099356572568220835526895379068987715365179118596935057653620464273615301663571204657964920925606294,
|
||||
"g2_gen_x_im" : 140913150380207355837477652521042157274541796891053068589147167627541651775299824604154852141315666357241556069118,
|
||||
"g2_gen_y_re" : 63160294768292073209381361943935198908131692476676907196754037919244929611450776219210369229519898517858833747423,
|
||||
"g2_gen_y_im" : 149157405641012693445398062341192467754805999074082136895788947234480009303640899064710353187729182149407503257491
|
||||
}
|
||||
@@ -1,13 +1,20 @@
|
||||
{
|
||||
"curve_name" : "bls12_381",
|
||||
"modolus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
|
||||
"modulus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
|
||||
"bit_count_p" : 255,
|
||||
"limb_p" : 8,
|
||||
"ntt_size" : 32,
|
||||
"modolus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
|
||||
"modulus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
|
||||
"bit_count_q" : 381,
|
||||
"limb_q" : 12,
|
||||
"root_of_unity" : 937917089079007706106976984802249742464848817460758522850752807661925904159,
|
||||
"weierstrass_b" : 4,
|
||||
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
|
||||
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569
|
||||
"weierstrass_b_g2_re":4,
|
||||
"weierstrass_b_g2_im":4,
|
||||
"g1_gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
|
||||
"g1_gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569,
|
||||
"g2_gen_x_re" : 352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160,
|
||||
"g2_gen_x_im" : 3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758,
|
||||
"g2_gen_y_re" : 1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905,
|
||||
"g2_gen_y_im" : 927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582
|
||||
}
|
||||
@@ -1,13 +1,20 @@
|
||||
{
|
||||
"curve_name" : "bn254",
|
||||
"modolus_p" : 21888242871839275222246405745257275088548364400416034343698204186575808495617,
|
||||
"modulus_p" : 21888242871839275222246405745257275088548364400416034343698204186575808495617,
|
||||
"bit_count_p" : 254,
|
||||
"limb_p" : 8,
|
||||
"ntt_size" : 16,
|
||||
"modolus_q" : 21888242871839275222246405745257275088696311157297823662689037894645226208583,
|
||||
"ntt_size" : 28,
|
||||
"modulus_q" : 21888242871839275222246405745257275088696311157297823662689037894645226208583,
|
||||
"bit_count_q" : 254,
|
||||
"limb_q" : 8,
|
||||
"root_of_unity" : 19103219067921713944291392827692070036145651957329286315305642004821462161904,
|
||||
"weierstrass_b" : 3,
|
||||
"gen_x" : 1,
|
||||
"gen_y" : 2
|
||||
"weierstrass_b_g2_re" : 19485874751759354771024239261021720505790618469301721065564631296452457478373,
|
||||
"weierstrass_b_g2_im" : 266929791119991161246907387137283842545076965332900288569378510910307636690,
|
||||
"g1_gen_x" : 1,
|
||||
"g1_gen_y" : 2,
|
||||
"g2_gen_x_re" : 10857046999023057135944570762232829481370756359578518086990519993285655852781,
|
||||
"g2_gen_x_im" : 11559732032986387107991004021392285783925812861821192530917403151452391805634,
|
||||
"g2_gen_y_re" : 8495653923123431417604973247489272438418190587263600148770280649306958101930,
|
||||
"g2_gen_y_im" : 4082367875863433681332203403145435568316851327593401208105741076214120093531
|
||||
}
|
||||
@@ -1,30 +1,12 @@
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
from sympy.ntheory import isprime, primitive_root
|
||||
import subprocess
|
||||
import random
|
||||
from string import Template
|
||||
import sys
|
||||
|
||||
data = None
|
||||
with open(sys.argv[1]) as json_file:
|
||||
data = json.load(json_file)
|
||||
|
||||
curve_name = data["curve_name"]
|
||||
modolus_p = data["modolus_p"]
|
||||
bit_count_p = data["bit_count_p"]
|
||||
limb_p = data["limb_p"]
|
||||
ntt_size = data["ntt_size"]
|
||||
modolus_q = data["modolus_q"]
|
||||
bit_count_q = data["bit_count_q"]
|
||||
limb_q = data["limb_q"]
|
||||
weierstrass_b = data["weierstrass_b"]
|
||||
gen_x = data["gen_x"]
|
||||
gen_y = data["gen_y"]
|
||||
|
||||
|
||||
def to_hex(val, length):
|
||||
x = str(hex(val))[2:]
|
||||
def to_hex(val: int, length):
|
||||
x = hex(val)[2:]
|
||||
if len(x) % 8 != 0:
|
||||
x = "0" * (8-len(x) % 8) + x
|
||||
if len(x) != length:
|
||||
@@ -33,133 +15,260 @@ def to_hex(val, length):
|
||||
chunks = [x[i:i+n] for i in range(0, len(x), n)][::-1]
|
||||
s = ""
|
||||
for c in chunks:
|
||||
s += "0x" + c + ", "
|
||||
return s
|
||||
s += f'0x{c}, '
|
||||
|
||||
return s[:-2]
|
||||
|
||||
|
||||
def get_root_of_unity(order: int) -> int:
    """Return ``5 ** ((modolus_p - 1) / order) mod modolus_p``.

    Reads the module-level ``modolus_p``.  The base 5 is hard-coded;
    presumably it generates the multiplicative group of the configured
    field -- TODO confirm per curve.

    Raises:
        AssertionError: if ``order`` does not divide ``modolus_p - 1``.
    """
    exponent, leftover = divmod(modolus_p - 1, order)
    assert leftover == 0
    return pow(5, exponent, modolus_p)
|
||||
def compute_values(modulus, modulus_bit_count, limbs):
|
||||
limb_size = 8*limbs
|
||||
modulus_ = to_hex(modulus,limb_size)
|
||||
modulus_2 = to_hex(modulus*2,limb_size)
|
||||
modulus_4 = to_hex(modulus*4,limb_size)
|
||||
modulus_wide = to_hex(modulus,limb_size*2)
|
||||
modulus_squared = to_hex(modulus*modulus,limb_size)
|
||||
modulus_squared_2 = to_hex(modulus*modulus*2,limb_size)
|
||||
modulus_squared_4 = to_hex(modulus*modulus*4,limb_size)
|
||||
m_raw = int(math.floor(int(pow(2,2*modulus_bit_count) // modulus)))
|
||||
m = to_hex(m_raw,limb_size)
|
||||
one = to_hex(1,limb_size)
|
||||
zero = to_hex(0,limb_size)
|
||||
|
||||
def create_field_parameters_struct(modulus, modulus_bits_count,limbs,ntt,size,name):
|
||||
s = " struct "+name+"{\n"
|
||||
s += " static constexpr unsigned limbs_count = " + str(limbs)+";\n"
|
||||
s += " static constexpr storage<limbs_count> modulus = {"+to_hex(modulus,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<limbs_count> modulus_2 = {"+to_hex(modulus*2,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<limbs_count> modulus_4 = {"+to_hex(modulus*4,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<2*limbs_count> modulus_wide = {"+to_hex(modulus,8*limbs*2)[:-2]+"};\n"
|
||||
s += " static constexpr storage<2*limbs_count> modulus_sqared = {"+to_hex(modulus*modulus,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<2*limbs_count> modulus_sqared_2 = {"+to_hex(modulus*modulus*2,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<2*limbs_count> modulus_sqared_4 = {"+to_hex(modulus*modulus*2*2,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr unsigned modulus_bits_count = "+str(modulus_bits_count)+";\n"
|
||||
m = int(math.floor(int(pow(2,2*modulus_bits_count) // modulus)))
|
||||
s += " static constexpr storage<limbs_count> m = {"+ to_hex(m,8*limbs)[:-2] +"};\n"
|
||||
s += " static constexpr storage<limbs_count> one = {"+ to_hex(1,8*limbs)[:-2] +"};\n"
|
||||
s += " static constexpr storage<limbs_count> zero = {"+ to_hex(0,8*limbs)[:-2] +"};\n"
|
||||
return (
|
||||
modulus_,
|
||||
modulus_2,
|
||||
modulus_4,
|
||||
modulus_wide,
|
||||
modulus_squared,
|
||||
modulus_squared_2,
|
||||
modulus_squared_4,
|
||||
m,
|
||||
one,
|
||||
zero
|
||||
)
|
||||
|
||||
if ntt:
|
||||
|
||||
def get_fq_params(modulus, modulus_bit_count, limbs, g1_gen_x, g1_gen_y, g2_gen_x_re, g2_gen_x_im, g2_gen_y_re, g2_gen_y_im):
    """Build the ``fq_*`` template substitutions for one curve.

    The first ten entries are the hex strings produced by
    ``compute_values`` (modulus, its multiples, wide/squared variants,
    ``m``, one and zero); the remaining six are the G1/G2 generator
    coordinates rendered with ``to_hex`` at ``8 * limbs`` hex digits.

    Returns:
        dict mapping template key -> hex-limb string.
    """
    derived_keys = (
        'fq_modulus',
        'fq_modulus_2',
        'fq_modulus_4',
        'fq_modulus_wide',
        'fq_modulus_squared',
        'fq_modulus_squared_2',
        'fq_modulus_squared_4',
        'fq_m',
        'fq_one',
        'fq_zero',
    )
    # compute_values returns its results in exactly this key order.
    fq_params = dict(zip(derived_keys, compute_values(modulus, modulus_bit_count, limbs)))

    hex_width = 8 * limbs
    generator_coords = (
        ('fq_gen_x', g1_gen_x),
        ('fq_gen_y', g1_gen_y),
        ('fq_gen_x_re', g2_gen_x_re),
        ('fq_gen_x_im', g2_gen_x_im),
        ('fq_gen_y_re', g2_gen_y_re),
        ('fq_gen_y_im', g2_gen_y_im),
    )
    for key, coordinate in generator_coords:
        fq_params[key] = to_hex(coordinate, hex_width)
    return fq_params
|
||||
|
||||
|
||||
def get_fp_params(modulus, modulus_bit_count, limbs, root_of_unity, size=0):
|
||||
(
|
||||
modulus_,
|
||||
modulus_2,
|
||||
modulus_4,
|
||||
modulus_wide,
|
||||
modulus_squared,
|
||||
modulus_squared_2,
|
||||
modulus_squared_4,
|
||||
m,
|
||||
one,
|
||||
zero
|
||||
) = compute_values(modulus, modulus_bit_count, limbs)
|
||||
limb_size = 8*limbs
|
||||
if size > 0:
|
||||
omega = ''
|
||||
omega_inv = ''
|
||||
inv = ''
|
||||
omegas = []
|
||||
omegas_inv = []
|
||||
for k in range(size):
|
||||
omega = get_root_of_unity(int(pow(2,k+1)))
|
||||
s += " static constexpr storage<limbs_count> omega"+str(k+1)+"= {"+ to_hex(omega,8*limbs)[:-2]+"};\n"
|
||||
if k == 0:
|
||||
om = root_of_unity
|
||||
else:
|
||||
om = pow(om, 2, modulus)
|
||||
omegas.append(om)
|
||||
omegas_inv.append(pow(om, -1, modulus))
|
||||
omegas.reverse()
|
||||
omegas_inv.reverse()
|
||||
for k in range(size):
|
||||
omega = get_root_of_unity(int(pow(2,k+1)))
|
||||
s += " static constexpr storage<limbs_count> omega_inv"+str(k+1)+"= {"+ to_hex(pow(omega, -1, modulus),8*limbs)[:-2]+"};\n"
|
||||
for k in range(size):
|
||||
s += " static constexpr storage<limbs_count> inv"+str(k+1)+"= {"+ to_hex(pow(int(pow(2,k+1)), -1, modulus),8*limbs)[:-2]+"};\n"
|
||||
s+=" };\n"
|
||||
return s
|
||||
omega += "\n {"+ to_hex(omegas[k],limb_size)+"}," if k>0 else " {"+ to_hex(omegas[k],limb_size)+"},"
|
||||
omega_inv += "\n {"+ to_hex(omegas_inv[k],limb_size)+"}," if k>0 else " {"+ to_hex(omegas_inv[k],limb_size)+"},"
|
||||
inv += "\n {"+ to_hex(pow(int(pow(2,k+1)), -1, modulus),limb_size)+"}," if k>0 else " {"+ to_hex(pow(int(pow(2,k+1)), -1, modulus),limb_size)+"},"
|
||||
|
||||
|
||||
return {
|
||||
'fp_modulus': modulus_,
|
||||
'fp_modulus_2': modulus_2,
|
||||
'fp_modulus_4': modulus_4,
|
||||
'fp_modulus_wide': modulus_wide,
|
||||
'fp_modulus_squared': modulus_squared,
|
||||
'fp_modulus_squared_2': modulus_squared_2,
|
||||
'fp_modulus_squared_4': modulus_squared_4,
|
||||
'fp_m': m,
|
||||
'fp_one': one,
|
||||
'fp_zero': zero,
|
||||
'omega': omega[:-1],
|
||||
'omega_inv': omega_inv[:-1],
|
||||
'inv': inv[:-1],
|
||||
}
|
||||
|
||||
def create_gen():
    """Render the ``group_generator`` struct for the generated params header.

    Reads the module-level ``gen_x``, ``gen_y`` and ``limb_q``.  The last two
    characters of each ``to_hex`` result are dropped before embedding
    (presumably a trailing ", " separator -- confirm against ``to_hex``).
    """
    hex_x = to_hex(gen_x, 8 * limb_q)[:-2]
    hex_y = to_hex(gen_y, 8 * limb_q)[:-2]
    return (
        " struct group_generator {\n"
        f" static constexpr storage<fq_config::limbs_count> generator_x = {{{hex_x}}};\n"
        f" static constexpr storage<fq_config::limbs_count> generator_y = {{{hex_y}}};\n"
        " };\n"
    )
|
||||
|
||||
def get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b):
    """Assemble the full text of a curve's ``params.cuh`` header.

    The namespace name comes from the module-level ``curve_name``.  The
    ``fp_config`` struct is generated with NTT constants (``ntt_size``
    entries); ``fq_config`` is generated without them.
    """
    sections = [
        "#pragma once\n#include \"../../utils/storage.cuh\"\n",
        "namespace PARAMS_" + curve_name.upper() + "{\n",
        create_field_parameters_struct(modolus_p, bit_count_p, limb_p, True, ntt_size, "fp_config"),
        create_field_parameters_struct(modolus_q, bit_count_q, limb_q, False, 0, "fq_config"),
        " static constexpr unsigned weierstrass_b = " + str(weierstrass_b) + ";\n",
        create_gen(),
        "}\n",
    ]
    return "".join(sections)
|
||||
def get_weier_params(weierstrass_b, weierstrass_b_g2_re, weierstrass_b_g2_im, size):
    """Render the Weierstrass ``b`` coefficients as hex-limb strings.

    Each value is passed through ``to_hex`` with ``size`` as the requested
    hex-digit length.

    Returns:
        dict with keys ``weier_b``, ``weier_b_g2_re`` and ``weier_b_g2_im``.
    """
    coefficients = (
        ('weier_b', weierstrass_b),
        ('weier_b_g2_re', weierstrass_b_g2_re),
        ('weier_b_g2_im', weierstrass_b_g2_im),
    )
    return {key: to_hex(value, size) for key, value in coefficients}
|
||||
|
||||
|
||||
def get_params(config):
    """Turn a parsed curve-config dict into the full template substitution map.

    Side effect: rebinds the module-level ``ntt_size`` to the configured
    value.  Exits the process (``sys.exit``) when ``root_of_unity`` equals
    ``modulus_p``, which the config uses to mark a value that still has to
    be filled in.

    Returns:
        dict merging the scalar settings with the ``fp_*``, ``fq_*`` and
        ``weier_*`` entries; later groups win on key collisions.
    """
    global ntt_size
    curve = config["curve_name"]
    p_modulus = config["modulus_p"]
    p_bits = config["bit_count_p"]
    p_limbs = config["limb_p"]
    ntt_size = config["ntt_size"]
    q_modulus = config["modulus_q"]
    q_bits = config["bit_count_q"]
    q_limbs = config["limb_q"]
    omega_seed = config["root_of_unity"]
    if omega_seed == p_modulus:
        sys.exit("Invalid root_of_unity value; please update in curve parameters")

    b_g1 = config["weierstrass_b"]
    b_g2_re = config["weierstrass_b_g2_re"]
    b_g2_im = config["weierstrass_b_g2_im"]
    g1_x = config["g1_gen_x"]
    g1_y = config["g1_gen_y"]
    g2_x_re = config["g2_gen_x_re"]
    g2_x_im = config["g2_gen_x_im"]
    g2_y_re = config["g2_gen_y_re"]
    g2_y_im = config["g2_gen_y_im"]

    merged = {
        'curve_name_U': curve.upper(),
        'fp_num_limbs': p_limbs,
        'fq_num_limbs': q_limbs,
        'fp_modulus_bit_count': p_bits,
        'fq_modulus_bit_count': q_bits,
        'num_omegas': ntt_size
    }

    # Same call order and merge precedence as the original: fp, then fq, then weier.
    fp_part = get_fp_params(p_modulus, p_bits, p_limbs, omega_seed, ntt_size)
    fq_part = get_fq_params(q_modulus, q_bits, q_limbs, g1_x, g1_y, g2_x_re, g2_x_im, g2_y_re, g2_y_im)
    weier_part = get_weier_params(b_g1, b_g2_re, b_g2_im, 8 * q_limbs)
    for part in (fp_part, fq_part, weier_part):
        merged.update(part)
    return merged
|
||||
|
||||
|
||||
config = None
|
||||
with open(sys.argv[1]) as json_file:
|
||||
config = json.load(json_file)
|
||||
|
||||
curve_name_lower = config["curve_name"].lower()
|
||||
curve_name_upper = config["curve_name"].upper()
|
||||
limb_q = config["limb_q"]
|
||||
limb_p = config["limb_p"]
|
||||
|
||||
# Create Cuda interface
|
||||
|
||||
newpath = "./icicle-cuda/curves/"+curve_name
|
||||
newpath = f'./icicle/curves/{curve_name_lower}'
|
||||
if not os.path.exists(newpath):
|
||||
os.makedirs(newpath)
|
||||
|
||||
fc = get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b)
|
||||
text_file = open("./icicle-cuda/curves/"+curve_name+"/params.cuh", "w")
|
||||
n = text_file.write(fc)
|
||||
text_file.close()
|
||||
with open("./icicle/curves/curve_template/params.cuh", "r") as params_file:
|
||||
params_file_template = Template(params_file.read())
|
||||
params = get_params(config)
|
||||
params_content = params_file_template.safe_substitute(params)
|
||||
with open(f'./icicle/curves/{curve_name_lower}/params.cuh', 'w') as f:
|
||||
f.write(params_content)
|
||||
|
||||
with open("./icicle-cuda/curves/curve_template/lde.cu", "r") as lde_file:
|
||||
content = lde_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
text_file = open("./icicle-cuda/curves/"+curve_name+"/lde.cu", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
with open("./icicle/curves/curve_template/lde.cu", "r") as lde_file:
|
||||
template_content = Template(lde_file.read())
|
||||
lde_content = template_content.safe_substitute(
|
||||
CURVE_NAME_U=curve_name_upper,
|
||||
CURVE_NAME_L=curve_name_lower
|
||||
)
|
||||
with open(f'./icicle/curves/{curve_name_lower}/lde.cu', 'w') as f:
|
||||
f.write(lde_content)
|
||||
|
||||
with open("./icicle-cuda/curves/curve_template/msm.cu", "r") as msm_file:
|
||||
content = msm_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
text_file = open("./icicle-cuda/curves/"+curve_name+"/msm.cu", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
with open("./icicle/curves/curve_template/msm.cu", "r") as msm_file:
|
||||
template_content = Template(msm_file.read())
|
||||
msm_content = template_content.safe_substitute(
|
||||
CURVE_NAME_U=curve_name_upper,
|
||||
CURVE_NAME_L=curve_name_lower
|
||||
)
|
||||
with open(f'./icicle/curves/{curve_name_lower}/msm.cu', 'w') as f:
|
||||
f.write(msm_content)
|
||||
|
||||
with open("./icicle-cuda/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_file:
|
||||
content = ve_mod_mult_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
text_file = open("./icicle-cuda/curves/"+curve_name+"/ve_mod_mult.cu", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
with open("./icicle/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_file:
|
||||
template_content = Template(ve_mod_mult_file.read())
|
||||
ve_mod_mult_content = template_content.safe_substitute(
|
||||
CURVE_NAME_U=curve_name_upper,
|
||||
CURVE_NAME_L=curve_name_lower
|
||||
)
|
||||
with open(f'./icicle/curves/{curve_name_lower}/ve_mod_mult.cu', 'w') as f:
|
||||
f.write(ve_mod_mult_content)
|
||||
|
||||
|
||||
namespace = '#include "params.cuh"\n'+'''namespace CURVE_NAME_U {
|
||||
typedef Field<PARAMS_CURVE_NAME_U::fp_config> scalar_field_t;\
|
||||
typedef scalar_field_t scalar_t;\
|
||||
typedef Field<PARAMS_CURVE_NAME_U::fq_config> point_field_t;
|
||||
typedef Projective<point_field_t, scalar_field_t, PARAMS_CURVE_NAME_U::group_generator, PARAMS_CURVE_NAME_U::weierstrass_b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
}'''
|
||||
|
||||
with open('./icicle-cuda/curves/'+curve_name+'/curve_config.cuh', 'w') as f:
|
||||
f.write(namespace.replace("CURVE_NAME_U",curve_name.upper()))
|
||||
with open(f'./icicle/curves/curve_template/curve_config.cuh', 'r') as cc:
|
||||
template_content = Template(cc.read())
|
||||
cc_content = template_content.safe_substitute(
|
||||
CURVE_NAME_U=curve_name_upper,
|
||||
)
|
||||
with open(f'./icicle/curves/{curve_name_lower}/curve_config.cuh', 'w') as f:
|
||||
f.write(cc_content)
|
||||
|
||||
|
||||
eq = '''
|
||||
#include <cuda.h>\n
|
||||
#include "curve_config.cuh"\n
|
||||
#include "../../primitives/projective.cuh"\n
|
||||
extern "C" bool eq_CURVE_NAME_L(CURVE_NAME_U::projective_t *point1, CURVE_NAME_U::projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2);
|
||||
}'''
|
||||
|
||||
with open('./icicle-cuda/curves/'+curve_name+'/projective.cu', 'w') as f:
|
||||
f.write(eq.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
|
||||
with open(f'./icicle/curves/curve_template/projective.cu', 'r') as proj:
|
||||
template_content = Template(proj.read())
|
||||
proj_content = template_content.safe_substitute(
|
||||
CURVE_NAME_U=curve_name_upper,
|
||||
CURVE_NAME_L=curve_name_lower
|
||||
)
|
||||
with open(f'./icicle/curves/{curve_name_lower}/projective.cu', 'w') as f:
|
||||
f.write(proj_content)
|
||||
|
||||
supported_operations = '''
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
'''
|
||||
|
||||
with open('./icicle-cuda/curves/'+curve_name+'/supported_operations.cu', 'w') as f:
|
||||
f.write(supported_operations.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
|
||||
|
||||
with open('./icicle-cuda/curves/index.cu', 'a') as f:
|
||||
f.write('\n#include "'+curve_name.lower()+'/supported_operations.cu"')
|
||||
with open(f'./icicle/curves/curve_template/supported_operations.cu', 'r') as supp_ops:
|
||||
template_content = Template(supp_ops.read())
|
||||
supp_ops_content = template_content.safe_substitute()
|
||||
with open(f'./icicle/curves/{curve_name_lower}/supported_operations.cu', 'w') as f:
|
||||
f.write(supp_ops_content)
|
||||
|
||||
|
||||
with open('./icicle/curves/index.cu', 'r+') as f:
|
||||
index_text = f.read()
|
||||
if index_text.find(curve_name_lower) == -1:
|
||||
f.write(f'\n#include "{curve_name_lower}/supported_operations.cu"')
|
||||
|
||||
|
||||
|
||||
@@ -168,36 +277,40 @@ with open('./icicle-cuda/curves/index.cu', 'a') as f:
|
||||
if limb_p == limb_q:
|
||||
with open("./src/curve_templates/curve_same_limbs.rs", "r") as curve_file:
|
||||
content = curve_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
content = content.replace("CURVE_NAME_U",curve_name_upper)
|
||||
content = content.replace("CURVE_NAME_L",curve_name_lower)
|
||||
content = content.replace("_limbs_p",str(limb_p * 8 * 4))
|
||||
content = content.replace("limbs_p",str(limb_p))
|
||||
text_file = open("./src/curves/"+curve_name+".rs", "w")
|
||||
text_file = open("./src/curves/"+curve_name_lower+".rs", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
else:
|
||||
with open("./src/curve_templates/curve_different_limbs.rs", "r") as curve_file:
|
||||
content = curve_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
content = content.replace("CURVE_NAME_U",curve_name_upper)
|
||||
content = content.replace("CURVE_NAME_L",curve_name_lower)
|
||||
content = content.replace("_limbs_p",str(limb_p * 8 * 4))
|
||||
content = content.replace("limbs_p",str(limb_p))
|
||||
content = content.replace("_limbs_q",str(limb_q * 8 * 4))
|
||||
content = content.replace("limbs_q",str(limb_q))
|
||||
text_file = open("./src/curves/"+curve_name+".rs", "w")
|
||||
text_file = open("./src/curves/"+curve_name_lower+".rs", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
|
||||
with open("./src/curve_templates/test.rs", "r") as test_file:
|
||||
content = test_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
text_file = open("./src/test_"+curve_name+".rs", "w")
|
||||
content = content.replace("CURVE_NAME_U",curve_name_upper)
|
||||
content = content.replace("CURVE_NAME_L",curve_name_lower)
|
||||
text_file = open("./src/test_"+curve_name_lower+".rs", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
|
||||
with open('./src/curves/mod.rs', 'a') as f:
|
||||
f.write('\n pub mod ' + curve_name + ';')
|
||||
with open('./src/curves/mod.rs', 'r+') as f:
|
||||
mod_text = f.read()
|
||||
if mod_text.find(curve_name_lower) == -1:
|
||||
f.write('\npub mod ' + curve_name_lower + ';')
|
||||
|
||||
with open('./src/lib.rs', 'a') as f:
|
||||
f.write('\npub mod ' + curve_name + ';')
|
||||
with open('./src/lib.rs', 'r+') as f:
|
||||
lib_text = f.read()
|
||||
if lib_text.find(curve_name_lower) == -1:
|
||||
f.write('\npub mod ' + curve_name_lower + ';')
|
||||
@@ -1,49 +0,0 @@
|
||||
[package]
|
||||
name = "icicle-core"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
authors = [ "Ingonyama" ]
|
||||
description = "An implementation of the Ingonyama CUDA Library"
|
||||
homepage = "https://www.ingonyama.com"
|
||||
repository = "https://github.com/ingonyama-zk/icicle"
|
||||
|
||||
[[bench]]
|
||||
name = "ntt"
|
||||
path = "benches/ntt.rs"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "msm"
|
||||
path = "benches/msm.rs"
|
||||
harness = false
|
||||
|
||||
[dependencies]
|
||||
hex = "*"
|
||||
ark-std = "0.3.0"
|
||||
ark-ff = "0.3.0"
|
||||
ark-poly = "0.3.0"
|
||||
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
|
||||
ark-bls12-381 = "0.3.0"
|
||||
ark-bls12-377 = "0.3.0"
|
||||
ark-bn254 = "0.3.0"
|
||||
|
||||
serde = { version = "1.0", features = ["derive"] }
|
||||
serde_derive = "1.0"
|
||||
serde_cbor = "0.11.2"
|
||||
|
||||
rustacuda = "0.1"
|
||||
rustacuda_core = "0.1"
|
||||
rustacuda_derive = "0.1"
|
||||
|
||||
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
|
||||
|
||||
[build-dependencies]
|
||||
cc = { version = "1.0", features = ["parallel"] }
|
||||
|
||||
[dev-dependencies]
|
||||
"criterion" = "0.4.0"
|
||||
|
||||
[features]
|
||||
default = ["bls12-381"]
|
||||
bls12-381 = ["ark-bls12-381/curve"]
|
||||
g2 = []
|
||||
@@ -1,4 +0,0 @@
|
||||
/// Compile-time description of a prime field stored as `NUM_LIMBS` 32-bit limbs.
pub trait Field<const NUM_LIMBS: usize> {
    /// The field modulus as `u32` limbs.
    /// NOTE(review): limb order is presumably least-significant first, matching
    /// how scalar values are stored -- confirm against the implementors.
    const MODOLUS: [u32;NUM_LIMBS];
    /// Number of limbs; defaults to the `NUM_LIMBS` const parameter.
    const LIMBS: usize = NUM_LIMBS;
}
|
||||
@@ -1,3 +0,0 @@
|
||||
pub mod field;
|
||||
pub mod scalar;
|
||||
pub mod point;
|
||||
@@ -1,108 +0,0 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
use super::scalar::{get_fixed_limbs, self};
|
||||
|
||||
|
||||
/// A point given by three base-field coordinates `(x, y, z)`.
///
/// `#[repr(C)]` fixes the field order so the struct can be shared with the
/// device side (it derives `DeviceCopy`).
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointT<BF: scalar::ScalarTrait> {
    /// X coordinate.
    pub x: BF,
    /// Y coordinate.
    pub y: BF,
    /// Z coordinate; `PointAffineNoInfinityT::to_projective` sets it to one,
    /// and `PointT::zero` sets it to zero.
    pub z: BF,
}
|
||||
|
||||
/// The default point is the zero point returned by [`PointT::zero`].
impl<BF: DeviceCopy + scalar::ScalarTrait> Default for PointT<BF> {
    fn default() -> Self {
        Self::zero()
    }
}
|
||||
|
||||
impl<BF: DeviceCopy + scalar::ScalarTrait> PointT<BF> {
    /// Returns the point with coordinates `(0, 1, 0)`.
    pub fn zero() -> Self {
        let x = BF::zero();
        let y = BF::one();
        let z = BF::zero();
        Self { x, y, z }
    }

    /// Alias for [`PointT::zero`].
    pub fn infinity() -> Self {
        Self::zero()
    }
}
|
||||
|
||||
/// A point given by two base-field coordinates `(x, y)`, with no `z` component.
///
/// `#[repr(C)]` fixes the field order; the struct derives `DeviceCopy`.
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinityT<BF> {
    /// X coordinate.
    pub x: BF,
    /// Y coordinate.
    pub y: BF,
}
|
||||
|
||||
/// The default affine point has both coordinates set to `BF::zero()`.
impl<BF: scalar::ScalarTrait> Default for PointAffineNoInfinityT<BF> {
    fn default() -> Self {
        let x = BF::zero();
        let y = BF::zero();
        Self { x, y }
    }
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointAffineNoInfinityT<BF> {
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinityT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> PointT<BF> {
|
||||
PointT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BF::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<BF: Copy + scalar::ScalarTrait> PointT<BF> {
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
PointT {
|
||||
x: BF::from_limbs(x),
|
||||
y: BF::from_limbs(y),
|
||||
z: BF::from_limbs(z)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> PointT<BF> {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BF::base_limbs(), "length must be 3 * {}", BF::base_limbs());
|
||||
PointT {
|
||||
x: BF::from_limbs(value[..BF::base_limbs()].try_into().unwrap()),
|
||||
y: BF::from_limbs(value[BF::base_limbs()..BF::base_limbs() * 2].try_into().unwrap()),
|
||||
z: BF::from_limbs(value[BF::base_limbs() * 2..].try_into().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinityT<BF> {
|
||||
PointAffineNoInfinityT {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,102 +0,0 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::mem::transmute;
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination};
|
||||
|
||||
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
use std::marker::PhantomData;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use super::field::{Field, self};
|
||||
|
||||
/// Copies `val` into a fixed `[u32; NUM_LIMBS]`, zero-filling the tail when
/// `val` is shorter.
///
/// Panics with "slice has too many elements" when `val` is longer than
/// `NUM_LIMBS`.
pub fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    if val.len() > NUM_LIMBS {
        panic!("slice has too many elements");
    }
    let mut fixed = [0u32; NUM_LIMBS];
    fixed[..val.len()].copy_from_slice(val);
    fixed
}
|
||||
|
||||
/// Common interface of limb-backed field elements used as point coordinates
/// and scalars.
pub trait ScalarTrait{
    /// Number of `u32` limbs in one element.
    fn base_limbs() -> usize;
    /// The additive identity.
    fn zero() -> Self;
    /// Builds an element from `u32` limbs.
    fn from_limbs(value: &[u32]) -> Self;
    /// The multiplicative identity.
    fn one() -> Self;
    /// Serialises the element to bytes, little-endian.
    fn to_bytes_le(&self) -> Vec<u8>;
    /// Borrows the underlying limbs.
    fn limbs(&self) -> &[u32];
}
|
||||
|
||||
/// A field element stored as `NUM_LIMBS` `u32` limbs, tagged at the type level
/// with the field marker `M`.
#[derive(Debug, PartialEq, Clone, Copy)]
#[repr(C)]
pub struct ScalarT<M, const NUM_LIMBS: usize> {
    // Zero-sized marker tying the value to its field; carries no data.
    pub(crate) phantom: PhantomData<M>,
    // The limbs; index 0 is the least significant (see `one()`, which sets limb 0 to 1).
    pub(crate) value : [u32; NUM_LIMBS]
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarTrait for ScalarT<M, NUM_LIMBS>
|
||||
where
|
||||
M: Field<NUM_LIMBS>,
|
||||
{
|
||||
|
||||
fn base_limbs() -> usize {
|
||||
return NUM_LIMBS;
|
||||
}
|
||||
|
||||
fn zero() -> Self {
|
||||
ScalarT {
|
||||
value: [0u32; NUM_LIMBS],
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
value: get_fixed_limbs(value),
|
||||
phantom: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
ScalarT { value: s, phantom: PhantomData }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.value
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
fn limbs(&self) -> &[u32] {
|
||||
&self.value
|
||||
}
|
||||
}
|
||||
|
||||
impl<M, const NUM_LIMBS: usize> ScalarT<M, NUM_LIMBS> where M: field::Field<NUM_LIMBS>{
|
||||
pub fn from_limbs_le(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
Self::from_limbs(value)
|
||||
}
|
||||
|
||||
pub fn from_limbs_be(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
|
||||
let mut value = value.to_vec();
|
||||
value.reverse();
|
||||
Self::from_limbs_le(&value)
|
||||
}
|
||||
|
||||
// Additional Functions
|
||||
pub fn add(&self, other:ScalarT<M, NUM_LIMBS>) -> ScalarT<M,NUM_LIMBS>{ // overload +
|
||||
return ScalarT{value: [self.value[0] + other.value[0];NUM_LIMBS], phantom: PhantomData };
|
||||
}
|
||||
}
|
||||
@@ -1,2 +0,0 @@
|
||||
pub mod utils;
|
||||
pub mod basic_structs;
|
||||
@@ -1,42 +0,0 @@
|
||||
use rand::RngCore;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::SeedableRng;
|
||||
|
||||
/// Splits `limbs` into consecutive chunks of `chunk_size` (the last chunk may
/// be shorter) and converts each chunk with `f`.
///
/// Panics if `chunk_size` is zero, as `slice::chunks` does.
pub fn from_limbs<T>(limbs: Vec<u32>, chunk_size: usize, f: fn(&[u32]) -> T) -> Vec<T> {
    let mut converted = Vec::new();
    for chunk in limbs.chunks(chunk_size) {
        converted.push(f(chunk));
    }
    converted
}
|
||||
|
||||
/// Packs pairs of `u32` into `u64`, with the first element of each pair in
/// the low 32 bits.  A trailing unpaired element is ignored.
pub fn u32_vec_to_u64_vec(arr_u32: &[u32]) -> Vec<u64> {
    arr_u32
        .chunks_exact(2)
        .map(|pair| u64::from(pair[0]) | (u64::from(pair[1]) << 32))
        .collect()
}
|
||||
|
||||
/// Splits every `u64` into two `u32`s, low 32 bits first.
pub fn u64_vec_to_u32_vec(arr_u64: &[u64]) -> Vec<u32> {
    let mut halves = Vec::with_capacity(arr_u64.len() * 2);
    for word in arr_u64 {
        halves.push(*word as u32);
        halves.push((*word >> 32) as u32);
    }
    halves
}
|
||||
|
||||
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> { //TOOD: this func is universal
|
||||
let rng: Box<dyn RngCore> = match seed {
|
||||
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
|
||||
None => Box::new(rand::thread_rng()),
|
||||
};
|
||||
rng
|
||||
}
|
||||
@@ -1,4 +0,0 @@
|
||||
test_msm:
|
||||
mkdir -p work
|
||||
nvcc -o work/test_msm -I. tests/msm_test.cu
|
||||
work/test_msm
|
||||
@@ -1,133 +0,0 @@
|
||||
#pragma once
|
||||
#include "constants.cuh"
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <chrono>
|
||||
|
||||
#define ARITY 3
|
||||
|
||||
template <typename S>
|
||||
__host__ void print_buffer_from_cuda(S * device_ptr, size_t size) {
|
||||
S * buffer = static_cast< S * >(malloc(size * sizeof(S)));
|
||||
cudaMemcpy(buffer, device_ptr, size * sizeof(S), cudaMemcpyDeviceToHost);
|
||||
|
||||
std::cout << "Start print" << std::endl;
|
||||
for(int i = 0; i < size / ARITY; i++) {
|
||||
std::cout << "State #" << i << std::endl;
|
||||
for (int j = 0; j < ARITY; j++) {
|
||||
std::cout << buffer[i * ARITY + j] << std::endl;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
free(buffer);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
template <typename S>
|
||||
__device__ void print_scalar(S element, int data) {
|
||||
printf("D# %d, T# %d: 0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
|
||||
data,
|
||||
threadIdx.x,
|
||||
element.limbs_storage.limbs[0],
|
||||
element.limbs_storage.limbs[1],
|
||||
element.limbs_storage.limbs[2],
|
||||
element.limbs_storage.limbs[3],
|
||||
element.limbs_storage.limbs[4],
|
||||
element.limbs_storage.limbs[5],
|
||||
element.limbs_storage.limbs[6],
|
||||
element.limbs_storage.limbs[7]
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Round counts and constant-table pointers for one Poseidon instance.
/// Member order is part of the layout and is kept as originally declared.
template <typename S>
struct PoseidonConfiguration {
  uint32_t partial_rounds;    // number of partial rounds
  uint32_t full_rounds_half;  // half of the number of full rounds
  uint32_t t;                 // state width
  S * round_constants;        // round-constant table
  S * mds_matrix;             // MDS matrix
  S * non_sparse_matrix;      // non-sparse matrix
  S * sparse_matrices;        // sparse matrices
};
|
||||
|
||||
template <typename S>
|
||||
class Poseidon {
|
||||
public:
|
||||
uint32_t t;
|
||||
PoseidonConfiguration<S> config;
|
||||
|
||||
enum HashType {
|
||||
ConstInputLen,
|
||||
MerkleTree,
|
||||
};
|
||||
|
||||
Poseidon(const uint32_t arity) {
|
||||
t = arity + 1;
|
||||
this->config.t = t;
|
||||
|
||||
// Pre-calculate domain tags
|
||||
// Domain tags will vary for different applications of Poseidon
|
||||
uint32_t tree_domain_tag_value = 1;
|
||||
tree_domain_tag_value = (tree_domain_tag_value << arity) - tree_domain_tag_value;
|
||||
tree_domain_tag = S::from(tree_domain_tag_value);
|
||||
|
||||
const_input_no_pad_domain_tag = S::one();
|
||||
|
||||
// TO-DO: implement binary shifts for scalar type
|
||||
// const_input_no_pad_domain_tag = S::one() << 64;
|
||||
// const_input_no_pad_domain_tag *= S::from(arity);
|
||||
|
||||
this->config.full_rounds_half = FULL_ROUNDS_DEFAULT;
|
||||
this->config.partial_rounds = partial_rounds_number_from_arity(arity);
|
||||
|
||||
uint32_t round_constants_len = t * this->config.full_rounds_half * 2 + this->config.partial_rounds;
|
||||
uint32_t mds_matrix_len = t * t;
|
||||
uint32_t sparse_matrices_len = (t * 2 - 1) * this->config.partial_rounds;
|
||||
|
||||
// All the constants are stored in a single file
|
||||
S * constants = load_constants<S>(arity);
|
||||
|
||||
S * mds_offset = constants + round_constants_len;
|
||||
S * non_sparse_offset = mds_offset + mds_matrix_len;
|
||||
S * sparse_matrices_offset = non_sparse_offset + mds_matrix_len;
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
std::cout << "P: " << this->config.partial_rounds << " F: " << this->config.full_rounds_half << std::endl;
|
||||
#endif
|
||||
|
||||
// Allocate the memory for constants
|
||||
cudaMalloc(&this->config.round_constants, sizeof(S) * round_constants_len);
|
||||
cudaMalloc(&this->config.mds_matrix, sizeof(S) * mds_matrix_len);
|
||||
cudaMalloc(&this->config.non_sparse_matrix, sizeof(S) * mds_matrix_len);
|
||||
cudaMalloc(&this->config.sparse_matrices, sizeof(S) * sparse_matrices_len);
|
||||
|
||||
// Copy the constants to device
|
||||
cudaMemcpy(this->config.round_constants, constants,
|
||||
sizeof(S) * round_constants_len,
|
||||
cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(this->config.mds_matrix, mds_offset,
|
||||
sizeof(S) * mds_matrix_len,
|
||||
cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(this->config.non_sparse_matrix, non_sparse_offset,
|
||||
sizeof(S) * mds_matrix_len,
|
||||
cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(this->config.sparse_matrices, sparse_matrices_offset,
|
||||
sizeof(S) * sparse_matrices_len,
|
||||
cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
~Poseidon() {
|
||||
cudaFree(this->config.round_constants);
|
||||
cudaFree(this->config.mds_matrix);
|
||||
cudaFree(this->config.non_sparse_matrix);
|
||||
cudaFree(this->config.sparse_matrices);
|
||||
}
|
||||
|
||||
// Hash multiple preimages in parallel
|
||||
void hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type);
|
||||
|
||||
private:
|
||||
S tree_domain_tag, const_input_no_pad_domain_tag;
|
||||
};
|
||||
@@ -1,48 +0,0 @@
|
||||
#define DEBUG
|
||||
|
||||
#include "../../curves/bls12_381/curve_config.cuh"
|
||||
#include "../../curves/bls12_381/poseidon.cu"
|
||||
|
||||
#ifndef __CUDA_ARCH__
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <fstream>
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
const int arity = 2;
|
||||
const int t = arity + 1;
|
||||
|
||||
Poseidon<BLS12_381::scalar_t> poseidon(arity);
|
||||
|
||||
int number_of_blocks = 4;
|
||||
|
||||
BLS12_381::scalar_t input = BLS12_381::scalar_t::zero();
|
||||
BLS12_381::scalar_t * in_ptr = static_cast< BLS12_381::scalar_t * >(malloc(number_of_blocks * arity * sizeof(BLS12_381::scalar_t)));
|
||||
for (uint32_t i = 0; i < number_of_blocks * arity; i++) {
|
||||
// std::cout << input << std::endl;
|
||||
in_ptr[i] = input;
|
||||
input = input + BLS12_381::scalar_t::one();
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
BLS12_381::scalar_t * out_ptr = static_cast< BLS12_381::scalar_t * >(malloc(number_of_blocks * sizeof(BLS12_381::scalar_t)));
|
||||
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
|
||||
poseidon.hash_blocks(in_ptr, number_of_blocks, out_ptr, Poseidon<BLS12_381::scalar_t>::HashType::MerkleTree);
|
||||
|
||||
#ifdef DEBUG
|
||||
for (int i = 0; i < number_of_blocks; i++) {
|
||||
std::cout << out_ptr[i] << std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
|
||||
free(in_ptr);
|
||||
free(out_ptr);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,28 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "../primitives/field.cuh"
|
||||
#include "../primitives/projective.cuh"
|
||||
|
||||
#if defined(FEATURE_BLS12_381)
|
||||
#include "bls12_381/params.cuh"
|
||||
#elif defined(FEATURE_BLS12_377)
|
||||
#include "bls12_377/params.cuh"
|
||||
#elif defined(FEATURE_BN254)
|
||||
#include "bn254/params.cuh"
|
||||
#else
|
||||
# error "no FEATURE"
|
||||
#endif
|
||||
|
||||
typedef Field<PARAMS::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{ PARAMS::weierstrass_b };
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS::weierstrass_b_g2_re },
|
||||
point_field_t{ PARAMS::weierstrass_b_g2_im }};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
@@ -1,26 +0,0 @@
|
||||
#ifndef _POSEIDON
|
||||
#define _POSEIDON
|
||||
#include <cuda.h>
|
||||
#include <stdexcept>
|
||||
#include "../appUtils/poseidon/poseidon.cu"
|
||||
#include "curve_config.cuh"
|
||||
|
||||
template class Poseidon<scalar_t>;
|
||||
|
||||
/**
 * C entry point: hashes `number_of_blocks` preimages with Poseidon in MerkleTree mode.
 *
 * @param input            preimages, forwarded unchanged to Poseidon::hash_blocks
 * @param out              destination for the hashes, forwarded unchanged to Poseidon::hash_blocks
 * @param number_of_blocks number of preimages to hash
 * @param arity            arity passed to the Poseidon constructor
 * @param device_id        accepted for interface compatibility; not used by this function
 * @return CUDA_SUCCESS on success, -1 if any exception was thrown while hashing
 */
extern "C" int poseidon_multi_cuda(scalar_t input[], scalar_t* out,
                                   size_t number_of_blocks, int arity, size_t device_id = 0)
{
  try
  {
    Poseidon<scalar_t> poseidon(arity);
    poseidon.hash_blocks(input, number_of_blocks, out, Poseidon<scalar_t>::HashType::MerkleTree);

    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so the previously handled case is still covered
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the extern "C" boundary into a non-C++ caller
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
#endif
|
||||
@@ -1,19 +0,0 @@
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include "../primitives/projective.cuh"
|
||||
|
||||
// Exported equality test for two projective points.
// Returns true only when the points compare equal and neither of them has
// x, y and z all equal to zero.
extern "C" bool eq(projective_t *point1, projective_t *point2)
{
  const auto all_coordinates_zero = [](projective_t *p) {
    return (p->x == point_field_t::zero()) && (p->y == point_field_t::zero()) && (p->z == point_field_t::zero());
  };

  // Same evaluation order as a short-circuit chain: equality first, then each operand.
  if (!(*point1 == *point2)) {
    return false;
  }
  if (all_coordinates_zero(point1)) {
    return false;
  }
  return !all_coordinates_zero(point2);
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
// Exported equality test for two G2 projective points.
// Returns true only when the points compare equal and neither of them has
// x, y and z all equal to zero.
extern "C" bool eq_g2(g2_projective_t *point1, g2_projective_t *point2)
{
  const auto all_coordinates_zero = [](g2_projective_t *p) {
    return (p->x == g2_point_field_t::zero()) && (p->y == g2_point_field_t::zero()) && (p->z == g2_point_field_t::zero());
  };

  // Same evaluation order as a short-circuit chain: equality first, then each operand.
  if (!(*point1 == *point2)) {
    return false;
  }
  if (all_coordinates_zero(point1)) {
    return false;
  }
  return !all_coordinates_zero(point2);
}
|
||||
#endif
|
||||
9
icicle/appUtils/msm/Makefile
Normal file
9
icicle/appUtils/msm/Makefile
Normal file
@@ -0,0 +1,9 @@
|
||||
# Neither target produces a file with its own name (binaries go to work/),
# so both are declared phony and always run.
.PHONY: test_msm test_msm_debug

# Build the MSM test into work/ and run it.
test_msm:
	mkdir -p work
	nvcc -o work/test_msm -I. tests/msm_test.cu
	work/test_msm

# Same test, built with host (-g) and device (-G) debug information.
test_msm_debug:
	mkdir -p work
	nvcc -o work/test_msm_debug -I. tests/msm_test.cu -g -G
	work/test_msm_debug
|
||||
2300
icicle/appUtils/msm/msm.cu
Normal file
2300
icicle/appUtils/msm/msm.cu
Normal file
File diff suppressed because it is too large
Load Diff
@@ -3,7 +3,7 @@
|
||||
#pragma once
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, cudaStream_t stream);
|
||||
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, bool big_triangle, cudaStream_t stream);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device, cudaStream_t stream);
|
||||
@@ -12,7 +12,7 @@ template <typename S, typename P, typename A>
|
||||
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, cudaStream_t stream);
|
||||
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, bool big_triangle, cudaStream_t stream);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, cudaStream_t stream);
|
||||
@@ -3,6 +3,7 @@
|
||||
#pragma once
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include <cooperative_groups.h>
|
||||
#include "../../primitives/affine.cuh"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
@@ -14,10 +15,24 @@
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "msm.cuh"
|
||||
|
||||
#define MAX_TH 256
|
||||
|
||||
#define BIG_TRIANGLE
|
||||
// #define SIGNED_DIG //WIP
|
||||
// #define BIG_TRIANGLE
|
||||
// #define SSM_SUM //WIP
|
||||
|
||||
// One pairwise-reduction step: each working thread stores v[read_ind] + v[read_ind + block_size/2] into v_r.
//
// - write_stride == 0: the result is written at index tid.
//   Otherwise outputs are interleaved: output group g (write_stride consecutive results)
//   lands in slot 2*g + write_phase, each slot being write_stride elements wide.
// - padding == 0: every thread performs a sum.
//   Otherwise threads are taken in groups of 2*padding; the first `padding` threads of a group
//   perform the sum (using a compacted thread id) and the remaining ones write P::zero().
//
// No bounds check is performed: the launch configuration must match the data size.
template <typename P>
__global__ void single_stage_multi_reduction_kernel(P *v, P *v_r, unsigned block_size, unsigned write_stride, unsigned write_phase, unsigned padding) {
  const int tid = blockIdx.x * blockDim.x + threadIdx.x;

  // Compacted thread id used for reading when padding is active
  int tid_p = tid;
  if (padding) {
    tid_p = (tid/(2*padding))*padding + tid%padding;
  }

  const int jump = block_size/2;
  const int block_id = tid_p/jump;
  const int block_tid = tid_p%jump;
  const unsigned read_ind = block_size*block_id + block_tid;
  const unsigned write_ind = tid;

  // Destination index, optionally interleaved by write_stride / write_phase
  unsigned out_ind = write_ind;
  if (write_stride) {
    out_ind = ((write_ind/write_stride)*2 + write_phase)*write_stride + write_ind%write_stride;
  }

  if (padding && !(tid%(2*padding)<padding)) {
    // Padding thread: nothing is read, a neutral element is written
    v_r[out_ind] = P::zero();
  } else {
    v_r[out_ind] = v[read_ind] + v[read_ind + jump];
  }
}
|
||||
|
||||
//this kernel performs single scalar multiplication
|
||||
//each thread multiplies a single scalar and point
|
||||
template <typename P, typename S>
|
||||
@@ -62,42 +77,165 @@ __global__ void initialize_buckets_kernel(P *buckets, unsigned N) {
|
||||
//this kernel splits the scalars into digits of size c
|
||||
//each thread splits a single scalar into nof_bms digits
|
||||
template <typename S>
|
||||
__global__ void split_scalars_kernel(unsigned *buckets_indices, unsigned *point_indices, S *scalars, unsigned total_size, unsigned msm_log_size, unsigned nof_bms, unsigned bm_bitsize, unsigned c){
|
||||
__global__ void split_scalars_kernel(unsigned *buckets_indices, unsigned *point_indices, S *scalars, unsigned total_size, unsigned msm_log_size, unsigned nof_bms, unsigned bm_bitsize, unsigned c, unsigned top_bm_nof_missing_bits){
|
||||
|
||||
constexpr unsigned sign_mask = 0x80000000;
|
||||
// constexpr unsigned trash_bucket = 0x80000000;
|
||||
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
unsigned bucket_index;
|
||||
unsigned bucket_index2;
|
||||
unsigned current_index;
|
||||
unsigned msm_index = tid >> msm_log_size;
|
||||
unsigned borrow = 0;
|
||||
if (tid < total_size){
|
||||
S scalar = scalars[tid];
|
||||
|
||||
for (unsigned bm = 0; bm < nof_bms; bm++)
|
||||
{
|
||||
bucket_index = scalar.get_scalar_digit(bm, c);
|
||||
#ifdef SIGNED_DIG
|
||||
bucket_index += borrow;
|
||||
borrow = 0;
|
||||
unsigned sign = 0;
|
||||
if (bucket_index > (1<<(c-1))) {
|
||||
bucket_index = (1 << c) - bucket_index;
|
||||
borrow = 1;
|
||||
sign = sign_mask;
|
||||
}
|
||||
#endif
|
||||
current_index = bm * total_size + tid;
|
||||
#ifdef SIGNED_DIG
|
||||
point_indices[current_index] = sign | tid; //the point index is saved for later
|
||||
#else
|
||||
buckets_indices[current_index] = (msm_index<<(c+bm_bitsize)) | (bm<<c) | bucket_index; //the bucket module number and the msm number are appended at the msbs
|
||||
if (scalar == S::zero() || scalar == S::one() || bucket_index==0) buckets_indices[current_index] = 0; //will be skipped
|
||||
point_indices[current_index] = tid; //the point index is saved for later
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Sums the points whose scalar equals S::one().
// The input is cut into runs of `run_length` elements; thread `tid` accumulates run `tid`
// and stores its partial sum in results[tid]. Threads beyond the last run store P::zero().
template <typename P, typename A, typename S>
__global__ void add_ones_kernel(A *points, S* scalars, P* results, const unsigned msm_size, const unsigned run_length){
  const unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
  const unsigned active_threads = (msm_size + run_length - 1)/run_length; // number of runs

  P partial = P::zero();
  if (tid < active_threads) {
    const unsigned first = tid*run_length;
    const unsigned last = min(first+run_length, msm_size); // the final run may be shorter
    for (int idx = first; idx < last; idx++) {
      if (scalars[idx] == S::one()) {
        partial = partial + points[idx];
      }
    }
  }
  results[tid] = partial;
}
|
||||
|
||||
// Locates the position where `v` drops from above `cutoff` to at most `cutoff`.
//
// v          values to scan; expected to be sorted in descending order (the caller in this file
//            passes the output of cub::DeviceRadixSort::SortPairsDescending), so that at most
//            one crossing exists and only one thread writes the result
// size       number of elements in v
// cutoff     threshold
// run_length number of consecutive pairs (v[i], v[i+1]) examined by each thread
// result     result[0] receives i+1 for the crossing v[i] > cutoff && v[i+1] <= cutoff,
//            or 0 when there is no crossing (empty input, all values <= cutoff,
//            or all values > cutoff)
//
// The no-crossing case used to sit in a branch the loop bound made unreachable,
// which left result[0] unwritten; thread 0 now publishes it explicitly.
__global__ void find_cutoff_kernel(unsigned *v, unsigned size, unsigned cutoff, unsigned run_length, unsigned *result){
  const unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;

  // For descending input a crossing exists exactly when v[0] > cutoff and v[size-1] <= cutoff.
  if (tid == 0 && (size == 0 || v[0] <= cutoff || v[size-1] > cutoff)) {
    result[0] = 0;
  }
  if (size < 2) {
    return; // no pair to inspect; also keeps size-1 below from wrapping around
  }

  const unsigned nof_threads = (size + run_length - 1)/run_length;
  if (tid >= nof_threads) {
    return;
  }

  const unsigned start_index = tid*run_length;
  const unsigned end_index = min(start_index+run_length, size-1); // last pair is (size-2, size-1)
  for (unsigned i = start_index; i < end_index; i++){
    if (v[i] > cutoff && v[i+1] <= cutoff) {
      result[0] = i+1;
      return;
    }
  }
}
|
||||
|
||||
// Scans single_bucket_indices from the start for the first entry whose low `c` bits are non-zero.
// Writes the size of that bucket to largest_bucket_size[0] and its position to largest_bucket_size[1].
// NOTE(review): the scan has no upper bound, so it relies on such an entry existing.
__global__ void find_max_size(unsigned *bucket_sizes,unsigned *single_bucket_indices,unsigned c, unsigned *largest_bucket_size){
  const unsigned digit_mask = (1<<c)-1;

  int first_nonzero = 0;
  while ((single_bucket_indices[first_nonzero] & digit_mask) == 0) {
    first_nonzero++;
  }

  largest_bucket_size[0] = bucket_sizes[first_nonzero];
  largest_bucket_size[1] = first_nonzero;
}
|
||||
|
||||
//this kernel adds up the points in each bucket
|
||||
template <typename P, typename A>
|
||||
// __global__ void accumulate_buckets_kernel(P *__restrict__ buckets, unsigned *__restrict__ bucket_offsets,
|
||||
// unsigned *__restrict__ bucket_sizes, unsigned *__restrict__ single_bucket_indices, unsigned *__restrict__ point_indices, A *__restrict__ points, unsigned nof_buckets, unsigned batch_size, unsigned msm_idx_shift){
|
||||
__global__ void accumulate_buckets_kernel(P *buckets, unsigned *bucket_offsets, unsigned *bucket_sizes, unsigned *single_bucket_indices, unsigned *point_indices, A *points, unsigned nof_buckets, unsigned *nof_buckets_to_compute, unsigned msm_idx_shift){
|
||||
// unsigned *__restrict__ bucket_sizes, unsigned *__restrict__ single_bucket_indices, unsigned *__restrict__ point_indices, A *__restrict__ points, unsigned nof_buckets, unsigned batch_size, unsigned msm_idx_shift){
|
||||
template <typename P, typename A>
|
||||
__global__ void accumulate_buckets_kernel(P *__restrict__ buckets, unsigned *__restrict__ bucket_offsets, unsigned *__restrict__ bucket_sizes, unsigned *__restrict__ single_bucket_indices, const unsigned *__restrict__ point_indices, A *__restrict__ points, const unsigned nof_buckets, const unsigned nof_buckets_to_compute, const unsigned msm_idx_shift, const unsigned c){
|
||||
|
||||
constexpr unsigned sign_mask = 0x80000000;
|
||||
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid >= *nof_buckets_to_compute){
|
||||
if (tid>=nof_buckets_to_compute){
|
||||
return;
|
||||
}
|
||||
if ((single_bucket_indices[tid]&((1<<c)-1))==0)
|
||||
{
|
||||
return; //skip zero buckets
|
||||
}
|
||||
#ifdef SIGNED_DIG //todo - fix
|
||||
const unsigned msm_index = single_bucket_indices[tid]>>msm_idx_shift;
|
||||
const unsigned bm_index = (single_bucket_indices[tid]&((1<<msm_idx_shift)-1))>>c;
|
||||
const unsigned bucket_index = msm_index * nof_buckets + bm_index * ((1<<(c-1))+1) + (single_bucket_indices[tid]&((1<<c)-1));
|
||||
#else
|
||||
unsigned msm_index = single_bucket_indices[tid]>>msm_idx_shift;
|
||||
unsigned bucket_index = msm_index * nof_buckets + (single_bucket_indices[tid]&((1<<msm_idx_shift)-1));
|
||||
unsigned bucket_offset = bucket_offsets[tid];
|
||||
for (unsigned i = 0; i < bucket_sizes[tid]; i++) //add the relevant points starting from the relevant offset up to the bucket size
|
||||
#endif
|
||||
const unsigned bucket_offset = bucket_offsets[tid];
|
||||
const unsigned bucket_size = bucket_sizes[tid];
|
||||
|
||||
P bucket; //get rid of init buckets? no.. because what about buckets with no points
|
||||
for (unsigned i = 0; i < bucket_size; i++) //add the relevant points starting from the relevant offset up to the bucket size
|
||||
{
|
||||
buckets[bucket_index] = buckets[bucket_index] + points[point_indices[bucket_offset+i]];
|
||||
unsigned point_ind = point_indices[bucket_offset+i];
|
||||
#ifdef SIGNED_DIG
|
||||
unsigned sign = point_ind & sign_mask;
|
||||
point_ind &= ~sign_mask;
|
||||
A point = points[point_ind];
|
||||
if (sign) point = A::neg(point);
|
||||
#else
|
||||
A point = points[point_ind];
|
||||
#endif
|
||||
bucket = i? bucket + point : P::from_affine(point);
|
||||
}
|
||||
buckets[bucket_index] = bucket;
|
||||
}
|
||||
|
||||
// Accumulates large buckets using several threads per bucket.
// Thread `tid` handles segment (tid % threads_per_bucket) of bucket (tid / threads_per_bucket):
// it adds up to max_run_length points of that bucket, starting max_run_length * segment entries
// after the bucket's offset, and stores the partial sum at
// buckets[segment * nof_buckets_to_compute + bucket]. Segments with no points store P::zero().
// Buckets whose index has all-zero low `c` bits are skipped without writing anything.
template <typename P, typename A>
__global__ void accumulate_large_buckets_kernel(P *__restrict__ buckets, unsigned *__restrict__ bucket_offsets, unsigned *__restrict__ bucket_sizes, unsigned *__restrict__ single_bucket_indices, const unsigned *__restrict__ point_indices, A *__restrict__ points, const unsigned nof_buckets, const unsigned nof_buckets_to_compute, const unsigned msm_idx_shift, const unsigned c, const unsigned threads_per_bucket, const unsigned max_run_length){
  const unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
  const unsigned large_bucket_index = tid/threads_per_bucket;
  const unsigned bucket_segment_index = tid%threads_per_bucket;

  if (tid >= nof_buckets_to_compute*threads_per_bucket) {
    return;
  }
  const unsigned digit_mask = (1<<c)-1;
  if ((single_bucket_indices[large_bucket_index] & digit_mask) == 0) {
    return; // skip zero buckets
  }

  // Portion of the bucket that belongs to this segment
  const unsigned segment_start = bucket_segment_index*max_run_length;
  const unsigned whole_bucket_size = bucket_sizes[large_bucket_index];
  const unsigned remaining = whole_bucket_size > segment_start ? whole_bucket_size - segment_start : 0;
  const unsigned run_length = min(remaining, max_run_length);

  const unsigned write_bucket_index = bucket_segment_index * nof_buckets_to_compute + large_bucket_index;

  if (run_length == 0) {
    buckets[write_bucket_index] = P::zero(); // nothing left for this segment
    return;
  }

  const unsigned first_point = bucket_offsets[large_bucket_index] + segment_start;

  // The first point initialises the accumulator, the remaining ones are added to it
  const A head = points[point_indices[first_point]];
  P bucket = P::from_affine(head);
  for (unsigned i = 1; i < run_length; i++) {
    const A point = points[point_indices[first_point+i]];
    bucket = bucket + point;
  }
  buckets[write_bucket_index] = bucket;
}
|
||||
|
||||
// Scatters the first `size` entries of large_buckets into `buckets`:
// entry `tid` is written to buckets[single_bucket_indices[tid]].
template <typename P>
__global__ void distribute_large_buckets_kernel(P* large_buckets, P* buckets, unsigned *single_bucket_indices, unsigned size){
  const unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
  if (tid < size) {
    const unsigned destination = single_bucket_indices[tid];
    buckets[destination] = large_buckets[tid];
  }
}
|
||||
|
||||
//this kernel sums the entire bucket module
|
||||
@@ -106,12 +244,17 @@ template <typename P>
|
||||
__global__ void big_triangle_sum_kernel(P* buckets, P* final_sums, unsigned nof_bms, unsigned c){
|
||||
|
||||
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid >= nof_bms) return;
|
||||
P line_sum = buckets[(tid+1)*(1<<c)-1];
|
||||
if (tid>=nof_bms) return;
|
||||
#ifdef SIGNED_DIG
|
||||
unsigned buckets_in_bm = (1<<c)+1;
|
||||
#else
|
||||
unsigned buckets_in_bm = (1<<c);
|
||||
#endif
|
||||
P line_sum = buckets[(tid+1)*buckets_in_bm-1];
|
||||
final_sums[tid] = line_sum;
|
||||
for (unsigned i = (1<<c)-2; i >0; i--)
|
||||
for (unsigned i = buckets_in_bm-2; i >0; i--)
|
||||
{
|
||||
line_sum = line_sum + buckets[tid*(1<<c) + i]; //using the running sum method
|
||||
line_sum = line_sum + buckets[tid*buckets_in_bm + i]; //using the running sum method
|
||||
final_sums[tid] = final_sums[tid] + line_sum;
|
||||
}
|
||||
}
|
||||
@@ -130,10 +273,17 @@ __global__ void ssm_buckets_kernel(P* buckets, unsigned* single_bucket_indices,
|
||||
|
||||
}
|
||||
|
||||
// Copies every odd-indexed entry of final_buckets into final_sums:
// final_sums[tid] = final_buckets[2*tid + 1] for tid in [0, num_sums).
template <typename P>
__global__ void last_pass_kernel(P*final_buckets, P*final_sums, unsigned num_sums){
  const unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
  // '>=' rather than '>': thread num_sums would otherwise read final_buckets[2*num_sums+1]
  // and write final_sums[num_sums], both one element past the end.
  if (tid >= num_sums) return;
  final_sums[tid] = final_buckets[2*tid+1];
}
|
||||
|
||||
//this kernel computes the final result using the double and add algorithm
|
||||
//it is done by a single thread
|
||||
template <typename P, typename S>
|
||||
__global__ void final_accumulation_kernel(P* final_sums, P* final_results, unsigned nof_msms, unsigned nof_bms, unsigned c){
|
||||
__global__ void final_accumulation_kernel(P* final_sums, P* ones_result, P* final_results, unsigned nof_msms, unsigned nof_bms, unsigned c){
|
||||
|
||||
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid>nof_msms) return;
|
||||
@@ -146,13 +296,14 @@ __global__ void final_accumulation_kernel(P* final_sums, P* final_results, unsig
|
||||
final_result = final_result + final_result;
|
||||
}
|
||||
}
|
||||
final_results[tid] = final_result + final_sums[tid*nof_bms];
|
||||
final_results[tid] = final_result + final_sums[tid*nof_bms] + ones_result[0];
|
||||
// final_results[tid] = final_result + final_sums[tid*nof_bms];
|
||||
|
||||
}
|
||||
|
||||
//this function computes msm using the bucket method
|
||||
template <typename S, typename P, typename A>
|
||||
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, cudaStream_t stream) {
|
||||
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, bool big_triangle, cudaStream_t stream) {
|
||||
|
||||
S *d_scalars;
|
||||
A *d_points;
|
||||
@@ -173,11 +324,15 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
|
||||
unsigned nof_bms = bitsize/c;
|
||||
unsigned msm_log_size = ceil(log2(size));
|
||||
unsigned bm_bitsize = ceil(log2(nof_bms));
|
||||
|
||||
if (bitsize%c){
|
||||
nof_bms++;
|
||||
}
|
||||
unsigned top_bm_nof_missing_bits = c*nof_bms - bitsize;
|
||||
#ifdef SIGNED_DIG
|
||||
unsigned nof_buckets = nof_bms*((1<<(c-1))+1); //signed digits
|
||||
#else
|
||||
unsigned nof_buckets = nof_bms<<c;
|
||||
#endif
|
||||
cudaMallocAsync(&buckets, sizeof(P) * nof_buckets, stream);
|
||||
|
||||
// launch the bucket initialization kernel with maximum threads
|
||||
@@ -185,6 +340,21 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
|
||||
unsigned NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
initialize_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, nof_buckets);
|
||||
|
||||
//accumulate ones
|
||||
P *ones_results; //fix whole division, in last run in kernel too
|
||||
const unsigned nof_runs = msm_log_size > 10? (1<<(msm_log_size-6)) : 16;
|
||||
const unsigned run_length = (size + nof_runs -1)/nof_runs;
|
||||
cudaMallocAsync(&ones_results, sizeof(P) * nof_runs, stream);
|
||||
NUM_THREADS = min(1 << 8,nof_runs);
|
||||
NUM_BLOCKS = (nof_runs + NUM_THREADS - 1) / NUM_THREADS;
|
||||
add_ones_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_points, d_scalars, ones_results, size, run_length);
|
||||
|
||||
for (int s=nof_runs>>1;s>0;s>>=1){
|
||||
NUM_THREADS = min(MAX_TH,s);
|
||||
NUM_BLOCKS = (s + NUM_THREADS - 1) / NUM_THREADS;
|
||||
single_stage_multi_reduction_kernel<<<NUM_BLOCKS, NUM_THREADS,0,stream>>>(ones_results,ones_results,s*2,0,0,0);
|
||||
}
|
||||
|
||||
unsigned *bucket_indices;
|
||||
unsigned *point_indices;
|
||||
cudaMallocAsync(&bucket_indices, sizeof(unsigned) * size * (nof_bms+1), stream);
|
||||
@@ -194,8 +364,8 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
|
||||
NUM_THREADS = 1 << 10;
|
||||
NUM_BLOCKS = (size * (nof_bms+1) + NUM_THREADS - 1) / NUM_THREADS;
|
||||
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(bucket_indices + size, point_indices + size, d_scalars, size, msm_log_size,
|
||||
nof_bms, bm_bitsize, c); //+size - leaving the first bm free for the out of place sort later
|
||||
|
||||
nof_bms, bm_bitsize, c, top_bm_nof_missing_bits); //+size - leaving the first bm free for the out of place sort later
|
||||
|
||||
//sort indices - the indices are sorted from smallest to largest in order to group together the points that belong to each bucket
|
||||
unsigned *sort_indices_temp_storage{};
|
||||
size_t sort_indices_temp_storage_bytes;
|
||||
@@ -240,11 +410,109 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
|
||||
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, nof_buckets, stream);
|
||||
cudaFreeAsync(offsets_temp_storage, stream);
|
||||
|
||||
//sort by bucket sizes
|
||||
unsigned h_nof_buckets_to_compute;
|
||||
cudaMemcpyAsync(&h_nof_buckets_to_compute, nof_buckets_to_compute, sizeof(unsigned), cudaMemcpyDeviceToHost, stream);
|
||||
|
||||
unsigned* sorted_bucket_sizes;
|
||||
cudaMallocAsync(&sorted_bucket_sizes, sizeof(unsigned)*h_nof_buckets_to_compute, stream);
|
||||
unsigned* sorted_bucket_offsets;
|
||||
cudaMallocAsync(&sorted_bucket_offsets, sizeof(unsigned)*h_nof_buckets_to_compute, stream);
|
||||
unsigned* sort_offsets_temp_storage{};
|
||||
size_t sort_offsets_temp_storage_bytes = 0;
|
||||
cub::DeviceRadixSort::SortPairsDescending(sort_offsets_temp_storage, sort_offsets_temp_storage_bytes, bucket_sizes,
|
||||
sorted_bucket_sizes, bucket_offsets, sorted_bucket_offsets, h_nof_buckets_to_compute, 0, sizeof(unsigned) * 8, stream);
|
||||
cudaMallocAsync(&sort_offsets_temp_storage, sort_offsets_temp_storage_bytes, stream);
|
||||
cub::DeviceRadixSort::SortPairsDescending(sort_offsets_temp_storage, sort_offsets_temp_storage_bytes, bucket_sizes,
|
||||
sorted_bucket_sizes, bucket_offsets, sorted_bucket_offsets, h_nof_buckets_to_compute, 0, sizeof(unsigned) * 8, stream);
|
||||
cudaFreeAsync(sort_offsets_temp_storage, stream);
|
||||
|
||||
unsigned* sorted_single_bucket_indices;
|
||||
cudaMallocAsync(&sorted_single_bucket_indices, sizeof(unsigned)*h_nof_buckets_to_compute, stream);
|
||||
unsigned* sort_single_temp_storage{};
|
||||
size_t sort_single_temp_storage_bytes = 0;
|
||||
cub::DeviceRadixSort::SortPairsDescending(sort_single_temp_storage, sort_single_temp_storage_bytes, bucket_sizes,
|
||||
sorted_bucket_sizes, single_bucket_indices, sorted_single_bucket_indices, h_nof_buckets_to_compute, 0, sizeof(unsigned) * 8, stream);
|
||||
cudaMallocAsync(&sort_single_temp_storage, sort_single_temp_storage_bytes, stream);
|
||||
cub::DeviceRadixSort::SortPairsDescending(sort_single_temp_storage, sort_single_temp_storage_bytes, bucket_sizes,
|
||||
sorted_bucket_sizes, single_bucket_indices, sorted_single_bucket_indices, h_nof_buckets_to_compute, 0, sizeof(unsigned) * 8, stream);
|
||||
cudaFreeAsync(sort_single_temp_storage, stream);
|
||||
|
||||
//find large buckets
|
||||
unsigned avarage_size = size/(1<<c);
|
||||
// printf("avarage_size %u\n", avarage_size);
|
||||
float large_bucket_factor = 10; //global param
|
||||
unsigned bucket_th = ceil(large_bucket_factor*avarage_size);
|
||||
// printf("bucket_th %u\n", bucket_th);
|
||||
|
||||
unsigned *nof_large_buckets;
|
||||
cudaMallocAsync(&nof_large_buckets, sizeof(unsigned), stream);
|
||||
|
||||
unsigned TOTAL_THREADS = 129000; //todo - device dependant
|
||||
unsigned cutoff_run_length = max(2,h_nof_buckets_to_compute/TOTAL_THREADS);
|
||||
unsigned cutoff_nof_runs = (h_nof_buckets_to_compute + cutoff_run_length -1)/cutoff_run_length;
|
||||
NUM_THREADS = min(1 << 5,cutoff_nof_runs);
|
||||
NUM_BLOCKS = (cutoff_nof_runs + NUM_THREADS - 1) / NUM_THREADS;
|
||||
find_cutoff_kernel<<<NUM_BLOCKS,NUM_THREADS,0,stream>>>(sorted_bucket_sizes,h_nof_buckets_to_compute,bucket_th,cutoff_run_length,nof_large_buckets);
|
||||
|
||||
unsigned h_nof_large_buckets;
|
||||
cudaMemcpyAsync(&h_nof_large_buckets, nof_large_buckets, sizeof(unsigned), cudaMemcpyDeviceToHost, stream);
|
||||
|
||||
unsigned *max_res;
|
||||
cudaMallocAsync(&max_res, sizeof(unsigned)*2, stream);
|
||||
find_max_size<<<1,1,0,stream>>>(sorted_bucket_sizes,sorted_single_bucket_indices,c,max_res);
|
||||
|
||||
unsigned h_max_res[2];
|
||||
cudaMemcpyAsync(h_max_res, max_res, sizeof(unsigned)*2, cudaMemcpyDeviceToHost, stream);
|
||||
// printf("h_nof_large_buckets %u\n", h_nof_large_buckets);
|
||||
unsigned h_largest_bucket_size = h_max_res[0];
|
||||
unsigned h_nof_zero_large_buckets = h_max_res[1];
|
||||
// printf("h_largest_bucket_size %u\n", h_largest_bucket_size);
|
||||
// printf("h_nof_zero_large_buckets %u\n", h_nof_zero_large_buckets);
|
||||
|
||||
unsigned large_buckets_to_compute = h_nof_large_buckets>h_nof_zero_large_buckets? h_nof_large_buckets-h_nof_zero_large_buckets : 0;
|
||||
|
||||
cudaStream_t stream2;
|
||||
cudaStreamCreate(&stream2);
|
||||
P* large_buckets;
|
||||
|
||||
if (large_buckets_to_compute>0 && bucket_th>0){
|
||||
|
||||
unsigned threads_per_bucket = 1<<(unsigned)ceil(log2((h_largest_bucket_size + bucket_th - 1) / bucket_th)); //global param
|
||||
unsigned max_bucket_size_run_length = (h_largest_bucket_size + threads_per_bucket - 1) / threads_per_bucket;
|
||||
// printf("threads_per_bucket %u\n", threads_per_bucket);
|
||||
// printf("max_bucket_size_run_length %u\n", max_bucket_size_run_length);
|
||||
unsigned total_large_buckets_size = large_buckets_to_compute*threads_per_bucket;
|
||||
cudaMallocAsync(&large_buckets, sizeof(P)*total_large_buckets_size, stream);
|
||||
NUM_THREADS = min(1 << 8,total_large_buckets_size);
|
||||
NUM_BLOCKS = (total_large_buckets_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
accumulate_large_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream2>>>(large_buckets, sorted_bucket_offsets+h_nof_zero_large_buckets, sorted_bucket_sizes+h_nof_zero_large_buckets, sorted_single_bucket_indices+h_nof_zero_large_buckets, point_indices,
|
||||
d_points, nof_buckets, large_buckets_to_compute, c+bm_bitsize, c, threads_per_bucket, max_bucket_size_run_length);
|
||||
|
||||
//reduce
|
||||
for (int s=total_large_buckets_size>>1;s>large_buckets_to_compute-1;s>>=1){
|
||||
NUM_THREADS = min(MAX_TH,s);
|
||||
NUM_BLOCKS = (s + NUM_THREADS - 1) / NUM_THREADS;
|
||||
single_stage_multi_reduction_kernel<<<NUM_BLOCKS, NUM_THREADS,0,stream2>>>(large_buckets,large_buckets,s*2,0,0,0);
|
||||
}
|
||||
|
||||
//distribute
|
||||
NUM_THREADS = min(MAX_TH,large_buckets_to_compute);
|
||||
NUM_BLOCKS = (large_buckets_to_compute + NUM_THREADS - 1) / NUM_THREADS;
|
||||
distribute_large_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS,0,stream2>>>(large_buckets,buckets,sorted_single_bucket_indices+h_nof_zero_large_buckets,large_buckets_to_compute);
|
||||
}
|
||||
else{
|
||||
h_nof_large_buckets = 0;
|
||||
}
|
||||
|
||||
//launch the accumulation kernel with maximum threads
|
||||
NUM_THREADS = 1 << 8;
|
||||
NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, point_indices,
|
||||
d_points, nof_buckets, nof_buckets_to_compute, c+bm_bitsize);
|
||||
NUM_BLOCKS = (h_nof_buckets_to_compute-h_nof_large_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, sorted_bucket_offsets+h_nof_large_buckets, sorted_bucket_sizes+h_nof_large_buckets, sorted_single_bucket_indices+h_nof_large_buckets, point_indices,
|
||||
d_points, nof_buckets, h_nof_buckets_to_compute-h_nof_large_buckets, c+bm_bitsize, c);
|
||||
cudaStreamSynchronize(stream2);
|
||||
cudaStreamDestroy(stream2);
|
||||
cudaDeviceSynchronize();
|
||||
|
||||
#ifdef SSM_SUM
|
||||
//sum each bucket
|
||||
@@ -260,23 +528,87 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
|
||||
sum_reduction_kernel<<<NUM_BLOCKS,NUM_THREADS, 0, stream>>>(buckets, final_results);
|
||||
#endif
|
||||
|
||||
#ifdef BIG_TRIANGLE
|
||||
P* final_results;
|
||||
P* final_results;
|
||||
if (big_triangle){
|
||||
cudaMallocAsync(&final_results, sizeof(P) * nof_bms, stream);
|
||||
//launch the bucket module sum kernel - a thread for each bucket module
|
||||
NUM_THREADS = nof_bms;
|
||||
NUM_BLOCKS = 1;
|
||||
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, final_results, nof_bms, c);
|
||||
#endif
|
||||
#ifdef SIGNED_DIG
|
||||
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, final_results, nof_bms, c-1); //sighed digits
|
||||
#else
|
||||
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, final_results, nof_bms, c);
|
||||
#endif
|
||||
|
||||
}
|
||||
else{
|
||||
|
||||
unsigned source_bits_count = c;
|
||||
bool odd_source_c = source_bits_count%2;
|
||||
unsigned source_windows_count = nof_bms;
|
||||
unsigned source_buckets_count = nof_buckets;
|
||||
P *source_buckets = buckets;
|
||||
buckets = nullptr;
|
||||
P *target_buckets;
|
||||
P *temp_buckets1;
|
||||
P *temp_buckets2;
|
||||
for (unsigned i = 0;; i++) {
|
||||
// printf("round %u \n" ,i);
|
||||
const unsigned target_bits_count = (source_bits_count + 1) >> 1; //c/2=8
|
||||
// printf("target_bits_count %u \n" ,target_bits_count);
|
||||
const unsigned target_windows_count = source_windows_count << 1; //nof bms*2 = 32
|
||||
const unsigned target_buckets_count = target_windows_count << target_bits_count; // bms*2^c = 32*2^8
|
||||
cudaMallocAsync(&target_buckets, sizeof(P) * target_buckets_count,stream); //32*2^8*2^7 buckets
|
||||
cudaMallocAsync(&temp_buckets1, sizeof(P) * source_buckets_count/2,stream); //32*2^8*2^7 buckets
|
||||
cudaMallocAsync(&temp_buckets2, sizeof(P) * source_buckets_count/2,stream); //32*2^8*2^7 buckets
|
||||
|
||||
if (source_bits_count>0){
|
||||
for(unsigned j=0;j<target_bits_count;j++){
|
||||
unsigned last_j = target_bits_count-1;
|
||||
NUM_THREADS = min(MAX_TH,(source_buckets_count>>(1+j)));
|
||||
NUM_BLOCKS = ((source_buckets_count>>(1+j)) + NUM_THREADS - 1) / NUM_THREADS;
|
||||
single_stage_multi_reduction_kernel<<<NUM_BLOCKS, NUM_THREADS,0,stream>>>(j==0?source_buckets:temp_buckets1,j==target_bits_count-1? target_buckets: temp_buckets1,1<<(source_bits_count-j),j==target_bits_count-1? 1<<target_bits_count: 0,0,0);
|
||||
|
||||
unsigned nof_threads = (source_buckets_count>>(1+j));
|
||||
NUM_THREADS = min(MAX_TH,nof_threads);
|
||||
NUM_BLOCKS = (nof_threads + NUM_THREADS - 1) / NUM_THREADS;
|
||||
single_stage_multi_reduction_kernel<<<NUM_BLOCKS, NUM_THREADS,0,stream>>>(j==0?source_buckets:temp_buckets2,j==target_bits_count-1? target_buckets: temp_buckets2,1<<(target_bits_count-j),j==target_bits_count-1? 1<<target_bits_count: 0,1,0);
|
||||
|
||||
}
|
||||
}
|
||||
if (target_bits_count == 1) {
|
||||
nof_bms = bitsize;
|
||||
cudaMallocAsync(&final_results, sizeof(P) * nof_bms, stream);
|
||||
NUM_THREADS = 32;
|
||||
NUM_BLOCKS = (nof_bms + NUM_THREADS - 1) / NUM_THREADS;
|
||||
last_pass_kernel<<<NUM_BLOCKS,NUM_THREADS>>>(target_buckets,final_results,nof_bms);
|
||||
c = 1;
|
||||
cudaFreeAsync(source_buckets,stream);
|
||||
cudaFreeAsync(target_buckets,stream);
|
||||
cudaFreeAsync(temp_buckets1,stream);
|
||||
cudaFreeAsync(temp_buckets2,stream);
|
||||
break;
|
||||
}
|
||||
cudaFreeAsync(source_buckets,stream);
|
||||
cudaFreeAsync(temp_buckets1,stream);
|
||||
cudaFreeAsync(temp_buckets2,stream);
|
||||
source_buckets = target_buckets;
|
||||
target_buckets = nullptr;
|
||||
temp_buckets1 = nullptr;
|
||||
temp_buckets2 = nullptr;
|
||||
source_bits_count = target_bits_count;
|
||||
odd_source_c = source_bits_count%2;
|
||||
source_windows_count = target_windows_count;
|
||||
source_buckets_count = target_buckets_count;
|
||||
}
|
||||
}
|
||||
|
||||
P* d_final_result;
|
||||
if (!on_device)
|
||||
cudaMallocAsync(&d_final_result, sizeof(P), stream);
|
||||
|
||||
//launch the double and add kernel, a single thread
|
||||
final_accumulation_kernel<P, S><<<1,1,0,stream>>>(final_results, on_device ? final_result : d_final_result, 1, nof_bms, c);
|
||||
|
||||
//copy final result to host
|
||||
final_accumulation_kernel<P, S><<<1,1,0,stream>>>(final_results, ones_results, on_device ? final_result : d_final_result, 1, nof_bms, c);
|
||||
cudaStreamSynchronize(stream);
|
||||
if (!on_device)
|
||||
cudaMemcpyAsync(final_result, d_final_result, sizeof(P), cudaMemcpyDeviceToHost, stream);
|
||||
@@ -288,18 +620,29 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
|
||||
cudaFreeAsync(d_final_result, stream);
|
||||
}
|
||||
cudaFreeAsync(buckets, stream);
|
||||
#ifndef PHASE1_TEST
|
||||
cudaFreeAsync(bucket_indices, stream);
|
||||
cudaFreeAsync(point_indices, stream);
|
||||
cudaFreeAsync(single_bucket_indices, stream);
|
||||
cudaFreeAsync(bucket_sizes, stream);
|
||||
cudaFreeAsync(nof_buckets_to_compute, stream);
|
||||
cudaFreeAsync(bucket_offsets, stream);
|
||||
#endif
|
||||
cudaFreeAsync(sorted_bucket_sizes,stream);
|
||||
cudaFreeAsync(sorted_bucket_offsets,stream);
|
||||
cudaFreeAsync(sorted_single_bucket_indices,stream);
|
||||
cudaFreeAsync(nof_large_buckets,stream);
|
||||
cudaFreeAsync(max_res,stream);
|
||||
if (large_buckets_to_compute>0 && bucket_th>0) cudaFreeAsync(large_buckets,stream);
|
||||
cudaFreeAsync(final_results, stream);
|
||||
cudaFreeAsync(ones_results, stream);
|
||||
|
||||
cudaStreamSynchronize(stream);
|
||||
|
||||
|
||||
}
|
||||
|
||||
//this function computes msm using the bucket method
|
||||
//this function computes multiple msms using the bucket method - currently isn't working on this branch
|
||||
template <typename S, typename P, typename A>
|
||||
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device, cudaStream_t stream){
|
||||
|
||||
@@ -344,7 +687,7 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
|
||||
NUM_THREADS = 1 << 8;
|
||||
NUM_BLOCKS = (total_size * nof_bms + msm_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(bucket_indices + msm_size, point_indices + msm_size, d_scalars, total_size,
|
||||
msm_log_size, nof_bms, bm_bitsize, c); //+size - leaving the first bm free for the out of place sort later
|
||||
msm_log_size, nof_bms, bm_bitsize, c,0); //+size - leaving the first bm free for the out of place sort later
|
||||
|
||||
//sort indices - the indices are sorted from smallest to largest in order to group together the points that belong to each bucket
|
||||
unsigned *sorted_bucket_indices;
|
||||
@@ -392,33 +735,33 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
|
||||
cudaFreeAsync(offsets_temp_storage, stream);
|
||||
|
||||
//launch the accumulation kernel with maximum threads
|
||||
NUM_THREADS = 1 << 8;
|
||||
NUM_BLOCKS = (total_nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, sorted_point_indices,
|
||||
d_points, nof_buckets, total_nof_buckets_to_compute, c+bm_bitsize);
|
||||
// NUM_THREADS = 1 << 8;
|
||||
// NUM_BLOCKS = (total_nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
// accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, sorted_point_indices,
|
||||
// d_points, nof_buckets, total_nof_buckets_to_compute, c+bm_bitsize,c);
|
||||
|
||||
#ifdef SSM_SUM
|
||||
//sum each bucket
|
||||
NUM_THREADS = 1 << 10;
|
||||
NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
ssm_buckets_kernel<P, S><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, single_bucket_indices, nof_buckets, c);
|
||||
// #ifdef SSM_SUM
|
||||
// //sum each bucket
|
||||
// NUM_THREADS = 1 << 10;
|
||||
// NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
// ssm_buckets_kernel<P, S><<<NUM_BLOCKS, NUM_THREADS>>>(buckets, single_bucket_indices, nof_buckets, c);
|
||||
|
||||
//sum each bucket module
|
||||
P* final_results;
|
||||
cudaMallocAsync(&final_results, sizeof(P) * nof_bms, stream);
|
||||
NUM_THREADS = 1<<c;
|
||||
NUM_BLOCKS = nof_bms;
|
||||
sum_reduction_kernel<<<NUM_BLOCKS,NUM_THREADS, 0, stream>>>(buckets, final_results);
|
||||
#endif
|
||||
// //sum each bucket module
|
||||
// P* final_results;
|
||||
// cudaMalloc(&final_results, sizeof(P) * nof_bms);
|
||||
// NUM_THREADS = 1<<c;
|
||||
// NUM_BLOCKS = nof_bms;
|
||||
// sum_reduction_kernel<<<NUM_BLOCKS,NUM_THREADS>>>(buckets, final_results);
|
||||
// #endif
|
||||
|
||||
#ifdef BIG_TRIANGLE
|
||||
// #ifdef BIG_TRIANGLE
|
||||
P* bm_sums;
|
||||
cudaMallocAsync(&bm_sums, sizeof(P) * nof_bms * batch_size, stream);
|
||||
//launch the bucket module sum kernel - a thread for each bucket module
|
||||
NUM_THREADS = 1<<8;
|
||||
NUM_BLOCKS = (nof_bms*batch_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, bm_sums, nof_bms*batch_size, c);
|
||||
#endif
|
||||
// #endif
|
||||
|
||||
P* d_final_results;
|
||||
if (!on_device)
|
||||
@@ -427,8 +770,10 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
|
||||
//launch the double and add kernel, a single thread for each msm
|
||||
NUM_THREADS = 1<<8;
|
||||
NUM_BLOCKS = (batch_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
final_accumulation_kernel<P, S><<<NUM_BLOCKS,NUM_THREADS, 0, stream>>>(bm_sums, on_device ? final_results : d_final_results, batch_size, nof_bms, c);
|
||||
final_accumulation_kernel<P, S><<<NUM_BLOCKS,NUM_THREADS, 0, stream>>>(bm_sums,bm_sums, on_device ? final_results : d_final_results, batch_size, nof_bms, c);
|
||||
|
||||
final_accumulation_kernel<P, S><<<NUM_BLOCKS,NUM_THREADS>>>(bm_sums,bm_sums, on_device ? final_results : d_final_results, batch_size, nof_bms, c);
|
||||
|
||||
//copy final result to host
|
||||
if (!on_device)
|
||||
cudaMemcpyAsync(final_results, d_final_results, sizeof(P)*batch_size, cudaMemcpyDeviceToHost, stream);
|
||||
@@ -532,26 +877,22 @@ void reference_msm(S* scalars, A* a_points, unsigned size){
|
||||
// Derives the `c` parameter that the MSM entry points pass on to the
// bucket-method routines from the number of inputs `size`.
//   size <  17 : returns 1
//   size >= 17 : returns ceil(log2(size)) - 4
// (hard-coded alternatives, 15 and 17, were tried here and are disabled)
unsigned get_optimal_c(const unsigned size) {
  if (size >= 17) {
    // log2 of the input count, rounded up to the next whole bit
    const double rounded_log_size = ceil(log2(size));
    return rounded_log_size - 4;
  }
  return 1;
}
|
||||
|
||||
//this function is used to compute msms of size larger than 256
|
||||
template <typename S, typename P, typename A>
|
||||
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, cudaStream_t stream){
|
||||
unsigned c = get_optimal_c(size);
|
||||
// unsigned c = 6;
|
||||
// unsigned bitsize = 32;
|
||||
unsigned bitsize = 255;
|
||||
bucket_method_msm(bitsize, c, scalars, points, size, result, on_device, stream);
|
||||
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, bool big_triangle, cudaStream_t stream){
|
||||
unsigned c = 16;
|
||||
unsigned bitsize = S::NBITS;
|
||||
bucket_method_msm(bitsize, c, scalars, points, size, result, on_device, big_triangle, stream);
|
||||
}
|
||||
|
||||
// this function is used to compute batches of msms of size larger than 256
|
||||
// this function is used to compute batches of msms of size larger than 256 - currently isn't working on this branch
|
||||
template <typename S, typename P, typename A>
|
||||
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream){
|
||||
unsigned c = get_optimal_c(msm_size);
|
||||
// unsigned c = 6;
|
||||
// unsigned bitsize = 32;
|
||||
unsigned bitsize = 255;
|
||||
batched_bucket_method_msm(bitsize, c, scalars, points, batch_size, msm_size, result, on_device, stream);
|
||||
}
|
||||
@@ -1,19 +1,33 @@
|
||||
#define G2_DEFINED
|
||||
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
#include "msm.cu"
|
||||
#include "msm_clean.cu"
|
||||
#include "../../utils/cuda_utils.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../curves/bls12_381/curve_config.cuh"
|
||||
// #include "../../curves/bls12_377/curve_config.cuh"
|
||||
#include "../../curves/bn254/curve_config.cuh"
|
||||
|
||||
using namespace BLS12_381;
|
||||
// using namespace BLS12_377;
|
||||
using namespace BN254;
|
||||
|
||||
class Dummy_Scalar {
|
||||
public:
|
||||
static constexpr unsigned NBITS = 32;
|
||||
|
||||
unsigned x;
|
||||
// unsigned p = 10;
|
||||
unsigned p = 1<<30;
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar zero() {
|
||||
return {0};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar one() {
|
||||
return {1};
|
||||
}
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar) {
|
||||
os << scalar.x;
|
||||
@@ -25,7 +39,7 @@ class Dummy_Scalar {
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2) {
|
||||
return {p1.x+p2.x};
|
||||
return {(p1.x+p2.x)%p1.p};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) {
|
||||
@@ -36,11 +50,13 @@ class Dummy_Scalar {
|
||||
return (p1.x == p2);
|
||||
}
|
||||
|
||||
// static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar &scalar) {
|
||||
// return {Dummy_Scalar::neg(point.x)};
|
||||
// }
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar &scalar) {
|
||||
return {scalar.p-scalar.x};
|
||||
}
|
||||
static HOST_INLINE Dummy_Scalar rand_host() {
|
||||
return {(unsigned)rand()};
|
||||
return {(unsigned)rand()%(1<<30)};
|
||||
// return {(unsigned)rand()%10};
|
||||
// return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
|
||||
@@ -53,6 +69,10 @@ class Dummy_Projective {
|
||||
return {0};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective one() {
|
||||
return {1};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective to_affine(const Dummy_Projective &point) {
|
||||
return {point.x};
|
||||
}
|
||||
@@ -61,9 +81,9 @@ class Dummy_Projective {
|
||||
return {point.x};
|
||||
}
|
||||
|
||||
// static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
|
||||
// return {Dummy_Scalar::neg(point.x)};
|
||||
// }
|
||||
static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
|
||||
return {Dummy_Scalar::neg(point.x)};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Projective operator+(Dummy_Projective p1, const Dummy_Projective& p2) {
|
||||
return {p1.x+p2.x};
|
||||
@@ -103,7 +123,8 @@ class Dummy_Projective {
|
||||
}
|
||||
|
||||
static HOST_INLINE Dummy_Projective rand_host() {
|
||||
return {(unsigned)rand()};
|
||||
return {(unsigned)rand()%10};
|
||||
// return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
|
||||
@@ -113,68 +134,110 @@ typedef scalar_t test_scalar;
|
||||
typedef projective_t test_projective;
|
||||
typedef affine_t test_affine;
|
||||
|
||||
// typedef scalar_t test_scalar;
|
||||
// typedef g2_projective_t test_projective;
|
||||
// typedef g2_affine_t test_affine;
|
||||
|
||||
// typedef Dummy_Scalar test_scalar;
|
||||
// typedef Dummy_Projective test_projective;
|
||||
// typedef Dummy_Projective test_affine;
|
||||
|
||||
int main()
|
||||
{
|
||||
unsigned batch_size = 4;
|
||||
unsigned msm_size = 1<<15;
|
||||
bool on_device = false;
|
||||
|
||||
unsigned batch_size = 1;
|
||||
unsigned msm_size = (1<<24) -1;
|
||||
// unsigned msm_size = 9215384;
|
||||
// unsigned msm_size = (1<<10) - 456;
|
||||
// unsigned msm_size = 20;
|
||||
// unsigned msm_size = 6075005;
|
||||
unsigned N = batch_size*msm_size;
|
||||
|
||||
test_scalar *scalars = new test_scalar[N];
|
||||
test_affine *points = new test_affine[N];
|
||||
|
||||
for (unsigned i=0;i<N;i++){
|
||||
scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
|
||||
// scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
|
||||
points[i] = (i%msm_size < 10)? test_projective::to_affine(test_projective::rand_host()): points[i-10];
|
||||
// scalars[i] = test_scalar::rand_host();
|
||||
// scalars[i] = i >462560? test_scalar::rand_host() : (test_scalar::one() + test_scalar::one());
|
||||
scalars[i] = i >20? test_scalar::rand_host() : i>50? (test_scalar::one() + test_scalar::one()) : (test_scalar::one() + test_scalar::one()+ test_scalar::one());
|
||||
// points[i] = test_projective::to_affine(test_projective::rand_host());
|
||||
}
|
||||
std::cout<<"finished generating"<<std::endl;
|
||||
|
||||
test_scalar *d_scalars;
|
||||
test_affine *d_points;
|
||||
if (on_device) {
|
||||
//copy scalars and point to gpu
|
||||
cudaMalloc(&d_scalars, sizeof(test_scalar) * N);
|
||||
cudaMalloc(&d_points, sizeof(test_affine) * N);
|
||||
cudaMemcpy(d_scalars, scalars, sizeof(test_scalar) * N, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(d_points, points, sizeof(test_affine) * N, cudaMemcpyHostToDevice);
|
||||
}
|
||||
// projective_t *short_res = (projective_t*)malloc(sizeof(projective_t));
|
||||
// test_projective *large_res = (test_projective*)malloc(sizeof(test_projective));
|
||||
test_projective large_res[batch_size];
|
||||
test_projective batched_large_res[batch_size];
|
||||
test_projective large_res[batch_size*2];
|
||||
test_projective *d_large_res;
|
||||
cudaMalloc(&d_large_res, sizeof(test_projective) * batch_size*2);
|
||||
// test_projective batched_large_res[batch_size];
|
||||
// fake_point *large_res = (fake_point*)malloc(sizeof(fake_point));
|
||||
// fake_point batched_large_res[256];
|
||||
|
||||
|
||||
// short_msm<scalar_t, projective_t, affine_t>(scalars, points, N, short_res);
|
||||
for (unsigned i=0;i<batch_size;i++){
|
||||
large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
|
||||
// for (unsigned i=0;i<batch_size;i++){
|
||||
// large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
|
||||
// std::cout<<"final result large"<<std::endl;
|
||||
// std::cout<<test_projective::to_affine(*large_res)<<std::endl;
|
||||
}
|
||||
// }
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
|
||||
// large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false);
|
||||
// batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
|
||||
cudaStream_t stream1;
|
||||
cudaStream_t stream2;
|
||||
cudaStreamCreate(&stream1);
|
||||
cudaStreamCreate(&stream2);
|
||||
// large_msm<test_scalar, test_projective, test_affine>(on_device? d_scalars : scalars, on_device? d_points : points, msm_size, on_device? d_large_res : large_res, on_device, true,stream1);
|
||||
// std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
|
||||
large_msm<test_scalar, test_projective, test_affine>(on_device? d_scalars : scalars, on_device? d_points : points, msm_size, on_device? d_large_res+1 : large_res+1, on_device, false,stream2);
|
||||
// test_reduce_triangle(scalars);
|
||||
// test_reduce_rectangle(scalars);
|
||||
// test_reduce_single(scalars);
|
||||
// test_reduce_var(scalars);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
|
||||
printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
|
||||
std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
|
||||
cudaStreamSynchronize(stream1);
|
||||
cudaStreamSynchronize(stream2);
|
||||
cudaStreamDestroy(stream1);
|
||||
cudaStreamDestroy(stream2);
|
||||
|
||||
if (on_device)
|
||||
cudaMemcpy(large_res, d_large_res, sizeof(test_projective) * batch_size*2, cudaMemcpyDeviceToHost);
|
||||
|
||||
// std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
|
||||
std::cout<<test_projective::to_affine(large_res[1])<<std::endl;
|
||||
|
||||
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
|
||||
|
||||
std::cout<<"final results batched large"<<std::endl;
|
||||
bool success = true;
|
||||
for (unsigned i = 0; i < batch_size; i++)
|
||||
{
|
||||
std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
|
||||
if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
|
||||
std::cout<<"good"<<std::endl;
|
||||
}
|
||||
else{
|
||||
std::cout<<"miss"<<std::endl;
|
||||
std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
if (success){
|
||||
std::cout<<"success!"<<std::endl;
|
||||
}
|
||||
// std::cout<<"final results batched large"<<std::endl;
|
||||
// bool success = true;
|
||||
// for (unsigned i = 0; i < batch_size; i++)
|
||||
// {
|
||||
// std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
|
||||
// if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
|
||||
// std::cout<<"good"<<std::endl;
|
||||
// }
|
||||
// else{
|
||||
// std::cout<<"miss"<<std::endl;
|
||||
// std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
|
||||
// success = false;
|
||||
// }
|
||||
// }
|
||||
// if (success){
|
||||
// std::cout<<"success!"<<std::endl;
|
||||
// }
|
||||
|
||||
// std::cout<<batched_large_res[0]<<std::endl;
|
||||
// std::cout<<batched_large_res[1]<<std::endl;
|
||||
@@ -1,7 +1,7 @@
|
||||
#include "poseidon.cuh"
|
||||
|
||||
template <typename S>
|
||||
__global__ void prepare_poseidon_states(S * inp, S * states, size_t number_of_states, S domain_tag, const PoseidonConfiguration<S> config) {
|
||||
__global__ void prepare_poseidon_states(S * states, size_t number_of_states, S domain_tag, const PoseidonConfiguration<S> config) {
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
int state_number = idx / config.t;
|
||||
if (state_number >= number_of_states) {
|
||||
@@ -15,7 +15,7 @@ __global__ void prepare_poseidon_states(S * inp, S * states, size_t number_of_st
|
||||
if (element_number == 0) {
|
||||
prepared_element = domain_tag;
|
||||
} else {
|
||||
prepared_element = inp[state_number * (config.t - 1) + element_number - 1];
|
||||
prepared_element = states[state_number * config.t + element_number - 1];
|
||||
}
|
||||
|
||||
// Add pre-round constant
|
||||
@@ -148,17 +148,20 @@ __global__ void get_hash_results(S * states, size_t number_of_states, S * out, i
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__host__ void Poseidon<S>::hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type) {
|
||||
// Used in matrix multiplication
|
||||
|
||||
S * states, * inp_device;
|
||||
__host__ void Poseidon<S>::hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type, cudaStream_t stream) {
|
||||
S * states;
|
||||
|
||||
// allocate memory for {blocks} states of {t} scalars each
|
||||
cudaMalloc(&states, blocks * this->t * sizeof(S));
|
||||
if (cudaMallocAsync(&states, blocks * this->t * sizeof(S), stream) != cudaSuccess) {
|
||||
throw std::runtime_error("Failed memory allocation on the device");
|
||||
}
|
||||
|
||||
// Move input to cuda
|
||||
cudaMalloc(&inp_device, blocks * (this->t - 1) * sizeof(S));
|
||||
cudaMemcpy(inp_device, inp, blocks * (this->t - 1) * sizeof(S), cudaMemcpyHostToDevice);
|
||||
// This is where the input matrix of size Arity x NumberOfBlocks is
|
||||
// padded and copied to device in a T x NumberOfBlocks matrix
|
||||
cudaMemcpy2DAsync(states, this->t * sizeof(S), // Device pointer and device pitch
|
||||
inp, (this->t - 1) * sizeof(S), // Host pointer and pitch
|
||||
(this->t - 1) * sizeof(S), blocks, // Size of the source matrix (Arity x NumberOfBlocks)
|
||||
cudaMemcpyHostToDevice, stream);
|
||||
|
||||
size_t rc_offset = 0;
|
||||
|
||||
@@ -191,14 +194,13 @@ __host__ void Poseidon<S>::hash_blocks(const S * inp, size_t blocks, S * out, Ha
|
||||
#endif
|
||||
|
||||
// Domain separation and adding pre-round constants
|
||||
prepare_poseidon_states <<< number_of_blocks, number_of_threads >>> (inp_device, states, blocks, domain_tag, this->config);
|
||||
prepare_poseidon_states <<< number_of_blocks, number_of_threads, 0, stream >>> (states, blocks, domain_tag, this->config);
|
||||
rc_offset += this->t;
|
||||
cudaFree(inp_device);
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceSynchronize();
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Domain separation: " << rc_offset << std::endl;
|
||||
print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
//print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
@@ -207,13 +209,13 @@ __host__ void Poseidon<S>::hash_blocks(const S * inp, size_t blocks, S * out, Ha
|
||||
#endif
|
||||
|
||||
// execute half full rounds
|
||||
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t >>> (states, blocks, rc_offset, true, this->config);
|
||||
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t, stream >>> (states, blocks, rc_offset, true, this->config);
|
||||
rc_offset += this->t * this->config.full_rounds_half;
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceSynchronize();
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Full rounds 1. RCOFFSET: " << rc_offset << std::endl;
|
||||
print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
// print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
@@ -222,13 +224,13 @@ __host__ void Poseidon<S>::hash_blocks(const S * inp, size_t blocks, S * out, Ha
|
||||
#endif
|
||||
|
||||
// execute partial rounds
|
||||
partial_rounds <<< number_of_singlehash_blocks, singlehash_block_size >>> (states, blocks, rc_offset, this->config);
|
||||
partial_rounds <<< number_of_singlehash_blocks, singlehash_block_size, 0, stream >>> (states, blocks, rc_offset, this->config);
|
||||
rc_offset += this->config.partial_rounds;
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceSynchronize();
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Partial rounds. RCOFFSET: " << rc_offset << std::endl;
|
||||
print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
//print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
@@ -237,12 +239,12 @@ __host__ void Poseidon<S>::hash_blocks(const S * inp, size_t blocks, S * out, Ha
|
||||
#endif
|
||||
|
||||
// execute half full rounds
|
||||
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t >>> (states, blocks, rc_offset, false, this->config);
|
||||
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t, stream >>> (states, blocks, rc_offset, false, this->config);
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceSynchronize();
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Full rounds 2. RCOFFSET: " << rc_offset << std::endl;
|
||||
print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
//print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
@@ -252,18 +254,18 @@ __host__ void Poseidon<S>::hash_blocks(const S * inp, size_t blocks, S * out, Ha
|
||||
// get output
|
||||
S * out_device;
|
||||
cudaMalloc(&out_device, blocks * sizeof(S));
|
||||
get_hash_results <<< number_of_singlehash_blocks, singlehash_block_size >>> (states, blocks, out_device, this->config.t);
|
||||
get_hash_results <<< number_of_singlehash_blocks, singlehash_block_size, 0, stream >>> (states, blocks, out_device, this->config.t);
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceSynchronize();
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Get hash results" << std::endl;
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
#endif
|
||||
cudaMemcpy(out, out_device, blocks * sizeof(S), cudaMemcpyDeviceToHost);
|
||||
cudaFree(out_device);
|
||||
cudaFree(states);
|
||||
cudaMemcpyAsync(out, out_device, blocks * sizeof(S), cudaMemcpyDeviceToHost, stream);
|
||||
cudaFreeAsync(out_device, stream);
|
||||
cudaFreeAsync(states, stream);
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceReset();
|
||||
163
icicle/appUtils/poseidon/poseidon.cuh
Normal file
163
icicle/appUtils/poseidon/poseidon.cuh
Normal file
@@ -0,0 +1,163 @@
|
||||
#pragma once
|
||||
#include "constants.cuh"
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <chrono>
|
||||
|
||||
template <typename S>
|
||||
__host__ void print_buffer_from_cuda(S * device_ptr, size_t size, size_t t) {
|
||||
S * buffer = static_cast< S * >(malloc(size * sizeof(S)));
|
||||
cudaMemcpy(buffer, device_ptr, size * sizeof(S), cudaMemcpyDeviceToHost);
|
||||
|
||||
std::cout << "Start print" << std::endl;
|
||||
for(int i = 0; i < size / t; i++) {
|
||||
std::cout << "State #" << i << std::endl;
|
||||
for (int j = 0; j < t; j++) {
|
||||
std::cout << buffer[i * t + j] << std::endl;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
std::cout << std::endl;
|
||||
free(buffer);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG
|
||||
template <typename S>
|
||||
__device__ void print_scalar(S element, int data) {
|
||||
printf("D# %d, T# %d: 0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
|
||||
data,
|
||||
threadIdx.x,
|
||||
element.limbs_storage.limbs[0],
|
||||
element.limbs_storage.limbs[1],
|
||||
element.limbs_storage.limbs[2],
|
||||
element.limbs_storage.limbs[3],
|
||||
element.limbs_storage.limbs[4],
|
||||
element.limbs_storage.limbs[5],
|
||||
element.limbs_storage.limbs[6],
|
||||
element.limbs_storage.limbs[7]
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Parameters and constant tables handed by value to the Poseidon kernels.
/// The Poseidon constructor fills every field; the four pointers are
/// allocated there with cudaMallocAsync.
template <typename S>
struct PoseidonConfiguration {
  uint32_t partial_rounds;    // set from partial_rounds_number_from_arity(arity)
  uint32_t full_rounds_half;  // set to FULL_ROUNDS_DEFAULT
  uint32_t t;                 // arity + 1

  S * round_constants;        // t * full_rounds_half * 2 + partial_rounds elements
  S * mds_matrix;             // t * t elements
  S * non_sparse_matrix;      // t * t elements
  S * sparse_matrices;        // (t * 2 - 1) * partial_rounds elements
};
|
||||
|
||||
template <typename S>
|
||||
class Poseidon {
|
||||
public:
|
||||
uint32_t t;
|
||||
PoseidonConfiguration<S> config;
|
||||
|
||||
enum HashType {
|
||||
ConstInputLen,
|
||||
MerkleTree,
|
||||
};
|
||||
|
||||
Poseidon(const uint32_t arity, cudaStream_t stream) {
|
||||
t = arity + 1;
|
||||
this->config.t = t;
|
||||
this->stream = stream;
|
||||
|
||||
// Pre-calculate domain tags
|
||||
// Domain tags will vary for different applications of Poseidon
|
||||
uint32_t tree_domain_tag_value = 1;
|
||||
tree_domain_tag_value = (tree_domain_tag_value << arity) - tree_domain_tag_value;
|
||||
tree_domain_tag = S::from(tree_domain_tag_value);
|
||||
|
||||
const_input_no_pad_domain_tag = S::one();
|
||||
|
||||
// TO-DO: implement binary shifts for scalar type
|
||||
// const_input_no_pad_domain_tag = S::one() << 64;
|
||||
// const_input_no_pad_domain_tag *= S::from(arity);
|
||||
|
||||
this->config.full_rounds_half = FULL_ROUNDS_DEFAULT;
|
||||
this->config.partial_rounds = partial_rounds_number_from_arity(arity);
|
||||
|
||||
uint32_t round_constants_len = t * this->config.full_rounds_half * 2 + this->config.partial_rounds;
|
||||
uint32_t mds_matrix_len = t * t;
|
||||
uint32_t sparse_matrices_len = (t * 2 - 1) * this->config.partial_rounds;
|
||||
|
||||
// All the constants are stored in a single file
|
||||
S * constants = load_constants<S>(arity);
|
||||
|
||||
S * mds_offset = constants + round_constants_len;
|
||||
S * non_sparse_offset = mds_offset + mds_matrix_len;
|
||||
S * sparse_matrices_offset = non_sparse_offset + mds_matrix_len;
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
std::cout << "P: " << this->config.partial_rounds << " F: " << this->config.full_rounds_half << std::endl;
|
||||
#endif
|
||||
|
||||
// Create streams for copying constants
|
||||
cudaStream_t stream_copy_round_constants, stream_copy_mds_matrix, stream_copy_non_sparse, stream_copy_sparse_matrices;
|
||||
cudaStreamCreate(&stream_copy_round_constants);
|
||||
cudaStreamCreate(&stream_copy_mds_matrix);
|
||||
cudaStreamCreate(&stream_copy_non_sparse);
|
||||
cudaStreamCreate(&stream_copy_sparse_matrices);
|
||||
|
||||
// Create events for copying constants
|
||||
cudaEvent_t event_copied_round_constants, event_copy_mds_matrix, event_copy_non_sparse, event_copy_sparse_matrices;
|
||||
cudaEventCreateWithFlags(&event_copied_round_constants, cudaEventDisableTiming);
|
||||
cudaEventCreateWithFlags(&event_copy_mds_matrix, cudaEventDisableTiming);
|
||||
cudaEventCreateWithFlags(&event_copy_non_sparse, cudaEventDisableTiming);
|
||||
cudaEventCreateWithFlags(&event_copy_sparse_matrices, cudaEventDisableTiming);
|
||||
|
||||
// Malloc memory for copying constants
|
||||
cudaMallocAsync(&this->config.round_constants, sizeof(S) * round_constants_len, stream_copy_round_constants);
|
||||
cudaMallocAsync(&this->config.mds_matrix, sizeof(S) * mds_matrix_len, stream_copy_mds_matrix);
|
||||
cudaMallocAsync(&this->config.non_sparse_matrix, sizeof(S) * mds_matrix_len, stream_copy_non_sparse);
|
||||
cudaMallocAsync(&this->config.sparse_matrices, sizeof(S) * sparse_matrices_len, stream_copy_sparse_matrices);
|
||||
|
||||
// Copy constants
|
||||
cudaMemcpyAsync(this->config.round_constants, constants,
|
||||
sizeof(S) * round_constants_len,
|
||||
cudaMemcpyHostToDevice, stream_copy_round_constants
|
||||
);
|
||||
cudaMemcpyAsync(this->config.mds_matrix, mds_offset,
|
||||
sizeof(S) * mds_matrix_len,
|
||||
cudaMemcpyHostToDevice, stream_copy_mds_matrix
|
||||
);
|
||||
cudaMemcpyAsync(this->config.non_sparse_matrix, non_sparse_offset,
|
||||
sizeof(S) * mds_matrix_len,
|
||||
cudaMemcpyHostToDevice, stream_copy_non_sparse
|
||||
);
|
||||
cudaMemcpyAsync(this->config.sparse_matrices, sparse_matrices_offset,
|
||||
sizeof(S) * sparse_matrices_len,
|
||||
cudaMemcpyHostToDevice, stream_copy_sparse_matrices
|
||||
);
|
||||
|
||||
// Record finished copying event for streams
|
||||
cudaEventRecord(event_copied_round_constants, stream_copy_round_constants);
|
||||
cudaEventRecord(event_copy_mds_matrix, stream_copy_mds_matrix);
|
||||
cudaEventRecord(event_copy_non_sparse, stream_copy_non_sparse);
|
||||
cudaEventRecord(event_copy_sparse_matrices, stream_copy_sparse_matrices);
|
||||
|
||||
// Main stream waits for copying to finish
|
||||
cudaStreamWaitEvent(stream, event_copied_round_constants);
|
||||
cudaStreamWaitEvent(stream, event_copy_mds_matrix);
|
||||
cudaStreamWaitEvent(stream, event_copy_non_sparse);
|
||||
cudaStreamWaitEvent(stream, event_copy_sparse_matrices);
|
||||
}
|
||||
|
||||
~Poseidon() {
|
||||
cudaFreeAsync(this->config.round_constants, this->stream);
|
||||
cudaFreeAsync(this->config.mds_matrix, this->stream);
|
||||
cudaFreeAsync(this->config.non_sparse_matrix, this->stream);
|
||||
cudaFreeAsync(this->config.sparse_matrices, this->stream);
|
||||
}
|
||||
|
||||
// Hash multiple preimages in parallel
|
||||
void hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type, cudaStream_t stream);
|
||||
|
||||
private:
|
||||
S tree_domain_tag, const_input_no_pad_domain_tag;
|
||||
cudaStream_t stream;
|
||||
};
|
||||
1102
icicle/appUtils/poseidon/poseidon_test.cu
Normal file
1102
icicle/appUtils/poseidon/poseidon_test.cu
Normal file
File diff suppressed because it is too large
Load Diff
22
icicle/curves/bls12_377/curve_config.cuh
Normal file
22
icicle/curves/bls12_377/curve_config.cuh
Normal file
@@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
#include "params.cuh"
|
||||
|
||||
namespace BLS12_377 {
|
||||
typedef Field<PARAMS_BLS12_377::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_BLS12_377::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_377::weierstrass_b };
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_BLS12_377::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_re },
|
||||
point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_im }};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
}
|
||||
327
icicle/curves/bls12_377/lde.cu
Normal file
327
icicle/curves/bls12_377/lde.cu
Normal file
@@ -0,0 +1,327 @@
|
||||
#ifndef _BLS12_377_LDE
|
||||
#define _BLS12_377_LDE
|
||||
#include <cuda.h>
|
||||
#include "../../appUtils/ntt/lde.cu"
|
||||
#include "../../appUtils/ntt/ntt.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "curve_config.cuh"
|
||||
|
||||
/**
 * C-linkage entry point that builds a twiddle-factor array for the BLS12-377 scalar field.
 *
 * Calls `fill_twiddle_factors_array` with `scalar_t::omega(logn)`, or with
 * `scalar_t::omega_inv(logn)` when `inverse` is set.
 *
 * @param domain_size Forwarded to `fill_twiddle_factors_array`.
 * @param logn        Argument for omega / omega_inv (presumably log2 of the domain size - confirm).
 * @param inverse     Selects omega_inv instead of omega.
 * @param device_id   Accepted but not used by this wrapper.
 * @param stream      Overwritten by a stream created inside this function (when creation
 *                    succeeds), so the caller's handle is not used.
 * @return The pointer returned by `fill_twiddle_factors_array`, or nullptr if a
 *         std::runtime_error was caught.
 */
extern "C" BLS12_377::scalar_t* build_domain_cuda_bls12_377(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    BLS12_377::scalar_t* const twiddles =
      inverse ? fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega_inv(logn), stream)
              : fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega(logn), stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return twiddles;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return nullptr;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: runs `ntt_end2end_template` on an array of BLS12-377 scalars.
 *
 * @param arr       Array forwarded to the template.
 * @param n         Forwarded to the template (presumably the element count - confirm).
 * @param inverse   Forwarded unchanged.
 * @param device_id Accepted but not used (see TODO below).
 * @param stream    Overwritten by a stream created inside this function (when creation
 *                  succeeds), so the caller's handle is not used.
 * @return The template's return value, or -1 if a std::runtime_error was caught.
 */
extern "C" int ntt_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = ntt_end2end_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());

    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: runs `ntt_end2end_template` on an array of BLS12-377 projective points,
 * with the scalar field as the second template argument.
 *
 * @param arr       Array of points forwarded to the template.
 * @param n         Forwarded to the template (presumably the element count - confirm).
 * @param inverse   Forwarded unchanged.
 * @param device_id Accepted but not used (see TODO below).
 * @param stream    Overwritten by a stream created inside this function (when creation
 *                  succeeds), so the caller's handle is not used.
 * @return The template's return value, or -1 if a std::runtime_error was caught.
 */
extern "C" int ecntt_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = ntt_end2end_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: runs `ntt_end2end_batch_template` on BLS12-377 scalars.
 *
 * @param arr        Array forwarded to the template.
 * @param arr_size   Forwarded to the template (whether it is the total or the per-batch length is
 *                   not visible here).
 * @param batch_size Forwarded to the template.
 * @param inverse    Forwarded unchanged.
 * @param device_id  Accepted but not used (see TODO below).
 * @param stream     Overwritten by a stream created inside this function (when creation
 *                   succeeds), so the caller's handle is not used.
 * @return The template's return value, or -1 if a std::runtime_error was caught.
 */
extern "C" int ntt_batch_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = ntt_end2end_batch_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: runs `ntt_end2end_batch_template` on BLS12-377 projective points, with
 * the scalar field as the second template argument.
 *
 * @param arr        Array of points forwarded to the template.
 * @param arr_size   Forwarded to the template (whether it is the total or the per-batch length is
 *                   not visible here).
 * @param batch_size Forwarded to the template.
 * @param inverse    Forwarded unchanged.
 * @param device_id  Accepted but not used (see TODO below).
 * @param stream     Overwritten by a stream created inside this function (when creation
 *                   succeeds), so the caller's handle is not used.
 * @return The template's return value, or -1 if a std::runtime_error was caught.
 */
extern "C" int ecntt_batch_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = ntt_end2end_batch_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: forwards to `interpolate` for BLS12-377 scalars.
 *
 * This wrapper does not create a stream of its own; the caller's `stream` is passed straight
 * through.
 *
 * @param d_out         Output buffer forwarded to `interpolate` (the `d_` prefix suggests device
 *                      memory - confirm).
 * @param d_evaluations Input values forwarded to `interpolate`.
 * @param d_domain      Domain array forwarded to `interpolate`.
 * @param n             Forwarded to `interpolate`.
 * @param device_id     Accepted but not used by this wrapper.
 * @param stream        Stream handed to `interpolate`.
 * @return The value returned by `interpolate`, or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
  try {
    const int status = interpolate(d_out, d_evaluations, d_domain, n, stream);
    return status;
  } catch (const std::runtime_error& failure) {
    printf("error %s", failure.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: forwards to `interpolate_batch` for BLS12-377 scalars.
 *
 * @param d_out         Output buffer forwarded to `interpolate_batch` (the `d_` prefix suggests
 *                      device memory - confirm).
 * @param d_evaluations Input values forwarded to `interpolate_batch`.
 * @param d_domain      Domain array forwarded to `interpolate_batch`.
 * @param n             Forwarded to `interpolate_batch`.
 * @param batch_size    Forwarded to `interpolate_batch`.
 * @param device_id     Accepted but not used by this wrapper.
 * @param stream        Overwritten by a stream created inside this function (when creation
 *                      succeeds), so the caller's handle is not used.
 * @return The value returned by `interpolate_batch`, or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_evaluations, BLS12_377::scalar_t* d_domain, unsigned n,
                                                        unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: forwards to `interpolate` for BLS12-377 projective points.
 *
 * This wrapper does not create a stream of its own; the caller's `stream` is passed straight
 * through.
 *
 * @param d_out         Output points forwarded to `interpolate` (the `d_` prefix suggests device
 *                      memory - confirm).
 * @param d_evaluations Input points forwarded to `interpolate`.
 * @param d_domain      Scalar domain array forwarded to `interpolate`.
 * @param n             Forwarded to `interpolate`.
 * @param device_id     Accepted but not used by this wrapper.
 * @param stream        Stream handed to `interpolate`.
 * @return The value returned by `interpolate`, or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
  try {
    const int status = interpolate(d_out, d_evaluations, d_domain, n, stream);
    return status;
  } catch (const std::runtime_error& failure) {
    printf("error %s", failure.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: forwards to `interpolate_batch` for BLS12-377 projective points.
 *
 * @param d_out         Output points forwarded to `interpolate_batch` (the `d_` prefix suggests
 *                      device memory - confirm).
 * @param d_evaluations Input points forwarded to `interpolate_batch`.
 * @param d_domain      Scalar domain array forwarded to `interpolate_batch`.
 * @param n             Forwarded to `interpolate_batch`.
 * @param batch_size    Forwarded to `interpolate_batch`.
 * @param device_id     Accepted but not used by this wrapper.
 * @param stream        Overwritten by a stream created inside this function (when creation
 *                      succeeds), so the caller's handle is not used.
 * @return The value returned by `interpolate_batch`, or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_evaluations, BLS12_377::scalar_t* d_domain,
                                                       unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: forwards to `evaluate` for BLS12-377 scalars, with the coset flag set to
 * false and a null coset-powers pointer.
 *
 * @param d_out          Output buffer forwarded to `evaluate` (the `d_` prefix suggests device
 *                       memory - confirm).
 * @param d_coefficients Input coefficients forwarded to `evaluate`.
 * @param d_domain       Domain array forwarded to `evaluate`.
 * @param domain_size    Forwarded to `evaluate`.
 * @param n              Forwarded to `evaluate`.
 * @param device_id      Accepted but not used by this wrapper.
 * @param stream         Overwritten by a stream created inside this function (when creation
 *                       succeeds), so the caller's handle is not used.
 * @return The value returned by `evaluate`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain,
                                               unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    // Typed null pointer: no coset powers are supplied on this path.
    BLS12_377::scalar_t* no_coset_powers = nullptr;
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: forwards to `evaluate_batch` for BLS12-377 scalars, with the coset flag
 * set to false and a null coset-powers pointer.
 *
 * @param d_out          Output buffer forwarded to `evaluate_batch` (the `d_` prefix suggests
 *                       device memory - confirm).
 * @param d_coefficients Input coefficients forwarded to `evaluate_batch`.
 * @param d_domain       Domain array forwarded to `evaluate_batch`.
 * @param domain_size    Forwarded to `evaluate_batch`.
 * @param n              Forwarded to `evaluate_batch`.
 * @param batch_size     Forwarded to `evaluate_batch`.
 * @param device_id      Accepted but not used by this wrapper.
 * @param stream         Overwritten by a stream created inside this function (when creation
 *                       succeeds), so the caller's handle is not used.
 * @return The value returned by `evaluate_batch`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
                                                     unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    // Typed null pointer: no coset powers are supplied on this path.
    BLS12_377::scalar_t* no_coset_powers = nullptr;
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: forwards to `evaluate` for BLS12-377 projective points, with the coset
 * flag set to false and a null coset-powers pointer.
 *
 * @param d_out          Output points forwarded to `evaluate` (the `d_` prefix suggests device
 *                       memory - confirm).
 * @param d_coefficients Input points forwarded to `evaluate`.
 * @param d_domain       Scalar domain array forwarded to `evaluate`.
 * @param domain_size    Forwarded to `evaluate`.
 * @param n              Forwarded to `evaluate`.
 * @param device_id      Accepted but not used by this wrapper.
 * @param stream         Overwritten by a stream created inside this function (when creation
 *                       succeeds), so the caller's handle is not used.
 * @return The value returned by `evaluate`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain,
                                              unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    // Typed null pointer: no coset powers are supplied on this path.
    BLS12_377::scalar_t* no_coset_powers = nullptr;
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: forwards to `evaluate_batch` for BLS12-377 projective points, with the
 * coset flag set to false and a null coset-powers pointer.
 *
 * @param d_out          Output points forwarded to `evaluate_batch` (the `d_` prefix suggests
 *                       device memory - confirm).
 * @param d_coefficients Input points forwarded to `evaluate_batch`.
 * @param d_domain       Scalar domain array forwarded to `evaluate_batch`.
 * @param domain_size    Forwarded to `evaluate_batch`.
 * @param n              Forwarded to `evaluate_batch`.
 * @param batch_size     Forwarded to `evaluate_batch`.
 * @param device_id      Accepted but not used by this wrapper.
 * @param stream         Overwritten by a stream created inside this function (when creation
 *                       succeeds), so the caller's handle is not used.
 * @return The value returned by `evaluate_batch`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
                                                    unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    // Typed null pointer: no coset powers are supplied on this path.
    BLS12_377::scalar_t* no_coset_powers = nullptr;
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: forwards to `evaluate` for BLS12-377 scalars with the coset flag set to
 * true and the caller's `coset_powers`.
 *
 * @param d_out          Output buffer forwarded to `evaluate` (the `d_` prefix suggests device
 *                       memory - confirm).
 * @param d_coefficients Input coefficients forwarded to `evaluate`.
 * @param d_domain       Domain array forwarded to `evaluate`.
 * @param domain_size    Forwarded to `evaluate`.
 * @param n              Forwarded to `evaluate`.
 * @param coset_powers   Forwarded to `evaluate`.
 * @param device_id      Accepted but not used by this wrapper.
 * @param stream         Overwritten by a stream created inside this function (when creation
 *                       succeeds), so the caller's handle is not used.
 * @return The value returned by `evaluate`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_on_coset_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
                                                        unsigned n, BLS12_377::scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: forwards to `evaluate_batch` for BLS12-377 scalars with the coset flag
 * set to true and the caller's `coset_powers`.
 *
 * @param d_out          Output buffer forwarded to `evaluate_batch` (the `d_` prefix suggests
 *                       device memory - confirm).
 * @param d_coefficients Input coefficients forwarded to `evaluate_batch`.
 * @param d_domain       Domain array forwarded to `evaluate_batch`.
 * @param domain_size    Forwarded to `evaluate_batch`.
 * @param n              Forwarded to `evaluate_batch`.
 * @param batch_size     Forwarded to `evaluate_batch`.
 * @param coset_powers   Forwarded to `evaluate_batch`.
 * @param device_id      Accepted but not used by this wrapper.
 * @param stream         Overwritten by a stream created inside this function (when creation
 *                       succeeds), so the caller's handle is not used.
 * @return The value returned by `evaluate_batch`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
                                                              unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: forwards to `evaluate` for BLS12-377 projective points with the coset
 * flag set to true and the caller's `coset_powers`.
 *
 * @param d_out          Output points forwarded to `evaluate` (the `d_` prefix suggests device
 *                       memory - confirm).
 * @param d_coefficients Input points forwarded to `evaluate`.
 * @param d_domain       Scalar domain array forwarded to `evaluate`.
 * @param domain_size    Forwarded to `evaluate`.
 * @param n              Forwarded to `evaluate`.
 * @param coset_powers   Forwarded to `evaluate`.
 * @param device_id      Accepted but not used by this wrapper.
 * @param stream         Overwritten by a stream created inside this function (when creation
 *                       succeeds), so the caller's handle is not used.
 * @return The value returned by `evaluate`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_points_on_coset_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
                                                       unsigned n, BLS12_377::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: forwards to `evaluate_batch` for BLS12-377 projective points with the
 * coset flag set to true and the caller's `coset_powers`.
 *
 * @param d_out          Output points forwarded to `evaluate_batch` (the `d_` prefix suggests
 *                       device memory - confirm).
 * @param d_coefficients Input points forwarded to `evaluate_batch`.
 * @param d_domain       Scalar domain array forwarded to `evaluate_batch`.
 * @param domain_size    Forwarded to `evaluate_batch`.
 * @param n              Forwarded to `evaluate_batch`.
 * @param batch_size     Forwarded to `evaluate_batch`.
 * @param coset_powers   Forwarded to `evaluate_batch`.
 * @param device_id      Accepted but not used by this wrapper.
 * @param stream         Overwritten by a stream created inside this function (when creation
 *                       succeeds), so the caller's handle is not used.
 * @return The value returned by `evaluate_batch`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
                                                             unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    const int status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: calls `reverse_order` on an array of BLS12-377 scalars.
 *
 * @param arr       Array forwarded to `reverse_order`.
 * @param n         Element count; floor(log2(n)) is passed alongside it (presumably n is a power
 *                  of two - confirm). Non-positive values yield a log of 0.
 * @param device_id Accepted but not used by this wrapper.
 * @param stream    Overwritten by a stream created inside this function (when creation
 *                  succeeds), so the caller's handle is not used.
 * @return 0 after `reverse_order` returns, or -1 if a std::runtime_error was caught.
 */
extern "C" int reverse_order_scalars_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    // Exact integer floor(log2(n)). The former `log(n) / log(2)` went through floating point,
    // which can round across an integer boundary and is undefined to convert for n <= 0.
    uint32_t logn = 0;
    for (uint32_t rest = (n > 0) ? static_cast<uint32_t>(n) : 0u; rest > 1; rest >>= 1) {
      ++logn;
    }
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    reverse_order(arr, n, logn, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return 0;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: calls `reverse_order_batch` on BLS12-377 scalars.
 *
 * @param arr        Array forwarded to `reverse_order_batch`.
 * @param n          Forwarded together with floor(log2(n)) (presumably the per-batch length and a
 *                   power of two - confirm). Non-positive values yield a log of 0.
 * @param batch_size Forwarded to `reverse_order_batch`.
 * @param device_id  Accepted but not used by this wrapper.
 * @param stream     Overwritten by a stream created inside this function (when creation
 *                   succeeds), so the caller's handle is not used.
 * @return 0 after `reverse_order_batch` returns, or -1 if a std::runtime_error was caught.
 */
extern "C" int reverse_order_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    // Exact integer floor(log2(n)). The former `log(n) / log(2)` went through floating point,
    // which can round across an integer boundary and is undefined to convert for n <= 0.
    uint32_t logn = 0;
    for (uint32_t rest = (n > 0) ? static_cast<uint32_t>(n) : 0u; rest > 1; rest >>= 1) {
      ++logn;
    }
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    reverse_order_batch(arr, n, logn, batch_size, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return 0;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: calls `reverse_order` on an array of BLS12-377 projective points.
 *
 * @param arr       Array forwarded to `reverse_order`.
 * @param n         Element count; floor(log2(n)) is passed alongside it (presumably n is a power
 *                  of two - confirm). Non-positive values yield a log of 0.
 * @param device_id Accepted but not used by this wrapper.
 * @param stream    Overwritten by a stream created inside this function (when creation
 *                  succeeds), so the caller's handle is not used.
 * @return 0 after `reverse_order` returns, or -1 if a std::runtime_error was caught.
 */
extern "C" int reverse_order_points_cuda_bls12_377(BLS12_377::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    // Exact integer floor(log2(n)). The former `log(n) / log(2)` went through floating point,
    // which can round across an integer boundary and is undefined to convert for n <= 0.
    uint32_t logn = 0;
    for (uint32_t rest = (n > 0) ? static_cast<uint32_t>(n) : 0u; rest > 1; rest >>= 1) {
      ++logn;
    }
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    reverse_order(arr, n, logn, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return 0;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-linkage entry point: calls `reverse_order_batch` on BLS12-377 projective points.
 *
 * @param arr        Array forwarded to `reverse_order_batch`.
 * @param n          Forwarded together with floor(log2(n)) (presumably the per-batch length and a
 *                   power of two - confirm). Non-positive values yield a log of 0.
 * @param batch_size Forwarded to `reverse_order_batch`.
 * @param device_id  Accepted but not used by this wrapper.
 * @param stream     Overwritten by a stream created inside this function (when creation
 *                   succeeds), so the caller's handle is not used.
 * @return 0 after `reverse_order_batch` returns, or -1 if a std::runtime_error was caught.
 */
extern "C" int reverse_order_points_batch_cuda_bls12_377(BLS12_377::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    // Exact integer floor(log2(n)). The former `log(n) / log(2)` went through floating point,
    // which can round across an integer boundary and is undefined to convert for n <= 0.
    uint32_t logn = 0;
    for (uint32_t rest = (n > 0) ? static_cast<uint32_t>(n) : 0u; rest > 1; rest >>= 1) {
      ++logn;
    }
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    reverse_order_batch(arr, n, logn, batch_size, stream);
    // Previously leaked on every call. Safe with work still queued: CUDA releases the stream
    // once that work has completed.
    if (owns_stream) cudaStreamDestroy(stream);
    return 0;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
#endif
|
||||
93
icicle/curves/bls12_377/msm.cu
Normal file
93
icicle/curves/bls12_377/msm.cu
Normal file
@@ -0,0 +1,93 @@
|
||||
#ifndef _BLS12_377_MSM
|
||||
#define _BLS12_377_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
|
||||
|
||||
extern "C"
|
||||
int msm_cuda_bls12_377(BLS12_377::projective_t *out, BLS12_377::affine_t points[],
|
||||
BLS12_377::scalar_t scalars[], size_t count, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false, false, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * C-linkage entry point: runs `batched_large_msm` over BLS12-377 scalars and affine points, then
 * waits for the stream to finish.
 *
 * @param out        Destination for the results, forwarded to `batched_large_msm`.
 * @param points     Affine points forwarded to `batched_large_msm`.
 * @param scalars    Scalars forwarded to `batched_large_msm`.
 * @param batch_size Forwarded to `batched_large_msm`.
 * @param msm_size   Forwarded to `batched_large_msm`.
 * @param device_id  Accepted but not used by this wrapper.
 * @param stream     Overwritten by a stream created inside this function (when creation
 *                   succeeds), so the caller's handle is not used.
 * @return CUDA_SUCCESS after the stream has been synchronized, or -1 if a std::runtime_error was
 *         caught.
 */
extern "C" int msm_batch_cuda_bls12_377(BLS12_377::projective_t* out, BLS12_377::affine_t points[],
                                        BLS12_377::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when `stream` holds a stream created here, i.e. one this function must destroy.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    batched_large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
    cudaStreamSynchronize(stream);
    // The stream is idle after the synchronize; release it instead of leaking one per call.
    if (owns_stream) cudaStreamDestroy(stream);

    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(d_scalars, d_points, count, d_out, true, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,19 +1,23 @@
|
||||
#pragma once
|
||||
#include "../../utils/storage.cuh"
|
||||
namespace PARAMS{
|
||||
|
||||
namespace PARAMS_BLS12_377{
|
||||
struct fp_config{
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 32;
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd, 0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb, 0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3, 0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7, 0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f, 0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
|
||||
static constexpr unsigned modulus_bits_count = 253;
|
||||
static constexpr storage<2*limbs_count> modulus_squared = {0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3, 0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7, 0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f, 0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
|
||||
static constexpr unsigned modulus_bit_count = 253;
|
||||
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b, 0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
static constexpr storage<limbs_count> omega1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
@@ -46,6 +50,14 @@ namespace PARAMS{
|
||||
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8,
|
||||
omega9, omega10, omega11, omega12, omega13, omega14, omega15, omega16,
|
||||
omega17, omega18, omega19, omega20, omega21, omega22, omega23, omega24,
|
||||
omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
|
||||
};
|
||||
|
||||
static constexpr storage<limbs_count> omega_inv1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
@@ -78,6 +90,14 @@ namespace PARAMS{
|
||||
static constexpr storage<limbs_count> omega_inv30= {0x123a81f6, 0xc03a3272, 0x115b15e8, 0x377e6d2f, 0x2d6d7206, 0xed5575e4, 0x714004f2, 0x0b1e37e4};
|
||||
static constexpr storage<limbs_count> omega_inv31= {0xdde8ffc5, 0x62a29589, 0x618c5d62, 0xfb6716e8, 0x88d61f25, 0x787e561c, 0xd2b21c7e, 0x0e351761};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x7aca7fbe, 0xc9fea0e9, 0xb41a8854, 0x965ff314, 0x810eea7e, 0x743415d4, 0x8275bbd1, 0x0431c01b};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
|
||||
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
|
||||
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
|
||||
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
|
||||
};
|
||||
|
||||
static constexpr storage<limbs_count> inv1= {0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af};
|
||||
static constexpr storage<limbs_count> inv2= {0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06};
|
||||
static constexpr storage<limbs_count> inv3= {0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2};
|
||||
@@ -110,6 +130,13 @@ namespace PARAMS{
|
||||
static constexpr storage<limbs_count> inv30= {0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv31= {0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv32= {0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8,
|
||||
inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
|
||||
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24,
|
||||
inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
|
||||
};
|
||||
};
|
||||
|
||||
struct fq_config{
|
||||
@@ -118,10 +145,10 @@ namespace PARAMS{
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88, 0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6, 0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510, 0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc, 0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2, 0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af, 0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4, 0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f, 0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48, 0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be, 0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
|
||||
static constexpr unsigned modulus_bits_count = 377;
|
||||
static constexpr storage<2*limbs_count> modulus_squared = {0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2, 0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af, 0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4, 0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f, 0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48, 0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be, 0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
|
||||
static constexpr unsigned modulus_bit_count = 377;
|
||||
static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488, 0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
@@ -130,26 +157,24 @@ namespace PARAMS{
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> generator_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512, 0xbd37cb5c, 0x188282c8,
|
||||
static constexpr storage<limbs_count> g1_gen_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512, 0xbd37cb5c, 0x188282c8,
|
||||
0xaa9d41bb, 0x85951e2c, 0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
|
||||
static constexpr storage<limbs_count> generator_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36, 0x4fb82305, 0x6d182ad4,
|
||||
static constexpr storage<limbs_count> g1_gen_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36, 0x4fb82305, 0x6d182ad4,
|
||||
0xca3e52d9, 0xbd7fb348, 0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
|
||||
static constexpr storage<limbs_count> generator_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0xbac0326, 0x7ae3d177, 0xb4510b64,
|
||||
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x24aa2b2};
|
||||
static constexpr storage<limbs_count> generator_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
|
||||
0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
|
||||
static constexpr storage<limbs_count> generator_y_re = {0x8b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
|
||||
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0xce5d527};
|
||||
static constexpr storage<limbs_count> generator_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
|
||||
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x606c4a0};
|
||||
static constexpr storage<limbs_count> g2_gen_x_re = {0x7c005196, 0x74e3e48f, 0xbb535402, 0x71889f52, 0x57db6b9b, 0x7ea501f5,
|
||||
0x203e5031, 0xc565f071, 0xa3841d01, 0xc89630a2, 0x71c785fe, 0x018480be};
|
||||
static constexpr storage<limbs_count> g2_gen_x_im = {0x6ea16afe, 0xb26bfefa, 0xbff76fe6, 0x5cf89984, 0x0799c9de, 0xe7223ece,
|
||||
0x6651cecb, 0x532777ee, 0xb1b140d5, 0x70dc5a51, 0xe7004031, 0x00ea6040};
|
||||
static constexpr storage<limbs_count> g2_gen_y_re = {0x09fd4ddf, 0xf0940944, 0x6d8c7c2e, 0xf2cf8888, 0xf832d204, 0xe458c282,
|
||||
0x74b49a58, 0xde03ed72, 0xcbb2efb4, 0xd960736b, 0x5d446f7b, 0x00690d66};
|
||||
static constexpr storage<limbs_count> g2_gen_y_im = {0x85eb8f93, 0xd9a1cdd1, 0x5e52270b, 0x4279b83f, 0xcee304c2, 0x2463b01a,
|
||||
0x3d591bf1, 0x61ef11ac, 0x151a70aa, 0x9e549da3, 0xd2835518, 0x00f8169f};
|
||||
};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// TODO: correct parameters for G2 here
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x9999999a, 0x1c9ed999, 0x1ccccccd, 0x0dd39e5c, 0x3c6bf800, 0x129207b6,
|
||||
0xcd5fd889, 0xdc7b4f91, 0x7460c589, 0x43bd0373, 0xdb0fd6f3, 0x010222f6};
|
||||
}
|
||||
22
icicle/curves/bls12_377/projective.cu
Normal file
22
icicle/curves/bls12_377/projective.cu
Normal file
@@ -0,0 +1,22 @@
|
||||
|
||||
#include <cuda.h>
|
||||
|
||||
#include "curve_config.cuh"
|
||||
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
/**
 * C-linkage equality check for two BLS12_377 projective points.
 * The result is true only when the points compare equal under `operator==`
 * and neither point has x, y and z all equal to `point_field_t::zero()`.
 */
extern "C" bool eq_bls12_377(BLS12_377::projective_t *point1, BLS12_377::projective_t *point2)
{
  using field_t = BLS12_377::point_field_t;

  if (!(*point1 == *point2)) return false;

  // A point whose three coordinates are all zero never counts as equal.
  const bool first_all_zero =
    (point1->x == field_t::zero()) && (point1->y == field_t::zero()) && (point1->z == field_t::zero());
  if (first_all_zero) return false;

  const bool second_all_zero =
    (point2->x == field_t::zero()) && (point2->y == field_t::zero()) && (point2->z == field_t::zero());
  return !second_all_zero;
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
/**
 * C-linkage equality check for two BLS12_377 G2 projective points.
 * The result is true only when the points compare equal under `operator==`
 * and neither point has x, y and z all equal to `g2_point_field_t::zero()`.
 */
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t *point1, BLS12_377::g2_projective_t *point2)
{
  using field_t = BLS12_377::g2_point_field_t;

  if (!(*point1 == *point2)) return false;

  // A point whose three coordinates are all zero never counts as equal.
  const bool first_all_zero =
    (point1->x == field_t::zero()) && (point1->y == field_t::zero()) && (point1->z == field_t::zero());
  if (first_all_zero) return false;

  const bool second_all_zero =
    (point2->x == field_t::zero()) && (point2->y == field_t::zero()) && (point2->z == field_t::zero());
  return !second_all_zero;
}
|
||||
#endif
|
||||
@@ -1,5 +1,5 @@
|
||||
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
#include "poseidon.cu"
|
||||
69
icicle/curves/bls12_377/ve_mod_mult.cu
Normal file
69
icicle/curves/bls12_377/ve_mod_mult.cu
Normal file
@@ -0,0 +1,69 @@
|
||||
#ifndef _BLS12_377_VEC_MULT
|
||||
#define _BLS12_377_VEC_MULT
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
|
||||
|
||||
/**
 * C-linkage wrapper around `vector_mod_mult` for BLS12_377 projective points.
 * `inout` is handed to `vector_mod_mult` twice (presumably as input and as output;
 * confirm against its signature).
 * @param inout      Points passed as the second and third arguments.
 * @param scalar_vec Scalars passed as the first argument.
 * @param n_elments  Element count forwarded to `vector_mod_mult`.
 * @param device_id  Not used yet (see TODO).
 * @param stream     Stream forwarded to `vector_mod_mult`.
 * @return CUDA_SUCCESS on normal completion, -1 when a std::runtime_error is caught.
 */
extern "C" int32_t vec_mod_mult_point_bls12_377(BLS12_377::projective_t *inout,
                                                BLS12_377::scalar_t *scalar_vec,
                                                size_t n_elments,
                                                size_t device_id,
                                                cudaStream_t stream = 0)
{
  int32_t status = -1;
  try
  {
    // TODO: device_id
    vector_mod_mult<BLS12_377::projective_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &failure)
  {
    printf("error %s", failure.what()); // TODO: error code and message
  }
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper around `vector_mod_mult` for BLS12_377 scalars.
 * `inout` is handed to `vector_mod_mult` twice (presumably as input and as output;
 * confirm against its signature).
 * @param inout      Scalars passed as the second and third arguments.
 * @param scalar_vec Scalars passed as the first argument.
 * @param n_elments  Element count forwarded to `vector_mod_mult`.
 * @param device_id  Not used yet (see TODO).
 * @param stream     Stream forwarded to `vector_mod_mult`.
 * @return CUDA_SUCCESS on normal completion, -1 when a std::runtime_error is caught.
 */
extern "C" int32_t vec_mod_mult_scalar_bls12_377(BLS12_377::scalar_t *inout,
                                                 BLS12_377::scalar_t *scalar_vec,
                                                 size_t n_elments,
                                                 size_t device_id,
                                                 cudaStream_t stream = 0)
{
  int32_t status = -1;
  try
  {
    // TODO: device_id
    vector_mod_mult<BLS12_377::scalar_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &failure)
  {
    printf("error %s", failure.what()); // TODO: error code and message
  }
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper around `matrix_mod_mult` for BLS12_377 scalars.
 * @param matrix_flattened Matrix argument forwarded as-is.
 * @param input            Input argument forwarded as-is.
 * @param output           Output argument forwarded as-is.
 * @param n_elments        Element count forwarded to `matrix_mod_mult`.
 * @param device_id        Not used yet (see TODO).
 * @param stream           Stream forwarded to `matrix_mod_mult`.
 * @return CUDA_SUCCESS on normal completion, -1 when a std::runtime_error is caught.
 */
extern "C" int32_t matrix_vec_mod_mult_bls12_377(BLS12_377::scalar_t *matrix_flattened,
                                                 BLS12_377::scalar_t *input,
                                                 BLS12_377::scalar_t *output,
                                                 size_t n_elments,
                                                 size_t device_id,
                                                 cudaStream_t stream = 0)
{
  int32_t status = -1;
  try
  {
    // TODO: device_id
    matrix_mod_mult<BLS12_377::scalar_t>(matrix_flattened, input, output, n_elments, stream);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &failure)
  {
    printf("error %s", failure.what()); // TODO: error code and message
  }
  return status;
}
|
||||
#endif
|
||||
22
icicle/curves/bls12_381/curve_config.cuh
Normal file
22
icicle/curves/bls12_381/curve_config.cuh
Normal file
@@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
#include "params.cuh"
|
||||
|
||||
namespace BLS12_381 {
|
||||
typedef Field<PARAMS_BLS12_381::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_BLS12_381::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_381::weierstrass_b };
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_BLS12_381::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BLS12_381::weierstrass_b_g2_re },
|
||||
point_field_t{ PARAMS_BLS12_381::weierstrass_b_g2_im }};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
}
|
||||
327
icicle/curves/bls12_381/lde.cu
Normal file
327
icicle/curves/bls12_381/lde.cu
Normal file
@@ -0,0 +1,327 @@
|
||||
#ifndef _BLS12_381_LDE
|
||||
#define _BLS12_381_LDE
|
||||
#include <cuda.h>
|
||||
#include "../../appUtils/ntt/lde.cu"
|
||||
#include "../../appUtils/ntt/ntt.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "curve_config.cuh"
|
||||
|
||||
/**
 * Builds a twiddle-factor array by calling `fill_twiddle_factors_array` with
 * `omega(logn)` or, when `inverse` is set, `omega_inv(logn)`.
 * @param domain_size Size forwarded to `fill_twiddle_factors_array`.
 * @param logn        Argument passed to `omega` / `omega_inv`.
 * @param inverse     Selects `omega_inv` instead of `omega`.
 * @param device_id   Currently unused.
 * @param stream      Ignored on input: it is overwritten by a stream created for this call.
 * @return The pointer returned by `fill_twiddle_factors_array`, or nullptr if the stream
 *         could not be created or a std::runtime_error was caught.
 */
extern "C" BLS12_381::scalar_t* build_domain_cuda_bls12_381(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return nullptr;

  BLS12_381::scalar_t* twiddles = nullptr;
  try
  {
    if (inverse) {
      twiddles = fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega_inv(logn), stream);
    } else {
      twiddles = fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega(logn), stream);
    }
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return twiddles;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `ntt_end2end_template` for BLS12_381 scalars.
 * @param arr       Scalar array forwarded to the template.
 * @param n         Size argument forwarded to the template.
 * @param inverse   Forwarded to the template.
 * @param device_id Currently unused (see TODO).
 * @param stream    Ignored on input: it is overwritten by a stream created for this call.
 * @return The template's return value, or -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int ntt_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    status = ntt_end2end_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `ntt_end2end_template` for BLS12_381 projective points
 * with BLS12_381 scalars as the second template argument.
 * @param arr       Point array forwarded to the template.
 * @param n         Size argument forwarded to the template.
 * @param inverse   Forwarded to the template.
 * @param device_id Currently unused (see TODO).
 * @param stream    Ignored on input: it is overwritten by a stream created for this call.
 * @return The template's return value, or -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int ecntt_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    status = ntt_end2end_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `ntt_end2end_batch_template` for BLS12_381 scalars.
 * @param arr        Scalar array forwarded to the template.
 * @param arr_size   Size argument forwarded to the template.
 * @param batch_size Batch size forwarded to the template.
 * @param inverse    Forwarded to the template.
 * @param device_id  Currently unused (see TODO).
 * @param stream     Ignored on input: it is overwritten by a stream created for this call.
 * @return The template's return value, or -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int ntt_batch_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    status = ntt_end2end_batch_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `ntt_end2end_batch_template` for BLS12_381 projective
 * points with BLS12_381 scalars as the second template argument.
 * @param arr        Point array forwarded to the template.
 * @param arr_size   Size argument forwarded to the template.
 * @param batch_size Batch size forwarded to the template.
 * @param inverse    Forwarded to the template.
 * @param device_id  Currently unused (see TODO).
 * @param stream     Ignored on input: it is overwritten by a stream created for this call.
 * @return The template's return value, or -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int ecntt_batch_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    status = ntt_end2end_batch_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `interpolate` for BLS12_381 scalars.
 * @param d_out         Output pointer forwarded to `interpolate`.
 * @param d_evaluations Evaluations pointer forwarded to `interpolate`.
 * @param d_domain      Domain pointer forwarded to `interpolate`.
 * @param n             Size argument forwarded to `interpolate`.
 * @param device_id     Currently unused.
 * @param stream        Stream forwarded to `interpolate`.
 * @return The value returned by `interpolate`, or -1 when a std::runtime_error is caught.
 */
extern "C" int interpolate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
  int status = -1;
  try
  {
    status = interpolate(d_out, d_evaluations, d_domain, n, stream);
  }
  catch (const std::runtime_error &failure)
  {
    printf("error %s", failure.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `interpolate_batch` for BLS12_381 scalars.
 * @param d_out         Output pointer forwarded to `interpolate_batch`.
 * @param d_evaluations Evaluations pointer forwarded to `interpolate_batch`.
 * @param d_domain      Domain pointer forwarded to `interpolate_batch`.
 * @param n             Size argument forwarded to `interpolate_batch`.
 * @param batch_size    Batch size forwarded to `interpolate_batch`.
 * @param device_id     Currently unused.
 * @param stream        Ignored on input: it is overwritten by a stream created for this call.
 * @return The value returned by `interpolate_batch`, or -1 if the stream could not be
 *         created or a std::runtime_error was caught.
 */
extern "C" int interpolate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_evaluations, BLS12_381::scalar_t* d_domain, unsigned n,
                                                        unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `interpolate` for BLS12_381 projective points
 * over a scalar domain.
 * @param d_out         Output pointer forwarded to `interpolate`.
 * @param d_evaluations Evaluations pointer forwarded to `interpolate`.
 * @param d_domain      Domain pointer forwarded to `interpolate`.
 * @param n             Size argument forwarded to `interpolate`.
 * @param device_id     Currently unused.
 * @param stream        Stream forwarded to `interpolate`.
 * @return The value returned by `interpolate`, or -1 when a std::runtime_error is caught.
 */
extern "C" int interpolate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
  int status = -1;
  try
  {
    status = interpolate(d_out, d_evaluations, d_domain, n, stream);
  }
  catch (const std::runtime_error &failure)
  {
    printf("error %s", failure.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `interpolate_batch` for BLS12_381 projective points
 * over a scalar domain.
 * @param d_out         Output pointer forwarded to `interpolate_batch`.
 * @param d_evaluations Evaluations pointer forwarded to `interpolate_batch`.
 * @param d_domain      Domain pointer forwarded to `interpolate_batch`.
 * @param n             Size argument forwarded to `interpolate_batch`.
 * @param batch_size    Batch size forwarded to `interpolate_batch`.
 * @param device_id     Currently unused.
 * @param stream        Ignored on input: it is overwritten by a stream created for this call.
 * @return The value returned by `interpolate_batch`, or -1 if the stream could not be
 *         created or a std::runtime_error was caught.
 */
extern "C" int interpolate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_evaluations, BLS12_381::scalar_t* d_domain,
                                                       unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `evaluate` for BLS12_381 scalars, passing `false`
 * and a null scalar pointer as the two arguments that precede the stream.
 * @param d_out          Output pointer forwarded to `evaluate`.
 * @param d_coefficients Coefficients pointer forwarded to `evaluate`.
 * @param d_domain       Domain pointer forwarded to `evaluate`.
 * @param domain_size    Forwarded to `evaluate`.
 * @param n              Forwarded to `evaluate`.
 * @param device_id      Currently unused.
 * @param stream         Ignored on input: it is overwritten by a stream created for this call.
 * @return The value returned by `evaluate`, or -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain,
                                               unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    // Typed null so the scalar-pointer argument of `evaluate` keeps its type.
    BLS12_381::scalar_t* _null = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `evaluate_batch` for BLS12_381 scalars, passing
 * `false` and a null scalar pointer as the two arguments that precede the stream.
 * @param d_out          Output pointer forwarded to `evaluate_batch`.
 * @param d_coefficients Coefficients pointer forwarded to `evaluate_batch`.
 * @param d_domain       Domain pointer forwarded to `evaluate_batch`.
 * @param domain_size    Forwarded to `evaluate_batch`.
 * @param n              Forwarded to `evaluate_batch`.
 * @param batch_size     Forwarded to `evaluate_batch`.
 * @param device_id      Currently unused.
 * @param stream         Ignored on input: it is overwritten by a stream created for this call.
 * @return The value returned by `evaluate_batch`, or -1 if the stream could not be created
 *         or a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
                                                     unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    // Typed null so the scalar-pointer argument of `evaluate_batch` keeps its type.
    BLS12_381::scalar_t* _null = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `evaluate` for BLS12_381 projective points over a
 * scalar domain, passing `false` and a null scalar pointer as the two arguments that
 * precede the stream.
 * @param d_out          Output pointer forwarded to `evaluate`.
 * @param d_coefficients Coefficients pointer forwarded to `evaluate`.
 * @param d_domain       Domain pointer forwarded to `evaluate`.
 * @param domain_size    Forwarded to `evaluate`.
 * @param n              Forwarded to `evaluate`.
 * @param device_id      Currently unused.
 * @param stream         Ignored on input: it is overwritten by a stream created for this call.
 * @return The value returned by `evaluate`, or -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int evaluate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain,
                                              unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    // Typed null so the scalar-pointer argument of `evaluate` keeps its type.
    BLS12_381::scalar_t* _null = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `evaluate_batch` for BLS12_381 projective points over
 * a scalar domain, passing `false` and a null scalar pointer as the two arguments that
 * precede the stream.
 * @param d_out          Output pointer forwarded to `evaluate_batch`.
 * @param d_coefficients Coefficients pointer forwarded to `evaluate_batch`.
 * @param d_domain       Domain pointer forwarded to `evaluate_batch`.
 * @param domain_size    Forwarded to `evaluate_batch`.
 * @param n              Forwarded to `evaluate_batch`.
 * @param batch_size     Forwarded to `evaluate_batch`.
 * @param device_id      Currently unused.
 * @param stream         Ignored on input: it is overwritten by a stream created for this call.
 * @return The value returned by `evaluate_batch`, or -1 if the stream could not be created
 *         or a std::runtime_error was caught.
 */
extern "C" int evaluate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
                                                    unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    // Typed null so the scalar-pointer argument of `evaluate_batch` keeps its type.
    BLS12_381::scalar_t* _null = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `evaluate` for BLS12_381 scalars, passing `true`
 * together with `coset_powers` as the two arguments that precede the stream.
 * @param d_out          Output pointer forwarded to `evaluate`.
 * @param d_coefficients Coefficients pointer forwarded to `evaluate`.
 * @param d_domain       Domain pointer forwarded to `evaluate`.
 * @param domain_size    Forwarded to `evaluate`.
 * @param n              Forwarded to `evaluate`.
 * @param coset_powers   Scalar pointer forwarded to `evaluate`.
 * @param device_id      Currently unused.
 * @param stream         Ignored on input: it is overwritten by a stream created for this call.
 * @return The value returned by `evaluate`, or -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_on_coset_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
                                                        unsigned n, BLS12_381::scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `evaluate_batch` for BLS12_381 scalars, passing
 * `true` together with `coset_powers` as the two arguments that precede the stream.
 * @param d_out          Output pointer forwarded to `evaluate_batch`.
 * @param d_coefficients Coefficients pointer forwarded to `evaluate_batch`.
 * @param d_domain       Domain pointer forwarded to `evaluate_batch`.
 * @param domain_size    Forwarded to `evaluate_batch`.
 * @param n              Forwarded to `evaluate_batch`.
 * @param batch_size     Forwarded to `evaluate_batch`.
 * @param coset_powers   Scalar pointer forwarded to `evaluate_batch`.
 * @param device_id      Currently unused.
 * @param stream         Ignored on input: it is overwritten by a stream created for this call.
 * @return The value returned by `evaluate_batch`, or -1 if the stream could not be created
 *         or a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
                                                              unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `evaluate` for BLS12_381 projective points over a
 * scalar domain, passing `true` together with `coset_powers` as the two arguments that
 * precede the stream.
 * @param d_out          Output pointer forwarded to `evaluate`.
 * @param d_coefficients Coefficients pointer forwarded to `evaluate`.
 * @param d_domain       Domain pointer forwarded to `evaluate`.
 * @param domain_size    Forwarded to `evaluate`.
 * @param n              Forwarded to `evaluate`.
 * @param coset_powers   Scalar pointer forwarded to `evaluate`.
 * @param device_id      Currently unused.
 * @param stream         Ignored on input: it is overwritten by a stream created for this call.
 * @return The value returned by `evaluate`, or -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int evaluate_points_on_coset_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
                                                       unsigned n, BLS12_381::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that forwards to `evaluate_batch` for BLS12_381 projective points over
 * a scalar domain, passing `true` together with `coset_powers` as the two arguments that
 * precede the stream.
 * @param d_out          Output pointer forwarded to `evaluate_batch`.
 * @param d_coefficients Coefficients pointer forwarded to `evaluate_batch`.
 * @param d_domain       Domain pointer forwarded to `evaluate_batch`.
 * @param domain_size    Forwarded to `evaluate_batch`.
 * @param n              Forwarded to `evaluate_batch`.
 * @param batch_size     Forwarded to `evaluate_batch`.
 * @param coset_powers   Scalar pointer forwarded to `evaluate_batch`.
 * @param device_id      Currently unused.
 * @param stream         Ignored on input: it is overwritten by a stream created for this call.
 * @return The value returned by `evaluate_batch`, or -1 if the stream could not be created
 *         or a std::runtime_error was caught.
 */
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
                                                             unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that calls `reverse_order` on a BLS12_381 scalar array.
 * @param arr       Array forwarded to `reverse_order`.
 * @param n         Element count forwarded to `reverse_order`; floor(log2(n)) is passed too.
 * @param device_id Currently unused.
 * @param stream    Ignored on input: it is overwritten by a stream created for this call.
 * @return 0 on normal completion, -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int reverse_order_scalars_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  // Integer floor(log2(n)): exact for every positive n, and 0 for n <= 1.
  // The previous log(n) / log(2) depended on floating-point rounding.
  uint32_t logn = 0;
  for (uint32_t remaining = (n > 0) ? uint32_t(n) : 0; remaining > 1; remaining >>= 1) ++logn;

  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    reverse_order(arr, n, logn, stream);
    status = 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that calls `reverse_order_batch` on a BLS12_381 scalar array.
 * @param arr        Array forwarded to `reverse_order_batch`.
 * @param n          Size forwarded to `reverse_order_batch`; floor(log2(n)) is passed too.
 * @param batch_size Batch size forwarded to `reverse_order_batch`.
 * @param device_id  Currently unused.
 * @param stream     Ignored on input: it is overwritten by a stream created for this call.
 * @return 0 on normal completion, -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int reverse_order_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // Integer floor(log2(n)): exact for every positive n, and 0 for n <= 1.
  // The previous log(n) / log(2) depended on floating-point rounding.
  uint32_t logn = 0;
  for (uint32_t remaining = (n > 0) ? uint32_t(n) : 0; remaining > 1; remaining >>= 1) ++logn;

  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    reverse_order_batch(arr, n, logn, batch_size, stream);
    status = 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that calls `reverse_order` on a BLS12_381 projective point array.
 * @param arr       Array forwarded to `reverse_order`.
 * @param n         Element count forwarded to `reverse_order`; floor(log2(n)) is passed too.
 * @param device_id Currently unused.
 * @param stream    Ignored on input: it is overwritten by a stream created for this call.
 * @return 0 on normal completion, -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int reverse_order_points_cuda_bls12_381(BLS12_381::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  // Integer floor(log2(n)): exact for every positive n, and 0 for n <= 1.
  // The previous log(n) / log(2) depended on floating-point rounding.
  uint32_t logn = 0;
  for (uint32_t remaining = (n > 0) ? uint32_t(n) : 0; remaining > 1; remaining >>= 1) ++logn;

  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    reverse_order(arr, n, logn, stream);
    status = 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C-linkage wrapper that calls `reverse_order_batch` on a BLS12_381 projective point array.
 * @param arr        Array forwarded to `reverse_order_batch`.
 * @param n          Size forwarded to `reverse_order_batch`; floor(log2(n)) is passed too.
 * @param batch_size Batch size forwarded to `reverse_order_batch`.
 * @param device_id  Currently unused.
 * @param stream     Ignored on input: it is overwritten by a stream created for this call.
 * @return 0 on normal completion, -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int reverse_order_points_batch_cuda_bls12_381(BLS12_381::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // Integer floor(log2(n)): exact for every positive n, and 0 for n <= 1.
  // The previous log(n) / log(2) depended on floating-point rounding.
  uint32_t logn = 0;
  for (uint32_t remaining = (n > 0) ? uint32_t(n) : 0; remaining > 1; remaining >>= 1) ++logn;

  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    reverse_order_batch(arr, n, logn, batch_size, stream);
    status = 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
#endif
|
||||
92
icicle/curves/bls12_381/msm.cu
Normal file
92
icicle/curves/bls12_381/msm.cu
Normal file
@@ -0,0 +1,92 @@
|
||||
#ifndef _BLS12_381_MSM
|
||||
#define _BLS12_381_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
|
||||
|
||||
extern "C"
|
||||
int msm_cuda_bls12_381(BLS12_381::projective_t *out, BLS12_381::affine_t points[],
|
||||
BLS12_381::scalar_t scalars[], size_t count, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, count, out, false, false, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * C-linkage wrapper around `batched_large_msm` for BLS12_381; the boolean argument is
 * passed as `false`, and the stream is synchronized before returning.
 * @param out        Result pointer forwarded to `batched_large_msm`.
 * @param points     Affine points forwarded to `batched_large_msm`.
 * @param scalars    Scalars forwarded to `batched_large_msm`.
 * @param batch_size Batch size forwarded to `batched_large_msm`.
 * @param msm_size   Per-MSM size forwarded to `batched_large_msm`.
 * @param device_id  Currently unused.
 * @param stream     Ignored on input: it is overwritten by a stream created for this call.
 * @return CUDA_SUCCESS on normal completion, -1 if the stream could not be created or a
 *         std::runtime_error was caught.
 */
extern "C" int msm_batch_cuda_bls12_381(BLS12_381::projective_t* out, BLS12_381::affine_t points[],
                                        BLS12_381::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // As before, the work runs on a stream created here rather than on the incoming value.
  if (cudaStreamCreate(&stream) != cudaSuccess) return -1;

  int status = -1;
  try
  {
    batched_large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
    cudaStreamSynchronize(stream);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }

  // Release the private stream; it used to be leaked on every call.
  cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -2,10 +2,11 @@
|
||||
|
||||
#include "../../utils/storage.cuh"
|
||||
|
||||
namespace PARAMS{
|
||||
namespace PARAMS_BLS12_381{
|
||||
struct fp_config {
|
||||
// field structure size = 8 * 32 bit
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 32;
|
||||
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
|
||||
@@ -15,13 +16,13 @@ namespace PARAMS{
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
static constexpr storage<2*limbs_count> modulus_squared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
static constexpr unsigned modulus_bits_count = 255;
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr unsigned modulus_bit_count = 255;
|
||||
// m = floor(2^(2*modulus_bit_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
|
||||
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
@@ -61,6 +62,13 @@ namespace PARAMS{
|
||||
static constexpr storage<limbs_count> omega30= {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
|
||||
static constexpr storage<limbs_count> omega31= {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
|
||||
static constexpr storage<limbs_count> omega32= {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8,
|
||||
omega9, omega10, omega11, omega12, omega13, omega14, omega15, omega16,
|
||||
omega17, omega18, omega19, omega20, omega21, omega22, omega23, omega24,
|
||||
omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
|
||||
};
|
||||
|
||||
// static constexpr storage<limbs_count> omega_inv[32]={ {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753}, {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e}, {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4}, {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c}, {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee}, {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d}, {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25}, {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e}, {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508}, {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d}, {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63}, {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7}, {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1}, {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac}, {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003}, {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c}, {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7}, {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950}, {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 
0x6f5d6dfa, 0x545ad9b2}, {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6}, {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5}, {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9}, {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960}, {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6}, {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf}, {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f}, {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533}, {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff}, {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287}, {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6}, {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}};
|
||||
// Quick fix for linking issue
|
||||
@@ -97,7 +105,13 @@ namespace PARAMS{
|
||||
static constexpr storage<limbs_count> omega_inv31= {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};
|
||||
|
||||
// static constexpr storage<limbs_count> inv[32]={ {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9}, {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e}, {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268}, {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd}, {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18}, {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5}, {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04}, {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab}, {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f}, {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9}, {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e}, {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878}, {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5}, {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c}, {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77}, {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365}, {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c}, {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57}, {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5}, {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 
0x73eda014}, {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3}, {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583}, {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b}, {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df}, {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719}, {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736}, {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744}, {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b}, {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f}, {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751}, {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752}, {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}};
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
|
||||
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
|
||||
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
|
||||
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
|
||||
};
|
||||
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> inv1= {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
|
||||
static constexpr storage<limbs_count> inv2= {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
|
||||
@@ -131,7 +145,14 @@ namespace PARAMS{
|
||||
static constexpr storage<limbs_count> inv30= {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
|
||||
static constexpr storage<limbs_count> inv31= {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
|
||||
static constexpr storage<limbs_count> inv32= {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
|
||||
};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8,
|
||||
inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
|
||||
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24,
|
||||
inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
|
||||
};
|
||||
};
|
||||
|
||||
struct fq_config {
|
||||
// field structure size = 12 * 32 bit
|
||||
@@ -148,19 +169,19 @@ namespace PARAMS{
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
static constexpr storage<2*limbs_count> modulus_squared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
// 4*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
static constexpr unsigned modulus_bits_count = 381;
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr unsigned modulus_bit_count = 381;
|
||||
// m = floor(2^(2*modulus_bit_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
@@ -169,23 +190,22 @@ namespace PARAMS{
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> generator_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
|
||||
static constexpr storage<limbs_count> g1_gen_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
|
||||
0x9774b905, 0xc3688c4f, 0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
|
||||
static constexpr storage<limbs_count> generator_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
|
||||
static constexpr storage<limbs_count> g1_gen_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
|
||||
0xd5d00af6, 0xfcf5e095, 0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
|
||||
static constexpr storage<limbs_count> generator_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0xbac0326, 0x7ae3d177, 0xb4510b64,
|
||||
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x24aa2b2};
|
||||
static constexpr storage<limbs_count> generator_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
|
||||
static constexpr storage<limbs_count> g2_gen_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0x0bac0326, 0x7ae3d177, 0xb4510b64,
|
||||
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x024aa2b2};
|
||||
static constexpr storage<limbs_count> g2_gen_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
|
||||
0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
|
||||
static constexpr storage<limbs_count> generator_y_re = {0x8b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
|
||||
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0xce5d527};
|
||||
static constexpr storage<limbs_count> generator_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
|
||||
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x606c4a0};
|
||||
static constexpr storage<limbs_count> g2_gen_y_re = {0x08b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
|
||||
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0x0ce5d527};
|
||||
static constexpr storage<limbs_count> g2_gen_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
|
||||
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x0606c4a0};
|
||||
};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
47
icicle/curves/bls12_381/poseidon.cu
Normal file
47
icicle/curves/bls12_381/poseidon.cu
Normal file
@@ -0,0 +1,47 @@
|
||||
#ifndef _BLS12_381_POSEIDON
|
||||
#define _BLS12_381_POSEIDON
|
||||
#include <cuda.h>
|
||||
#include <stdexcept>
|
||||
#include "../../appUtils/poseidon/poseidon.cu"
|
||||
#include "curve_config.cuh"
|
||||
|
||||
// Explicit instantiation of the Poseidon class template for BLS12_381::scalar_t, so the
// template's members are generated in this translation unit for the wrapper below.
template class Poseidon<BLS12_381::scalar_t>;
|
||||
|
||||
/**
 * C entry point that hashes blocks with a Poseidon instance over BLS12_381::scalar_t,
 * using HashType::MerkleTree.
 *
 * @param input            Blocks to hash; forwarded unchanged to Poseidon::hash_blocks.
 * @param out              Destination buffer; forwarded unchanged to Poseidon::hash_blocks.
 * @param number_of_blocks Number of blocks to hash.
 * @param arity            Arity passed to the Poseidon constructor.
 * @param device_id        Currently unused.
 * @param stream           CUDA stream to run on. When 0, a temporary stream is created for
 *                         this call and destroyed again before returning.
 * @return CUDA_SUCCESS on success, -1 if a std::runtime_error was caught.
 */
extern "C" int poseidon_multi_cuda_bls12_381(BLS12_381::scalar_t input[], BLS12_381::scalar_t* out,
                                             size_t number_of_blocks, int arity, size_t device_id = 0, cudaStream_t stream = 0)
{
  // TODO: once we get bindings to pass a stream, we should make {stream} a required parameter and use it instead of
  // creating a new stream
  bool owns_stream = false;
  if (stream == 0) {
    // Remember whether the stream is ours so that it can be released on every exit path.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
  }

  // The CUDA runtime calls below report errors via return codes and do not throw, so the
  // events are created outside the try block and destroyed unconditionally afterwards.
  cudaEvent_t start_event, end_event;
  cudaEventCreate(&start_event);
  cudaEventCreate(&end_event);

  int status = CUDA_SUCCESS;
  try
  {
    cudaEventRecord(start_event, stream);
    Poseidon<BLS12_381::scalar_t> poseidon(arity, stream);
    poseidon.hash_blocks(input, number_of_blocks, out, Poseidon<BLS12_381::scalar_t>::HashType::MerkleTree, stream);
    cudaEventRecord(end_event, stream);
    // Block the host until everything recorded before end_event has finished.
    cudaEventSynchronize(end_event);

#ifdef DEBUG
    float elapsedTime;
    cudaEventElapsedTime(&elapsedTime, start_event, end_event);
    printf("Time elapsed: %f", elapsedTime);
#endif
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // Cleanup runs on both the success and the error path; previously the events leaked on
  // error and the internally created stream leaked on every call.
  cudaEventDestroy(start_event);
  cudaEventDestroy(end_event);
  if (owns_stream) {
    cudaStreamDestroy(stream);
  }

  return status;
}
|
||||
#endif
|
||||
19
icicle/curves/bls12_381/projective.cu
Normal file
19
icicle/curves/bls12_381/projective.cu
Normal file
@@ -0,0 +1,19 @@
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
// C-linkage equality check for two BLS12_381::projective_t points.
// Returns true only when operator== reports the points equal and neither point has all
// three coordinates (x, y, z) equal to point_field_t::zero().
extern "C" bool eq_bls12_381(BLS12_381::projective_t *point1, BLS12_381::projective_t *point2)
{
  using field_t = BLS12_381::point_field_t;

  // Same evaluation order as a chained && expression: compare first, then inspect each point.
  if (!(*point1 == *point2)) {
    return false;
  }

  const bool first_all_zero =
    (point1->x == field_t::zero()) && (point1->y == field_t::zero()) && (point1->z == field_t::zero());
  if (first_all_zero) {
    return false;
  }

  const bool second_all_zero =
    (point2->x == field_t::zero()) && (point2->y == field_t::zero()) && (point2->z == field_t::zero());
  return !second_all_zero;
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
// C-linkage equality check for two BLS12_381::g2_projective_t points.
// Returns true only when operator== reports the points equal and neither point has all
// three coordinates (x, y, z) equal to g2_point_field_t::zero().
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t *point1, BLS12_381::g2_projective_t *point2)
{
  using field_t = BLS12_381::g2_point_field_t;

  // Same evaluation order as a chained && expression: compare first, then inspect each point.
  if (!(*point1 == *point2)) {
    return false;
  }

  const bool first_all_zero =
    (point1->x == field_t::zero()) && (point1->y == field_t::zero()) && (point1->z == field_t::zero());
  if (first_all_zero) {
    return false;
  }

  const bool second_all_zero =
    (point2->x == field_t::zero()) && (point2->y == field_t::zero()) && (point2->z == field_t::zero());
  return !second_all_zero;
}
|
||||
#endif
|
||||
5
icicle/curves/bls12_381/supported_operations.cu
Normal file
5
icicle/curves/bls12_381/supported_operations.cu
Normal file
@@ -0,0 +1,5 @@
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
#include "poseidon.cu"
|
||||
68
icicle/curves/bls12_381/ve_mod_mult.cu
Normal file
68
icicle/curves/bls12_381/ve_mod_mult.cu
Normal file
@@ -0,0 +1,68 @@
|
||||
#ifndef _BLS12_381_VEC_MULT
|
||||
#define _BLS12_381_VEC_MULT
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
|
||||
// C entry point that forwards to vector_mod_mult<projective_t, scalar_t>, passing `inout`
// as both the second and third argument (presumably input and output of an element-wise
// multiplication by `scalar_vec` - confirm against vector_mod_mult).
// `device_id` is not used yet. Returns CUDA_SUCCESS, or -1 if a std::runtime_error was caught.
extern "C" int32_t vec_mod_mult_point_bls12_381(BLS12_381::projective_t *inout,
                                                BLS12_381::scalar_t *scalar_vec,
                                                size_t n_elments,
                                                size_t device_id,
                                                cudaStream_t stream = 0)
{
  int32_t status = -1; // stays -1 unless the call below completes without throwing
  try
  {
    // TODO: device_id
    vector_mod_mult<BLS12_381::projective_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what()); // TODO: error code and message
  }
  return status;
}
|
||||
|
||||
// C entry point that forwards to vector_mod_mult<scalar_t, scalar_t>, passing `inout`
// as both the second and third argument (presumably input and output of an element-wise
// multiplication by `scalar_vec` - confirm against vector_mod_mult).
// `device_id` is not used yet. Returns CUDA_SUCCESS, or -1 if a std::runtime_error was caught.
extern "C" int32_t vec_mod_mult_scalar_bls12_381(BLS12_381::scalar_t *inout,
                                                 BLS12_381::scalar_t *scalar_vec,
                                                 size_t n_elments,
                                                 size_t device_id,
                                                 cudaStream_t stream = 0)
{
  int32_t status = -1; // stays -1 unless the call below completes without throwing
  try
  {
    // TODO: device_id
    vector_mod_mult<BLS12_381::scalar_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what()); // TODO: error code and message
  }
  return status;
}
|
||||
|
||||
// C entry point that forwards `matrix_flattened`, `input`, `output`, `n_elments` and `stream`
// to matrix_mod_mult<scalar_t>.
// `device_id` is not used yet. Returns CUDA_SUCCESS, or -1 if a std::runtime_error was caught.
extern "C" int32_t matrix_vec_mod_mult_bls12_381(BLS12_381::scalar_t *matrix_flattened,
                                                 BLS12_381::scalar_t *input,
                                                 BLS12_381::scalar_t *output,
                                                 size_t n_elments,
                                                 size_t device_id,
                                                 cudaStream_t stream = 0)
{
  int32_t status = -1; // stays -1 unless the call below completes without throwing
  try
  {
    // TODO: device_id
    matrix_mod_mult<BLS12_381::scalar_t>(matrix_flattened, input, output, n_elments, stream);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what()); // TODO: error code and message
  }
  return status;
}
|
||||
#endif
|
||||
25
icicle/curves/bn254/curve_config.cuh
Normal file
25
icicle/curves/bn254/curve_config.cuh
Normal file
@@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#if defined(G2_DEFINED)
|
||||
#include "../../primitives/extension_field.cuh"
|
||||
#endif
|
||||
|
||||
#include "params.cuh"
|
||||
|
||||
namespace BN254 {
|
||||
typedef Field<PARAMS_BN254::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_BN254::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{ PARAMS_BN254::weierstrass_b };
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_BN254::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BN254::weierstrass_b_g2_re },
|
||||
point_field_t{ PARAMS_BN254::weierstrass_b_g2_im }};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
}
|
||||
@@ -1,20 +1,20 @@
|
||||
#ifndef _LDE
|
||||
#define _LDE
|
||||
#ifndef _BN254_LDE
|
||||
#define _BN254_LDE
|
||||
#include <cuda.h>
|
||||
#include "../appUtils/ntt/lde.cu"
|
||||
#include "../appUtils/ntt/ntt.cuh"
|
||||
#include "../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "../../appUtils/ntt/lde.cu"
|
||||
#include "../../appUtils/ntt/ntt.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "curve_config.cuh"
|
||||
|
||||
extern "C" scalar_t* build_domain_cuda(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" BN254::scalar_t* build_domain_cuda_bn254(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
if (inverse) {
|
||||
return fill_twiddle_factors_array(domain_size, scalar_t::omega_inv(logn), stream);
|
||||
return fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega_inv(logn), stream);
|
||||
} else {
|
||||
return fill_twiddle_factors_array(domain_size, scalar_t::omega(logn), stream);
|
||||
return fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega(logn), stream);
|
||||
}
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -24,12 +24,12 @@ extern "C" scalar_t* build_domain_cuda(uint32_t domain_size, uint32_t logn, bool
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_cuda(scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int ntt_cuda_bn254(BN254::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_template<scalar_t,scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
|
||||
return ntt_end2end_template<BN254::scalar_t,BN254::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
@@ -39,12 +39,12 @@ extern "C" int ntt_cuda(scalar_t *arr, uint32_t n, bool inverse, size_t device_i
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ecntt_cuda(projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int ecntt_cuda_bn254(BN254::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_template<projective_t,scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
|
||||
return ntt_end2end_template<BN254::projective_t,BN254::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
@@ -53,12 +53,12 @@ extern "C" int ecntt_cuda(projective_t *arr, uint32_t n, bool inverse, size_t de
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_batch_cuda(scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int ntt_batch_cuda_bn254(BN254::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_batch_template<scalar_t,scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
|
||||
return ntt_end2end_batch_template<BN254::scalar_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
@@ -67,12 +67,12 @@ extern "C" int ntt_batch_cuda(scalar_t *arr, uint32_t arr_size, uint32_t batch_s
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ecntt_batch_cuda(projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int ecntt_batch_cuda_bn254(BN254::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_batch_template<projective_t,scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
|
||||
return ntt_end2end_batch_template<BN254::projective_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
@@ -81,7 +81,7 @@ extern "C" int ecntt_batch_cuda(projective_t *arr, uint32_t arr_size, uint32_t b
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_scalars_cuda(scalar_t* d_out, scalar_t *d_evaluations, scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int interpolate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -94,7 +94,7 @@ extern "C" int interpolate_scalars_cuda(scalar_t* d_out, scalar_t *d_evaluations
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n,
|
||||
extern "C" int interpolate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_evaluations, BN254::scalar_t* d_domain, unsigned n,
|
||||
unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
@@ -109,7 +109,7 @@ extern "C" int interpolate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_evalu
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_points_cuda(projective_t* d_out, projective_t *d_evaluations, scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int interpolate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -122,7 +122,7 @@ extern "C" int interpolate_points_cuda(projective_t* d_out, projective_t *d_eval
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_points_batch_cuda(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain,
|
||||
extern "C" int interpolate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_evaluations, BN254::scalar_t* d_domain,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
@@ -137,12 +137,12 @@ extern "C" int interpolate_points_batch_cuda(projective_t* d_out, projective_t*
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_cuda(scalar_t* d_out, scalar_t *d_coefficients, scalar_t *d_domain,
|
||||
extern "C" int evaluate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_coefficients, BN254::scalar_t *d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
scalar_t* _null = nullptr;
|
||||
BN254::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
}
|
||||
@@ -153,12 +153,12 @@ extern "C" int evaluate_scalars_cuda(scalar_t* d_out, scalar_t *d_coefficients,
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
extern "C" int evaluate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
scalar_t* _null = nullptr;
|
||||
BN254::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
|
||||
}
|
||||
@@ -169,12 +169,12 @@ extern "C" int evaluate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_coeffici
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_cuda(projective_t* d_out, projective_t *d_coefficients, scalar_t *d_domain,
|
||||
extern "C" int evaluate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_coefficients, BN254::scalar_t *d_domain,
|
||||
unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
scalar_t* _null = nullptr;
|
||||
BN254::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
}
|
||||
@@ -185,12 +185,12 @@ extern "C" int evaluate_points_cuda(projective_t* d_out, projective_t *d_coeffic
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_batch_cuda(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
extern "C" int evaluate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
scalar_t* _null = nullptr;
|
||||
BN254::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
|
||||
}
|
||||
@@ -201,8 +201,8 @@ extern "C" int evaluate_points_batch_cuda(projective_t* d_out, projective_t* d_c
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_on_coset_cuda(scalar_t* d_out, scalar_t *d_coefficients, scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int evaluate_scalars_on_coset_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_coefficients, BN254::scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, BN254::scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -216,8 +216,8 @@ extern "C" int evaluate_scalars_on_coset_cuda(scalar_t* d_out, scalar_t *d_coeff
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_on_coset_batch_cuda(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int evaluate_scalars_on_coset_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -231,8 +231,8 @@ extern "C" int evaluate_scalars_on_coset_batch_cuda(scalar_t* d_out, scalar_t* d
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_on_coset_cuda(projective_t* d_out, projective_t *d_coefficients, scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int evaluate_points_on_coset_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_coefficients, BN254::scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, BN254::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -246,8 +246,8 @@ extern "C" int evaluate_points_on_coset_cuda(projective_t* d_out, projective_t *
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_on_coset_batch_cuda(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int evaluate_points_on_coset_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -261,7 +261,7 @@ extern "C" int evaluate_points_on_coset_batch_cuda(projective_t* d_out, projecti
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_scalars_cuda(scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int reverse_order_scalars_cuda_bn254(BN254::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -277,7 +277,7 @@ extern "C" int reverse_order_scalars_cuda(scalar_t* arr, int n, size_t device_id
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_scalars_batch_cuda(scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int reverse_order_scalars_batch_cuda_bn254(BN254::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -293,7 +293,7 @@ extern "C" int reverse_order_scalars_batch_cuda(scalar_t* arr, int n, int batch_
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_points_cuda(projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int reverse_order_points_cuda_bn254(BN254::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -309,7 +309,7 @@ extern "C" int reverse_order_points_cuda(projective_t* arr, int n, size_t device
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_points_batch_cuda(projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int reverse_order_points_batch_cuda_bn254(BN254::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -1,18 +1,18 @@
|
||||
#ifndef _MSM
|
||||
#define _MSM
|
||||
#include "../appUtils/msm/msm.cu"
|
||||
#ifndef _BN254_MSM
|
||||
#define _BN254_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
|
||||
|
||||
extern "C"
|
||||
int msm_cuda(projective_t *out, affine_t points[],
|
||||
scalar_t scalars[], size_t count, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
int msm_cuda_bn254(BN254::projective_t *out, BN254::affine_t points[],
|
||||
BN254::scalar_t scalars[], size_t count, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<scalar_t, projective_t, affine_t>(scalars, points, count, out, false, stream);
|
||||
large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, count, out, false, false, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -22,13 +22,13 @@ int msm_cuda(projective_t *out, affine_t points[],
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int msm_batch_cuda(projective_t* out, affine_t points[],
|
||||
scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int msm_batch_cuda_bn254(BN254::projective_t* out, BN254::affine_t points[],
|
||||
BN254::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<scalar_t, projective_t, affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
|
||||
batched_large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
@@ -48,11 +48,11 @@ extern "C" int msm_batch_cuda(projective_t* out, affine_t points[],
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda(projective_t* d_out, scalar_t* d_scalars, affine_t* d_points, size_t count, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
int commit_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm(d_scalars, d_points, count, d_out, true, stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
@@ -73,7 +73,7 @@ extern "C" int msm_batch_cuda(projective_t* out, affine_t points[],
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda(projective_t* d_out, scalar_t* d_scalars, affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
int commit_batch_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
@@ -1,10 +1,39 @@
|
||||
#pragma once
|
||||
#include "../../utils/storage.cuh"
|
||||
namespace PARAMS{
|
||||
|
||||
namespace PARAMS_BN254{
|
||||
struct fp_config{
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 32;
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090, 0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121, 0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_squared = {0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975, 0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_2 = {0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb, 0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7, 0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr unsigned modulus_bit_count = 254;
|
||||
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520, 0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
////
|
||||
static constexpr storage<limbs_count> omega1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> omega2= {0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> omega3= {0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1};
|
||||
static constexpr storage<limbs_count> omega4= {0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2};
|
||||
static constexpr storage<limbs_count> omega5= {0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6};
|
||||
static constexpr storage<limbs_count> omega6= {0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d};
|
||||
static constexpr storage<limbs_count> omega7= {0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd};
|
||||
static constexpr storage<limbs_count> omega8= {0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561};
|
||||
static constexpr storage<limbs_count> omega9= {0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e};
|
||||
static constexpr storage<limbs_count> omega10= {0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1};
|
||||
static constexpr storage<limbs_count> omega11= {0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584};
|
||||
static constexpr storage<limbs_count> omega12= {0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596};
|
||||
static constexpr storage<limbs_count> omega13= {0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49};
|
||||
static constexpr storage<limbs_count> omega14= {0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651};
|
||||
static constexpr storage<limbs_count> omega15= {0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f};
|
||||
static constexpr storage<limbs_count> omega16= {0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb};
|
||||
static constexpr storage<limbs_count> omega17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> omega18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> omega19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
@@ -21,7 +50,30 @@ namespace PARAMS{
|
||||
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
////
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8,
|
||||
omega9, omega10, omega11, omega12, omega13, omega14, omega15, omega16,
|
||||
omega17, omega18, omega19, omega20, omega21, omega22, omega23, omega24,
|
||||
omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
|
||||
};
|
||||
|
||||
static constexpr storage<limbs_count> omega_inv1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711};
|
||||
static constexpr storage<limbs_count> omega_inv4= {0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf};
|
||||
static constexpr storage<limbs_count> omega_inv5= {0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136};
|
||||
static constexpr storage<limbs_count> omega_inv6= {0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2};
|
||||
static constexpr storage<limbs_count> omega_inv7= {0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053};
|
||||
static constexpr storage<limbs_count> omega_inv8= {0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327};
|
||||
static constexpr storage<limbs_count> omega_inv9= {0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821};
|
||||
static constexpr storage<limbs_count> omega_inv10= {0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7};
|
||||
static constexpr storage<limbs_count> omega_inv11= {0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21};
|
||||
static constexpr storage<limbs_count> omega_inv12= {0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2};
|
||||
static constexpr storage<limbs_count> omega_inv13= {0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58};
|
||||
static constexpr storage<limbs_count> omega_inv14= {0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f};
|
||||
static constexpr storage<limbs_count> omega_inv15= {0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766};
|
||||
static constexpr storage<limbs_count> omega_inv16= {0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7};
|
||||
static constexpr storage<limbs_count> omega_inv17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> omega_inv18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> omega_inv19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
@@ -38,8 +90,30 @@ namespace PARAMS{
|
||||
static constexpr storage<limbs_count> omega_inv30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> omega_inv31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
////
|
||||
////
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
|
||||
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
|
||||
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
|
||||
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
|
||||
};
|
||||
|
||||
static constexpr storage<limbs_count> inv1= {0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739};
|
||||
static constexpr storage<limbs_count> inv2= {0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6};
|
||||
static constexpr storage<limbs_count> inv3= {0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4};
|
||||
static constexpr storage<limbs_count> inv4= {0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b};
|
||||
static constexpr storage<limbs_count> inv5= {0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff};
|
||||
static constexpr storage<limbs_count> inv6= {0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39};
|
||||
static constexpr storage<limbs_count> inv7= {0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5};
|
||||
static constexpr storage<limbs_count> inv8= {0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24};
|
||||
static constexpr storage<limbs_count> inv9= {0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b};
|
||||
static constexpr storage<limbs_count> inv10= {0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f};
|
||||
static constexpr storage<limbs_count> inv11= {0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9};
|
||||
static constexpr storage<limbs_count> inv12= {0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d};
|
||||
static constexpr storage<limbs_count> inv13= {0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50};
|
||||
static constexpr storage<limbs_count> inv14= {0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1};
|
||||
static constexpr storage<limbs_count> inv15= {0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa};
|
||||
static constexpr storage<limbs_count> inv16= {0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e};
|
||||
static constexpr storage<limbs_count> inv17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> inv18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> inv19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
@@ -57,68 +131,12 @@ namespace PARAMS{
|
||||
static constexpr storage<limbs_count> inv31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> inv32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
|
||||
////
|
||||
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090, 0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121, 0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975, 0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb, 0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7, 0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr unsigned modulus_bits_count = 254;
|
||||
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520, 0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> omega2= {0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> omega3= {0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1};
|
||||
static constexpr storage<limbs_count> omega4= {0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2};
|
||||
static constexpr storage<limbs_count> omega5= {0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6};
|
||||
static constexpr storage<limbs_count> omega6= {0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d};
|
||||
static constexpr storage<limbs_count> omega7= {0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd};
|
||||
static constexpr storage<limbs_count> omega8= {0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561};
|
||||
static constexpr storage<limbs_count> omega9= {0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e};
|
||||
static constexpr storage<limbs_count> omega10= {0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1};
|
||||
static constexpr storage<limbs_count> omega11= {0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584};
|
||||
static constexpr storage<limbs_count> omega12= {0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596};
|
||||
static constexpr storage<limbs_count> omega13= {0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49};
|
||||
static constexpr storage<limbs_count> omega14= {0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651};
|
||||
static constexpr storage<limbs_count> omega15= {0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f};
|
||||
static constexpr storage<limbs_count> omega16= {0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb};
|
||||
static constexpr storage<limbs_count> omega_inv1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711};
|
||||
static constexpr storage<limbs_count> omega_inv4= {0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf};
|
||||
static constexpr storage<limbs_count> omega_inv5= {0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136};
|
||||
static constexpr storage<limbs_count> omega_inv6= {0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2};
|
||||
static constexpr storage<limbs_count> omega_inv7= {0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053};
|
||||
static constexpr storage<limbs_count> omega_inv8= {0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327};
|
||||
static constexpr storage<limbs_count> omega_inv9= {0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821};
|
||||
static constexpr storage<limbs_count> omega_inv10= {0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7};
|
||||
static constexpr storage<limbs_count> omega_inv11= {0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21};
|
||||
static constexpr storage<limbs_count> omega_inv12= {0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2};
|
||||
static constexpr storage<limbs_count> omega_inv13= {0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58};
|
||||
static constexpr storage<limbs_count> omega_inv14= {0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f};
|
||||
static constexpr storage<limbs_count> omega_inv15= {0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766};
|
||||
static constexpr storage<limbs_count> omega_inv16= {0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7};
|
||||
static constexpr storage<limbs_count> inv1= {0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739};
|
||||
static constexpr storage<limbs_count> inv2= {0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6};
|
||||
static constexpr storage<limbs_count> inv3= {0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4};
|
||||
static constexpr storage<limbs_count> inv4= {0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b};
|
||||
static constexpr storage<limbs_count> inv5= {0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff};
|
||||
static constexpr storage<limbs_count> inv6= {0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39};
|
||||
static constexpr storage<limbs_count> inv7= {0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5};
|
||||
static constexpr storage<limbs_count> inv8= {0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24};
|
||||
static constexpr storage<limbs_count> inv9= {0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b};
|
||||
static constexpr storage<limbs_count> inv10= {0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f};
|
||||
static constexpr storage<limbs_count> inv11= {0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9};
|
||||
static constexpr storage<limbs_count> inv12= {0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d};
|
||||
static constexpr storage<limbs_count> inv13= {0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50};
|
||||
static constexpr storage<limbs_count> inv14= {0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1};
|
||||
static constexpr storage<limbs_count> inv15= {0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa};
|
||||
static constexpr storage<limbs_count> inv16= {0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e};
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8,
|
||||
inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
|
||||
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24,
|
||||
inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
|
||||
};
|
||||
};
|
||||
|
||||
struct fq_config{
|
||||
@@ -127,10 +145,10 @@ namespace PARAMS{
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522, 0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45, 0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95, 0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a, 0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55, 0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr unsigned modulus_bits_count = 254;
|
||||
static constexpr storage<2*limbs_count> modulus_squared = {0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95, 0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a, 0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55, 0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr unsigned modulus_bit_count = 254;
|
||||
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17, 0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
@@ -139,17 +157,15 @@ namespace PARAMS{
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> generator_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_x_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_x_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_y_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_y_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> g1_gen_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> g1_gen_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> g2_gen_x_re = {0xd992f6ed, 0x46debd5c, 0xf75edadd, 0x674322d4, 0x5e5c4479, 0x426a0066, 0x121f1e76, 0x1800deef};
|
||||
static constexpr storage<limbs_count> g2_gen_x_im = {0xaef312c2, 0x97e485b7, 0x35a9e712, 0xf1aa4933, 0x31fb5d25, 0x7260bfb7, 0x920d483a, 0x198e9393};
|
||||
static constexpr storage<limbs_count> g2_gen_y_re = {0x66fa7daa, 0x4ce6cc01, 0x0c43d37b, 0xe3d1e769, 0x8dcb408f, 0x4aab7180, 0xdb8c6deb, 0x12c85ea5};
|
||||
static constexpr storage<limbs_count> g2_gen_y_im = {0xd122975b, 0x55acdadc, 0x70b38ef3, 0xbc4b3133, 0x690c3395, 0xec9e99ad, 0x585ff075, 0x090689d0};
|
||||
};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// TODO: correct parameters for G2 here
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x24a138e5, 0x3267e6dc, 0x59dbefa3, 0xb5b4c5e5, 0x1be06ac3, 0x81be1899, 0xceb8aaae, 0x2b149d40};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x85c315d2, 0xe4a2bd06, 0xe52d1852, 0xa74fa084, 0xeed8fdf4, 0xcd2cafad, 0x3af0fed4, 0x009713b0};
|
||||
}
|
||||
19
icicle/curves/bn254/projective.cu
Normal file
19
icicle/curves/bn254/projective.cu
Normal file
@@ -0,0 +1,19 @@
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
/**
 * Compares two BN254 projective points.
 *
 * Returns true only when `*point1 == *point2` and neither point has all three
 * coordinates (x, y, z) equal to the field zero.
 */
extern "C" bool eq_bn254(BN254::projective_t *point1, BN254::projective_t *point2)
{
  // True when x, y and z of the given point are all the field zero.
  auto all_coords_zero = [](BN254::projective_t *p) {
    return (p->x == BN254::point_field_t::zero()) &&
           (p->y == BN254::point_field_t::zero()) &&
           (p->z == BN254::point_field_t::zero());
  };

  if (!(*point1 == *point2)) {
    return false;
  }
  if (all_coords_zero(point1)) {
    return false;
  }
  return !all_coords_zero(point2);
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
/**
 * Compares two BN254 G2 projective points.
 *
 * Returns true only when `*point1 == *point2` and neither point has all three
 * coordinates (x, y, z) equal to the extension-field zero.
 */
extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective_t *point2)
{
  // True when x, y and z of the given point are all the extension-field zero.
  auto all_coords_zero = [](BN254::g2_projective_t *p) {
    return (p->x == BN254::g2_point_field_t::zero()) &&
           (p->y == BN254::g2_point_field_t::zero()) &&
           (p->z == BN254::g2_point_field_t::zero());
  };

  if (!(*point1 == *point2)) {
    return false;
  }
  if (all_coords_zero(point1)) {
    return false;
  }
  return !all_coords_zero(point2);
}
|
||||
#endif
|
||||
4
icicle/curves/bn254/supported_operations.cu
Normal file
4
icicle/curves/bn254/supported_operations.cu
Normal file
@@ -0,0 +1,4 @@
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
@@ -1,16 +1,16 @@
|
||||
#ifndef _VEC_MULT
|
||||
#define _VEC_MULT
|
||||
#ifndef _BN254_VEC_MULT
|
||||
#define _BN254_VEC_MULT
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "../primitives/field.cuh"
|
||||
#include "../utils/storage.cuh"
|
||||
#include "../primitives/projective.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
|
||||
|
||||
extern "C" int32_t vec_mod_mult_point(projective_t *inout,
|
||||
scalar_t *scalar_vec,
|
||||
extern "C" int32_t vec_mod_mult_point_bn254(BN254::projective_t *inout,
|
||||
BN254::scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
@@ -20,7 +20,7 @@ extern "C" int32_t vec_mod_mult_point(projective_t *inout,
|
||||
try
|
||||
{
|
||||
// TODO: device_id
|
||||
vector_mod_mult<projective_t, scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
vector_mod_mult<BN254::projective_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -30,8 +30,8 @@ extern "C" int32_t vec_mod_mult_point(projective_t *inout,
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t vec_mod_mult_scalar(scalar_t *inout,
|
||||
scalar_t *scalar_vec,
|
||||
extern "C" int32_t vec_mod_mult_scalar_bn254(BN254::scalar_t *inout,
|
||||
BN254::scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
@@ -41,7 +41,7 @@ extern "C" int32_t vec_mod_mult_scalar(scalar_t *inout,
|
||||
try
|
||||
{
|
||||
// TODO: device_id
|
||||
vector_mod_mult<scalar_t, scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
vector_mod_mult<BN254::scalar_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -51,9 +51,9 @@ extern "C" int32_t vec_mod_mult_scalar(scalar_t *inout,
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t matrix_vec_mod_mult(scalar_t *matrix_flattened,
|
||||
scalar_t *input,
|
||||
scalar_t *output,
|
||||
extern "C" int32_t matrix_vec_mod_mult_bn254(BN254::scalar_t *matrix_flattened,
|
||||
BN254::scalar_t *input,
|
||||
BN254::scalar_t *output,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
@@ -63,7 +63,7 @@ extern "C" int32_t matrix_vec_mod_mult(scalar_t *matrix_flattened,
|
||||
try
|
||||
{
|
||||
// TODO: device_id
|
||||
matrix_mod_mult<scalar_t>(matrix_flattened, input, output, n_elments, stream);
|
||||
matrix_mod_mult<BN254::scalar_t>(matrix_flattened, input, output, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
20
icicle/curves/curve_template/curve_config.cuh
Normal file
20
icicle/curves/curve_template/curve_config.cuh
Normal file
@@ -0,0 +1,20 @@
|
||||
#pragma once
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "params.cuh"
|
||||
|
||||
// Curve-specific type aliases. `${CURVE_NAME_U}` is a placeholder that is
// replaced with the curve name when this template is instantiated.
namespace ${CURVE_NAME_U} {
  // Scalar field, parameterised by `fp_config`.
  using scalar_field_t = Field<PARAMS_${CURVE_NAME_U}::fp_config>;
  using scalar_t = scalar_field_t;

  // Field of the point coordinates, parameterised by `fq_config`.
  using point_field_t = Field<PARAMS_${CURVE_NAME_U}::fq_config>;

  // Built from `weierstrass_b`; handed to `Projective` as a template argument.
  static constexpr point_field_t b = point_field_t{ PARAMS_${CURVE_NAME_U}::weierstrass_b };

  using projective_t = Projective<point_field_t, scalar_field_t, b>;
  using affine_t = Affine<point_field_t>;

#if defined(G2_DEFINED)
  // G2 counterparts: coordinates live in the extension field over `fq_config`.
  using g2_point_field_t = ExtensionField<PARAMS_${CURVE_NAME_U}::fq_config>;

  // Built from the real and imaginary parts `weierstrass_b_g2_re` / `weierstrass_b_g2_im`.
  static constexpr g2_point_field_t b_g2 = g2_point_field_t{
    point_field_t{ PARAMS_${CURVE_NAME_U}::weierstrass_b_g2_re },
    point_field_t{ PARAMS_${CURVE_NAME_U}::weierstrass_b_g2_im }};

  using g2_projective_t = Projective<g2_point_field_t, scalar_field_t, b_g2>;
  using g2_affine_t = Affine<g2_point_field_t>;
#endif
}
|
||||
327
icicle/curves/curve_template/lde.cu
Normal file
327
icicle/curves/curve_template/lde.cu
Normal file
@@ -0,0 +1,327 @@
|
||||
#ifndef _${CURVE_NAME_U}_LDE
|
||||
#define _${CURVE_NAME_U}_LDE
|
||||
#include <cuda.h>
|
||||
#include "../../appUtils/ntt/lde.cu"
|
||||
#include "../../appUtils/ntt/ntt.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "curve_config.cuh"
|
||||
|
||||
/**
 * Builds a twiddle-factor array by forwarding to `fill_twiddle_factors_array`.
 *
 * @param domain_size Number of elements requested from `fill_twiddle_factors_array`.
 * @param logn        Index passed to `scalar_t::omega` / `scalar_t::omega_inv` to pick the root.
 * @param inverse     When true the inverse root (`omega_inv`) is used, otherwise `omega`.
 * @param device_id   Accepted for interface compatibility; not used here.
 * @param stream      Overwritten by a stream created inside this function (see below).
 * @return Pointer returned by `fill_twiddle_factors_array`, or nullptr if a
 *         std::runtime_error was caught.
 */
extern "C" ${CURVE_NAME_U}::scalar_t* build_domain_cuda_${CURVE_NAME_L}(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  ${CURVE_NAME_U}::scalar_t* twiddles = nullptr;
  try
  {
    if (inverse) {
      twiddles = fill_twiddle_factors_array(domain_size, ${CURVE_NAME_U}::scalar_t::omega_inv(logn), stream);
    } else {
      twiddles = fill_twiddle_factors_array(domain_size, ${CURVE_NAME_U}::scalar_t::omega(logn), stream);
    }
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    twiddles = nullptr;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return twiddles;
}
|
||||
|
||||
/**
 * Runs `ntt_end2end_template` on an array of scalars.
 *
 * @param arr       Scalar array forwarded to `ntt_end2end_template`.
 * @param n         Element count forwarded to `ntt_end2end_template`.
 * @param inverse   Forwarded as the inverse-transform flag.
 * @param device_id Accepted for interface compatibility; not used yet.
 * @param stream    Overwritten by a stream created inside this function (see below).
 * @return Result of `ntt_end2end_template`, or -1 if a std::runtime_error was caught.
 */
extern "C" int ntt_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    status = ntt_end2end_template<${CURVE_NAME_U}::scalar_t,${CURVE_NAME_U}::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Runs `ntt_end2end_template` on an array of projective points with scalar twiddles.
 *
 * @param arr       Point array forwarded to `ntt_end2end_template`.
 * @param n         Element count forwarded to `ntt_end2end_template`.
 * @param inverse   Forwarded as the inverse-transform flag.
 * @param device_id Accepted for interface compatibility; not used yet.
 * @param stream    Overwritten by a stream created inside this function (see below).
 * @return Result of `ntt_end2end_template`, or -1 if a std::runtime_error was caught.
 */
extern "C" int ecntt_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    status = ntt_end2end_template<${CURVE_NAME_U}::projective_t,${CURVE_NAME_U}::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Runs `ntt_end2end_batch_template` on a batch of scalar arrays.
 *
 * @param arr        Scalar data forwarded to `ntt_end2end_batch_template`.
 * @param arr_size   Forwarded unchanged as the array size.
 * @param batch_size Forwarded unchanged as the batch size.
 * @param inverse    Forwarded as the inverse-transform flag.
 * @param device_id  Accepted for interface compatibility; not used yet.
 * @param stream     Overwritten by a stream created inside this function (see below).
 * @return Result of `ntt_end2end_batch_template`, or -1 if a std::runtime_error was caught.
 */
extern "C" int ntt_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    status = ntt_end2end_batch_template<${CURVE_NAME_U}::scalar_t,${CURVE_NAME_U}::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Runs `ntt_end2end_batch_template` on a batch of projective-point arrays.
 *
 * @param arr        Point data forwarded to `ntt_end2end_batch_template`.
 * @param arr_size   Forwarded unchanged as the array size.
 * @param batch_size Forwarded unchanged as the batch size.
 * @param inverse    Forwarded as the inverse-transform flag.
 * @param device_id  Accepted for interface compatibility; not used yet.
 * @param stream     Overwritten by a stream created inside this function (see below).
 * @return Result of `ntt_end2end_batch_template`, or -1 if a std::runtime_error was caught.
 */
extern "C" int ecntt_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    status = ntt_end2end_batch_template<${CURVE_NAME_U}::projective_t,${CURVE_NAME_U}::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Forwards to `interpolate` for scalar evaluations, on the caller-supplied stream.
 *
 * @param d_out         Output buffer passed to `interpolate`.
 * @param d_evaluations Input evaluations passed to `interpolate`.
 * @param d_domain      Domain array passed to `interpolate`.
 * @param n             Element count passed to `interpolate`.
 * @param device_id     Accepted for interface compatibility; not used here.
 * @param stream        CUDA stream passed to `interpolate`.
 * @return Result of `interpolate`, or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t *d_evaluations, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
  int status = -1;
  try
  {
    status = interpolate(d_out, d_evaluations, d_domain, n, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }
  return status;
}
|
||||
|
||||
/**
 * Forwards to `interpolate_batch` for a batch of scalar evaluations.
 *
 * @param d_out         Output buffer passed to `interpolate_batch`.
 * @param d_evaluations Input evaluations passed to `interpolate_batch`.
 * @param d_domain      Domain array passed to `interpolate_batch`.
 * @param n             Per-item element count passed to `interpolate_batch`.
 * @param batch_size    Batch size passed to `interpolate_batch`.
 * @param device_id     Accepted for interface compatibility; not used here.
 * @param stream        Overwritten by a stream created inside this function (see below).
 * @return Result of `interpolate_batch`, or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_scalars_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_evaluations, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned n,
                                                             unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Forwards to `interpolate` for projective-point evaluations, on the caller-supplied stream.
 *
 * @param d_out         Output buffer passed to `interpolate`.
 * @param d_evaluations Input point evaluations passed to `interpolate`.
 * @param d_domain      Scalar domain array passed to `interpolate`.
 * @param n             Element count passed to `interpolate`.
 * @param device_id     Accepted for interface compatibility; not used here.
 * @param stream        CUDA stream passed to `interpolate`.
 * @return Result of `interpolate`, or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t *d_evaluations, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
  int status = -1;
  try
  {
    status = interpolate(d_out, d_evaluations, d_domain, n, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }
  return status;
}
|
||||
|
||||
/**
 * Forwards to `interpolate_batch` for a batch of projective-point evaluations.
 *
 * @param d_out         Output buffer passed to `interpolate_batch`.
 * @param d_evaluations Input point evaluations passed to `interpolate_batch`.
 * @param d_domain      Scalar domain array passed to `interpolate_batch`.
 * @param n             Per-item element count passed to `interpolate_batch`.
 * @param batch_size    Batch size passed to `interpolate_batch`.
 * @param device_id     Accepted for interface compatibility; not used here.
 * @param stream        Overwritten by a stream created inside this function (see below).
 * @return Result of `interpolate_batch`, or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_points_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t* d_evaluations, ${CURVE_NAME_U}::scalar_t* d_domain,
                                                            unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Forwards to `evaluate` for scalar coefficients, with the coset flag off and a
 * null coset-powers pointer.
 *
 * @param d_out          Output buffer passed to `evaluate`.
 * @param d_coefficients Input coefficients passed to `evaluate`.
 * @param d_domain       Domain array passed to `evaluate`.
 * @param domain_size    Domain size passed to `evaluate`.
 * @param n              Coefficient count passed to `evaluate`.
 * @param device_id      Accepted for interface compatibility; not used here.
 * @param stream         Overwritten by a stream created inside this function (see below).
 * @return Result of `evaluate`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t *d_coefficients, ${CURVE_NAME_U}::scalar_t *d_domain,
                                                    unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    // Typed null pointer: no coset powers are supplied in the non-coset case.
    ${CURVE_NAME_U}::scalar_t* no_coset_powers = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Forwards to `evaluate_batch` for batched scalar coefficients, with the coset
 * flag off and a null coset-powers pointer.
 *
 * @param d_out          Output buffer passed to `evaluate_batch`.
 * @param d_coefficients Input coefficients passed to `evaluate_batch`.
 * @param d_domain       Domain array passed to `evaluate_batch`.
 * @param domain_size    Domain size passed to `evaluate_batch`.
 * @param n              Per-item coefficient count passed to `evaluate_batch`.
 * @param batch_size     Batch size passed to `evaluate_batch`.
 * @param device_id      Accepted for interface compatibility; not used here.
 * @param stream         Overwritten by a stream created inside this function (see below).
 * @return Result of `evaluate_batch`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_coefficients, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned domain_size,
                                                          unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    // Typed null pointer: no coset powers are supplied in the non-coset case.
    ${CURVE_NAME_U}::scalar_t* no_coset_powers = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Forwards to `evaluate` for projective-point coefficients, with the coset flag
 * off and a null coset-powers pointer.
 *
 * @param d_out          Output buffer passed to `evaluate`.
 * @param d_coefficients Input point coefficients passed to `evaluate`.
 * @param d_domain       Scalar domain array passed to `evaluate`.
 * @param domain_size    Domain size passed to `evaluate`.
 * @param n              Coefficient count passed to `evaluate`.
 * @param device_id      Accepted for interface compatibility; not used here.
 * @param stream         Overwritten by a stream created inside this function (see below).
 * @return Result of `evaluate`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t *d_coefficients, ${CURVE_NAME_U}::scalar_t *d_domain,
                                                   unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    // Typed null pointer: no coset powers are supplied in the non-coset case.
    ${CURVE_NAME_U}::scalar_t* no_coset_powers = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Forwards to `evaluate_batch` for batched projective-point coefficients, with
 * the coset flag off and a null coset-powers pointer.
 *
 * @param d_out          Output buffer passed to `evaluate_batch`.
 * @param d_coefficients Input point coefficients passed to `evaluate_batch`.
 * @param d_domain       Scalar domain array passed to `evaluate_batch`.
 * @param domain_size    Domain size passed to `evaluate_batch`.
 * @param n              Per-item coefficient count passed to `evaluate_batch`.
 * @param batch_size     Batch size passed to `evaluate_batch`.
 * @param device_id      Accepted for interface compatibility; not used here.
 * @param stream         Overwritten by a stream created inside this function (see below).
 * @return Result of `evaluate_batch`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_points_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t* d_coefficients, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned domain_size,
                                                         unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    // Typed null pointer: no coset powers are supplied in the non-coset case.
    ${CURVE_NAME_U}::scalar_t* no_coset_powers = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Forwards to `evaluate` for scalar coefficients with the coset flag on.
 *
 * @param d_out          Output buffer passed to `evaluate`.
 * @param d_coefficients Input coefficients passed to `evaluate`.
 * @param d_domain       Domain array passed to `evaluate`.
 * @param domain_size    Domain size passed to `evaluate`.
 * @param n              Coefficient count passed to `evaluate`.
 * @param coset_powers   Coset powers passed to `evaluate`.
 * @param device_id      Accepted for interface compatibility; not used here.
 * @param stream         Overwritten by a stream created inside this function (see below).
 * @return Result of `evaluate`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_on_coset_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t *d_coefficients, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned domain_size,
                                                             unsigned n, ${CURVE_NAME_U}::scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Forwards to `evaluate_batch` for batched scalar coefficients with the coset flag on.
 *
 * @param d_out          Output buffer passed to `evaluate_batch`.
 * @param d_coefficients Input coefficients passed to `evaluate_batch`.
 * @param d_domain       Domain array passed to `evaluate_batch`.
 * @param domain_size    Domain size passed to `evaluate_batch`.
 * @param n              Per-item coefficient count passed to `evaluate_batch`.
 * @param batch_size     Batch size passed to `evaluate_batch`.
 * @param coset_powers   Coset powers passed to `evaluate_batch`.
 * @param device_id      Accepted for interface compatibility; not used here.
 * @param stream         Overwritten by a stream created inside this function (see below).
 * @return Result of `evaluate_batch`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_coefficients, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned domain_size,
                                                                   unsigned n, unsigned batch_size, ${CURVE_NAME_U}::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Forwards to `evaluate` for projective-point coefficients with the coset flag on.
 *
 * @param d_out          Output buffer passed to `evaluate`.
 * @param d_coefficients Input point coefficients passed to `evaluate`.
 * @param d_domain       Scalar domain array passed to `evaluate`.
 * @param domain_size    Domain size passed to `evaluate`.
 * @param n              Coefficient count passed to `evaluate`.
 * @param coset_powers   Coset powers passed to `evaluate`.
 * @param device_id      Accepted for interface compatibility; not used here.
 * @param stream         Overwritten by a stream created inside this function (see below).
 * @return Result of `evaluate`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_points_on_coset_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t *d_coefficients, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned domain_size,
                                                            unsigned n, ${CURVE_NAME_U}::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Forwards to `evaluate_batch` for batched projective-point coefficients with the coset flag on.
 *
 * @param d_out          Output buffer passed to `evaluate_batch`.
 * @param d_coefficients Input point coefficients passed to `evaluate_batch`.
 * @param d_domain       Scalar domain array passed to `evaluate_batch`.
 * @param domain_size    Domain size passed to `evaluate_batch`.
 * @param n              Per-item coefficient count passed to `evaluate_batch`.
 * @param batch_size     Batch size passed to `evaluate_batch`.
 * @param coset_powers   Coset powers passed to `evaluate_batch`.
 * @param device_id      Accepted for interface compatibility; not used here.
 * @param stream         Overwritten by a stream created inside this function (see below).
 * @return Result of `evaluate_batch`, or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_points_on_coset_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t* d_coefficients, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned domain_size,
                                                                  unsigned n, unsigned batch_size, ${CURVE_NAME_U}::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = -1;
  try
  {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Calls `reverse_order` on a scalar array, passing floor(log2(n)) as `logn`.
 *
 * @param arr       Scalar array passed to `reverse_order`.
 * @param n         Element count; `logn` is derived from it.
 * @param device_id Accepted for interface compatibility; not used here.
 * @param stream    Overwritten by a stream created inside this function (see below).
 * @return 0 on success, -1 if a std::runtime_error was caught.
 */
extern "C" int reverse_order_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  // floor(log2(n)) by integer shifts: unlike `log(n) / log(2)` this cannot be thrown off by
  // floating-point rounding, and it is well defined (0) for n <= 1.
  uint32_t logn = 0;
  for (int remaining = n; remaining > 1; remaining >>= 1) ++logn;

  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = 0;
  try
  {
    reverse_order(arr, n, logn, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Calls `reverse_order_batch` on batched scalar arrays, passing floor(log2(n)) as `logn`.
 *
 * @param arr        Scalar data passed to `reverse_order_batch`.
 * @param n          Per-item element count; `logn` is derived from it.
 * @param batch_size Batch size passed to `reverse_order_batch`.
 * @param device_id  Accepted for interface compatibility; not used here.
 * @param stream     Overwritten by a stream created inside this function (see below).
 * @return 0 on success, -1 if a std::runtime_error was caught.
 */
extern "C" int reverse_order_scalars_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // floor(log2(n)) by integer shifts: unlike `log(n) / log(2)` this cannot be thrown off by
  // floating-point rounding, and it is well defined (0) for n <= 1.
  uint32_t logn = 0;
  for (int remaining = n; remaining > 1; remaining >>= 1) ++logn;

  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = 0;
  try
  {
    reverse_order_batch(arr, n, logn, batch_size, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Calls `reverse_order` on a projective-point array, passing floor(log2(n)) as `logn`.
 *
 * @param arr       Point array passed to `reverse_order`.
 * @param n         Element count; `logn` is derived from it.
 * @param device_id Accepted for interface compatibility; not used here.
 * @param stream    Overwritten by a stream created inside this function (see below).
 * @return 0 on success, -1 if a std::runtime_error was caught.
 */
extern "C" int reverse_order_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  // floor(log2(n)) by integer shifts: unlike `log(n) / log(2)` this cannot be thrown off by
  // floating-point rounding, and it is well defined (0) for n <= 1.
  uint32_t logn = 0;
  for (int remaining = n; remaining > 1; remaining >>= 1) ++logn;

  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = 0;
  try
  {
    reverse_order(arr, n, logn, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Calls `reverse_order_batch` on batched projective-point arrays, passing floor(log2(n)) as `logn`.
 *
 * @param arr        Point data passed to `reverse_order_batch`.
 * @param n          Per-item element count; `logn` is derived from it.
 * @param batch_size Batch size passed to `reverse_order_batch`.
 * @param device_id  Accepted for interface compatibility; not used here.
 * @param stream     Overwritten by a stream created inside this function (see below).
 * @return 0 on success, -1 if a std::runtime_error was caught.
 */
extern "C" int reverse_order_points_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // floor(log2(n)) by integer shifts: unlike `log(n) / log(2)` this cannot be thrown off by
  // floating-point rounding, and it is well defined (0) for n <= 1.
  uint32_t logn = 0;
  for (int remaining = n; remaining > 1; remaining >>= 1) ++logn;

  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = 0;
  try
  {
    reverse_order_batch(arr, n, logn, batch_size, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  // cudaStreamDestroy returns immediately; work already queued on the stream still completes.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
#endif
|
||||
92
icicle/curves/curve_template/msm.cu
Normal file
92
icicle/curves/curve_template/msm.cu
Normal file
@@ -0,0 +1,92 @@
|
||||
#ifndef _${CURVE_NAME_U}_MSM
|
||||
#define _${CURVE_NAME_U}_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
|
||||
extern "C"
|
||||
int msm_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *out, ${CURVE_NAME_U}::affine_t points[],
|
||||
${CURVE_NAME_U}::scalar_t scalars[], size_t count, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::affine_t>(scalars, points, count, out, false, false, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Runs a batch of MSMs by forwarding to `batched_large_msm`, then waits for the stream.
 *
 * @param out        Destination for the results, passed to `batched_large_msm`.
 * @param points     Affine points passed to `batched_large_msm`.
 * @param scalars    Scalars passed to `batched_large_msm`.
 * @param batch_size Batch size passed to `batched_large_msm`.
 * @param msm_size   Size of each MSM, passed to `batched_large_msm`.
 * @param device_id  Accepted for interface compatibility; not used here.
 * @param stream     Overwritten by a stream created inside this function (see below).
 * @return CUDA_SUCCESS on success, -1 if a std::runtime_error was caught.
 */
extern "C" int msm_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* out, ${CURVE_NAME_U}::affine_t points[],
                                             ${CURVE_NAME_U}::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // The `stream` argument is replaced by a stream created here (existing behaviour kept);
  // that private stream is destroyed before returning so it is not leaked on every call.
  cudaStream_t private_stream;
  const bool owns_stream = (cudaStreamCreate(&private_stream) == cudaSuccess);
  if (owns_stream) stream = private_stream;

  int status = CUDA_SUCCESS;
  try
  {
    batched_large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
    cudaStreamSynchronize(stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }

  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::scalar_t* d_scalars, ${CURVE_NAME_U}::affine_t* d_points, size_t count, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::scalar_t* d_scalars, ${CURVE_NAME_U}::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
66
icicle/curves/curve_template/params.cuh
Normal file
66
icicle/curves/curve_template/params.cuh
Normal file
@@ -0,0 +1,66 @@
|
||||
#pragma once
|
||||
#include "../../utils/storage.cuh"
|
||||
|
||||
// Field and curve constants for one curve. Every `${...}` token is a placeholder
// that is filled in when this template is instantiated.
namespace PARAMS_${curve_name_U} {
  // Configuration consumed as `fp_config` (used for the scalar field in curve_config.cuh).
  struct fp_config {
    // Sizes
    static constexpr unsigned limbs_count = ${fp_num_limbs};
    static constexpr unsigned omegas_count = ${num_omegas};
    static constexpr unsigned modulus_bit_count = ${fp_modulus_bit_count};

    // Modulus-derived constants, single and double width
    static constexpr storage<limbs_count> modulus = {${fp_modulus}};
    static constexpr storage<limbs_count> modulus_2 = {${fp_modulus_2}};
    static constexpr storage<limbs_count> modulus_4 = {${fp_modulus_4}};
    static constexpr storage<2*limbs_count> modulus_wide = {${fp_modulus_wide}};
    static constexpr storage<2*limbs_count> modulus_squared = {${fp_modulus_squared}};
    static constexpr storage<2*limbs_count> modulus_squared_2 = {${fp_modulus_squared_2}};
    static constexpr storage<2*limbs_count> modulus_squared_4 = {${fp_modulus_squared_4}};

    static constexpr storage<limbs_count> m = {${fp_m}};
    static constexpr storage<limbs_count> one = {${fp_one}};
    static constexpr storage<limbs_count> zero = {${fp_zero}};

    // Tables with `omegas_count` entries each
    static constexpr storage_array<omegas_count, limbs_count> omega = { {
      ${omega}
    } };

    static constexpr storage_array<omegas_count, limbs_count> omega_inv = { {
      ${omega_inv}
    } };

    static constexpr storage_array<omegas_count, limbs_count> inv = { {
      ${inv}
    } };
  };

  // Configuration consumed as `fq_config` (used for the point coordinate field in curve_config.cuh).
  struct fq_config {
    static constexpr unsigned limbs_count = ${fq_num_limbs};
    static constexpr unsigned modulus_bit_count = ${fq_modulus_bit_count};

    static constexpr storage<limbs_count> modulus = {${fq_modulus}};
    static constexpr storage<limbs_count> modulus_2 = {${fq_modulus_2}};
    static constexpr storage<limbs_count> modulus_4 = {${fq_modulus_4}};
    static constexpr storage<2*limbs_count> modulus_wide = {${fq_modulus_wide}};
    static constexpr storage<2*limbs_count> modulus_squared = {${fq_modulus_squared}};
    static constexpr storage<2*limbs_count> modulus_squared_2 = {${fq_modulus_squared_2}};
    static constexpr storage<2*limbs_count> modulus_squared_4 = {${fq_modulus_squared_4}};

    static constexpr storage<limbs_count> m = {${fq_m}};
    static constexpr storage<limbs_count> one = {${fq_one}};
    static constexpr storage<limbs_count> zero = {${fq_zero}};

    // i^2, the square of the imaginary unit for the extension field
    static constexpr uint32_t i_squared = 1;
    // true if i^2 is negative
    static constexpr bool i_squared_is_negative = true;

    // G1 and G2 generators
    static constexpr storage<limbs_count> g1_gen_x = {${fq_gen_x}};
    static constexpr storage<limbs_count> g1_gen_y = {${fq_gen_y}};
    static constexpr storage<limbs_count> g2_gen_x_re = {${fq_gen_x_re}};
    static constexpr storage<limbs_count> g2_gen_x_im = {${fq_gen_x_im}};
    static constexpr storage<limbs_count> g2_gen_y_re = {${fq_gen_y_re}};
    static constexpr storage<limbs_count> g2_gen_y_im = {${fq_gen_y_im}};
  };

  // Weierstrass `b` constants: one for G1, and real/imaginary parts for G2.
  static constexpr storage<fq_config::limbs_count> weierstrass_b = {${weier_b}};
  static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {${weier_b_g2_re}};
  static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {${weier_b_g2_im}};
}
|
||||
19
icicle/curves/curve_template/projective.cu
Normal file
19
icicle/curves/curve_template/projective.cu
Normal file
@@ -0,0 +1,19 @@
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
/**
 * Compares two projective points of the templated curve.
 *
 * Returns true only when `*point1 == *point2` and neither point has all three
 * coordinates (x, y, z) equal to the field zero.
 */
extern "C" bool eq_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *point1, ${CURVE_NAME_U}::projective_t *point2)
{
  // True when x, y and z of the given point are all the field zero.
  auto all_coords_zero = [](${CURVE_NAME_U}::projective_t *p) {
    return (p->x == ${CURVE_NAME_U}::point_field_t::zero()) &&
           (p->y == ${CURVE_NAME_U}::point_field_t::zero()) &&
           (p->z == ${CURVE_NAME_U}::point_field_t::zero());
  };

  if (!(*point1 == *point2)) {
    return false;
  }
  if (all_coords_zero(point1)) {
    return false;
  }
  return !all_coords_zero(point2);
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
/**
 * Compares two G2 projective points of the templated curve.
 *
 * Returns true only when `*point1 == *point2` and neither point has all three
 * coordinates (x, y, z) equal to the extension-field zero.
 */
extern "C" bool eq_g2_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t *point1, ${CURVE_NAME_U}::g2_projective_t *point2)
{
  // True when x, y and z of the given point are all the extension-field zero.
  auto all_coords_zero = [](${CURVE_NAME_U}::g2_projective_t *p) {
    return (p->x == ${CURVE_NAME_U}::g2_point_field_t::zero()) &&
           (p->y == ${CURVE_NAME_U}::g2_point_field_t::zero()) &&
           (p->z == ${CURVE_NAME_U}::g2_point_field_t::zero());
  };

  if (!(*point1 == *point2)) {
    return false;
  }
  if (all_coords_zero(point1)) {
    return false;
  }
  return !all_coords_zero(point2);
}
|
||||
#endif
|
||||
4
icicle/curves/curve_template/supported_operations.cu
Normal file
4
icicle/curves/curve_template/supported_operations.cu
Normal file
@@ -0,0 +1,4 @@
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user