mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-12 00:47:59 -05:00
Compare commits
23 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
eced32b28b | ||
|
|
b316e4d4f7 | ||
|
|
b9ab19826a | ||
|
|
e3f9237ceb | ||
|
|
0aef5f2f70 | ||
|
|
396c5f3c7b | ||
|
|
f183dacfd6 | ||
|
|
10a638fba5 | ||
|
|
9e8f0ec8f2 | ||
|
|
53a63bb5ad | ||
|
|
c108f5cc90 | ||
|
|
af90ab0961 | ||
|
|
845a529423 | ||
|
|
071c24ce5a | ||
|
|
08c34a5183 | ||
|
|
c13b003720 | ||
|
|
25a4eebc0a | ||
|
|
e41de7dec7 | ||
|
|
472a9f5107 | ||
|
|
689b4814e1 | ||
|
|
7ace91528a | ||
|
|
e0f5eac3a8 | ||
|
|
55b0faa0f3 |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -11,3 +11,4 @@
|
||||
**/__pycache__/
|
||||
**/.DS_Store
|
||||
**/Cargo.lock
|
||||
**/icicle/build/
|
||||
|
||||
@@ -23,7 +23,9 @@ ark-std = "0.3.0"
|
||||
ark-ff = "0.3.0"
|
||||
ark-poly = "0.3.0"
|
||||
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
|
||||
ark-bls12-381 = { version = "0.3.0", optional = true }
|
||||
ark-bls12-381 = "0.3.0"
|
||||
ark-bls12-377 = "0.3.0"
|
||||
ark-bn254 = "0.3.0"
|
||||
|
||||
rustacuda = "0.1"
|
||||
rustacuda_core = "0.1"
|
||||
@@ -40,3 +42,4 @@ cc = { version = "1.0", features = ["parallel"] }
|
||||
[features]
|
||||
default = ["bls12_381"]
|
||||
bls12_381 = ["ark-bls12-381/curve"]
|
||||
g2 = []
|
||||
|
||||
55
README.md
55
README.md
@@ -23,8 +23,8 @@ ICICLE is a CUDA implementation of general functions widely used in ZKP. ICICLE
|
||||
- Affine: {x, y}
|
||||
- Curves
|
||||
- [BLS12-381]
|
||||
|
||||
> NOTE: _Support for BN254 and BLS12-377 are planned_
|
||||
- [BLS12-377]
|
||||
- [BN254]
|
||||
|
||||
## Build and usage
|
||||
|
||||
@@ -41,7 +41,7 @@ nvcc -o build/<ENTER_DIR_NAME> ./icicle/appUtils/ntt/ntt.cu ./icicle/appUtils/ms
|
||||
|
||||
### Testing the CUDA code
|
||||
|
||||
We are using [googletest] library for testing. To build and run the test suite for finite field and elliptic curve arithmetic, run from the `icicle` folder:
|
||||
We are using the [googletest] library for testing. To build and run [the test suite](./icicle/README.md) for finite field and elliptic curve arithmetic, run from the `icicle` folder:
|
||||
|
||||
```sh
|
||||
mkdir -p build
|
||||
@@ -89,6 +89,55 @@ The flag `--test-threads=1` is needed because currently some tests might interfe
|
||||
|
||||
An example of using the Rust bindings library can be found in our [fast-danksharding implementation][FDI]
|
||||
|
||||
### Supporting Additional Curves
|
||||
|
||||
Supporting additional curves can be done as follows:
|
||||
|
||||
Create a JSON file with the curve parameters. The curve is defined by the following parameters:
|
||||
- ``curve_name`` - e.g. ``bls12_381``.
|
||||
- ``modolus_p`` - scalar field modulus (in decimal).
|
||||
- ``bit_count_p`` - number of bits needed to represent ``modolus_p``.
|
||||
- ``limb_p`` - number of 32-bit limbs needed to represent ``modolus_p`` (rounded up).
|
||||
- ``ntt_size`` - log2 of the size of the largest power-of-two multiplicative subgroup of the scalar field to support for NTT.
|
||||
- ``modolus_q`` - base field modulus (in decimal).
|
||||
- ``bit_count_q`` - number of bits needed to represent ``modolus_q``.
|
||||
- ``limb_q`` - number of 32-bit limbs needed to represent ``modolus_q`` (rounded up).
|
||||
- ``weierstrass_b`` - the constant ``b`` in the curve's short Weierstrass equation ``y^2 = x^3 + b``.
|
||||
- ``gen_x`` - x-value of a generator element for the curve.
|
||||
- ``gen_y`` - y-value of a generator element for the curve.
|
||||
|
||||
Here's an example for BLS12-381.
|
||||
```
|
||||
{
|
||||
"curve_name" : "bls12_381",
|
||||
"modolus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
|
||||
"bit_count_p" : 255,
|
||||
"limb_p" : 8,
|
||||
"ntt_size" : 32,
|
||||
"modolus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
|
||||
"bit_count_q" : 381,
|
||||
"limb_q" : 12,
|
||||
"weierstrass_b" : 4,
|
||||
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
|
||||
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569
|
||||
}
|
||||
```
|
||||
|
||||
Save the parameters JSON file in ``curve_parameters``.
|
||||
|
||||
Then run the Python script ``new_curve_script.py`` from the main icicle folder:
|
||||
|
||||
```
|
||||
python3 ./curve_parameters/new_curve_script.py ./curve_parameters/bls12_381.json
|
||||
```
|
||||
|
||||
The script does the following:
|
||||
- Creates a folder in ``icicle/curves`` with the curve name, which contains all of the files needed for the supported operations in cuda.
|
||||
- Adds the curve exported operations to ``icicle/curves/index.cu``.
|
||||
- Creates a file with the curve name in ``src/curves`` with the relevant objects for the curve.
|
||||
- Creates a test file with the curve name in ``src``.
|
||||
|
||||
Testing the new curve could be done by running the tests in ``tests_curve_name`` (e.g. ``tests_bls12_381``).
|
||||
## Contributions
|
||||
|
||||
Join our [Discord Server](https://discord.gg/Y4SkbDf2Ff) and find us on the icicle channel. We will be happy to work together to support your use case and talk features, bugs and design.
|
||||
|
||||
@@ -2,7 +2,10 @@ extern crate criterion;
|
||||
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
|
||||
use icicle_utils::{set_up_scalars, generate_random_points, commit_batch, get_rng};
|
||||
use icicle_utils::{set_up_scalars, generate_random_points, commit_batch, get_rng, field::BaseField};
|
||||
#[cfg(feature = "g2")]
|
||||
use icicle_utils::{commit_batch_g2, field::ExtensionField};
|
||||
|
||||
use rustacuda::prelude::*;
|
||||
|
||||
|
||||
@@ -10,20 +13,35 @@ const LOG_MSM_SIZES: [usize; 1] = [12];
|
||||
const BATCH_SIZES: [usize; 2] = [128, 256];
|
||||
|
||||
fn bench_msm(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("MSM");
|
||||
for log_msm_size in LOG_MSM_SIZES {
|
||||
for batch_size in BATCH_SIZES {
|
||||
let msm_size = 1 << log_msm_size;
|
||||
let (scalars, _, _) = set_up_scalars(msm_size, 0, false);
|
||||
let batch_scalars = vec![scalars; batch_size].concat();
|
||||
let mut d_scalars = DeviceBuffer::from_slice(&batch_scalars[..]).unwrap();
|
||||
let points = generate_random_points(msm_size, get_rng(None));
|
||||
|
||||
let points = generate_random_points::<BaseField>(msm_size, get_rng(None));
|
||||
let batch_points = vec![points; batch_size].concat();
|
||||
let mut d_points = DeviceBuffer::from_slice(&batch_points[..]).unwrap();
|
||||
|
||||
c.bench_function(
|
||||
#[cfg(feature = "g2")]
|
||||
let g2_points = generate_random_points::<ExtensionField>(msm_size, get_rng(None));
|
||||
#[cfg(feature = "g2")]
|
||||
let g2_batch_points = vec![g2_points; batch_size].concat();
|
||||
#[cfg(feature = "g2")]
|
||||
let mut d_g2_points = DeviceBuffer::from_slice(&g2_batch_points[..]).unwrap();
|
||||
|
||||
group.sample_size(30).bench_function(
|
||||
&format!("MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|
||||
|b| b.iter(|| commit_batch(&mut d_points, &mut d_scalars, batch_size))
|
||||
);
|
||||
|
||||
#[cfg(feature = "g2")]
|
||||
group.sample_size(10).bench_function(
|
||||
&format!("G2 MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|
||||
|b| b.iter(|| commit_batch_g2(&mut d_g2_points, &mut d_scalars, batch_size))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8,33 +8,26 @@ use icicle_utils::{interpolate_scalars_batch, interpolate_points_batch, set_up_s
|
||||
const LOG_NTT_SIZES: [usize; 1] = [15];
|
||||
const BATCH_SIZES: [usize; 2] = [8, 16];
|
||||
|
||||
fn bench_point_ntt(c: &mut Criterion) {
|
||||
for log_ntt_size in LOG_NTT_SIZES {
|
||||
for batch_size in BATCH_SIZES {
|
||||
let ntt_size = 1 << log_ntt_size;
|
||||
let (_, mut d_evals, mut d_domain) = set_up_points(ntt_size * batch_size, log_ntt_size, true);
|
||||
|
||||
c.bench_function(
|
||||
&format!("EC NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| interpolate_points_batch(&mut d_evals, &mut d_domain, batch_size))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn bench_scalar_ntt(c: &mut Criterion) {
|
||||
fn bench_ntt(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("NTT");
|
||||
for log_ntt_size in LOG_NTT_SIZES {
|
||||
for batch_size in BATCH_SIZES {
|
||||
let ntt_size = 1 << log_ntt_size;
|
||||
let (_, mut d_evals, mut d_domain) = set_up_scalars(ntt_size * batch_size, log_ntt_size, true);
|
||||
let (_, mut d_points_evals, _) = set_up_points(ntt_size * batch_size, log_ntt_size, true);
|
||||
|
||||
c.bench_function(
|
||||
group.sample_size(100).bench_function(
|
||||
&format!("Scalar NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size))
|
||||
);
|
||||
|
||||
group.sample_size(10).bench_function(
|
||||
&format!("EC NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| interpolate_points_batch(&mut d_points_evals, &mut d_domain, batch_size))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
criterion_group!(ntt_benches, bench_point_ntt, bench_scalar_ntt);
|
||||
criterion_group!(ntt_benches, bench_ntt);
|
||||
criterion_main!(ntt_benches);
|
||||
|
||||
8
build.rs
8
build.rs
@@ -16,14 +16,14 @@ fn main() {
|
||||
|
||||
println!("Compiling icicle library using arch: {}", &arch);
|
||||
|
||||
if cfg!(feature = "g2") {
|
||||
nvcc.define("G2_DEFINED", None);
|
||||
}
|
||||
nvcc.cuda(true);
|
||||
nvcc.debug(false);
|
||||
nvcc.flag(&arch);
|
||||
nvcc.files([
|
||||
"./icicle/appUtils/vector_manipulation/ve_mod_mult.cu",
|
||||
"./icicle/appUtils/ntt/lde.cu",
|
||||
"./icicle/appUtils/msm/msm.cu",
|
||||
"./icicle/primitives/projective.cu",
|
||||
"./icicle/curves/index.cu",
|
||||
]);
|
||||
nvcc.compile("ingo_icicle"); //TODO: extension??
|
||||
}
|
||||
|
||||
13
curve_parameters/bls12_377.json
Normal file
13
curve_parameters/bls12_377.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"curve_name" : "bls12_377",
|
||||
"modolus_p" : 8444461749428370424248824938781546531375899335154063827935233455917409239041,
|
||||
"bit_count_p" : 253,
|
||||
"limb_p" : 8,
|
||||
"ntt_size" : 32,
|
||||
"modolus_q" : 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177,
|
||||
"bit_count_q" : 377,
|
||||
"limb_q" : 12,
|
||||
"weierstrass_b" : 1,
|
||||
"gen_x" : 81937999373150964239938255573465948239988671502647976594219695644855304257327692006745978603320413799295628339695,
|
||||
"gen_y" : 241266749859715473739788878240585681733927191168601896383759122102112907357779751001206799952863815012735208165030
|
||||
}
|
||||
13
curve_parameters/bls12_381.json
Normal file
13
curve_parameters/bls12_381.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"curve_name" : "bls12_381",
|
||||
"modolus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
|
||||
"bit_count_p" : 255,
|
||||
"limb_p" : 8,
|
||||
"ntt_size" : 32,
|
||||
"modolus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
|
||||
"bit_count_q" : 381,
|
||||
"limb_q" : 12,
|
||||
"weierstrass_b" : 4,
|
||||
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
|
||||
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569
|
||||
}
|
||||
13
curve_parameters/bn254.json
Normal file
13
curve_parameters/bn254.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"curve_name" : "bn254",
|
||||
"modolus_p" : 21888242871839275222246405745257275088548364400416034343698204186575808495617,
|
||||
"bit_count_p" : 254,
|
||||
"limb_p" : 8,
|
||||
"ntt_size" : 16,
|
||||
"modolus_q" : 21888242871839275222246405745257275088696311157297823662689037894645226208583,
|
||||
"bit_count_q" : 254,
|
||||
"limb_q" : 8,
|
||||
"weierstrass_b" : 3,
|
||||
"gen_x" : 1,
|
||||
"gen_y" : 2
|
||||
}
|
||||
203
curve_parameters/new_curve_script.py
Normal file
203
curve_parameters/new_curve_script.py
Normal file
@@ -0,0 +1,203 @@
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
from sympy.ntheory import isprime, primitive_root
|
||||
import subprocess
|
||||
import random
|
||||
import sys
|
||||
|
||||
data = None
|
||||
with open(sys.argv[1]) as json_file:
|
||||
data = json.load(json_file)
|
||||
|
||||
curve_name = data["curve_name"]
|
||||
modolus_p = data["modolus_p"]
|
||||
bit_count_p = data["bit_count_p"]
|
||||
limb_p = data["limb_p"]
|
||||
ntt_size = data["ntt_size"]
|
||||
modolus_q = data["modolus_q"]
|
||||
bit_count_q = data["bit_count_q"]
|
||||
limb_q = data["limb_q"]
|
||||
weierstrass_b = data["weierstrass_b"]
|
||||
gen_x = data["gen_x"]
|
||||
gen_y = data["gen_y"]
|
||||
|
||||
|
||||
def to_hex(val, length):
|
||||
x = str(hex(val))[2:]
|
||||
if len(x) % 8 != 0:
|
||||
x = "0" * (8-len(x) % 8) + x
|
||||
if len(x) != length:
|
||||
x = "0" * (length-len(x)) + x
|
||||
n = 8
|
||||
chunks = [x[i:i+n] for i in range(0, len(x), n)][::-1]
|
||||
s = ""
|
||||
for c in chunks:
|
||||
s += "0x" + c + ", "
|
||||
return s
|
||||
|
||||
|
||||
def get_root_of_unity(order: int) -> int:
|
||||
assert (modolus_p - 1) % order == 0
|
||||
return pow(5, (modolus_p - 1) // order, modolus_p)
|
||||
|
||||
def create_field_parameters_struct(modulus, modulus_bits_count,limbs,ntt,size,name):
|
||||
s = " struct "+name+"{\n"
|
||||
s += " static constexpr unsigned limbs_count = " + str(limbs)+";\n"
|
||||
s += " static constexpr storage<limbs_count> modulus = {"+to_hex(modulus,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<limbs_count> modulus_2 = {"+to_hex(modulus*2,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<limbs_count> modulus_4 = {"+to_hex(modulus*4,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<2*limbs_count> modulus_wide = {"+to_hex(modulus,8*limbs*2)[:-2]+"};\n"
|
||||
s += " static constexpr storage<2*limbs_count> modulus_sqared = {"+to_hex(modulus*modulus,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<2*limbs_count> modulus_sqared_2 = {"+to_hex(modulus*modulus*2,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr storage<2*limbs_count> modulus_sqared_4 = {"+to_hex(modulus*modulus*2*2,8*limbs)[:-2]+"};\n"
|
||||
s += " static constexpr unsigned modulus_bits_count = "+str(modulus_bits_count)+";\n"
|
||||
m = int(math.floor(int(pow(2,2*modulus_bits_count) // modulus)))
|
||||
s += " static constexpr storage<limbs_count> m = {"+ to_hex(m,8*limbs)[:-2] +"};\n"
|
||||
s += " static constexpr storage<limbs_count> one = {"+ to_hex(1,8*limbs)[:-2] +"};\n"
|
||||
s += " static constexpr storage<limbs_count> zero = {"+ to_hex(0,8*limbs)[:-2] +"};\n"
|
||||
|
||||
if ntt:
|
||||
for k in range(size):
|
||||
omega = get_root_of_unity(int(pow(2,k+1)))
|
||||
s += " static constexpr storage<limbs_count> omega"+str(k+1)+"= {"+ to_hex(omega,8*limbs)[:-2]+"};\n"
|
||||
for k in range(size):
|
||||
omega = get_root_of_unity(int(pow(2,k+1)))
|
||||
s += " static constexpr storage<limbs_count> omega_inv"+str(k+1)+"= {"+ to_hex(pow(omega, -1, modulus),8*limbs)[:-2]+"};\n"
|
||||
for k in range(size):
|
||||
s += " static constexpr storage<limbs_count> inv"+str(k+1)+"= {"+ to_hex(pow(int(pow(2,k+1)), -1, modulus),8*limbs)[:-2]+"};\n"
|
||||
s+=" };\n"
|
||||
return s
|
||||
|
||||
def create_gen():
|
||||
s = " struct group_generator {\n"
|
||||
s += " static constexpr storage<fq_config::limbs_count> generator_x = {"+to_hex(gen_x,8*limb_q)[:-2]+ "};\n"
|
||||
s += " static constexpr storage<fq_config::limbs_count> generator_y = {"+to_hex(gen_y,8*limb_q)[:-2]+ "};\n"
|
||||
s+=" };\n"
|
||||
return s
|
||||
|
||||
def get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b):
|
||||
file_content = ""
|
||||
file_content += "#pragma once\n#include \"../../utils/storage.cuh\"\n"
|
||||
file_content += "namespace PARAMS_"+curve_name.upper()+"{\n"
|
||||
file_content += create_field_parameters_struct(modolus_p,bit_count_p,limb_p,True,ntt_size,"fp_config")
|
||||
file_content += create_field_parameters_struct(modolus_q,bit_count_q,limb_q,False,0,"fq_config")
|
||||
file_content += " static constexpr unsigned weierstrass_b = " + str(weierstrass_b)+ ";\n"
|
||||
file_content += create_gen()
|
||||
file_content+="}\n"
|
||||
return file_content
|
||||
|
||||
|
||||
# Create Cuda interface
|
||||
|
||||
newpath = "./icicle/curves/"+curve_name
|
||||
if not os.path.exists(newpath):
|
||||
os.makedirs(newpath)
|
||||
|
||||
fc = get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b)
|
||||
text_file = open("./icicle/curves/"+curve_name+"/params.cuh", "w")
|
||||
n = text_file.write(fc)
|
||||
text_file.close()
|
||||
|
||||
with open("./icicle/curves/curve_template/lde.cu", "r") as lde_file:
|
||||
content = lde_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
text_file = open("./icicle/curves/"+curve_name+"/lde.cu", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
|
||||
with open("./icicle/curves/curve_template/msm.cu", "r") as msm_file:
|
||||
content = msm_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
text_file = open("./icicle/curves/"+curve_name+"/msm.cu", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
|
||||
with open("./icicle/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_file:
|
||||
content = ve_mod_mult_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
text_file = open("./icicle/curves/"+curve_name+"/ve_mod_mult.cu", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
|
||||
|
||||
namespace = '#include "params.cuh"\n'+'''namespace CURVE_NAME_U {
|
||||
typedef Field<PARAMS_CURVE_NAME_U::fp_config> scalar_field_t;\
|
||||
typedef scalar_field_t scalar_t;\
|
||||
typedef Field<PARAMS_CURVE_NAME_U::fq_config> point_field_t;
|
||||
typedef Projective<point_field_t, scalar_field_t, PARAMS_CURVE_NAME_U::group_generator, PARAMS_CURVE_NAME_U::weierstrass_b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
}'''
|
||||
|
||||
with open('./icicle/curves/'+curve_name+'/curve_config.cuh', 'w') as f:
|
||||
f.write(namespace.replace("CURVE_NAME_U",curve_name.upper()))
|
||||
|
||||
|
||||
eq = '''
|
||||
#include <cuda.h>\n
|
||||
#include "curve_config.cuh"\n
|
||||
#include "../../primitives/projective.cuh"\n
|
||||
extern "C" bool eq_CURVE_NAME_L(CURVE_NAME_U::projective_t *point1, CURVE_NAME_U::projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2);
|
||||
}'''
|
||||
|
||||
with open('./icicle/curves/'+curve_name+'/projective.cu', 'w') as f:
|
||||
f.write(eq.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
|
||||
|
||||
supported_operations = '''
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
'''
|
||||
|
||||
with open('./icicle/curves/'+curve_name+'/supported_operations.cu', 'w') as f:
|
||||
f.write(supported_operations.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
|
||||
|
||||
with open('./icicle/curves/index.cu', 'a') as f:
|
||||
f.write('\n#include "'+curve_name.lower()+'/supported_operations.cu"')
|
||||
|
||||
|
||||
|
||||
# Create Rust interface and tests
|
||||
|
||||
if limb_p == limb_q:
|
||||
with open("./src/curve_templates/curve_same_limbs.rs", "r") as curve_file:
|
||||
content = curve_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
content = content.replace("_limbs_p",str(limb_p * 8 * 4))
|
||||
content = content.replace("limbs_p",str(limb_p))
|
||||
text_file = open("./src/curves/"+curve_name+".rs", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
else:
|
||||
with open("./src/curve_templates/curve_different_limbs.rs", "r") as curve_file:
|
||||
content = curve_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
content = content.replace("_limbs_p",str(limb_p * 8 * 4))
|
||||
content = content.replace("limbs_p",str(limb_p))
|
||||
content = content.replace("_limbs_q",str(limb_q * 8 * 4))
|
||||
content = content.replace("limbs_q",str(limb_q))
|
||||
text_file = open("./src/curves/"+curve_name+".rs", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
|
||||
with open("./src/curve_templates/test.rs", "r") as test_file:
|
||||
content = test_file.read()
|
||||
content = content.replace("CURVE_NAME_U",curve_name.upper())
|
||||
content = content.replace("CURVE_NAME_L",curve_name.lower())
|
||||
text_file = open("./src/test_"+curve_name+".rs", "w")
|
||||
n = text_file.write(content)
|
||||
text_file.close()
|
||||
|
||||
with open('./src/curves/mod.rs', 'a') as f:
|
||||
f.write('\n pub mod ' + curve_name + ';')
|
||||
|
||||
with open('./src/lib.rs', 'a') as f:
|
||||
f.write('\npub mod ' + curve_name + ';')
|
||||
@@ -1,5 +1,4 @@
|
||||
cmake_minimum_required(VERSION 3.14)
|
||||
project(icicle)
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
# GoogleTest requires at least C++14
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
@@ -9,9 +8,9 @@ set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||
# add the target cuda architectures
|
||||
# each additional architecture increases the compilation time and output file size
|
||||
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
|
||||
set(CMAKE_CUDA_ARCHITECTURES 80)
|
||||
set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
|
||||
endif ()
|
||||
project(bellman-cuda LANGUAGES CUDA CXX)
|
||||
project(icicle LANGUAGES CUDA CXX)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
|
||||
set(CMAKE_CUDA_FLAGS_RELEASE "")
|
||||
|
||||
81
icicle/README.md
Normal file
81
icicle/README.md
Normal file
@@ -0,0 +1,81 @@
|
||||
# Tests
|
||||
|
||||
```sh
|
||||
mkdir -p build; cmake -S . -B build; cmake --build build; cd build && ctest; cd ..
|
||||
```
|
||||
|
||||
## Prerequisites on Ubuntu
|
||||
|
||||
Before proceeding, make sure the following software is installed:
|
||||
|
||||
1. CMake at least version 3.16, which can be downloaded from [cmake.org](https://cmake.org/files/)
|
||||
It is recommended to have the latest version installed.
|
||||
2. [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu) version 12.0 or newer.
|
||||
3. GCC - version 9 or newer recommended.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
In case you encounter problems during the build, please follow the points below to troubleshoot:
|
||||
|
||||
### 1 - Check CMake log files
|
||||
|
||||
If there are issues with the CMake configuration, please check the logs, which are located in the `./build/CMakeFiles` directory. Depending on the version of CMake, the log file may have a different name. For example, for CMake version 3.20, one of the log files is called `CMakeConfigureLog.yaml`.
|
||||
|
||||
### 2 - Check for conflicting GCC versions
|
||||
|
||||
Make sure that there are no conflicting versions of GCC installed. You can use the following commands to install and switch between different versions:
|
||||
|
||||
```sh
|
||||
sudo update-alternatives --install /usr/bin/gcc gcc /home/linuxbrew/.linuxbrew/bin/gcc-12 12
|
||||
sudo update-alternatives --install /usr/bin/g++ g++ /home/linuxbrew/.linuxbrew/bin/g++-12 12
|
||||
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 9
|
||||
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 9
|
||||
```
|
||||
|
||||
Then you can select the active version with the following command:
|
||||
|
||||
```sh
|
||||
sudo update-alternatives --config gcc
|
||||
```
|
||||
|
||||
### 3 - Check for conflicting binaries in PATH
|
||||
|
||||
Make sure that there are no conflicting binaries in the PATH environment variable. For example, if `/home/linuxbrew/.linuxbrew/bin` precedes `/usr/bin/` in the PATH, it will override the `update-alternatives` settings.
|
||||
|
||||
### 4 - Add nvvm path to PATH
|
||||
|
||||
If you encounter the error `cicc not found`, make sure to add the nvvm path to PATH. For example, for CUDA version 12.1, the nvvm path is `/usr/local/cuda-12.1/nvvm/bin`.
|
||||
|
||||
### 5 - Add CUDA libraries to the project
|
||||
|
||||
If you encounter the error `Failed to extract nvcc implicit link line`, add the following code to the CMakeLists.txt file after enabling CUDA:
|
||||
|
||||
```cmake
|
||||
check_language(CUDA)
|
||||
if(CMAKE_CUDA_COMPILER)
|
||||
enable_language(CUDA)
|
||||
find_package(CUDAToolkit)
|
||||
target_link_libraries(project CUDA::cudart)
|
||||
target_link_libraries(project CUDA::cuda_driver)
|
||||
else()
|
||||
message(STATUS "No CUDA compiler found")
|
||||
endif()
|
||||
```
|
||||
|
||||
### 6 - Fix update alternatives
|
||||
|
||||
If the `update-alternatives` settings are broken, you can try to fix them with the following command:
|
||||
|
||||
`yes '' | update-alternatives --force --all`
|
||||
|
||||
### 7 - ..bin/crt/link.stub: No such file or directory
|
||||
|
||||
If you encounter this error, check whether the `$CUDA_HOME/bin/crt/link.stub` file is available.
|
||||
|
||||
Otherwise, create a symlink. For example, if the CUDA toolkit is installed with apt-get to the default path, you can create a symlink with the following command:
|
||||
|
||||
`ln -sf /usr/local/cuda-12.1/bin/crt/link.stub /usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub`
|
||||
|
||||
Alternatively, you can replace the old CUDA root with a symlink to the new CUDA installation with the following command:
|
||||
|
||||
`ln -sf /usr/local/cuda-12.1/ /usr/lib/nvidia-cuda-toolkit/`
|
||||
4
icicle/appUtils/msm/Makefile
Normal file
4
icicle/appUtils/msm/Makefile
Normal file
@@ -0,0 +1,4 @@
|
||||
test_msm:
|
||||
mkdir -p work
|
||||
nvcc -o work/test_msm -I. tests/msm_test.cu
|
||||
work/test_msm
|
||||
@@ -1,3 +1,9 @@
|
||||
#ifndef MSM
|
||||
#define MSM
|
||||
#pragma once
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "../../primitives/affine.cuh"
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <cub/device/device_radix_sort.cuh>
|
||||
@@ -6,7 +12,6 @@
|
||||
#include "../../utils/cuda_utils.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../curves/curve_config.cuh"
|
||||
#include "msm.cuh"
|
||||
|
||||
|
||||
@@ -78,16 +83,17 @@ __global__ void split_scalars_kernel(unsigned *buckets_indices, unsigned *point_
|
||||
|
||||
//this kernel adds up the points in each bucket
|
||||
template <typename P, typename A>
|
||||
__global__ void accumulate_buckets_kernel(P *__restrict__ buckets, unsigned *__restrict__ bucket_offsets,
|
||||
unsigned *__restrict__ bucket_sizes, unsigned *__restrict__ single_bucket_indices, unsigned *__restrict__ point_indices, A *__restrict__ points, unsigned nof_buckets, unsigned batch_size, unsigned msm_idx_shift){
|
||||
// __global__ void accumulate_buckets_kernel(P *__restrict__ buckets, unsigned *__restrict__ bucket_offsets,
|
||||
// unsigned *__restrict__ bucket_sizes, unsigned *__restrict__ single_bucket_indices, unsigned *__restrict__ point_indices, A *__restrict__ points, unsigned nof_buckets, unsigned batch_size, unsigned msm_idx_shift){
|
||||
__global__ void accumulate_buckets_kernel(P *buckets, unsigned *bucket_offsets, unsigned *bucket_sizes, unsigned *single_bucket_indices, unsigned *point_indices, A *points, unsigned nof_buckets, unsigned *nof_buckets_to_compute, unsigned msm_idx_shift){
|
||||
|
||||
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
unsigned msm_index = single_bucket_indices[tid]>>msm_idx_shift;
|
||||
unsigned bucket_index = msm_index * nof_buckets + (single_bucket_indices[tid]&((1<<msm_idx_shift)-1));
|
||||
unsigned bucket_size = bucket_sizes[tid];
|
||||
if (tid>=nof_buckets*batch_size || bucket_size == 0){ //if the bucket is empty we don't need to continue
|
||||
if (tid ==0) printf("nof buckets to comp: %u", *nof_buckets_to_compute);
|
||||
if (tid>=*nof_buckets_to_compute){
|
||||
return;
|
||||
}
|
||||
unsigned msm_index = single_bucket_indices[tid]>>msm_idx_shift;
|
||||
unsigned bucket_index = msm_index * nof_buckets + (single_bucket_indices[tid]&((1<<msm_idx_shift)-1));
|
||||
unsigned bucket_offset = bucket_offsets[tid];
|
||||
for (unsigned i = 0; i < bucket_sizes[tid]; i++) //add the relevant points starting from the relevant offset up to the bucket size
|
||||
{
|
||||
@@ -101,7 +107,8 @@ template <typename P>
|
||||
__global__ void big_triangle_sum_kernel(P* buckets, P* final_sums, unsigned nof_bms, unsigned c){
|
||||
|
||||
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid>nof_bms) return;
|
||||
if (tid>=nof_bms) return;
|
||||
// printf("%u",tid);
|
||||
P line_sum = buckets[(tid+1)*(1<<c)-1];
|
||||
final_sums[tid] = line_sum;
|
||||
for (unsigned i = (1<<c)-2; i >0; i--)
|
||||
@@ -190,12 +197,13 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
|
||||
NUM_BLOCKS = (size * (nof_bms+1) + NUM_THREADS - 1) / NUM_THREADS;
|
||||
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(bucket_indices + size, point_indices + size, d_scalars, size, msm_log_size,
|
||||
nof_bms, bm_bitsize, c); //+size - leaving the first bm free for the out of place sort later
|
||||
|
||||
|
||||
//sort indices - the indices are sorted from smallest to largest in order to group together the points that belong to each bucket
|
||||
unsigned *sort_indices_temp_storage{};
|
||||
size_t sort_indices_temp_storage_bytes;
|
||||
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + size, bucket_indices,
|
||||
point_indices + size, point_indices, size);
|
||||
|
||||
cudaMalloc(&sort_indices_temp_storage, sort_indices_temp_storage_bytes);
|
||||
for (unsigned i = 0; i < nof_bms; i++) {
|
||||
unsigned offset_out = i * size;
|
||||
@@ -231,11 +239,10 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
|
||||
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, nof_buckets);
|
||||
cudaFree(offsets_temp_storage);
|
||||
|
||||
//launch the accumulation kernel with maximum threads
|
||||
NUM_THREADS = 1 << 8;
|
||||
NUM_THREADS = 1 << 8;
|
||||
NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, point_indices,
|
||||
d_points, nof_buckets, 1, c+bm_bitsize);
|
||||
d_points, nof_buckets, nof_buckets_to_compute, c+bm_bitsize);
|
||||
|
||||
#ifdef SSM_SUM
|
||||
//sum each bucket
|
||||
@@ -271,6 +278,7 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
|
||||
cudaDeviceSynchronize();
|
||||
if (!on_device)
|
||||
cudaMemcpy(final_result, d_final_result, sizeof(P), cudaMemcpyDeviceToHost);
|
||||
std::cout<<"final res "<<(*final_result)<<std::endl;
|
||||
|
||||
//free memory
|
||||
if (!on_device) {
|
||||
@@ -346,12 +354,6 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
|
||||
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + msm_size, sorted_bucket_indices,
|
||||
point_indices + msm_size, sorted_point_indices, total_size * nof_bms);
|
||||
cudaMalloc(&sort_indices_temp_storage, sort_indices_temp_storage_bytes);
|
||||
// for (unsigned i = 0; i < nof_bms*batch_size; i++) {
|
||||
// unsigned offset_out = i * msm_size;
|
||||
// unsigned offset_in = offset_out + msm_size;
|
||||
// cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + offset_in,
|
||||
// bucket_indices + offset_out, point_indices + offset_in, point_indices + offset_out, msm_size);
|
||||
// }
|
||||
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + msm_size, sorted_bucket_indices,
|
||||
point_indices + msm_size, sorted_point_indices, total_size * nof_bms);
|
||||
cudaFree(sort_indices_temp_storage);
|
||||
@@ -386,7 +388,7 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
|
||||
NUM_THREADS = 1 << 8;
|
||||
NUM_BLOCKS = (total_nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
|
||||
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, sorted_point_indices,
|
||||
d_points, nof_buckets, batch_size, c+bm_bitsize);
|
||||
d_points, nof_buckets, total_nof_buckets_to_compute, c+bm_bitsize);
|
||||
|
||||
#ifdef SSM_SUM
|
||||
//sum each bucket
|
||||
@@ -419,7 +421,7 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
|
||||
NUM_THREADS = 1<<8;
|
||||
NUM_BLOCKS = (batch_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
final_accumulation_kernel<P, S><<<NUM_BLOCKS,NUM_THREADS>>>(bm_sums, on_device ? final_results : d_final_results, batch_size, nof_bms, c);
|
||||
|
||||
|
||||
//copy final result to host
|
||||
cudaDeviceSynchronize();
|
||||
if (!on_device)
|
||||
@@ -456,8 +458,7 @@ __global__ void to_proj_kernel(A* affine_points, P* proj_points, unsigned N){
|
||||
|
||||
//the function computes msm using ssm
|
||||
template <typename S, typename P, typename A>
|
||||
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, bool on_device){ //works up to 2^8
|
||||
|
||||
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result){ //works up to 2^8
|
||||
S *scalars;
|
||||
A *a_points;
|
||||
P *p_points;
|
||||
@@ -502,12 +503,12 @@ void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, bool
|
||||
template <typename A, typename S, typename P>
|
||||
void reference_msm(S* scalars, A* a_points, unsigned size){
|
||||
|
||||
P points[size];
|
||||
P *points = new P[size];
|
||||
// P points[size];
|
||||
for (unsigned i = 0; i < size ; i++)
|
||||
{
|
||||
points[i] = P::from_affine(a_points[i]);
|
||||
}
|
||||
|
||||
|
||||
P res = P::zero();
|
||||
|
||||
@@ -522,7 +523,10 @@ void reference_msm(S* scalars, A* a_points, unsigned size){
|
||||
}
|
||||
|
||||
unsigned get_optimal_c(const unsigned size) {
|
||||
return 10;
|
||||
if (size < 17)
|
||||
return 1;
|
||||
// return 15;
|
||||
return ceil(log2(size))-4;
|
||||
}
|
||||
|
||||
//this function is used to compute msms of size larger than 256
|
||||
@@ -544,88 +548,4 @@ void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_
|
||||
unsigned bitsize = 255;
|
||||
batched_bucket_method_msm(bitsize, c, scalars, points, batch_size, msm_size, result, on_device);
|
||||
}
|
||||
|
||||
extern "C"
|
||||
int msm_cuda(projective_t *out, affine_t points[],
|
||||
scalar_t scalars[], size_t count, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (count>256){
|
||||
large_msm<scalar_t, projective_t, affine_t>(scalars, points, count, out, false);
|
||||
}
|
||||
else{
|
||||
short_msm<scalar_t, projective_t, affine_t>(scalars, points, count, out, false);
|
||||
}
|
||||
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * C ABI entry point for a batch of MSMs; forwards to batched_large_msm with on_device = false.
 * @param out Where the results are written.
 * @param points Bases of the MSMs.
 * @param scalars Scalars of the MSMs.
 * @param batch_size Number of MSMs in the batch.
 * @param msm_size Size of each MSM.
 * @param device_id Currently unused.
 * @return CUDA_SUCCESS on success, -1 if an exception was raised.
 */
extern "C" int msm_batch_cuda(projective_t* out, affine_t points[],
                              scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
  try
  {
    batched_large_msm<scalar_t, projective_t, affine_t>(scalars, points, batch_size, msm_size, out, false);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda(projective_t* d_out, scalar_t* d_scalars, affine_t* d_points, size_t count, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm(d_scalars, d_points, count, d_out, true);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda(projective_t* d_out, scalar_t* d_scalars, affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,10 +1,6 @@
|
||||
#ifndef MSM_H
|
||||
#define MSM_H
|
||||
#pragma once
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../primitives/affine.cuh"
|
||||
#include "../../curves/curve_config.cuh"
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device);
|
||||
@@ -20,3 +16,7 @@ void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, bool on_device);
|
||||
|
||||
template <typename A, typename S, typename P>
|
||||
void reference_msm(S* scalars, A* a_points, unsigned size);
|
||||
#endif
|
||||
256
icicle/appUtils/msm/tests/msm_test.cu
Normal file
256
icicle/appUtils/msm/tests/msm_test.cu
Normal file
@@ -0,0 +1,256 @@
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
#include "msm.cu"
|
||||
#include "../../utils/cuda_utils.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../curves/bls12_381/curve_config.cuh"
|
||||
|
||||
using namespace BLS12_381;
|
||||
|
||||
// Stand-in "point" for debugging the MSM: point addition is plain unsigned addition.
struct fake_point
{
  unsigned val = 0;

  // Sum of two fake points: adds the wrapped integers.
  __host__ __device__ inline fake_point operator+(fake_point fp) {
    fake_point sum;
    sum.val = val + fp.val;
    return sum;
  }

  // Identity element; a default-constructed fake_point already holds 0.
  __host__ __device__ fake_point zero() {
    return fake_point();
  }

};
|
||||
|
||||
std::ostream& operator<<(std::ostream &strm, const fake_point &a) {
|
||||
return strm <<a.val;
|
||||
}
|
||||
|
||||
struct fake_scalar
|
||||
{
|
||||
unsigned val = 0;
|
||||
unsigned bitsize = 32;
|
||||
|
||||
// __host__ __device__ unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width){
|
||||
|
||||
// return (val>>(digit_num*digit_width))&((1<<digit_width)-1);
|
||||
|
||||
// }
|
||||
__host__ __device__ int get_scalar_digit(int digit_num, int digit_width){
|
||||
|
||||
return (val>>(digit_num*digit_width))&((1<<digit_width)-1);
|
||||
|
||||
}
|
||||
|
||||
__host__ __device__ inline fake_point operator*(fake_point fp) {
|
||||
|
||||
fake_point p1;
|
||||
fake_point p2;
|
||||
unsigned x = val;
|
||||
if (x == 0) return fake_point().zero();
|
||||
|
||||
unsigned i = 1;
|
||||
unsigned c_bit = (x & (1<<(bitsize-1)))>>(bitsize-1);
|
||||
while (c_bit==0 && i<bitsize){
|
||||
i++;
|
||||
c_bit = (x & (1<<(bitsize-i)))>>(bitsize-i);
|
||||
}
|
||||
p1 = fp;
|
||||
p2 = p1+p1;
|
||||
while (i<bitsize){
|
||||
i++;
|
||||
c_bit = (x & (1<<(bitsize-i)))>>(bitsize-i);
|
||||
if (c_bit){
|
||||
p1 = p1 + p2;
|
||||
p2 = p2 + p2;
|
||||
}
|
||||
else {
|
||||
p2 = p1 + p2;
|
||||
p1 = p1 + p1;
|
||||
}
|
||||
}
|
||||
|
||||
return p1;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// Dummy 32-bit scalar used to debug the MSM with plain integer arithmetic.
class Dummy_Scalar {
  public:
    static constexpr unsigned NBITS = 32;

    unsigned x;

    friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar) {
      os << scalar.x;
      return os;
    }

    // Returns digit number `digit_num` of x when it is split into digits of `digit_width` bits.
    // Digits that start at or above bit NBITS are 0: the shift count is checked first because
    // shifting a 32-bit value by 32 or more is undefined behaviour.
    HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width) {
      const unsigned shift = digit_num * digit_width;
      if (shift >= NBITS) return 0;
      const unsigned mask = (digit_width >= NBITS) ? ~0u : ((1u << digit_width) - 1u);
      return (x >> shift) & mask;
    }

    friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2) {
      return {p1.x+p2.x};
    }

    friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) {
      return (p1.x == p2.x);
    }

    friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) {
      return (p1.x == p2);
    }

    // Host-only random scalar drawn from rand().
    static HOST_INLINE Dummy_Scalar rand_host() {
      return {(unsigned)rand()};
    }
};
|
||||
|
||||
class Dummy_Projective {
|
||||
|
||||
public:
|
||||
Dummy_Scalar x;
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective zero() {
|
||||
return {0};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective to_affine(const Dummy_Projective &point) {
|
||||
return {point.x};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective from_affine(const Dummy_Projective &point) {
|
||||
return {point.x};
|
||||
}
|
||||
|
||||
// static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
|
||||
// return {Dummy_Scalar::neg(point.x)};
|
||||
// }
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Projective operator+(Dummy_Projective p1, const Dummy_Projective& p2) {
|
||||
return {p1.x+p2.x};
|
||||
}
|
||||
|
||||
// friend HOST_DEVICE_INLINE Dummy_Projective operator-(Dummy_Projective p1, const Dummy_Projective& p2) {
|
||||
// return p1 + neg(p2);
|
||||
// }
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Projective& point) {
|
||||
os << point.x;
|
||||
return os;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Projective operator*(Dummy_Scalar scalar, const Dummy_Projective& point) {
|
||||
Dummy_Projective res = zero();
|
||||
#ifdef CUDA_ARCH
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < Dummy_Scalar::NBITS; i++) {
|
||||
if (i > 0) {
|
||||
res = res + res;
|
||||
}
|
||||
if (scalar.get_scalar_digit(Dummy_Scalar::NBITS - i - 1, 1)) {
|
||||
res = res + point;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Projective& p1, const Dummy_Projective& p2) {
|
||||
return (p1.x == p2.x);
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool is_zero(const Dummy_Projective &point) {
|
||||
return point.x == 0;
|
||||
}
|
||||
|
||||
static HOST_INLINE Dummy_Projective rand_host() {
|
||||
return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
|
||||
//switch between dummy and real:
|
||||
|
||||
typedef scalar_t test_scalar;
|
||||
typedef projective_t test_projective;
|
||||
typedef affine_t test_affine;
|
||||
|
||||
// typedef Dummy_Scalar test_scalar;
|
||||
// typedef Dummy_Projective test_projective;
|
||||
// typedef Dummy_Projective test_affine;
|
||||
|
||||
int main()
|
||||
{
|
||||
unsigned batch_size = 4;
|
||||
unsigned msm_size = 1<<15;
|
||||
unsigned N = batch_size*msm_size;
|
||||
|
||||
test_scalar *scalars = new test_scalar[N];
|
||||
test_affine *points = new test_affine[N];
|
||||
|
||||
for (unsigned i=0;i<N;i++){
|
||||
scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
|
||||
points[i] = (i%msm_size < 10)? test_projective::to_affine(test_projective::rand_host()): points[i-10];
|
||||
// scalars[i] = test_scalar::rand_host();
|
||||
// points[i] = test_projective::to_affine(test_projective::rand_host());
|
||||
}
|
||||
std::cout<<"finished generating"<<std::endl;
|
||||
|
||||
// projective_t *short_res = (projective_t*)malloc(sizeof(projective_t));
|
||||
// test_projective *large_res = (test_projective*)malloc(sizeof(test_projective));
|
||||
test_projective large_res[batch_size];
|
||||
test_projective batched_large_res[batch_size];
|
||||
// fake_point *large_res = (fake_point*)malloc(sizeof(fake_point));
|
||||
// fake_point batched_large_res[256];
|
||||
|
||||
|
||||
// short_msm<scalar_t, projective_t, affine_t>(scalars, points, N, short_res);
|
||||
for (unsigned i=0;i<batch_size;i++){
|
||||
large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
|
||||
// std::cout<<"final result large"<<std::endl;
|
||||
// std::cout<<test_projective::to_affine(*large_res)<<std::endl;
|
||||
}
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
|
||||
// large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
|
||||
printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
|
||||
std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
|
||||
|
||||
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
|
||||
|
||||
std::cout<<"final results batched large"<<std::endl;
|
||||
bool success = true;
|
||||
for (unsigned i = 0; i < batch_size; i++)
|
||||
{
|
||||
std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
|
||||
if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
|
||||
std::cout<<"good"<<std::endl;
|
||||
}
|
||||
else{
|
||||
std::cout<<"miss"<<std::endl;
|
||||
std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
if (success){
|
||||
std::cout<<"success!"<<std::endl;
|
||||
}
|
||||
|
||||
// std::cout<<batched_large_res[0]<<std::endl;
|
||||
// std::cout<<batched_large_res[1]<<std::endl;
|
||||
// std::cout<<projective_t::to_affine(batched_large_res[0])<<std::endl;
|
||||
// std::cout<<projective_t::to_affine(batched_large_res[1])<<std::endl;
|
||||
|
||||
// std::cout<<"final result short"<<std::endl;
|
||||
// std::cout<<pr<<std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,7 +1,9 @@
|
||||
#ifndef LDE
|
||||
#define LDE
|
||||
#include <cuda.h>
|
||||
#include "ntt.cuh"
|
||||
#include "../vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "lde.cuh"
|
||||
#include "../vector_manipulation/ve_mod_mult.cuh"
|
||||
|
||||
|
||||
/**
|
||||
@@ -25,7 +27,7 @@ template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluat
|
||||
}
|
||||
|
||||
NUM_BLOCKS = (n * batch_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
template_normalize_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>> (d_out, n * batch_size, scalar_t::inv_log_size(logn));
|
||||
template_normalize_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>> (d_out, n * batch_size, S::inv_log_size(logn));
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -65,13 +67,13 @@ int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_
|
||||
if (domain_size > n) {
|
||||
// allocate and initialize an array of stream handles to parallelize data copying across batches
|
||||
cudaStream_t *memcpy_streams = (cudaStream_t *) malloc(batch_size * sizeof(cudaStream_t));
|
||||
for (int i = 0; i < batch_size; i++)
|
||||
for (unsigned i = 0; i < batch_size; i++)
|
||||
{
|
||||
cudaStreamCreate(&(memcpy_streams[i]));
|
||||
|
||||
cudaMemcpyAsync(&d_out[i * domain_size], &d_coefficients[i * n], n * sizeof(E), cudaMemcpyDeviceToDevice, memcpy_streams[i]);
|
||||
int NUM_THREADS = MAX_THREADS_BATCH;
|
||||
int NUM_BLOCKS = (domain_size - n + NUM_THREADS - 1) / NUM_THREADS;
|
||||
uint32_t NUM_THREADS = MAX_THREADS_BATCH;
|
||||
uint32_t NUM_BLOCKS = (domain_size - n + NUM_THREADS - 1) / NUM_THREADS;
|
||||
fill_array <E> <<<NUM_BLOCKS, NUM_THREADS, 0, memcpy_streams[i]>>> (&d_out[i * domain_size + n], E::zero(), domain_size - n);
|
||||
|
||||
cudaStreamSynchronize(memcpy_streams[i]);
|
||||
@@ -109,355 +111,72 @@ int evaluate(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size,
|
||||
return evaluate_batch <E, S> (d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers);
|
||||
}
|
||||
|
||||
int interpolate_scalars(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n) {
|
||||
template <typename S>
|
||||
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n) {
|
||||
return interpolate(d_out, d_evaluations, d_domain, n);
|
||||
}
|
||||
|
||||
int interpolate_scalars_batch(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n, unsigned batch_size) {
|
||||
template <typename S>
|
||||
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size) {
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
|
||||
}
|
||||
|
||||
int interpolate_points(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain, unsigned n) {
|
||||
template <typename E, typename S>
|
||||
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n) {
|
||||
return interpolate(d_out, d_evaluations, d_domain, n);
|
||||
}
|
||||
|
||||
int interpolate_points_batch(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain, unsigned n, unsigned batch_size) {
|
||||
template <typename E, typename S>
|
||||
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size) {
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
|
||||
}
|
||||
|
||||
int evaluate_scalars(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n) {
|
||||
scalar_t* _null = nullptr;
|
||||
template <typename S>
|
||||
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n) {
|
||||
S* _null = nullptr;
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
|
||||
}
|
||||
|
||||
int evaluate_scalars_batch(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size) {
|
||||
scalar_t* _null = nullptr;
|
||||
template <typename S>
|
||||
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size) {
|
||||
S* _null = nullptr;
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
|
||||
}
|
||||
|
||||
int evaluate_points(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n) {
|
||||
scalar_t* _null = nullptr;
|
||||
template <typename E, typename S>
|
||||
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n) {
|
||||
S* _null = nullptr;
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
|
||||
}
|
||||
|
||||
int evaluate_points_batch(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain,
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned batch_size) {
|
||||
scalar_t* _null = nullptr;
|
||||
S* _null = nullptr;
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
|
||||
}
|
||||
|
||||
int evaluate_scalars_on_coset(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain,
|
||||
unsigned domain_size, unsigned n, scalar_t* coset_powers) {
|
||||
template <typename S>
|
||||
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, S* coset_powers) {
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
|
||||
}
|
||||
|
||||
int evaluate_scalars_on_coset_batch(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t* coset_powers) {
|
||||
template <typename E, typename S>
|
||||
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, S* coset_powers) {
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
|
||||
}
|
||||
|
||||
int evaluate_points_on_coset(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain,
|
||||
unsigned domain_size, unsigned n, scalar_t* coset_powers) {
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, S* coset_powers) {
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
|
||||
}
|
||||
|
||||
int evaluate_points_on_coset_batch(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t* coset_powers) {
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, S* coset_powers) {
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
|
||||
}
|
||||
|
||||
/**
 * C ABI entry point that builds a twiddle-factor array via fill_twiddle_factors_array.
 * Uses scalar_t::omega_inv(logn) when `inverse` is set and scalar_t::omega(logn) otherwise.
 * @param device_id Currently unused.
 * @return The pointer returned by fill_twiddle_factors_array, or nullptr if an exception was raised.
 */
extern "C" scalar_t* build_domain_cuda(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
  try
  {
    if (inverse) {
      return fill_twiddle_factors_array(domain_size, scalar_t::omega_inv(logn));
    }
    return fill_twiddle_factors_array(domain_size, scalar_t::omega(logn));
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return nullptr;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return nullptr;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to ntt_end2end(arr, n, inverse).
 * @param device_id Currently unused.
 * @return The value returned by ntt_end2end, or -1 if an exception was raised.
 */
extern "C" int ntt_cuda(scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to ecntt_end2end(arr, n, inverse).
 * @param device_id Currently unused.
 * @return The value returned by ecntt_end2end, or -1 if an exception was raised.
 */
extern "C" int ecntt_cuda(projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  try
  {
    return ecntt_end2end(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to ntt_end2end_batch(arr, arr_size, batch_size, inverse).
 * @param device_id Currently unused.
 * @return The value returned by ntt_end2end_batch, or -1 if an exception was raised.
 */
extern "C" int ntt_batch_cuda(scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_batch(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to ecntt_end2end_batch(arr, arr_size, batch_size, inverse).
 * @param device_id Currently unused.
 * @return The value returned by ecntt_end2end_batch, or -1 if an exception was raised.
 */
extern "C" int ecntt_batch_cuda(projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  try
  {
    return ecntt_end2end_batch(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to interpolate_scalars(d_out, d_evaluations, d_domain, n).
 * @param device_id Currently unused.
 * @return The value returned by interpolate_scalars, or -1 if an exception was raised.
 */
extern "C" int interpolate_scalars_cuda(scalar_t* d_out, scalar_t *d_evaluations, scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
  try
  {
    return interpolate_scalars(d_out, d_evaluations, d_domain, n); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to interpolate_scalars_batch(d_out, d_evaluations, d_domain, n, batch_size).
 * @param device_id Currently unused.
 * @return The value returned by interpolate_scalars_batch, or -1 if an exception was raised.
 */
extern "C" int interpolate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n,
                                              unsigned batch_size, size_t device_id = 0)
{
  try
  {
    return interpolate_scalars_batch(d_out, d_evaluations, d_domain, n, batch_size); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to interpolate_points(d_out, d_evaluations, d_domain, n).
 * @param device_id Currently unused.
 * @return The value returned by interpolate_points, or -1 if an exception was raised.
 */
extern "C" int interpolate_points_cuda(projective_t* d_out, projective_t *d_evaluations, scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
  try
  {
    return interpolate_points(d_out, d_evaluations, d_domain, n); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to interpolate_points_batch(d_out, d_evaluations, d_domain, n, batch_size).
 * @param device_id Currently unused.
 * @return The value returned by interpolate_points_batch, or -1 if an exception was raised.
 */
extern "C" int interpolate_points_batch_cuda(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain,
                                             unsigned n, unsigned batch_size, size_t device_id = 0)
{
  try
  {
    return interpolate_points_batch(d_out, d_evaluations, d_domain, n, batch_size); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to evaluate_scalars(d_out, d_coefficients, d_domain, domain_size, n).
 * @param device_id Currently unused.
 * @return The value returned by evaluate_scalars, or -1 if an exception was raised.
 */
extern "C" int evaluate_scalars_cuda(scalar_t* d_out, scalar_t *d_coefficients, scalar_t *d_domain,
                                     unsigned domain_size, unsigned n, unsigned device_id = 0)
{
  try
  {
    return evaluate_scalars(d_out, d_coefficients, d_domain, domain_size, n); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to
 * evaluate_scalars_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size).
 * @param device_id Currently unused.
 * @return The value returned by evaluate_scalars_batch, or -1 if an exception was raised.
 */
extern "C" int evaluate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
                                           unsigned n, unsigned batch_size, size_t device_id = 0)
{
  try
  {
    return evaluate_scalars_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to evaluate_points(d_out, d_coefficients, d_domain, domain_size, n).
 * @param device_id Currently unused.
 * @return The value returned by evaluate_points, or -1 if an exception was raised.
 */
extern "C" int evaluate_points_cuda(projective_t* d_out, projective_t *d_coefficients, scalar_t *d_domain,
                                    unsigned domain_size, unsigned n, size_t device_id = 0)
{
  try
  {
    return evaluate_points(d_out, d_coefficients, d_domain, domain_size, n); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to
 * evaluate_points_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size).
 * @param device_id Currently unused.
 * @return The value returned by evaluate_points_batch, or -1 if an exception was raised.
 */
extern "C" int evaluate_points_batch_cuda(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
                                          unsigned n, unsigned batch_size, size_t device_id = 0)
{
  try
  {
    return evaluate_points_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to
 * evaluate_scalars_on_coset(d_out, d_coefficients, d_domain, domain_size, n, coset_powers).
 * @param device_id Currently unused.
 * @return The value returned by evaluate_scalars_on_coset, or -1 if an exception was raised.
 */
extern "C" int evaluate_scalars_on_coset_cuda(scalar_t* d_out, scalar_t *d_coefficients, scalar_t *d_domain, unsigned domain_size,
                                              unsigned n, scalar_t *coset_powers, unsigned device_id = 0)
{
  try
  {
    return evaluate_scalars_on_coset(d_out, d_coefficients, d_domain, domain_size, n, coset_powers); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to
 * evaluate_scalars_on_coset_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, coset_powers).
 * @param device_id Currently unused.
 * @return The value returned by evaluate_scalars_on_coset_batch, or -1 if an exception was raised.
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
                                                    unsigned n, unsigned batch_size, scalar_t *coset_powers, size_t device_id = 0)
{
  try
  {
    return evaluate_scalars_on_coset_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, coset_powers); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to
 * evaluate_points_on_coset(d_out, d_coefficients, d_domain, domain_size, n, coset_powers).
 * @param device_id Currently unused.
 * @return The value returned by evaluate_points_on_coset, or -1 if an exception was raised.
 */
extern "C" int evaluate_points_on_coset_cuda(projective_t* d_out, projective_t *d_coefficients, scalar_t *d_domain, unsigned domain_size,
                                             unsigned n, scalar_t *coset_powers, size_t device_id = 0)
{
  try
  {
    return evaluate_points_on_coset(d_out, d_coefficients, d_domain, domain_size, n, coset_powers); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that forwards to
 * evaluate_points_on_coset_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, coset_powers).
 * @param device_id Currently unused.
 * @return The value returned by evaluate_points_on_coset_batch, or -1 if an exception was raised.
 */
extern "C" int evaluate_points_on_coset_batch_cuda(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
                                                   unsigned n, unsigned batch_size, scalar_t *coset_powers, size_t device_id = 0)
{
  try
  {
    return evaluate_points_on_coset_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, coset_powers); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that calls reverse_order(arr, n, logn) with logn = floor(log2(n)).
 * @param n Number of elements; must be positive.
 * @param device_id Currently unused.
 * @return 0 on success, -1 if n is not positive or an exception was raised.
 */
extern "C" int reverse_order_scalars_cuda(scalar_t* arr, int n, size_t device_id = 0)
{
  try
  {
    if (n <= 0) {
      printf("error %s", "n must be positive");
      return -1;
    }
    // integer floor(log2(n)): avoids the rounding and domain problems of log(n) / log(2)
    uint32_t logn = 0;
    for (uint32_t rest = static_cast<uint32_t>(n); rest > 1; rest >>= 1) {
      ++logn;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that calls reverse_order_batch(arr, n, logn, batch_size) with logn = floor(log2(n)).
 * @param n Passed to reverse_order_batch together with its log2; must be positive.
 * @param device_id Currently unused.
 * @return 0 on success, -1 if n is not positive or an exception was raised.
 */
extern "C" int reverse_order_scalars_batch_cuda(scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    if (n <= 0) {
      printf("error %s", "n must be positive");
      return -1;
    }
    // integer floor(log2(n)): avoids the rounding and domain problems of log(n) / log(2)
    uint32_t logn = 0;
    for (uint32_t rest = static_cast<uint32_t>(n); rest > 1; rest >>= 1) {
      ++logn;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that calls reverse_order(arr, n, logn) with logn = floor(log2(n)).
 * @param n Number of elements; must be positive.
 * @param device_id Currently unused.
 * @return 0 on success, -1 if n is not positive or an exception was raised.
 */
extern "C" int reverse_order_points_cuda(projective_t* arr, int n, size_t device_id = 0)
{
  try
  {
    if (n <= 0) {
      printf("error %s", "n must be positive");
      return -1;
    }
    // integer floor(log2(n)): avoids the rounding and domain problems of log(n) / log(2)
    uint32_t logn = 0;
    for (uint32_t rest = static_cast<uint32_t>(n); rest > 1; rest >>= 1) {
      ++logn;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C ABI entry point that calls reverse_order_batch(arr, n, logn, batch_size) with logn = floor(log2(n)).
 * @param n Passed to reverse_order_batch together with its log2; must be positive.
 * @param device_id Currently unused.
 * @return 0 on success, -1 if n is not positive or an exception was raised.
 */
extern "C" int reverse_order_points_batch_cuda(projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    if (n <= 0) {
      printf("error %s", "n must be positive");
      return -1;
    }
    // integer floor(log2(n)): avoids the rounding and domain problems of log(n) / log(2)
    uint32_t logn = 0;
    for (uint32_t rest = static_cast<uint32_t>(n); rest > 1; rest >>= 1) {
      ++logn;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // extern "C" callers cannot handle a C++ exception, so nothing may escape this frame
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
#endif
|
||||
@@ -1,31 +1,46 @@
|
||||
#ifndef LDE_H
|
||||
#define LDE_H
|
||||
#pragma once
|
||||
|
||||
template <typename S>
|
||||
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n);
|
||||
|
||||
int interpolate_scalars(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n);
|
||||
template <typename S>
|
||||
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size);
|
||||
|
||||
int interpolate_scalars_batch(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n, unsigned batch_size);
|
||||
template <typename E, typename S>
|
||||
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n);
|
||||
|
||||
int interpolate_points(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain, unsigned n);
|
||||
template <typename E, typename S>
|
||||
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size);
|
||||
|
||||
int interpolate_points_batch(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain, unsigned n, unsigned batch_size);
|
||||
template <typename S>
|
||||
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n);
|
||||
|
||||
int evaluate_scalars(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n);
|
||||
template <typename S>
|
||||
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size);
|
||||
|
||||
int evaluate_scalars_batch(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size);
|
||||
template <typename E, typename S>
|
||||
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n);
|
||||
|
||||
int evaluate_points(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n);
|
||||
|
||||
int evaluate_points_batch(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain,
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned batch_size);
|
||||
|
||||
int evaluate_scalars_on_coset(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain,
|
||||
unsigned domain_size, unsigned n, scalar_t* coset_powers);
|
||||
template <typename S>
|
||||
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, S* coset_powers);
|
||||
|
||||
int evaluate_scalars_on_coset_batch(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t* coset_powers);
|
||||
template <typename S>
|
||||
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, S* coset_powers);
|
||||
|
||||
int evaluate_points_on_coset(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain,
|
||||
unsigned domain_size, unsigned n, scalar_t* coset_powers);
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, S* coset_powers);
|
||||
|
||||
int evaluate_points_on_coset_batch(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, scalar_t* coset_powers);
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, S* coset_powers);
|
||||
|
||||
#endif
|
||||
@@ -1,54 +0,0 @@
|
||||
#include <cuda.h>
|
||||
#include "ntt.cuh"
|
||||
|
||||
/**
 * C-ABI wrapper around ntt_end2end for scalar arrays.
 * device_id is accepted but not used yet.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int ntt_cuda(scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    // TODO: pass device_id
    status = ntt_end2end(arr, n, inverse);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper around ecntt_end2end for arrays of projective points.
 * device_id is accepted but not used yet.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int ecntt_cuda(projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    // TODO: pass device_id
    status = ecntt_end2end(arr, n, inverse);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper around ntt_end2end_batch for scalar arrays.
 * batch_size is forwarded as the callee's third argument; device_id is not used yet.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int ntt_batch_cuda(scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    // TODO: pass device_id
    status = ntt_end2end_batch(arr, arr_size, batch_size, inverse);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper around ecntt_end2end_batch for arrays of projective points.
 * batch_size is forwarded as the callee's third argument; device_id is not used yet.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int ecntt_batch_cuda(projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    // TODO: pass device_id
    status = ecntt_end2end_batch(arr, arr_size, batch_size, inverse);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
@@ -1,22 +1,10 @@
|
||||
#ifndef NTT
|
||||
#define NTT
|
||||
#pragma once
|
||||
#include "../../curves/curve_config.cuh"
|
||||
|
||||
const uint32_t MAX_NUM_THREADS = 1024;
|
||||
const uint32_t MAX_THREADS_BATCH = 256;
|
||||
|
||||
/**
|
||||
* Copy twiddle factors array to device (returns a pointer to the device allocated array).
|
||||
* @param twiddles input empty array.
|
||||
* @param n_twiddles length of twiddle factors.
|
||||
*/
|
||||
scalar_t * copy_twiddle_factors_to_device(scalar_t * twiddles, uint32_t n_twiddles) {
|
||||
size_t size_twiddles = n_twiddles * sizeof(scalar_t);
|
||||
scalar_t * d_twiddles;
|
||||
cudaMalloc( & d_twiddles, size_twiddles);
|
||||
cudaMemcpy(d_twiddles, twiddles, size_twiddles, cudaMemcpyHostToDevice);
|
||||
return d_twiddles;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the twiddle factors.
|
||||
* Outputs: d_twiddles[i] = omega^i.
|
||||
@@ -24,8 +12,11 @@ const uint32_t MAX_THREADS_BATCH = 256;
|
||||
* @param n_twiddles number of twiddle factors.
|
||||
* @param omega multiplying factor.
|
||||
*/
|
||||
__global__ void twiddle_factors_kernel(scalar_t * d_twiddles, uint32_t n_twiddles, scalar_t omega) {
|
||||
d_twiddles[0] = scalar_t::one();
|
||||
template < typename S > __global__ void twiddle_factors_kernel(S * d_twiddles, uint32_t n_twiddles, S omega) {
|
||||
for (uint32_t i = 0; i < n_twiddles; i++) {
|
||||
d_twiddles[i] = S::zero();
|
||||
}
|
||||
d_twiddles[0] = S::one();
|
||||
for (uint32_t i = 0; i < n_twiddles - 1; i++) {
|
||||
d_twiddles[i + 1] = omega * d_twiddles[i];
|
||||
}
|
||||
@@ -37,11 +28,11 @@ __global__ void twiddle_factors_kernel(scalar_t * d_twiddles, uint32_t n_twiddle
|
||||
* @param n_twiddles number of twiddle factors.
|
||||
* @param omega multiplying factor.
|
||||
*/
|
||||
scalar_t * fill_twiddle_factors_array(uint32_t n_twiddles, scalar_t omega) {
|
||||
size_t size_twiddles = n_twiddles * sizeof(scalar_t);
|
||||
scalar_t * d_twiddles;
|
||||
template < typename S > S * fill_twiddle_factors_array(uint32_t n_twiddles, S omega) {
|
||||
size_t size_twiddles = n_twiddles * sizeof(S);
|
||||
S * d_twiddles;
|
||||
cudaMalloc( & d_twiddles, size_twiddles);
|
||||
twiddle_factors_kernel <<< 1, 1 >>> (d_twiddles, n_twiddles, omega);
|
||||
twiddle_factors_kernel<S> <<< 1, 1 >>> (d_twiddles, n_twiddles, omega);
|
||||
return d_twiddles;
|
||||
}
|
||||
|
||||
@@ -55,7 +46,7 @@ scalar_t * fill_twiddle_factors_array(uint32_t n_twiddles, scalar_t omega) {
|
||||
*/
|
||||
__device__ __host__ uint32_t reverseBits(uint32_t num, uint32_t logn) {
|
||||
unsigned int reverse_num = 0;
|
||||
for (int i = 0; i < logn; i++) {
|
||||
for (uint32_t i = 0; i < logn; i++) {
|
||||
if ((num & (1 << i))) reverse_num |= 1 << ((logn - 1) - i);
|
||||
}
|
||||
return reverse_num;
|
||||
@@ -168,9 +159,9 @@ template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr
|
||||
uint32_t m = 2;
|
||||
for (uint32_t s = 0; s < logn; s++) {
|
||||
for (uint32_t i = 0; i < n; i += m) {
|
||||
int shifted_m = m >> 1;
|
||||
int number_of_threads = MAX_NUM_THREADS ^ ((shifted_m ^ MAX_NUM_THREADS) & -(shifted_m < MAX_NUM_THREADS));
|
||||
int number_of_blocks = shifted_m / MAX_NUM_THREADS + 1;
|
||||
uint32_t shifted_m = m >> 1;
|
||||
uint32_t number_of_threads = MAX_NUM_THREADS ^ ((shifted_m ^ MAX_NUM_THREADS) & -(shifted_m < MAX_NUM_THREADS));
|
||||
uint32_t number_of_blocks = shifted_m / MAX_NUM_THREADS + 1;
|
||||
template_butterfly_kernel < E, S > <<< number_of_threads, number_of_blocks >>> (d_arr, d_twiddles, n, n_twiddles, m, i, m >> 1);
|
||||
}
|
||||
m <<= 1;
|
||||
@@ -204,74 +195,23 @@ template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_
|
||||
return arrReversed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cooley-Tukey Elliptic Curve NTT.
|
||||
* NOTE! this function assumes that d_twiddles are located in the device memory.
|
||||
* @param arr input array of type projective_t.
|
||||
* @param n length of d_arr.
|
||||
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
|
||||
* @param n_twiddles length of d_twiddles.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
projective_t * ecntt(projective_t * arr, uint32_t n, scalar_t * d_twiddles, uint32_t n_twiddles, bool inverse) {
|
||||
return ntt_template < projective_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
|
||||
}
|
||||
|
||||
/**
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* NOTE! this function assumes that d_twiddles are located in the device memory.
|
||||
* @param arr input array of type scalar_t.
|
||||
* @param n length of d_arr.
|
||||
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
|
||||
* @param n_twiddles length of d_twiddles.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
scalar_t * ntt(scalar_t * arr, uint32_t n, scalar_t * d_twiddles, uint32_t n_twiddles, bool inverse) {
|
||||
return ntt_template < scalar_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* @param arr input array of type scalar_t.
|
||||
* @param arr input array of type E (element).
|
||||
* @param n length of d_arr.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
extern "C" uint32_t ntt_end2end(scalar_t * arr, uint32_t n, bool inverse) {
  // floor(log2(n)) with integer shifts: log(n) / log(2) in floating point may round
  // just below the exact exponent, and its conversion is undefined for n == 0.
  uint32_t logn = 0;
  for (uint32_t m = n; m > 1; m >>= 1) {
    logn++;
  }
  uint32_t n_twiddles = n; // one twiddle factor per input element
  scalar_t * d_twiddles;
  // The inverse transform uses the inverse root for its twiddle factors.
  if (inverse){
    d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
  } else {
    d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
  }
  scalar_t * result = ntt_template < scalar_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
  // Write the transformed values back over the caller's array.
  for (uint32_t i = 0; i < n; i++) {
    arr[i] = result[i];
  }
  // NOTE(review): `result` is not released here - confirm who owns the buffer returned by ntt_template.
  cudaFree(d_twiddles);
  return 0;
}
|
||||
|
||||
|
||||
/**
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* @param arr input array of type projective_t.
|
||||
* @param n length of d_arr.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
extern "C" uint32_t ecntt_end2end(projective_t * arr, uint32_t n, bool inverse) {
|
||||
template<typename E,typename S> uint32_t ntt_end2end_template(E * arr, uint32_t n, bool inverse) {
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
uint32_t n_twiddles = n;
|
||||
scalar_t * twiddles = new scalar_t[n_twiddles];
|
||||
scalar_t * d_twiddles;
|
||||
S * twiddles = new S[n_twiddles];
|
||||
S * d_twiddles;
|
||||
if (inverse){
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn));
|
||||
} else{
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn));
|
||||
}
|
||||
projective_t * result = ntt_template < projective_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
|
||||
E * result = ntt_template < E, S > (arr, n, d_twiddles, n_twiddles, inverse);
|
||||
for(int i = 0; i < n; i++){
|
||||
arr[i] = result[i];
|
||||
}
|
||||
@@ -289,14 +229,14 @@ scalar_t * ntt(scalar_t * arr, uint32_t n, scalar_t * d_twiddles, uint32_t n_twi
|
||||
* @param logn log(n).
|
||||
* @param task index of the sub-array within the batch to reorder (elements are addressed as arr[task * n + i]).
|
||||
*/
|
||||
template < typename T > __device__ __host__ void reverseOrder_batch(T * arr, uint32_t n, uint32_t logn, uint32_t task) {
|
||||
template < typename T > __device__ __host__ void reverseOrder_batch(T * arr, uint32_t n, uint32_t logn, uint32_t task) {
|
||||
for (uint32_t i = 0; i < n; i++) {
|
||||
uint32_t reversed = reverseBits(i, logn);
|
||||
if (reversed > i) {
|
||||
T tmp = arr[task * n + i];
|
||||
arr[task * n + i] = arr[task * n + reversed];
|
||||
arr[task * n + reversed] = tmp;
|
||||
}
|
||||
uint32_t reversed = reverseBits(i, logn);
|
||||
if (reversed > i) {
|
||||
T tmp = arr[task * n + i];
|
||||
arr[task * n + i] = arr[task * n + reversed];
|
||||
arr[task * n + reversed] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -387,34 +327,32 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//TODO: batch ntt and ecntt can be unified into batch_template
|
||||
/**
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* This is a batched version - meaning it assumes that the input array
|
||||
* consists of N arrays of size n. The function performs n-size NTT on each small array.
|
||||
* @param arr input array of type scalar_t.
|
||||
* @param arr input array of type BLS12_381::scalar_t.
|
||||
* @param arr_size number of total elements = n * N.
|
||||
* @param n size of batch.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
extern "C" uint32_t ntt_end2end_batch(scalar_t * arr, uint32_t arr_size, uint32_t n, bool inverse) {
|
||||
template <typename E, typename S> uint32_t ntt_end2end_batch_template(E * arr, uint32_t arr_size, uint32_t n, bool inverse) {
|
||||
int batches = int(arr_size / n);
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as scalar_t::omega() is of that order.
|
||||
size_t size_E = arr_size * sizeof(scalar_t);
|
||||
scalar_t * d_twiddles;
|
||||
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as BLS12_381::scalar_t::omega() is of that order.
|
||||
size_t size_E = arr_size * sizeof(E);
|
||||
S * d_twiddles;
|
||||
if (inverse){
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn));
|
||||
} else{
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn));
|
||||
}
|
||||
scalar_t * d_arr;
|
||||
E * d_arr;
|
||||
cudaMalloc( & d_arr, size_E);
|
||||
cudaMemcpy(d_arr, arr, size_E, cudaMemcpyHostToDevice);
|
||||
int NUM_THREADS = MAX_THREADS_BATCH;
|
||||
int NUM_BLOCKS = (batches + NUM_THREADS - 1) / NUM_THREADS;
|
||||
ntt_template_kernel_rev_ord<scalar_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
|
||||
ntt_template_kernel_rev_ord<E, S><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
|
||||
|
||||
NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
|
||||
int chunks = max(int((n / 2) / NUM_THREADS), 1);
|
||||
@@ -423,14 +361,13 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
|
||||
|
||||
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
|
||||
{
|
||||
ntt_template_kernel<scalar_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
|
||||
ntt_template_kernel<E, S><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
|
||||
}
|
||||
|
||||
if (inverse == true)
|
||||
{
|
||||
NUM_THREADS = MAX_NUM_THREADS;
|
||||
NUM_BLOCKS = (arr_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
template_normalize_kernel < scalar_t, scalar_t > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arr, arr_size, scalar_t::inv_log_size(logn));
|
||||
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arr, arr_size, S::inv_log_size(logn));
|
||||
}
|
||||
cudaMemcpy(arr, d_arr, size_E, cudaMemcpyDeviceToHost);
|
||||
cudaFree(d_arr);
|
||||
@@ -438,50 +375,4 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* This is a bached version - meaning it assumes than the input array
|
||||
* consists of N arrays of size n. The function performs n-size NTT on each small array.
|
||||
* @param arr input array of type scalar_t.
|
||||
* @param arr_size number of total elements = n * N.
|
||||
* @param n size of batch.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
extern "C" uint32_t ecntt_end2end_batch(projective_t * arr, uint32_t arr_size, uint32_t n, bool inverse) {
|
||||
int batches = int(arr_size / n);
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as scalar_t::omega() is of that order.
|
||||
size_t size_E = arr_size * sizeof(projective_t);
|
||||
scalar_t * d_twiddles;
|
||||
if (inverse){
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
|
||||
} else{
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
|
||||
}
|
||||
projective_t * d_arr;
|
||||
cudaMalloc( & d_arr, size_E);
|
||||
cudaMemcpy(d_arr, arr, size_E, cudaMemcpyHostToDevice);
|
||||
int NUM_THREADS = MAX_THREADS_BATCH;
|
||||
int NUM_BLOCKS = (batches + NUM_THREADS - 1) / NUM_THREADS;
|
||||
ntt_template_kernel_rev_ord<projective_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
|
||||
|
||||
NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
|
||||
int chunks = max(int((n / 2) / NUM_THREADS), 1);
|
||||
int total_tasks = batches * chunks;
|
||||
NUM_BLOCKS = total_tasks;
|
||||
|
||||
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
|
||||
{
|
||||
ntt_template_kernel<projective_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
|
||||
}
|
||||
if (inverse == true)
|
||||
{
|
||||
NUM_THREADS = MAX_NUM_THREADS;
|
||||
NUM_BLOCKS = (arr_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
template_normalize_kernel < projective_t, scalar_t > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arr, arr_size, scalar_t::inv_log_size(logn));
|
||||
}
|
||||
cudaMemcpy(arr, d_arr, size_E, cudaMemcpyDeviceToHost);
|
||||
cudaFree(d_arr);
|
||||
cudaFree(d_twiddles);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
@@ -1,3 +1,5 @@
|
||||
#ifndef VEC_MULT
|
||||
#define VEC_MULT
|
||||
#pragma once
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
@@ -108,3 +110,4 @@ int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t di
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
22
icicle/curves/bls12_377/curve_config.cuh
Normal file
22
icicle/curves/bls12_377/curve_config.cuh
Normal file
@@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
#include "params.cuh"
|
||||
|
||||
namespace BLS12_377 {
|
||||
typedef Field<PARAMS_BLS12_377::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_BLS12_377::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_377::weierstrass_b };
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_BLS12_377::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_re },
|
||||
point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_im }};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
}
|
||||
308
icicle/curves/bls12_377/lde.cu
Normal file
308
icicle/curves/bls12_377/lde.cu
Normal file
@@ -0,0 +1,308 @@
|
||||
#ifndef _BLS12_377_LDE
|
||||
#define _BLS12_377_LDE
|
||||
#include <cuda.h>
|
||||
#include "../../appUtils/ntt/lde.cu"
|
||||
#include "../../appUtils/ntt/ntt.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "curve_config.cuh"
|
||||
|
||||
/**
 * C-ABI wrapper that builds a domain with fill_twiddle_factors_array.
 * @param domain_size number of elements requested from fill_twiddle_factors_array.
 * @param logn argument passed to omega / omega_inv to pick the generator.
 * @param inverse when true the generator is omega_inv(logn), otherwise omega(logn).
 * @param device_id currently unused.
 * @return the pointer returned by fill_twiddle_factors_array, or nullptr after printing
 *         the message of a caught std::runtime_error.
 */
extern "C" BLS12_377::scalar_t* build_domain_cuda_bls12_377(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
  BLS12_377::scalar_t* domain = nullptr; // stays nullptr when a call below throws
  try {
    domain = inverse ? fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega_inv(logn))
                     : fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega(logn));
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return domain;
}
|
||||
|
||||
/**
 * C-ABI wrapper: scalar NTT for BLS12-377 via ntt_end2end_template<scalar_t, scalar_t>.
 * device_id is accepted but not used yet.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int ntt_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    // TODO: pass device_id
    status = ntt_end2end_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, n, inverse);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper: NTT over BLS12-377 projective points via
 * ntt_end2end_template<projective_t, scalar_t>.
 * device_id is accepted but not used yet.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int ecntt_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    // TODO: pass device_id
    status = ntt_end2end_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, n, inverse);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper: batched scalar NTT for BLS12-377 via
 * ntt_end2end_batch_template<scalar_t, scalar_t>.
 * batch_size is forwarded as the callee's third argument; device_id is not used yet.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int ntt_batch_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    // TODO: pass device_id
    status = ntt_end2end_batch_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper: batched NTT over BLS12-377 projective points via
 * ntt_end2end_batch_template<projective_t, scalar_t>.
 * batch_size is forwarded as the callee's third argument; device_id is not used yet.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int ecntt_batch_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    // TODO: pass device_id
    status = ntt_end2end_batch_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
extern "C" int interpolate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return interpolate(d_out, d_evaluations, d_domain, n);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * C-ABI wrapper: forwards to interpolate_batch for BLS12-377 scalars.
 * Every argument except device_id is passed through unchanged; device_id is unused.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int interpolate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_evaluations, BLS12_377::scalar_t* d_domain, unsigned n,
                                                        unsigned batch_size, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper: forwards to interpolate for BLS12-377 projective points.
 * Every argument except device_id is passed through unchanged; device_id is unused.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int interpolate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    status = interpolate(d_out, d_evaluations, d_domain, n);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper: forwards to interpolate_batch for BLS12-377 projective points.
 * Every argument except device_id is passed through unchanged; device_id is unused.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int interpolate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_evaluations, BLS12_377::scalar_t* d_domain,
                                                       unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
extern "C" int evaluate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
BLS12_377::scalar_t* _null = nullptr;
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * C-ABI wrapper: forwards to evaluate_batch for BLS12-377 scalars with `false` and a null
 * coset-powers pointer as the last two arguments. device_id is unused.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int evaluate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
                                                     unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    // Typed null pointer standing in for the coset powers argument.
    BLS12_377::scalar_t* no_coset_powers = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper: forwards to evaluate for BLS12-377 projective points with `false` and a
 * null coset-powers pointer as the last two arguments. device_id is unused.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int evaluate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain,
                                              unsigned domain_size, unsigned n, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    // Typed null pointer standing in for the coset powers argument.
    BLS12_377::scalar_t* no_coset_powers = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper: forwards to evaluate_batch for BLS12-377 projective points with `false`
 * and a null coset-powers pointer as the last two arguments. device_id is unused.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int evaluate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
                                                    unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    // Typed null pointer standing in for the coset powers argument.
    BLS12_377::scalar_t* no_coset_powers = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
extern "C" int evaluate_scalars_on_coset_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, BLS12_377::scalar_t *coset_powers, unsigned device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * C-ABI wrapper: forwards to evaluate_batch for BLS12-377 scalars with `true` and the
 * caller's coset_powers as the last two arguments. device_id is unused.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
                                                              unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper: forwards to evaluate for BLS12-377 projective points with `true` and the
 * caller's coset_powers as the last two arguments. device_id is unused.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int evaluate_points_on_coset_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
                                                       unsigned n, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper: forwards to evaluate_batch for BLS12-377 projective points with `true`
 * and the caller's coset_powers as the last two arguments. device_id is unused.
 * @return the callee's result, or -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
                                                             unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1; // stays -1 when the call below throws
  try {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI entry point that reorders a BLS12-377 scalar array through reverse_order(arr, n, logn).
 * @param arr scalar array handed to reverse_order.
 * @param n number of elements in arr.
 * @param device_id currently unused.
 * @return 0 on success, -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int reverse_order_scalars_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, size_t device_id = 0)
{
  try
  {
    // floor(log2(n)) with integer shifts: log(n) / log(2) in floating point may round
    // just below the exact exponent, and its conversion is undefined for n <= 0.
    uint32_t logn = 0;
    for (int m = n; m > 1; m >>= 1) {
      logn++;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-ABI entry point that reorders a batch of BLS12-377 scalar arrays through
 * reverse_order_batch(arr, n, logn, batch_size).
 * @param arr scalar array handed to reverse_order_batch.
 * @param n value forwarded as `n`; logn is derived from it.
 * @param batch_size forwarded unchanged to reverse_order_batch.
 * @param device_id currently unused.
 * @return 0 on success, -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int reverse_order_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    // floor(log2(n)) with integer shifts: log(n) / log(2) in floating point may round
    // just below the exact exponent, and its conversion is undefined for n <= 0.
    uint32_t logn = 0;
    for (int m = n; m > 1; m >>= 1) {
      logn++;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-ABI entry point that reorders a BLS12-377 point array through reverse_order(arr, n, logn).
 * @param arr projective point array handed to reverse_order.
 * @param n number of elements in arr.
 * @param device_id currently unused.
 * @return 0 on success, -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int reverse_order_points_cuda_bls12_377(BLS12_377::projective_t* arr, int n, size_t device_id = 0)
{
  try
  {
    // floor(log2(n)) with integer shifts: log(n) / log(2) in floating point may round
    // just below the exact exponent, and its conversion is undefined for n <= 0.
    uint32_t logn = 0;
    for (int m = n; m > 1; m >>= 1) {
      logn++;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-ABI entry point that reorders a batch of BLS12-377 point arrays through
 * reverse_order_batch(arr, n, logn, batch_size).
 * @param arr projective point array handed to reverse_order_batch.
 * @param n value forwarded as `n`; logn is derived from it.
 * @param batch_size forwarded unchanged to reverse_order_batch.
 * @param device_id currently unused.
 * @return 0 on success, -1 after printing the message of a caught std::runtime_error.
 */
extern "C" int reverse_order_points_batch_cuda_bls12_377(BLS12_377::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    // floor(log2(n)) with integer shifts: log(n) / log(2) in floating point may round
    // just below the exact exponent, and its conversion is undefined for n <= 0.
    uint32_t logn = 0;
    for (int m = n; m > 1; m >>= 1) {
      logn++;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
#endif
|
||||
87
icicle/curves/bls12_377/msm.cu
Normal file
87
icicle/curves/bls12_377/msm.cu
Normal file
@@ -0,0 +1,87 @@
|
||||
#ifndef _BLS12_377_MSM
|
||||
#define _BLS12_377_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
|
||||
|
||||
extern "C"
|
||||
int msm_cuda_bls12_377(BLS12_377::projective_t *out, BLS12_377::affine_t points[],
|
||||
BLS12_377::scalar_t scalars[], size_t count, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * C entry point for a batch of BLS12-377 multi-scalar multiplications.
 * Forwards to `batched_large_msm` with its last argument set to `false`.
 * NOTE(review): that flag is presumably "inputs are on device" - confirm against `batched_large_msm`.
 * @param out Destination for the resulting points.
 * @param points Bases of the MSMs.
 * @param scalars Scalars of the MSMs.
 * @param batch_size Number of MSMs in the batch.
 * @param msm_size Size of each MSM.
 * @param device_id Currently unused.
 * @return `CUDA_SUCCESS` on success, -1 if a `std::runtime_error` was caught.
 */
extern "C" int msm_batch_cuda_bls12_377(
  BLS12_377::projective_t* out,
  BLS12_377::affine_t points[],
  BLS12_377::scalar_t scalars[],
  size_t batch_size,
  size_t msm_size,
  size_t device_id = 0)
{
  int status = CUDA_SUCCESS;
  try
  {
    batched_large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, batch_size, msm_size, out, false);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
    status = -1;
  }
  return status;
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(d_scalars, d_points, count, d_out, true);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
155
icicle/curves/bls12_377/params.cuh
Normal file
155
icicle/curves/bls12_377/params.cuh
Normal file
@@ -0,0 +1,155 @@
|
||||
#pragma once
|
||||
#include "../../utils/storage.cuh"
|
||||
namespace PARAMS_BLS12_377{
|
||||
struct fp_config{
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd, 0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb, 0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3, 0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7, 0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f, 0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
|
||||
static constexpr unsigned modulus_bits_count = 253;
|
||||
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b, 0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> omega4= {0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765, 0x970dec00, 0x23ed1347, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega5= {0x0405f600, 0xfa8e7081, 0xf8a89660, 0x38b1c291, 0x6bda5fce, 0xefab9005, 0x92a3c754, 0x0b6b0756};
|
||||
static constexpr storage<limbs_count> omega6= {0xaf0a50c8, 0xc5b2c78e, 0x4636deb3, 0x72e32a34, 0xb6f97778, 0x3d775d15, 0x2b16be6e, 0x0c4c070d};
|
||||
static constexpr storage<limbs_count> omega7= {0x7a1ade2c, 0x3f5a4e73, 0x0120d1db, 0x71e5bca1, 0x3b2866fd, 0xbcb44162, 0x89c38db1, 0x06ed1a90};
|
||||
static constexpr storage<limbs_count> omega8= {0xbd2cd25e, 0x61c5510e, 0x2b0d531c, 0xe2d70111, 0x94c3bd4b, 0x738f9894, 0x53182695, 0x0b1e0f1d};
|
||||
static constexpr storage<limbs_count> omega9= {0x8cb9508c, 0xcfb2f75e, 0xf491e401, 0x4c14f244, 0x23c16afb, 0xc8f5265f, 0x70f3ff2a, 0x0cda7e27};
|
||||
static constexpr storage<limbs_count> omega10= {0x0bdc32ee, 0xca77feb9, 0xd957f5a9, 0xf36ddfd4, 0x61ba14c4, 0x491c58f5, 0x93e8f339, 0x0618d3c9};
|
||||
static constexpr storage<limbs_count> omega11= {0x2d89d82f, 0x68c3242e, 0x832a3729, 0xf9559645, 0xbceb62cc, 0x5c803c5e, 0x99ffa2f8, 0x1177cf5d};
|
||||
static constexpr storage<limbs_count> omega12= {0x6932851a, 0xb6ed40f2, 0x1e0da12e, 0x79cbe7fb, 0x2a7d8f87, 0x8d408575, 0x7505d049, 0x11867341};
|
||||
static constexpr storage<limbs_count> omega13= {0x07146cbf, 0x8cf7d87a, 0x109c4d23, 0x14ac37dc, 0x883e9660, 0x082d15f0, 0xad9ea9b8, 0x003719b1};
|
||||
static constexpr storage<limbs_count> omega14= {0xfd0aee77, 0x2260e0dd, 0x1e33b6db, 0xc0cbbc3f, 0xfe7e1b36, 0xc8bf6747, 0x4cb802c1, 0x129e4fd5};
|
||||
static constexpr storage<limbs_count> omega15= {0x8ac75741, 0x22f6fca2, 0xdd37b519, 0x8101b557, 0x1036226a, 0xf493bb8a, 0xfce05c2c, 0x06dbad6c};
|
||||
static constexpr storage<limbs_count> omega16= {0x56733f8b, 0x7d246c24, 0xff70b46a, 0xbc3c4112, 0x6f13530b, 0x2c159b40, 0xc55d287b, 0x0c13137a};
|
||||
static constexpr storage<limbs_count> omega17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> omega18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> omega19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
static constexpr storage<limbs_count> omega20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
|
||||
static constexpr storage<limbs_count> omega21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
|
||||
static constexpr storage<limbs_count> omega22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
|
||||
static constexpr storage<limbs_count> omega23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
|
||||
static constexpr storage<limbs_count> omega24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
|
||||
static constexpr storage<limbs_count> omega25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
|
||||
static constexpr storage<limbs_count> omega26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
|
||||
static constexpr storage<limbs_count> omega27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
|
||||
static constexpr storage<limbs_count> omega28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
|
||||
static constexpr storage<limbs_count> omega29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
|
||||
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
static constexpr storage<limbs_count> omega_inv1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> omega_inv4= {0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99, 0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> omega_inv5= {0x29f04fbb, 0x401766f3, 0x0a4b98b2, 0x7e4e5f63, 0x9fbc28da, 0x35887f12, 0xdabe3b97, 0x045cb225};
|
||||
static constexpr storage<limbs_count> omega_inv6= {0xac4ce534, 0xf3883827, 0x7c4940f0, 0x9f9a114f, 0x32cc3182, 0xe48527ee, 0x2877f4c2, 0x02d4450c};
|
||||
static constexpr storage<limbs_count> omega_inv7= {0x4afbf0bb, 0xd2533833, 0x1d646d56, 0x20987ba6, 0xb8ae7d61, 0xf2c34c11, 0xb53ae995, 0x09962e74};
|
||||
static constexpr storage<limbs_count> omega_inv8= {0x34f5271a, 0xd6aeb755, 0x493bb125, 0xc0e24cfd, 0x35cf1879, 0xc9d2a1ad, 0x19000e58, 0x0f3570fa};
|
||||
static constexpr storage<limbs_count> omega_inv9= {0xbec3ee61, 0x2601423e, 0xb5252af1, 0x94f5ab4b, 0x205d09ca, 0xa1184628, 0x82a1fba2, 0x0e305e1e};
|
||||
static constexpr storage<limbs_count> omega_inv10= {0x7e3320f2, 0x3cbad3a7, 0x4269c624, 0x7866653a, 0xa2fc13a2, 0xaf6d742d, 0xfe24db2a, 0x03ed8246};
|
||||
static constexpr storage<limbs_count> omega_inv11= {0x30cff7d3, 0xcb6ab09e, 0xd88db7e6, 0x29949e69, 0x24db3cd4, 0xb9117dc6, 0xca8d11b5, 0x01b2aadd};
|
||||
static constexpr storage<limbs_count> omega_inv12= {0x433b851c, 0x1c8fbc5d, 0x545e622f, 0x0ccc3b8c, 0x5c624e0f, 0x0fba9df2, 0x0496ddf9, 0x02d54c5d};
|
||||
static constexpr storage<limbs_count> omega_inv13= {0x0a176838, 0x2ddbbfdd, 0xc4c77f0f, 0xb7a1e4f3, 0x41cad032, 0x645b4383, 0xbfb123c4, 0x0f3fe2e3};
|
||||
static constexpr storage<limbs_count> omega_inv14= {0x9ff30538, 0x1d6d50fe, 0x8576b6fa, 0xca07f2d2, 0x720da6d2, 0x587839fa, 0xe9ebd753, 0x0038d5aa};
|
||||
static constexpr storage<limbs_count> omega_inv15= {0x8e30fb24, 0xaeac713d, 0x21906459, 0xd004e9e3, 0xa60b0a33, 0x2fc54303, 0x14e545a6, 0x039063f8};
|
||||
static constexpr storage<limbs_count> omega_inv16= {0x74d36c47, 0x112559bd, 0x4154b77a, 0x87db7016, 0x3843df80, 0x9e779ae5, 0x297077d0, 0x024424f2};
|
||||
static constexpr storage<limbs_count> omega_inv17= {0x65953c15, 0xd649ae5e, 0x56accc60, 0x879fe571, 0xa3ba1e39, 0xba914f52, 0xd6ea78a2, 0x01b74920};
|
||||
static constexpr storage<limbs_count> omega_inv18= {0x3d8a82b4, 0x319dea45, 0x8fc703de, 0x49468894, 0xc6b00817, 0x703f710f, 0xe862bc53, 0x007762fd};
|
||||
static constexpr storage<limbs_count> omega_inv19= {0x5bae083f, 0x4f433336, 0x27612fe3, 0x485e079c, 0x7f8f0a07, 0xf83b6572, 0xca91a4d4, 0x06bdcaaf};
|
||||
static constexpr storage<limbs_count> omega_inv20= {0xb2fb63eb, 0x4a0bf5e7, 0x996004d9, 0x6f64f8ec, 0x67519c5e, 0x0fecd781, 0x1cab2760, 0x04475eb3};
|
||||
static constexpr storage<limbs_count> omega_inv21= {0xcd83d14f, 0xadbd6ce4, 0x750b194a, 0xc664d3bc, 0x89c9f437, 0x3034dfed, 0xcc2e643b, 0x03d502b8};
|
||||
static constexpr storage<limbs_count> omega_inv22= {0x2272320b, 0xf89478a9, 0xd2e658b7, 0x3adac024, 0x94b25831, 0xf38d840f, 0x37dc6c4c, 0x04540b1f};
|
||||
static constexpr storage<limbs_count> omega_inv23= {0xa6d411fe, 0x19d969b1, 0xf544a648, 0x973f00f7, 0xc9ed9f93, 0xb18f166c, 0xe7f21124, 0x02fba68e};
|
||||
static constexpr storage<limbs_count> omega_inv24= {0x94921227, 0x78b96b20, 0x23b35b65, 0x07cd90db, 0xc843f1c3, 0x111f4fd9, 0xff729f23, 0x0ec4b820};
|
||||
static constexpr storage<limbs_count> omega_inv25= {0x4879d823, 0x53eb200b, 0x93095f4a, 0x1971fac3, 0x86989a58, 0x8467ffe6, 0x306ed29d, 0x0af20231};
|
||||
static constexpr storage<limbs_count> omega_inv26= {0xd4793454, 0x71c907bd, 0x7700defb, 0xc11aa47e, 0xbac11769, 0xf03e0873, 0x97419136, 0x0353190d};
|
||||
static constexpr storage<limbs_count> omega_inv27= {0xa81a701c, 0x61a3deb6, 0x91bbbecf, 0xd8a4eda1, 0x6feb65df, 0x3f5339b1, 0x8b5421f2, 0x108adc5b};
|
||||
static constexpr storage<limbs_count> omega_inv28= {0xe7bf5a41, 0x7d6c573a, 0xfa83b1f7, 0x8038b697, 0xa6718ce9, 0x2a988bee, 0x1239b708, 0x0846f362};
|
||||
static constexpr storage<limbs_count> omega_inv29= {0xe3373548, 0x89a068a4, 0x78a6c4e5, 0xf31284cf, 0x6e9396d6, 0x9eed5c8d, 0x7e4342f9, 0x01643c65};
|
||||
static constexpr storage<limbs_count> omega_inv30= {0x123a81f6, 0xc03a3272, 0x115b15e8, 0x377e6d2f, 0x2d6d7206, 0xed5575e4, 0x714004f2, 0x0b1e37e4};
|
||||
static constexpr storage<limbs_count> omega_inv31= {0xdde8ffc5, 0x62a29589, 0x618c5d62, 0xfb6716e8, 0x88d61f25, 0x787e561c, 0xd2b21c7e, 0x0e351761};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x7aca7fbe, 0xc9fea0e9, 0xb41a8854, 0x965ff314, 0x810eea7e, 0x743415d4, 0x8275bbd1, 0x0431c01b};
|
||||
static constexpr storage<limbs_count> inv1= {0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af};
|
||||
static constexpr storage<limbs_count> inv2= {0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06};
|
||||
static constexpr storage<limbs_count> inv3= {0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2};
|
||||
static constexpr storage<limbs_count> inv4= {0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e, 0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08};
|
||||
static constexpr storage<limbs_count> inv5= {0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346, 0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33};
|
||||
static constexpr storage<limbs_count> inv6= {0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22, 0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9};
|
||||
static constexpr storage<limbs_count> inv7= {0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210, 0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93};
|
||||
static constexpr storage<limbs_count> inv8= {0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87, 0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9};
|
||||
static constexpr storage<limbs_count> inv9= {0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3, 0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab};
|
||||
static constexpr storage<limbs_count> inv10= {0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61, 0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85};
|
||||
static constexpr storage<limbs_count> inv11= {0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af, 0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1};
|
||||
static constexpr storage<limbs_count> inv12= {0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57, 0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8};
|
||||
static constexpr storage<limbs_count> inv13= {0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab, 0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003};
|
||||
static constexpr storage<limbs_count> inv14= {0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054, 0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1};
|
||||
static constexpr storage<limbs_count> inv15= {0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9, 0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007};
|
||||
static constexpr storage<limbs_count> inv16= {0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54, 0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3};
|
||||
static constexpr storage<limbs_count> inv17= {0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29, 0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08};
|
||||
static constexpr storage<limbs_count> inv18= {0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094, 0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3};
|
||||
static constexpr storage<limbs_count> inv19= {0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9, 0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309};
|
||||
static constexpr storage<limbs_count> inv20= {0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164, 0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433};
|
||||
static constexpr storage<limbs_count> inv21= {0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31, 0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9};
|
||||
static constexpr storage<limbs_count> inv22= {0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198, 0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513};
|
||||
static constexpr storage<limbs_count> inv23= {0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b, 0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539};
|
||||
static constexpr storage<limbs_count> inv24= {0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5, 0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b};
|
||||
static constexpr storage<limbs_count> inv25= {0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651, 0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555};
|
||||
static constexpr storage<limbs_count> inv26= {0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8, 0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559};
|
||||
static constexpr storage<limbs_count> inv27= {0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3, 0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c};
|
||||
static constexpr storage<limbs_count> inv28= {0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9, 0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d};
|
||||
static constexpr storage<limbs_count> inv29= {0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4, 0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv30= {0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv31= {0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv32= {0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e};
|
||||
};
|
||||
|
||||
struct fq_config{
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88, 0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6, 0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510, 0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc, 0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2, 0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af, 0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4, 0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f, 0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48, 0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be, 0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
|
||||
static constexpr unsigned modulus_bits_count = 377;
|
||||
static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488, 0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// i^2, the square of the imaginary unit for the extension field
|
||||
static constexpr uint32_t i_squared = 1;
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> generator_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512, 0xbd37cb5c, 0x188282c8,
|
||||
0xaa9d41bb, 0x85951e2c, 0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
|
||||
static constexpr storage<limbs_count> generator_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36, 0x4fb82305, 0x6d182ad4,
|
||||
0xca3e52d9, 0xbd7fb348, 0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
|
||||
static constexpr storage<limbs_count> generator_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0xbac0326, 0x7ae3d177, 0xb4510b64,
|
||||
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x24aa2b2};
|
||||
static constexpr storage<limbs_count> generator_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
|
||||
0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
|
||||
static constexpr storage<limbs_count> generator_y_re = {0x8b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
|
||||
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0xce5d527};
|
||||
static constexpr storage<limbs_count> generator_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
|
||||
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x606c4a0};
|
||||
};
|
||||
|
||||
// Constant term b of the G1 curve equation; the value here is 1.
static constexpr storage<fq_config::limbs_count> weierstrass_b = {
  0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};

// TODO: correct parameters for G2 here
// NOTE(review): both parts are currently 4 (b = 4 + 4i); these look like placeholders carried over
// from another curve - confirm and replace before G2 is used.
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
  0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
  0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
}
|
||||
22
icicle/curves/bls12_377/projective.cu
Normal file
22
icicle/curves/bls12_377/projective.cu
Normal file
@@ -0,0 +1,22 @@
|
||||
|
||||
#include <cuda.h>
|
||||
|
||||
#include "curve_config.cuh"
|
||||
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
/**
 * C entry point comparing two BLS12-377 projective points.
 * @param point1 First point.
 * @param point2 Second point.
 * @return true only if the points compare equal with `operator==` and neither has all three
 *         coordinates (x, y, z) equal to zero.
 */
extern "C" bool eq_bls12_377(BLS12_377::projective_t *point1, BLS12_377::projective_t *point2)
{
  typedef BLS12_377::point_field_t field_t;

  if (!(*point1 == *point2))
  {
    return false;
  }

  // An all-zero (x, y, z) triple is never reported as equal, even to itself.
  const bool first_is_all_zero =
    (point1->x == field_t::zero()) && (point1->y == field_t::zero()) && (point1->z == field_t::zero());
  if (first_is_all_zero)
  {
    return false;
  }

  const bool second_is_all_zero =
    (point2->x == field_t::zero()) && (point2->y == field_t::zero()) && (point2->z == field_t::zero());
  return !second_is_all_zero;
}
|
||||
|
||||
#if defined(G2_DEFINED)
/**
 * C entry point comparing two BLS12-377 G2 projective points; compiled only when G2_DEFINED is set.
 * @param point1 First point.
 * @param point2 Second point.
 * @return true only if the points compare equal with `operator==` and neither has all three
 *         coordinates (x, y, z) equal to zero.
 */
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t *point1, BLS12_377::g2_projective_t *point2)
{
  typedef BLS12_377::g2_point_field_t field_t;

  if (!(*point1 == *point2))
  {
    return false;
  }

  // An all-zero (x, y, z) triple is never reported as equal, even to itself.
  const bool first_is_all_zero =
    (point1->x == field_t::zero()) && (point1->y == field_t::zero()) && (point1->z == field_t::zero());
  if (first_is_all_zero)
  {
    return false;
  }

  const bool second_is_all_zero =
    (point2->x == field_t::zero()) && (point2->y == field_t::zero()) && (point2->z == field_t::zero());
  return !second_is_all_zero;
}
#endif
|
||||
5
icicle/curves/bls12_377/supported_operations.cu
Normal file
5
icicle/curves/bls12_377/supported_operations.cu
Normal file
@@ -0,0 +1,5 @@
|
||||
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
@@ -1,21 +1,23 @@
|
||||
#ifndef _BLS12_377_VEC_MULT
|
||||
#define _BLS12_377_VEC_MULT
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../curves/curve_config.cuh"
|
||||
#include "ve_mod_mult.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
|
||||
|
||||
extern "C" int32_t vec_mod_mult_point(projective_t *inout,
|
||||
scalar_t *scalar_vec,
|
||||
extern "C" int32_t vec_mod_mult_point_bls12_377(BLS12_377::projective_t *inout,
|
||||
BLS12_377::scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id)
|
||||
{
|
||||
try
|
||||
{
|
||||
// TODO: device_id
|
||||
vector_mod_mult<projective_t, scalar_t>(scalar_vec, inout, inout, n_elments);
|
||||
vector_mod_mult<BLS12_377::projective_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -25,15 +27,15 @@ extern "C" int32_t vec_mod_mult_point(projective_t *inout,
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t vec_mod_mult_scalar(scalar_t *inout,
|
||||
scalar_t *scalar_vec,
|
||||
extern "C" int32_t vec_mod_mult_scalar_bls12_377(BLS12_377::scalar_t *inout,
|
||||
BLS12_377::scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id)
|
||||
{
|
||||
try
|
||||
{
|
||||
// TODO: device_id
|
||||
vector_mod_mult<scalar_t, scalar_t>(scalar_vec, inout, inout, n_elments);
|
||||
vector_mod_mult<BLS12_377::scalar_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -43,16 +45,16 @@ extern "C" int32_t vec_mod_mult_scalar(scalar_t *inout,
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t matrix_vec_mod_mult(scalar_t *matrix_flattened,
|
||||
scalar_t *input,
|
||||
scalar_t *output,
|
||||
extern "C" int32_t matrix_vec_mod_mult_bls12_377(BLS12_377::scalar_t *matrix_flattened,
|
||||
BLS12_377::scalar_t *input,
|
||||
BLS12_377::scalar_t *output,
|
||||
size_t n_elments,
|
||||
size_t device_id)
|
||||
{
|
||||
try
|
||||
{
|
||||
// TODO: device_id
|
||||
matrix_mod_mult<scalar_t>(matrix_flattened, input, output, n_elments);
|
||||
matrix_mod_mult<BLS12_377::scalar_t>(matrix_flattened, input, output, n_elments);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -61,3 +63,4 @@ extern "C" int32_t matrix_vec_mod_mult(scalar_t *matrix_flattened,
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -1,176 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "../utils/storage.cuh"
|
||||
|
||||
|
||||
struct fp_config {
|
||||
// field structure size = 8 * 32 bit
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
static constexpr unsigned modulus_bits_count = 255;
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
|
||||
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// static constexpr storage<limbs_count> omega[32]= { {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000}, {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d}, {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e}, {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb}, {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac}, {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802}, {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59}, {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667}, {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098}, {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b}, {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0}, {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8}, {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8}, {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911}, {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd}, {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333}, {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db}, {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83}, {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 
0x5f686d91, 0x3436287f}, {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5}, {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3}, {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd}, {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc}, {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd}, {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580}, {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d}, {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d}, {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f}, {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b}, {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72}, {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> omega1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega2= {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega3= {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d};
|
||||
static constexpr storage<limbs_count> omega4= {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e};
|
||||
static constexpr storage<limbs_count> omega5= {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb};
|
||||
static constexpr storage<limbs_count> omega6= {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac};
|
||||
static constexpr storage<limbs_count> omega7= {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802};
|
||||
static constexpr storage<limbs_count> omega8= {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59};
|
||||
static constexpr storage<limbs_count> omega9= {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667};
|
||||
static constexpr storage<limbs_count> omega10= {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098};
|
||||
static constexpr storage<limbs_count> omega11= {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b};
|
||||
static constexpr storage<limbs_count> omega12= {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0};
|
||||
static constexpr storage<limbs_count> omega13= {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8};
|
||||
static constexpr storage<limbs_count> omega14= {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8};
|
||||
static constexpr storage<limbs_count> omega15= {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911};
|
||||
static constexpr storage<limbs_count> omega16= {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd};
|
||||
static constexpr storage<limbs_count> omega17= {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333};
|
||||
static constexpr storage<limbs_count> omega18= {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db};
|
||||
static constexpr storage<limbs_count> omega19= {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83};
|
||||
static constexpr storage<limbs_count> omega20= {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f};
|
||||
static constexpr storage<limbs_count> omega21= {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5};
|
||||
static constexpr storage<limbs_count> omega22= {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3};
|
||||
static constexpr storage<limbs_count> omega23= {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd};
|
||||
static constexpr storage<limbs_count> omega24= {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc};
|
||||
static constexpr storage<limbs_count> omega25= {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd};
|
||||
static constexpr storage<limbs_count> omega26= {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580};
|
||||
static constexpr storage<limbs_count> omega27= {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d};
|
||||
static constexpr storage<limbs_count> omega28= {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d};
|
||||
static constexpr storage<limbs_count> omega29= {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f};
|
||||
static constexpr storage<limbs_count> omega30= {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
|
||||
static constexpr storage<limbs_count> omega31= {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
|
||||
static constexpr storage<limbs_count> omega32= {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
|
||||
|
||||
// static constexpr storage<limbs_count> omega_inv[32]={ {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753}, {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e}, {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4}, {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c}, {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee}, {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d}, {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25}, {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e}, {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508}, {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d}, {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63}, {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7}, {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1}, {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac}, {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003}, {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c}, {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7}, {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950}, {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 
0x6f5d6dfa, 0x545ad9b2}, {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6}, {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5}, {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9}, {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960}, {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6}, {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf}, {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f}, {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533}, {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff}, {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287}, {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6}, {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> omega_inv1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e};
|
||||
static constexpr storage<limbs_count> omega_inv4= {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4};
|
||||
static constexpr storage<limbs_count> omega_inv5= {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c};
|
||||
static constexpr storage<limbs_count> omega_inv6= {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee};
|
||||
static constexpr storage<limbs_count> omega_inv7= {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d};
|
||||
static constexpr storage<limbs_count> omega_inv8= {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25};
|
||||
static constexpr storage<limbs_count> omega_inv9= {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e};
|
||||
static constexpr storage<limbs_count> omega_inv10= {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508};
|
||||
static constexpr storage<limbs_count> omega_inv11= {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d};
|
||||
static constexpr storage<limbs_count> omega_inv12= {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63};
|
||||
static constexpr storage<limbs_count> omega_inv13= {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7};
|
||||
static constexpr storage<limbs_count> omega_inv14= {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1};
|
||||
static constexpr storage<limbs_count> omega_inv15= {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac};
|
||||
static constexpr storage<limbs_count> omega_inv16= {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003};
|
||||
static constexpr storage<limbs_count> omega_inv17= {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c};
|
||||
static constexpr storage<limbs_count> omega_inv18= {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7};
|
||||
static constexpr storage<limbs_count> omega_inv19= {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950};
|
||||
static constexpr storage<limbs_count> omega_inv20= {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2};
|
||||
static constexpr storage<limbs_count> omega_inv21= {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6};
|
||||
static constexpr storage<limbs_count> omega_inv22= {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5};
|
||||
static constexpr storage<limbs_count> omega_inv23= {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9};
|
||||
static constexpr storage<limbs_count> omega_inv24= {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960};
|
||||
static constexpr storage<limbs_count> omega_inv25= {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6};
|
||||
static constexpr storage<limbs_count> omega_inv26= {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf};
|
||||
static constexpr storage<limbs_count> omega_inv27= {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f};
|
||||
static constexpr storage<limbs_count> omega_inv28= {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533};
|
||||
static constexpr storage<limbs_count> omega_inv29= {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff};
|
||||
static constexpr storage<limbs_count> omega_inv30= {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287};
|
||||
static constexpr storage<limbs_count> omega_inv31= {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};
|
||||
|
||||
// static constexpr storage<limbs_count> inv[32]={ {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9}, {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e}, {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268}, {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd}, {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18}, {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5}, {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04}, {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab}, {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f}, {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9}, {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e}, {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878}, {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5}, {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c}, {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77}, {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365}, {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c}, {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57}, {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5}, {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 
0x73eda014}, {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3}, {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583}, {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b}, {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df}, {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719}, {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736}, {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744}, {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b}, {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f}, {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751}, {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752}, {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> inv1= {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
|
||||
static constexpr storage<limbs_count> inv2= {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
|
||||
static constexpr storage<limbs_count> inv3= {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268};
|
||||
static constexpr storage<limbs_count> inv4= {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd};
|
||||
static constexpr storage<limbs_count> inv5= {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18};
|
||||
static constexpr storage<limbs_count> inv6= {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5};
|
||||
static constexpr storage<limbs_count> inv7= {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04};
|
||||
static constexpr storage<limbs_count> inv8= {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab};
|
||||
static constexpr storage<limbs_count> inv9= {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f};
|
||||
static constexpr storage<limbs_count> inv10= {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9};
|
||||
static constexpr storage<limbs_count> inv11= {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e};
|
||||
static constexpr storage<limbs_count> inv12= {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878};
|
||||
static constexpr storage<limbs_count> inv13= {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5};
|
||||
static constexpr storage<limbs_count> inv14= {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c};
|
||||
static constexpr storage<limbs_count> inv15= {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77};
|
||||
static constexpr storage<limbs_count> inv16= {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365};
|
||||
static constexpr storage<limbs_count> inv17= {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c};
|
||||
static constexpr storage<limbs_count> inv18= {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57};
|
||||
static constexpr storage<limbs_count> inv19= {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5};
|
||||
static constexpr storage<limbs_count> inv20= {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014};
|
||||
static constexpr storage<limbs_count> inv21= {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3};
|
||||
static constexpr storage<limbs_count> inv22= {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583};
|
||||
static constexpr storage<limbs_count> inv23= {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b};
|
||||
static constexpr storage<limbs_count> inv24= {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df};
|
||||
static constexpr storage<limbs_count> inv25= {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719};
|
||||
static constexpr storage<limbs_count> inv26= {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736};
|
||||
static constexpr storage<limbs_count> inv27= {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744};
|
||||
static constexpr storage<limbs_count> inv28= {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b};
|
||||
static constexpr storage<limbs_count> inv29= {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f};
|
||||
static constexpr storage<limbs_count> inv30= {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
|
||||
static constexpr storage<limbs_count> inv31= {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
|
||||
static constexpr storage<limbs_count> inv32= {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
|
||||
};
|
||||
|
||||
struct fq_config {
|
||||
// field structure size = 12 * 32 bit
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
// 4*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
static constexpr unsigned modulus_bits_count = 381;
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
};
|
||||
|
||||
struct group_generator {
|
||||
static constexpr storage<fq_config::limbs_count> generator_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
|
||||
0x9774b905, 0xc3688c4f, 0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
|
||||
static constexpr storage<fq_config::limbs_count> generator_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
|
||||
0xd5d00af6, 0xfcf5e095, 0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
|
||||
};
|
||||
|
||||
// Small integer from which curve_config.cuh builds the point-field constant `b`
// handed to Projective<>.
// NOTE(review): presumably the b coefficient of the short Weierstrass form
// y^2 = x^3 + b - confirm.
static constexpr unsigned weierstrass_b = 4;
|
||||
22
icicle/curves/bls12_381/curve_config.cuh
Normal file
22
icicle/curves/bls12_381/curve_config.cuh
Normal file
@@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
#include "params.cuh"
|
||||
|
||||
namespace BLS12_381 {
|
||||
typedef Field<PARAMS_BLS12_381::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_BLS12_381::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_381::weierstrass_b };
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_BLS12_381::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BLS12_381::weierstrass_b_g2_re },
|
||||
point_field_t{ PARAMS_BLS12_381::weierstrass_b_g2_im }};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
}
|
||||
308
icicle/curves/bls12_381/lde.cu
Normal file
308
icicle/curves/bls12_381/lde.cu
Normal file
@@ -0,0 +1,308 @@
|
||||
#ifndef _BLS12_381_LDE
|
||||
#define _BLS12_381_LDE
|
||||
#include <cuda.h>
|
||||
#include "../../appUtils/ntt/lde.cu"
|
||||
#include "../../appUtils/ntt/ntt.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "curve_config.cuh"
|
||||
|
||||
// C entry point: builds the twiddle-factor array for a BLS12-381 scalar-field domain.
//
//   domain_size  forwarded unchanged to fill_twiddle_factors_array
//   logn         selects the generator through scalar_t::omega(logn) / omega_inv(logn)
//   inverse      true -> use omega_inv(logn), false -> use omega(logn)
//   device_id    accepted for API symmetry; not used by this function
//
// Returns whatever fill_twiddle_factors_array returns, or nullptr when any C++
// exception is raised. No exception is ever allowed to leave this function:
// it has C linkage and is called from non-C++ code, where unwinding is not defined.
extern "C" BLS12_381::scalar_t* build_domain_cuda_bls12_381(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
  try
  {
    if (inverse) {
      return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega_inv(logn));
    } else {
      return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega(logn));
    }
  }
  catch (const std::exception &ex)
  {
    // std::exception also covers std::runtime_error, which was the only type handled before.
    printf("error %s", ex.what());
    return nullptr;
  }
  catch (...)
  {
    // Anything not derived from std::exception: still must not cross the C boundary.
    printf("error %s", "unknown exception");
    return nullptr;
  }
}
|
||||
|
||||
// C entry point: runs ntt_end2end_template over an array of BLS12-381 scalars.
//
//   arr        array handed to ntt_end2end_template
//   n          element count handed to ntt_end2end_template
//   inverse    forwarded as the template's `inverse` flag
//   device_id  accepted but not forwarded yet (see TODO below)
//
// Returns the template's result, or -1 when any C++ exception is raised.
// No exception may leave this function because it has C linkage.
extern "C" int ntt_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    // std::exception also covers std::runtime_error, which was the only type handled before.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception: still must not cross the C boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
// C entry point: runs ntt_end2end_template over an array of BLS12-381 projective
// points, with BLS12-381 scalars as the second template type.
//
//   arr        array handed to ntt_end2end_template
//   n          element count handed to ntt_end2end_template
//   inverse    forwarded as the template's `inverse` flag
//   device_id  accepted but not forwarded yet (see TODO below)
//
// Returns the template's result, or -1 when any C++ exception is raised.
// No exception may leave this function because it has C linkage.
extern "C" int ecntt_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    // std::exception also covers std::runtime_error, which was the only type handled before.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception: still must not cross the C boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
// C entry point: runs ntt_end2end_batch_template over BLS12-381 scalars.
//
//   arr         array handed to ntt_end2end_batch_template
//   arr_size    forwarded as the template's second argument
//   batch_size  forwarded as the template's third argument
//   inverse     forwarded as the template's `inverse` flag
//   device_id   accepted but not forwarded yet (see TODO below)
//
// Returns the template's result, or -1 when any C++ exception is raised.
// No exception may leave this function because it has C linkage.
extern "C" int ntt_batch_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_batch_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    // std::exception also covers std::runtime_error, which was the only type handled before.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception: still must not cross the C boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
// C entry point: runs ntt_end2end_batch_template over BLS12-381 projective points,
// with BLS12-381 scalars as the second template type.
//
//   arr         array handed to ntt_end2end_batch_template
//   arr_size    forwarded as the template's second argument
//   batch_size  forwarded as the template's third argument
//   inverse     forwarded as the template's `inverse` flag
//   device_id   accepted but not forwarded yet (see TODO below)
//
// Returns the template's result, or -1 when any C++ exception is raised.
// No exception may leave this function because it has C linkage.
extern "C" int ecntt_batch_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_batch_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    // std::exception also covers std::runtime_error, which was the only type handled before.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception: still must not cross the C boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
// C entry point: forwards to interpolate() for BLS12-381 scalars.
//
//   d_out          output buffer handed to interpolate
//   d_evaluations  input values handed to interpolate
//   d_domain       domain array handed to interpolate
//   n              element count handed to interpolate
//   device_id      accepted but not used by this function
//
// NOTE(review): the d_ prefix suggests device-memory pointers - confirm against interpolate().
//
// Returns interpolate()'s result, or -1 when any C++ exception is raised.
// No exception may leave this function because it has C linkage.
extern "C" int interpolate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
  try
  {
    return interpolate(d_out, d_evaluations, d_domain, n);
  }
  catch (const std::exception &ex)
  {
    // std::exception also covers std::runtime_error, which was the only type handled before.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception: still must not cross the C boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
// C entry point: forwards to interpolate_batch() for BLS12-381 scalars.
//
//   d_out          output buffer handed to interpolate_batch
//   d_evaluations  input values handed to interpolate_batch
//   d_domain       domain array handed to interpolate_batch
//   n              forwarded as interpolate_batch's fourth argument
//   batch_size     forwarded as interpolate_batch's fifth argument
//   device_id      accepted but not used by this function
//
// NOTE(review): the d_ prefix suggests device-memory pointers - confirm against interpolate_batch().
//
// Returns interpolate_batch()'s result, or -1 when any C++ exception is raised.
// No exception may leave this function because it has C linkage.
extern "C" int interpolate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_evaluations, BLS12_381::scalar_t* d_domain, unsigned n,
                                                        unsigned batch_size, size_t device_id = 0)
{
  try
  {
    return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  }
  catch (const std::exception &ex)
  {
    // std::exception also covers std::runtime_error, which was the only type handled before.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception: still must not cross the C boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C-ABI wrapper that forwards projective-point evaluations to interpolate().
 *
 * @param d_out         Destination point buffer passed to interpolate().
 * @param d_evaluations Input points passed to interpolate().
 * @param d_domain      Scalar domain elements passed to interpolate().
 * @param n             Size argument passed to interpolate().
 * @param device_id     Accepted for API symmetry; not referenced in this body.
 * @return interpolate()'s return value, or -1 if it throws std::runtime_error
 *         (the message is printed first).
 *
 * NOTE(review): the d_ prefix suggests device pointers — confirm against interpolate().
 */
extern "C" int interpolate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
  int status = -1; // stays -1 only when the call below throws
  try {
    status = interpolate(d_out, d_evaluations, d_domain, n);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper that forwards a batch of projective-point evaluations to interpolate_batch().
 *
 * @param d_out         Destination point buffer passed to interpolate_batch().
 * @param d_evaluations Input points passed to interpolate_batch().
 * @param d_domain      Scalar domain elements passed to interpolate_batch().
 * @param n             Size argument passed to interpolate_batch().
 * @param batch_size    Batch argument passed to interpolate_batch().
 * @param device_id     Accepted for API symmetry; not referenced in this body.
 * @return interpolate_batch()'s return value, or -1 if it throws std::runtime_error
 *         (the message is printed first).
 *
 * NOTE(review): the d_ prefix suggests device pointers — confirm against interpolate_batch().
 */
extern "C" int interpolate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_evaluations, BLS12_381::scalar_t* d_domain,
                                                       unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1; // stays -1 only when the call below throws
  try {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper that forwards scalar coefficients to evaluate() without a coset.
 *
 * The call passes `false` and a null scalar pointer as the last two arguments;
 * the *_on_coset_* siblings pass `true` and a real coset_powers pointer there.
 *
 * @param d_out          Destination buffer passed to evaluate().
 * @param d_coefficients Input scalars passed to evaluate().
 * @param d_domain       Domain elements passed to evaluate().
 * @param domain_size    Forwarded to evaluate().
 * @param n              Forwarded to evaluate().
 * @param device_id      Accepted for API symmetry; not referenced in this body.
 * @return evaluate()'s return value, or -1 if it throws std::runtime_error
 *         (the message is printed first).
 */
extern "C" int evaluate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain,
                                               unsigned domain_size, unsigned n, unsigned device_id = 0)
{
  int status = -1; // stays -1 only when the call below throws
  try {
    // Typed null so the template can still deduce its scalar pointer argument.
    BLS12_381::scalar_t* no_coset_powers = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper that forwards a batch of scalar coefficients to evaluate_batch() without a coset.
 *
 * The call passes `false` and a null scalar pointer as the last two arguments;
 * the *_on_coset_* siblings pass `true` and a real coset_powers pointer there.
 *
 * @param d_out          Destination buffer passed to evaluate_batch().
 * @param d_coefficients Input scalars passed to evaluate_batch().
 * @param d_domain       Domain elements passed to evaluate_batch().
 * @param domain_size    Forwarded to evaluate_batch().
 * @param n              Forwarded to evaluate_batch().
 * @param batch_size     Forwarded to evaluate_batch().
 * @param device_id      Accepted for API symmetry; not referenced in this body.
 * @return evaluate_batch()'s return value, or -1 if it throws std::runtime_error
 *         (the message is printed first).
 */
extern "C" int evaluate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
                                                     unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1; // stays -1 only when the call below throws
  try {
    // Typed null so the template can still deduce its scalar pointer argument.
    BLS12_381::scalar_t* no_coset_powers = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper that forwards projective-point coefficients to evaluate() without a coset.
 *
 * The call passes `false` and a null scalar pointer as the last two arguments;
 * the *_on_coset_* siblings pass `true` and a real coset_powers pointer there.
 *
 * @param d_out          Destination point buffer passed to evaluate().
 * @param d_coefficients Input points passed to evaluate().
 * @param d_domain       Scalar domain elements passed to evaluate().
 * @param domain_size    Forwarded to evaluate().
 * @param n              Forwarded to evaluate().
 * @param device_id      Accepted for API symmetry; not referenced in this body.
 * @return evaluate()'s return value, or -1 if it throws std::runtime_error
 *         (the message is printed first).
 */
extern "C" int evaluate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain,
                                              unsigned domain_size, unsigned n, size_t device_id = 0)
{
  int status = -1; // stays -1 only when the call below throws
  try {
    // Typed null so the template can still deduce its scalar pointer argument.
    BLS12_381::scalar_t* no_coset_powers = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper that forwards a batch of projective-point coefficients to evaluate_batch() without a coset.
 *
 * The call passes `false` and a null scalar pointer as the last two arguments;
 * the *_on_coset_* siblings pass `true` and a real coset_powers pointer there.
 *
 * @param d_out          Destination point buffer passed to evaluate_batch().
 * @param d_coefficients Input points passed to evaluate_batch().
 * @param d_domain       Scalar domain elements passed to evaluate_batch().
 * @param domain_size    Forwarded to evaluate_batch().
 * @param n              Forwarded to evaluate_batch().
 * @param batch_size     Forwarded to evaluate_batch().
 * @param device_id      Accepted for API symmetry; not referenced in this body.
 * @return evaluate_batch()'s return value, or -1 if it throws std::runtime_error
 *         (the message is printed first).
 */
extern "C" int evaluate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
                                                    unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1; // stays -1 only when the call below throws
  try {
    // Typed null so the template can still deduce its scalar pointer argument.
    BLS12_381::scalar_t* no_coset_powers = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper that forwards scalar coefficients to evaluate() with coset powers.
 *
 * Unlike the non-coset sibling, the call passes `true` together with the
 * caller-supplied coset_powers pointer as the last two arguments.
 *
 * @param d_out          Destination buffer passed to evaluate().
 * @param d_coefficients Input scalars passed to evaluate().
 * @param d_domain       Domain elements passed to evaluate().
 * @param domain_size    Forwarded to evaluate().
 * @param n              Forwarded to evaluate().
 * @param coset_powers   Forwarded to evaluate() as its final argument.
 * @param device_id      Accepted for API symmetry; not referenced in this body.
 * @return evaluate()'s return value, or -1 if it throws std::runtime_error
 *         (the message is printed first).
 */
extern "C" int evaluate_scalars_on_coset_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
                                                        unsigned n, BLS12_381::scalar_t *coset_powers, unsigned device_id = 0)
{
  int status = -1; // stays -1 only when the call below throws
  try {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper that forwards a batch of scalar coefficients to evaluate_batch() with coset powers.
 *
 * Unlike the non-coset sibling, the call passes `true` together with the
 * caller-supplied coset_powers pointer as the last two arguments.
 *
 * @param d_out          Destination buffer passed to evaluate_batch().
 * @param d_coefficients Input scalars passed to evaluate_batch().
 * @param d_domain       Domain elements passed to evaluate_batch().
 * @param domain_size    Forwarded to evaluate_batch().
 * @param n              Forwarded to evaluate_batch().
 * @param batch_size     Forwarded to evaluate_batch().
 * @param coset_powers   Forwarded to evaluate_batch() as its final argument.
 * @param device_id      Accepted for API symmetry; not referenced in this body.
 * @return evaluate_batch()'s return value, or -1 if it throws std::runtime_error
 *         (the message is printed first).
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
                                                              unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1; // stays -1 only when the call below throws
  try {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper that forwards projective-point coefficients to evaluate() with coset powers.
 *
 * Unlike the non-coset sibling, the call passes `true` together with the
 * caller-supplied coset_powers pointer as the last two arguments.
 *
 * @param d_out          Destination point buffer passed to evaluate().
 * @param d_coefficients Input points passed to evaluate().
 * @param d_domain       Scalar domain elements passed to evaluate().
 * @param domain_size    Forwarded to evaluate().
 * @param n              Forwarded to evaluate().
 * @param coset_powers   Forwarded to evaluate() as its final argument.
 * @param device_id      Accepted for API symmetry; not referenced in this body.
 * @return evaluate()'s return value, or -1 if it throws std::runtime_error
 *         (the message is printed first).
 */
extern "C" int evaluate_points_on_coset_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
                                                       unsigned n, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1; // stays -1 only when the call below throws
  try {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper that forwards a batch of projective-point coefficients to evaluate_batch() with coset powers.
 *
 * Unlike the non-coset sibling, the call passes `true` together with the
 * caller-supplied coset_powers pointer as the last two arguments.
 *
 * @param d_out          Destination point buffer passed to evaluate_batch().
 * @param d_coefficients Input points passed to evaluate_batch().
 * @param d_domain       Scalar domain elements passed to evaluate_batch().
 * @param domain_size    Forwarded to evaluate_batch().
 * @param n              Forwarded to evaluate_batch().
 * @param batch_size     Forwarded to evaluate_batch().
 * @param coset_powers   Forwarded to evaluate_batch() as its final argument.
 * @param device_id      Accepted for API symmetry; not referenced in this body.
 * @return evaluate_batch()'s return value, or -1 if it throws std::runtime_error
 *         (the message is printed first).
 */
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
                                                             unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1; // stays -1 only when the call below throws
  try {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C-ABI wrapper that calls reverse_order(arr, n, floor(log2(n))) on a scalar array.
 *
 * @param arr       Scalar array passed to reverse_order().
 * @param n         Element count; must be non-negative.
 * @param device_id Accepted for API symmetry; not referenced in this body.
 * @return 0 on success, -1 if n is negative or reverse_order() throws
 *         std::runtime_error (a message is printed in both cases).
 */
extern "C" int reverse_order_scalars_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, size_t device_id = 0)
{
  // A negative count has no logarithm; the former log(n) produced NaN, and
  // converting NaN to uint32_t is undefined behaviour.
  if (n < 0)
  {
    printf("error %s", "n must be non-negative");
    return -1;
  }
  try
  {
    // Integer floor(log2(n)), computed by counting shifts. This replaces
    // uint32_t(log(n) / log(2)), which depends on floating-point rounding and
    // is undefined for n == 0 (-inf converted to unsigned). n == 0 yields 0 here.
    uint32_t logn = 0;
    for (uint32_t rest = static_cast<uint32_t>(n) >> 1; rest != 0; rest >>= 1)
      ++logn;
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-ABI wrapper that calls reverse_order_batch(arr, n, floor(log2(n)), batch_size) on scalars.
 *
 * @param arr        Scalar array passed to reverse_order_batch().
 * @param n          Size argument; must be non-negative.
 * @param batch_size Forwarded to reverse_order_batch().
 * @param device_id  Accepted for API symmetry; not referenced in this body.
 * @return 0 on success, -1 if n is negative or reverse_order_batch() throws
 *         std::runtime_error (a message is printed in both cases).
 */
extern "C" int reverse_order_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
  // A negative count has no logarithm; the former log(n) produced NaN, and
  // converting NaN to uint32_t is undefined behaviour.
  if (n < 0)
  {
    printf("error %s", "n must be non-negative");
    return -1;
  }
  try
  {
    // Integer floor(log2(n)), computed by counting shifts. This replaces
    // uint32_t(log(n) / log(2)), which depends on floating-point rounding and
    // is undefined for n == 0 (-inf converted to unsigned). n == 0 yields 0 here.
    uint32_t logn = 0;
    for (uint32_t rest = static_cast<uint32_t>(n) >> 1; rest != 0; rest >>= 1)
      ++logn;
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-ABI wrapper that calls reverse_order(arr, n, floor(log2(n))) on a projective-point array.
 *
 * @param arr       Point array passed to reverse_order().
 * @param n         Element count; must be non-negative.
 * @param device_id Accepted for API symmetry; not referenced in this body.
 * @return 0 on success, -1 if n is negative or reverse_order() throws
 *         std::runtime_error (a message is printed in both cases).
 */
extern "C" int reverse_order_points_cuda_bls12_381(BLS12_381::projective_t* arr, int n, size_t device_id = 0)
{
  // A negative count has no logarithm; the former log(n) produced NaN, and
  // converting NaN to uint32_t is undefined behaviour.
  if (n < 0)
  {
    printf("error %s", "n must be non-negative");
    return -1;
  }
  try
  {
    // Integer floor(log2(n)), computed by counting shifts. This replaces
    // uint32_t(log(n) / log(2)), which depends on floating-point rounding and
    // is undefined for n == 0 (-inf converted to unsigned). n == 0 yields 0 here.
    uint32_t logn = 0;
    for (uint32_t rest = static_cast<uint32_t>(n) >> 1; rest != 0; rest >>= 1)
      ++logn;
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
/**
 * C-ABI wrapper that calls reverse_order_batch(arr, n, floor(log2(n)), batch_size) on projective points.
 *
 * @param arr        Point array passed to reverse_order_batch().
 * @param n          Size argument; must be non-negative.
 * @param batch_size Forwarded to reverse_order_batch().
 * @param device_id  Accepted for API symmetry; not referenced in this body.
 * @return 0 on success, -1 if n is negative or reverse_order_batch() throws
 *         std::runtime_error (a message is printed in both cases).
 */
extern "C" int reverse_order_points_batch_cuda_bls12_381(BLS12_381::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
  // A negative count has no logarithm; the former log(n) produced NaN, and
  // converting NaN to uint32_t is undefined behaviour.
  if (n < 0)
  {
    printf("error %s", "n must be non-negative");
    return -1;
  }
  try
  {
    // Integer floor(log2(n)), computed by counting shifts. This replaces
    // uint32_t(log(n) / log(2)), which depends on floating-point rounding and
    // is undefined for n == 0 (-inf converted to unsigned). n == 0 yields 0 here.
    uint32_t logn = 0;
    for (uint32_t rest = static_cast<uint32_t>(n) >> 1; rest != 0; rest >>= 1)
      ++logn;
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
#endif
|
||||
89
icicle/curves/bls12_381/msm.cu
Normal file
89
icicle/curves/bls12_381/msm.cu
Normal file
@@ -0,0 +1,89 @@
|
||||
#ifndef _BLS12_381_MSM
|
||||
#define _BLS12_381_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
|
||||
|
||||
extern "C"
|
||||
int msm_cuda_bls12_381(BLS12_381::projective_t *out, BLS12_381::affine_t points[],
|
||||
BLS12_381::scalar_t scalars[], size_t count, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, count, out, false);
|
||||
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * C-ABI wrapper around batched_large_msm<scalar_t, projective_t, affine_t> for BLS12-381.
 *
 * The final `false` argument differs from commit_batch_cuda_bls12_381, which
 * passes `true` for inputs documented as being on device — presumably an
 * "on device" flag; confirm against batched_large_msm.
 *
 * @param out        Receives the results written by batched_large_msm.
 * @param points     Affine points for the MSMs.
 * @param scalars    Scalars for the MSMs.
 * @param batch_size Forwarded to batched_large_msm as its batch argument.
 * @param msm_size   Forwarded to batched_large_msm as its per-MSM size argument.
 * @param device_id  Accepted for API symmetry; not referenced in this body.
 * @return CUDA_SUCCESS when batched_large_msm returns normally, -1 if it
 *         throws std::runtime_error (the message is printed first).
 */
extern "C" int msm_batch_cuda_bls12_381(BLS12_381::projective_t* out, BLS12_381::affine_t points[],
                                        BLS12_381::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
  using scalar_t = BLS12_381::scalar_t;
  using point_t = BLS12_381::projective_t;
  using affine_t = BLS12_381::affine_t;

  int status = CUDA_SUCCESS;
  try {
    batched_large_msm<scalar_t, point_t, affine_t>(scalars, points, batch_size, msm_size, out, false);
  } catch (const std::runtime_error &err) {
    printf("error %s", err.what());
    status = -1;
  }
  return status;
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm(d_scalars, d_points, count, d_out, true);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
193
icicle/curves/bls12_381/params.cuh
Normal file
193
icicle/curves/bls12_381/params.cuh
Normal file
@@ -0,0 +1,193 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../utils/storage.cuh"
|
||||
|
||||
namespace PARAMS_BLS12_381{
|
||||
struct fp_config {
|
||||
// field structure size = 8 * 32 bit
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
static constexpr unsigned modulus_bits_count = 255;
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
|
||||
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// static constexpr storage<limbs_count> omega[32]= { {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000}, {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d}, {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e}, {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb}, {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac}, {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802}, {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59}, {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667}, {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098}, {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b}, {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0}, {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8}, {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8}, {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911}, {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd}, {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333}, {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db}, {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83}, {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 
0x5f686d91, 0x3436287f}, {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5}, {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3}, {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd}, {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc}, {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd}, {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580}, {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d}, {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d}, {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f}, {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b}, {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72}, {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> omega1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega2= {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega3= {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d};
|
||||
static constexpr storage<limbs_count> omega4= {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e};
|
||||
static constexpr storage<limbs_count> omega5= {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb};
|
||||
static constexpr storage<limbs_count> omega6= {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac};
|
||||
static constexpr storage<limbs_count> omega7= {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802};
|
||||
static constexpr storage<limbs_count> omega8= {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59};
|
||||
static constexpr storage<limbs_count> omega9= {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667};
|
||||
static constexpr storage<limbs_count> omega10= {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098};
|
||||
static constexpr storage<limbs_count> omega11= {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b};
|
||||
static constexpr storage<limbs_count> omega12= {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0};
|
||||
static constexpr storage<limbs_count> omega13= {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8};
|
||||
static constexpr storage<limbs_count> omega14= {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8};
|
||||
static constexpr storage<limbs_count> omega15= {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911};
|
||||
static constexpr storage<limbs_count> omega16= {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd};
|
||||
static constexpr storage<limbs_count> omega17= {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333};
|
||||
static constexpr storage<limbs_count> omega18= {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db};
|
||||
static constexpr storage<limbs_count> omega19= {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83};
|
||||
static constexpr storage<limbs_count> omega20= {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f};
|
||||
static constexpr storage<limbs_count> omega21= {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5};
|
||||
static constexpr storage<limbs_count> omega22= {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3};
|
||||
static constexpr storage<limbs_count> omega23= {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd};
|
||||
static constexpr storage<limbs_count> omega24= {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc};
|
||||
static constexpr storage<limbs_count> omega25= {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd};
|
||||
static constexpr storage<limbs_count> omega26= {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580};
|
||||
static constexpr storage<limbs_count> omega27= {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d};
|
||||
static constexpr storage<limbs_count> omega28= {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d};
|
||||
static constexpr storage<limbs_count> omega29= {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f};
|
||||
static constexpr storage<limbs_count> omega30= {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
|
||||
static constexpr storage<limbs_count> omega31= {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
|
||||
static constexpr storage<limbs_count> omega32= {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
|
||||
|
||||
// static constexpr storage<limbs_count> omega_inv[32]={ {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753}, {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e}, {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4}, {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c}, {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee}, {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d}, {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25}, {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e}, {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508}, {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d}, {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63}, {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7}, {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1}, {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac}, {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003}, {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c}, {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7}, {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950}, {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 
0x6f5d6dfa, 0x545ad9b2}, {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6}, {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5}, {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9}, {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960}, {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6}, {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf}, {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f}, {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533}, {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff}, {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287}, {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6}, {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> omega_inv1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e};
|
||||
static constexpr storage<limbs_count> omega_inv4= {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4};
|
||||
static constexpr storage<limbs_count> omega_inv5= {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c};
|
||||
static constexpr storage<limbs_count> omega_inv6= {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee};
|
||||
static constexpr storage<limbs_count> omega_inv7= {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d};
|
||||
static constexpr storage<limbs_count> omega_inv8= {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25};
|
||||
static constexpr storage<limbs_count> omega_inv9= {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e};
|
||||
static constexpr storage<limbs_count> omega_inv10= {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508};
|
||||
static constexpr storage<limbs_count> omega_inv11= {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d};
|
||||
static constexpr storage<limbs_count> omega_inv12= {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63};
|
||||
static constexpr storage<limbs_count> omega_inv13= {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7};
|
||||
static constexpr storage<limbs_count> omega_inv14= {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1};
|
||||
static constexpr storage<limbs_count> omega_inv15= {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac};
|
||||
static constexpr storage<limbs_count> omega_inv16= {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003};
|
||||
static constexpr storage<limbs_count> omega_inv17= {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c};
|
||||
static constexpr storage<limbs_count> omega_inv18= {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7};
|
||||
static constexpr storage<limbs_count> omega_inv19= {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950};
|
||||
static constexpr storage<limbs_count> omega_inv20= {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2};
|
||||
static constexpr storage<limbs_count> omega_inv21= {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6};
|
||||
static constexpr storage<limbs_count> omega_inv22= {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5};
|
||||
static constexpr storage<limbs_count> omega_inv23= {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9};
|
||||
static constexpr storage<limbs_count> omega_inv24= {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960};
|
||||
static constexpr storage<limbs_count> omega_inv25= {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6};
|
||||
static constexpr storage<limbs_count> omega_inv26= {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf};
|
||||
static constexpr storage<limbs_count> omega_inv27= {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f};
|
||||
static constexpr storage<limbs_count> omega_inv28= {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533};
|
||||
static constexpr storage<limbs_count> omega_inv29= {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff};
|
||||
static constexpr storage<limbs_count> omega_inv30= {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287};
|
||||
static constexpr storage<limbs_count> omega_inv31= {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};
|
||||
|
||||
// static constexpr storage<limbs_count> inv[32]={ {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9}, {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e}, {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268}, {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd}, {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18}, {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5}, {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04}, {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab}, {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f}, {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9}, {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e}, {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878}, {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5}, {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c}, {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77}, {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365}, {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c}, {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57}, {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5}, {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 
0x73eda014}, {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3}, {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583}, {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b}, {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df}, {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719}, {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736}, {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744}, {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b}, {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f}, {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751}, {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752}, {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> inv1= {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
|
||||
static constexpr storage<limbs_count> inv2= {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
|
||||
static constexpr storage<limbs_count> inv3= {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268};
|
||||
static constexpr storage<limbs_count> inv4= {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd};
|
||||
static constexpr storage<limbs_count> inv5= {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18};
|
||||
static constexpr storage<limbs_count> inv6= {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5};
|
||||
static constexpr storage<limbs_count> inv7= {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04};
|
||||
static constexpr storage<limbs_count> inv8= {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab};
|
||||
static constexpr storage<limbs_count> inv9= {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f};
|
||||
static constexpr storage<limbs_count> inv10= {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9};
|
||||
static constexpr storage<limbs_count> inv11= {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e};
|
||||
static constexpr storage<limbs_count> inv12= {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878};
|
||||
static constexpr storage<limbs_count> inv13= {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5};
|
||||
static constexpr storage<limbs_count> inv14= {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c};
|
||||
static constexpr storage<limbs_count> inv15= {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77};
|
||||
static constexpr storage<limbs_count> inv16= {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365};
|
||||
static constexpr storage<limbs_count> inv17= {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c};
|
||||
static constexpr storage<limbs_count> inv18= {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57};
|
||||
static constexpr storage<limbs_count> inv19= {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5};
|
||||
static constexpr storage<limbs_count> inv20= {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014};
|
||||
static constexpr storage<limbs_count> inv21= {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3};
|
||||
static constexpr storage<limbs_count> inv22= {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583};
|
||||
static constexpr storage<limbs_count> inv23= {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b};
|
||||
static constexpr storage<limbs_count> inv24= {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df};
|
||||
static constexpr storage<limbs_count> inv25= {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719};
|
||||
static constexpr storage<limbs_count> inv26= {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736};
|
||||
static constexpr storage<limbs_count> inv27= {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744};
|
||||
static constexpr storage<limbs_count> inv28= {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b};
|
||||
static constexpr storage<limbs_count> inv29= {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f};
|
||||
static constexpr storage<limbs_count> inv30= {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
|
||||
static constexpr storage<limbs_count> inv31= {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
|
||||
static constexpr storage<limbs_count> inv32= {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
|
||||
};
|
||||
|
||||
struct fq_config {
|
||||
// field structure size = 12 * 32 bit
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
// 4*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
static constexpr unsigned modulus_bits_count = 381;
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// i^2, the square of the imaginary unit for the extension field
|
||||
static constexpr uint32_t i_squared = 1;
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> generator_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
|
||||
0x9774b905, 0xc3688c4f, 0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
|
||||
static constexpr storage<limbs_count> generator_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
|
||||
0xd5d00af6, 0xfcf5e095, 0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
|
||||
static constexpr storage<limbs_count> generator_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0xbac0326, 0x7ae3d177, 0xb4510b64,
|
||||
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x24aa2b2};
|
||||
static constexpr storage<limbs_count> generator_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
|
||||
0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
|
||||
static constexpr storage<limbs_count> generator_y_re = {0x8b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
|
||||
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0xce5d527};
|
||||
static constexpr storage<limbs_count> generator_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
|
||||
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x606c4a0};
|
||||
};
|
||||
|
||||
// Curve constant named as the Weierstrass `b` coefficient for G1: the value 4,
// stored as 12 limbs with the least-significant limb first.
static constexpr storage<fq_config::limbs_count> weierstrass_b = {
  0x00000004, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000};

// The G2 counterpart, given as separate real and imaginary parts (both 4).
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
  0x00000004, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
  0x00000004, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000,
  0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
}
|
||||
19
icicle/curves/bls12_381/projective.cu
Normal file
19
icicle/curves/bls12_381/projective.cu
Normal file
@@ -0,0 +1,19 @@
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
// Equality test for two BLS12-381 projective points.
// Returns true only when *point1 == *point2 and neither point has all three
// coordinates (x, y, z) equal to the field zero.
extern "C" bool eq_bls12_381(BLS12_381::projective_t *point1, BLS12_381::projective_t *point2)
{
  // The projective comparison is evaluated first, exactly as before.
  if (!(*point1 == *point2)) {
    return false;
  }

  // True when every coordinate of the point equals zero.
  auto all_coords_zero = [](BLS12_381::projective_t *p) {
    return (p->x == BLS12_381::point_field_t::zero()) &&
           (p->y == BLS12_381::point_field_t::zero()) &&
           (p->z == BLS12_381::point_field_t::zero());
  };

  return !all_coords_zero(point1) && !all_coords_zero(point2);
}
|
||||
|
||||
#if defined(G2_DEFINED)
// G2 variant of the equality test, compiled only when G2_DEFINED is set.
// Returns true only when *point1 == *point2 and neither point has all three
// coordinates (x, y, z) equal to the extension-field zero.
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t *point1, BLS12_381::g2_projective_t *point2)
{
  // The projective comparison is evaluated first, exactly as before.
  if (!(*point1 == *point2)) {
    return false;
  }

  // True when every coordinate of the point equals zero.
  auto all_coords_zero = [](BLS12_381::g2_projective_t *p) {
    return (p->x == BLS12_381::g2_point_field_t::zero()) &&
           (p->y == BLS12_381::g2_point_field_t::zero()) &&
           (p->z == BLS12_381::g2_point_field_t::zero());
  };

  return !all_coords_zero(point1) && !all_coords_zero(point2);
}
#endif
|
||||
4
icicle/curves/bls12_381/supported_operations.cu
Normal file
4
icicle/curves/bls12_381/supported_operations.cu
Normal file
@@ -0,0 +1,4 @@
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
65
icicle/curves/bls12_381/ve_mod_mult.cu
Normal file
65
icicle/curves/bls12_381/ve_mod_mult.cu
Normal file
@@ -0,0 +1,65 @@
|
||||
#ifndef _BLS12_381_VEC_MULT
|
||||
#define _BLS12_381_VEC_MULT
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
|
||||
// C entry point that runs vector_mod_mult on BLS12-381 projective points and
// scalars. `inout` is passed as both the second and third argument of
// vector_mod_mult, alongside `scalar_vec` and the element count.
// Returns CUDA_SUCCESS, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not used yet.
extern "C" int32_t vec_mod_mult_point_bls12_381(BLS12_381::projective_t *inout,
                                                BLS12_381::scalar_t *scalar_vec,
                                                size_t n_elments,
                                                size_t device_id)
{
  int32_t status = -1;
  try {
    // TODO: device_id
    vector_mod_mult<BLS12_381::projective_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments);
    status = CUDA_SUCCESS;
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what()); // TODO: error code and message
  }
  return status;
}
|
||||
|
||||
// C entry point that runs vector_mod_mult on two BLS12-381 scalar vectors.
// `inout` is passed as both the second and third argument of vector_mod_mult,
// alongside `scalar_vec` and the element count.
// Returns CUDA_SUCCESS, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not used yet.
extern "C" int32_t vec_mod_mult_scalar_bls12_381(BLS12_381::scalar_t *inout,
                                                 BLS12_381::scalar_t *scalar_vec,
                                                 size_t n_elments,
                                                 size_t device_id)
{
  int32_t status = -1;
  try {
    // TODO: device_id
    vector_mod_mult<BLS12_381::scalar_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments);
    status = CUDA_SUCCESS;
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what()); // TODO: error code and message
  }
  return status;
}
|
||||
|
||||
// C entry point that forwards a flattened BLS12-381 scalar matrix, an input
// vector and an output buffer to matrix_mod_mult together with `n_elments`.
// Returns CUDA_SUCCESS, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not used yet.
extern "C" int32_t matrix_vec_mod_mult_bls12_381(BLS12_381::scalar_t *matrix_flattened,
                                                 BLS12_381::scalar_t *input,
                                                 BLS12_381::scalar_t *output,
                                                 size_t n_elments,
                                                 size_t device_id)
{
  int32_t status = -1;
  try {
    // TODO: device_id
    matrix_mod_mult<BLS12_381::scalar_t>(matrix_flattened, input, output, n_elments);
    status = CUDA_SUCCESS;
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what()); // TODO: error code and message
  }
  return status;
}
|
||||
#endif
|
||||
22
icicle/curves/bn254/curve_config.cuh
Normal file
22
icicle/curves/bn254/curve_config.cuh
Normal file
@@ -0,0 +1,22 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
#include "params.cuh"
|
||||
|
||||
namespace BN254 {
|
||||
typedef Field<PARAMS_BN254::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_BN254::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{ PARAMS_BN254::weierstrass_b };
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_BN254::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BN254::weierstrass_b_g2_re },
|
||||
point_field_t{ PARAMS_BN254::weierstrass_b_g2_im }};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
}
|
||||
308
icicle/curves/bn254/lde.cu
Normal file
308
icicle/curves/bn254/lde.cu
Normal file
@@ -0,0 +1,308 @@
|
||||
#ifndef _BN254_LDE
|
||||
#define _BN254_LDE
|
||||
#include <cuda.h>
|
||||
#include "../../appUtils/ntt/lde.cu"
|
||||
#include "../../appUtils/ntt/ntt.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "curve_config.cuh"
|
||||
|
||||
// Builds a BN254 twiddle-factor array of `domain_size` entries by calling
// fill_twiddle_factors_array with omega_inv(logn) when `inverse` is set and
// omega(logn) otherwise.
// Returns the pointer produced by that call, or nullptr after printing the
// message of a caught std::runtime_error. `device_id` is not used here.
extern "C" BN254::scalar_t* build_domain_cuda_bn254(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
  BN254::scalar_t* twiddles = nullptr;
  try {
    twiddles = inverse ? fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega_inv(logn))
                       : fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega(logn));
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return twiddles;
}
|
||||
|
||||
// C entry point for ntt_end2end_template over `n` BN254 scalars in `arr`;
// `inverse` is forwarded unchanged.
// Returns the template's result, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not forwarded yet.
extern "C" int ntt_cuda_bn254(BN254::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1;
  try {
    status = ntt_end2end_template<BN254::scalar_t,BN254::scalar_t>(arr, n, inverse); // TODO: pass device_id
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// C entry point for ntt_end2end_template over `n` BN254 projective points in
// `arr`, with BN254 scalars as the second template type; `inverse` is
// forwarded unchanged.
// Returns the template's result, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not forwarded yet.
extern "C" int ecntt_cuda_bn254(BN254::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1;
  try {
    status = ntt_end2end_template<BN254::projective_t,BN254::scalar_t>(arr, n, inverse); // TODO: pass device_id
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// C entry point for ntt_end2end_batch_template over BN254 scalars; `arr_size`,
// `batch_size` and `inverse` are forwarded unchanged.
// Returns the template's result, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not forwarded yet.
extern "C" int ntt_batch_cuda_bn254(BN254::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1;
  try {
    status = ntt_end2end_batch_template<BN254::scalar_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// C entry point for ntt_end2end_batch_template over BN254 projective points,
// with BN254 scalars as the second template type; `arr_size`, `batch_size`
// and `inverse` are forwarded unchanged.
// Returns the template's result, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not forwarded yet.
extern "C" int ecntt_batch_cuda_bn254(BN254::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1;
  try {
    status = ntt_end2end_batch_template<BN254::projective_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// C entry point that forwards BN254 scalar evaluations, the domain and the
// size `n` to interpolate(), which receives `d_out` as its first argument.
// Returns interpolate()'s result, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not used here.
extern "C" int interpolate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
  int status = -1;
  try {
    status = interpolate(d_out, d_evaluations, d_domain, n);
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// Batched form of the scalar interpolation wrapper: forwards the buffers, `n`
// and `batch_size` to interpolate_batch().
// Returns interpolate_batch()'s result, or -1 after printing the message of a
// caught std::runtime_error. `device_id` is not used here.
extern "C" int interpolate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_evaluations, BN254::scalar_t* d_domain, unsigned n,
                                                    unsigned batch_size, size_t device_id = 0)
{
  int status = -1;
  try {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// C entry point that forwards BN254 projective-point evaluations, the scalar
// domain and the size `n` to interpolate(), which receives `d_out` first.
// Returns interpolate()'s result, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not used here.
extern "C" int interpolate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
  int status = -1;
  try {
    status = interpolate(d_out, d_evaluations, d_domain, n);
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// Batched form of the point interpolation wrapper: forwards the buffers, `n`
// and `batch_size` to interpolate_batch().
// Returns interpolate_batch()'s result, or -1 after printing the message of a
// caught std::runtime_error. `device_id` is not used here.
extern "C" int interpolate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_evaluations, BN254::scalar_t* d_domain,
                                                   unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1;
  try {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// C entry point that calls evaluate() on BN254 scalar coefficients with the
// coset flag set to false and a null coset-powers pointer.
// Returns evaluate()'s result, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not used here.
extern "C" int evaluate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_coefficients, BN254::scalar_t *d_domain,
                                           unsigned domain_size, unsigned n, unsigned device_id = 0)
{
  // Typed null pointer standing in for the unused coset-powers argument.
  BN254::scalar_t* no_coset_powers = nullptr;
  int status = -1;
  try {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers);
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// Batched form: calls evaluate_batch() on BN254 scalar coefficients with the
// coset flag set to false and a null coset-powers pointer.
// Returns evaluate_batch()'s result, or -1 after printing the message of a
// caught std::runtime_error. `device_id` is not used here.
extern "C" int evaluate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
                                                 unsigned n, unsigned batch_size, size_t device_id = 0)
{
  // Typed null pointer standing in for the unused coset-powers argument.
  BN254::scalar_t* no_coset_powers = nullptr;
  int status = -1;
  try {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers);
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// C entry point that calls evaluate() on BN254 projective-point coefficients
// with the coset flag set to false and a null coset-powers pointer.
// Returns evaluate()'s result, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not used here.
extern "C" int evaluate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_coefficients, BN254::scalar_t *d_domain,
                                          unsigned domain_size, unsigned n, size_t device_id = 0)
{
  // Typed null pointer standing in for the unused coset-powers argument.
  BN254::scalar_t* no_coset_powers = nullptr;
  int status = -1;
  try {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers);
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// Batched form: calls evaluate_batch() on BN254 projective-point coefficients
// with the coset flag set to false and a null coset-powers pointer.
// Returns evaluate_batch()'s result, or -1 after printing the message of a
// caught std::runtime_error. `device_id` is not used here.
extern "C" int evaluate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
                                                unsigned n, unsigned batch_size, size_t device_id = 0)
{
  // Typed null pointer standing in for the unused coset-powers argument.
  BN254::scalar_t* no_coset_powers = nullptr;
  int status = -1;
  try {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers);
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// C entry point that calls evaluate() on BN254 scalar coefficients with the
// coset flag set to true and the caller's `coset_powers`.
// Returns evaluate()'s result, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not used here.
extern "C" int evaluate_scalars_on_coset_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_coefficients, BN254::scalar_t *d_domain, unsigned domain_size,
                                                    unsigned n, BN254::scalar_t *coset_powers, unsigned device_id = 0)
{
  int status = -1;
  try {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// Batched form: calls evaluate_batch() on BN254 scalar coefficients with the
// coset flag set to true and the caller's `coset_powers`.
// Returns evaluate_batch()'s result, or -1 after printing the message of a
// caught std::runtime_error. `device_id` is not used here.
extern "C" int evaluate_scalars_on_coset_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
                                                          unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1;
  try {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// C entry point that calls evaluate() on BN254 projective-point coefficients
// with the coset flag set to true and the caller's `coset_powers`.
// Returns evaluate()'s result, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not used here.
extern "C" int evaluate_points_on_coset_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_coefficients, BN254::scalar_t *d_domain, unsigned domain_size,
                                                   unsigned n, BN254::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1;
  try {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// Batched form: calls evaluate_batch() on BN254 projective-point coefficients
// with the coset flag set to true and the caller's `coset_powers`.
// Returns evaluate_batch()'s result, or -1 after printing the message of a
// caught std::runtime_error. `device_id` is not used here.
extern "C" int evaluate_points_on_coset_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
                                                         unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1;
  try {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
// C entry point that calls reverse_order(arr, n, logn) on `n` BN254 scalars,
// where logn = floor(log2(n)).
//
// logn is computed with integer shifts. The earlier floating-point form,
// uint32_t(log(n) / log(2)), is not guaranteed to be exact: a quotient that
// lands just below the true integer truncates to logn - 1, and for n <= 0 the
// conversion of -inf / NaN to an unsigned integer is undefined.
//
// Returns 0 on success (including n == 0, where there is nothing to reorder),
// and -1 for a negative n or after printing the message of a caught
// std::runtime_error. `device_id` is not used here.
extern "C" int reverse_order_scalars_cuda_bn254(BN254::scalar_t* arr, int n, size_t device_id = 0)
{
  if (n <= 0) {
    return n == 0 ? 0 : -1;
  }
  try {
    // Count how many times n can be halved before reaching 1.
    uint32_t logn = 0;
    for (unsigned rest = static_cast<unsigned>(n) >> 1; rest != 0; rest >>= 1) {
      ++logn;
    }
    reverse_order(arr, n, logn);
    return 0;
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
// C entry point that calls reverse_order_batch(arr, n, logn, batch_size) on
// BN254 scalars, where logn = floor(log2(n)).
//
// logn is computed with integer shifts. The earlier floating-point form,
// uint32_t(log(n) / log(2)), is not guaranteed to be exact: a quotient that
// lands just below the true integer truncates to logn - 1, and for n <= 0 the
// conversion of -inf / NaN to an unsigned integer is undefined.
//
// Returns 0 on success (including n == 0, where there is nothing to reorder),
// and -1 for a negative n or after printing the message of a caught
// std::runtime_error. `device_id` is not used here.
extern "C" int reverse_order_scalars_batch_cuda_bn254(BN254::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
  if (n <= 0) {
    return n == 0 ? 0 : -1;
  }
  try {
    // Count how many times n can be halved before reaching 1.
    uint32_t logn = 0;
    for (unsigned rest = static_cast<unsigned>(n) >> 1; rest != 0; rest >>= 1) {
      ++logn;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
// C entry point that calls reverse_order(arr, n, logn) on `n` BN254
// projective points, where logn = floor(log2(n)).
//
// logn is computed with integer shifts. The earlier floating-point form,
// uint32_t(log(n) / log(2)), is not guaranteed to be exact: a quotient that
// lands just below the true integer truncates to logn - 1, and for n <= 0 the
// conversion of -inf / NaN to an unsigned integer is undefined.
//
// Returns 0 on success (including n == 0, where there is nothing to reorder),
// and -1 for a negative n or after printing the message of a caught
// std::runtime_error. `device_id` is not used here.
extern "C" int reverse_order_points_cuda_bn254(BN254::projective_t* arr, int n, size_t device_id = 0)
{
  if (n <= 0) {
    return n == 0 ? 0 : -1;
  }
  try {
    // Count how many times n can be halved before reaching 1.
    uint32_t logn = 0;
    for (unsigned rest = static_cast<unsigned>(n) >> 1; rest != 0; rest >>= 1) {
      ++logn;
    }
    reverse_order(arr, n, logn);
    return 0;
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
|
||||
// C entry point that calls reverse_order_batch(arr, n, logn, batch_size) on
// BN254 projective points, where logn = floor(log2(n)).
//
// logn is computed with integer shifts. The earlier floating-point form,
// uint32_t(log(n) / log(2)), is not guaranteed to be exact: a quotient that
// lands just below the true integer truncates to logn - 1, and for n <= 0 the
// conversion of -inf / NaN to an unsigned integer is undefined.
//
// Returns 0 on success (including n == 0, where there is nothing to reorder),
// and -1 for a negative n or after printing the message of a caught
// std::runtime_error. `device_id` is not used here.
extern "C" int reverse_order_points_batch_cuda_bn254(BN254::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
  if (n <= 0) {
    return n == 0 ? 0 : -1;
  }
  try {
    // Count how many times n can be halved before reaching 1.
    uint32_t logn = 0;
    for (unsigned rest = static_cast<unsigned>(n) >> 1; rest != 0; rest >>= 1) {
      ++logn;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
    return -1;
  }
}
|
||||
#endif
|
||||
87
icicle/curves/bn254/msm.cu
Normal file
87
icicle/curves/bn254/msm.cu
Normal file
@@ -0,0 +1,87 @@
|
||||
#ifndef _BN254_MSM
|
||||
#define _BN254_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
|
||||
|
||||
extern "C"
|
||||
int msm_cuda_bn254(BN254::projective_t *out, BN254::affine_t points[],
|
||||
BN254::scalar_t scalars[], size_t count, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, count, out, false);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// C entry point that runs batched_large_msm over BN254 scalars and affine
// points with the given `batch_size` and `msm_size`, writing to `out`; the
// final flag passed to batched_large_msm is false.
// Returns CUDA_SUCCESS, or -1 after printing the message of a caught
// std::runtime_error. `device_id` is not used here.
extern "C" int msm_batch_cuda_bn254(BN254::projective_t* out, BN254::affine_t points[],
                                    BN254::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
  int status = -1;
  try {
    batched_large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, batch_size, msm_size, out, false);
    status = CUDA_SUCCESS;
  } catch (const std::runtime_error &ex) {
    printf("error %s", ex.what());
  }
  return status;
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm(d_scalars, d_points, count, d_out, true);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
155
icicle/curves/bn254/params.cuh
Normal file
155
icicle/curves/bn254/params.cuh
Normal file
@@ -0,0 +1,155 @@
|
||||
#pragma once
|
||||
#include "../../utils/storage.cuh"
|
||||
namespace PARAMS_BN254{
|
||||
struct fp_config{
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
|
||||
////
// NOTE(review): omega1 (declared further down) equals modulus - 1, i.e. -1, so omegaK is
// presumably a primitive 2^K-th root of unity. The low limb of modulus - 1 is 0xf0000000,
// which gives modulus - 1 exactly 28 trailing zero bits; elements of order 2^K therefore
// exist only for K <= 28. omega29..omega32 below cannot be such roots -- confirm what
// these entries are meant to hold before using domains larger than 2^28.
|
||||
static constexpr storage<limbs_count> omega17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> omega18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> omega19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
static constexpr storage<limbs_count> omega20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
|
||||
static constexpr storage<limbs_count> omega21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
|
||||
static constexpr storage<limbs_count> omega22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
|
||||
static constexpr storage<limbs_count> omega23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
|
||||
static constexpr storage<limbs_count> omega24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
|
||||
static constexpr storage<limbs_count> omega25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
|
||||
static constexpr storage<limbs_count> omega26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
|
||||
static constexpr storage<limbs_count> omega27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
|
||||
static constexpr storage<limbs_count> omega28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
|
||||
static constexpr storage<limbs_count> omega29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
|
||||
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
////
// NOTE(review): omega_inv17..omega_inv32 below repeat the limbs of omega17..omega32
// exactly, so they are copies of the forward values rather than their inverses
// (compare omega_inv2, which is modulus - omega2). They look like placeholders;
// TODO replace with the real inverses before relying on them.
|
||||
static constexpr storage<limbs_count> omega_inv17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> omega_inv18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> omega_inv19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
static constexpr storage<limbs_count> omega_inv20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
|
||||
static constexpr storage<limbs_count> omega_inv21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
|
||||
static constexpr storage<limbs_count> omega_inv22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
|
||||
static constexpr storage<limbs_count> omega_inv23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
|
||||
static constexpr storage<limbs_count> omega_inv24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
|
||||
static constexpr storage<limbs_count> omega_inv25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
|
||||
static constexpr storage<limbs_count> omega_inv26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
|
||||
static constexpr storage<limbs_count> omega_inv27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
|
||||
static constexpr storage<limbs_count> omega_inv28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
|
||||
static constexpr storage<limbs_count> omega_inv29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
|
||||
static constexpr storage<limbs_count> omega_inv30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> omega_inv31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
////
|
||||
////
// NOTE(review): inv1 (declared further down) is (modulus + 1) / 2, so invK is presumably
// 2^-K mod modulus. inv17..inv32 below instead repeat the limbs of omega17..omega32
// exactly, so they look like placeholders; TODO replace with the real values before
// relying on them.
|
||||
static constexpr storage<limbs_count> inv17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> inv18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> inv19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
static constexpr storage<limbs_count> inv20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
|
||||
static constexpr storage<limbs_count> inv21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
|
||||
static constexpr storage<limbs_count> inv22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
|
||||
static constexpr storage<limbs_count> inv23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
|
||||
static constexpr storage<limbs_count> inv24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
|
||||
static constexpr storage<limbs_count> inv25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
|
||||
static constexpr storage<limbs_count> inv26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
|
||||
static constexpr storage<limbs_count> inv27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
|
||||
static constexpr storage<limbs_count> inv28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
|
||||
static constexpr storage<limbs_count> inv29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
|
||||
static constexpr storage<limbs_count> inv30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> inv31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> inv32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
|
||||
////
|
||||
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090, 0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121, 0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975, 0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb, 0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7, 0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr unsigned modulus_bits_count = 254;
|
||||
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520, 0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> omega2= {0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> omega3= {0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1};
|
||||
static constexpr storage<limbs_count> omega4= {0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2};
|
||||
static constexpr storage<limbs_count> omega5= {0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6};
|
||||
static constexpr storage<limbs_count> omega6= {0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d};
|
||||
static constexpr storage<limbs_count> omega7= {0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd};
|
||||
static constexpr storage<limbs_count> omega8= {0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561};
|
||||
static constexpr storage<limbs_count> omega9= {0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e};
|
||||
static constexpr storage<limbs_count> omega10= {0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1};
|
||||
static constexpr storage<limbs_count> omega11= {0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584};
|
||||
static constexpr storage<limbs_count> omega12= {0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596};
|
||||
static constexpr storage<limbs_count> omega13= {0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49};
|
||||
static constexpr storage<limbs_count> omega14= {0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651};
|
||||
static constexpr storage<limbs_count> omega15= {0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f};
|
||||
static constexpr storage<limbs_count> omega16= {0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb};
|
||||
static constexpr storage<limbs_count> omega_inv1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711};
|
||||
static constexpr storage<limbs_count> omega_inv4= {0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf};
|
||||
static constexpr storage<limbs_count> omega_inv5= {0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136};
|
||||
static constexpr storage<limbs_count> omega_inv6= {0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2};
|
||||
static constexpr storage<limbs_count> omega_inv7= {0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053};
|
||||
static constexpr storage<limbs_count> omega_inv8= {0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327};
|
||||
static constexpr storage<limbs_count> omega_inv9= {0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821};
|
||||
static constexpr storage<limbs_count> omega_inv10= {0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7};
|
||||
static constexpr storage<limbs_count> omega_inv11= {0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21};
|
||||
static constexpr storage<limbs_count> omega_inv12= {0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2};
|
||||
static constexpr storage<limbs_count> omega_inv13= {0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58};
|
||||
static constexpr storage<limbs_count> omega_inv14= {0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f};
|
||||
static constexpr storage<limbs_count> omega_inv15= {0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766};
|
||||
static constexpr storage<limbs_count> omega_inv16= {0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7};
|
||||
static constexpr storage<limbs_count> inv1= {0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739};
|
||||
static constexpr storage<limbs_count> inv2= {0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6};
|
||||
static constexpr storage<limbs_count> inv3= {0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4};
|
||||
static constexpr storage<limbs_count> inv4= {0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b};
|
||||
static constexpr storage<limbs_count> inv5= {0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff};
|
||||
static constexpr storage<limbs_count> inv6= {0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39};
|
||||
static constexpr storage<limbs_count> inv7= {0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5};
|
||||
static constexpr storage<limbs_count> inv8= {0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24};
|
||||
static constexpr storage<limbs_count> inv9= {0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b};
|
||||
static constexpr storage<limbs_count> inv10= {0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f};
|
||||
static constexpr storage<limbs_count> inv11= {0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9};
|
||||
static constexpr storage<limbs_count> inv12= {0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d};
|
||||
static constexpr storage<limbs_count> inv13= {0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50};
|
||||
static constexpr storage<limbs_count> inv14= {0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1};
|
||||
static constexpr storage<limbs_count> inv15= {0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa};
|
||||
static constexpr storage<limbs_count> inv16= {0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e};
|
||||
};
|
||||
|
||||
struct fq_config{
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr storage<limbs_count> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522, 0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45, 0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared = {0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95, 0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a, 0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55, 0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr unsigned modulus_bits_count = 254;
|
||||
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17, 0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// i^2, the square of the imaginary unit for the extension field
|
||||
static constexpr uint32_t i_squared = 1;
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
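// G1 and G2 generators. NOTE(review): the G1 generator is (1, 2); every G2 coordinate below is set to 1, like the G2 Weierstrass constants marked TODO later in this file, so they are presumably placeholders -- confirm before enabling G2.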
|
||||
static constexpr storage<limbs_count> generator_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_x_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_x_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_y_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> generator_y_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// TODO: correct parameters for G2 here
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
}
|
||||
19
icicle/curves/bn254/projective.cu
Normal file
19
icicle/curves/bn254/projective.cu
Normal file
@@ -0,0 +1,19 @@
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
// Equality test for two BN254 projective points, exported with C linkage.
// Yields true only when `*point1 == *point2` holds and neither point has
// x, y and z all equal to the point field's zero element.
extern "C" bool eq_bn254(BN254::projective_t *point1, BN254::projective_t *point2)
{
  // Checked first, as in a short-circuiting && chain: unequal points are
  // rejected before any coordinate is compared against zero.
  if (!(*point1 == *point2))
  {
    return false;
  }

  // An all-zero first operand never compares equal.
  if ((point1->x == BN254::point_field_t::zero()) && (point1->y == BN254::point_field_t::zero()) && (point1->z == BN254::point_field_t::zero()))
  {
    return false;
  }

  // Only the all-zero test on the second operand remains.
  return !((point2->x == BN254::point_field_t::zero()) && (point2->y == BN254::point_field_t::zero()) && (point2->z == BN254::point_field_t::zero()));
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
// Equality test for two BN254 G2 projective points, exported with C linkage.
// Yields true only when `*point1 == *point2` holds and neither point has
// x, y and z all equal to the G2 point field's zero element.
extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective_t *point2)
{
  // Checked first, as in a short-circuiting && chain: unequal points are
  // rejected before any coordinate is compared against zero.
  if (!(*point1 == *point2))
  {
    return false;
  }

  // An all-zero first operand never compares equal.
  if ((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) && (point1->z == BN254::g2_point_field_t::zero()))
  {
    return false;
  }

  // Only the all-zero test on the second operand remains.
  return !((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) && (point2->z == BN254::g2_point_field_t::zero()));
}
|
||||
#endif
|
||||
4
icicle/curves/bn254/supported_operations.cu
Normal file
4
icicle/curves/bn254/supported_operations.cu
Normal file
@@ -0,0 +1,4 @@
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
72
icicle/curves/bn254/ve_mod_mult.cu
Normal file
72
icicle/curves/bn254/ve_mod_mult.cu
Normal file
@@ -0,0 +1,72 @@
|
||||
#ifndef _BN254_VEC_MULT
|
||||
#define _BN254_VEC_MULT
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
|
||||
|
||||
// C-linkage wrapper around vector_mod_mult for BN254 projective points.
// `inout` is passed as both the element vector and the result vector, so the
// result overwrites `inout`; `scalar_vec` supplies the scalars and `n_elments`
// is forwarded as the element count.
// Returns CUDA_SUCCESS on success, -1 if an exception was thrown (its message
// is printed to stdout).
extern "C" int32_t vec_mod_mult_point_bn254(BN254::projective_t *inout,
                                            BN254::scalar_t *scalar_vec,
                                            size_t n_elments,
                                            size_t device_id)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  try
  {
    // TODO: device_id
    vector_mod_mult<BN254::projective_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    // std::exception covers std::runtime_error and every other standard exception,
    // so none of them can propagate through this C-linkage entry point.
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
  catch (...)
  {
    // Non-standard exceptions must not cross the extern "C" boundary either.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
// C-linkage wrapper around vector_mod_mult for BN254 scalars.
// `inout` is passed as both the element vector and the result vector, so the
// result overwrites `inout`; `scalar_vec` supplies the multipliers and
// `n_elments` is forwarded as the element count.
// Returns CUDA_SUCCESS on success, -1 if an exception was thrown (its message
// is printed to stdout).
extern "C" int32_t vec_mod_mult_scalar_bn254(BN254::scalar_t *inout,
                                             BN254::scalar_t *scalar_vec,
                                             size_t n_elments,
                                             size_t device_id)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  try
  {
    // TODO: device_id
    vector_mod_mult<BN254::scalar_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    // std::exception covers std::runtime_error and every other standard exception,
    // so none of them can propagate through this C-linkage entry point.
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
  catch (...)
  {
    // Non-standard exceptions must not cross the extern "C" boundary either.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
// C-linkage wrapper around matrix_mod_mult for BN254 scalars.
// Forwards `matrix_flattened`, `input`, `output` and `n_elments` unchanged.
// Returns CUDA_SUCCESS on success, -1 if an exception was thrown (its message
// is printed to stdout).
extern "C" int32_t matrix_vec_mod_mult_bn254(BN254::scalar_t *matrix_flattened,
                                             BN254::scalar_t *input,
                                             BN254::scalar_t *output,
                                             size_t n_elments,
                                             size_t device_id)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  try
  {
    // TODO: device_id
    matrix_mod_mult<BN254::scalar_t>(matrix_flattened, input, output, n_elments);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    // std::exception covers std::runtime_error and every other standard exception,
    // so none of them can propagate through this C-linkage entry point.
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
  catch (...)
  {
    // Non-standard exceptions must not cross the extern "C" boundary either.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
#endif
|
||||
@@ -1,14 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "../primitives/field.cuh"
|
||||
#include "../primitives/projective.cuh"
|
||||
|
||||
#include "bls12_381.cuh"
|
||||
// #include "bn254.cuh"
|
||||
|
||||
|
||||
typedef Field<fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<fq_config> point_field_t;
|
||||
typedef Projective<point_field_t, scalar_field_t, group_generator, weierstrass_b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
@@ -1,86 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "../utils/storage.cuh"
|
||||
|
||||
|
||||
// y^2 = x^3 + weierstrass_b
|
||||
static constexpr unsigned weierstrass_b = 4;
|
||||
|
||||
// a generator of the elliptic curve group
|
||||
struct group_generator {
|
||||
static constexpr storage<fq_config::limbs_count> generator_x = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> generator_y = {0x4abe706c, 0x5ea93e35, 0x00e1de5d, 0x6346b8ed, 0x92848344, 0xda9dd85e,
|
||||
0xc9926b26, 0xc760f988, 0xf3763e9b, 0xb33cffc3, 0xd40d6212, 0x0a989bad};
|
||||
};
|
||||
|
||||
/// SCALAR FIELD
|
||||
struct fp_config {
|
||||
// field structure size = 8 * 32 bit
|
||||
static constexpr unsigned limbs_count = 8; // array size of 32bit int fo form a field element
|
||||
static constexpr unsigned modulus_bits_count = 255; // field bit size
|
||||
// field modulus split into array, ordered in Little-Endian
|
||||
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513 -> 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
|
||||
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// modulus^2
|
||||
static constexpr storage<2 * limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2 * limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// Scalar specific
|
||||
static constexpr storage<limbs_count> omega = {0xa5d36306, 0xe206da11, 0x378fbf96, 0x0ad1347b, 0xe0f8245f, 0xfc3e8acf, 0xa0f704f4, 0x564c0a11};
|
||||
static constexpr storage<limbs_count> omega_inv = {3629396834, 2518295853, 1679307267, 1346818424, 3118225798, 1256349690, 3322524792, 958081110};
|
||||
static constexpr storage<limbs_count> inv_2 = {2147483649,2147483647,2147429887,2849952257,80800770,429714436,2496577188,972477353};
|
||||
static constexpr storage<limbs_count> inv_4 = {1073741825,1073741823,1073661183,4274928386,121201155,644571654,1597382134,1458716030};
|
||||
static constexpr storage<limbs_count> inv_256 = {16777217,16777215,4244528547,1315563102,26752557,3943079472,3597918154,1937357227};
|
||||
static constexpr storage<limbs_count> inv_512 = {8388609,8388607,4269694161,1360250160,94177049,2401254172,2148052617,1941155967};
|
||||
static constexpr storage<limbs_count> inv_4096 = {1048577,1074790399,3217972249,3546834984,2300657127,1589027946,3026903920,1944479864};
|
||||
};
|
||||
|
||||
|
||||
/// BASE FIELD
|
||||
struct fq_config {
|
||||
// field structure size = 12 * 32 bit
|
||||
static constexpr unsigned limbs_count = 12; // array size of 32bit int fo form a field element
|
||||
static constexpr unsigned modulus_bits_count = 381; // field bit size
|
||||
// field modulus split into array, ordered in Little-Endian
|
||||
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787 -> 0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// modulus^2
|
||||
static constexpr storage<2 * limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2 * limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
// 4*modulus^2
|
||||
static constexpr storage<2 * limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
// m = floor(2^(2*modulus_bits_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
};
|
||||
14
icicle/curves/curve_template/curve_config.cuh
Normal file
14
icicle/curves/curve_template/curve_config.cuh
Normal file
@@ -0,0 +1,14 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
#include "params.cuh"
|
||||
|
||||
namespace BN254 {
|
||||
typedef Field<CURVE_NAME_U::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<CURVE_NAME_U::fq_config> point_field_t;
|
||||
typedef Projective<point_field_t, scalar_field_t, CURVE_NAME_U::group_generator, CURVE_NAME_U::weierstrass_b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
}
|
||||
308
icicle/curves/curve_template/lde.cu
Normal file
308
icicle/curves/curve_template/lde.cu
Normal file
@@ -0,0 +1,308 @@
|
||||
#ifndef _CURVE_NAME_U_LDE
|
||||
#define _CURVE_NAME_U_LDE
|
||||
#include <cuda.h>
|
||||
#include "../../appUtils/ntt/lde.cu"
|
||||
#include "../../appUtils/ntt/ntt.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "curve_config.cuh"
|
||||
|
||||
/**
 * Builds the twiddle-factor array for a domain.
 * Uses omega_inv(logn) when `inverse` is set and omega(logn) otherwise, and
 * forwards it with `domain_size` to fill_twiddle_factors_array.
 * @param domain_size Forwarded to fill_twiddle_factors_array.
 * @param logn Argument passed to omega / omega_inv.
 * @param inverse Selects omega_inv instead of omega.
 * @param device_id Currently unused.
 * @return The pointer returned by fill_twiddle_factors_array, or nullptr if an exception was caught.
 */
extern "C" CURVE_NAME_U::scalar_t* build_domain_cuda_CURVE_NAME_L(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
  try
  {
    if (inverse) {
      return fill_twiddle_factors_array(domain_size, CURVE_NAME_U::scalar_t::omega_inv(logn));
    } else {
      return fill_twiddle_factors_array(domain_size, CURVE_NAME_U::scalar_t::omega(logn));
    }
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return nullptr;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return nullptr;
  }
}
|
||||
|
||||
/**
 * C entry point for an NTT over scalars; forwards to ntt_end2end_template.
 * @param arr Scalar array handed to ntt_end2end_template.
 * @param n Forwarded as the size argument.
 * @param inverse Forwarded unchanged.
 * @param device_id Currently unused (see TODO below).
 * @return The value returned by ntt_end2end_template, or -1 if an exception was caught.
 */
extern "C" int ntt_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_template<CURVE_NAME_U::scalar_t,CURVE_NAME_U::scalar_t>(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point for an NTT over projective points; forwards to
 * ntt_end2end_template with projective elements and scalar twiddles.
 * @param arr Point array handed to ntt_end2end_template.
 * @param n Forwarded as the size argument.
 * @param inverse Forwarded unchanged.
 * @param device_id Currently unused (see TODO below).
 * @return The value returned by ntt_end2end_template, or -1 if an exception was caught.
 */
extern "C" int ecntt_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_template<CURVE_NAME_U::projective_t,CURVE_NAME_U::scalar_t>(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point for a batched NTT over scalars; forwards to
 * ntt_end2end_batch_template.
 * @param arr Scalar array handed to ntt_end2end_batch_template.
 * @param arr_size Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param inverse Forwarded unchanged.
 * @param device_id Currently unused (see TODO below).
 * @return The value returned by ntt_end2end_batch_template, or -1 if an exception was caught.
 */
extern "C" int ntt_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_batch_template<CURVE_NAME_U::scalar_t,CURVE_NAME_U::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point for a batched NTT over projective points; forwards to
 * ntt_end2end_batch_template with projective elements and scalar twiddles.
 * @param arr Point array handed to ntt_end2end_batch_template.
 * @param arr_size Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param inverse Forwarded unchanged.
 * @param device_id Currently unused (see TODO below).
 * @return The value returned by ntt_end2end_batch_template, or -1 if an exception was caught.
 */
extern "C" int ecntt_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_batch_template<CURVE_NAME_U::projective_t,CURVE_NAME_U::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that forwards to interpolate() for scalar evaluations.
 * @param d_out Output buffer passed to interpolate.
 * @param d_evaluations Input evaluations passed to interpolate.
 * @param d_domain Domain passed to interpolate.
 * @param n Forwarded unchanged.
 * @param device_id Currently unused.
 *        NOTE(review): declared `unsigned` here but `size_t` in most sibling wrappers - confirm against the callers.
 * @return The value returned by interpolate, or -1 if an exception was caught.
 */
extern "C" int interpolate_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_evaluations, CURVE_NAME_U::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
  try
  {
    return interpolate(d_out, d_evaluations, d_domain, n);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that forwards to interpolate_batch() for scalar evaluations.
 * @param d_out Output buffer passed to interpolate_batch.
 * @param d_evaluations Input evaluations passed to interpolate_batch.
 * @param d_domain Domain passed to interpolate_batch.
 * @param n Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by interpolate_batch, or -1 if an exception was caught.
 */
extern "C" int interpolate_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_evaluations, CURVE_NAME_U::scalar_t* d_domain, unsigned n,
                                                unsigned batch_size, size_t device_id = 0)
{
  try
  {
    return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that forwards to interpolate() for projective-point evaluations.
 * @param d_out Output buffer passed to interpolate.
 * @param d_evaluations Input point evaluations passed to interpolate.
 * @param d_domain Scalar domain passed to interpolate.
 * @param n Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by interpolate, or -1 if an exception was caught.
 */
extern "C" int interpolate_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_evaluations, CURVE_NAME_U::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
  try
  {
    return interpolate(d_out, d_evaluations, d_domain, n);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that forwards to interpolate_batch() for projective-point evaluations.
 * @param d_out Output buffer passed to interpolate_batch.
 * @param d_evaluations Input point evaluations passed to interpolate_batch.
 * @param d_domain Scalar domain passed to interpolate_batch.
 * @param n Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by interpolate_batch, or -1 if an exception was caught.
 */
extern "C" int interpolate_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_evaluations, CURVE_NAME_U::scalar_t* d_domain,
                                               unsigned n, unsigned batch_size, size_t device_id = 0)
{
  try
  {
    return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that forwards to evaluate() for scalar coefficients, with the
 * coset flag set to false and a null coset-powers pointer.
 * @param d_out Output buffer passed to evaluate.
 * @param d_coefficients Input coefficients passed to evaluate.
 * @param d_domain Domain passed to evaluate.
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param device_id Currently unused.
 *        NOTE(review): declared `unsigned` here but `size_t` in most sibling wrappers - confirm against the callers.
 * @return The value returned by evaluate, or -1 if an exception was caught.
 */
extern "C" int evaluate_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain,
                                       unsigned domain_size, unsigned n, unsigned device_id = 0)
{
  try
  {
    // Typed null so the coset-powers argument has the expected pointer type.
    CURVE_NAME_U::scalar_t* _null = nullptr;
    return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that forwards to evaluate_batch() for scalar coefficients,
 * with the coset flag set to false and a null coset-powers pointer.
 * @param d_out Output buffer passed to evaluate_batch.
 * @param d_coefficients Input coefficients passed to evaluate_batch.
 * @param d_domain Domain passed to evaluate_batch.
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by evaluate_batch, or -1 if an exception was caught.
 */
extern "C" int evaluate_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
                                             unsigned n, unsigned batch_size, size_t device_id = 0)
{
  try
  {
    // Typed null so the coset-powers argument has the expected pointer type.
    CURVE_NAME_U::scalar_t* _null = nullptr;
    return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that forwards to evaluate() for projective-point coefficients,
 * with the coset flag set to false and a null coset-powers pointer.
 * @param d_out Output buffer passed to evaluate.
 * @param d_coefficients Input point coefficients passed to evaluate.
 * @param d_domain Scalar domain passed to evaluate.
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by evaluate, or -1 if an exception was caught.
 */
extern "C" int evaluate_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain,
                                      unsigned domain_size, unsigned n, size_t device_id = 0)
{
  try
  {
    // Typed null so the coset-powers argument has the expected pointer type.
    CURVE_NAME_U::scalar_t* _null = nullptr;
    return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that forwards to evaluate_batch() for projective-point
 * coefficients, with the coset flag set to false and a null coset-powers pointer.
 * @param d_out Output buffer passed to evaluate_batch.
 * @param d_coefficients Input point coefficients passed to evaluate_batch.
 * @param d_domain Scalar domain passed to evaluate_batch.
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by evaluate_batch, or -1 if an exception was caught.
 */
extern "C" int evaluate_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
                                            unsigned n, unsigned batch_size, size_t device_id = 0)
{
  try
  {
    // Typed null so the coset-powers argument has the expected pointer type.
    CURVE_NAME_U::scalar_t* _null = nullptr;
    return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that forwards to evaluate() for scalar coefficients with the
 * coset flag set to true and the caller's `coset_powers`.
 * @param d_out Output buffer passed to evaluate.
 * @param d_coefficients Input coefficients passed to evaluate.
 * @param d_domain Domain passed to evaluate.
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param coset_powers Forwarded unchanged as the last argument of evaluate.
 * @param device_id Currently unused.
 *        NOTE(review): declared `unsigned` here but `size_t` in most sibling wrappers - confirm against the callers.
 * @return The value returned by evaluate, or -1 if an exception was caught.
 */
extern "C" int evaluate_scalars_on_coset_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain, unsigned domain_size,
                                                unsigned n, CURVE_NAME_U::scalar_t *coset_powers, unsigned device_id = 0)
{
  try
  {
    return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that forwards to evaluate_batch() for scalar coefficients with
 * the coset flag set to true and the caller's `coset_powers`.
 * @param d_out Output buffer passed to evaluate_batch.
 * @param d_coefficients Input coefficients passed to evaluate_batch.
 * @param d_domain Domain passed to evaluate_batch.
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param coset_powers Forwarded unchanged as the last argument of evaluate_batch.
 * @param device_id Currently unused.
 * @return The value returned by evaluate_batch, or -1 if an exception was caught.
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
                                                      unsigned n, unsigned batch_size, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
{
  try
  {
    return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that forwards to evaluate() for projective-point coefficients
 * with the coset flag set to true and the caller's `coset_powers`.
 * @param d_out Output buffer passed to evaluate.
 * @param d_coefficients Input point coefficients passed to evaluate.
 * @param d_domain Scalar domain passed to evaluate.
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param coset_powers Forwarded unchanged as the last argument of evaluate.
 * @param device_id Currently unused.
 * @return The value returned by evaluate, or -1 if an exception was caught.
 */
extern "C" int evaluate_points_on_coset_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain, unsigned domain_size,
                                               unsigned n, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
{
  try
  {
    return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that forwards to evaluate_batch() for projective-point
 * coefficients with the coset flag set to true and the caller's `coset_powers`.
 * @param d_out Output buffer passed to evaluate_batch.
 * @param d_coefficients Input point coefficients passed to evaluate_batch.
 * @param d_domain Scalar domain passed to evaluate_batch.
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param coset_powers Forwarded unchanged as the last argument of evaluate_batch.
 * @param device_id Currently unused.
 * @return The value returned by evaluate_batch, or -1 if an exception was caught.
 */
extern "C" int evaluate_points_on_coset_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
                                                     unsigned n, unsigned batch_size, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
{
  try
  {
    return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that calls reverse_order() on a scalar array.
 * @param arr Scalar array passed to reverse_order.
 * @param n Element count; must be positive. floor(log2(n)) is passed along as `logn`.
 * @param device_id Currently unused.
 * @return 0 on success, -1 if `n` is not positive or an exception was caught.
 */
extern "C" int reverse_order_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* arr, int n, size_t device_id = 0)
{
  try
  {
    if (n <= 0) {
      // log2 is undefined here; the floating-point version produced -inf/NaN.
      printf("error %s", "n must be positive");
      return -1;
    }
    // Exact integer floor(log2(n)); log(n) / log(2) in floating point can
    // round to just below the true integer and then truncate to logn - 1.
    uint32_t logn = 0;
    for (unsigned remaining = static_cast<unsigned>(n); remaining > 1; remaining >>= 1) {
      ++logn;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that calls reverse_order_batch() on a scalar array.
 * @param arr Scalar array passed to reverse_order_batch.
 * @param n Must be positive. floor(log2(n)) is passed along as `logn`.
 * @param batch_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return 0 on success, -1 if `n` is not positive or an exception was caught.
 */
extern "C" int reverse_order_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    if (n <= 0) {
      // log2 is undefined here; the floating-point version produced -inf/NaN.
      printf("error %s", "n must be positive");
      return -1;
    }
    // Exact integer floor(log2(n)); log(n) / log(2) in floating point can
    // round to just below the true integer and then truncate to logn - 1.
    uint32_t logn = 0;
    for (unsigned remaining = static_cast<unsigned>(n); remaining > 1; remaining >>= 1) {
      ++logn;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that calls reverse_order() on a projective-point array.
 * @param arr Point array passed to reverse_order.
 * @param n Element count; must be positive. floor(log2(n)) is passed along as `logn`.
 * @param device_id Currently unused.
 * @return 0 on success, -1 if `n` is not positive or an exception was caught.
 */
extern "C" int reverse_order_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* arr, int n, size_t device_id = 0)
{
  try
  {
    if (n <= 0) {
      // log2 is undefined here; the floating-point version produced -inf/NaN.
      printf("error %s", "n must be positive");
      return -1;
    }
    // Exact integer floor(log2(n)); log(n) / log(2) in floating point can
    // round to just below the true integer and then truncate to logn - 1.
    uint32_t logn = 0;
    for (unsigned remaining = static_cast<unsigned>(n); remaining > 1; remaining >>= 1) {
      ++logn;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that calls reverse_order_batch() on a projective-point array.
 * @param arr Point array passed to reverse_order_batch.
 * @param n Must be positive. floor(log2(n)) is passed along as `logn`.
 * @param batch_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return 0 on success, -1 if `n` is not positive or an exception was caught.
 */
extern "C" int reverse_order_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    if (n <= 0) {
      // log2 is undefined here; the floating-point version produced -inf/NaN.
      printf("error %s", "n must be positive");
      return -1;
    }
    // Exact integer floor(log2(n)); log(n) / log(2) in floating point can
    // round to just below the true integer and then truncate to logn - 1.
    uint32_t logn = 0;
    for (unsigned remaining = static_cast<unsigned>(n); remaining > 1; remaining >>= 1) {
      ++logn;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
#endif
|
||||
94
icicle/curves/curve_template/msm.cu
Normal file
94
icicle/curves/curve_template/msm.cu
Normal file
@@ -0,0 +1,94 @@
|
||||
#ifndef _CURVE_NAME_U_MSM
|
||||
#define _CURVE_NAME_U_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
|
||||
|
||||
extern "C"
|
||||
int msm_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *out, CURVE_NAME_U::affine_t points[],
|
||||
CURVE_NAME_U::scalar_t scalars[], size_t count, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
if (count>256){
|
||||
large_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, count, out, false);
|
||||
}
|
||||
else{
|
||||
short_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, count, out, false);
|
||||
}
|
||||
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * C entry point for a batch of MSMs; forwards to batched_large_msm with its
 * final boolean argument set to false.
 * @param out Where the resulting projective points are written.
 * @param points Affine points for the MSMs.
 * @param scalars Scalars for the MSMs.
 * @param batch_size Forwarded unchanged.
 * @param msm_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return CUDA_SUCCESS on success, -1 if an exception was caught.
 */
extern "C" int msm_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* out, CURVE_NAME_U::affine_t points[],
                              CURVE_NAME_U::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
  try
  {
    batched_large_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, batch_size, msm_size, out, false);

    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::scalar_t* d_scalars, CURVE_NAME_U::affine_t* d_points, size_t count, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm(d_scalars, d_points, count, d_out, true);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::scalar_t* d_scalars, CURVE_NAME_U::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
8
icicle/curves/curve_template/projective.cu
Normal file
8
icicle/curves/curve_template/projective.cu
Normal file
@@ -0,0 +1,8 @@
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
/**
 * C entry point comparing two projective points with the type's operator==.
 * Both pointers are dereferenced unconditionally, so neither may be null.
 * @param point1 Left-hand operand.
 * @param point2 Right-hand operand.
 * @param device_id Currently unused.
 * @return Result of `*point1 == *point2`.
 */
extern "C" bool eq_CURVE_NAME_L(CURVE_NAME_U::projective_t *point1, CURVE_NAME_U::projective_t *point2, size_t device_id = 0)
{
  CURVE_NAME_U::projective_t &lhs = *point1;
  CURVE_NAME_U::projective_t &rhs = *point2;
  const bool same = (lhs == rhs);
  return same;
}
|
||||
4
icicle/curves/curve_template/supported_operations.cu
Normal file
4
icicle/curves/curve_template/supported_operations.cu
Normal file
@@ -0,0 +1,4 @@
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
66
icicle/curves/curve_template/ve_mod_mult.cu
Normal file
66
icicle/curves/curve_template/ve_mod_mult.cu
Normal file
@@ -0,0 +1,66 @@
|
||||
#ifndef _CURVE_NAME_U_VEC_MULT
|
||||
#define _CURVE_NAME_U_VEC_MULT
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
|
||||
|
||||
/**
 * C entry point that calls vector_mod_mult on a point vector and a scalar vector.
 * `inout` is passed as both the second and third argument of vector_mod_mult
 * (presumably input and output, i.e. the update is in place - confirm in ve_mod_mult.cuh).
 * @param inout Projective points; see above.
 * @param scalar_vec Scalars passed as the first argument.
 * @param n_elments Forwarded as the element count.
 * @param device_id Currently unused (see TODO below).
 * @return CUDA_SUCCESS on success, -1 if an exception was caught.
 */
extern "C" int32_t vec_mod_mult_point_CURVE_NAME_L(CURVE_NAME_U::projective_t *inout,
                                          CURVE_NAME_U::scalar_t *scalar_vec,
                                          size_t n_elments,
                                          size_t device_id)
{
  try
  {
    // TODO: device_id
    vector_mod_mult<CURVE_NAME_U::projective_t, CURVE_NAME_U::scalar_t>(scalar_vec, inout, inout, n_elments);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that calls vector_mod_mult on two scalar vectors.
 * `inout` is passed as both the second and third argument of vector_mod_mult
 * (presumably input and output, i.e. the update is in place - confirm in ve_mod_mult.cuh).
 * @param inout Scalars; see above.
 * @param scalar_vec Scalars passed as the first argument.
 * @param n_elments Forwarded as the element count.
 * @param device_id Currently unused (see TODO below).
 * @return CUDA_SUCCESS on success, -1 if an exception was caught.
 */
extern "C" int32_t vec_mod_mult_scalar_CURVE_NAME_L(CURVE_NAME_U::scalar_t *inout,
                                           CURVE_NAME_U::scalar_t *scalar_vec,
                                           size_t n_elments,
                                           size_t device_id)
{
  try
  {
    // TODO: device_id
    vector_mod_mult<CURVE_NAME_U::scalar_t, CURVE_NAME_U::scalar_t>(scalar_vec, inout, inout, n_elments);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
|
||||
/**
 * C entry point that calls matrix_mod_mult on a flattened scalar matrix and a
 * scalar vector.
 * @param matrix_flattened Matrix entries passed as the first argument.
 * @param input Input vector.
 * @param output Output vector.
 * @param n_elments Forwarded unchanged as the size argument.
 * @param device_id Currently unused (see TODO below).
 * @return CUDA_SUCCESS on success, -1 if an exception was caught.
 */
extern "C" int32_t matrix_vec_mod_mult_CURVE_NAME_L(CURVE_NAME_U::scalar_t *matrix_flattened,
                                           CURVE_NAME_U::scalar_t *input,
                                           CURVE_NAME_U::scalar_t *output,
                                           size_t n_elments,
                                           size_t device_id)
{
  try
  {
    // TODO: device_id
    matrix_mod_mult<CURVE_NAME_U::scalar_t>(matrix_flattened, input, output, n_elments);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
  catch (...)
  {
    // No C++ exception may unwind through the extern "C" boundary.
    printf("error %s", "unknown exception");
    return -1;
  }
}
|
||||
#endif
|
||||
3
icicle/curves/index.cu
Normal file
3
icicle/curves/index.cu
Normal file
@@ -0,0 +1,3 @@
|
||||
#include "bls12_381/supported_operations.cu"
|
||||
#include "bls12_377/supported_operations.cu"
|
||||
#include "bn254/supported_operations.cu"
|
||||
149
icicle/primitives/extension_field.cuh
Normal file
149
icicle/primitives/extension_field.cuh
Normal file
@@ -0,0 +1,149 @@
|
||||
#pragma once
|
||||
|
||||
#include "field.cuh"
|
||||
|
||||
#define HOST_INLINE __host__ __forceinline__
|
||||
#define DEVICE_INLINE __device__ __forceinline__
|
||||
#define HOST_DEVICE_INLINE __host__ __device__ __forceinline__
|
||||
|
||||
template <typename CONFIG> class ExtensionField {
|
||||
private:
|
||||
typedef typename Field<CONFIG>::Wide FWide;
|
||||
|
||||
struct ExtensionWide {
|
||||
FWide real;
|
||||
FWide imaginary;
|
||||
|
||||
ExtensionField HOST_DEVICE_INLINE get_lower() {
|
||||
return ExtensionField { real.get_lower(), imaginary.get_lower() };
|
||||
}
|
||||
|
||||
ExtensionField HOST_DEVICE_INLINE get_higher_with_slack() {
|
||||
return ExtensionField { real.get_higher_with_slack(), imaginary.get_higher_with_slack() };
|
||||
}
|
||||
};
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionWide operator+(ExtensionWide xs, const ExtensionWide& ys) {
|
||||
return ExtensionField { xs.real + ys.real, xs.imaginary + ys.imaginary };
|
||||
}
|
||||
|
||||
// an incomplete impl that assumes that xs > ys
|
||||
friend HOST_DEVICE_INLINE ExtensionWide operator-(ExtensionWide xs, const ExtensionWide& ys) {
|
||||
return ExtensionField { xs.real - ys.real, xs.imaginary - ys.imaginary };
|
||||
}
|
||||
|
||||
public:
|
||||
typedef Field<CONFIG> FF;
|
||||
static constexpr unsigned TLC = 2 * CONFIG::limbs_count;
|
||||
|
||||
FF real;
|
||||
FF imaginary;
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField zero() {
|
||||
return ExtensionField { FF::zero(), FF::zero() };
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField one() {
|
||||
return ExtensionField { FF::one(), FF::zero() };
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField generator_x() {
|
||||
return ExtensionField { FF { CONFIG::generator_x_re }, FF { CONFIG::generator_x_im } };
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField generator_y() {
|
||||
return ExtensionField { FF { CONFIG::generator_y_re }, FF { CONFIG::generator_y_im } };
|
||||
}
|
||||
|
||||
|
||||
static HOST_INLINE ExtensionField rand_host() {
|
||||
return ExtensionField { FF::rand_host(), FF::rand_host() };
|
||||
}
|
||||
|
||||
template <unsigned REDUCTION_SIZE = 1> static constexpr HOST_DEVICE_INLINE ExtensionField reduce(const ExtensionField &xs) {
|
||||
return ExtensionField { FF::reduce<REDUCTION_SIZE>(&xs.real), FF::reduce<REDUCTION_SIZE>(&xs.imaginary) };
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const ExtensionField& xs) {
|
||||
os << "{ Real: " << xs.real << " }; { Imaginary: " << xs.imaginary << " }";
|
||||
return os;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const ExtensionField& ys) {
|
||||
return ExtensionField { xs.real + ys.real, xs.imaginary + ys.imaginary };
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const ExtensionField& ys) {
|
||||
return ExtensionField { xs.real - ys.real, xs.imaginary - ys.imaginary };
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const ExtensionField& ys) {
|
||||
FWide real_prod = FF::mul_wide(xs.real * ys.real);
|
||||
FWide imaginary_prod = FF::mul_wide(xs.imaginary * ys.imaginary);
|
||||
FWide prod_of_sums = FF::mul_wide(xs.real + xs.imaginary, ys.real + ys.imaginary);
|
||||
FWide i_sq_times_im = FF::mul_unsigned<CONFIG::i_squared>(imaginary_prod);
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
|
||||
return ExtensionField { real_prod + i_sq_times_im, prod_of_sums - real_prod - imaginary_prod };
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator*(const ExtensionField& xs, const ExtensionField& ys) {
|
||||
FF real_prod = xs.real * ys.real;
|
||||
FF imaginary_prod = xs.imaginary * ys.imaginary;
|
||||
FF prod_of_sums = (xs.real + xs.imaginary) * (ys.real + ys.imaginary);
|
||||
FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
|
||||
return ExtensionField { real_prod + i_sq_times_im, prod_of_sums - real_prod - imaginary_prod };
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const ExtensionField& xs, const ExtensionField& ys) {
|
||||
return (xs.real == ys.real) && (xs.imaginary == ys.imaginary);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator!=(const ExtensionField& xs, const ExtensionField& ys) {
|
||||
return !(xs == ys);
|
||||
}
|
||||
|
||||
template <const ExtensionField& mutliplier>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField mul_const(const ExtensionField &xs) {
|
||||
constexpr uint32_t mul_real = mutliplier.real.limbs_storage.limbs[0];
|
||||
constexpr uint32_t mul_imaginary = mutliplier.imaginary.limbs_storage.limbs[0];
|
||||
FF real_prod = FF::template mul_unsigned<mul_real>(xs.real);
|
||||
FF imaginary_prod = FF::template mul_unsigned<mul_imaginary>(xs.imaginary);
|
||||
FF re_im = FF::template mul_unsigned<mul_real>(xs.imaginary);
|
||||
FF im_re = FF::template mul_unsigned<mul_imaginary>(xs.real);
|
||||
FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
|
||||
return ExtensionField { real_prod + i_sq_times_im, re_im + im_re };
|
||||
}
|
||||
|
||||
template <uint32_t mutliplier, unsigned REDUCTION_SIZE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField &xs) {
|
||||
return { FF::template mul_unsigned<mutliplier>(xs.real), FF::template mul_unsigned<mutliplier>(xs.imaginary) };
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide sqr_wide(const ExtensionField& xs) {
|
||||
// TODO: change to a more efficient squaring
|
||||
return mul_wide<MODULUS_MULTIPLE>(xs, xs);
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField sqr(const ExtensionField& xs) {
|
||||
// TODO: change to a more efficient squaring
|
||||
return xs * xs;
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField neg(const ExtensionField& xs) {
|
||||
return ExtensionField { FF::neg(xs.real), FF::neg(xs.imaginary) };
|
||||
}
|
||||
|
||||
// inverse assumes that xs is nonzero
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField inverse(const ExtensionField& xs) {
|
||||
ExtensionField xs_conjugate = { xs.real, FF::neg(xs.imaginary) };
|
||||
// TODO: wide here
|
||||
FF xs_norm_squared = FF::sqr(xs.real) + FF::sqr(xs.imaginary);
|
||||
return xs_conjugate * ExtensionField { FF::inverse(xs_norm_squared), FF::zero() };
|
||||
}
|
||||
};
|
||||
@@ -23,6 +23,14 @@ template <class CONFIG> class Field {
|
||||
return Field { CONFIG::one };
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE Field generator_x() {
|
||||
return Field { CONFIG::generator_x };
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE Field generator_y() {
|
||||
return Field { CONFIG::generator_y };
|
||||
}
|
||||
|
||||
static constexpr HOST_INLINE Field omega(uint32_t log_size) {
|
||||
// Quick fix to linking issue, permanent fix will follow
|
||||
switch (log_size) {
|
||||
@@ -93,6 +101,7 @@ template <class CONFIG> class Field {
|
||||
case 32:
|
||||
return Field { CONFIG::omega32 };
|
||||
}
|
||||
return Field { CONFIG::one };
|
||||
// return Field { CONFIG::omega[log_size-1] };
|
||||
}
|
||||
|
||||
@@ -166,6 +175,7 @@ template <class CONFIG> class Field {
|
||||
case 32:
|
||||
return Field { CONFIG::omega_inv32 };
|
||||
}
|
||||
return Field { CONFIG::one };
|
||||
// return Field { CONFIG::omega_inv[log_size-1] };
|
||||
}
|
||||
|
||||
@@ -237,6 +247,7 @@ template <class CONFIG> class Field {
|
||||
case 32:
|
||||
return Field { CONFIG::inv32 };
|
||||
}
|
||||
return Field { CONFIG::one };
|
||||
// return Field { CONFIG::inv[log_size-1] };
|
||||
}
|
||||
|
||||
@@ -244,14 +255,13 @@ template <class CONFIG> class Field {
|
||||
return Field { CONFIG::modulus };
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
// private:
|
||||
typedef storage<TLC> ff_storage;
|
||||
typedef storage<2*TLC> ff_wide_storage;
|
||||
|
||||
static constexpr unsigned slack_bits = 32 * TLC - NBITS;
|
||||
|
||||
struct wide {
|
||||
struct Wide {
|
||||
ff_wide_storage limbs_storage;
|
||||
|
||||
Field HOST_DEVICE_INLINE get_lower() {
|
||||
@@ -280,15 +290,15 @@ template <class CONFIG> class Field {
|
||||
}
|
||||
};
|
||||
|
||||
friend HOST_DEVICE_INLINE wide operator+(wide xs, const wide& ys) {
|
||||
wide rs = {};
|
||||
friend HOST_DEVICE_INLINE Wide operator+(Wide xs, const Wide& ys) {
|
||||
Wide rs = {};
|
||||
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
// an incomplete impl that assumes that xs > ys
|
||||
friend HOST_DEVICE_INLINE wide operator-(wide xs, const wide& ys) {
|
||||
wide rs = {};
|
||||
friend HOST_DEVICE_INLINE Wide operator-(Wide xs, const Wide& ys) {
|
||||
Wide rs = {};
|
||||
sub_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
@@ -337,7 +347,9 @@ template <class CONFIG> class Field {
|
||||
const uint32_t *y = ys.limbs;
|
||||
uint32_t *r = rs.limbs;
|
||||
r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
|
||||
#ifdef __CUDA_ARCH__
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (unsigned i = 1; i < (CARRY_OUT ? TLC : TLC - 1); i++)
|
||||
r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
|
||||
if (!CARRY_OUT) {
|
||||
@@ -353,7 +365,9 @@ template <class CONFIG> class Field {
|
||||
const uint32_t *y = ys.limbs;
|
||||
uint32_t *r = rs.limbs;
|
||||
r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
|
||||
#ifdef __CUDA_ARCH__
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (unsigned i = 1; i < (CARRY_OUT ? 2 * TLC : 2 * TLC - 1); i++)
|
||||
r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
|
||||
if (!CARRY_OUT) {
|
||||
@@ -414,7 +428,7 @@ template <class CONFIG> class Field {
|
||||
static DEVICE_INLINE void cmad_n(uint32_t *acc, const uint32_t *a, uint32_t bi, size_t n = TLC) {
|
||||
acc[0] = ptx::mad_lo_cc(a[0], bi, acc[0]);
|
||||
acc[1] = ptx::madc_hi_cc(a[0], bi, acc[1]);
|
||||
#pragma unroll
|
||||
#pragma unroll
|
||||
for (size_t i = 2; i < n; i += 2) {
|
||||
acc[i] = ptx::madc_lo_cc(a[i], bi, acc[i]);
|
||||
acc[i + 1] = ptx::madc_hi_cc(a[i], bi, acc[i + 1]);
|
||||
@@ -481,8 +495,6 @@ template <class CONFIG> class Field {
|
||||
const uint32_t limb_lsb_idx = (digit_num*digit_width) / 32;
|
||||
const uint32_t shift_bits = (digit_num*digit_width) % 32;
|
||||
unsigned rv = limbs_storage.limbs[limb_lsb_idx] >> shift_bits;
|
||||
// printf("get_scalar_func digit %u rv %u\n",digit_num,rv);
|
||||
// if (shift_bits + digit_width > 32) {
|
||||
if ((shift_bits + digit_width > 32) && (limb_lsb_idx+1 < TLC)) {
|
||||
rv += limbs_storage.limbs[limb_lsb_idx + 1] << (32 - shift_bits);
|
||||
}
|
||||
@@ -493,6 +505,7 @@ template <class CONFIG> class Field {
|
||||
static HOST_INLINE Field rand_host() {
|
||||
std::random_device rd;
|
||||
std::mt19937_64 generator(rd());
|
||||
// std::mt19937_64 generator(0);
|
||||
std::uniform_int_distribution<unsigned> distribution;
|
||||
Field value{};
|
||||
for (unsigned i = 0; i < TLC; i++)
|
||||
@@ -518,13 +531,13 @@ template <class CONFIG> class Field {
|
||||
return os;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator+(Field xs, const Field& ys) {
|
||||
friend HOST_DEVICE_INLINE Field operator+(Field xs, const Field& ys) {
|
||||
Field rs = {};
|
||||
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return reduce<1>(rs);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator-(Field xs, const Field& ys) {
|
||||
friend HOST_DEVICE_INLINE Field operator-(Field xs, const Field& ys) {
|
||||
Field rs = {};
|
||||
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
if (carry == 0)
|
||||
@@ -535,22 +548,22 @@ template <class CONFIG> class Field {
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE wide mul_wide(const Field& xs, const Field& ys) {
|
||||
wide rs = {};
|
||||
static constexpr HOST_DEVICE_INLINE Wide mul_wide(const Field& xs, const Field& ys) {
|
||||
Wide rs = {};
|
||||
multiply_raw(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
|
||||
return rs;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Field operator*(const Field& xs, const Field& ys) {
|
||||
wide xy = mul_wide(xs, ys);
|
||||
Wide xy = mul_wide(xs, ys);
|
||||
Field xy_hi = xy.get_higher_with_slack();
|
||||
wide l = {};
|
||||
Wide l = {};
|
||||
multiply_raw(xy_hi.limbs_storage, get_m(), l.limbs_storage);
|
||||
Field l_hi = l.get_higher_with_slack();
|
||||
wide lp = {};
|
||||
Wide lp = {};
|
||||
multiply_raw(l_hi.limbs_storage, get_modulus(), lp.limbs_storage);
|
||||
wide r_wide = xy - lp;
|
||||
wide r_wide_reduced = {};
|
||||
Wide r_wide = xy - lp;
|
||||
Wide r_wide_reduced = {};
|
||||
uint32_t reduced = sub_limbs<true>(r_wide.limbs_storage, modulus_wide(), r_wide_reduced.limbs_storage);
|
||||
r_wide = reduced ? r_wide : r_wide_reduced;
|
||||
Field r = r_wide.get_lower();
|
||||
@@ -578,22 +591,24 @@ template <class CONFIG> class Field {
|
||||
return !(xs == ys);
|
||||
}
|
||||
|
||||
template <unsigned REDUCTION_SIZE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE Field mul(const unsigned scalar, const Field &xs) {
|
||||
Field rs = {};
|
||||
Field temp = xs;
|
||||
unsigned l = scalar;
|
||||
template <const Field& multiplier, class T> static constexpr HOST_DEVICE_INLINE T mul_const(const T &xs) {
|
||||
return mul_unsigned<multiplier.limbs_storage.limbs[0], T>(xs);
|
||||
}
|
||||
|
||||
template <uint32_t mutliplier, class T, unsigned REDUCTION_SIZE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE T mul_unsigned(const T &xs) {
|
||||
T rs = {};
|
||||
T temp = xs;
|
||||
bool is_zero = true;
|
||||
#ifdef __CUDA_ARCH__
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (unsigned i = 0; i < 32; i++) {
|
||||
if (l & 1) {
|
||||
if (mutliplier & (1 << i)) {
|
||||
rs = is_zero ? temp : (rs + temp);
|
||||
is_zero = false;
|
||||
}
|
||||
l >>= 1;
|
||||
if (l == 0)
|
||||
if (mutliplier & ((1 << (31 - i) - 1) << (i + 1)))
|
||||
break;
|
||||
temp = temp + temp;
|
||||
}
|
||||
@@ -601,7 +616,7 @@ template <class CONFIG> class Field {
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE wide sqr_wide(const Field& xs) {
|
||||
static constexpr HOST_DEVICE_INLINE Wide sqr_wide(const Field& xs) {
|
||||
// TODO: change to a more efficient squaring
|
||||
return mul_wide<MODULUS_MULTIPLE>(xs, xs);
|
||||
}
|
||||
|
||||
@@ -1,8 +1,49 @@
|
||||
#include <cuda.h>
|
||||
#include "../curves/curve_config.cuh"
|
||||
#include "../curves/bls12_381/curve_config.cuh"
|
||||
#include "../curves/bls12_377/curve_config.cuh"
|
||||
#include "../curves/bn254/curve_config.cuh"
|
||||
#include "projective.cuh"
|
||||
|
||||
extern "C" bool eq(projective_t *point1, projective_t *point2, size_t device_id = 0)
|
||||
extern "C" bool eq_bls12_381(BLS12_381::projective_t *point1, BLS12_381::projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2);
|
||||
}
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_381::point_field_t::zero()) && (point1->y == BLS12_381::point_field_t::zero()) && (point1->z == BLS12_381::point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_381::point_field_t::zero()) && (point2->y == BLS12_381::point_field_t::zero()) && (point2->z == BLS12_381::point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" bool eq_bls12_377(BLS12_377::projective_t *point1, BLS12_377::projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_377::point_field_t::zero()) && (point1->y == BLS12_377::point_field_t::zero()) && (point1->z == BLS12_377::point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_377::point_field_t::zero()) && (point2->y == BLS12_377::point_field_t::zero()) && (point2->z == BLS12_377::point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" bool eq_bn254(BN254::projective_t *point1, BN254::projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BN254::point_field_t::zero()) && (point1->y == BN254::point_field_t::zero()) && (point1->z == BN254::point_field_t::zero())) &&
|
||||
!((point2->x == BN254::point_field_t::zero()) && (point2->y == BN254::point_field_t::zero()) && (point2->z == BN254::point_field_t::zero()));
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t *point1, BLS12_381::g2_projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_381::g2_point_field_t::zero()) && (point1->y == BLS12_381::g2_point_field_t::zero()) && (point1->z == BLS12_381::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_381::g2_point_field_t::zero()) && (point2->y == BLS12_381::g2_point_field_t::zero()) && (point2->z == BLS12_381::g2_point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t *point1, BLS12_377::g2_projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_377::g2_point_field_t::zero()) && (point1->y == BLS12_377::g2_point_field_t::zero()) && (point1->z == BLS12_377::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_377::g2_point_field_t::zero()) && (point2->y == BLS12_377::g2_point_field_t::zero()) && (point2->z == BLS12_377::g2_point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective_t *point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) && (point1->z == BN254::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) && (point2->z == BN254::g2_point_field_t::zero()));
|
||||
}
|
||||
#endif
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
#include "affine.cuh"
|
||||
|
||||
template <class FF, class SCALAR_FF, class GEN, unsigned B_VALUE>
|
||||
template <typename FF, class SCALAR_FF, const FF& B_VALUE>
|
||||
class Projective {
|
||||
friend Affine<FF>;
|
||||
|
||||
@@ -11,10 +11,6 @@ class Projective {
|
||||
FF y;
|
||||
FF z;
|
||||
|
||||
static HOST_DEVICE_INLINE Projective generator() {
|
||||
return { FF { GEN::generator_x }, FF { GEN::generator_y }, FF::one()};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Projective zero() {
|
||||
return {FF::zero(), FF::one(), FF::zero()};
|
||||
}
|
||||
@@ -28,50 +24,56 @@ class Projective {
|
||||
return {point.x, point.y, FF::one()};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Projective generator() {
|
||||
return {FF::generator_x(), FF::generator_y(), FF::one()};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Projective neg(const Projective &point) {
|
||||
return {point.x, FF::neg(point.y), point.z};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Projective& p2) {
|
||||
const FF X1 = p1.x; // < 2
|
||||
const FF Y1 = p1.y; // < 2
|
||||
const FF Z1 = p1.z; // < 2
|
||||
const FF X2 = p2.x; // < 2
|
||||
const FF Y2 = p2.y; // < 2
|
||||
const FF Z2 = p2.z; // < 2
|
||||
const FF t00 = X1 * X2; // t00 ← X1 · X2 < 2
|
||||
const FF t01 = Y1 * Y2; // t01 ← Y1 · Y2 < 2
|
||||
const FF t02 = Z1 * Z2; // t02 ← Z1 · Z2 < 2
|
||||
const FF t03 = X1 + Y1; // t03 ← X1 + Y1 < 4
|
||||
const FF t04 = X2 + Y2; // t04 ← X2 + Y2 < 4
|
||||
const FF t05 = t03 * t04; // t03 ← t03 · t04 < 3
|
||||
const FF t06 = t00 + t01; // t06 ← t00 + t01 < 4
|
||||
const FF t07 = t05 - t06; // t05 ← t05 − t06 < 2
|
||||
const FF t08 = Y1 + Z1; // t08 ← Y1 + Z1 < 4
|
||||
const FF t09 = Y2 + Z2; // t09 ← Y2 + Z2 < 4
|
||||
const FF t10 = t08 * t09; // t10 ← t08 · t09 < 3
|
||||
const FF t11 = t01 + t02; // t11 ← t01 + t02 < 4
|
||||
const FF t12 = t10 - t11; // t12 ← t10 − t11 < 2
|
||||
const FF t13 = X1 + Z1; // t13 ← X1 + Z1 < 4
|
||||
const FF t14 = X2 + Z2; // t14 ← X2 + Z2 < 4
|
||||
const FF t15 = t13 * t14; // t15 ← t13 · t14 < 3
|
||||
const FF t16 = t00 + t02; // t16 ← t00 + t02 < 4
|
||||
const FF t17 = t15 - t16; // t17 ← t15 − t16 < 2
|
||||
const FF t18 = t00 + t00; // t18 ← t00 + t00 < 2
|
||||
const FF t19 = t18 + t00; // t19 ← t18 + t00 < 2
|
||||
const FF t20 = FF::mul(3 * B_VALUE, t02); // t20 ← b3 · t02 < 2
|
||||
const FF t21 = t01 + t20; // t21 ← t01 + t20 < 2
|
||||
const FF t22 = t01 - t20; // t22 ← t01 − t20 < 2
|
||||
const FF t23 = FF::mul(3 * B_VALUE, t17); // t23 ← b3 · t17 < 2
|
||||
const FF t24 = t12 * t23; // t24 ← t12 · t23 < 2
|
||||
const FF t25 = t07 * t22; // t25 ← t07 · t22 < 2
|
||||
const FF X3 = t25 - t24; // X3 ← t25 − t24 < 2
|
||||
const FF t27 = t23 * t19; // t27 ← t23 · t19 < 2
|
||||
const FF t28 = t22 * t21; // t28 ← t22 · t21 < 2
|
||||
const FF Y3 = t28 + t27; // Y3 ← t28 + t27 < 2
|
||||
const FF t30 = t19 * t07; // t30 ← t19 · t07 < 2
|
||||
const FF t31 = t21 * t12; // t31 ← t21 · t12 < 2
|
||||
const FF Z3 = t31 + t30; // Z3 ← t31 + t30 < 2
|
||||
const FF X1 = p1.x; // < 2
|
||||
const FF Y1 = p1.y; // < 2
|
||||
const FF Z1 = p1.z; // < 2
|
||||
const FF X2 = p2.x; // < 2
|
||||
const FF Y2 = p2.y; // < 2
|
||||
const FF Z2 = p2.z; // < 2
|
||||
const FF t00 = X1 * X2; // t00 ← X1 · X2 < 2
|
||||
const FF t01 = Y1 * Y2; // t01 ← Y1 · Y2 < 2
|
||||
const FF t02 = Z1 * Z2; // t02 ← Z1 · Z2 < 2
|
||||
const FF t03 = X1 + Y1; // t03 ← X1 + Y1 < 4
|
||||
const FF t04 = X2 + Y2; // t04 ← X2 + Y2 < 4
|
||||
const FF t05 = t03 * t04; // t03 ← t03 · t04 < 3
|
||||
const FF t06 = t00 + t01; // t06 ← t00 + t01 < 4
|
||||
const FF t07 = t05 - t06; // t05 ← t05 − t06 < 2
|
||||
const FF t08 = Y1 + Z1; // t08 ← Y1 + Z1 < 4
|
||||
const FF t09 = Y2 + Z2; // t09 ← Y2 + Z2 < 4
|
||||
const FF t10 = t08 * t09; // t10 ← t08 · t09 < 3
|
||||
const FF t11 = t01 + t02; // t11 ← t01 + t02 < 4
|
||||
const FF t12 = t10 - t11; // t12 ← t10 − t11 < 2
|
||||
const FF t13 = X1 + Z1; // t13 ← X1 + Z1 < 4
|
||||
const FF t14 = X2 + Z2; // t14 ← X2 + Z2 < 4
|
||||
const FF t15 = t13 * t14; // t15 ← t13 · t14 < 3
|
||||
const FF t16 = t00 + t02; // t16 ← t00 + t02 < 4
|
||||
const FF t17 = t15 - t16; // t17 ← t15 − t16 < 2
|
||||
const FF t18 = t00 + t00; // t18 ← t00 + t00 < 2
|
||||
const FF t19 = t18 + t00; // t19 ← t18 + t00 < 2
|
||||
const FF t20 = FF::template mul_unsigned<3>(
|
||||
FF::template mul_const<B_VALUE>(t02)); // t20 ← b3 · t02 < 2
|
||||
const FF t21 = t01 + t20; // t21 ← t01 + t20 < 2
|
||||
const FF t22 = t01 - t20; // t22 ← t01 − t20 < 2
|
||||
const FF t23 = FF::template mul_unsigned<3>(
|
||||
FF::template mul_const<B_VALUE>(t17)); // t23 ← b3 · t17 < 2
|
||||
const FF t24 = t12 * t23; // t24 ← t12 · t23 < 2
|
||||
const FF t25 = t07 * t22; // t25 ← t07 · t22 < 2
|
||||
const FF X3 = t25 - t24; // X3 ← t25 − t24 < 2
|
||||
const FF t27 = t23 * t19; // t27 ← t23 · t19 < 2
|
||||
const FF t28 = t22 * t21; // t28 ← t22 · t21 < 2
|
||||
const FF Y3 = t28 + t27; // Y3 ← t28 + t27 < 2
|
||||
const FF t30 = t19 * t07; // t30 ← t19 · t07 < 2
|
||||
const FF t31 = t21 * t12; // t31 ← t21 · t12 < 2
|
||||
const FF Z3 = t31 + t30; // Z3 ← t31 + t30 < 2
|
||||
return {X3, Y3, Z3};
|
||||
}
|
||||
|
||||
@@ -85,7 +87,7 @@ class Projective {
|
||||
}
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Projective& point) {
|
||||
os << "x: " << point.x << "; y: " << point.y << "; z: " << point.z;
|
||||
os << "Point { x: " << point.x << "; y: " << point.y << "; z: " << point.z << " }";
|
||||
return os;
|
||||
}
|
||||
|
||||
@@ -95,7 +97,7 @@ class Projective {
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator*(SCALAR_FF scalar, const Projective& point) {
|
||||
Projective res = zero();
|
||||
#ifdef CUDA_ARCH
|
||||
#ifdef __CUDA_ARCH__
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < SCALAR_FF::NBITS; i++) {
|
||||
@@ -120,7 +122,7 @@ class Projective {
|
||||
static HOST_DEVICE_INLINE bool is_on_curve(const Projective &point) {
|
||||
if (is_zero(point))
|
||||
return true;
|
||||
bool eq_holds = (FF::mul(B_VALUE, FF::sqr(point.z) * point.z) + FF::sqr(point.x) * point.x == point.z * FF::sqr(point.y));
|
||||
bool eq_holds = (FF::template mul_const<B_VALUE>(FF::sqr(point.z) * point.z) + FF::sqr(point.x) * point.x == point.z * FF::sqr(point.y));
|
||||
return point.z != FF::zero() && eq_holds;
|
||||
}
|
||||
|
||||
|
||||
@@ -26,14 +26,20 @@ protected:
|
||||
|
||||
proj *points1{};
|
||||
proj *points2{};
|
||||
g2_proj *g2_points1{};
|
||||
g2_proj *g2_points2{};
|
||||
scalar_field *scalars1{};
|
||||
scalar_field *scalars2{};
|
||||
proj *zero_points{};
|
||||
g2_proj *g2_zero_points{};
|
||||
scalar_field *zero_scalars{};
|
||||
scalar_field *one_scalars{};
|
||||
affine *aff_points{};
|
||||
g2_affine *g2_aff_points{};
|
||||
proj *res_points1{};
|
||||
proj *res_points2{};
|
||||
g2_proj *g2_res_points1{};
|
||||
g2_proj *g2_res_points2{};
|
||||
scalar_field *res_scalars1{};
|
||||
scalar_field *res_scalars2{};
|
||||
|
||||
@@ -41,14 +47,20 @@ protected:
|
||||
assert(!cudaDeviceReset());
|
||||
assert(!cudaMallocManaged(&points1, n * sizeof(proj)));
|
||||
assert(!cudaMallocManaged(&points2, n * sizeof(proj)));
|
||||
assert(!cudaMallocManaged(&g2_points1, n * sizeof(g2_proj)));
|
||||
assert(!cudaMallocManaged(&g2_points2, n * sizeof(g2_proj)));
|
||||
assert(!cudaMallocManaged(&scalars1, n * sizeof(scalar_field)));
|
||||
assert(!cudaMallocManaged(&scalars2, n * sizeof(scalar_field)));
|
||||
assert(!cudaMallocManaged(&zero_points, n * sizeof(proj)));
|
||||
assert(!cudaMallocManaged(&g2_zero_points, n * sizeof(g2_proj)));
|
||||
assert(!cudaMallocManaged(&zero_scalars, n * sizeof(scalar_field)));
|
||||
assert(!cudaMallocManaged(&one_scalars, n * sizeof(scalar_field)));
|
||||
assert(!cudaMallocManaged(&aff_points, n * sizeof(affine)));
|
||||
assert(!cudaMallocManaged(&g2_aff_points, n * sizeof(g2_affine)));
|
||||
assert(!cudaMallocManaged(&res_points1, n * sizeof(proj)));
|
||||
assert(!cudaMallocManaged(&res_points2, n * sizeof(proj)));
|
||||
assert(!cudaMallocManaged(&g2_res_points1, n * sizeof(g2_proj)));
|
||||
assert(!cudaMallocManaged(&g2_res_points2, n * sizeof(g2_proj)));
|
||||
assert(!cudaMallocManaged(&res_scalars1, n * sizeof(scalar_field)));
|
||||
assert(!cudaMallocManaged(&res_scalars2, n * sizeof(scalar_field)));
|
||||
}
|
||||
@@ -56,14 +68,20 @@ protected:
|
||||
~PrimitivesTest() override {
|
||||
cudaFree(points1);
|
||||
cudaFree(points2);
|
||||
cudaFree(g2_points1);
|
||||
cudaFree(g2_points2);
|
||||
cudaFree(scalars1);
|
||||
cudaFree(scalars2);
|
||||
cudaFree(zero_points);
|
||||
cudaFree(g2_zero_points);
|
||||
cudaFree(zero_scalars);
|
||||
cudaFree(one_scalars);
|
||||
cudaFree(aff_points);
|
||||
cudaFree(g2_aff_points);
|
||||
cudaFree(res_points1);
|
||||
cudaFree(res_points2);
|
||||
cudaFree(g2_res_points1);
|
||||
cudaFree(g2_res_points2);
|
||||
cudaFree(res_scalars1);
|
||||
cudaFree(res_scalars2);
|
||||
cudaDeviceReset();
|
||||
@@ -72,14 +90,20 @@ protected:
|
||||
void SetUp() override {
|
||||
ASSERT_EQ(device_populate_random<proj>(points1, n), cudaSuccess);
|
||||
ASSERT_EQ(device_populate_random<proj>(points2, n), cudaSuccess);
|
||||
ASSERT_EQ(device_populate_random<g2_proj>(g2_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(device_populate_random<g2_proj>(g2_points2, n), cudaSuccess);
|
||||
ASSERT_EQ(device_populate_random<scalar_field>(scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(device_populate_random<scalar_field>(scalars2, n), cudaSuccess);
|
||||
ASSERT_EQ(device_set<proj>(zero_points, proj::zero(), n), cudaSuccess);
|
||||
ASSERT_EQ(device_set<g2_proj>(g2_zero_points, g2_proj::zero(), n), cudaSuccess);
|
||||
ASSERT_EQ(device_set<scalar_field>(zero_scalars, scalar_field::zero(), n), cudaSuccess);
|
||||
ASSERT_EQ(device_set<scalar_field>(one_scalars, scalar_field::one(), n), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(aff_points, 0, n * sizeof(affine)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(g2_aff_points, 0, n * sizeof(g2_affine)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(res_points1, 0, n * sizeof(proj)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(res_points2, 0, n * sizeof(proj)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(g2_res_points1, 0, n * sizeof(g2_proj)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(g2_res_points2, 0, n * sizeof(g2_proj)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(res_scalars1, 0, n * sizeof(scalar_field)), cudaSuccess);
|
||||
ASSERT_EQ(cudaMemset(res_scalars2, 0, n * sizeof(scalar_field)), cudaSuccess);
|
||||
}
|
||||
@@ -255,6 +279,104 @@ TEST_F(PrimitivesTest, ECMixedAdditionOfNegatedPointEqSubtraction) {
|
||||
ASSERT_EQ(res_points1[i], points1[i] + res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECRandomPointsAreOnCurve) {
|
||||
for (unsigned i = 0; i < 2; i++)
|
||||
ASSERT_PRED1(g2_proj::is_on_curve, g2_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECPointAdditionSubtractionCancel) {
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_sub(g2_res_points1, g2_points2, g2_res_points2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECPointZeroAddition) {
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_zero_points, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECPointAdditionHostDeviceEq) {
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i] + g2_points2[i], g2_res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationHostDeviceEq) {
|
||||
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(scalars1[i] * g2_points1[i], g2_res_points1[i]);
|
||||
}
|
||||
|
||||
// Multiplying each G2 point by the scalar one on the device must give back the same point.
// The multiplication has to run on the G2 buffers: the assertion below compares
// g2_points1 against g2_res_points1, so the kernel must write into g2_res_points1.
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByOne) {
  ASSERT_EQ(vec_mul(one_scalars, g2_points1, g2_res_points1, n), cudaSuccess);
  for (unsigned i = 0; i < n; i++)
    ASSERT_EQ(g2_points1[i], g2_res_points1[i]);
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByMinusOne) {
|
||||
ASSERT_EQ(vec_neg(one_scalars, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_neg(g2_points1, g2_res_points2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_res_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByTwo) {
|
||||
ASSERT_EQ(vec_add(one_scalars, one_scalars, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ((one_scalars[i] + one_scalars[i]) * g2_points1[i], g2_res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationInverseCancel) {
|
||||
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(field_vec_inv(scalars1, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, g2_res_points1, g2_res_points2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverMultiplication) {
|
||||
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars2, g2_res_points1, g2_res_points2, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_res_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverAddition) {
|
||||
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars2, g2_points1, g2_res_points2, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(res_scalars1[i] * g2_points1[i], g2_res_points1[i] + g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECProjectiveToAffine) {
|
||||
ASSERT_EQ(point_vec_to_affine(g2_points1, g2_aff_points, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_proj::from_affine(g2_aff_points[i]));
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECMixedPointAddition) {
|
||||
ASSERT_EQ(point_vec_to_affine(g2_points2, g2_aff_points, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_aff_points, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_res_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECMixedAdditionOfNegatedPointEqSubtraction) {
|
||||
ASSERT_EQ(point_vec_to_affine(g2_points2, g2_aff_points, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_sub(g2_points1, g2_aff_points, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_neg(g2_points2, g2_res_points2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_res_points1[i], g2_points1[i] + g2_res_points2[i]);
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
|
||||
@@ -3,12 +3,17 @@
|
||||
// TODO: change the curve depending on env variable
|
||||
#include "../curves/bls12_381.cuh"
|
||||
#include "projective.cuh"
|
||||
#include "field.cuh"
|
||||
#include "extension_field.cuh"
|
||||
|
||||
typedef Field<fp_config> scalar_field;
|
||||
typedef Field<fq_config> base_field;
|
||||
typedef Affine<base_field> affine;
|
||||
typedef Projective<base_field, scalar_field, group_generator, weierstrass_b> proj;
|
||||
static constexpr base_field b = base_field{ weierstrass_b };
|
||||
typedef Projective<base_field, scalar_field, b> proj;
|
||||
typedef ExtensionField<fq_config> base_extension_field;
|
||||
typedef Affine<base_extension_field> g2_affine;
|
||||
static constexpr base_extension_field b2 = base_extension_field{ base_field {b_re}, base_field {b_im}};
|
||||
typedef Projective<base_extension_field, scalar_field, b2> g2_proj;
|
||||
|
||||
|
||||
template <class T1, class T2>
|
||||
@@ -93,15 +98,16 @@ int field_vec_sqr(const scalar_field *x, scalar_field *result, const unsigned co
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
__global__ void to_affine_points_kernel(const proj *x, affine *result, const unsigned count) {
|
||||
template <class P, class A>
|
||||
__global__ void to_affine_points_kernel(const P *x, A *result, const unsigned count) {
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count)
|
||||
return;
|
||||
result[gid] = proj::to_affine(x[gid]);
|
||||
result[gid] = P::to_affine(x[gid]);
|
||||
}
|
||||
|
||||
int point_vec_to_affine(const proj *x, affine *result, const unsigned count) {
|
||||
to_affine_points_kernel<<<(count - 1) / 32 + 1, 32>>>(x, result, count);
|
||||
template <class P, class A> int point_vec_to_affine(const P *x, A *result, const unsigned count) {
|
||||
to_affine_points_kernel<P, A><<<(count - 1) / 32 + 1, 32>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
334
src/curve_templates/curve_different_limbs.rs
Normal file
334
src/curve_templates/curve_different_limbs.rs
Normal file
@@ -0,0 +1,334 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_CURVE_NAME_L::{Fq as Fq_CURVE_NAME_U, Fr as Fr_CURVE_NAME_U, G1Affine as G1Affine_CURVE_NAME_U, G1Projective as G1Projective_CURVE_NAME_U};
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger_limbs_q, BigInteger_limbs_p, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
/// A field element stored as `NUM_LIMBS` 32-bit limbs.
///
/// `#[repr(C)]` keeps the layout to the single limb array. Equality is plain
/// limb-by-limb comparison, which is a total equivalence relation, so `Eq` is
/// derived alongside `PartialEq`.
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
#[repr(C)]
pub struct Field_CURVE_NAME_U<const NUM_LIMBS: usize> {
    /// The limbs; `to_bytes_le` serialises them in index order, each limb little-endian.
    pub s: [u32; NUM_LIMBS],
}
|
||||
|
||||
// SAFETY: `Field_CURVE_NAME_U` is `#[repr(C)]`, `Copy`, and its only field is a
// `[u32; NUM_LIMBS]` array, so it holds no pointers or references to host memory.
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_CURVE_NAME_U<NUM_LIMBS> {}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Default for Field_CURVE_NAME_U<NUM_LIMBS> {
|
||||
fn default() -> Self {
|
||||
Field_CURVE_NAME_U::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Field_CURVE_NAME_U<NUM_LIMBS> {
|
||||
pub fn zero() -> Self {
|
||||
Field_CURVE_NAME_U {
|
||||
s: [0u32; NUM_LIMBS],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
Field_CURVE_NAME_U { s }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.s
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE(review): `limbs_q` / `limbs_p` look like template placeholders, presumably replaced
// with concrete limb counts when this template is instantiated for a curve — confirm
// against the generator.

/// Number of `u32` limbs in a base-field element.
pub const BASE_LIMBS_CURVE_NAME_U: usize = limbs_q;
/// Number of `u32` limbs in a scalar-field element.
pub const SCALAR_LIMBS_CURVE_NAME_U: usize = limbs_p;

/// Base-field element: the type of the point coordinates below.
pub type BaseField_CURVE_NAME_U = Field_CURVE_NAME_U<BASE_LIMBS_CURVE_NAME_U>;
/// Scalar-field element.
pub type ScalarField_CURVE_NAME_U = Field_CURVE_NAME_U<SCALAR_LIMBS_CURVE_NAME_U>;
|
||||
|
||||
/// Copies `val` into a fixed-size limb array, zero-filling the tail when `val` is shorter.
///
/// # Panics
/// Panics with "slice has too many elements" when `val` holds more than `NUM_LIMBS` limbs.
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    if val.len() > NUM_LIMBS {
        panic!("slice has too many elements");
    }

    // Start from all zeros so a short input is padded in its high limbs;
    // an input of exactly NUM_LIMBS overwrites every slot.
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..val.len()].copy_from_slice(val);
    limbs
}
|
||||
|
||||
//
|
||||
impl BaseField_CURVE_NAME_U {
|
||||
pub fn limbs(&self) -> [u32; BASE_LIMBS_CURVE_NAME_U] {
|
||||
self.s
|
||||
}
|
||||
|
||||
pub fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
s: get_fixed_limbs(value),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger_limbs_q {
|
||||
BigInteger_limbs_q::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger_limbs_q) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
}
|
||||
//
|
||||
|
||||
impl ScalarField_CURVE_NAME_U {
|
||||
pub fn limbs(&self) -> [u32; SCALAR_LIMBS_CURVE_NAME_U] {
|
||||
self.s
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger_limbs_p {
|
||||
BigInteger_limbs_p::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger_limbs_p) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
|
||||
pub fn to_ark_transmute(&self) -> BigInteger_limbs_p {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_ark_transmute(v: BigInteger_limbs_p) -> ScalarField_CURVE_NAME_U {
|
||||
unsafe { transmute(v) }
|
||||
}
|
||||
}
|
||||
|
||||
/// A curve point stored as three base-field coordinates `x`, `y`, `z`.
///
/// `#[repr(C)]` fixes the field order because pointers to this struct are passed to the
/// `extern "C"` equality function declared in this file. `PartialEq` is not derived;
/// it is implemented separately through that function.
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Point_CURVE_NAME_U {
    pub x: BaseField_CURVE_NAME_U,
    pub y: BaseField_CURVE_NAME_U,
    pub z: BaseField_CURVE_NAME_U,
}
|
||||
|
||||
impl Default for Point_CURVE_NAME_U {
|
||||
fn default() -> Self {
|
||||
Point_CURVE_NAME_U::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl Point_CURVE_NAME_U {
|
||||
pub fn zero() -> Self {
|
||||
Point_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U::zero(),
|
||||
y: BaseField_CURVE_NAME_U::one(),
|
||||
z: BaseField_CURVE_NAME_U::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infinity() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Projective_CURVE_NAME_U {
|
||||
//TODO: generic conversion
|
||||
self.to_ark_affine().into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_CURVE_NAME_U {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_CURVE_NAME_U::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_CURVE_NAME_U) -> Point_CURVE_NAME_U {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U::from_ark((ark.x * z_invsq).into_repr()),
|
||||
y: BaseField_CURVE_NAME_U::from_ark((ark.y * z_invq3).into_repr()),
|
||||
z: BaseField_CURVE_NAME_U::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
fn eq_CURVE_NAME_L(point1: *const Point_CURVE_NAME_U, point2: *const Point_CURVE_NAME_U) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point_CURVE_NAME_U {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq_CURVE_NAME_L(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
|
||||
/// An affine point stored as two base-field coordinates `x` and `y`.
///
/// There is no infinity flag or `z` coordinate; `to_projective` extends it with `z = 1`.
/// Equality is the derived field-by-field comparison.
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinity_CURVE_NAME_U {
    pub x: BaseField_CURVE_NAME_U,
    pub y: BaseField_CURVE_NAME_U,
}
|
||||
|
||||
impl Default for PointAffineNoInfinity_CURVE_NAME_U {
|
||||
fn default() -> Self {
|
||||
PointAffineNoInfinity_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U::zero(),
|
||||
y: BaseField_CURVE_NAME_U::zero(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PointAffineNoInfinity_CURVE_NAME_U {
|
||||
// TODO: generics
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinity_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_CURVE_NAME_U {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> Point_CURVE_NAME_U {
|
||||
Point_CURVE_NAME_U {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BaseField_CURVE_NAME_U::one(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_CURVE_NAME_U {
|
||||
G1Affine_CURVE_NAME_U::new(Fq_CURVE_NAME_U::new(self.x.to_ark()), Fq_CURVE_NAME_U::new(self.y.to_ark()), false)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_CURVE_NAME_U {
|
||||
G1Affine_CURVE_NAME_U::new(
|
||||
Fq_CURVE_NAME_U::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_CURVE_NAME_U::from_repr(self.y.to_ark()).unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(p: &G1Affine_CURVE_NAME_U) -> Self {
|
||||
PointAffineNoInfinity_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U::from_ark(p.x.into_repr()),
|
||||
y: BaseField_CURVE_NAME_U::from_ark(p.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Point_CURVE_NAME_U {
|
||||
// TODO: generics
|
||||
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
Point_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_CURVE_NAME_U {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
z: BaseField_CURVE_NAME_U {
|
||||
s: get_fixed_limbs(z),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> Point_CURVE_NAME_U {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BASE_LIMBS_CURVE_NAME_U, "length must be 3 * {}", BASE_LIMBS_CURVE_NAME_U);
|
||||
Point_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U {
|
||||
s: value[..BASE_LIMBS_CURVE_NAME_U].try_into().unwrap(),
|
||||
},
|
||||
y: BaseField_CURVE_NAME_U {
|
||||
s: value[BASE_LIMBS_CURVE_NAME_U..BASE_LIMBS_CURVE_NAME_U * 2].try_into().unwrap(),
|
||||
},
|
||||
z: BaseField_CURVE_NAME_U {
|
||||
s: value[BASE_LIMBS_CURVE_NAME_U * 2..].try_into().unwrap(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity_CURVE_NAME_U {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U::from_ark(ark_affine.x.into_repr()),
|
||||
y: BaseField_CURVE_NAME_U::from_ark(ark_affine.y.into_repr()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_CURVE_NAME_U {
|
||||
PointAffineNoInfinity_CURVE_NAME_U {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarField_CURVE_NAME_U {
|
||||
pub fn from_limbs(value: &[u32]) -> ScalarField_CURVE_NAME_U {
|
||||
ScalarField_CURVE_NAME_U {
|
||||
s: get_fixed_limbs(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ark_CURVE_NAME_L::{Fr as Fr_CURVE_NAME_U};
|
||||
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::CURVE_NAME_L::{Point_CURVE_NAME_U, ScalarField_CURVE_NAME_U}};
|
||||
|
||||
#[test]
|
||||
fn test_ark_scalar_convert() {
|
||||
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
|
||||
let scalar = ScalarField_CURVE_NAME_U::from_limbs(&limbs);
|
||||
assert_eq!(
|
||||
scalar.to_ark(),
|
||||
scalar.to_ark_transmute(),
|
||||
"{:08X?} {:08X?}",
|
||||
scalar.to_ark(),
|
||||
scalar.to_ark_transmute()
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(non_snake_case)]
|
||||
fn test_point_equality() {
|
||||
let left = Point_CURVE_NAME_U::zero();
|
||||
let right = Point_CURVE_NAME_U::zero();
|
||||
assert_eq!(left, right);
|
||||
let right = Point_CURVE_NAME_U::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
|
||||
assert_eq!(left, right);
|
||||
let right = Point_CURVE_NAME_U::from_limbs(
|
||||
&[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
&[0; 12],
|
||||
&[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
|
||||
);
|
||||
assert!(left != right);
|
||||
}
|
||||
}
|
||||
312
src/curve_templates/curve_same_limbs.rs
Normal file
312
src/curve_templates/curve_same_limbs.rs
Normal file
@@ -0,0 +1,312 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_CURVE_NAME_L::{Fq as Fq_CURVE_NAME_U, Fr as Fr_CURVE_NAME_U, G1Affine as G1Affine_CURVE_NAME_U, G1Projective as G1Projective_CURVE_NAME_U};
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger_limbs_p, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
#[derive(Debug, PartialEq, Copy, Clone)]
|
||||
#[repr(C)]
|
||||
pub struct Field_CURVE_NAME_U<const NUM_LIMBS: usize> {
|
||||
pub s: [u32; NUM_LIMBS],
|
||||
}
|
||||
|
||||
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_CURVE_NAME_U<NUM_LIMBS> {}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Default for Field_CURVE_NAME_U<NUM_LIMBS> {
|
||||
fn default() -> Self {
|
||||
Field_CURVE_NAME_U::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Field_CURVE_NAME_U<NUM_LIMBS> {
|
||||
pub fn zero() -> Self {
|
||||
Field_CURVE_NAME_U {
|
||||
s: [0u32; NUM_LIMBS],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
Field_CURVE_NAME_U { s }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.s
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
pub const BASE_LIMBS_CURVE_NAME_U: usize = limbs_p;
|
||||
pub const SCALAR_LIMBS_CURVE_NAME_U: usize = limbs_p;
|
||||
|
||||
pub type BaseField_CURVE_NAME_U = Field_CURVE_NAME_U<BASE_LIMBS_CURVE_NAME_U>;
|
||||
pub type ScalarField_CURVE_NAME_U = Field_CURVE_NAME_U<SCALAR_LIMBS_CURVE_NAME_U>;
|
||||
|
||||
/// Fits `val` into an array of exactly `NUM_LIMBS` limbs.
///
/// The limbs of `val` are copied to the front of the array and any remaining
/// positions are left as zero.
///
/// # Panics
/// Panics with "slice has too many elements" when `val` is longer than
/// `NUM_LIMBS`.
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    if val.len() > NUM_LIMBS {
        panic!("slice has too many elements");
    }
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..val.len()].copy_from_slice(val);
    limbs
}
|
||||
|
||||
impl ScalarField_CURVE_NAME_U {
|
||||
pub fn limbs(&self) -> [u32; SCALAR_LIMBS_CURVE_NAME_U] {
|
||||
self.s
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger_limbs_p {
|
||||
BigInteger_limbs_p::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger_limbs_p) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
|
||||
pub fn to_ark_transmute(&self) -> BigInteger_limbs_p {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_ark_transmute(v: BigInteger_limbs_p) -> ScalarField_CURVE_NAME_U {
|
||||
unsafe { transmute(v) }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct Point_CURVE_NAME_U {
|
||||
pub x: BaseField_CURVE_NAME_U,
|
||||
pub y: BaseField_CURVE_NAME_U,
|
||||
pub z: BaseField_CURVE_NAME_U,
|
||||
}
|
||||
|
||||
impl Default for Point_CURVE_NAME_U {
|
||||
fn default() -> Self {
|
||||
Point_CURVE_NAME_U::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl Point_CURVE_NAME_U {
|
||||
pub fn zero() -> Self {
|
||||
Point_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U::zero(),
|
||||
y: BaseField_CURVE_NAME_U::one(),
|
||||
z: BaseField_CURVE_NAME_U::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infinity() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Projective_CURVE_NAME_U {
|
||||
//TODO: generic conversion
|
||||
self.to_ark_affine().into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_CURVE_NAME_U {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_CURVE_NAME_U::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_CURVE_NAME_U) -> Point_CURVE_NAME_U {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U::from_ark((ark.x * z_invsq).into_repr()),
|
||||
y: BaseField_CURVE_NAME_U::from_ark((ark.y * z_invq3).into_repr()),
|
||||
z: BaseField_CURVE_NAME_U::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
fn eq_CURVE_NAME_L(point1: *const Point_CURVE_NAME_U, point2: *const Point_CURVE_NAME_U) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point_CURVE_NAME_U {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq_CURVE_NAME_L(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinity_CURVE_NAME_U {
|
||||
pub x: BaseField_CURVE_NAME_U,
|
||||
pub y: BaseField_CURVE_NAME_U,
|
||||
}
|
||||
|
||||
impl Default for PointAffineNoInfinity_CURVE_NAME_U {
|
||||
fn default() -> Self {
|
||||
PointAffineNoInfinity_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U::zero(),
|
||||
y: BaseField_CURVE_NAME_U::zero(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PointAffineNoInfinity_CURVE_NAME_U {
|
||||
// TODO: generics
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinity_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_CURVE_NAME_U {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> Point_CURVE_NAME_U {
|
||||
Point_CURVE_NAME_U {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BaseField_CURVE_NAME_U::one(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_CURVE_NAME_U {
|
||||
G1Affine_CURVE_NAME_U::new(Fq_CURVE_NAME_U::new(self.x.to_ark()), Fq_CURVE_NAME_U::new(self.y.to_ark()), false)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_CURVE_NAME_U {
|
||||
G1Affine_CURVE_NAME_U::new(
|
||||
Fq_CURVE_NAME_U::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_CURVE_NAME_U::from_repr(self.y.to_ark()).unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(p: &G1Affine_CURVE_NAME_U) -> Self {
|
||||
PointAffineNoInfinity_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U::from_ark(p.x.into_repr()),
|
||||
y: BaseField_CURVE_NAME_U::from_ark(p.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Point_CURVE_NAME_U {
|
||||
// TODO: generics
|
||||
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
Point_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_CURVE_NAME_U {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
z: BaseField_CURVE_NAME_U {
|
||||
s: get_fixed_limbs(z),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> Point_CURVE_NAME_U {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BASE_LIMBS_CURVE_NAME_U, "length must be 3 * {}", BASE_LIMBS_CURVE_NAME_U);
|
||||
Point_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U {
|
||||
s: value[..BASE_LIMBS_CURVE_NAME_U].try_into().unwrap(),
|
||||
},
|
||||
y: BaseField_CURVE_NAME_U {
|
||||
s: value[BASE_LIMBS_CURVE_NAME_U..BASE_LIMBS_CURVE_NAME_U * 2].try_into().unwrap(),
|
||||
},
|
||||
z: BaseField_CURVE_NAME_U {
|
||||
s: value[BASE_LIMBS_CURVE_NAME_U * 2..].try_into().unwrap(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity_CURVE_NAME_U {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity_CURVE_NAME_U {
|
||||
x: BaseField_CURVE_NAME_U::from_ark(ark_affine.x.into_repr()),
|
||||
y: BaseField_CURVE_NAME_U::from_ark(ark_affine.y.into_repr()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_CURVE_NAME_U {
|
||||
PointAffineNoInfinity_CURVE_NAME_U {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarField_CURVE_NAME_U {
|
||||
pub fn from_limbs(value: &[u32]) -> ScalarField_CURVE_NAME_U {
|
||||
ScalarField_CURVE_NAME_U {
|
||||
s: get_fixed_limbs(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ark_CURVE_NAME_L::{Fr as Fr_CURVE_NAME_U};
|
||||
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::CURVE_NAME_L::{Point_CURVE_NAME_U, ScalarField_CURVE_NAME_U}};
|
||||
|
||||
#[test]
|
||||
fn test_ark_scalar_convert() {
|
||||
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
|
||||
let scalar = ScalarField_CURVE_NAME_U::from_limbs(&limbs);
|
||||
assert_eq!(
|
||||
scalar.to_ark(),
|
||||
scalar.to_ark_transmute(),
|
||||
"{:08X?} {:08X?}",
|
||||
scalar.to_ark(),
|
||||
scalar.to_ark_transmute()
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(non_snake_case)]
|
||||
fn test_point_equality() {
|
||||
let left = Point_CURVE_NAME_U::zero();
|
||||
let right = Point_CURVE_NAME_U::zero();
|
||||
assert_eq!(left, right);
|
||||
let right = Point_CURVE_NAME_U::from_limbs(&[0; 8], &[2, 0, 0, 0, 0, 0, 0, 0], &[0; 8]);
|
||||
assert_eq!(left, right);
|
||||
let right = Point_CURVE_NAME_U::from_limbs(
|
||||
&[2, 0, 0, 0, 0, 0, 0, 0],
|
||||
&[0; 8],
|
||||
&[1, 0, 0, 0, 0, 0, 0, 0],
|
||||
);
|
||||
assert!(left != right);
|
||||
}
|
||||
}
|
||||
1486
src/curve_templates/test.rs
Normal file
1486
src/curve_templates/test.rs
Normal file
File diff suppressed because it is too large
Load Diff
332
src/curves/bls12_377.rs
Normal file
332
src/curves/bls12_377.rs
Normal file
@@ -0,0 +1,332 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_bls12_377::{Fq as Fq_BLS12_377, Fr as Fr_BLS12_377, G1Affine as G1Affine_BLS12_377, G1Projective as G1Projective_BLS12_377};
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
#[derive(Debug, PartialEq, Copy, Clone)]
|
||||
#[repr(C)]
|
||||
pub struct Field_BLS12_377<const NUM_LIMBS: usize> {
|
||||
pub s: [u32; NUM_LIMBS],
|
||||
}
|
||||
|
||||
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_BLS12_377<NUM_LIMBS> {}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Default for Field_BLS12_377<NUM_LIMBS> {
|
||||
fn default() -> Self {
|
||||
Field_BLS12_377::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Field_BLS12_377<NUM_LIMBS> {
|
||||
pub fn zero() -> Self {
|
||||
Field_BLS12_377 {
|
||||
s: [0u32; NUM_LIMBS],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
Field_BLS12_377 { s }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.s
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
pub const BASE_LIMBS_BLS12_377: usize = 12;
|
||||
pub const SCALAR_LIMBS_BLS12_377: usize = 8;
|
||||
|
||||
pub type BaseField_BLS12_377 = Field_BLS12_377<BASE_LIMBS_BLS12_377>;
|
||||
pub type ScalarField_BLS12_377 = Field_BLS12_377<SCALAR_LIMBS_BLS12_377>;
|
||||
|
||||
/// Fits `val` into an array of exactly `NUM_LIMBS` limbs.
///
/// The limbs of `val` are copied to the front of the array and any remaining
/// positions are left as zero.
///
/// # Panics
/// Panics with "slice has too many elements" when `val` is longer than
/// `NUM_LIMBS`.
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    if val.len() > NUM_LIMBS {
        panic!("slice has too many elements");
    }
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..val.len()].copy_from_slice(val);
    limbs
}
|
||||
|
||||
impl BaseField_BLS12_377 {
|
||||
pub fn limbs(&self) -> [u32; BASE_LIMBS_BLS12_377] {
|
||||
self.s
|
||||
}
|
||||
|
||||
pub fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
s: get_fixed_limbs(value),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger384 {
|
||||
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger384) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarField_BLS12_377 {
|
||||
pub fn limbs(&self) -> [u32; SCALAR_LIMBS_BLS12_377] {
|
||||
self.s
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger256) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
|
||||
pub fn to_ark_transmute(&self) -> BigInteger256 {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_ark_transmute(v: BigInteger256) -> ScalarField_BLS12_377 {
|
||||
unsafe { transmute(v) }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct Point_BLS12_377 {
|
||||
pub x: BaseField_BLS12_377,
|
||||
pub y: BaseField_BLS12_377,
|
||||
pub z: BaseField_BLS12_377,
|
||||
}
|
||||
|
||||
impl Default for Point_BLS12_377 {
|
||||
fn default() -> Self {
|
||||
Point_BLS12_377::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl Point_BLS12_377 {
|
||||
pub fn zero() -> Self {
|
||||
Point_BLS12_377 {
|
||||
x: BaseField_BLS12_377::zero(),
|
||||
y: BaseField_BLS12_377::one(),
|
||||
z: BaseField_BLS12_377::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infinity() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Projective_BLS12_377 {
|
||||
//TODO: generic conversion
|
||||
self.to_ark_affine().into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_BLS12_377 {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_BLS12_377::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_BLS12_377) -> Point_BLS12_377 {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point_BLS12_377 {
|
||||
x: BaseField_BLS12_377::from_ark((ark.x * z_invsq).into_repr()),
|
||||
y: BaseField_BLS12_377::from_ark((ark.y * z_invq3).into_repr()),
|
||||
z: BaseField_BLS12_377::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
fn eq_bls12_377(point1: *const Point_BLS12_377, point2: *const Point_BLS12_377) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point_BLS12_377 {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq_bls12_377(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinity_BLS12_377 {
|
||||
pub x: BaseField_BLS12_377,
|
||||
pub y: BaseField_BLS12_377,
|
||||
}
|
||||
|
||||
impl Default for PointAffineNoInfinity_BLS12_377 {
|
||||
fn default() -> Self {
|
||||
PointAffineNoInfinity_BLS12_377 {
|
||||
x: BaseField_BLS12_377::zero(),
|
||||
y: BaseField_BLS12_377::zero(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PointAffineNoInfinity_BLS12_377 {
|
||||
// TODO: generics
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinity_BLS12_377 {
|
||||
x: BaseField_BLS12_377 {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_BLS12_377 {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> Point_BLS12_377 {
|
||||
Point_BLS12_377 {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BaseField_BLS12_377::one(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_BLS12_377 {
|
||||
G1Affine_BLS12_377::new(Fq_BLS12_377::new(self.x.to_ark()), Fq_BLS12_377::new(self.y.to_ark()), false)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_BLS12_377 {
|
||||
G1Affine_BLS12_377::new(
|
||||
Fq_BLS12_377::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_BLS12_377::from_repr(self.y.to_ark()).unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(p: &G1Affine_BLS12_377) -> Self {
|
||||
PointAffineNoInfinity_BLS12_377 {
|
||||
x: BaseField_BLS12_377::from_ark(p.x.into_repr()),
|
||||
y: BaseField_BLS12_377::from_ark(p.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Point_BLS12_377 {
|
||||
// TODO: generics
|
||||
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
Point_BLS12_377 {
|
||||
x: BaseField_BLS12_377 {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_BLS12_377 {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
z: BaseField_BLS12_377 {
|
||||
s: get_fixed_limbs(z),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> Point_BLS12_377 {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BASE_LIMBS_BLS12_377, "length must be 3 * {}", BASE_LIMBS_BLS12_377);
|
||||
Point_BLS12_377 {
|
||||
x: BaseField_BLS12_377 {
|
||||
s: value[..BASE_LIMBS_BLS12_377].try_into().unwrap(),
|
||||
},
|
||||
y: BaseField_BLS12_377 {
|
||||
s: value[BASE_LIMBS_BLS12_377..BASE_LIMBS_BLS12_377 * 2].try_into().unwrap(),
|
||||
},
|
||||
z: BaseField_BLS12_377 {
|
||||
s: value[BASE_LIMBS_BLS12_377 * 2..].try_into().unwrap(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity_BLS12_377 {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity_BLS12_377 {
|
||||
x: BaseField_BLS12_377::from_ark(ark_affine.x.into_repr()),
|
||||
y: BaseField_BLS12_377::from_ark(ark_affine.y.into_repr()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_BLS12_377 {
|
||||
PointAffineNoInfinity_BLS12_377 {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarField_BLS12_377 {
|
||||
pub fn from_limbs(value: &[u32]) -> ScalarField_BLS12_377 {
|
||||
ScalarField_BLS12_377 {
|
||||
s: get_fixed_limbs(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::curves::bls12_377::{Point_BLS12_377, ScalarField_BLS12_377};

    /// The limb-repacking conversion and the transmute-based conversion must
    /// produce the same big integer.
    #[test]
    fn test_ark_scalar_convert() {
        let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
        let scalar = ScalarField_BLS12_377::from_limbs(&limbs);
        assert_eq!(
            scalar.to_ark(),
            scalar.to_ark_transmute(),
            "{:08X?} {:08X?}",
            scalar.to_ark(),
            scalar.to_ark_transmute()
        )
    }

    /// Exercises `PartialEq` for projective points.
    #[test]
    fn test_point_equality() {
        let left = Point_BLS12_377::zero();
        let right = Point_BLS12_377::zero();
        assert_eq!(left, right);

        // Still z == 0, only y differs: expected to compare equal to zero().
        let right = Point_BLS12_377::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
        assert_eq!(left, right);

        // z == 1: expected to differ from zero().
        let right = Point_BLS12_377::from_limbs(
            &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            &[0; 12],
            &[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        );
        assert!(left != right);
    }
}
|
||||
332
src/curves/bls12_381.rs
Normal file
332
src/curves/bls12_381.rs
Normal file
@@ -0,0 +1,332 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_bls12_381::{Fq as Fq_BLS12_381, Fr as Fr_BLS12_381, G1Affine as G1Affine_BLS12_381, G1Projective as G1Projective_BLS12_381};
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
#[derive(Debug, PartialEq, Copy, Clone)]
|
||||
#[repr(C)]
|
||||
pub struct Field_BLS12_381<const NUM_LIMBS: usize> {
|
||||
pub s: [u32; NUM_LIMBS],
|
||||
}
|
||||
|
||||
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_BLS12_381<NUM_LIMBS> {}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Default for Field_BLS12_381<NUM_LIMBS> {
|
||||
fn default() -> Self {
|
||||
Field_BLS12_381::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Field_BLS12_381<NUM_LIMBS> {
|
||||
pub fn zero() -> Self {
|
||||
Field_BLS12_381 {
|
||||
s: [0u32; NUM_LIMBS],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
Field_BLS12_381 { s }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.s
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
pub const BASE_LIMBS_BLS12_381: usize = 12;
|
||||
pub const SCALAR_LIMBS_BLS12_381: usize = 8;
|
||||
|
||||
pub type BaseField_BLS12_381 = Field_BLS12_381<BASE_LIMBS_BLS12_381>;
|
||||
pub type ScalarField_BLS12_381 = Field_BLS12_381<SCALAR_LIMBS_BLS12_381>;
|
||||
|
||||
/// Fits `val` into an array of exactly `NUM_LIMBS` limbs.
///
/// The limbs of `val` are copied to the front of the array and any remaining
/// positions are left as zero.
///
/// # Panics
/// Panics with "slice has too many elements" when `val` is longer than
/// `NUM_LIMBS`.
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    if val.len() > NUM_LIMBS {
        panic!("slice has too many elements");
    }
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..val.len()].copy_from_slice(val);
    limbs
}
|
||||
|
||||
impl BaseField_BLS12_381 {
|
||||
pub fn limbs(&self) -> [u32; BASE_LIMBS_BLS12_381] {
|
||||
self.s
|
||||
}
|
||||
|
||||
pub fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
s: get_fixed_limbs(value),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger384 {
|
||||
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger384) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarField_BLS12_381 {
|
||||
pub fn limbs(&self) -> [u32; SCALAR_LIMBS_BLS12_381] {
|
||||
self.s
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger256) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
|
||||
pub fn to_ark_transmute(&self) -> BigInteger256 {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_ark_transmute(v: BigInteger256) -> ScalarField_BLS12_381 {
|
||||
unsafe { transmute(v) }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct Point_BLS12_381 {
|
||||
pub x: BaseField_BLS12_381,
|
||||
pub y: BaseField_BLS12_381,
|
||||
pub z: BaseField_BLS12_381,
|
||||
}
|
||||
|
||||
impl Default for Point_BLS12_381 {
|
||||
fn default() -> Self {
|
||||
Point_BLS12_381::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl Point_BLS12_381 {
|
||||
pub fn zero() -> Self {
|
||||
Point_BLS12_381 {
|
||||
x: BaseField_BLS12_381::zero(),
|
||||
y: BaseField_BLS12_381::one(),
|
||||
z: BaseField_BLS12_381::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infinity() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Projective_BLS12_381 {
|
||||
//TODO: generic conversion
|
||||
self.to_ark_affine().into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_BLS12_381 {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_BLS12_381::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_BLS12_381) -> Point_BLS12_381 {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point_BLS12_381 {
|
||||
x: BaseField_BLS12_381::from_ark((ark.x * z_invsq).into_repr()),
|
||||
y: BaseField_BLS12_381::from_ark((ark.y * z_invq3).into_repr()),
|
||||
z: BaseField_BLS12_381::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
fn eq_bls12_381(point1: *const Point_BLS12_381, point2: *const Point_BLS12_381) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point_BLS12_381 {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq_bls12_381(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinity_BLS12_381 {
|
||||
pub x: BaseField_BLS12_381,
|
||||
pub y: BaseField_BLS12_381,
|
||||
}
|
||||
|
||||
impl Default for PointAffineNoInfinity_BLS12_381 {
|
||||
fn default() -> Self {
|
||||
PointAffineNoInfinity_BLS12_381 {
|
||||
x: BaseField_BLS12_381::zero(),
|
||||
y: BaseField_BLS12_381::zero(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PointAffineNoInfinity_BLS12_381 {
|
||||
// TODO: generics
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinity_BLS12_381 {
|
||||
x: BaseField_BLS12_381 {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_BLS12_381 {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> Point_BLS12_381 {
|
||||
Point_BLS12_381 {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BaseField_BLS12_381::one(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_BLS12_381 {
|
||||
G1Affine_BLS12_381::new(Fq_BLS12_381::new(self.x.to_ark()), Fq_BLS12_381::new(self.y.to_ark()), false)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_BLS12_381 {
|
||||
G1Affine_BLS12_381::new(
|
||||
Fq_BLS12_381::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_BLS12_381::from_repr(self.y.to_ark()).unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(p: &G1Affine_BLS12_381) -> Self {
|
||||
PointAffineNoInfinity_BLS12_381 {
|
||||
x: BaseField_BLS12_381::from_ark(p.x.into_repr()),
|
||||
y: BaseField_BLS12_381::from_ark(p.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Point_BLS12_381 {
|
||||
// TODO: generics
|
||||
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
Point_BLS12_381 {
|
||||
x: BaseField_BLS12_381 {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_BLS12_381 {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
z: BaseField_BLS12_381 {
|
||||
s: get_fixed_limbs(z),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> Point_BLS12_381 {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BASE_LIMBS_BLS12_381, "length must be 3 * {}", BASE_LIMBS_BLS12_381);
|
||||
Point_BLS12_381 {
|
||||
x: BaseField_BLS12_381 {
|
||||
s: value[..BASE_LIMBS_BLS12_381].try_into().unwrap(),
|
||||
},
|
||||
y: BaseField_BLS12_381 {
|
||||
s: value[BASE_LIMBS_BLS12_381..BASE_LIMBS_BLS12_381 * 2].try_into().unwrap(),
|
||||
},
|
||||
z: BaseField_BLS12_381 {
|
||||
s: value[BASE_LIMBS_BLS12_381 * 2..].try_into().unwrap(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity_BLS12_381 {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity_BLS12_381 {
|
||||
x: BaseField_BLS12_381::from_ark(ark_affine.x.into_repr()),
|
||||
y: BaseField_BLS12_381::from_ark(ark_affine.y.into_repr()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_BLS12_381 {
|
||||
PointAffineNoInfinity_BLS12_381 {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarField_BLS12_381 {
|
||||
pub fn from_limbs(value: &[u32]) -> ScalarField_BLS12_381 {
|
||||
ScalarField_BLS12_381 {
|
||||
s: get_fixed_limbs(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::curves::bls12_381::{Point_BLS12_381, ScalarField_BLS12_381};

    /// The limb-repacking conversion and the transmute-based conversion must
    /// produce the same big integer.
    #[test]
    fn test_ark_scalar_convert() {
        let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
        let scalar = ScalarField_BLS12_381::from_limbs(&limbs);
        assert_eq!(
            scalar.to_ark(),
            scalar.to_ark_transmute(),
            "{:08X?} {:08X?}",
            scalar.to_ark(),
            scalar.to_ark_transmute()
        )
    }

    /// Exercises `PartialEq` for projective points.
    #[test]
    fn test_point_equality() {
        let left = Point_BLS12_381::zero();
        let right = Point_BLS12_381::zero();
        assert_eq!(left, right);

        // Still z == 0, only y differs: expected to compare equal to zero().
        let right = Point_BLS12_381::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
        assert_eq!(left, right);

        // z == 1: expected to differ from zero().
        let right = Point_BLS12_381::from_limbs(
            &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            &[0; 12],
            &[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        );
        assert!(left != right);
    }
}
|
||||
312
src/curves/bn254.rs
Normal file
312
src/curves/bn254.rs
Normal file
@@ -0,0 +1,312 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
#[derive(Debug, PartialEq, Copy, Clone)]
|
||||
#[repr(C)]
|
||||
pub struct Field_BN254<const NUM_LIMBS: usize> {
|
||||
pub s: [u32; NUM_LIMBS],
|
||||
}
|
||||
|
||||
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_BN254<NUM_LIMBS> {}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Default for Field_BN254<NUM_LIMBS> {
|
||||
fn default() -> Self {
|
||||
Field_BN254::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Field_BN254<NUM_LIMBS> {
|
||||
pub fn zero() -> Self {
|
||||
Field_BN254 {
|
||||
s: [0u32; NUM_LIMBS],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
Field_BN254 { s }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.s
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
pub const BASE_LIMBS_BN254: usize = 8;
|
||||
pub const SCALAR_LIMBS_BN254: usize = 8;
|
||||
|
||||
pub type BaseField_BN254 = Field_BN254<BASE_LIMBS_BN254>;
|
||||
pub type ScalarField_BN254 = Field_BN254<SCALAR_LIMBS_BN254>;
|
||||
|
||||
/// Copies `val` into an array of exactly `NUM_LIMBS` limbs.
///
/// A slice shorter than `NUM_LIMBS` is zero-filled at the end; a slice of
/// exactly `NUM_LIMBS` elements is copied as is.
///
/// # Panics
///
/// Panics with "slice has too many elements" if `val.len() > NUM_LIMBS`.
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    if val.len() > NUM_LIMBS {
        panic!("slice has too many elements");
    }
    // Start from all zeros so any unfilled tail is already padded.
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..val.len()].copy_from_slice(val);
    limbs
}
|
||||
|
||||
impl ScalarField_BN254 {
|
||||
pub fn limbs(&self) -> [u32; SCALAR_LIMBS_BN254] {
|
||||
self.s
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger256) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
|
||||
pub fn to_ark_transmute(&self) -> BigInteger256 {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_ark_transmute(v: BigInteger256) -> ScalarField_BN254 {
|
||||
unsafe { transmute(v) }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct Point_BN254 {
|
||||
pub x: BaseField_BN254,
|
||||
pub y: BaseField_BN254,
|
||||
pub z: BaseField_BN254,
|
||||
}
|
||||
|
||||
impl Default for Point_BN254 {
|
||||
fn default() -> Self {
|
||||
Point_BN254::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl Point_BN254 {
|
||||
pub fn zero() -> Self {
|
||||
Point_BN254 {
|
||||
x: BaseField_BN254::zero(),
|
||||
y: BaseField_BN254::one(),
|
||||
z: BaseField_BN254::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infinity() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Projective_BN254 {
|
||||
//TODO: generic conversion
|
||||
self.to_ark_affine().into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_BN254 {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_BN254::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_BN254::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_BN254::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_BN254::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_BN254) -> Point_BN254 {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point_BN254 {
|
||||
x: BaseField_BN254::from_ark((ark.x * z_invsq).into_repr()),
|
||||
y: BaseField_BN254::from_ark((ark.y * z_invq3).into_repr()),
|
||||
z: BaseField_BN254::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
fn eq_bn254(point1: *const Point_BN254, point2: *const Point_BN254) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point_BN254 {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq_bn254(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinity_BN254 {
|
||||
pub x: BaseField_BN254,
|
||||
pub y: BaseField_BN254,
|
||||
}
|
||||
|
||||
impl Default for PointAffineNoInfinity_BN254 {
|
||||
fn default() -> Self {
|
||||
PointAffineNoInfinity_BN254 {
|
||||
x: BaseField_BN254::zero(),
|
||||
y: BaseField_BN254::zero(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PointAffineNoInfinity_BN254 {
|
||||
// TODO: generics
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinity_BN254 {
|
||||
x: BaseField_BN254 {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_BN254 {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> Point_BN254 {
|
||||
Point_BN254 {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BaseField_BN254::one(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_BN254 {
|
||||
G1Affine_BN254::new(Fq_BN254::new(self.x.to_ark()), Fq_BN254::new(self.y.to_ark()), false)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_BN254 {
|
||||
G1Affine_BN254::new(
|
||||
Fq_BN254::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_BN254::from_repr(self.y.to_ark()).unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(p: &G1Affine_BN254) -> Self {
|
||||
PointAffineNoInfinity_BN254 {
|
||||
x: BaseField_BN254::from_ark(p.x.into_repr()),
|
||||
y: BaseField_BN254::from_ark(p.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Point_BN254 {
|
||||
// TODO: generics
|
||||
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
Point_BN254 {
|
||||
x: BaseField_BN254 {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_BN254 {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
z: BaseField_BN254 {
|
||||
s: get_fixed_limbs(z),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> Point_BN254 {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BASE_LIMBS_BN254, "length must be 3 * {}", BASE_LIMBS_BN254);
|
||||
Point_BN254 {
|
||||
x: BaseField_BN254 {
|
||||
s: value[..BASE_LIMBS_BN254].try_into().unwrap(),
|
||||
},
|
||||
y: BaseField_BN254 {
|
||||
s: value[BASE_LIMBS_BN254..BASE_LIMBS_BN254 * 2].try_into().unwrap(),
|
||||
},
|
||||
z: BaseField_BN254 {
|
||||
s: value[BASE_LIMBS_BN254 * 2..].try_into().unwrap(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity_BN254 {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity_BN254 {
|
||||
x: BaseField_BN254::from_ark(ark_affine.x.into_repr()),
|
||||
y: BaseField_BN254::from_ark(ark_affine.y.into_repr()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_BN254 {
|
||||
PointAffineNoInfinity_BN254 {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarField_BN254 {
|
||||
pub fn from_limbs(value: &[u32]) -> ScalarField_BN254 {
|
||||
ScalarField_BN254 {
|
||||
s: get_fixed_limbs(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::curves::bn254::{Point_BN254, ScalarField_BN254};

    /// The limb-repacking conversion (`to_ark`) and the transmuting one
    /// (`to_ark_transmute`) must produce the same big integer.
    #[test]
    fn test_ark_scalar_convert() {
        let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
        let scalar = ScalarField_BN254::from_limbs(&limbs);
        assert_eq!(
            scalar.to_ark(),
            scalar.to_ark_transmute(),
            "{:08X?} {:08X?}",
            scalar.to_ark(),
            scalar.to_ark_transmute()
        );
    }

    /// Equality is decided by the foreign point comparison, not by comparing
    /// limbs.
    #[test]
    fn test_point_equality() {
        let left = Point_BN254::zero();

        // Identical limbs compare equal.
        let right = Point_BN254::zero();
        assert_eq!(left, right);

        // A different `y` with `z = 0` is still expected to equal `zero()`.
        let right = Point_BN254::from_limbs(&[0; 8], &[2, 0, 0, 0, 0, 0, 0, 0], &[0; 8]);
        assert_eq!(left, right);

        // A point with `z = 1` must differ from `zero()`.
        let right = Point_BN254::from_limbs(
            &[2, 0, 0, 0, 0, 0, 0, 0],
            &[0; 8],
            &[1, 0, 0, 0, 0, 0, 0, 0],
        );
        assert_ne!(left, right);
    }
}
|
||||
3
src/curves/mod.rs
Normal file
3
src/curves/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
pub mod bls12_381;
|
||||
pub mod bls12_377;
|
||||
pub mod bn254;
|
||||
336
src/field.rs
336
src/field.rs
@@ -1,336 +0,0 @@
|
||||
use std::ffi::c_uint;
|
||||
use std::mem::transmute;
|
||||
|
||||
use ark_bls12_381::{Fq, G1Affine, G1Projective};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
|
||||
#[derive(Debug, PartialEq, Copy, Clone)]
|
||||
#[repr(C)]
|
||||
pub struct Field<const NUM_LIMBS: usize> {
|
||||
pub s: [u32; NUM_LIMBS],
|
||||
}
|
||||
|
||||
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field<NUM_LIMBS> {}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Default for Field<NUM_LIMBS> {
|
||||
fn default() -> Self {
|
||||
Field::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl<const NUM_LIMBS: usize> Field<NUM_LIMBS> {
|
||||
pub fn zero() -> Self {
|
||||
Field {
|
||||
s: [0u32; NUM_LIMBS],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn one() -> Self {
|
||||
let mut s = [0u32; NUM_LIMBS];
|
||||
s[0] = 1;
|
||||
Field { s }
|
||||
}
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.s
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
}
|
||||
|
||||
/// Number of 32-bit limbs in a base-field element: 12 by default, 8 when the
/// `bn254` feature is enabled.
///
/// The two definitions are mutually exclusive so that enabling the feature
/// does not produce a duplicate-definition error.
#[cfg(not(feature = "bn254"))]
pub const BASE_LIMBS: usize = 12;
#[cfg(feature = "bn254")]
pub const BASE_LIMBS: usize = 8;

/// Number of 32-bit limbs in a scalar-field element. The value is 8 with or
/// without the `bn254` feature, so a single definition is enough.
pub const SCALAR_LIMBS: usize = 8;
|
||||
|
||||
/// Alias for a `Field` with `BASE_LIMBS` limbs.
pub type BaseField = Field<BASE_LIMBS>;
|
||||
/// Alias for a `Field` with `SCALAR_LIMBS` limbs.
pub type ScalarField = Field<SCALAR_LIMBS>;
|
||||
|
||||
/// Copies `val` into an array of exactly `NUM_LIMBS` limbs.
///
/// A slice shorter than `NUM_LIMBS` is zero-filled at the end; a slice of
/// exactly `NUM_LIMBS` elements is copied as is.
///
/// # Panics
///
/// Panics with "slice has too many elements" if `val.len() > NUM_LIMBS`.
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    if val.len() > NUM_LIMBS {
        panic!("slice has too many elements");
    }
    // Start from all zeros so any unfilled tail is already padded.
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..val.len()].copy_from_slice(val);
    limbs
}
|
||||
|
||||
impl BaseField {
|
||||
pub fn limbs(&self) -> [u32; BASE_LIMBS] {
|
||||
self.s
|
||||
}
|
||||
|
||||
pub fn from_limbs(value: &[u32]) -> Self {
|
||||
Self {
|
||||
s: get_fixed_limbs(value),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger384 {
|
||||
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger384) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarField {
|
||||
pub fn limbs(&self) -> [u32; SCALAR_LIMBS] {
|
||||
self.s
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger256) -> Self {
|
||||
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
|
||||
}
|
||||
|
||||
pub fn to_ark_transmute(&self) -> BigInteger256 {
|
||||
unsafe { transmute(*self) }
|
||||
}
|
||||
|
||||
pub fn from_ark_transmute(v: BigInteger256) -> ScalarField {
|
||||
unsafe { transmute(v) }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct Point {
|
||||
pub x: BaseField,
|
||||
pub y: BaseField,
|
||||
pub z: BaseField,
|
||||
}
|
||||
|
||||
impl Default for Point {
|
||||
fn default() -> Self {
|
||||
Point::zero()
|
||||
}
|
||||
}
|
||||
|
||||
impl Point {
|
||||
pub fn zero() -> Self {
|
||||
Point {
|
||||
x: BaseField::zero(),
|
||||
y: BaseField::one(),
|
||||
z: BaseField::zero(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn infinity() -> Self {
|
||||
Self::zero()
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Projective {
|
||||
//TODO: generic conversion
|
||||
self.to_ark_affine().into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective) -> Point {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point {
|
||||
x: BaseField::from_ark((ark.x * z_invsq).into_repr()),
|
||||
y: BaseField::from_ark((ark.y * z_invq3).into_repr()),
|
||||
z: BaseField::one(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
|
||||
}
|
||||
|
||||
impl PartialEq for Point {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
unsafe { eq(self, other) != 0 }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
|
||||
#[repr(C)]
|
||||
pub struct PointAffineNoInfinity {
|
||||
pub x: BaseField,
|
||||
pub y: BaseField,
|
||||
}
|
||||
|
||||
impl Default for PointAffineNoInfinity {
|
||||
fn default() -> Self {
|
||||
PointAffineNoInfinity {
|
||||
x: BaseField::zero(),
|
||||
y: BaseField::zero(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl PointAffineNoInfinity {
|
||||
// TODO: generics
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinity {
|
||||
x: BaseField {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> Point {
|
||||
Point {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
z: BaseField::one(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine {
|
||||
G1Affine::new(Fq::new(self.x.to_ark()), Fq::new(self.y.to_ark()), false)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine {
|
||||
G1Affine::new(
|
||||
Fq::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq::from_repr(self.y.to_ark()).unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(p: &G1Affine) -> Self {
|
||||
PointAffineNoInfinity {
|
||||
x: BaseField::from_ark(p.x.into_repr()),
|
||||
y: BaseField::from_ark(p.y.into_repr()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Point {
|
||||
// TODO: generics
|
||||
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
Point {
|
||||
x: BaseField {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
z: BaseField {
|
||||
s: get_fixed_limbs(z),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> Point {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BASE_LIMBS, "length must be 3 * {}", BASE_LIMBS);
|
||||
Point {
|
||||
x: BaseField {
|
||||
s: value[..BASE_LIMBS].try_into().unwrap(),
|
||||
},
|
||||
y: BaseField {
|
||||
s: value[BASE_LIMBS..BASE_LIMBS * 2].try_into().unwrap(),
|
||||
},
|
||||
z: BaseField {
|
||||
s: value[BASE_LIMBS * 2..].try_into().unwrap(),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity {
|
||||
x: BaseField::from_ark(ark_affine.x.into_repr()),
|
||||
y: BaseField::from_ark(ark_affine.y.into_repr()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity {
|
||||
PointAffineNoInfinity {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ScalarField {
|
||||
pub fn from_limbs(value: &[u32]) -> ScalarField {
|
||||
ScalarField {
|
||||
s: get_fixed_limbs(value),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
mod tests {
    use crate::field::{Point, ScalarField};

    /// The limb-repacking conversion (`to_ark`) and the transmuting one
    /// (`to_ark_transmute`) must produce the same big integer.
    #[test]
    fn test_ark_scalar_convert() {
        let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
        let scalar = ScalarField::from_limbs(&limbs);
        assert_eq!(
            scalar.to_ark(),
            scalar.to_ark_transmute(),
            "{:08X?} {:08X?}",
            scalar.to_ark(),
            scalar.to_ark_transmute()
        );
    }

    /// Equality is decided by the foreign point comparison, not by comparing
    /// limbs.
    #[test]
    fn test_point_equality() {
        let left = Point::zero();

        // Identical limbs compare equal.
        let right = Point::zero();
        assert_eq!(left, right);

        // A different `y` with `z = 0` is still expected to equal `zero()`.
        let right = Point::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
        assert_eq!(left, right);

        // A point with `z = 1` must differ from `zero()`.
        let right = Point::from_limbs(
            &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            &[0; 12],
            &[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        );
        assert_ne!(left, right);
    }
}
|
||||
1488
src/lib.rs
1488
src/lib.rs
File diff suppressed because it is too large
Load Diff
1474
src/test_bls12_377.rs
Normal file
1474
src/test_bls12_377.rs
Normal file
File diff suppressed because it is too large
Load Diff
1479
src/test_bls12_381.rs
Normal file
1479
src/test_bls12_381.rs
Normal file
File diff suppressed because it is too large
Load Diff
1478
src/test_bn254.rs
Normal file
1478
src/test_bn254.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -33,7 +33,7 @@ pub fn u64_vec_to_u32_vec(arr_u64: &[u64]) -> Vec<u32> {
|
||||
mod tests {
|
||||
use ark_ff::BigInteger256;
|
||||
|
||||
use crate::field::ScalarField;
|
||||
use crate::curves::bls12_381::{ScalarField_BLS12_381 as ScalarField};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -46,7 +46,7 @@ mod tests {
|
||||
))
|
||||
.limbs();
|
||||
|
||||
assert_eq!(arr_u32, s);
|
||||
assert_eq!(arr_u32.to_vec(), s);
|
||||
|
||||
let arr_u64_expected = [
|
||||
0x0FFFFFFF00000001,
|
||||
|
||||
Reference in New Issue
Block a user