Compare commits

..

6 Commits

Author SHA1 Message Date
ido
a2f221dda1 integrated lsb mult and msb mult into mod mult. 2023-05-24 21:45:46 +03:00
ido
c166b6d4d5 MSB multiplier + test 2023-05-23 16:46:24 +03:00
ido
ecc3970c12 removed constant 2023-05-22 11:50:41 +03:00
ido
a20f603f6f ingo mp mult 2023-05-22 11:48:31 +03:00
ido
32a178bc27 LSB mult works 2023-05-18 15:33:47 +03:00
ido
8d094bd5fb LSB mult almost works 2023-05-15 19:56:57 +03:00
69 changed files with 3535 additions and 11190 deletions

View File

@@ -23,9 +23,7 @@ ark-std = "0.3.0"
ark-ff = "0.3.0"
ark-poly = "0.3.0"
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
ark-bls12-381 = "0.3.0"
ark-bls12-377 = "0.3.0"
ark-bn254 = "0.3.0"
ark-bls12-381 = { version = "0.3.0", optional = true }
rustacuda = "0.1"
rustacuda_core = "0.1"
@@ -42,4 +40,3 @@ cc = { version = "1.0", features = ["parallel"] }
[features]
default = ["bls12_381"]
bls12_381 = ["ark-bls12-381/curve"]
g2 = []

View File

@@ -23,8 +23,8 @@ ICICLE is a CUDA implementation of general functions widely used in ZKP. ICICLE
- Affine: {x, y}
- Curves
- [BLS12-381]
- [BLS12-377]
- [BN254]
> NOTE: _Support for BN254 and BLS12-377 is planned_
## Build and usage
@@ -89,55 +89,6 @@ The flag `--test-threads=1` is needed because currently some tests might interfe
An example of using the Rust bindings library can be found in our [fast-danksharding implementation][FDI]
### Supporting Additional Curves
Supporting additional curves can be done as follows:
Create a JSON file with the curve parameters. The curve is defined by the following parameters:
- ``curve_name`` - e.g. ``bls12_381``.
- ``modolus_p`` - scalar field modulus (in decimal).
- ``bit_count_p`` - number of bits needed to represent ``modolus_p``.
- ``limb_p`` - number of 32-bit limbs needed to represent ``modolus_p`` (rounded up), e.g. 8 limbs for a 255-bit modulus.
- ``ntt_size`` - log of the maximal size subgroup of the scalar field.
- ``modolus_q`` - base field modulus (in decimal).
- ``bit_count_q`` - number of bits needed to represent ``modolus_q``.
- ``limb_q`` - number of 32-bit limbs needed to represent ``modolus_q`` (rounded up), e.g. 12 limbs for a 381-bit modulus.
- ``weierstrass_b`` - Weierstrass constant ``b`` of the curve.
- ``gen_x`` - x-value of a generator element for the curve.
- ``gen_y`` - y-value of a generator element for the curve.
Here's an example for BLS12-381.
```
{
"curve_name" : "bls12_381",
"modolus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
"bit_count_p" : 255,
"limb_p" : 8,
"ntt_size" : 32,
"modolus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
"bit_count_q" : 381,
"limb_q" : 12,
"weierstrass_b" : 4,
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569
}
```
Save the parameters JSON file in ``curve_parameters``.
Then run the Python script ``new_curve_script_rust.py`` from the main icicle folder:
```
python3 ./curve_parameters/new_curve_script_rust.py ./curve_parameters/bls12_381.json
```
The script does the following:
- Creates a folder in ``icicle/curves`` with the curve name, which contains all of the files needed for the supported operations in cuda.
- Adds the curve exported operations to ``icicle/curves/index.cu``.
- Creates a file with the curve name in ``src/curves`` with the relevant objects for the curve.
- Creates a test file with the curve name in ``src``.
Testing the new curve can be done by running the tests in ``test_curve_name`` (e.g. ``test_bls12_381``).
## Contributions
Join our [Discord Server](https://discord.gg/Y4SkbDf2Ff) and find us on the icicle channel. We will be happy to work together to support your use case and talk features, bugs and design.

View File

@@ -2,10 +2,7 @@ extern crate criterion;
use criterion::{criterion_group, criterion_main, Criterion};
use icicle_utils::{set_up_scalars, generate_random_points, commit_batch, get_rng, field::BaseField};
#[cfg(feature = "g2")]
use icicle_utils::{commit_batch_g2, field::ExtensionField};
use icicle_utils::{set_up_scalars, generate_random_points, commit_batch, get_rng};
use rustacuda::prelude::*;
@@ -13,35 +10,20 @@ const LOG_MSM_SIZES: [usize; 1] = [12];
const BATCH_SIZES: [usize; 2] = [128, 256];
fn bench_msm(c: &mut Criterion) {
let mut group = c.benchmark_group("MSM");
for log_msm_size in LOG_MSM_SIZES {
for batch_size in BATCH_SIZES {
let msm_size = 1 << log_msm_size;
let (scalars, _, _) = set_up_scalars(msm_size, 0, false);
let batch_scalars = vec![scalars; batch_size].concat();
let mut d_scalars = DeviceBuffer::from_slice(&batch_scalars[..]).unwrap();
let points = generate_random_points::<BaseField>(msm_size, get_rng(None));
let points = generate_random_points(msm_size, get_rng(None));
let batch_points = vec![points; batch_size].concat();
let mut d_points = DeviceBuffer::from_slice(&batch_points[..]).unwrap();
#[cfg(feature = "g2")]
let g2_points = generate_random_points::<ExtensionField>(msm_size, get_rng(None));
#[cfg(feature = "g2")]
let g2_batch_points = vec![g2_points; batch_size].concat();
#[cfg(feature = "g2")]
let mut d_g2_points = DeviceBuffer::from_slice(&g2_batch_points[..]).unwrap();
group.sample_size(30).bench_function(
c.bench_function(
&format!("MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|b| b.iter(|| commit_batch(&mut d_points, &mut d_scalars, batch_size))
);
#[cfg(feature = "g2")]
group.sample_size(10).bench_function(
&format!("G2 MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|b| b.iter(|| commit_batch_g2(&mut d_g2_points, &mut d_scalars, batch_size))
);
}
}
}

View File

@@ -8,26 +8,33 @@ use icicle_utils::{interpolate_scalars_batch, interpolate_points_batch, set_up_s
const LOG_NTT_SIZES: [usize; 1] = [15];
const BATCH_SIZES: [usize; 2] = [8, 16];
fn bench_ntt(c: &mut Criterion) {
let mut group = c.benchmark_group("NTT");
fn bench_point_ntt(c: &mut Criterion) {
for log_ntt_size in LOG_NTT_SIZES {
for batch_size in BATCH_SIZES {
let ntt_size = 1 << log_ntt_size;
let (_, mut d_evals, mut d_domain) = set_up_scalars(ntt_size * batch_size, log_ntt_size, true);
let (_, mut d_points_evals, _) = set_up_points(ntt_size * batch_size, log_ntt_size, true);
let (_, mut d_evals, mut d_domain) = set_up_points(ntt_size * batch_size, log_ntt_size, true);
group.sample_size(100).bench_function(
&format!("Scalar NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size))
);
group.sample_size(10).bench_function(
c.bench_function(
&format!("EC NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| interpolate_points_batch(&mut d_points_evals, &mut d_domain, batch_size))
|b| b.iter(|| interpolate_points_batch(&mut d_evals, &mut d_domain, batch_size))
);
}
}
}
criterion_group!(ntt_benches, bench_ntt);
/// Registers one criterion benchmark per (`LOG_NTT_SIZES`, `BATCH_SIZES`) pair that
/// repeatedly calls `interpolate_scalars_batch` on buffers prepared by `set_up_scalars`.
///
/// The buffers are created once per pair, outside the timed closure, and are reused
/// (mutably) by every iteration of that benchmark.
fn bench_scalar_ntt(c: &mut Criterion) {
    for log_size in LOG_NTT_SIZES {
        // Length of a single NTT; the set-up call receives this times the batch size.
        let single_ntt_len = 1 << log_size;
        for batch in BATCH_SIZES {
            let (_, mut d_evals, mut d_domain) =
                set_up_scalars(single_ntt_len * batch, log_size, true);
            let bench_name = format!("Scalar NTT of size 2^{} in batch {}", log_size, batch);
            c.bench_function(&bench_name, |bencher| {
                bencher.iter(|| interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch))
            });
        }
    }
}
criterion_group!(ntt_benches, bench_point_ntt, bench_scalar_ntt);
criterion_main!(ntt_benches);

View File

@@ -16,14 +16,14 @@ fn main() {
println!("Compiling icicle library using arch: {}", &arch);
if cfg!(feature = "g2") {
nvcc.define("G2_DEFINED", None);
}
nvcc.cuda(true);
nvcc.debug(false);
nvcc.flag(&arch);
nvcc.files([
"./icicle/curves/index.cu",
"./icicle/appUtils/vector_manipulation/ve_mod_mult.cu",
"./icicle/appUtils/ntt/lde.cu",
"./icicle/appUtils/msm/msm.cu",
"./icicle/primitives/projective.cu",
]);
nvcc.compile("ingo_icicle"); //TODO: extension??
}

View File

@@ -1,13 +0,0 @@
{
"curve_name" : "bls12_377",
"modolus_p" : 8444461749428370424248824938781546531375899335154063827935233455917409239041,
"bit_count_p" : 253,
"limb_p" : 8,
"ntt_size" : 32,
"modolus_q" : 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177,
"bit_count_q" : 377,
"limb_q" : 12,
"weierstrass_b" : 1,
"gen_x" : 81937999373150964239938255573465948239988671502647976594219695644855304257327692006745978603320413799295628339695,
"gen_y" : 241266749859715473739788878240585681733927191168601896383759122102112907357779751001206799952863815012735208165030
}

View File

@@ -1,13 +0,0 @@
{
"curve_name" : "bls12_381",
"modolus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
"bit_count_p" : 255,
"limb_p" : 8,
"ntt_size" : 32,
"modolus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
"bit_count_q" : 381,
"limb_q" : 12,
"weierstrass_b" : 4,
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569
}

View File

@@ -1,13 +0,0 @@
{
"curve_name" : "bn254",
"modolus_p" : 21888242871839275222246405745257275088548364400416034343698204186575808495617,
"bit_count_p" : 254,
"limb_p" : 8,
"ntt_size" : 16,
"modolus_q" : 21888242871839275222246405745257275088696311157297823662689037894645226208583,
"bit_count_q" : 254,
"limb_q" : 8,
"weierstrass_b" : 3,
"gen_x" : 1,
"gen_y" : 2
}

View File

@@ -1,203 +0,0 @@
import json
import math
import os
from sympy.ntheory import isprime, primitive_root
import subprocess
import random
import sys
data = None
with open(sys.argv[1]) as json_file:
data = json.load(json_file)
curve_name = data["curve_name"]
modolus_p = data["modolus_p"]
bit_count_p = data["bit_count_p"]
limb_p = data["limb_p"]
ntt_size = data["ntt_size"]
modolus_q = data["modolus_q"]
bit_count_q = data["bit_count_q"]
limb_q = data["limb_q"]
weierstrass_b = data["weierstrass_b"]
gen_x = data["gen_x"]
gen_y = data["gen_y"]
def to_hex(val, length):
    """Format ``val`` as a list of ``0x``-prefixed 8-hex-digit words.

    The hexadecimal digits of ``val`` are left-padded with zeros, first to a
    multiple of 8 digits and then to at least ``length`` digits. The digits
    are cut into groups of 8 and emitted least-significant group first.

    Args:
        val: integer to format.
        length: minimum number of hex digits (not words) in the output.

    Returns:
        A string in which every word is followed by ``", "`` (including the
        last one, so callers strip the final two characters).
    """
    digits = hex(val)[2:]
    leftover = len(digits) % 8
    if leftover:
        # Complete the most significant word.
        digits = digits.rjust(len(digits) + (8 - leftover), "0")
    if len(digits) < length:
        digits = digits.rjust(length, "0")
    words = [digits[start:start + 8] for start in range(0, len(digits), 8)]
    # Words were cut most-significant first; output is least-significant first.
    return "".join("0x" + word + ", " for word in reversed(words))
def get_root_of_unity(order: int) -> int:
    """Return ``5 ** ((modolus_p - 1) / order) mod modolus_p``.

    Reads the module-level ``modolus_p`` (the scalar-field modulus loaded from
    the JSON file). ``order`` must divide ``modolus_p - 1``; otherwise the
    assertion fails.

    NOTE(review): the base 5 is hard-coded; the result has exact order
    ``order`` only if 5 is a suitable generator for this modulus - confirm
    for each new curve.
    """
    exponent, remainder = divmod(modolus_p - 1, order)
    assert remainder == 0
    return pow(5, exponent, modolus_p)
def create_field_parameters_struct(modulus, modulus_bits_count,limbs,ntt,size,name):
    """Build the text of a C++ ``struct`` holding the constants of one field.

    Args:
        modulus: field modulus.
        modulus_bits_count: bit length of ``modulus``, emitted verbatim.
        limbs: number of words; every constant is padded to ``8 * limbs`` hex
            digits (``modulus_wide`` to twice that).
        ntt: when true, per-size ``omegaK`` / ``omega_invK`` / ``invK``
            constants are appended for K = 1..size.
        size: number of NTT constants of each kind.
        name: name of the generated struct.

    Returns:
        The struct definition as a string.

    NOTE(review): indentation was lost in this view; all three NTT loops are
    assumed to sit under ``if ntt:`` - confirm against the original script.
    NOTE(review): the roots come from get_root_of_unity(), which uses the
    module-level ``modolus_p``, while the inverses here use the ``modulus``
    argument; the two agree only when ``modulus`` is ``modolus_p``.
    """
    hex_digits = 8 * limbs

    def words(value, length=hex_digits):
        # to_hex ends every word with ", "; drop the trailing separator.
        return to_hex(value, length)[:-2]

    narrow = " static constexpr storage<limbs_count> "
    wide = " static constexpr storage<2*limbs_count> "
    squared = modulus * modulus

    parts = [
        " struct " + name + "{\n",
        " static constexpr unsigned limbs_count = " + str(limbs) + ";\n",
        narrow + "modulus = {" + words(modulus) + "};\n",
        narrow + "modulus_2 = {" + words(modulus * 2) + "};\n",
        narrow + "modulus_4 = {" + words(modulus * 4) + "};\n",
        wide + "modulus_wide = {" + words(modulus, hex_digits * 2) + "};\n",
        wide + "modulus_sqared = {" + words(squared) + "};\n",
        wide + "modulus_sqared_2 = {" + words(squared * 2) + "};\n",
        wide + "modulus_sqared_4 = {" + words(squared * 2 * 2) + "};\n",
        " static constexpr unsigned modulus_bits_count = " + str(modulus_bits_count) + ";\n",
    ]

    # m = floor(2^(2 * modulus_bits_count) / modulus)
    m = int(pow(2, 2 * modulus_bits_count) // modulus)
    parts.append(narrow + "m = {" + words(m) + "};\n")
    parts.append(narrow + "one = {" + words(1) + "};\n")
    parts.append(narrow + "zero = {" + words(0) + "};\n")

    if ntt:
        exponents = range(1, size + 1)
        # Roots of unity of order 2^k.
        for k in exponents:
            omega = get_root_of_unity(int(pow(2, k)))
            parts.append(narrow + "omega" + str(k) + "= {" + words(omega) + "};\n")
        # Modular inverses of those roots.
        for k in exponents:
            omega = get_root_of_unity(int(pow(2, k)))
            parts.append(narrow + "omega_inv" + str(k) + "= {" + words(pow(omega, -1, modulus)) + "};\n")
        # Modular inverses of 2^k.
        for k in exponents:
            parts.append(narrow + "inv" + str(k) + "= {" + words(pow(int(pow(2, k)), -1, modulus)) + "};\n")

    parts.append(" };\n")
    return "".join(parts)
def create_gen():
    """Build the text of the C++ ``group_generator`` struct.

    Reads the module-level ``gen_x``, ``gen_y`` and ``limb_q`` and emits both
    coordinates padded to ``8 * limb_q`` hex digits.
    """
    hex_digits = 8 * limb_q
    coordinates = (("generator_x", gen_x), ("generator_y", gen_y))
    members = "".join(
        " static constexpr storage<fq_config::limbs_count> " + label + " = {"
        + to_hex(value, hex_digits)[:-2]  # strip the trailing ", "
        + "};\n"
        for label, value in coordinates
    )
    return " struct group_generator {\n" + members + " };\n"
def get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b):
    """Assemble the full text of the generated parameters header.

    The header opens a ``PARAMS_<CURVE>`` namespace (name taken from the
    module-level ``curve_name``) containing ``fp_config`` (with NTT
    constants), ``fq_config`` (without), the ``weierstrass_b`` constant and
    the generator struct from create_gen().

    Returns:
        The header contents as a single string.
    """
    sections = [
        "#pragma once\n#include \"../../utils/storage.cuh\"\n",
        "namespace PARAMS_" + curve_name.upper() + "{\n",
        create_field_parameters_struct(modolus_p, bit_count_p, limb_p, True, ntt_size, "fp_config"),
        create_field_parameters_struct(modolus_q, bit_count_q, limb_q, False, 0, "fq_config"),
        " static constexpr unsigned weierstrass_b = " + str(weierstrass_b) + ";\n",
        create_gen(),
        "}\n",
    ]
    return "".join(sections)
# Create Cuda interface
newpath = "./icicle/curves/"+curve_name
if not os.path.exists(newpath):
os.makedirs(newpath)
fc = get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b)
text_file = open("./icicle/curves/"+curve_name+"/params.cuh", "w")
n = text_file.write(fc)
text_file.close()
with open("./icicle/curves/curve_template/lde.cu", "r") as lde_file:
content = lde_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./icicle/curves/"+curve_name+"/lde.cu", "w")
n = text_file.write(content)
text_file.close()
with open("./icicle/curves/curve_template/msm.cu", "r") as msm_file:
content = msm_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./icicle/curves/"+curve_name+"/msm.cu", "w")
n = text_file.write(content)
text_file.close()
with open("./icicle/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_file:
content = ve_mod_mult_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./icicle/curves/"+curve_name+"/ve_mod_mult.cu", "w")
n = text_file.write(content)
text_file.close()
namespace = '#include "params.cuh"\n'+'''namespace CURVE_NAME_U {
typedef Field<PARAMS_CURVE_NAME_U::fp_config> scalar_field_t;\
typedef scalar_field_t scalar_t;\
typedef Field<PARAMS_CURVE_NAME_U::fq_config> point_field_t;
typedef Projective<point_field_t, scalar_field_t, PARAMS_CURVE_NAME_U::group_generator, PARAMS_CURVE_NAME_U::weierstrass_b> projective_t;
typedef Affine<point_field_t> affine_t;
}'''
with open('./icicle/curves/'+curve_name+'/curve_config.cuh', 'w') as f:
f.write(namespace.replace("CURVE_NAME_U",curve_name.upper()))
eq = '''
#include <cuda.h>\n
#include "curve_config.cuh"\n
#include "../../primitives/projective.cuh"\n
extern "C" bool eq_CURVE_NAME_L(CURVE_NAME_U::projective_t *point1, CURVE_NAME_U::projective_t *point2)
{
return (*point1 == *point2);
}'''
with open('./icicle/curves/'+curve_name+'/projective.cu', 'w') as f:
f.write(eq.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
supported_operations = '''
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"
'''
with open('./icicle/curves/'+curve_name+'/supported_operations.cu', 'w') as f:
f.write(supported_operations.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
with open('./icicle/curves/index.cu', 'a') as f:
f.write('\n#include "'+curve_name.lower()+'/supported_operations.cu"')
# Create Rust interface and tests
if limb_p == limb_q:
with open("./src/curve_templates/curve_same_limbs.rs", "r") as curve_file:
content = curve_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
content = content.replace("_limbs_p",str(limb_p * 8 * 4))
content = content.replace("limbs_p",str(limb_p))
text_file = open("./src/curves/"+curve_name+".rs", "w")
n = text_file.write(content)
text_file.close()
else:
with open("./src/curve_templates/curve_different_limbs.rs", "r") as curve_file:
content = curve_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
content = content.replace("_limbs_p",str(limb_p * 8 * 4))
content = content.replace("limbs_p",str(limb_p))
content = content.replace("_limbs_q",str(limb_q * 8 * 4))
content = content.replace("limbs_q",str(limb_q))
text_file = open("./src/curves/"+curve_name+".rs", "w")
n = text_file.write(content)
text_file.close()
with open("./src/curve_templates/test.rs", "r") as test_file:
content = test_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./src/test_"+curve_name+".rs", "w")
n = text_file.write(content)
text_file.close()
with open('./src/curves/mod.rs', 'a') as f:
f.write('\n pub mod ' + curve_name + ';')
with open('./src/lib.rs', 'a') as f:
f.write('\npub mod ' + curve_name + ';')

View File

@@ -22,6 +22,10 @@ FetchContent_Declare(
URL https://github.com/google/googletest/archive/refs/tags/v1.13.0.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
# boosting lib
include_directories("/home/miner/include/boost_1_80_0")
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)

View File

@@ -1,4 +0,0 @@
test_msm:
mkdir -p work
nvcc -o work/test_msm -I. tests/msm_test.cu
work/test_msm

View File

@@ -1,9 +1,3 @@
#ifndef MSM
#define MSM
#pragma once
#include <stdexcept>
#include <cuda.h>
#include "../../primitives/affine.cuh"
#include <iostream>
#include <vector>
#include <cub/device/device_radix_sort.cuh>
@@ -12,6 +6,7 @@
#include "../../utils/cuda_utils.cuh"
#include "../../primitives/projective.cuh"
#include "../../primitives/field.cuh"
#include "../../curves/curve_config.cuh"
#include "msm.cuh"
@@ -83,17 +78,16 @@ __global__ void split_scalars_kernel(unsigned *buckets_indices, unsigned *point_
//this kernel adds up the points in each bucket
template <typename P, typename A>
// __global__ void accumulate_buckets_kernel(P *__restrict__ buckets, unsigned *__restrict__ bucket_offsets,
// unsigned *__restrict__ bucket_sizes, unsigned *__restrict__ single_bucket_indices, unsigned *__restrict__ point_indices, A *__restrict__ points, unsigned nof_buckets, unsigned batch_size, unsigned msm_idx_shift){
__global__ void accumulate_buckets_kernel(P *buckets, unsigned *bucket_offsets, unsigned *bucket_sizes, unsigned *single_bucket_indices, unsigned *point_indices, A *points, unsigned nof_buckets, unsigned *nof_buckets_to_compute, unsigned msm_idx_shift){
__global__ void accumulate_buckets_kernel(P *__restrict__ buckets, unsigned *__restrict__ bucket_offsets,
unsigned *__restrict__ bucket_sizes, unsigned *__restrict__ single_bucket_indices, unsigned *__restrict__ point_indices, A *__restrict__ points, unsigned nof_buckets, unsigned batch_size, unsigned msm_idx_shift){
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid ==0) printf("nof buckets to comp: %u", *nof_buckets_to_compute);
if (tid>=*nof_buckets_to_compute){
return;
}
unsigned msm_index = single_bucket_indices[tid]>>msm_idx_shift;
unsigned bucket_index = msm_index * nof_buckets + (single_bucket_indices[tid]&((1<<msm_idx_shift)-1));
unsigned bucket_size = bucket_sizes[tid];
if (tid>=nof_buckets*batch_size || bucket_size == 0){ //if the bucket is empty we don't need to continue
return;
}
unsigned bucket_offset = bucket_offsets[tid];
for (unsigned i = 0; i < bucket_sizes[tid]; i++) //add the relevant points starting from the relevant offset up to the bucket size
{
@@ -107,8 +101,7 @@ template <typename P>
__global__ void big_triangle_sum_kernel(P* buckets, P* final_sums, unsigned nof_bms, unsigned c){
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid>=nof_bms) return;
// printf("%u",tid);
if (tid>nof_bms) return;
P line_sum = buckets[(tid+1)*(1<<c)-1];
final_sums[tid] = line_sum;
for (unsigned i = (1<<c)-2; i >0; i--)
@@ -197,13 +190,12 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
NUM_BLOCKS = (size * (nof_bms+1) + NUM_THREADS - 1) / NUM_THREADS;
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(bucket_indices + size, point_indices + size, d_scalars, size, msm_log_size,
nof_bms, bm_bitsize, c); //+size - leaving the first bm free for the out of place sort later
//sort indices - the indices are sorted from smallest to largest in order to group together the points that belong to each bucket
unsigned *sort_indices_temp_storage{};
size_t sort_indices_temp_storage_bytes;
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + size, bucket_indices,
point_indices + size, point_indices, size);
cudaMalloc(&sort_indices_temp_storage, sort_indices_temp_storage_bytes);
for (unsigned i = 0; i < nof_bms; i++) {
unsigned offset_out = i * size;
@@ -239,10 +231,11 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, nof_buckets);
cudaFree(offsets_temp_storage);
NUM_THREADS = 1 << 8;
//launch the accumulation kernel with maximum threads
NUM_THREADS = 1 << 8;
NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, point_indices,
d_points, nof_buckets, nof_buckets_to_compute, c+bm_bitsize);
d_points, nof_buckets, 1, c+bm_bitsize);
#ifdef SSM_SUM
//sum each bucket
@@ -278,7 +271,6 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
cudaDeviceSynchronize();
if (!on_device)
cudaMemcpy(final_result, d_final_result, sizeof(P), cudaMemcpyDeviceToHost);
std::cout<<"final res "<<(*final_result)<<std::endl;
//free memory
if (!on_device) {
@@ -354,6 +346,12 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + msm_size, sorted_bucket_indices,
point_indices + msm_size, sorted_point_indices, total_size * nof_bms);
cudaMalloc(&sort_indices_temp_storage, sort_indices_temp_storage_bytes);
// for (unsigned i = 0; i < nof_bms*batch_size; i++) {
// unsigned offset_out = i * msm_size;
// unsigned offset_in = offset_out + msm_size;
// cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + offset_in,
// bucket_indices + offset_out, point_indices + offset_in, point_indices + offset_out, msm_size);
// }
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + msm_size, sorted_bucket_indices,
point_indices + msm_size, sorted_point_indices, total_size * nof_bms);
cudaFree(sort_indices_temp_storage);
@@ -388,7 +386,7 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
NUM_THREADS = 1 << 8;
NUM_BLOCKS = (total_nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, sorted_point_indices,
d_points, nof_buckets, total_nof_buckets_to_compute, c+bm_bitsize);
d_points, nof_buckets, batch_size, c+bm_bitsize);
#ifdef SSM_SUM
//sum each bucket
@@ -421,7 +419,7 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
NUM_THREADS = 1<<8;
NUM_BLOCKS = (batch_size + NUM_THREADS - 1) / NUM_THREADS;
final_accumulation_kernel<P, S><<<NUM_BLOCKS,NUM_THREADS>>>(bm_sums, on_device ? final_results : d_final_results, batch_size, nof_bms, c);
//copy final result to host
cudaDeviceSynchronize();
if (!on_device)
@@ -458,7 +456,8 @@ __global__ void to_proj_kernel(A* affine_points, P* proj_points, unsigned N){
//the function computes msm using ssm
template <typename S, typename P, typename A>
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result){ //works up to 2^8
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, bool on_device){ //works up to 2^8
S *scalars;
A *a_points;
P *p_points;
@@ -503,12 +502,12 @@ void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result){ //w
template <typename A, typename S, typename P>
void reference_msm(S* scalars, A* a_points, unsigned size){
P *points = new P[size];
// P points[size];
P points[size];
for (unsigned i = 0; i < size ; i++)
{
points[i] = P::from_affine(a_points[i]);
}
P res = P::zero();
@@ -523,10 +522,7 @@ void reference_msm(S* scalars, A* a_points, unsigned size){
}
unsigned get_optimal_c(const unsigned size) {
if (size < 17)
return 1;
// return 15;
return ceil(log2(size))-4;
return 10;
}
//this function is used to compute msms of size larger than 256
@@ -548,4 +544,88 @@ void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_
unsigned bitsize = 255;
batched_bucket_method_msm(bitsize, c, scalars, points, batch_size, msm_size, result, on_device);
}
#endif
/**
 * C entry point for a single MSM.
 * Inputs of up to 256 elements go to `short_msm`; anything larger goes to `large_msm`.
 * Both are called with `on_device = false`.
 * @param out Where the resulting point is written.
 * @param points Points for the MSM.
 * @param scalars Scalars for the MSM.
 * @param count Number of (scalar, point) pairs.
 * @param device_id Currently not used by this function.
 * @return `CUDA_SUCCESS` on success, -1 if a `std::runtime_error` was caught.
 */
extern "C"
int msm_cuda(projective_t *out, affine_t points[],
             scalar_t scalars[], size_t count, size_t device_id = 0)
{
  try
  {
    if (count <= 256) {
      short_msm<scalar_t, projective_t, affine_t>(scalars, points, count, out, false);
    } else {
      large_msm<scalar_t, projective_t, affine_t>(scalars, points, count, out, false);
    }
    return CUDA_SUCCESS;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point for a batch of MSMs; forwards to `batched_large_msm` with `on_device = false`.
 * @param out Where the results are written.
 * @param points Points for the MSMs.
 * @param scalars Scalars for the MSMs.
 * @param batch_size Number of MSMs in the batch.
 * @param msm_size Size of each individual MSM.
 * @param device_id Currently not used by this function.
 * @return `CUDA_SUCCESS` on success, -1 if a `std::runtime_error` was caught.
 */
extern "C"
int msm_batch_cuda(projective_t* out, affine_t points[],
                   scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
  try
  {
    batched_large_msm<scalar_t, projective_t, affine_t>(
      scalars, points, batch_size, msm_size, out, false);
    return CUDA_SUCCESS;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * Commit to a polynomial using the MSM.
 * Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
 * @param d_out Output point to write the result to.
 * @param d_scalars Scalars for the MSM. Must be on device.
 * @param d_points Points for the MSM. Must be on device.
 * @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
 * @param device_id Currently not used by this function.
 * @return `CUDA_SUCCESS` (0) on success, -1 if a `std::runtime_error` was caught.
 */
extern "C"
int commit_cuda(projective_t* d_out, scalar_t* d_scalars, affine_t* d_points, size_t count, size_t device_id = 0)
{
  try
  {
    // Explicit template arguments and CUDA_SUCCESS match the sibling msm_cuda entry point.
    large_msm<scalar_t, projective_t, affine_t>(d_scalars, d_points, count, d_out, true);
    return CUDA_SUCCESS;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * Commit to a batch of polynomials using the MSM.
 * Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
 * @param d_out Output points to write the results to.
 * @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
 * @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
 * @param count Length of `d_points` array, `d_scalars` has length `count` * `batch_size`.
 * @param batch_size Size of the batch.
 * @param device_id Currently not used by this function.
 * @return `CUDA_SUCCESS` (0) on success, -1 if a `std::runtime_error` was caught.
 */
extern "C"
int commit_batch_cuda(projective_t* d_out, scalar_t* d_scalars, affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
  try
  {
    // Note the argument order: batched_large_msm takes batch_size before the per-MSM size.
    // Explicit template arguments and CUDA_SUCCESS match the sibling msm_batch_cuda entry point.
    batched_large_msm<scalar_t, projective_t, affine_t>(d_scalars, d_points, batch_size, count, d_out, true);
    return CUDA_SUCCESS;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}

View File

@@ -1,6 +1,10 @@
#ifndef MSM_H
#define MSM_H
#pragma once
#include <stdexcept>
#include <cuda.h>
#include "../../primitives/projective.cuh"
#include "../../primitives/affine.cuh"
#include "../../curves/curve_config.cuh"
template <typename S, typename P, typename A>
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device);
@@ -16,7 +20,3 @@ void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device);
template <typename S, typename P, typename A>
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, bool on_device);
template <typename A, typename S, typename P>
void reference_msm(S* scalars, A* a_points, unsigned size);
#endif

View File

@@ -1,256 +0,0 @@
#include <iostream>
#include <chrono>
#include <vector>
#include "msm.cu"
#include "../../utils/cuda_utils.cuh"
#include "../../primitives/projective.cuh"
#include "../../primitives/field.cuh"
#include "../../curves/bls12_381/curve_config.cuh"
using namespace BLS12_381;
// Minimal stand-in for a curve point used by the MSM tests:
// the "group" is unsigned integers under (wrapping) addition.
struct fake_point
{
  unsigned val = 0;

  // "Point addition" is plain unsigned addition of the wrapped values.
  __host__ __device__ inline fake_point operator+(fake_point fp) {
    fake_point sum;
    sum.val = val + fp.val;
    return sum;
  }

  // Identity element: a point whose value is 0.
  __host__ __device__ fake_point zero() {
    return fake_point();
  }
};
// Streams a fake_point as its bare unsigned value.
std::ostream& operator<<(std::ostream &strm, const fake_point &a) {
  strm << a.val;
  return strm;
}
// Minimal stand-in for a field scalar used by the MSM tests.
// `val` is the scalar value; `bitsize` is the number of bits scanned by operator*.
struct fake_scalar
{
  unsigned val = 0;
  unsigned bitsize = 32;

  // Returns digit number `digit_num` of `val` when it is split into digits of
  // `digit_width` bits each (digit 0 is the least significant).
  __host__ __device__ int get_scalar_digit(int digit_num, int digit_width){
    return (val>>(digit_num*digit_width))&((1<<digit_width)-1);
  }

  // Scalar multiplication val * fp using a ladder over the bits of `val`,
  // most significant bit first. Invariant inside the loop: p2 == p1 + fp.
  __host__ __device__ inline fake_point operator*(fake_point fp) {
    fake_point p1;
    fake_point p2;
    unsigned x = val;
    if (x == 0) return fake_point().zero();

    // Bits are read with an unsigned shift-and-mask. The previous form,
    // (x & (1<<(bitsize-i)))>>(bitsize-i), shifted a signed int into its sign
    // bit when bitsize-i == 31.
    unsigned i = 1;
    unsigned c_bit = (x >> (bitsize - 1)) & 1u;
    // Skip leading zero bits; on exit bit (bitsize-i) is the highest set bit.
    while (c_bit == 0 && i < bitsize){
      i++;
      c_bit = (x >> (bitsize - i)) & 1u;
    }

    p1 = fp;
    p2 = p1 + p1;
    while (i < bitsize){
      i++;
      c_bit = (x >> (bitsize - i)) & 1u;
      if (c_bit){
        p1 = p1 + p2;
        p2 = p2 + p2;
      }
      else {
        p2 = p1 + p2;
        p1 = p1 + p1;
      }
    }
    return p1;
  }
};
// Test stand-in for a field scalar: a bare 32-bit unsigned value.
class Dummy_Scalar {
  public:
    static constexpr unsigned NBITS = 32;

    unsigned x;

    // Returns digit number `digit_num` of `x` when it is split into digits of
    // `digit_width` bits each (digit 0 is the least significant).
    HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width) {
      const unsigned shifted = x >> (digit_num * digit_width);
      return shifted & ((1 << digit_width) - 1);
    }

    // Creates a scalar from the C library rand().
    static HOST_INLINE Dummy_Scalar rand_host() {
      return {(unsigned)rand()};
    }

    // Addition is plain (wrapping) unsigned addition.
    friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2) {
      p1.x += p2.x;
      return p1;
    }

    friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) {
      return p1.x == p2.x;
    }

    friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) {
      return p1.x == p2;
    }

    // Prints the raw unsigned value.
    friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar) {
      return os << scalar.x;
    }
};
// Test stand-in for a projective point: wraps a Dummy_Scalar, so "point
// addition" is plain unsigned addition and affine/projective forms coincide.
class Dummy_Projective {
  public:
    Dummy_Scalar x;

    // Identity element (value 0).
    static HOST_DEVICE_INLINE Dummy_Projective zero() {
      return {0};
    }

    // Affine and projective representations are the same type here; both
    // conversions just copy the value.
    static HOST_DEVICE_INLINE Dummy_Projective to_affine(const Dummy_Projective &point) {
      return {point.x};
    }

    static HOST_DEVICE_INLINE Dummy_Projective from_affine(const Dummy_Projective &point) {
      return {point.x};
    }

    // static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
    //   return {Dummy_Scalar::neg(point.x)};
    // }

    friend HOST_DEVICE_INLINE Dummy_Projective operator+(Dummy_Projective p1, const Dummy_Projective& p2) {
      return {p1.x+p2.x};
    }

    // friend HOST_DEVICE_INLINE Dummy_Projective operator-(Dummy_Projective p1, const Dummy_Projective& p2) {
    //   return p1 + neg(p2);
    // }

    friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Projective& point) {
      os << point.x;
      return os;
    }

    // Scalar multiplication by double-and-add, scanning the scalar's bits from
    // the most significant (NBITS-1) down to bit 0.
    friend HOST_DEVICE_INLINE Dummy_Projective operator*(Dummy_Scalar scalar, const Dummy_Projective& point) {
      Dummy_Projective res = zero();
    // nvcc defines __CUDA_ARCH__ (not CUDA_ARCH) while compiling device code,
    // so this is the guard that actually enables the unroll hint there.
    #ifdef __CUDA_ARCH__
    #pragma unroll
    #endif
      for (unsigned i = 0; i < Dummy_Scalar::NBITS; i++) {
        if (i > 0) {
          res = res + res;
        }
        if (scalar.get_scalar_digit(Dummy_Scalar::NBITS - i - 1, 1)) {
          res = res + point;
        }
      }
      return res;
    }

    friend HOST_DEVICE_INLINE bool operator==(const Dummy_Projective& p1, const Dummy_Projective& p2) {
      return (p1.x == p2.x);
    }

    static HOST_DEVICE_INLINE bool is_zero(const Dummy_Projective &point) {
      return point.x == 0;
    }

    // Creates a point from the C library rand().
    static HOST_INLINE Dummy_Projective rand_host() {
      return {(unsigned)rand()};
    }
};
// Switch between dummy and real types.
// The three active typedefs select the real curve types used by main().
// To run the test with the dummy types instead, comment them out and enable
// the three commented-out typedefs below.
typedef scalar_t test_scalar;
typedef projective_t test_projective;
typedef affine_t test_affine;
// typedef Dummy_Scalar test_scalar;
// typedef Dummy_Projective test_projective;
// typedef Dummy_Projective test_affine;
int main()
{
unsigned batch_size = 4;
unsigned msm_size = 1<<15;
unsigned N = batch_size*msm_size;
test_scalar *scalars = new test_scalar[N];
test_affine *points = new test_affine[N];
for (unsigned i=0;i<N;i++){
scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
points[i] = (i%msm_size < 10)? test_projective::to_affine(test_projective::rand_host()): points[i-10];
// scalars[i] = test_scalar::rand_host();
// points[i] = test_projective::to_affine(test_projective::rand_host());
}
std::cout<<"finished generating"<<std::endl;
// projective_t *short_res = (projective_t*)malloc(sizeof(projective_t));
// test_projective *large_res = (test_projective*)malloc(sizeof(test_projective));
test_projective large_res[batch_size];
test_projective batched_large_res[batch_size];
// fake_point *large_res = (fake_point*)malloc(sizeof(fake_point));
// fake_point batched_large_res[256];
// short_msm<scalar_t, projective_t, affine_t>(scalars, points, N, short_res);
for (unsigned i=0;i<batch_size;i++){
large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
// std::cout<<"final result large"<<std::endl;
// std::cout<<test_projective::to_affine(*large_res)<<std::endl;
}
auto begin = std::chrono::high_resolution_clock::now();
batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
// large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false);
auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
std::cout<<"final results batched large"<<std::endl;
bool success = true;
for (unsigned i = 0; i < batch_size; i++)
{
std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
std::cout<<"good"<<std::endl;
}
else{
std::cout<<"miss"<<std::endl;
std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
success = false;
}
}
if (success){
std::cout<<"success!"<<std::endl;
}
// std::cout<<batched_large_res[0]<<std::endl;
// std::cout<<batched_large_res[1]<<std::endl;
// std::cout<<projective_t::to_affine(batched_large_res[0])<<std::endl;
// std::cout<<projective_t::to_affine(batched_large_res[1])<<std::endl;
// std::cout<<"final result short"<<std::endl;
// std::cout<<pr<<std::endl;
return 0;
}

View File

@@ -1,9 +1,7 @@
#ifndef LDE
#define LDE
#include <cuda.h>
#include "ntt.cuh"
#include "lde.cuh"
#include "../vector_manipulation/ve_mod_mult.cuh"
#include "lde.cuh"
/**
@@ -27,7 +25,7 @@ template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluat
}
NUM_BLOCKS = (n * batch_size + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>> (d_out, n * batch_size, S::inv_log_size(logn));
template_normalize_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>> (d_out, n * batch_size, scalar_t::inv_log_size(logn));
return 0;
}
@@ -67,13 +65,13 @@ int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_
if (domain_size > n) {
// allocate and initialize an array of stream handles to parallelize data copying across batches
cudaStream_t *memcpy_streams = (cudaStream_t *) malloc(batch_size * sizeof(cudaStream_t));
for (unsigned i = 0; i < batch_size; i++)
for (int i = 0; i < batch_size; i++)
{
cudaStreamCreate(&(memcpy_streams[i]));
cudaMemcpyAsync(&d_out[i * domain_size], &d_coefficients[i * n], n * sizeof(E), cudaMemcpyDeviceToDevice, memcpy_streams[i]);
uint32_t NUM_THREADS = MAX_THREADS_BATCH;
uint32_t NUM_BLOCKS = (domain_size - n + NUM_THREADS - 1) / NUM_THREADS;
int NUM_THREADS = MAX_THREADS_BATCH;
int NUM_BLOCKS = (domain_size - n + NUM_THREADS - 1) / NUM_THREADS;
fill_array <E> <<<NUM_BLOCKS, NUM_THREADS, 0, memcpy_streams[i]>>> (&d_out[i * domain_size + n], E::zero(), domain_size - n);
cudaStreamSynchronize(memcpy_streams[i]);
@@ -111,72 +109,355 @@ int evaluate(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size,
return evaluate_batch <E, S> (d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers);
}
template <typename S>
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n) {
int interpolate_scalars(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n) {
return interpolate(d_out, d_evaluations, d_domain, n);
}
template <typename S>
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size) {
int interpolate_scalars_batch(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n, unsigned batch_size) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
template <typename E, typename S>
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n) {
int interpolate_points(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain, unsigned n) {
return interpolate(d_out, d_evaluations, d_domain, n);
}
template <typename E, typename S>
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size) {
int interpolate_points_batch(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain, unsigned n, unsigned batch_size) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
template <typename S>
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n) {
S* _null = nullptr;
int evaluate_scalars(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n) {
scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
template <typename S>
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size) {
S* _null = nullptr;
int evaluate_scalars_batch(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size) {
scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
template <typename E, typename S>
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n) {
S* _null = nullptr;
int evaluate_points(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n) {
scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
template <typename E, typename S>
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
int evaluate_points_batch(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain,
unsigned domain_size, unsigned n, unsigned batch_size) {
S* _null = nullptr;
scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
template <typename S>
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers) {
int evaluate_scalars_on_coset(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain,
unsigned domain_size, unsigned n, scalar_t* coset_powers) {
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
template <typename E, typename S>
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers) {
int evaluate_scalars_on_coset_batch(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, scalar_t* coset_powers) {
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
template <typename E, typename S>
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers) {
int evaluate_points_on_coset(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain,
unsigned domain_size, unsigned n, scalar_t* coset_powers) {
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
template <typename E, typename S>
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers) {
int evaluate_points_on_coset_batch(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, scalar_t* coset_powers) {
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
#endif
/**
 * C entry point that builds a twiddle-factor domain.
 * Calls fill_twiddle_factors_array with scalar_t::omega_inv(logn) when
 * `inverse` is set and scalar_t::omega(logn) otherwise.
 * @param domain_size number of twiddle factors requested.
 * @param logn value passed to omega / omega_inv.
 * @param inverse selects omega_inv instead of omega.
 * @param device_id currently unused.
 * @return the pointer returned by fill_twiddle_factors_array, or nullptr when a
 *         std::runtime_error was caught.
 */
extern "C" scalar_t* build_domain_cuda(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
  scalar_t* d_domain = nullptr;
  try
  {
    d_domain = inverse
      ? fill_twiddle_factors_array(domain_size, scalar_t::omega_inv(logn))
      : fill_twiddle_factors_array(domain_size, scalar_t::omega(logn));
  }
  catch (const std::runtime_error &err)
  {
    // d_domain is still nullptr here: the assignment above never completed.
    printf("error %s", err.what());
  }
  return d_domain;
}
/**
 * C entry point for ntt_end2end: forwards (arr, n, inverse) unchanged.
 * @param device_id currently unused.
 * @return the value returned by ntt_end2end, or -1 when a std::runtime_error was caught.
 */
extern "C" int ntt_cuda(scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = ntt_end2end(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for ecntt_end2end: forwards (arr, n, inverse) unchanged.
 * @param device_id currently unused.
 * @return the value returned by ecntt_end2end, or -1 when a std::runtime_error was caught.
 */
extern "C" int ecntt_cuda(projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = ecntt_end2end(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for ntt_end2end_batch: forwards (arr, arr_size, batch_size, inverse) unchanged.
 * @param device_id currently unused.
 * @return the value returned by ntt_end2end_batch, or -1 when a std::runtime_error was caught.
 */
extern "C" int ntt_batch_cuda(scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = ntt_end2end_batch(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for ecntt_end2end_batch: forwards (arr, arr_size, batch_size, inverse) unchanged.
 * @param device_id currently unused.
 * @return the value returned by ecntt_end2end_batch, or -1 when a std::runtime_error was caught.
 */
extern "C" int ecntt_batch_cuda(projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = ecntt_end2end_batch(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for interpolate_scalars: forwards (d_out, d_evaluations, d_domain, n) unchanged.
 * @param device_id currently unused.
 * @return the value returned by interpolate_scalars, or -1 when a std::runtime_error was caught.
 */
extern "C" int interpolate_scalars_cuda(scalar_t* d_out, scalar_t *d_evaluations, scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = interpolate_scalars(d_out, d_evaluations, d_domain, n); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for interpolate_scalars_batch: forwards
 * (d_out, d_evaluations, d_domain, n, batch_size) unchanged.
 * @param device_id currently unused.
 * @return the value returned by interpolate_scalars_batch, or -1 when a std::runtime_error was caught.
 */
extern "C" int interpolate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n,
                                              unsigned batch_size, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = interpolate_scalars_batch(d_out, d_evaluations, d_domain, n, batch_size); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for interpolate_points: forwards (d_out, d_evaluations, d_domain, n) unchanged.
 * @param device_id currently unused.
 * @return the value returned by interpolate_points, or -1 when a std::runtime_error was caught.
 */
extern "C" int interpolate_points_cuda(projective_t* d_out, projective_t *d_evaluations, scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = interpolate_points(d_out, d_evaluations, d_domain, n); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for interpolate_points_batch: forwards
 * (d_out, d_evaluations, d_domain, n, batch_size) unchanged.
 * @param device_id currently unused.
 * @return the value returned by interpolate_points_batch, or -1 when a std::runtime_error was caught.
 */
extern "C" int interpolate_points_batch_cuda(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain,
                                             unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = interpolate_points_batch(d_out, d_evaluations, d_domain, n, batch_size); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for evaluate_scalars: forwards
 * (d_out, d_coefficients, d_domain, domain_size, n) unchanged.
 * @param device_id currently unused.
 * @return the value returned by evaluate_scalars, or -1 when a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_cuda(scalar_t* d_out, scalar_t *d_coefficients, scalar_t *d_domain,
                                     unsigned domain_size, unsigned n, unsigned device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = evaluate_scalars(d_out, d_coefficients, d_domain, domain_size, n); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for evaluate_scalars_batch: forwards
 * (d_out, d_coefficients, d_domain, domain_size, n, batch_size) unchanged.
 * @param device_id currently unused.
 * @return the value returned by evaluate_scalars_batch, or -1 when a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
                                           unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = evaluate_scalars_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for evaluate_points: forwards
 * (d_out, d_coefficients, d_domain, domain_size, n) unchanged.
 * @param device_id currently unused.
 * @return the value returned by evaluate_points, or -1 when a std::runtime_error was caught.
 */
extern "C" int evaluate_points_cuda(projective_t* d_out, projective_t *d_coefficients, scalar_t *d_domain,
                                    unsigned domain_size, unsigned n, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = evaluate_points(d_out, d_coefficients, d_domain, domain_size, n); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for evaluate_points_batch: forwards
 * (d_out, d_coefficients, d_domain, domain_size, n, batch_size) unchanged.
 * @param device_id currently unused.
 * @return the value returned by evaluate_points_batch, or -1 when a std::runtime_error was caught.
 */
extern "C" int evaluate_points_batch_cuda(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
                                          unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = evaluate_points_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for evaluate_scalars_on_coset: forwards
 * (d_out, d_coefficients, d_domain, domain_size, n, coset_powers) unchanged.
 * @param device_id currently unused.
 * @return the value returned by evaluate_scalars_on_coset, or -1 when a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_on_coset_cuda(scalar_t* d_out, scalar_t *d_coefficients, scalar_t *d_domain, unsigned domain_size,
                                              unsigned n, scalar_t *coset_powers, unsigned device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = evaluate_scalars_on_coset(d_out, d_coefficients, d_domain, domain_size, n, coset_powers); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for evaluate_scalars_on_coset_batch: forwards
 * (d_out, d_coefficients, d_domain, domain_size, n, batch_size, coset_powers) unchanged.
 * @param device_id currently unused.
 * @return the value returned by evaluate_scalars_on_coset_batch, or -1 when a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
                                                    unsigned n, unsigned batch_size, scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = evaluate_scalars_on_coset_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, coset_powers); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for evaluate_points_on_coset: forwards
 * (d_out, d_coefficients, d_domain, domain_size, n, coset_powers) unchanged.
 * @param device_id currently unused.
 * @return the value returned by evaluate_points_on_coset, or -1 when a std::runtime_error was caught.
 */
extern "C" int evaluate_points_on_coset_cuda(projective_t* d_out, projective_t *d_coefficients, scalar_t *d_domain, unsigned domain_size,
                                             unsigned n, scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = evaluate_points_on_coset(d_out, d_coefficients, d_domain, domain_size, n, coset_powers); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for evaluate_points_on_coset_batch: forwards
 * (d_out, d_coefficients, d_domain, domain_size, n, batch_size, coset_powers) unchanged.
 * @param device_id currently unused.
 * @return the value returned by evaluate_points_on_coset_batch, or -1 when a std::runtime_error was caught.
 */
extern "C" int evaluate_points_on_coset_batch_cuda(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
                                                   unsigned n, unsigned batch_size, scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = evaluate_points_on_coset_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, coset_powers); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that calls reverse_order(arr, n, floor(log2(n))) on a scalar array.
 * @param arr array handed to reverse_order.
 * @param n number of elements; must be positive.
 * @param device_id currently unused.
 * @return 0 on success, -1 when n <= 0 or a std::runtime_error was caught.
 */
extern "C" int reverse_order_scalars_cuda(scalar_t* arr, int n, size_t device_id = 0)
{
  try
  {
    // log2 is undefined for n <= 0 (the former log(n)/log(2) produced NaN/-inf,
    // whose conversion to an integer is undefined behaviour).
    if (n <= 0)
    {
      return -1;
    }
    // Integer floor(log2(n)): exact, unlike the floating-point quotient
    // log(n)/log(2), which is not guaranteed to land on the integer.
    uint32_t logn = 0;
    for (unsigned remaining = unsigned(n) >> 1; remaining != 0; remaining >>= 1)
    {
      logn++;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point that calls reverse_order_batch(arr, n, floor(log2(n)), batch_size)
 * on a scalar array.
 * @param arr array handed to reverse_order_batch.
 * @param n value passed as the per-batch size; must be positive.
 * @param batch_size forwarded unchanged.
 * @param device_id currently unused.
 * @return 0 on success, -1 when n <= 0 or a std::runtime_error was caught.
 */
extern "C" int reverse_order_scalars_batch_cuda(scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    // log2 is undefined for n <= 0 (the former log(n)/log(2) produced NaN/-inf,
    // whose conversion to an integer is undefined behaviour).
    if (n <= 0)
    {
      return -1;
    }
    // Integer floor(log2(n)): exact, unlike the floating-point quotient
    // log(n)/log(2), which is not guaranteed to land on the integer.
    uint32_t logn = 0;
    for (unsigned remaining = unsigned(n) >> 1; remaining != 0; remaining >>= 1)
    {
      logn++;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point that calls reverse_order(arr, n, floor(log2(n))) on a point array.
 * @param arr array handed to reverse_order.
 * @param n number of elements; must be positive.
 * @param device_id currently unused.
 * @return 0 on success, -1 when n <= 0 or a std::runtime_error was caught.
 */
extern "C" int reverse_order_points_cuda(projective_t* arr, int n, size_t device_id = 0)
{
  try
  {
    // log2 is undefined for n <= 0 (the former log(n)/log(2) produced NaN/-inf,
    // whose conversion to an integer is undefined behaviour).
    if (n <= 0)
    {
      return -1;
    }
    // Integer floor(log2(n)): exact, unlike the floating-point quotient
    // log(n)/log(2), which is not guaranteed to land on the integer.
    uint32_t logn = 0;
    for (unsigned remaining = unsigned(n) >> 1; remaining != 0; remaining >>= 1)
    {
      logn++;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point that calls reverse_order_batch(arr, n, floor(log2(n)), batch_size)
 * on a point array.
 * @param arr array handed to reverse_order_batch.
 * @param n value passed as the per-batch size; must be positive.
 * @param batch_size forwarded unchanged.
 * @param device_id currently unused.
 * @return 0 on success, -1 when n <= 0 or a std::runtime_error was caught.
 */
extern "C" int reverse_order_points_batch_cuda(projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    // log2 is undefined for n <= 0 (the former log(n)/log(2) produced NaN/-inf,
    // whose conversion to an integer is undefined behaviour).
    if (n <= 0)
    {
      return -1;
    }
    // Integer floor(log2(n)): exact, unlike the floating-point quotient
    // log(n)/log(2), which is not guaranteed to land on the integer.
    uint32_t logn = 0;
    for (unsigned remaining = unsigned(n) >> 1; remaining != 0; remaining >>= 1)
    {
      logn++;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}

View File

@@ -1,46 +1,31 @@
#ifndef LDE_H
#define LDE_H
#pragma once
template <typename S>
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n);
template <typename S>
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size);
int interpolate_scalars(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n);
template <typename E, typename S>
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n);
int interpolate_scalars_batch(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n, unsigned batch_size);
template <typename E, typename S>
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size);
int interpolate_points(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain, unsigned n);
template <typename S>
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n);
int interpolate_points_batch(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain, unsigned n, unsigned batch_size);
template <typename S>
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size);
int evaluate_scalars(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n);
template <typename E, typename S>
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n);
int evaluate_scalars_batch(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size);
template <typename E, typename S>
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
int evaluate_points(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size, unsigned n);
int evaluate_points_batch(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain,
unsigned domain_size, unsigned n, unsigned batch_size);
template <typename S>
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers);
int evaluate_scalars_on_coset(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain,
unsigned domain_size, unsigned n, scalar_t* coset_powers);
template <typename S>
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers);
int evaluate_scalars_on_coset_batch(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, scalar_t* coset_powers);
template <typename E, typename S>
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers);
int evaluate_points_on_coset(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain,
unsigned domain_size, unsigned n, scalar_t* coset_powers);
template <typename E, typename S>
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers);
#endif
int evaluate_points_on_coset_batch(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, scalar_t* coset_powers);

View File

@@ -0,0 +1,54 @@
#include <cuda.h>
#include "ntt.cuh"
/**
 * C entry point for ntt_end2end: forwards (arr, n, inverse) unchanged.
 * @param device_id currently unused.
 * @return the value returned by ntt_end2end, or -1 when a std::runtime_error was caught.
 */
extern "C" int ntt_cuda(scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = ntt_end2end(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for ecntt_end2end: forwards (arr, n, inverse) unchanged.
 * @param device_id currently unused.
 * @return the value returned by ecntt_end2end, or -1 when a std::runtime_error was caught.
 */
extern "C" int ecntt_cuda(projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = ecntt_end2end(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for ntt_end2end_batch: forwards (arr, arr_size, batch_size, inverse) unchanged.
 * @param device_id currently unused.
 * @return the value returned by ntt_end2end_batch, or -1 when a std::runtime_error was caught.
 */
extern "C" int ntt_batch_cuda(scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = ntt_end2end_batch(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for ecntt_end2end_batch: forwards (arr, arr_size, batch_size, inverse) unchanged.
 * @param device_id currently unused.
 * @return the value returned by ecntt_end2end_batch, or -1 when a std::runtime_error was caught.
 */
extern "C" int ecntt_batch_cuda(projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1; // stays -1 if the call below throws
  try
  {
    status = ecntt_end2end_batch(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}

View File

@@ -1,10 +1,22 @@
#ifndef NTT
#define NTT
#pragma once
#include "../../curves/curve_config.cuh"
const uint32_t MAX_NUM_THREADS = 1024;
const uint32_t MAX_THREADS_BATCH = 256;
/**
 * Copy a host array of twiddle factors to newly allocated device memory.
 * @param twiddles host array holding n_twiddles twiddle factors.
 * @param n_twiddles length of twiddle factors.
 * @return pointer to the device copy (release it with cudaFree), or nullptr
 *         when the allocation or the copy fails.
 */
scalar_t * copy_twiddle_factors_to_device(scalar_t * twiddles, uint32_t n_twiddles) {
  size_t size_twiddles = n_twiddles * sizeof(scalar_t);
  scalar_t * d_twiddles = nullptr;
  // Without this check a failed allocation left d_twiddles uninitialised and
  // the garbage pointer was both written to and returned.
  if (cudaMalloc( & d_twiddles, size_twiddles) != cudaSuccess) {
    return nullptr;
  }
  if (cudaMemcpy(d_twiddles, twiddles, size_twiddles, cudaMemcpyHostToDevice) != cudaSuccess) {
    cudaFree(d_twiddles); // do not leak the buffer when the copy fails
    return nullptr;
  }
  return d_twiddles;
}
/**
* Computes the twiddle factors.
* Outputs: d_twiddles[i] = omega^i.
@@ -12,11 +24,8 @@ const uint32_t MAX_THREADS_BATCH = 256;
* @param n_twiddles number of twiddle factors.
* @param omega multiplying factor.
*/
template < typename S > __global__ void twiddle_factors_kernel(S * d_twiddles, uint32_t n_twiddles, S omega) {
for (uint32_t i = 0; i < n_twiddles; i++) {
d_twiddles[i] = S::zero();
}
d_twiddles[0] = S::one();
__global__ void twiddle_factors_kernel(scalar_t * d_twiddles, uint32_t n_twiddles, scalar_t omega) {
d_twiddles[0] = scalar_t::one();
for (uint32_t i = 0; i < n_twiddles - 1; i++) {
d_twiddles[i + 1] = omega * d_twiddles[i];
}
@@ -28,11 +37,11 @@ const uint32_t MAX_THREADS_BATCH = 256;
* @param n_twiddles number of twiddle factors.
* @param omega multiplying factor.
*/
template < typename S > S * fill_twiddle_factors_array(uint32_t n_twiddles, S omega) {
size_t size_twiddles = n_twiddles * sizeof(S);
S * d_twiddles;
scalar_t * fill_twiddle_factors_array(uint32_t n_twiddles, scalar_t omega) {
size_t size_twiddles = n_twiddles * sizeof(scalar_t);
scalar_t * d_twiddles;
cudaMalloc( & d_twiddles, size_twiddles);
twiddle_factors_kernel<S> <<< 1, 1 >>> (d_twiddles, n_twiddles, omega);
twiddle_factors_kernel <<< 1, 1 >>> (d_twiddles, n_twiddles, omega);
return d_twiddles;
}
@@ -46,7 +55,7 @@ const uint32_t MAX_THREADS_BATCH = 256;
*/
__device__ __host__ uint32_t reverseBits(uint32_t num, uint32_t logn) {
unsigned int reverse_num = 0;
for (uint32_t i = 0; i < logn; i++) {
for (int i = 0; i < logn; i++) {
if ((num & (1 << i))) reverse_num |= 1 << ((logn - 1) - i);
}
return reverse_num;
@@ -159,9 +168,9 @@ template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr
uint32_t m = 2;
for (uint32_t s = 0; s < logn; s++) {
for (uint32_t i = 0; i < n; i += m) {
uint32_t shifted_m = m >> 1;
uint32_t number_of_threads = MAX_NUM_THREADS ^ ((shifted_m ^ MAX_NUM_THREADS) & -(shifted_m < MAX_NUM_THREADS));
uint32_t number_of_blocks = shifted_m / MAX_NUM_THREADS + 1;
int shifted_m = m >> 1;
int number_of_threads = MAX_NUM_THREADS ^ ((shifted_m ^ MAX_NUM_THREADS) & -(shifted_m < MAX_NUM_THREADS));
int number_of_blocks = shifted_m / MAX_NUM_THREADS + 1;
template_butterfly_kernel < E, S > <<< number_of_threads, number_of_blocks >>> (d_arr, d_twiddles, n, n_twiddles, m, i, m >> 1);
}
m <<= 1;
@@ -195,23 +204,74 @@ template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_
return arrReversed;
}
/**
 * Cooley-Tukey NTT over elliptic-curve points: a thin wrapper that instantiates
 * ntt_template with projective_t elements and scalar_t twiddle factors.
 * NOTE! d_twiddles is expected to already reside in device memory.
 * @param arr input array of projective_t elements.
 * @param n length of arr.
 * @param d_twiddles device array of scalar_t twiddle factors (must be a power of 2).
 * @param n_twiddles length of d_twiddles.
 * @param inverse indicates whether the result should be normalized by n^(-1).
 * @return the array returned by ntt_template.
 */
projective_t * ecntt(projective_t * arr, uint32_t n, scalar_t * d_twiddles, uint32_t n_twiddles, bool inverse) {
  projective_t * transformed = ntt_template < projective_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
  return transformed;
}
/**
* Cooley-Tukey (scalar) NTT.
* @param arr input array of type E (element).
* NOTE! this function assumes that d_twiddles are located in the device memory.
* @param arr input array of type scalar_t.
* @param n length of d_arr.
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
* @param n_twiddles length of d_twiddles.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
// Scalar NTT: instantiates ntt_template with scalar_t for both the elements and
// the twiddle factors, and returns whatever ntt_template returns.
scalar_t * ntt(scalar_t * arr, uint32_t n, scalar_t * d_twiddles, uint32_t n_twiddles, bool inverse) {
  scalar_t * transformed = ntt_template < scalar_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
  return transformed;
}
/**
* Cooley-Tukey (scalar) NTT.
* @param arr input array of type scalar_t.
* @param n length of d_arr.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
template<typename E,typename S> uint32_t ntt_end2end_template(E * arr, uint32_t n, bool inverse) {
// End-to-end scalar NTT: builds the twiddle factors for size n, runs
// ntt_template, copies the result back into arr and frees the twiddles.
// Always returns 0; an empty input (n == 0) is a no-op.
extern "C" uint32_t ntt_end2end(scalar_t * arr, uint32_t n, bool inverse) {
  // Nothing to transform; also keeps log2 well defined below.
  if (n == 0) {
    return 0;
  }
  // Integer floor(log2(n)): exact, unlike the floating-point log(n)/log(2).
  uint32_t logn = 0;
  for (uint32_t remaining = n >> 1; remaining != 0; remaining >>= 1) {
    logn++;
  }
  uint32_t n_twiddles = n; // one twiddle factor per input element
  scalar_t * d_twiddles;
  if (inverse){
    d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
  } else {
    d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
  }
  scalar_t * result = ntt_template < scalar_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
  // Unsigned index: matches the type of n and avoids a signed/unsigned comparison.
  for(uint32_t i = 0; i < n; i++){
    arr[i] = result[i];
  }
  // NOTE(review): `result` is never released here - confirm who owns the
  // buffer returned by ntt_template and how it must be freed.
  cudaFree(d_twiddles);
  return 0;
}
/**
* Cooley-Tukey (scalar) NTT.
* @param arr input array of type projective_t.
* @param n length of d_arr.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
extern "C" uint32_t ecntt_end2end(projective_t * arr, uint32_t n, bool inverse) {
uint32_t logn = uint32_t(log(n) / log(2));
uint32_t n_twiddles = n;
S * twiddles = new S[n_twiddles];
S * d_twiddles;
scalar_t * twiddles = new scalar_t[n_twiddles];
scalar_t * d_twiddles;
if (inverse){
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn));
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
} else{
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn));
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
}
E * result = ntt_template < E, S > (arr, n, d_twiddles, n_twiddles, inverse);
projective_t * result = ntt_template < projective_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
for(int i = 0; i < n; i++){
arr[i] = result[i];
}
@@ -229,14 +289,14 @@ template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_
* @param logn log(n).
* @param task log(n).
*/
template < typename T > __device__ __host__ void reverseOrder_batch(T * arr, uint32_t n, uint32_t logn, uint32_t task) {
template < typename T > __device__ __host__ void reverseOrder_batch(T * arr, uint32_t n, uint32_t logn, uint32_t task) {
for (uint32_t i = 0; i < n; i++) {
uint32_t reversed = reverseBits(i, logn);
if (reversed > i) {
T tmp = arr[task * n + i];
arr[task * n + i] = arr[task * n + reversed];
arr[task * n + reversed] = tmp;
}
uint32_t reversed = reverseBits(i, logn);
if (reversed > i) {
T tmp = arr[task * n + i];
arr[task * n + i] = arr[task * n + reversed];
arr[task * n + reversed] = tmp;
}
}
}
@@ -327,32 +387,34 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
}
}
//TODO: batch ntt and ecntt can be unified into batch_template
/**
* Cooley-Tukey (scalar) NTT.
* This is a bached version - meaning it assumes than the input array
* consists of N arrays of size n. The function performs n-size NTT on each small array.
* @param arr input array of type BLS12_381::scalar_t.
* @param arr input array of type scalar_t.
* @param arr_size number of total elements = n * N.
* @param n size of batch.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
template <typename E, typename S> uint32_t ntt_end2end_batch_template(E * arr, uint32_t arr_size, uint32_t n, bool inverse) {
extern "C" uint32_t ntt_end2end_batch(scalar_t * arr, uint32_t arr_size, uint32_t n, bool inverse) {
int batches = int(arr_size / n);
uint32_t logn = uint32_t(log(n) / log(2));
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as BLS12_381::scalar_t::omega() is of that order.
size_t size_E = arr_size * sizeof(E);
S * d_twiddles;
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as scalar_t::omega() is of that order.
size_t size_E = arr_size * sizeof(scalar_t);
scalar_t * d_twiddles;
if (inverse){
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn));
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
} else{
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn));
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
}
E * d_arr;
scalar_t * d_arr;
cudaMalloc( & d_arr, size_E);
cudaMemcpy(d_arr, arr, size_E, cudaMemcpyHostToDevice);
int NUM_THREADS = MAX_THREADS_BATCH;
int NUM_BLOCKS = (batches + NUM_THREADS - 1) / NUM_THREADS;
ntt_template_kernel_rev_ord<E, S><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
ntt_template_kernel_rev_ord<scalar_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
int chunks = max(int((n / 2) / NUM_THREADS), 1);
@@ -361,13 +423,14 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
{
ntt_template_kernel<E, S><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
ntt_template_kernel<scalar_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
}
if (inverse == true)
{
NUM_THREADS = MAX_NUM_THREADS;
NUM_BLOCKS = (arr_size + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arr, arr_size, S::inv_log_size(logn));
template_normalize_kernel < scalar_t, scalar_t > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arr, arr_size, scalar_t::inv_log_size(logn));
}
cudaMemcpy(arr, d_arr, size_E, cudaMemcpyDeviceToHost);
cudaFree(d_arr);
@@ -375,4 +438,50 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
return 0;
}
#endif
/**
 * Cooley-Tukey elliptic-curve NTT.
 * This is a batched version - meaning it assumes that the input array
 * consists of N arrays of size n. The function performs an n-size NTT on each small array.
 * @param arr input array of type projective_t.
 * @param arr_size number of total elements = n * N.
 * @param n size of each small array (the number of batches is arr_size / n).
 * @param inverse indicate if the result array should be normalized by n^(-1).
 * @return always 0.
 */
extern "C" uint32_t ecntt_end2end_batch(projective_t * arr, uint32_t arr_size, uint32_t n, bool inverse) {
  // n == 0 would divide by zero in arr_size / n, and n == 1 would make
  // NUM_THREADS zero further down; there is nothing to transform in either case.
  if (n < 2) {
    return 0;
  }
  int batches = int(arr_size / n);
  // Integer floor(log2(n)): exact, unlike the floating-point log(n)/log(2).
  uint32_t logn = 0;
  for (uint32_t remaining = n >> 1; remaining != 0; remaining >>= 1) {
    logn++;
  }
  uint32_t n_twiddles = n; // one twiddle factor per element of a small array
  size_t size_E = arr_size * sizeof(projective_t);
  scalar_t * d_twiddles;
  if (inverse){
    d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
  } else{
    d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
  }
  projective_t * d_arr;
  cudaMalloc( & d_arr, size_E);
  cudaMemcpy(d_arr, arr, size_E, cudaMemcpyHostToDevice);
  int NUM_THREADS = MAX_THREADS_BATCH;
  int NUM_BLOCKS = (batches + NUM_THREADS - 1) / NUM_THREADS;
  ntt_template_kernel_rev_ord<projective_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
  NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
  int chunks = max(int((n / 2) / NUM_THREADS), 1);
  int total_tasks = batches * chunks;
  NUM_BLOCKS = total_tasks;
  for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
  {
    ntt_template_kernel<projective_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
  }
  if (inverse)
  {
    NUM_THREADS = MAX_NUM_THREADS;
    NUM_BLOCKS = (arr_size + NUM_THREADS - 1) / NUM_THREADS;
    // Launch configuration is <<<grid, block>>>. The arguments used to be
    // swapped, which requested NUM_BLOCKS threads per block and therefore
    // exceeded the 1024-thread block limit once arr_size > 1024 * 1024.
    // NOTE(review): assumes the kernel derives a flat global index from
    // blockIdx/blockDim/threadIdx - confirm against its definition.
    template_normalize_kernel < projective_t, scalar_t > <<< NUM_BLOCKS, NUM_THREADS >>> (d_arr, arr_size, scalar_t::inv_log_size(logn));
  }
  cudaMemcpy(arr, d_arr, size_E, cudaMemcpyDeviceToHost);
  cudaFree(d_arr);
  cudaFree(d_twiddles);
  return 0;
}

View File

@@ -1,23 +1,21 @@
#ifndef _BLS12_377_VEC_MULT
#define _BLS12_377_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "../../curves/curve_config.cuh"
#include "ve_mod_mult.cuh"
extern "C" int32_t vec_mod_mult_point_bls12_377(BLS12_377::projective_t *inout,
BLS12_377::scalar_t *scalar_vec,
extern "C" int32_t vec_mod_mult_point(projective_t *inout,
scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_377::projective_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments);
vector_mod_mult<projective_t, scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -27,15 +25,15 @@ extern "C" int32_t vec_mod_mult_point_bls12_377(BLS12_377::projective_t *inout,
}
}
extern "C" int32_t vec_mod_mult_scalar_bls12_377(BLS12_377::scalar_t *inout,
BLS12_377::scalar_t *scalar_vec,
extern "C" int32_t vec_mod_mult_scalar(scalar_t *inout,
scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_377::scalar_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments);
vector_mod_mult<scalar_t, scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -45,16 +43,16 @@ extern "C" int32_t vec_mod_mult_scalar_bls12_377(BLS12_377::scalar_t *inout,
}
}
extern "C" int32_t matrix_vec_mod_mult_bls12_377(BLS12_377::scalar_t *matrix_flattened,
BLS12_377::scalar_t *input,
BLS12_377::scalar_t *output,
extern "C" int32_t matrix_vec_mod_mult(scalar_t *matrix_flattened,
scalar_t *input,
scalar_t *output,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
matrix_mod_mult<BLS12_377::scalar_t>(matrix_flattened, input, output, n_elments);
matrix_mod_mult<scalar_t>(matrix_flattened, input, output, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -63,4 +61,3 @@ extern "C" int32_t matrix_vec_mod_mult_bls12_377(BLS12_377::scalar_t *matrix_fla
return -1;
}
}
#endif

View File

@@ -1,5 +1,3 @@
#ifndef VEC_MULT
#define VEC_MULT
#pragma once
#include <stdexcept>
#include <cuda.h>
@@ -110,4 +108,3 @@ int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t di
return 0;
}
#endif

View File

@@ -1,22 +0,0 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
namespace BLS12_377 {
  // Scalar field, instantiated from PARAMS_BLS12_377::fp_config.
  using scalar_field_t = Field<PARAMS_BLS12_377::fp_config>;
  using scalar_t = scalar_field_t;

  // Field of the point coordinates, instantiated from PARAMS_BLS12_377::fq_config.
  using point_field_t = Field<PARAMS_BLS12_377::fq_config>;

  // Constant built from weierstrass_b; handed to Projective as its third template argument.
  static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_377::weierstrass_b };
  using projective_t = Projective<point_field_t, scalar_field_t, b>;
  using affine_t = Affine<point_field_t>;

#if defined(G2_DEFINED)
  // G2 variants: same layout as above, over the extension field of fq_config.
  using g2_point_field_t = ExtensionField<PARAMS_BLS12_377::fq_config>;
  static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_re },
                                                             point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_im }};
  using g2_projective_t = Projective<g2_point_field_t, scalar_field_t, b_g2>;
  using g2_affine_t = Affine<g2_point_field_t>;
#endif
}

View File

@@ -1,308 +0,0 @@
#ifndef _BLS12_377_LDE
#define _BLS12_377_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
/**
 * Builds the twiddle-factor array for a domain of `domain_size` elements.
 * fill_twiddle_factors_array() is given scalar_t::omega_inv(logn) when `inverse` is set and
 * scalar_t::omega(logn) otherwise.
 * @return the pointer produced by fill_twiddle_factors_array(), or nullptr when a
 *         std::runtime_error was caught (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" BLS12_377::scalar_t* build_domain_cuda_bls12_377(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
  BLS12_377::scalar_t* twiddles = nullptr;
  try
  {
    twiddles = inverse ? fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega_inv(logn))
                       : fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega(logn));
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return twiddles;
}
/**
 * C entry point for the scalar NTT: forwards to ntt_end2end_template<scalar_t, scalar_t>.
 * @return the value returned by the template, or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not forwarded yet.
 */
extern "C" int ntt_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // TODO: pass device_id
    status = ntt_end2end_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, n, inverse);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for the NTT over curve points: forwards to
 * ntt_end2end_template<projective_t, scalar_t>.
 * @return the value returned by the template, or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not forwarded yet.
 */
extern "C" int ecntt_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // TODO: pass device_id
    status = ntt_end2end_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, n, inverse);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for the batched scalar NTT: forwards to
 * ntt_end2end_batch_template<scalar_t, scalar_t> with the total element count and the batch size.
 * @return the value returned by the template, or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not forwarded yet.
 */
extern "C" int ntt_batch_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // TODO: pass device_id
    status = ntt_end2end_batch_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for the batched NTT over curve points: forwards to
 * ntt_end2end_batch_template<projective_t, scalar_t> with the total element count and the batch size.
 * @return the value returned by the template, or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not forwarded yet.
 */
extern "C" int ecntt_batch_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // TODO: pass device_id
    status = ntt_end2end_batch_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards scalar evaluations to interpolate().
 * The `d_` prefix suggests device pointers - NOTE(review): confirm against interpolate().
 * @return the value returned by interpolate(), or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int interpolate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
  int status = -1;
  try
  {
    status = interpolate(d_out, d_evaluations, d_domain, n);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards a batch of scalar evaluations to interpolate_batch().
 * The `d_` prefix suggests device pointers - NOTE(review): confirm against interpolate_batch().
 * @return the value returned by interpolate_batch(), or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int interpolate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_evaluations, BLS12_377::scalar_t* d_domain, unsigned n,
                                                        unsigned batch_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards curve-point evaluations to interpolate().
 * The `d_` prefix suggests device pointers - NOTE(review): confirm against interpolate().
 * @return the value returned by interpolate(), or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int interpolate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = interpolate(d_out, d_evaluations, d_domain, n);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards a batch of curve-point evaluations to interpolate_batch().
 * The `d_` prefix suggests device pointers - NOTE(review): confirm against interpolate_batch().
 * @return the value returned by interpolate_batch(), or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int interpolate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_evaluations, BLS12_377::scalar_t* d_domain,
                                                       unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards scalar coefficients to evaluate() without a coset:
 * the coset flag is false and the coset-powers pointer is null.
 * @return the value returned by evaluate(), or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int evaluate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain,
                                               unsigned domain_size, unsigned n, unsigned device_id = 0)
{
  int status = -1;
  try
  {
    // Typed null pointer standing in for the unused coset powers.
    BLS12_377::scalar_t* no_coset_powers = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards a batch of scalar coefficients to evaluate_batch() without a coset:
 * the coset flag is false and the coset-powers pointer is null.
 * @return the value returned by evaluate_batch(), or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int evaluate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
                                                     unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // Typed null pointer standing in for the unused coset powers.
    BLS12_377::scalar_t* no_coset_powers = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards curve-point coefficients to evaluate() without a coset:
 * the coset flag is false and the coset-powers pointer is null.
 * @return the value returned by evaluate(), or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int evaluate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain,
                                              unsigned domain_size, unsigned n, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // Typed null pointer standing in for the unused coset powers.
    BLS12_377::scalar_t* no_coset_powers = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards a batch of curve-point coefficients to evaluate_batch() without a
 * coset: the coset flag is false and the coset-powers pointer is null.
 * @return the value returned by evaluate_batch(), or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int evaluate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
                                                    unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // Typed null pointer standing in for the unused coset powers.
    BLS12_377::scalar_t* no_coset_powers = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards scalar coefficients to evaluate() with the coset flag set to true
 * and the caller's `coset_powers`.
 * @return the value returned by evaluate(), or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int evaluate_scalars_on_coset_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
                                                        unsigned n, BLS12_377::scalar_t *coset_powers, unsigned device_id = 0)
{
  int status = -1;
  try
  {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards a batch of scalar coefficients to evaluate_batch() with the coset
 * flag set to true and the caller's `coset_powers`.
 * @return the value returned by evaluate_batch(), or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
                                                              unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards curve-point coefficients to evaluate() with the coset flag set to
 * true and the caller's `coset_powers`.
 * @return the value returned by evaluate(), or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int evaluate_points_on_coset_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
                                                       unsigned n, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards a batch of curve-point coefficients to evaluate_batch() with the
 * coset flag set to true and the caller's `coset_powers`.
 * @return the value returned by evaluate_batch(), or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
                                                             unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that calls reverse_order() on `arr` with logn = floor(log2(n)).
 * NOTE(review): reverse_order() is presumably the bit-reversal permutation used by the NTT - confirm
 * in lde.cu.
 * @param arr scalars to reorder.
 * @param n number of elements; values below 2 yield logn = 0.
 * @param device_id accepted but not used by this body.
 * @return 0 on completion, or -1 when a std::runtime_error was caught (its message is printed).
 */
extern "C" int reverse_order_scalars_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, size_t device_id = 0)
{
  try
  {
    // Integer floor(log2(n)): independent of floating-point rounding and well defined for n <= 0,
    // where converting log(n) to an unsigned value is not.
    uint32_t logn = 0;
    for (int m = n; m > 1; m >>= 1)
    {
      logn++;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point that calls reverse_order_batch() on `arr` with logn = floor(log2(n)).
 * NOTE(review): reverse_order_batch() is presumably the per-batch bit-reversal permutation used by
 * the NTT - confirm in lde.cu.
 * @param arr scalars to reorder.
 * @param n size passed on to reverse_order_batch(); values below 2 yield logn = 0.
 * @param batch_size number of batches passed on to reverse_order_batch().
 * @param device_id accepted but not used by this body.
 * @return 0 on completion, or -1 when a std::runtime_error was caught (its message is printed).
 */
extern "C" int reverse_order_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    // Integer floor(log2(n)): independent of floating-point rounding and well defined for n <= 0,
    // where converting log(n) to an unsigned value is not.
    uint32_t logn = 0;
    for (int m = n; m > 1; m >>= 1)
    {
      logn++;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point that calls reverse_order() on an array of curve points with logn = floor(log2(n)).
 * NOTE(review): reverse_order() is presumably the bit-reversal permutation used by the NTT - confirm
 * in lde.cu.
 * @param arr points to reorder.
 * @param n number of elements; values below 2 yield logn = 0.
 * @param device_id accepted but not used by this body.
 * @return 0 on completion, or -1 when a std::runtime_error was caught (its message is printed).
 */
extern "C" int reverse_order_points_cuda_bls12_377(BLS12_377::projective_t* arr, int n, size_t device_id = 0)
{
  try
  {
    // Integer floor(log2(n)): independent of floating-point rounding and well defined for n <= 0,
    // where converting log(n) to an unsigned value is not.
    uint32_t logn = 0;
    for (int m = n; m > 1; m >>= 1)
    {
      logn++;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point that calls reverse_order_batch() on an array of curve points with
 * logn = floor(log2(n)).
 * NOTE(review): reverse_order_batch() is presumably the per-batch bit-reversal permutation used by
 * the NTT - confirm in lde.cu.
 * @param arr points to reorder.
 * @param n size passed on to reverse_order_batch(); values below 2 yield logn = 0.
 * @param batch_size number of batches passed on to reverse_order_batch().
 * @param device_id accepted but not used by this body.
 * @return 0 on completion, or -1 when a std::runtime_error was caught (its message is printed).
 */
extern "C" int reverse_order_points_batch_cuda_bls12_377(BLS12_377::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    // Integer floor(log2(n)): independent of floating-point rounding and well defined for n <= 0,
    // where converting log(n) to an unsigned value is not.
    uint32_t logn = 0;
    for (int m = n; m > 1; m >>= 1)
    {
      logn++;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
#endif

View File

@@ -1,87 +0,0 @@
#ifndef _BLS12_377_MSM
#define _BLS12_377_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
/**
 * C entry point for a single MSM: forwards to large_msm<scalar_t, projective_t, affine_t> with the
 * last argument set to false.
 * NOTE(review): that flag presumably means "inputs are not on the device yet" - confirm in msm.cu.
 * @return CUDA_SUCCESS on completion, or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C"
int msm_cuda_bls12_377(BLS12_377::projective_t *out, BLS12_377::affine_t points[],
                       BLS12_377::scalar_t scalars[], size_t count, size_t device_id = 0)
{
  int status = -1;
  try
  {
    large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point for a batch of MSMs: forwards to
 * batched_large_msm<scalar_t, projective_t, affine_t> with the last argument set to false.
 * NOTE(review): that flag presumably means "inputs are not on the device yet" - confirm in msm.cu.
 * @return CUDA_SUCCESS on completion, or -1 when a std::runtime_error was caught
 *         (its message is printed).
 * `device_id` is accepted but not used by this body.
 */
extern "C" int msm_batch_cuda_bls12_377(BLS12_377::projective_t* out, BLS12_377::affine_t points[],
                                        BLS12_377::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    batched_large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, batch_size, msm_size, out, false);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * Commit to a polynomial by running a single MSM.
 * This only invokes large_msm (with its last argument set to true); no conversion between the
 * evaluation and coefficient forms of the scalars or points takes place here.
 * @param d_out Output point the result is written to.
 * @param d_scalars Scalars for the MSM. Must be on device.
 * @param d_points Points for the MSM. Must be on device.
 * @param count Length of the `d_scalars` and `d_points` arrays (they should have equal length).
 * @param device_id Accepted but not used by this body.
 * @return 0 on completion, or -1 when a std::runtime_error was caught (its message is printed).
 */
extern "C"
int commit_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t device_id = 0)
{
  int status = -1;
  try
  {
    large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(d_scalars, d_points, count, d_out, true);
    status = 0;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * Commit to a batch of polynomials by running a batched MSM.
 * This only invokes batched_large_msm (with its last argument set to true); no conversion between
 * the evaluation and coefficient forms of the scalars or points takes place here.
 * @param d_out Output points the results are written to.
 * @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
 * @param d_points Points for the MSMs. Must be on device. The same set of bases is assumed for each MSM.
 * @param count Length of the `d_points` array; `d_scalars` has length `count` * `batch_size`.
 * @param batch_size Size of the batch.
 * @param device_id Accepted but not used by this body.
 * @return 0 on completion, or -1 when a std::runtime_error was caught (its message is printed).
 */
extern "C"
int commit_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
    status = 0;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
#endif

View File

@@ -1,155 +0,0 @@
#pragma once
#include "../../utils/storage.cuh"
namespace PARAMS_BLS12_377{
struct fp_config{
static constexpr unsigned limbs_count = 8;
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd, 0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb, 0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3, 0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7, 0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f, 0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
static constexpr unsigned modulus_bits_count = 253;
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b, 0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> omega4= {0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765, 0x970dec00, 0x23ed1347, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega5= {0x0405f600, 0xfa8e7081, 0xf8a89660, 0x38b1c291, 0x6bda5fce, 0xefab9005, 0x92a3c754, 0x0b6b0756};
static constexpr storage<limbs_count> omega6= {0xaf0a50c8, 0xc5b2c78e, 0x4636deb3, 0x72e32a34, 0xb6f97778, 0x3d775d15, 0x2b16be6e, 0x0c4c070d};
static constexpr storage<limbs_count> omega7= {0x7a1ade2c, 0x3f5a4e73, 0x0120d1db, 0x71e5bca1, 0x3b2866fd, 0xbcb44162, 0x89c38db1, 0x06ed1a90};
static constexpr storage<limbs_count> omega8= {0xbd2cd25e, 0x61c5510e, 0x2b0d531c, 0xe2d70111, 0x94c3bd4b, 0x738f9894, 0x53182695, 0x0b1e0f1d};
static constexpr storage<limbs_count> omega9= {0x8cb9508c, 0xcfb2f75e, 0xf491e401, 0x4c14f244, 0x23c16afb, 0xc8f5265f, 0x70f3ff2a, 0x0cda7e27};
static constexpr storage<limbs_count> omega10= {0x0bdc32ee, 0xca77feb9, 0xd957f5a9, 0xf36ddfd4, 0x61ba14c4, 0x491c58f5, 0x93e8f339, 0x0618d3c9};
static constexpr storage<limbs_count> omega11= {0x2d89d82f, 0x68c3242e, 0x832a3729, 0xf9559645, 0xbceb62cc, 0x5c803c5e, 0x99ffa2f8, 0x1177cf5d};
static constexpr storage<limbs_count> omega12= {0x6932851a, 0xb6ed40f2, 0x1e0da12e, 0x79cbe7fb, 0x2a7d8f87, 0x8d408575, 0x7505d049, 0x11867341};
static constexpr storage<limbs_count> omega13= {0x07146cbf, 0x8cf7d87a, 0x109c4d23, 0x14ac37dc, 0x883e9660, 0x082d15f0, 0xad9ea9b8, 0x003719b1};
static constexpr storage<limbs_count> omega14= {0xfd0aee77, 0x2260e0dd, 0x1e33b6db, 0xc0cbbc3f, 0xfe7e1b36, 0xc8bf6747, 0x4cb802c1, 0x129e4fd5};
static constexpr storage<limbs_count> omega15= {0x8ac75741, 0x22f6fca2, 0xdd37b519, 0x8101b557, 0x1036226a, 0xf493bb8a, 0xfce05c2c, 0x06dbad6c};
static constexpr storage<limbs_count> omega16= {0x56733f8b, 0x7d246c24, 0xff70b46a, 0xbc3c4112, 0x6f13530b, 0x2c159b40, 0xc55d287b, 0x0c13137a};
static constexpr storage<limbs_count> omega17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> omega18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> omega19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
static constexpr storage<limbs_count> omega20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
static constexpr storage<limbs_count> omega21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
static constexpr storage<limbs_count> omega22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
static constexpr storage<limbs_count> omega23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
static constexpr storage<limbs_count> omega24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
static constexpr storage<limbs_count> omega25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
static constexpr storage<limbs_count> omega26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
static constexpr storage<limbs_count> omega27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
static constexpr storage<limbs_count> omega28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
static constexpr storage<limbs_count> omega29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
static constexpr storage<limbs_count> omega_inv1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> omega_inv4= {0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99, 0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> omega_inv5= {0x29f04fbb, 0x401766f3, 0x0a4b98b2, 0x7e4e5f63, 0x9fbc28da, 0x35887f12, 0xdabe3b97, 0x045cb225};
static constexpr storage<limbs_count> omega_inv6= {0xac4ce534, 0xf3883827, 0x7c4940f0, 0x9f9a114f, 0x32cc3182, 0xe48527ee, 0x2877f4c2, 0x02d4450c};
static constexpr storage<limbs_count> omega_inv7= {0x4afbf0bb, 0xd2533833, 0x1d646d56, 0x20987ba6, 0xb8ae7d61, 0xf2c34c11, 0xb53ae995, 0x09962e74};
static constexpr storage<limbs_count> omega_inv8= {0x34f5271a, 0xd6aeb755, 0x493bb125, 0xc0e24cfd, 0x35cf1879, 0xc9d2a1ad, 0x19000e58, 0x0f3570fa};
static constexpr storage<limbs_count> omega_inv9= {0xbec3ee61, 0x2601423e, 0xb5252af1, 0x94f5ab4b, 0x205d09ca, 0xa1184628, 0x82a1fba2, 0x0e305e1e};
static constexpr storage<limbs_count> omega_inv10= {0x7e3320f2, 0x3cbad3a7, 0x4269c624, 0x7866653a, 0xa2fc13a2, 0xaf6d742d, 0xfe24db2a, 0x03ed8246};
static constexpr storage<limbs_count> omega_inv11= {0x30cff7d3, 0xcb6ab09e, 0xd88db7e6, 0x29949e69, 0x24db3cd4, 0xb9117dc6, 0xca8d11b5, 0x01b2aadd};
static constexpr storage<limbs_count> omega_inv12= {0x433b851c, 0x1c8fbc5d, 0x545e622f, 0x0ccc3b8c, 0x5c624e0f, 0x0fba9df2, 0x0496ddf9, 0x02d54c5d};
static constexpr storage<limbs_count> omega_inv13= {0x0a176838, 0x2ddbbfdd, 0xc4c77f0f, 0xb7a1e4f3, 0x41cad032, 0x645b4383, 0xbfb123c4, 0x0f3fe2e3};
static constexpr storage<limbs_count> omega_inv14= {0x9ff30538, 0x1d6d50fe, 0x8576b6fa, 0xca07f2d2, 0x720da6d2, 0x587839fa, 0xe9ebd753, 0x0038d5aa};
static constexpr storage<limbs_count> omega_inv15= {0x8e30fb24, 0xaeac713d, 0x21906459, 0xd004e9e3, 0xa60b0a33, 0x2fc54303, 0x14e545a6, 0x039063f8};
static constexpr storage<limbs_count> omega_inv16= {0x74d36c47, 0x112559bd, 0x4154b77a, 0x87db7016, 0x3843df80, 0x9e779ae5, 0x297077d0, 0x024424f2};
static constexpr storage<limbs_count> omega_inv17= {0x65953c15, 0xd649ae5e, 0x56accc60, 0x879fe571, 0xa3ba1e39, 0xba914f52, 0xd6ea78a2, 0x01b74920};
static constexpr storage<limbs_count> omega_inv18= {0x3d8a82b4, 0x319dea45, 0x8fc703de, 0x49468894, 0xc6b00817, 0x703f710f, 0xe862bc53, 0x007762fd};
static constexpr storage<limbs_count> omega_inv19= {0x5bae083f, 0x4f433336, 0x27612fe3, 0x485e079c, 0x7f8f0a07, 0xf83b6572, 0xca91a4d4, 0x06bdcaaf};
static constexpr storage<limbs_count> omega_inv20= {0xb2fb63eb, 0x4a0bf5e7, 0x996004d9, 0x6f64f8ec, 0x67519c5e, 0x0fecd781, 0x1cab2760, 0x04475eb3};
static constexpr storage<limbs_count> omega_inv21= {0xcd83d14f, 0xadbd6ce4, 0x750b194a, 0xc664d3bc, 0x89c9f437, 0x3034dfed, 0xcc2e643b, 0x03d502b8};
static constexpr storage<limbs_count> omega_inv22= {0x2272320b, 0xf89478a9, 0xd2e658b7, 0x3adac024, 0x94b25831, 0xf38d840f, 0x37dc6c4c, 0x04540b1f};
static constexpr storage<limbs_count> omega_inv23= {0xa6d411fe, 0x19d969b1, 0xf544a648, 0x973f00f7, 0xc9ed9f93, 0xb18f166c, 0xe7f21124, 0x02fba68e};
static constexpr storage<limbs_count> omega_inv24= {0x94921227, 0x78b96b20, 0x23b35b65, 0x07cd90db, 0xc843f1c3, 0x111f4fd9, 0xff729f23, 0x0ec4b820};
static constexpr storage<limbs_count> omega_inv25= {0x4879d823, 0x53eb200b, 0x93095f4a, 0x1971fac3, 0x86989a58, 0x8467ffe6, 0x306ed29d, 0x0af20231};
static constexpr storage<limbs_count> omega_inv26= {0xd4793454, 0x71c907bd, 0x7700defb, 0xc11aa47e, 0xbac11769, 0xf03e0873, 0x97419136, 0x0353190d};
static constexpr storage<limbs_count> omega_inv27= {0xa81a701c, 0x61a3deb6, 0x91bbbecf, 0xd8a4eda1, 0x6feb65df, 0x3f5339b1, 0x8b5421f2, 0x108adc5b};
static constexpr storage<limbs_count> omega_inv28= {0xe7bf5a41, 0x7d6c573a, 0xfa83b1f7, 0x8038b697, 0xa6718ce9, 0x2a988bee, 0x1239b708, 0x0846f362};
static constexpr storage<limbs_count> omega_inv29= {0xe3373548, 0x89a068a4, 0x78a6c4e5, 0xf31284cf, 0x6e9396d6, 0x9eed5c8d, 0x7e4342f9, 0x01643c65};
static constexpr storage<limbs_count> omega_inv30= {0x123a81f6, 0xc03a3272, 0x115b15e8, 0x377e6d2f, 0x2d6d7206, 0xed5575e4, 0x714004f2, 0x0b1e37e4};
static constexpr storage<limbs_count> omega_inv31= {0xdde8ffc5, 0x62a29589, 0x618c5d62, 0xfb6716e8, 0x88d61f25, 0x787e561c, 0xd2b21c7e, 0x0e351761};
static constexpr storage<limbs_count> omega_inv32= {0x7aca7fbe, 0xc9fea0e9, 0xb41a8854, 0x965ff314, 0x810eea7e, 0x743415d4, 0x8275bbd1, 0x0431c01b};
static constexpr storage<limbs_count> inv1= {0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af};
static constexpr storage<limbs_count> inv2= {0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06};
static constexpr storage<limbs_count> inv3= {0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2};
static constexpr storage<limbs_count> inv4= {0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e, 0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08};
static constexpr storage<limbs_count> inv5= {0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346, 0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33};
static constexpr storage<limbs_count> inv6= {0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22, 0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9};
static constexpr storage<limbs_count> inv7= {0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210, 0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93};
static constexpr storage<limbs_count> inv8= {0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87, 0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9};
static constexpr storage<limbs_count> inv9= {0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3, 0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab};
static constexpr storage<limbs_count> inv10= {0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61, 0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85};
static constexpr storage<limbs_count> inv11= {0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af, 0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1};
static constexpr storage<limbs_count> inv12= {0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57, 0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8};
static constexpr storage<limbs_count> inv13= {0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab, 0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003};
static constexpr storage<limbs_count> inv14= {0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054, 0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1};
static constexpr storage<limbs_count> inv15= {0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9, 0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007};
static constexpr storage<limbs_count> inv16= {0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54, 0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3};
static constexpr storage<limbs_count> inv17= {0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29, 0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08};
static constexpr storage<limbs_count> inv18= {0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094, 0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3};
static constexpr storage<limbs_count> inv19= {0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9, 0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309};
static constexpr storage<limbs_count> inv20= {0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164, 0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433};
static constexpr storage<limbs_count> inv21= {0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31, 0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9};
static constexpr storage<limbs_count> inv22= {0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198, 0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513};
static constexpr storage<limbs_count> inv23= {0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b, 0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539};
static constexpr storage<limbs_count> inv24= {0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5, 0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b};
static constexpr storage<limbs_count> inv25= {0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651, 0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555};
static constexpr storage<limbs_count> inv26= {0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8, 0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559};
static constexpr storage<limbs_count> inv27= {0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3, 0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c};
static constexpr storage<limbs_count> inv28= {0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9, 0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d};
static constexpr storage<limbs_count> inv29= {0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4, 0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e};
static constexpr storage<limbs_count> inv30= {0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e};
static constexpr storage<limbs_count> inv31= {0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e};
static constexpr storage<limbs_count> inv32= {0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e};
};
// Compile-time parameters of the base field used for point coordinates on this curve.
// Every multi-limb value is stored as 32-bit limbs, least-significant limb first
// (see `one`, whose only non-zero limb is the first).
struct fq_config{
  // Number of 32-bit limbs per field element (12 * 32 = 384 bits of storage).
  static constexpr unsigned limbs_count = 12;
  // Field modulus.
  static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
  // modulus*2
  static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88, 0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6, 0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
  // modulus*4
  static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510, 0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc, 0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
  // modulus zero-extended to double width (2 * limbs_count limbs).
  static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
  // modulus^2 (double width)
  static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2, 0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af, 0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
  // 2*modulus^2 (double width)
  static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4, 0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f, 0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
  // 4*modulus^2 (double width)
  static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48, 0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be, 0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
  // Bit length of the modulus.
  static constexpr unsigned modulus_bits_count = 377;
  // Precomputed reduction constant; presumably floor(2^(2*modulus_bits_count) / modulus) -- TODO confirm.
  static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488, 0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
  // The integer 1.
  static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
  // The integer 0.
  static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
  // Magnitude of i^2, the square of the imaginary unit for the extension field.
  // For this curve the quadratic extension is built with non-residue -5 (i^2 = -5),
  // so the magnitude is 5 and the sign is carried by i_squared_is_negative below.
  static constexpr uint32_t i_squared = 5;
  // true if i^2 is negative
  static constexpr bool i_squared_is_negative = true;
  // G1 and G2 generators
  static constexpr storage<limbs_count> generator_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512, 0xbd37cb5c, 0x188282c8,
                                                       0xaa9d41bb, 0x85951e2c, 0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
  static constexpr storage<limbs_count> generator_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36, 0x4fb82305, 0x6d182ad4,
                                                       0xca3e52d9, 0xbd7fb348, 0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
  // NOTE(review): the most-significant limb of each of the four G2 coordinates below is larger than
  // the most-significant limb of `modulus` (0x01ae3a46), so these values are not reduced modulo this
  // field. They look like placeholders that still need to be replaced with this curve's G2 generator
  // -- TODO confirm and fix together with the G2 curve coefficients.
  static constexpr storage<limbs_count> generator_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0xbac0326, 0x7ae3d177, 0xb4510b64,
                                                          0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x24aa2b2};
  static constexpr storage<limbs_count> generator_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
                                                          0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
  static constexpr storage<limbs_count> generator_y_re = {0x8b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
                                                          0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0xce5d527};
  static constexpr storage<limbs_count> generator_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
                                                          0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x606c4a0};
};
// G1 curve constant; presumably the b term of the curve equation y^2 = x^3 + b (name-based, confirm
// against the code that consumes it). The limbs encode the value 1 (same limb pattern as fq_config::one).
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// TODO: correct parameters for G2 here
// NOTE(review): per the TODO above, the two G2 components below are placeholders. Both currently
// encode the value 4 and still need to be replaced with this curve's actual G2 coefficient.
// Real component of the G2 curve constant (placeholder).
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// Imaginary component of the G2 curve constant (placeholder).
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
}

View File

@@ -1,22 +0,0 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
/**
 * C-linkage equality check for two BLS12-377 projective points.
 *
 * Returns true only when `*point1 == *point2` (as decided by the projective type's own
 * operator==) and neither point has all three coordinates equal to the field's zero element.
 * Both pointers are dereferenced unconditionally, so callers must pass valid pointers.
 */
extern "C" bool eq_bls12_377(BLS12_377::projective_t *point1, BLS12_377::projective_t *point2)
{
  // True when x, y and z are all the zero field element.
  auto all_coordinates_zero = [](BLS12_377::projective_t *pt) {
    return (pt->x == BLS12_377::point_field_t::zero()) && (pt->y == BLS12_377::point_field_t::zero()) &&
           (pt->z == BLS12_377::point_field_t::zero());
  };

  if (!(*point1 == *point2)) return false;
  // An all-zero triple is rejected on either side even though the two points compared equal.
  if (all_coordinates_zero(point1)) return false;
  return !all_coordinates_zero(point2);
}
#if defined(G2_DEFINED)
/**
 * C-linkage equality check for two BLS12-377 G2 projective points; only compiled when
 * G2_DEFINED is defined.
 *
 * Returns true only when `*point1 == *point2` (as decided by the G2 projective type's own
 * operator==) and neither point has all three coordinates equal to the G2 field's zero element.
 * Both pointers are dereferenced unconditionally, so callers must pass valid pointers.
 */
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t *point1, BLS12_377::g2_projective_t *point2)
{
  // True when x, y and z are all the zero element of the G2 coordinate field.
  auto all_coordinates_zero = [](BLS12_377::g2_projective_t *pt) {
    return (pt->x == BLS12_377::g2_point_field_t::zero()) && (pt->y == BLS12_377::g2_point_field_t::zero()) &&
           (pt->z == BLS12_377::g2_point_field_t::zero());
  };

  if (!(*point1 == *point2)) return false;
  // An all-zero triple is rejected on either side even though the two points compared equal.
  if (all_coordinates_zero(point1)) return false;
  return !all_coordinates_zero(point2);
}
#endif

View File

@@ -1,5 +0,0 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"

176
icicle/curves/bls12_381.cuh Normal file
View File

@@ -0,0 +1,176 @@
#pragma once
#include "../utils/storage.cuh"
// Compile-time parameters of the 255-bit field configured here.
// Every multi-limb value is stored as 32-bit limbs, least-significant limb first
// (see `one`, whose only non-zero limb is the first).
struct fp_config {
  // field structure size = 8 * 32 bit
  static constexpr unsigned limbs_count = 8;
  // modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513
  static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
  // modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
  static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
  // NOTE(review): all zeros, not modulus*4. modulus_2 already has its top bit set, so modulus*4 does
  // not fit in limbs_count limbs; presumably this member is unused for this field -- TODO confirm.
  static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
  // modulus zero-extended to double width (2 * limbs_count limbs).
  static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
                                                            0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
  // modulus^2
  static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
                                                            0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
  // 2*modulus^2
  static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
                                                              0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
  // Bit length of the modulus.
  static constexpr unsigned modulus_bits_count = 255;
  // m = floor(2^(2*modulus_bits_count) / modulus)
  static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
  // The integer 1.
  static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
  // The integer 0.
  static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};

  // omega1 .. omega32 were originally a single `omega[32]` array; they are kept as individually
  // named constants as a quick fix for a linking issue. omega1 equals modulus - 1; presumably
  // omegaN is a root of unity of order 2^N -- TODO confirm.
  static constexpr storage<limbs_count> omega1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
  static constexpr storage<limbs_count> omega2= {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000};
  static constexpr storage<limbs_count> omega3= {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d};
  static constexpr storage<limbs_count> omega4= {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e};
  static constexpr storage<limbs_count> omega5= {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb};
  static constexpr storage<limbs_count> omega6= {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac};
  static constexpr storage<limbs_count> omega7= {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802};
  static constexpr storage<limbs_count> omega8= {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59};
  static constexpr storage<limbs_count> omega9= {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667};
  static constexpr storage<limbs_count> omega10= {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098};
  static constexpr storage<limbs_count> omega11= {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b};
  static constexpr storage<limbs_count> omega12= {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0};
  static constexpr storage<limbs_count> omega13= {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8};
  static constexpr storage<limbs_count> omega14= {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8};
  static constexpr storage<limbs_count> omega15= {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911};
  static constexpr storage<limbs_count> omega16= {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd};
  static constexpr storage<limbs_count> omega17= {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333};
  static constexpr storage<limbs_count> omega18= {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db};
  static constexpr storage<limbs_count> omega19= {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83};
  static constexpr storage<limbs_count> omega20= {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f};
  static constexpr storage<limbs_count> omega21= {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5};
  static constexpr storage<limbs_count> omega22= {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3};
  static constexpr storage<limbs_count> omega23= {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd};
  static constexpr storage<limbs_count> omega24= {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc};
  static constexpr storage<limbs_count> omega25= {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd};
  static constexpr storage<limbs_count> omega26= {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580};
  static constexpr storage<limbs_count> omega27= {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d};
  static constexpr storage<limbs_count> omega28= {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d};
  static constexpr storage<limbs_count> omega29= {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f};
  static constexpr storage<limbs_count> omega30= {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
  static constexpr storage<limbs_count> omega31= {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
  static constexpr storage<limbs_count> omega32= {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};

  // omega_inv1 .. omega_inv32 were originally a single `omega_inv[32]` array; kept as individually
  // named constants as a quick fix for a linking issue. Presumably omega_invN is the inverse of
  // omegaN -- TODO confirm.
  static constexpr storage<limbs_count> omega_inv1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
  static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753};
  static constexpr storage<limbs_count> omega_inv3= {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e};
  static constexpr storage<limbs_count> omega_inv4= {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4};
  static constexpr storage<limbs_count> omega_inv5= {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c};
  static constexpr storage<limbs_count> omega_inv6= {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee};
  static constexpr storage<limbs_count> omega_inv7= {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d};
  static constexpr storage<limbs_count> omega_inv8= {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25};
  static constexpr storage<limbs_count> omega_inv9= {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e};
  static constexpr storage<limbs_count> omega_inv10= {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508};
  static constexpr storage<limbs_count> omega_inv11= {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d};
  static constexpr storage<limbs_count> omega_inv12= {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63};
  static constexpr storage<limbs_count> omega_inv13= {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7};
  static constexpr storage<limbs_count> omega_inv14= {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1};
  static constexpr storage<limbs_count> omega_inv15= {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac};
  static constexpr storage<limbs_count> omega_inv16= {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003};
  static constexpr storage<limbs_count> omega_inv17= {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c};
  static constexpr storage<limbs_count> omega_inv18= {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7};
  static constexpr storage<limbs_count> omega_inv19= {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950};
  static constexpr storage<limbs_count> omega_inv20= {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2};
  static constexpr storage<limbs_count> omega_inv21= {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6};
  static constexpr storage<limbs_count> omega_inv22= {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5};
  static constexpr storage<limbs_count> omega_inv23= {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9};
  static constexpr storage<limbs_count> omega_inv24= {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960};
  static constexpr storage<limbs_count> omega_inv25= {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6};
  static constexpr storage<limbs_count> omega_inv26= {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf};
  static constexpr storage<limbs_count> omega_inv27= {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f};
  static constexpr storage<limbs_count> omega_inv28= {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533};
  static constexpr storage<limbs_count> omega_inv29= {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff};
  static constexpr storage<limbs_count> omega_inv30= {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287};
  static constexpr storage<limbs_count> omega_inv31= {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
  static constexpr storage<limbs_count> omega_inv32= {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};

  // inv1 .. inv32 were originally a single `inv[32]` array; kept as individually named constants
  // as a quick fix for a linking issue. Presumably invN is the inverse of 2^N modulo `modulus`
  // -- TODO confirm.
  static constexpr storage<limbs_count> inv1= {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
  static constexpr storage<limbs_count> inv2= {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
  static constexpr storage<limbs_count> inv3= {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268};
  static constexpr storage<limbs_count> inv4= {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd};
  static constexpr storage<limbs_count> inv5= {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18};
  static constexpr storage<limbs_count> inv6= {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5};
  static constexpr storage<limbs_count> inv7= {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04};
  static constexpr storage<limbs_count> inv8= {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab};
  static constexpr storage<limbs_count> inv9= {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f};
  static constexpr storage<limbs_count> inv10= {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9};
  static constexpr storage<limbs_count> inv11= {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e};
  static constexpr storage<limbs_count> inv12= {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878};
  static constexpr storage<limbs_count> inv13= {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5};
  static constexpr storage<limbs_count> inv14= {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c};
  static constexpr storage<limbs_count> inv15= {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77};
  static constexpr storage<limbs_count> inv16= {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365};
  static constexpr storage<limbs_count> inv17= {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c};
  static constexpr storage<limbs_count> inv18= {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57};
  static constexpr storage<limbs_count> inv19= {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5};
  static constexpr storage<limbs_count> inv20= {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014};
  static constexpr storage<limbs_count> inv21= {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3};
  static constexpr storage<limbs_count> inv22= {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583};
  static constexpr storage<limbs_count> inv23= {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b};
  static constexpr storage<limbs_count> inv24= {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df};
  static constexpr storage<limbs_count> inv25= {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719};
  static constexpr storage<limbs_count> inv26= {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736};
  static constexpr storage<limbs_count> inv27= {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744};
  static constexpr storage<limbs_count> inv28= {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b};
  static constexpr storage<limbs_count> inv29= {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f};
  static constexpr storage<limbs_count> inv30= {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
  static constexpr storage<limbs_count> inv31= {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
  static constexpr storage<limbs_count> inv32= {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
};
// Compile-time parameters of a 12-limb (32 bits per limb) prime field whose modulus is 381 bits
// wide (see modulus_bits_count). Every multi-limb constant below lists its least-significant limb
// first, as can be seen from `one` = {1, 0, 0, ...}.
struct fq_config {
// field structure size = 12 * 32 bit
static constexpr unsigned limbs_count = 12;
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
// modulus zero-extended to 2*limbs_count limbs: the low 12 limbs equal `modulus`, the high 12 are zero
static constexpr storage<2*limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2*limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
// 2*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
// 4*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
// bit width of the modulus
static constexpr unsigned modulus_bits_count = 381;
// m = floor(2^(2*modulus_bits_count) / modulus)
// NOTE(review): this is the shape of a Barrett-reduction constant; confirm against the field multiplier that consumes it.
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
// the integers 1 and 0 in limb form
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
};
// Affine x and y coordinates of a fixed group generator, each stored as fq_config::limbs_count
// 32-bit limbs.
// NOTE(review): the values appear to be the standard BLS12-381 G1 generator with the
// least-significant limb first; confirm against the curve specification.
struct group_generator {
static constexpr storage<fq_config::limbs_count> generator_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
0x9774b905, 0xc3688c4f, 0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
static constexpr storage<fq_config::limbs_count> generator_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
0xd5d00af6, 0xfcf5e095, 0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
};
// Small-integer curve constant; presumably the `b` coefficient of the short Weierstrass
// equation y^2 = x^3 + b -- confirm against the code that constructs the curve.
static constexpr unsigned weierstrass_b = 4;

View File

@@ -1,22 +0,0 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
// Type aliases that bind the generic Field / Projective / Affine templates to the
// PARAMS_BLS12_381 parameter set.
namespace BLS12_381 {
  // Field over fp_config; also exposed under the shorter name scalar_t.
  using scalar_field_t = Field<PARAMS_BLS12_381::fp_config>;
  using scalar_t = scalar_field_t;

  // Field over fq_config; used for point coordinates.
  using point_field_t = Field<PARAMS_BLS12_381::fq_config>;

  // Curve constant handed to Projective as a template argument.
  static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_381::weierstrass_b };

  using projective_t = Projective<point_field_t, scalar_field_t, b>;
  using affine_t = Affine<point_field_t>;

#ifdef G2_DEFINED
  // Same aliases over the extension field; compiled only when G2_DEFINED is set.
  using g2_point_field_t = ExtensionField<PARAMS_BLS12_381::fq_config>;
  static constexpr g2_point_field_t b_g2 = g2_point_field_t{
    point_field_t{ PARAMS_BLS12_381::weierstrass_b_g2_re },
    point_field_t{ PARAMS_BLS12_381::weierstrass_b_g2_im }};
  using g2_projective_t = Projective<g2_point_field_t, scalar_field_t, b_g2>;
  using g2_affine_t = Affine<g2_point_field_t>;
#endif
}

View File

@@ -1,308 +0,0 @@
#ifndef _BLS12_381_LDE
#define _BLS12_381_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
/**
 * Builds a twiddle-factor array of `domain_size` entries by calling fill_twiddle_factors_array
 * with either scalar_t::omega(logn) or, when `inverse` is set, scalar_t::omega_inv(logn).
 * @param domain_size Forwarded to fill_twiddle_factors_array.
 * @param logn Argument passed to omega / omega_inv.
 * @param inverse Selects omega_inv instead of omega.
 * @param device_id Currently unused.
 * @return Whatever fill_twiddle_factors_array returns, or nullptr if a std::runtime_error was caught.
 */
extern "C" BLS12_381::scalar_t* build_domain_cuda_bls12_381(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
  try
  {
    return fill_twiddle_factors_array(
      domain_size,
      inverse ? BLS12_381::scalar_t::omega_inv(logn) : BLS12_381::scalar_t::omega(logn));
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
    return nullptr;
  }
}
/**
 * C entry point that runs ntt_end2end_template on an array of scalars.
 * @param arr Scalar array, forwarded unchanged.
 * @param n Forwarded as the size argument.
 * @param inverse Forwarded unchanged.
 * @param device_id Currently unused (see TODO below).
 * @return The value returned by ntt_end2end_template, or -1 if a std::runtime_error was caught.
 */
extern "C" int ntt_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // TODO: pass device_id
    status = ntt_end2end_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, n, inverse);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that runs ntt_end2end_template on an array of projective points
 * (element type projective_t, scalar type scalar_t).
 * @param arr Point array, forwarded unchanged.
 * @param n Forwarded as the size argument.
 * @param inverse Forwarded unchanged.
 * @param device_id Currently unused (see TODO below).
 * @return The value returned by ntt_end2end_template, or -1 if a std::runtime_error was caught.
 */
extern "C" int ecntt_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // TODO: pass device_id
    status = ntt_end2end_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, n, inverse);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that runs ntt_end2end_batch_template on scalars.
 * @param arr Scalar array, forwarded unchanged.
 * @param arr_size Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param inverse Forwarded unchanged.
 * @param device_id Currently unused (see TODO below).
 * @return The value returned by ntt_end2end_batch_template, or -1 if a std::runtime_error was caught.
 */
extern "C" int ntt_batch_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // TODO: pass device_id
    status = ntt_end2end_batch_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that runs ntt_end2end_batch_template on projective points
 * (element type projective_t, scalar type scalar_t).
 * @param arr Point array, forwarded unchanged.
 * @param arr_size Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param inverse Forwarded unchanged.
 * @param device_id Currently unused (see TODO below).
 * @return The value returned by ntt_end2end_batch_template, or -1 if a std::runtime_error was caught.
 */
extern "C" int ecntt_batch_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // TODO: pass device_id
    status = ntt_end2end_batch_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards to interpolate() for scalar evaluations.
 * @param d_out Output buffer (the d_ prefix presumably marks device memory -- confirm with callers).
 * @param d_evaluations Input evaluations.
 * @param d_domain Domain passed through to interpolate().
 * @param n Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by interpolate(), or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
  int status = -1;
  try
  {
    status = interpolate(d_out, d_evaluations, d_domain, n);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards to interpolate_batch() for scalar evaluations.
 * @param d_out Output buffer (the d_ prefix presumably marks device memory -- confirm with callers).
 * @param d_evaluations Input evaluations.
 * @param d_domain Domain passed through to interpolate_batch().
 * @param n Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by interpolate_batch(), or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_evaluations, BLS12_381::scalar_t* d_domain, unsigned n,
                                                        unsigned batch_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards to interpolate() for projective-point evaluations.
 * @param d_out Output buffer (the d_ prefix presumably marks device memory -- confirm with callers).
 * @param d_evaluations Input point evaluations.
 * @param d_domain Scalar domain passed through to interpolate().
 * @param n Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by interpolate(), or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = interpolate(d_out, d_evaluations, d_domain, n);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards to interpolate_batch() for projective-point evaluations.
 * @param d_out Output buffer (the d_ prefix presumably marks device memory -- confirm with callers).
 * @param d_evaluations Input point evaluations.
 * @param d_domain Scalar domain passed through to interpolate_batch().
 * @param n Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by interpolate_batch(), or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_evaluations, BLS12_381::scalar_t* d_domain,
                                                       unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards to evaluate() for scalar coefficients with the coset flag off.
 * A null scalar pointer is supplied in place of coset powers.
 * @param d_out Output buffer (the d_ prefix presumably marks device memory -- confirm with callers).
 * @param d_coefficients Input coefficients.
 * @param d_domain Domain passed through to evaluate().
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by evaluate(), or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain,
                                               unsigned domain_size, unsigned n, unsigned device_id = 0)
{
  int status = -1;
  try
  {
    // Typed null so the final argument of evaluate() still has type scalar_t*.
    BLS12_381::scalar_t* no_coset_powers = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards to evaluate_batch() for scalar coefficients with the coset flag off.
 * A null scalar pointer is supplied in place of coset powers.
 * @param d_out Output buffer (the d_ prefix presumably marks device memory -- confirm with callers).
 * @param d_coefficients Input coefficients.
 * @param d_domain Domain passed through to evaluate_batch().
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by evaluate_batch(), or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
                                                     unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // Typed null so the final argument of evaluate_batch() still has type scalar_t*.
    BLS12_381::scalar_t* no_coset_powers = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards to evaluate() for projective-point coefficients with the coset
 * flag off. A null scalar pointer is supplied in place of coset powers.
 * @param d_out Output buffer (the d_ prefix presumably marks device memory -- confirm with callers).
 * @param d_coefficients Input point coefficients.
 * @param d_domain Scalar domain passed through to evaluate().
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by evaluate(), or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain,
                                              unsigned domain_size, unsigned n, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // Typed null so the final argument of evaluate() still has type scalar_t*.
    BLS12_381::scalar_t* no_coset_powers = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards to evaluate_batch() for projective-point coefficients with the
 * coset flag off. A null scalar pointer is supplied in place of coset powers.
 * @param d_out Output buffer (the d_ prefix presumably marks device memory -- confirm with callers).
 * @param d_coefficients Input point coefficients.
 * @param d_domain Scalar domain passed through to evaluate_batch().
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return The value returned by evaluate_batch(), or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
                                                    unsigned n, unsigned batch_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    // Typed null so the final argument of evaluate_batch() still has type scalar_t*.
    BLS12_381::scalar_t* no_coset_powers = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards to evaluate() for scalar coefficients with the coset flag on.
 * @param d_out Output buffer (the d_ prefix presumably marks device memory -- confirm with callers).
 * @param d_coefficients Input coefficients.
 * @param d_domain Domain passed through to evaluate().
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param coset_powers Forwarded as the final argument of evaluate().
 * @param device_id Currently unused.
 * @return The value returned by evaluate(), or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_on_coset_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
                                                        unsigned n, BLS12_381::scalar_t *coset_powers, unsigned device_id = 0)
{
  int status = -1;
  try
  {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards to evaluate_batch() for scalar coefficients with the coset flag on.
 * @param d_out Output buffer (the d_ prefix presumably marks device memory -- confirm with callers).
 * @param d_coefficients Input coefficients.
 * @param d_domain Domain passed through to evaluate_batch().
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param coset_powers Forwarded as the final argument of evaluate_batch().
 * @param device_id Currently unused.
 * @return The value returned by evaluate_batch(), or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
                                                              unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards to evaluate() for projective-point coefficients with the coset
 * flag on.
 * @param d_out Output buffer (the d_ prefix presumably marks device memory -- confirm with callers).
 * @param d_coefficients Input point coefficients.
 * @param d_domain Scalar domain passed through to evaluate().
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param coset_powers Forwarded as the final argument of evaluate().
 * @param device_id Currently unused.
 * @return The value returned by evaluate(), or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_points_on_coset_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
                                                       unsigned n, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that forwards to evaluate_batch() for projective-point coefficients with the
 * coset flag on.
 * @param d_out Output buffer (the d_ prefix presumably marks device memory -- confirm with callers).
 * @param d_coefficients Input point coefficients.
 * @param d_domain Scalar domain passed through to evaluate_batch().
 * @param domain_size Forwarded unchanged.
 * @param n Forwarded unchanged.
 * @param batch_size Forwarded unchanged.
 * @param coset_powers Forwarded as the final argument of evaluate_batch().
 * @param device_id Currently unused.
 * @return The value returned by evaluate_batch(), or -1 if a std::runtime_error was caught.
 */
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
                                                             unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that calls reverse_order(arr, n, logn) on a scalar array, where
 * logn = floor(log2(n)).
 * @param arr Scalar array, forwarded unchanged.
 * @param n Element count; for n <= 1 the computed logn is 0.
 * @param device_id Currently unused.
 * @return 0 after reverse_order returns, or -1 if a std::runtime_error was caught.
 */
extern "C" int reverse_order_scalars_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, size_t device_id = 0)
{
  try
  {
    // Exact integer floor(log2(n)). The floating-point form log(n) / log(2) can land a hair
    // below an integer and truncate to the wrong value, and for n <= 0 it converts -inf/NaN
    // to an unsigned integer, which is undefined behaviour.
    uint32_t logn = 0;
    for (int remaining = n; remaining > 1; remaining >>= 1) {
      ++logn;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point that calls reverse_order_batch(arr, n, logn, batch_size) on a scalar array,
 * where logn = floor(log2(n)).
 * @param arr Scalar array, forwarded unchanged.
 * @param n Size argument; for n <= 1 the computed logn is 0.
 * @param batch_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return 0 after reverse_order_batch returns, or -1 if a std::runtime_error was caught.
 */
extern "C" int reverse_order_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    // Exact integer floor(log2(n)). The floating-point form log(n) / log(2) can land a hair
    // below an integer and truncate to the wrong value, and for n <= 0 it converts -inf/NaN
    // to an unsigned integer, which is undefined behaviour.
    uint32_t logn = 0;
    for (int remaining = n; remaining > 1; remaining >>= 1) {
      ++logn;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point that calls reverse_order(arr, n, logn) on a projective-point array, where
 * logn = floor(log2(n)).
 * @param arr Point array, forwarded unchanged.
 * @param n Element count; for n <= 1 the computed logn is 0.
 * @param device_id Currently unused.
 * @return 0 after reverse_order returns, or -1 if a std::runtime_error was caught.
 */
extern "C" int reverse_order_points_cuda_bls12_381(BLS12_381::projective_t* arr, int n, size_t device_id = 0)
{
  try
  {
    // Exact integer floor(log2(n)). The floating-point form log(n) / log(2) can land a hair
    // below an integer and truncate to the wrong value, and for n <= 0 it converts -inf/NaN
    // to an unsigned integer, which is undefined behaviour.
    uint32_t logn = 0;
    for (int remaining = n; remaining > 1; remaining >>= 1) {
      ++logn;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point that calls reverse_order_batch(arr, n, logn, batch_size) on a projective-point
 * array, where logn = floor(log2(n)).
 * @param arr Point array, forwarded unchanged.
 * @param n Size argument; for n <= 1 the computed logn is 0.
 * @param batch_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return 0 after reverse_order_batch returns, or -1 if a std::runtime_error was caught.
 */
extern "C" int reverse_order_points_batch_cuda_bls12_381(BLS12_381::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    // Exact integer floor(log2(n)). The floating-point form log(n) / log(2) can land a hair
    // below an integer and truncate to the wrong value, and for n <= 0 it converts -inf/NaN
    // to an unsigned integer, which is undefined behaviour.
    uint32_t logn = 0;
    for (int remaining = n; remaining > 1; remaining >>= 1) {
      ++logn;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
#endif

View File

@@ -1,89 +0,0 @@
#ifndef _BLS12_381_MSM
#define _BLS12_381_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
/**
 * C entry point that runs large_msm over `count` scalar/point pairs, passing `false` as the
 * last argument of large_msm.
 * @param out Destination for the resulting projective point.
 * @param points Affine points.
 * @param scalars Scalars.
 * @param count Forwarded to large_msm as the element count.
 * @param device_id Currently unused.
 * @return CUDA_SUCCESS after large_msm returns, or -1 if a std::runtime_error was caught.
 */
extern "C"
int msm_cuda_bls12_381(BLS12_381::projective_t *out, BLS12_381::affine_t points[],
                       BLS12_381::scalar_t scalars[], size_t count, size_t device_id = 0)
{
  int status = -1;
  try
  {
    large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, count, out, false);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * C entry point that runs batched_large_msm, passing `false` as its last argument.
 * @param out Destination for the resulting projective points.
 * @param points Affine points.
 * @param scalars Scalars.
 * @param batch_size Forwarded unchanged.
 * @param msm_size Forwarded unchanged.
 * @param device_id Currently unused.
 * @return CUDA_SUCCESS after batched_large_msm returns, or -1 if a std::runtime_error was caught.
 */
extern "C" int msm_batch_cuda_bls12_381(BLS12_381::projective_t* out, BLS12_381::affine_t points[],
                                        BLS12_381::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    batched_large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, batch_size, msm_size, out, false);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * Commit to a polynomial using the MSM.
 * Note: this only invokes the MSM (large_msm with `true` as its last argument); it performs no
 * conversion between evaluation and coefficient form of the scalars or points.
 * @param d_out Output point to write the result to.
 * @param d_scalars Scalars for the MSM. Must be on device.
 * @param d_points Points for the MSM. Must be on device.
 * @param count Length of the `d_scalars` and `d_points` arrays (they should have equal length).
 * @param device_id Currently unused.
 * @return 0 after large_msm returns, or -1 if a std::runtime_error was caught.
 */
extern "C"
int commit_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t device_id = 0)
{
  int status = -1;
  try
  {
    large_msm(d_scalars, d_points, count, d_out, true);
    status = 0;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * Commit to a batch of polynomials using the MSM.
 * Note: this only invokes the MSM (batched_large_msm with `true` as its last argument); it
 * performs no conversion between evaluation and coefficient form of the scalars or points.
 * @param d_out Output point to write the results to.
 * @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
 * @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
 * @param count Length of the `d_points` array; `d_scalars` has length `count` * `batch_size`.
 * @param batch_size Size of the batch.
 * @param device_id Currently unused.
 * @return 0 after batched_large_msm returns, or -1 if a std::runtime_error was caught.
 */
extern "C"
int commit_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
    status = 0;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
#endif

View File

@@ -1,193 +0,0 @@
#pragma once
#include "../../utils/storage.cuh"
namespace PARAMS_BLS12_381{
struct fp_config {
// field structure size = 8 * 32 bit
static constexpr unsigned limbs_count = 8;
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
// 2*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
static constexpr unsigned modulus_bits_count = 255;
// m = floor(2^(2*modulus_bits_count) / modulus)
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// static constexpr storage<limbs_count> omega[32]= { {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000}, {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d}, {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e}, {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb}, {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac}, {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802}, {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59}, {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667}, {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098}, {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b}, {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0}, {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8}, {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8}, {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911}, {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd}, {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333}, {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db}, {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83}, {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 
0x5f686d91, 0x3436287f}, {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5}, {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3}, {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd}, {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc}, {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd}, {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580}, {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d}, {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d}, {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f}, {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b}, {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72}, {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}};
// Quick fix for linking issue
static constexpr storage<limbs_count> omega1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
static constexpr storage<limbs_count> omega2= {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega3= {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d};
static constexpr storage<limbs_count> omega4= {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e};
static constexpr storage<limbs_count> omega5= {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb};
static constexpr storage<limbs_count> omega6= {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac};
static constexpr storage<limbs_count> omega7= {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802};
static constexpr storage<limbs_count> omega8= {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59};
static constexpr storage<limbs_count> omega9= {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667};
static constexpr storage<limbs_count> omega10= {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098};
static constexpr storage<limbs_count> omega11= {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b};
static constexpr storage<limbs_count> omega12= {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0};
static constexpr storage<limbs_count> omega13= {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8};
static constexpr storage<limbs_count> omega14= {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8};
static constexpr storage<limbs_count> omega15= {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911};
static constexpr storage<limbs_count> omega16= {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd};
static constexpr storage<limbs_count> omega17= {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333};
static constexpr storage<limbs_count> omega18= {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db};
static constexpr storage<limbs_count> omega19= {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83};
static constexpr storage<limbs_count> omega20= {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f};
static constexpr storage<limbs_count> omega21= {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5};
static constexpr storage<limbs_count> omega22= {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3};
static constexpr storage<limbs_count> omega23= {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd};
static constexpr storage<limbs_count> omega24= {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc};
static constexpr storage<limbs_count> omega25= {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd};
static constexpr storage<limbs_count> omega26= {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580};
static constexpr storage<limbs_count> omega27= {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d};
static constexpr storage<limbs_count> omega28= {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d};
static constexpr storage<limbs_count> omega29= {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f};
static constexpr storage<limbs_count> omega30= {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
static constexpr storage<limbs_count> omega31= {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
static constexpr storage<limbs_count> omega32= {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
// static constexpr storage<limbs_count> omega_inv[32]={ {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753}, {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e}, {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4}, {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c}, {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee}, {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d}, {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25}, {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e}, {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508}, {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d}, {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63}, {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7}, {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1}, {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac}, {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003}, {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c}, {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7}, {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950}, {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 
0x6f5d6dfa, 0x545ad9b2}, {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6}, {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5}, {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9}, {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960}, {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6}, {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf}, {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f}, {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533}, {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff}, {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287}, {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6}, {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}};
// Quick fix for linking issue
static constexpr storage<limbs_count> omega_inv1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753};
static constexpr storage<limbs_count> omega_inv3= {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e};
static constexpr storage<limbs_count> omega_inv4= {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4};
static constexpr storage<limbs_count> omega_inv5= {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c};
static constexpr storage<limbs_count> omega_inv6= {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee};
static constexpr storage<limbs_count> omega_inv7= {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d};
static constexpr storage<limbs_count> omega_inv8= {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25};
static constexpr storage<limbs_count> omega_inv9= {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e};
static constexpr storage<limbs_count> omega_inv10= {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508};
static constexpr storage<limbs_count> omega_inv11= {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d};
static constexpr storage<limbs_count> omega_inv12= {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63};
static constexpr storage<limbs_count> omega_inv13= {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7};
static constexpr storage<limbs_count> omega_inv14= {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1};
static constexpr storage<limbs_count> omega_inv15= {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac};
static constexpr storage<limbs_count> omega_inv16= {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003};
static constexpr storage<limbs_count> omega_inv17= {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c};
static constexpr storage<limbs_count> omega_inv18= {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7};
static constexpr storage<limbs_count> omega_inv19= {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950};
static constexpr storage<limbs_count> omega_inv20= {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2};
static constexpr storage<limbs_count> omega_inv21= {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6};
static constexpr storage<limbs_count> omega_inv22= {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5};
static constexpr storage<limbs_count> omega_inv23= {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9};
static constexpr storage<limbs_count> omega_inv24= {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960};
static constexpr storage<limbs_count> omega_inv25= {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6};
static constexpr storage<limbs_count> omega_inv26= {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf};
static constexpr storage<limbs_count> omega_inv27= {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f};
static constexpr storage<limbs_count> omega_inv28= {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533};
static constexpr storage<limbs_count> omega_inv29= {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff};
static constexpr storage<limbs_count> omega_inv30= {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287};
static constexpr storage<limbs_count> omega_inv31= {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
static constexpr storage<limbs_count> omega_inv32= {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};
// static constexpr storage<limbs_count> inv[32]={ {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9}, {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e}, {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268}, {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd}, {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18}, {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5}, {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04}, {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab}, {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f}, {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9}, {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e}, {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878}, {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5}, {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c}, {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77}, {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365}, {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c}, {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57}, {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5}, {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 
0x73eda014}, {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3}, {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583}, {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b}, {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df}, {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719}, {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736}, {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744}, {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b}, {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f}, {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751}, {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752}, {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}};
// Quick fix for linking issue
static constexpr storage<limbs_count> inv1= {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
static constexpr storage<limbs_count> inv2= {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
static constexpr storage<limbs_count> inv3= {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268};
static constexpr storage<limbs_count> inv4= {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd};
static constexpr storage<limbs_count> inv5= {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18};
static constexpr storage<limbs_count> inv6= {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5};
static constexpr storage<limbs_count> inv7= {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04};
static constexpr storage<limbs_count> inv8= {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab};
static constexpr storage<limbs_count> inv9= {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f};
static constexpr storage<limbs_count> inv10= {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9};
static constexpr storage<limbs_count> inv11= {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e};
static constexpr storage<limbs_count> inv12= {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878};
static constexpr storage<limbs_count> inv13= {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5};
static constexpr storage<limbs_count> inv14= {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c};
static constexpr storage<limbs_count> inv15= {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77};
static constexpr storage<limbs_count> inv16= {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365};
static constexpr storage<limbs_count> inv17= {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c};
static constexpr storage<limbs_count> inv18= {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57};
static constexpr storage<limbs_count> inv19= {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5};
static constexpr storage<limbs_count> inv20= {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014};
static constexpr storage<limbs_count> inv21= {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3};
static constexpr storage<limbs_count> inv22= {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583};
static constexpr storage<limbs_count> inv23= {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b};
static constexpr storage<limbs_count> inv24= {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df};
static constexpr storage<limbs_count> inv25= {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719};
static constexpr storage<limbs_count> inv26= {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736};
static constexpr storage<limbs_count> inv27= {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744};
static constexpr storage<limbs_count> inv28= {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b};
static constexpr storage<limbs_count> inv29= {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f};
static constexpr storage<limbs_count> inv30= {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
static constexpr storage<limbs_count> inv31= {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
static constexpr storage<limbs_count> inv32= {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
};
// Compile-time parameters of the base ("q") field used for curve point coordinates.
// Every multi-limb value below is an array of 32-bit limbs listed least-significant
// limb first (compare `one`, whose only non-zero limb is the first one).
struct fq_config {
// field structure size = 12 * 32 bit
static constexpr unsigned limbs_count = 12;
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
// modulus zero-extended to 2*limbs_count limbs: the low 12 limbs repeat `modulus`, the high 12 are zero
static constexpr storage<2*limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
// NOTE: the identifier is spelled "sqared" (sic); it is kept because other code refers to it by this name.
static constexpr storage<2*limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
// 2*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
// 4*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
// number of significant bits in `modulus`
static constexpr unsigned modulus_bits_count = 381;
// m = floor(2^(2*modulus_bits_count) / modulus)
// NOTE(review): presumably the precomputed constant for Barrett-style reduction — confirm against the field implementation.
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
// the integer 1 in limb form
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// the integer 0 in limb form
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// i^2, the square of the imaginary unit for the extension field
// (magnitude only; the sign is carried by `i_squared_is_negative`, so together they describe i^2 = -1)
static constexpr uint32_t i_squared = 1;
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
// generator_x / generator_y are single base-field coordinates; the *_re / *_im pairs are
// two-component coordinates (presumably the G1 generator and the real/imaginary parts of
// the G2 generator respectively — confirm against the curve specification).
static constexpr storage<limbs_count> generator_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
0x9774b905, 0xc3688c4f, 0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
static constexpr storage<limbs_count> generator_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
0xd5d00af6, 0xfcf5e095, 0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
static constexpr storage<limbs_count> generator_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0xbac0326, 0x7ae3d177, 0xb4510b64,
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x24aa2b2};
static constexpr storage<limbs_count> generator_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
static constexpr storage<limbs_count> generator_y_re = {0x8b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0xce5d527};
static constexpr storage<limbs_count> generator_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x606c4a0};
};
// Curve constant named `weierstrass_b`, as base-field limbs (least-significant limb first): the value 4.
// NOTE(review): presumably the `b` coefficient of the short Weierstrass equation y^2 = x^3 + b — confirm.
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// Real component of the G2 counterpart of the constant above: the value 4.
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// Imaginary component of the G2 counterpart: also the value 4.
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
}

View File

@@ -1,19 +0,0 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
/**
 * Compare two BLS12-381 projective points through the C ABI.
 * The points are equal only if `operator==` says so AND neither of them has
 * x, y and z all equal to zero; an all-zero triple never compares equal.
 * @param point1 First point; dereferenced without a null check.
 * @param point2 Second point; dereferenced without a null check.
 * @return true when the points match and neither is the all-zero triple.
 */
extern "C" bool eq_bls12_381(BLS12_381::projective_t *point1, BLS12_381::projective_t *point2)
{
  using field_t = BLS12_381::point_field_t;

  // Different points: nothing else to check.
  if (!(*point1 == *point2)) {
    return false;
  }

  // Reject the degenerate (0, 0, 0) triple on the first operand...
  const bool first_is_all_zero =
    (point1->x == field_t::zero()) && (point1->y == field_t::zero()) && (point1->z == field_t::zero());
  if (first_is_all_zero) {
    return false;
  }

  // ...and on the second one.
  const bool second_is_all_zero =
    (point2->x == field_t::zero()) && (point2->y == field_t::zero()) && (point2->z == field_t::zero());
  return !second_is_all_zero;
}
#if defined(G2_DEFINED)
/**
 * Compare two BLS12-381 G2 projective points through the C ABI.
 * Only compiled when G2_DEFINED is set. The points are equal only if `operator==`
 * says so AND neither of them has x, y and z all equal to zero.
 * @param point1 First point; dereferenced without a null check.
 * @param point2 Second point; dereferenced without a null check.
 * @return true when the points match and neither is the all-zero triple.
 */
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t *point1, BLS12_381::g2_projective_t *point2)
{
  using field_t = BLS12_381::g2_point_field_t;

  // Different points: nothing else to check.
  if (!(*point1 == *point2)) {
    return false;
  }

  // Reject the degenerate (0, 0, 0) triple on the first operand...
  const bool first_is_all_zero =
    (point1->x == field_t::zero()) && (point1->y == field_t::zero()) && (point1->z == field_t::zero());
  if (first_is_all_zero) {
    return false;
  }

  // ...and on the second one.
  const bool second_is_all_zero =
    (point2->x == field_t::zero()) && (point2->y == field_t::zero()) && (point2->z == field_t::zero());
  return !second_is_all_zero;
}
#endif

View File

@@ -1,4 +0,0 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"

View File

@@ -1,65 +0,0 @@
#ifndef _BLS12_381_VEC_MULT
#define _BLS12_381_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
/**
 * C-ABI wrapper around `vector_mod_mult<BLS12_381::projective_t, BLS12_381::scalar_t>`.
 * `inout` is passed as both the second and third argument of the template, so the
 * result is written back over the input points.
 * @param inout Projective points; read and overwritten.
 * @param scalar_vec Scalars forwarded as the first argument.
 * @param n_elments Element count forwarded to the template.
 * @param device_id Currently ignored (see TODO).
 * @return CUDA_SUCCESS on success, -1 if any exception was caught.
 */
extern "C" int32_t vec_mod_mult_point_bls12_381(BLS12_381::projective_t *inout,
                                                BLS12_381::scalar_t *scalar_vec,
                                                size_t n_elments,
                                                size_t device_id)
{
  try
  {
    // TODO: device_id
    vector_mod_mult<BLS12_381::projective_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `vector_mod_mult<BLS12_381::scalar_t, BLS12_381::scalar_t>`.
 * `inout` is passed as both the second and third argument of the template, so the
 * result is written back over the input scalars.
 * @param inout Scalars; read and overwritten.
 * @param scalar_vec Scalars forwarded as the first argument.
 * @param n_elments Element count forwarded to the template.
 * @param device_id Currently ignored (see TODO).
 * @return CUDA_SUCCESS on success, -1 if any exception was caught.
 */
extern "C" int32_t vec_mod_mult_scalar_bls12_381(BLS12_381::scalar_t *inout,
                                                 BLS12_381::scalar_t *scalar_vec,
                                                 size_t n_elments,
                                                 size_t device_id)
{
  try
  {
    // TODO: device_id
    vector_mod_mult<BLS12_381::scalar_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `matrix_mod_mult<BLS12_381::scalar_t>`.
 * @param matrix_flattened Matrix entries as one flat array, forwarded unchanged.
 * @param input Input vector, forwarded unchanged.
 * @param output Destination buffer, forwarded unchanged.
 * @param n_elments Size argument forwarded to the template.
 * @param device_id Currently ignored (see TODO).
 * @return CUDA_SUCCESS on success, -1 if any exception was caught.
 */
extern "C" int32_t matrix_vec_mod_mult_bls12_381(BLS12_381::scalar_t *matrix_flattened,
                                                 BLS12_381::scalar_t *input,
                                                 BLS12_381::scalar_t *output,
                                                 size_t n_elments,
                                                 size_t device_id)
{
  try
  {
    // TODO: device_id
    matrix_mod_mult<BLS12_381::scalar_t>(matrix_flattened, input, output, n_elments);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
#endif

View File

@@ -1,22 +0,0 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
// Concrete field, point and curve-constant types for BN254, assembled from the
// parameter tables in PARAMS_BN254.
namespace BN254 {
  // Field built from `fp_config`; used as the scalar type.
  using scalar_field_t = Field<PARAMS_BN254::fp_config>;
  using scalar_t = scalar_field_t;

  // Field built from `fq_config`; used for point coordinates.
  using point_field_t = Field<PARAMS_BN254::fq_config>;

  // Curve constant handed to `Projective` as a template argument.
  static constexpr point_field_t b = point_field_t{ PARAMS_BN254::weierstrass_b };

  using projective_t = Projective<point_field_t, scalar_field_t, b>;
  using affine_t = Affine<point_field_t>;

#if defined(G2_DEFINED)
  // G2 variants, built over the extension field of `fq_config`.
  using g2_point_field_t = ExtensionField<PARAMS_BN254::fq_config>;

  // G2 curve constant, assembled from its two base-field components.
  static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BN254::weierstrass_b_g2_re },
                                                             point_field_t{ PARAMS_BN254::weierstrass_b_g2_im }};

  using g2_projective_t = Projective<g2_point_field_t, scalar_field_t, b_g2>;
  using g2_affine_t = Affine<g2_point_field_t>;
#endif
}

View File

@@ -1,308 +0,0 @@
#ifndef _BN254_LDE
#define _BN254_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
/**
 * Build the twiddle-factor array for a BN254 scalar-field domain.
 * @param domain_size Number of twiddle factors, forwarded to `fill_twiddle_factors_array`.
 * @param logn Index passed to `omega` / `omega_inv` to select the root.
 * @param inverse When true `omega_inv(logn)` is used, otherwise `omega(logn)`.
 * @param device_id Not used by this function.
 * @return The pointer returned by `fill_twiddle_factors_array`, or nullptr if any
 *         exception was caught.
 */
extern "C" BN254::scalar_t* build_domain_cuda_bn254(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
  try
  {
    if (inverse) {
      return fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega_inv(logn));
    } else {
      return fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega(logn));
    }
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return nullptr;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return nullptr;
  }
}
/**
 * C-ABI wrapper around `ntt_end2end_template<BN254::scalar_t, BN254::scalar_t>`.
 * @param arr Scalar array, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param inverse Direction flag, forwarded unchanged.
 * @param device_id Currently ignored (see TODO).
 * @return The template's return value, or -1 if any exception was caught.
 */
extern "C" int ntt_cuda_bn254(BN254::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_template<BN254::scalar_t,BN254::scalar_t>(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `ntt_end2end_template<BN254::projective_t, BN254::scalar_t>`,
 * i.e. the same template as the scalar NTT but applied to projective points.
 * @param arr Point array, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param inverse Direction flag, forwarded unchanged.
 * @param device_id Currently ignored (see TODO).
 * @return The template's return value, or -1 if any exception was caught.
 */
extern "C" int ecntt_cuda_bn254(BN254::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_template<BN254::projective_t,BN254::scalar_t>(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `ntt_end2end_batch_template<BN254::scalar_t, BN254::scalar_t>`.
 * @param arr Scalar array holding the whole batch, forwarded unchanged.
 * @param arr_size Size argument, forwarded unchanged.
 * @param batch_size Batch-size argument, forwarded unchanged.
 * @param inverse Direction flag, forwarded unchanged.
 * @param device_id Currently ignored (see TODO).
 * @return The template's return value, or -1 if any exception was caught.
 */
extern "C" int ntt_batch_cuda_bn254(BN254::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_batch_template<BN254::scalar_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `ntt_end2end_batch_template<BN254::projective_t, BN254::scalar_t>`.
 * @param arr Point array holding the whole batch, forwarded unchanged.
 * @param arr_size Size argument, forwarded unchanged.
 * @param batch_size Batch-size argument, forwarded unchanged.
 * @param inverse Direction flag, forwarded unchanged.
 * @param device_id Currently ignored (see TODO).
 * @return The template's return value, or -1 if any exception was caught.
 */
extern "C" int ecntt_batch_cuda_bn254(BN254::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  try
  {
    return ntt_end2end_batch_template<BN254::projective_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `interpolate` for BN254 scalars.
 * @param d_out Destination buffer, forwarded unchanged.
 * @param d_evaluations Evaluations to interpolate, forwarded unchanged.
 * @param d_domain Domain array, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return The value returned by `interpolate`, or -1 if any exception was caught.
 */
extern "C" int interpolate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
  try
  {
    return interpolate(d_out, d_evaluations, d_domain, n);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `interpolate_batch` for BN254 scalars.
 * @param d_out Destination buffer, forwarded unchanged.
 * @param d_evaluations Evaluations for the whole batch, forwarded unchanged.
 * @param d_domain Domain array, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param batch_size Batch-size argument, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return The value returned by `interpolate_batch`, or -1 if any exception was caught.
 */
extern "C" int interpolate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_evaluations, BN254::scalar_t* d_domain, unsigned n,
                                                    unsigned batch_size, size_t device_id = 0)
{
  try
  {
    return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `interpolate` for BN254 projective points.
 * @param d_out Destination buffer, forwarded unchanged.
 * @param d_evaluations Point evaluations to interpolate, forwarded unchanged.
 * @param d_domain Scalar domain array, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return The value returned by `interpolate`, or -1 if any exception was caught.
 */
extern "C" int interpolate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
  try
  {
    return interpolate(d_out, d_evaluations, d_domain, n);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `interpolate_batch` for BN254 projective points.
 * @param d_out Destination buffer, forwarded unchanged.
 * @param d_evaluations Point evaluations for the whole batch, forwarded unchanged.
 * @param d_domain Scalar domain array, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param batch_size Batch-size argument, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return The value returned by `interpolate_batch`, or -1 if any exception was caught.
 */
extern "C" int interpolate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_evaluations, BN254::scalar_t* d_domain,
                                                   unsigned n, unsigned batch_size, size_t device_id = 0)
{
  try
  {
    return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `evaluate` for BN254 scalars, without a coset:
 * the coset flag is passed as false and the coset-powers pointer as null.
 * @param d_out Destination buffer, forwarded unchanged.
 * @param d_coefficients Coefficients to evaluate, forwarded unchanged.
 * @param d_domain Domain array, forwarded unchanged.
 * @param domain_size Domain-size argument, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return The value returned by `evaluate`, or -1 if any exception was caught.
 */
extern "C" int evaluate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_coefficients, BN254::scalar_t *d_domain,
                                           unsigned domain_size, unsigned n, unsigned device_id = 0)
{
  try
  {
    // A typed null pointer (not a bare nullptr) so the scalar type of `evaluate` can be deduced.
    BN254::scalar_t* no_coset_powers = nullptr;
    return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `evaluate_batch` for BN254 scalars, without a coset:
 * the coset flag is passed as false and the coset-powers pointer as null.
 * @param d_out Destination buffer, forwarded unchanged.
 * @param d_coefficients Coefficients for the whole batch, forwarded unchanged.
 * @param d_domain Domain array, forwarded unchanged.
 * @param domain_size Domain-size argument, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param batch_size Batch-size argument, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return The value returned by `evaluate_batch`, or -1 if any exception was caught.
 */
extern "C" int evaluate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
                                                 unsigned n, unsigned batch_size, size_t device_id = 0)
{
  try
  {
    // A typed null pointer (not a bare nullptr) so the scalar type of `evaluate_batch` can be deduced.
    BN254::scalar_t* no_coset_powers = nullptr;
    return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `evaluate` for BN254 projective points, without a coset:
 * the coset flag is passed as false and the coset-powers pointer as null.
 * @param d_out Destination buffer, forwarded unchanged.
 * @param d_coefficients Point coefficients to evaluate, forwarded unchanged.
 * @param d_domain Scalar domain array, forwarded unchanged.
 * @param domain_size Domain-size argument, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return The value returned by `evaluate`, or -1 if any exception was caught.
 */
extern "C" int evaluate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_coefficients, BN254::scalar_t *d_domain,
                                          unsigned domain_size, unsigned n, size_t device_id = 0)
{
  try
  {
    // A typed null pointer (not a bare nullptr) so the scalar type of `evaluate` can be deduced.
    BN254::scalar_t* no_coset_powers = nullptr;
    return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, no_coset_powers);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `evaluate_batch` for BN254 projective points, without a coset:
 * the coset flag is passed as false and the coset-powers pointer as null.
 * @param d_out Destination buffer, forwarded unchanged.
 * @param d_coefficients Point coefficients for the whole batch, forwarded unchanged.
 * @param d_domain Scalar domain array, forwarded unchanged.
 * @param domain_size Domain-size argument, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param batch_size Batch-size argument, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return The value returned by `evaluate_batch`, or -1 if any exception was caught.
 */
extern "C" int evaluate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
                                                unsigned n, unsigned batch_size, size_t device_id = 0)
{
  try
  {
    // A typed null pointer (not a bare nullptr) so the scalar type of `evaluate_batch` can be deduced.
    BN254::scalar_t* no_coset_powers = nullptr;
    return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, no_coset_powers);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `evaluate` for BN254 scalars on a coset:
 * the coset flag is passed as true together with `coset_powers`.
 * @param d_out Destination buffer, forwarded unchanged.
 * @param d_coefficients Coefficients to evaluate, forwarded unchanged.
 * @param d_domain Domain array, forwarded unchanged.
 * @param domain_size Domain-size argument, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param coset_powers Coset powers array, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return The value returned by `evaluate`, or -1 if any exception was caught.
 */
extern "C" int evaluate_scalars_on_coset_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_coefficients, BN254::scalar_t *d_domain, unsigned domain_size,
                                                    unsigned n, BN254::scalar_t *coset_powers, unsigned device_id = 0)
{
  try
  {
    return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `evaluate_batch` for BN254 scalars on a coset:
 * the coset flag is passed as true together with `coset_powers`.
 * @param d_out Destination buffer, forwarded unchanged.
 * @param d_coefficients Coefficients for the whole batch, forwarded unchanged.
 * @param d_domain Domain array, forwarded unchanged.
 * @param domain_size Domain-size argument, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param batch_size Batch-size argument, forwarded unchanged.
 * @param coset_powers Coset powers array, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return The value returned by `evaluate_batch`, or -1 if any exception was caught.
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
                                                          unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0)
{
  try
  {
    return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `evaluate` for BN254 projective points on a coset:
 * the coset flag is passed as true together with `coset_powers`.
 * @param d_out Destination buffer, forwarded unchanged.
 * @param d_coefficients Point coefficients to evaluate, forwarded unchanged.
 * @param d_domain Scalar domain array, forwarded unchanged.
 * @param domain_size Domain-size argument, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param coset_powers Coset powers array, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return The value returned by `evaluate`, or -1 if any exception was caught.
 */
extern "C" int evaluate_points_on_coset_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_coefficients, BN254::scalar_t *d_domain, unsigned domain_size,
                                                   unsigned n, BN254::scalar_t *coset_powers, size_t device_id = 0)
{
  try
  {
    return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `evaluate_batch` for BN254 projective points on a coset:
 * the coset flag is passed as true together with `coset_powers`.
 * @param d_out Destination buffer, forwarded unchanged.
 * @param d_coefficients Point coefficients for the whole batch, forwarded unchanged.
 * @param d_domain Scalar domain array, forwarded unchanged.
 * @param domain_size Domain-size argument, forwarded unchanged.
 * @param n Size argument, forwarded unchanged.
 * @param batch_size Batch-size argument, forwarded unchanged.
 * @param coset_powers Coset powers array, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return The value returned by `evaluate_batch`, or -1 if any exception was caught.
 */
extern "C" int evaluate_points_on_coset_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
                                                         unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0)
{
  try
  {
    return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `reverse_order` for an array of BN254 scalars.
 * @param arr Scalar array, forwarded unchanged.
 * @param n Element count; floor(log2(n)) is derived from it and passed alongside.
 *          A non-positive `n` yields a derived value of 0 and is still forwarded as is.
 * @param device_id Not used by this function.
 * @return 0 on success, -1 if any exception was caught.
 */
extern "C" int reverse_order_scalars_cuda_bn254(BN254::scalar_t* arr, int n, size_t device_id = 0)
{
  try
  {
    // floor(log2(n)) via integer shifts. The floating-point form uint32_t(log(n) / log(2))
    // is not guaranteed to land exactly on the integer for powers of two (so truncation
    // could give logn - 1) and is undefined for n <= 0.
    uint32_t logn = 0;
    for (uint32_t remaining = (n > 0) ? uint32_t(n) : 0u; remaining > 1; remaining >>= 1) {
      ++logn;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `reverse_order_batch` for a batch of BN254 scalar arrays.
 * @param arr Scalar array holding the whole batch, forwarded unchanged.
 * @param n Element count; floor(log2(n)) is derived from it and passed alongside.
 *          A non-positive `n` yields a derived value of 0 and is still forwarded as is.
 * @param batch_size Batch-size argument, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return 0 on success, -1 if any exception was caught.
 */
extern "C" int reverse_order_scalars_batch_cuda_bn254(BN254::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    // floor(log2(n)) via integer shifts. The floating-point form uint32_t(log(n) / log(2))
    // is not guaranteed to land exactly on the integer for powers of two (so truncation
    // could give logn - 1) and is undefined for n <= 0.
    uint32_t logn = 0;
    for (uint32_t remaining = (n > 0) ? uint32_t(n) : 0u; remaining > 1; remaining >>= 1) {
      ++logn;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `reverse_order` for an array of BN254 projective points.
 * @param arr Point array, forwarded unchanged.
 * @param n Element count; floor(log2(n)) is derived from it and passed alongside.
 *          A non-positive `n` yields a derived value of 0 and is still forwarded as is.
 * @param device_id Not used by this function.
 * @return 0 on success, -1 if any exception was caught.
 */
extern "C" int reverse_order_points_cuda_bn254(BN254::projective_t* arr, int n, size_t device_id = 0)
{
  try
  {
    // floor(log2(n)) via integer shifts. The floating-point form uint32_t(log(n) / log(2))
    // is not guaranteed to land exactly on the integer for powers of two (so truncation
    // could give logn - 1) and is undefined for n <= 0.
    uint32_t logn = 0;
    for (uint32_t remaining = (n > 0) ? uint32_t(n) : 0u; remaining > 1; remaining >>= 1) {
      ++logn;
    }
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `reverse_order_batch` for a batch of BN254 projective point arrays.
 * @param arr Point array holding the whole batch, forwarded unchanged.
 * @param n Element count; floor(log2(n)) is derived from it and passed alongside.
 *          A non-positive `n` yields a derived value of 0 and is still forwarded as is.
 * @param batch_size Batch-size argument, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return 0 on success, -1 if any exception was caught.
 */
extern "C" int reverse_order_points_batch_cuda_bn254(BN254::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    // floor(log2(n)) via integer shifts. The floating-point form uint32_t(log(n) / log(2))
    // is not guaranteed to land exactly on the integer for powers of two (so truncation
    // could give logn - 1) and is undefined for n <= 0.
    uint32_t logn = 0;
    for (uint32_t remaining = (n > 0) ? uint32_t(n) : 0u; remaining > 1; remaining >>= 1) {
      ++logn;
    }
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
#endif

View File

@@ -1,87 +0,0 @@
#ifndef _BN254_MSM
#define _BN254_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
/**
 * C-ABI wrapper around `large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>`.
 * The final argument is passed as `false`.
 * NOTE(review): presumably this marks the inputs as host-resident (the commit wrappers
 * pass `true` for device data) — confirm against `large_msm`.
 * @param out Destination for the resulting point.
 * @param points Affine base points, `count` of them.
 * @param scalars Scalars, `count` of them.
 * @param count Number of (scalar, point) pairs.
 * @param device_id Not used by this function.
 * @return CUDA_SUCCESS on success, -1 if any exception was caught.
 */
extern "C"
int msm_cuda_bn254(BN254::projective_t *out, BN254::affine_t points[],
                   BN254::scalar_t scalars[], size_t count, size_t device_id = 0)
{
  try
  {
    large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, count, out, false);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * C-ABI wrapper around `batched_large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>`.
 * The final argument is passed as `false`.
 * NOTE(review): presumably this marks the inputs as host-resident (the commit wrappers
 * pass `true` for device data) — confirm against `batched_large_msm`.
 * @param out Destination for the resulting points.
 * @param points Affine base points, forwarded unchanged.
 * @param scalars Scalars, forwarded unchanged.
 * @param batch_size Batch-size argument, forwarded unchanged.
 * @param msm_size Per-MSM size argument, forwarded unchanged.
 * @param device_id Not used by this function.
 * @return CUDA_SUCCESS on success, -1 if any exception was caught.
 */
extern "C" int msm_batch_cuda_bn254(BN254::projective_t* out, BN254::affine_t points[],
                                    BN254::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
  try
  {
    batched_large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, batch_size, msm_size, out, false);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * Commit to a polynomial using the MSM.
 * Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
 * @param d_out Output point to write the result to.
 * @param d_scalars Scalars for the MSM. Must be on device.
 * @param d_points Points for the MSM. Must be on device.
 * @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
 * @param device_id Not used by this function.
 * @return 0 on success, -1 if any exception was caught.
 */
extern "C"
int commit_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t device_id = 0)
{
  try
  {
    large_msm(d_scalars, d_points, count, d_out, true);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
/**
 * Commit to a batch of polynomials using the MSM.
 * Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
 * @param d_out Output points to write the results to.
 * @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
 * @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
 * @param count Length of `d_points` array, `d_scalars` has length `count` * `batch_size`.
 * @param batch_size Size of the batch.
 * @param device_id Not used by this function.
 * @return 0 on success, -1 if any exception was caught.
 */
extern "C"
int commit_batch_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
  try
  {
    // Note the argument order: `batched_large_msm` takes the batch size before the per-MSM size.
    batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
    return 0;
  }
  catch (const std::exception &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // No exception may unwind through the C ABI into the foreign caller.
    printf("error %s", "unknown exception");
    return -1;
  }
}
#endif

View File

@@ -1,155 +0,0 @@
#pragma once
#include "../../utils/storage.cuh"
namespace PARAMS_BN254{
struct fp_config{
static constexpr unsigned limbs_count = 8;
////
static constexpr storage<limbs_count> omega17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> omega18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> omega19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
static constexpr storage<limbs_count> omega20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
static constexpr storage<limbs_count> omega21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
static constexpr storage<limbs_count> omega22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
static constexpr storage<limbs_count> omega23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
static constexpr storage<limbs_count> omega24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
static constexpr storage<limbs_count> omega25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
static constexpr storage<limbs_count> omega26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
static constexpr storage<limbs_count> omega27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
static constexpr storage<limbs_count> omega28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
static constexpr storage<limbs_count> omega29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
////
static constexpr storage<limbs_count> omega_inv17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> omega_inv18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> omega_inv19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
static constexpr storage<limbs_count> omega_inv20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
static constexpr storage<limbs_count> omega_inv21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
static constexpr storage<limbs_count> omega_inv22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
static constexpr storage<limbs_count> omega_inv23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
static constexpr storage<limbs_count> omega_inv24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
static constexpr storage<limbs_count> omega_inv25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
static constexpr storage<limbs_count> omega_inv26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
static constexpr storage<limbs_count> omega_inv27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
static constexpr storage<limbs_count> omega_inv28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
static constexpr storage<limbs_count> omega_inv29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
static constexpr storage<limbs_count> omega_inv30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> omega_inv31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> omega_inv32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
////
////
static constexpr storage<limbs_count> inv17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> inv18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> inv19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
static constexpr storage<limbs_count> inv20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
static constexpr storage<limbs_count> inv21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
static constexpr storage<limbs_count> inv22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
static constexpr storage<limbs_count> inv23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
static constexpr storage<limbs_count> inv24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
static constexpr storage<limbs_count> inv25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
static constexpr storage<limbs_count> inv26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
static constexpr storage<limbs_count> inv27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
static constexpr storage<limbs_count> inv28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
static constexpr storage<limbs_count> inv29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
static constexpr storage<limbs_count> inv30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> inv31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> inv32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
////
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090, 0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121, 0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
static constexpr storage<2*limbs_count> modulus_wide = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_sqared = {0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975, 0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb, 0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7, 0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
static constexpr unsigned modulus_bits_count = 254;
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520, 0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> omega2= {0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> omega3= {0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1};
static constexpr storage<limbs_count> omega4= {0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2};
static constexpr storage<limbs_count> omega5= {0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6};
static constexpr storage<limbs_count> omega6= {0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d};
static constexpr storage<limbs_count> omega7= {0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd};
static constexpr storage<limbs_count> omega8= {0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561};
static constexpr storage<limbs_count> omega9= {0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e};
static constexpr storage<limbs_count> omega10= {0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1};
static constexpr storage<limbs_count> omega11= {0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584};
static constexpr storage<limbs_count> omega12= {0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596};
static constexpr storage<limbs_count> omega13= {0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49};
static constexpr storage<limbs_count> omega14= {0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651};
static constexpr storage<limbs_count> omega15= {0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f};
static constexpr storage<limbs_count> omega16= {0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb};
static constexpr storage<limbs_count> omega_inv1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> omega_inv2= {0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv3= {0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711};
static constexpr storage<limbs_count> omega_inv4= {0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf};
static constexpr storage<limbs_count> omega_inv5= {0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136};
static constexpr storage<limbs_count> omega_inv6= {0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2};
static constexpr storage<limbs_count> omega_inv7= {0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053};
static constexpr storage<limbs_count> omega_inv8= {0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327};
static constexpr storage<limbs_count> omega_inv9= {0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821};
static constexpr storage<limbs_count> omega_inv10= {0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7};
static constexpr storage<limbs_count> omega_inv11= {0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21};
static constexpr storage<limbs_count> omega_inv12= {0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2};
static constexpr storage<limbs_count> omega_inv13= {0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58};
static constexpr storage<limbs_count> omega_inv14= {0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f};
static constexpr storage<limbs_count> omega_inv15= {0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766};
static constexpr storage<limbs_count> omega_inv16= {0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7};
static constexpr storage<limbs_count> inv1= {0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739};
static constexpr storage<limbs_count> inv2= {0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6};
static constexpr storage<limbs_count> inv3= {0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4};
static constexpr storage<limbs_count> inv4= {0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b};
static constexpr storage<limbs_count> inv5= {0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff};
static constexpr storage<limbs_count> inv6= {0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39};
static constexpr storage<limbs_count> inv7= {0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5};
static constexpr storage<limbs_count> inv8= {0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24};
static constexpr storage<limbs_count> inv9= {0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b};
static constexpr storage<limbs_count> inv10= {0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f};
static constexpr storage<limbs_count> inv11= {0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9};
static constexpr storage<limbs_count> inv12= {0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d};
static constexpr storage<limbs_count> inv13= {0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50};
static constexpr storage<limbs_count> inv14= {0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1};
static constexpr storage<limbs_count> inv15= {0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa};
static constexpr storage<limbs_count> inv16= {0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e};
};
// Field configuration with an 8-limb modulus plus precomputed helper constants
// (multiples of the modulus, its square, `m`, one/zero) and generator coordinates.
// NOTE(review): limbs look like 32-bit words stored least-significant first
// (`one` is {1, 0, ...}) — confirm against storage<>.
struct fq_config{
static constexpr unsigned limbs_count = 8;
static constexpr storage<limbs_count> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
// modulus * 2
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522, 0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
// modulus * 4
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45, 0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
// modulus zero-extended to 2 * limbs_count limbs
static constexpr storage<2*limbs_count> modulus_wide = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// presumably modulus^2 and its 2x / 4x multiples (going by the names) — TODO confirm
static constexpr storage<2*limbs_count> modulus_sqared = {0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95, 0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a, 0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55, 0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
static constexpr unsigned modulus_bits_count = 254;
// NOTE(review): presumably the reduction constant floor(2^(2*modulus_bits_count) / modulus) — confirm
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17, 0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// i^2, the square of the imaginary unit for the extension field
static constexpr uint32_t i_squared = 1;
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
// G1 generator coordinates: x = 1, y = 2.
static constexpr storage<limbs_count> generator_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// NOTE(review): all four G2 components below are set to 1, which looks like a
// placeholder rather than a real G2 generator — confirm before relying on G2.
static constexpr storage<limbs_count> generator_x_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_x_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_y_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_y_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
};
// Curve constant b, stored with fq_config::limbs_count limbs; the value here is 3.
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// TODO: correct parameters for G2 here
// NOTE(review): both components of the G2 constant are currently 1, i.e. the
// placeholder values the TODO refers to, not verified curve parameters.
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
}

View File

@@ -1,19 +0,0 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
// Compares two projective points through the C ABI.
// Returns true only when the points compare equal AND neither of them has
// x, y and z all equal to zero; any other combination yields false.
extern "C" bool eq_bn254(BN254::projective_t *point1, BN254::projective_t *point2)
{
  // Unequal points settle the answer immediately.
  if (!(*point1 == *point2))
    return false;

  const bool first_is_all_zero = (point1->x == BN254::point_field_t::zero()) &&
                                 (point1->y == BN254::point_field_t::zero()) &&
                                 (point1->z == BN254::point_field_t::zero());
  if (first_is_all_zero)
    return false;

  const bool second_is_all_zero = (point2->x == BN254::point_field_t::zero()) &&
                                  (point2->y == BN254::point_field_t::zero()) &&
                                  (point2->z == BN254::point_field_t::zero());
  return !second_is_all_zero;
}
#if defined(G2_DEFINED)
// G2 counterpart of the projective equality check, exposed through the C ABI.
// Returns true only when the points compare equal AND neither of them has
// x, y and z all equal to zero; any other combination yields false.
extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective_t *point2)
{
  // Unequal points settle the answer immediately.
  if (!(*point1 == *point2))
    return false;

  const bool first_is_all_zero = (point1->x == BN254::g2_point_field_t::zero()) &&
                                 (point1->y == BN254::g2_point_field_t::zero()) &&
                                 (point1->z == BN254::g2_point_field_t::zero());
  if (first_is_all_zero)
    return false;

  const bool second_is_all_zero = (point2->x == BN254::g2_point_field_t::zero()) &&
                                  (point2->y == BN254::g2_point_field_t::zero()) &&
                                  (point2->z == BN254::g2_point_field_t::zero());
  return !second_is_all_zero;
}
#endif

View File

@@ -1,4 +0,0 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"

View File

@@ -1,72 +0,0 @@
#ifndef _BN254_VEC_MULT
#define _BN254_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
// C-ABI entry point: forwards to vector_mod_mult with `scalar_vec` as the first
// operand and `inout` as both the second operand and the output buffer.
//
// inout      - projective points; passed as input and as output
// scalar_vec - scalars passed as the first argument of vector_mod_mult
// n_elments  - element count forwarded unchanged
// device_id  - currently ignored
//
// Returns CUDA_SUCCESS when the call completes, -1 when any exception is caught.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int32_t vec_mod_mult_point_bn254(BN254::projective_t *inout,
                                            BN254::scalar_t *scalar_vec,
                                            size_t n_elments,
                                            size_t device_id)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  try
  {
    // TODO: device_id
    vector_mod_mult<BN254::projective_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards to vector_mod_mult with `scalar_vec` as the first
// operand and `inout` as both the second operand and the output buffer.
//
// inout      - scalars; passed as input and as output
// scalar_vec - scalars passed as the first argument of vector_mod_mult
// n_elments  - element count forwarded unchanged
// device_id  - currently ignored
//
// Returns CUDA_SUCCESS when the call completes, -1 when any exception is caught.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int32_t vec_mod_mult_scalar_bn254(BN254::scalar_t *inout,
                                             BN254::scalar_t *scalar_vec,
                                             size_t n_elments,
                                             size_t device_id)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  try
  {
    // TODO: device_id
    vector_mod_mult<BN254::scalar_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards its arguments to matrix_mod_mult.
//
// matrix_flattened - matrix operand (flattened, going by the name — layout is
//                    defined by matrix_mod_mult; TODO confirm)
// input            - vector operand
// output           - destination buffer
// n_elments        - size forwarded unchanged
// device_id        - currently ignored
//
// Returns CUDA_SUCCESS when the call completes, -1 when any exception is caught.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int32_t matrix_vec_mod_mult_bn254(BN254::scalar_t *matrix_flattened,
                                             BN254::scalar_t *input,
                                             BN254::scalar_t *output,
                                             size_t n_elments,
                                             size_t device_id)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  try
  {
    // TODO: device_id
    matrix_mod_mult<BN254::scalar_t>(matrix_flattened, input, output, n_elments);
    return CUDA_SUCCESS;
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
#endif

View File

@@ -0,0 +1,14 @@
#pragma once
#include "../primitives/field.cuh"
#include "../primitives/projective.cuh"
#include "bls12_381.cuh"
// #include "bn254.cuh"
// Curve-wide type aliases built from the configuration structs of the included curve header.
// Scalar field (fp_config) and its short name.
using scalar_field_t = Field<fp_config>;
using scalar_t = scalar_field_t;
// Field of the point coordinates (fq_config).
using point_field_t = Field<fq_config>;
// Point representations over that coordinate field.
using affine_t = Affine<point_field_t>;
using projective_t = Projective<point_field_t, scalar_field_t, group_generator, weierstrass_b>;

View File

@@ -0,0 +1,86 @@
#pragma once
#include "../utils/storage.cuh"
// Curve constant b of the short Weierstrass equation y^2 = x^3 + weierstrass_b.
// NOTE(review): this reads the x^3 coefficient as 1 (no weierstrass_a is declared
// in the visible part of this header) — confirm.
static constexpr unsigned weierstrass_b = 4;
// a generator of the elliptic curve group
// Coordinates are given as fq_config::limbs_count limbs (12 values each below).
// NOTE(review): fq_config is referenced here, but its definition appears further
// down in this view — confirm it is declared before this struct is compiled.
struct group_generator {
// x coordinate: the small value 4
static constexpr storage<fq_config::limbs_count> generator_x = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// y coordinate paired with the x above
static constexpr storage<fq_config::limbs_count> generator_y = {0x4abe706c, 0x5ea93e35, 0x00e1de5d, 0x6346b8ed, 0x92848344, 0xda9dd85e,
0xc9926b26, 0xc760f988, 0xf3763e9b, 0xb33cffc3, 0xd40d6212, 0x0a989bad};
};
/// SCALAR FIELD
// Configuration of the scalar field: modulus, precomputed multiples of it,
// the reduction constant m, one/zero, and a few scalar-specific constants.
struct fp_config {
// field structure size = 8 * 32 bit
static constexpr unsigned limbs_count = 8; // array size of 32bit int to form a field element
static constexpr unsigned modulus_bits_count = 255; // field bit size
// field modulus split into array, ordered in Little-Endian
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513 -> 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
// NOTE(review): all zeros. With a 255-bit modulus, modulus*4 does not fit into
// 8 x 32 bits, so this looks like a placeholder — confirm it is never read.
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus zero-extended to 2 * limbs_count limbs
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2 * limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
// 2*modulus^2
static constexpr storage<2 * limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
// m = floor(2^(2*modulus_bits_count) / modulus)
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// Scalar specific
// The limbs of omega_inv and inv_* below are written in decimal rather than hex.
// NOTE(review): by their names these are presumably a root of unity, its inverse,
// and the inverses of 2, 4, 256, 512 and 4096 modulo the modulus — confirm.
static constexpr storage<limbs_count> omega = {0xa5d36306, 0xe206da11, 0x378fbf96, 0x0ad1347b, 0xe0f8245f, 0xfc3e8acf, 0xa0f704f4, 0x564c0a11};
static constexpr storage<limbs_count> omega_inv = {3629396834, 2518295853, 1679307267, 1346818424, 3118225798, 1256349690, 3322524792, 958081110};
static constexpr storage<limbs_count> inv_2 = {2147483649,2147483647,2147429887,2849952257,80800770,429714436,2496577188,972477353};
static constexpr storage<limbs_count> inv_4 = {1073741825,1073741823,1073661183,4274928386,121201155,644571654,1597382134,1458716030};
static constexpr storage<limbs_count> inv_256 = {16777217,16777215,4244528547,1315563102,26752557,3943079472,3597918154,1937357227};
static constexpr storage<limbs_count> inv_512 = {8388609,8388607,4269694161,1360250160,94177049,2401254172,2148052617,1941155967};
static constexpr storage<limbs_count> inv_4096 = {1048577,1074790399,3217972249,3546834984,2300657127,1589027946,3026903920,1944479864};
};
/// BASE FIELD
// Configuration of the base field: modulus, precomputed multiples of it and of
// its square, the reduction constant m, and the one/zero elements.
struct fq_config {
// field structure size = 12 * 32 bit
static constexpr unsigned limbs_count = 12; // array size of 32bit int to form a field element
static constexpr unsigned modulus_bits_count = 381; // field bit size
// field modulus split into array, ordered in Little-Endian
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787 -> 0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
// modulus zero-extended to 2 * limbs_count limbs
static constexpr storage<2 * limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2 * limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
// 2*modulus^2
static constexpr storage<2 * limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
// 4*modulus^2
static constexpr storage<2 * limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
// m = floor(2^(2*modulus_bits_count) / modulus)
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
// multiplicative and additive identities as limb arrays
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
};

View File

@@ -1,14 +0,0 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
// Type aliases for one curve, assembled from the configuration structs in params.cuh.
// NOTE(review): the parameters are looked up under CURVE_NAME_U while the namespace
// is named BN254; CURVE_NAME_U looks like a template placeholder — confirm it is
// substituted (or defined) before this header is compiled.
namespace BN254 {
  // Scalar field and its short name.
  using scalar_field_t = Field<CURVE_NAME_U::fp_config>;
  using scalar_t = scalar_field_t;
  // Field of the point coordinates.
  using point_field_t = Field<CURVE_NAME_U::fq_config>;
  // Point representations over that coordinate field.
  using affine_t = Affine<point_field_t>;
  using projective_t = Projective<point_field_t, scalar_field_t, CURVE_NAME_U::group_generator, CURVE_NAME_U::weierstrass_b>;
}

View File

@@ -1,308 +0,0 @@
#ifndef _CURVE_NAME_U_LDE
#define _CURVE_NAME_U_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
// C-ABI entry point: calls fill_twiddle_factors_array(domain_size, w), where w is
// scalar_t::omega_inv(logn) when `inverse` is set and scalar_t::omega(logn) otherwise.
//
// Returns whatever fill_twiddle_factors_array returns, or nullptr when any
// exception is caught. `device_id` is currently ignored.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" CURVE_NAME_U::scalar_t* build_domain_cuda_CURVE_NAME_L(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
  (void)device_id; // TODO: use device_id when working with multiple devices
  try
  {
    if (inverse) {
      return fill_twiddle_factors_array(domain_size, CURVE_NAME_U::scalar_t::omega_inv(logn));
    } else {
      return fill_twiddle_factors_array(domain_size, CURVE_NAME_U::scalar_t::omega(logn));
    }
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return nullptr;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return nullptr;
  }
}
// C-ABI entry point: forwards to ntt_end2end_template<scalar_t, scalar_t>(arr, n, inverse).
//
// Returns the callee's result, or -1 when any exception is caught.
// `device_id` is currently ignored.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int ntt_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  (void)device_id;
  try
  {
    return ntt_end2end_template<CURVE_NAME_U::scalar_t,CURVE_NAME_U::scalar_t>(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards to ntt_end2end_template<projective_t, scalar_t>(arr, n, inverse),
// i.e. the same template as the scalar variant but over projective points.
//
// Returns the callee's result, or -1 when any exception is caught.
// `device_id` is currently ignored.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int ecntt_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
  (void)device_id;
  try
  {
    return ntt_end2end_template<CURVE_NAME_U::projective_t,CURVE_NAME_U::scalar_t>(arr, n, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards to
// ntt_end2end_batch_template<scalar_t, scalar_t>(arr, arr_size, batch_size, inverse).
//
// Returns the callee's result, or -1 when any exception is caught.
// `device_id` is currently ignored.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int ntt_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  (void)device_id;
  try
  {
    return ntt_end2end_batch_template<CURVE_NAME_U::scalar_t,CURVE_NAME_U::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards to
// ntt_end2end_batch_template<projective_t, scalar_t>(arr, arr_size, batch_size, inverse).
//
// Returns the callee's result, or -1 when any exception is caught.
// `device_id` is currently ignored.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int ecntt_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
  (void)device_id;
  try
  {
    return ntt_end2end_batch_template<CURVE_NAME_U::projective_t,CURVE_NAME_U::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards to interpolate(d_out, d_evaluations, d_domain, n).
//
// Returns the callee's result, or -1 when any exception is caught.
// `device_id` is currently ignored.
// NOTE(review): device_id is `unsigned` here while most sibling entry points in
// this file declare it as `size_t` — confirm which one the bindings expect.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int interpolate_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_evaluations, CURVE_NAME_U::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
  (void)device_id;
  try
  {
    return interpolate(d_out, d_evaluations, d_domain, n);
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards to interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size).
//
// Returns the callee's result, or -1 when any exception is caught.
// `device_id` is currently ignored.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int interpolate_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_evaluations, CURVE_NAME_U::scalar_t* d_domain, unsigned n,
                                                           unsigned batch_size, size_t device_id = 0)
{
  (void)device_id;
  try
  {
    return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards to interpolate(d_out, d_evaluations, d_domain, n)
// with projective points as output/evaluations and scalars as the domain.
//
// Returns the callee's result, or -1 when any exception is caught.
// `device_id` is currently ignored.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int interpolate_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_evaluations, CURVE_NAME_U::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
  (void)device_id;
  try
  {
    return interpolate(d_out, d_evaluations, d_domain, n);
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards to interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size)
// with projective points as output/evaluations and scalars as the domain.
//
// Returns the callee's result, or -1 when any exception is caught.
// `device_id` is currently ignored.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int interpolate_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_evaluations, CURVE_NAME_U::scalar_t* d_domain,
                                                          unsigned n, unsigned batch_size, size_t device_id = 0)
{
  (void)device_id;
  try
  {
    return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards to
// evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, nullptr),
// i.e. with the coset flag off and no coset powers.
//
// Returns the callee's result, or -1 when any exception is caught.
// `device_id` is currently ignored.
// NOTE(review): device_id is `unsigned` here while most sibling entry points in
// this file declare it as `size_t` — confirm which one the bindings expect.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int evaluate_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain,
                                                  unsigned domain_size, unsigned n, unsigned device_id = 0)
{
  (void)device_id;
  try
  {
    // Typed null pointer passed as the (unused) coset-powers argument.
    CURVE_NAME_U::scalar_t* _null = nullptr;
    return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards to
// evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, nullptr),
// i.e. with the coset flag off and no coset powers.
//
// Returns the callee's result, or -1 when any exception is caught.
// `device_id` is currently ignored.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int evaluate_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
                                                        unsigned n, unsigned batch_size, size_t device_id = 0)
{
  (void)device_id;
  try
  {
    // Typed null pointer passed as the (unused) coset-powers argument.
    CURVE_NAME_U::scalar_t* _null = nullptr;
    return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards to
// evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, nullptr)
// with projective points as output/coefficients and scalars as the domain.
//
// Returns the callee's result, or -1 when any exception is caught.
// `device_id` is currently ignored.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int evaluate_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain,
                                                 unsigned domain_size, unsigned n, size_t device_id = 0)
{
  (void)device_id;
  try
  {
    // Typed null pointer passed as the (unused) coset-powers argument.
    CURVE_NAME_U::scalar_t* _null = nullptr;
    return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI entry point: forwards to
// evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, nullptr)
// with projective points as output/coefficients and scalars as the domain.
//
// Returns the callee's result, or -1 when any exception is caught.
// `device_id` is currently ignored.
// No exception is allowed to leave this function: it is called through a C ABI,
// where unwinding into a non-C++ caller is undefined behaviour.
extern "C" int evaluate_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
                                                       unsigned n, unsigned batch_size, size_t device_id = 0)
{
  (void)device_id;
  try
  {
    // Typed null pointer passed as the (unused) coset-powers argument.
    CURVE_NAME_U::scalar_t* _null = nullptr;
    return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
  }
  catch (const std::exception &ex)
  {
    // std::runtime_error derives from std::exception, so it is still reported here.
    printf("error %s", ex.what());
    return -1;
  }
  catch (...)
  {
    // Anything not derived from std::exception must be contained as well.
    printf("error %s", "unknown exception");
    return -1;
  }
}
// C-ABI wrapper around `evaluate` for scalar coefficients, called with the coset
// flag set to true and the caller-supplied `coset_powers`.
// Returns the result of `evaluate`, or -1 (after printing the message) when a
// std::runtime_error is thrown. `device_id` is accepted but not read here.
extern "C" int evaluate_scalars_on_coset_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain, unsigned domain_size,
unsigned n, CURVE_NAME_U::scalar_t *coset_powers, unsigned device_id = 0)
{
  int status = -1;
  try
  {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
// C-ABI wrapper around `evaluate_batch` for scalar coefficients, called with the
// coset flag set to true and the caller-supplied `coset_powers`.
// Returns the result of `evaluate_batch`, or -1 (after printing the message) when
// a std::runtime_error is thrown. `device_id` is accepted but not read here.
extern "C" int evaluate_scalars_on_coset_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
// C-ABI wrapper around `evaluate` for projective-point coefficients, called with
// the coset flag set to true and the caller-supplied `coset_powers`.
// Returns the result of `evaluate`, or -1 (after printing the message) when a
// std::runtime_error is thrown. `device_id` is accepted but not read here.
extern "C" int evaluate_points_on_coset_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain, unsigned domain_size,
unsigned n, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
// C-ABI wrapper around `evaluate_batch` for projective-point coefficients, called
// with the coset flag set to true and the caller-supplied `coset_powers`.
// Returns the result of `evaluate_batch`, or -1 (after printing the message) when
// a std::runtime_error is thrown. `device_id` is accepted but not read here.
extern "C" int evaluate_points_on_coset_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
{
  int status = -1;
  try
  {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
// C-ABI wrapper around `reverse_order` for an array of `n` scalars.
// Passes floor(log2(n)) as the third argument. Returns 0 on success, or -1 (after
// printing the message) when a std::runtime_error is thrown.
// `device_id` is accepted but not read by this wrapper.
extern "C" int reverse_order_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* arr, int n, size_t device_id = 0)
{
  try
  {
    // Integer floor(log2(n)). The previous `uint32_t(log(n) / log(2))` depended on
    // floating-point rounding (a quotient that lands just below an integer truncates
    // to the wrong value) and was undefined for n <= 0; here n <= 1 yields 0.
    uint32_t logn = 0;
    for (int remaining = n; remaining > 1; remaining >>= 1)
      ++logn;
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
// C-ABI wrapper around `reverse_order_batch` for `batch_size` arrays of `n` scalars.
// Passes floor(log2(n)) as the third argument. Returns 0 on success, or -1 (after
// printing the message) when a std::runtime_error is thrown.
// `device_id` is accepted but not read by this wrapper.
extern "C" int reverse_order_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    // Integer floor(log2(n)). The previous `uint32_t(log(n) / log(2))` depended on
    // floating-point rounding (a quotient that lands just below an integer truncates
    // to the wrong value) and was undefined for n <= 0; here n <= 1 yields 0.
    uint32_t logn = 0;
    for (int remaining = n; remaining > 1; remaining >>= 1)
      ++logn;
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
// C-ABI wrapper around `reverse_order` for an array of `n` projective points.
// Passes floor(log2(n)) as the third argument. Returns 0 on success, or -1 (after
// printing the message) when a std::runtime_error is thrown.
// `device_id` is accepted but not read by this wrapper.
extern "C" int reverse_order_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* arr, int n, size_t device_id = 0)
{
  try
  {
    // Integer floor(log2(n)). The previous `uint32_t(log(n) / log(2))` depended on
    // floating-point rounding (a quotient that lands just below an integer truncates
    // to the wrong value) and was undefined for n <= 0; here n <= 1 yields 0.
    uint32_t logn = 0;
    for (int remaining = n; remaining > 1; remaining >>= 1)
      ++logn;
    reverse_order(arr, n, logn);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
// C-ABI wrapper around `reverse_order_batch` for `batch_size` arrays of `n`
// projective points.
// Passes floor(log2(n)) as the third argument. Returns 0 on success, or -1 (after
// printing the message) when a std::runtime_error is thrown.
// `device_id` is accepted but not read by this wrapper.
extern "C" int reverse_order_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
  try
  {
    // Integer floor(log2(n)). The previous `uint32_t(log(n) / log(2))` depended on
    // floating-point rounding (a quotient that lands just below an integer truncates
    // to the wrong value) and was undefined for n <= 0; here n <= 1 yields 0.
    uint32_t logn = 0;
    for (int remaining = n; remaining > 1; remaining >>= 1)
      ++logn;
    reverse_order_batch(arr, n, logn, batch_size);
    return 0;
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    return -1;
  }
}
#endif

View File

@@ -1,94 +0,0 @@
#ifndef _CURVE_NAME_U_MSM
#define _CURVE_NAME_U_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
// C-ABI entry point for a single MSM over `count` (scalar, point) pairs.
// Inputs of up to 256 elements go to `short_msm`, larger ones to `large_msm`; both
// are called with their last argument set to false.
// Returns CUDA_SUCCESS on completion, or -1 (after printing the message) when a
// std::runtime_error is thrown. `device_id` is accepted but not read here.
extern "C"
int msm_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *out, CURVE_NAME_U::affine_t points[],
CURVE_NAME_U::scalar_t scalars[], size_t count, size_t device_id = 0)
{
  int status = -1;
  try
  {
    if (count <= 256)
    {
      short_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, count, out, false);
    }
    else
    {
      large_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, count, out, false);
    }
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
// C-ABI entry point for a batch of MSMs; forwards to `batched_large_msm` with its
// last argument set to false.
// Returns CUDA_SUCCESS on completion, or -1 (after printing the message) when a
// std::runtime_error is thrown. `device_id` is accepted but not read here.
extern "C" int msm_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* out, CURVE_NAME_U::affine_t points[],
CURVE_NAME_U::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    batched_large_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, batch_size, msm_size, out, false);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * Commit to a polynomial by running a single MSM.
 * Note: this only invokes `large_msm`; no conversion between evaluation and
 * coefficient form is performed on the scalars or the points.
 * @param d_out Output point the result is written to.
 * @param d_scalars Scalars for the MSM. Must be on device.
 * @param d_points Points for the MSM. Must be on device.
 * @param count Length of the `d_scalars` and `d_points` arrays (they should have equal length).
 * @param device_id Accepted but not read by this wrapper.
 * @return 0 on success, -1 (after printing the message) if a std::runtime_error is thrown.
 */
extern "C"
int commit_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::scalar_t* d_scalars, CURVE_NAME_U::affine_t* d_points, size_t count, size_t device_id = 0)
{
  int status = -1;
  try
  {
    large_msm(d_scalars, d_points, count, d_out, true);
    status = 0;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
/**
 * Commit to a batch of polynomials by running a batched MSM.
 * Note: this only invokes `batched_large_msm`; no conversion between evaluation and
 * coefficient form is performed on the scalars or the points.
 * @param d_out Output points the results are written to.
 * @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
 * @param d_points Points for the MSMs. Must be on device. The same set of bases is assumed for each MSM.
 * @param count Length of the `d_points` array; `d_scalars` has length `count` * `batch_size`.
 * @param batch_size Size of the batch.
 * @param device_id Accepted but not read by this wrapper.
 * @return 0 on success, -1 (after printing the message) if a std::runtime_error is thrown.
 */
extern "C"
int commit_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::scalar_t* d_scalars, CURVE_NAME_U::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
  int status = -1;
  try
  {
    batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
    status = 0;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what());
  }
  return status;
}
#endif

View File

@@ -1,8 +0,0 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
// C-ABI equality test for two projective points: dereferences both pointers and
// compares the pointees with the type's operator==. Neither pointer is checked
// for null. `device_id` is accepted but not read.
extern "C" bool eq_CURVE_NAME_L(CURVE_NAME_U::projective_t *point1, CURVE_NAME_U::projective_t *point2, size_t device_id = 0)
{
  CURVE_NAME_U::projective_t &lhs = *point1;
  CURVE_NAME_U::projective_t &rhs = *point2;
  return lhs == rhs;
}

View File

@@ -1,4 +0,0 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"

View File

@@ -1,66 +0,0 @@
#ifndef _CURVE_NAME_U_VEC_MULT
#define _CURVE_NAME_U_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
// C-ABI wrapper around `vector_mod_mult` for projective points: `inout` is passed
// both as an input and as the output, together with `scalar_vec`.
// Returns CUDA_SUCCESS on completion, or -1 (after printing the message) when a
// std::runtime_error is thrown.
extern "C" int32_t vec_mod_mult_point_CURVE_NAME_L(CURVE_NAME_U::projective_t *inout,
CURVE_NAME_U::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
  int32_t status = -1;
  try
  {
    // TODO: device_id is not used yet
    vector_mod_mult<CURVE_NAME_U::projective_t, CURVE_NAME_U::scalar_t>(scalar_vec, inout, inout, n_elments);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what()); // TODO: error code and message
  }
  return status;
}
// C-ABI wrapper around `vector_mod_mult` for scalars: `inout` is passed both as an
// input and as the output, together with `scalar_vec`.
// Returns CUDA_SUCCESS on completion, or -1 (after printing the message) when a
// std::runtime_error is thrown.
extern "C" int32_t vec_mod_mult_scalar_CURVE_NAME_L(CURVE_NAME_U::scalar_t *inout,
CURVE_NAME_U::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
  int32_t status = -1;
  try
  {
    // TODO: device_id is not used yet
    vector_mod_mult<CURVE_NAME_U::scalar_t, CURVE_NAME_U::scalar_t>(scalar_vec, inout, inout, n_elments);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what()); // TODO: error code and message
  }
  return status;
}
// C-ABI wrapper around `matrix_mod_mult` for scalars; forwards the flattened
// matrix, the input vector, the output vector and `n_elments` unchanged.
// Returns CUDA_SUCCESS on completion, or -1 (after printing the message) when a
// std::runtime_error is thrown.
extern "C" int32_t matrix_vec_mod_mult_CURVE_NAME_L(CURVE_NAME_U::scalar_t *matrix_flattened,
CURVE_NAME_U::scalar_t *input,
CURVE_NAME_U::scalar_t *output,
size_t n_elments,
size_t device_id)
{
  int32_t status = -1;
  try
  {
    // TODO: device_id is not used yet
    matrix_mod_mult<CURVE_NAME_U::scalar_t>(matrix_flattened, input, output, n_elments);
    status = CUDA_SUCCESS;
  }
  catch (const std::runtime_error &err)
  {
    printf("error %s", err.what()); // TODO: error code and message
  }
  return status;
}
#endif

View File

@@ -1,3 +0,0 @@
#include "bls12_381/supported_operations.cu"
#include "bls12_377/supported_operations.cu"
#include "bn254/supported_operations.cu"

View File

@@ -1,149 +0,0 @@
#pragma once
#include "field.cuh"
#define HOST_INLINE __host__ __forceinline__
#define DEVICE_INLINE __device__ __forceinline__
#define HOST_DEVICE_INLINE __host__ __device__ __forceinline__
// Degree-2 extension of Field<CONFIG>. An element is `real + imaginary * i`, where
// i * i equals CONFIG::i_squared, negated when CONFIG::i_squared_is_negative is set
// (this is how operator* folds the product of the imaginary parts back into the
// real part).
template <typename CONFIG> class ExtensionField {
  private:
    typedef typename Field<CONFIG>::Wide FWide;

    // Pair of double-width (unreduced) base-field values, as produced by mul_wide.
    struct ExtensionWide {
      FWide real;
      FWide imaginary;

      // Component-wise FWide::get_lower().
      ExtensionField HOST_DEVICE_INLINE get_lower() {
        return ExtensionField { real.get_lower(), imaginary.get_lower() };
      }

      // Component-wise FWide::get_higher_with_slack().
      ExtensionField HOST_DEVICE_INLINE get_higher_with_slack() {
        return ExtensionField { real.get_higher_with_slack(), imaginary.get_higher_with_slack() };
      }
    };

    // Component-wise wide addition. Builds an ExtensionWide: the operands are FWide
    // values, so they cannot initialise an ExtensionField as the code previously tried.
    friend HOST_DEVICE_INLINE ExtensionWide operator+(ExtensionWide xs, const ExtensionWide& ys) {
      return ExtensionWide { xs.real + ys.real, xs.imaginary + ys.imaginary };
    }

    // an incomplete impl that assumes that xs > ys
    friend HOST_DEVICE_INLINE ExtensionWide operator-(ExtensionWide xs, const ExtensionWide& ys) {
      return ExtensionWide { xs.real - ys.real, xs.imaginary - ys.imaginary };
    }

  public:
    typedef Field<CONFIG> FF;
    static constexpr unsigned TLC = 2 * CONFIG::limbs_count;

    FF real;
    FF imaginary;

    // Additive identity: both components are FF::zero().
    static constexpr HOST_DEVICE_INLINE ExtensionField zero() {
      return ExtensionField { FF::zero(), FF::zero() };
    }

    // Multiplicative identity: real part FF::one(), imaginary part FF::zero().
    static constexpr HOST_DEVICE_INLINE ExtensionField one() {
      return ExtensionField { FF::one(), FF::zero() };
    }

    // Element built from CONFIG::generator_x_re / CONFIG::generator_x_im.
    static constexpr HOST_DEVICE_INLINE ExtensionField generator_x() {
      return ExtensionField { FF { CONFIG::generator_x_re }, FF { CONFIG::generator_x_im } };
    }

    // Element built from CONFIG::generator_y_re / CONFIG::generator_y_im.
    static constexpr HOST_DEVICE_INLINE ExtensionField generator_y() {
      return ExtensionField { FF { CONFIG::generator_y_re }, FF { CONFIG::generator_y_im } };
    }

    // Host-only: both components drawn independently from FF::rand_host().
    static HOST_INLINE ExtensionField rand_host() {
      return ExtensionField { FF::rand_host(), FF::rand_host() };
    }

    // Component-wise FF::reduce. The components are passed by reference, not by
    // address, and `template` is required because FF is a dependent type.
    template <unsigned REDUCTION_SIZE = 1> static constexpr HOST_DEVICE_INLINE ExtensionField reduce(const ExtensionField &xs) {
      return ExtensionField {
        FF::template reduce<REDUCTION_SIZE>(xs.real),
        FF::template reduce<REDUCTION_SIZE>(xs.imaginary)
      };
    }

    friend std::ostream& operator<<(std::ostream& os, const ExtensionField& xs) {
      os << "{ Real: " << xs.real << " }; { Imaginary: " << xs.imaginary << " }";
      return os;
    }

    // Component-wise addition in the base field.
    friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const ExtensionField& ys) {
      return ExtensionField { xs.real + ys.real, xs.imaginary + ys.imaginary };
    }

    // Component-wise subtraction in the base field.
    friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const ExtensionField& ys) {
      return ExtensionField { xs.real - ys.real, xs.imaginary - ys.imaginary };
    }

    // Unreduced product of xs and ys, using three base-field wide products plus one
    // for the i^2 term.
    // NOTE(review): the imaginary part relies on the wide operator-, which assumes
    // no borrow; confirm prod_of_sums >= real_prod + imaginary_prod for the operand
    // ranges used by callers.
    template <unsigned MODULUS_MULTIPLE = 1>
    static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const ExtensionField& ys) {
      // FF::mul_wide takes the two factors; passing an already-reduced product
      // (as the code did before) would not yield a wide product at all.
      FWide real_prod = FF::mul_wide(xs.real, ys.real);
      FWide imaginary_prod = FF::mul_wide(xs.imaginary, ys.imaginary);
      FWide prod_of_sums = FF::mul_wide(xs.real + xs.imaginary, ys.real + ys.imaginary);
      // i^2 * xs.imaginary * ys.imaginary: scale and negate one factor in the base
      // field first, so only operations already used by operator* are needed.
      FF scaled_im = FF::template mul_unsigned<CONFIG::i_squared>(xs.imaginary);
      scaled_im = CONFIG::i_squared_is_negative ? FF::neg(scaled_im) : scaled_im;
      FWide i_sq_times_im = FF::mul_wide(scaled_im, ys.imaginary);
      return ExtensionWide { real_prod + i_sq_times_im, prod_of_sums - real_prod - imaginary_prod };
    }

    // Reduced product of xs and ys, using three base-field multiplications:
    //   real      = xr*yr + i^2 * xi*yi
    //   imaginary = (xr + xi)*(yr + yi) - xr*yr - xi*yi
    friend HOST_DEVICE_INLINE ExtensionField operator*(const ExtensionField& xs, const ExtensionField& ys) {
      FF real_prod = xs.real * ys.real;
      FF imaginary_prod = xs.imaginary * ys.imaginary;
      FF prod_of_sums = (xs.real + xs.imaginary) * (ys.real + ys.imaginary);
      FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
      i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
      return ExtensionField { real_prod + i_sq_times_im, prod_of_sums - real_prod - imaginary_prod };
    }

    friend HOST_DEVICE_INLINE bool operator==(const ExtensionField& xs, const ExtensionField& ys) {
      return (xs.real == ys.real) && (xs.imaginary == ys.imaginary);
    }

    friend HOST_DEVICE_INLINE bool operator!=(const ExtensionField& xs, const ExtensionField& ys) {
      return !(xs == ys);
    }

    // Multiplies xs by a compile-time constant. Only limb 0 of each component of
    // `mutliplier` is read, so the constant's components must fit in one 32-bit limb.
    template <const ExtensionField& mutliplier>
    static constexpr HOST_DEVICE_INLINE ExtensionField mul_const(const ExtensionField &xs) {
      constexpr uint32_t mul_real = mutliplier.real.limbs_storage.limbs[0];
      constexpr uint32_t mul_imaginary = mutliplier.imaginary.limbs_storage.limbs[0];
      FF real_prod = FF::template mul_unsigned<mul_real>(xs.real);
      FF imaginary_prod = FF::template mul_unsigned<mul_imaginary>(xs.imaginary);
      FF re_im = FF::template mul_unsigned<mul_real>(xs.imaginary);
      FF im_re = FF::template mul_unsigned<mul_imaginary>(xs.real);
      FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
      i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
      return ExtensionField { real_prod + i_sq_times_im, re_im + im_re };
    }

    // Multiplies both components by the 32-bit constant `mutliplier`.
    // REDUCTION_SIZE is accepted but not used.
    template <uint32_t mutliplier, unsigned REDUCTION_SIZE = 1>
    static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField &xs) {
      return { FF::template mul_unsigned<mutliplier>(xs.real), FF::template mul_unsigned<mutliplier>(xs.imaginary) };
    }

    template <unsigned MODULUS_MULTIPLE = 1>
    static constexpr HOST_DEVICE_INLINE ExtensionWide sqr_wide(const ExtensionField& xs) {
      // TODO: change to a more efficient squaring
      return mul_wide<MODULUS_MULTIPLE>(xs, xs);
    }

    template <unsigned MODULUS_MULTIPLE = 1>
    static constexpr HOST_DEVICE_INLINE ExtensionField sqr(const ExtensionField& xs) {
      // TODO: change to a more efficient squaring
      return xs * xs;
    }

    // Component-wise negation in the base field.
    template <unsigned MODULUS_MULTIPLE = 1>
    static constexpr HOST_DEVICE_INLINE ExtensionField neg(const ExtensionField& xs) {
      return ExtensionField { FF::neg(xs.real), FF::neg(xs.imaginary) };
    }

    // inverse assumes that xs is nonzero
    // Computes conj(xs) / N(xs) with N(xs) = xs * conj(xs) = real^2 - i^2 * imaginary^2.
    // The i^2 term is built from CONFIG::i_squared / i_squared_is_negative, matching
    // operator*; the previous `real^2 + imaginary^2` was only right for i^2 == -1.
    static constexpr HOST_DEVICE_INLINE ExtensionField inverse(const ExtensionField& xs) {
      ExtensionField xs_conjugate = { xs.real, FF::neg(xs.imaginary) };
      FF i_sq_times_im_sq = FF::template mul_unsigned<CONFIG::i_squared>(FF::sqr(xs.imaginary));
      i_sq_times_im_sq = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im_sq) : i_sq_times_im_sq;
      // TODO: wide here
      FF xs_norm = FF::sqr(xs.real) - i_sq_times_im_sq;
      return xs_conjugate * ExtensionField { FF::inverse(xs_norm), FF::zero() };
    }
};

View File

@@ -23,14 +23,6 @@ template <class CONFIG> class Field {
return Field { CONFIG::one };
}
// Returns the field element brace-initialised from the CONFIG::generator_x constant.
static constexpr HOST_DEVICE_INLINE Field generator_x() {
return Field { CONFIG::generator_x };
}
// Returns the field element brace-initialised from the CONFIG::generator_y constant.
static constexpr HOST_DEVICE_INLINE Field generator_y() {
return Field { CONFIG::generator_y };
}
static constexpr HOST_INLINE Field omega(uint32_t log_size) {
// Quick fix to linking issue, permanent fix will follow
switch (log_size) {
@@ -101,7 +93,6 @@ template <class CONFIG> class Field {
case 32:
return Field { CONFIG::omega32 };
}
return Field { CONFIG::one };
// return Field { CONFIG::omega[log_size-1] };
}
@@ -175,7 +166,6 @@ template <class CONFIG> class Field {
case 32:
return Field { CONFIG::omega_inv32 };
}
return Field { CONFIG::one };
// return Field { CONFIG::omega_inv[log_size-1] };
}
@@ -247,7 +237,6 @@ template <class CONFIG> class Field {
case 32:
return Field { CONFIG::inv32 };
}
return Field { CONFIG::one };
// return Field { CONFIG::inv[log_size-1] };
}
@@ -255,13 +244,14 @@ template <class CONFIG> class Field {
return Field { CONFIG::modulus };
}
// private:
typedef storage<TLC> ff_storage;
typedef storage<2*TLC> ff_wide_storage;
static constexpr unsigned slack_bits = 32 * TLC - NBITS;
struct Wide {
struct wide {
ff_wide_storage limbs_storage;
Field HOST_DEVICE_INLINE get_lower() {
@@ -290,15 +280,15 @@ template <class CONFIG> class Field {
}
};
friend HOST_DEVICE_INLINE Wide operator+(Wide xs, const Wide& ys) {
Wide rs = {};
friend HOST_DEVICE_INLINE wide operator+(wide xs, const wide& ys) {
wide rs = {};
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
return rs;
}
// an incomplete impl that assumes that xs > ys
friend HOST_DEVICE_INLINE Wide operator-(Wide xs, const Wide& ys) {
Wide rs = {};
friend HOST_DEVICE_INLINE wide operator-(wide xs, const wide& ys) {
wide rs = {};
sub_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
return rs;
}
@@ -347,9 +337,7 @@ template <class CONFIG> class Field {
const uint32_t *y = ys.limbs;
uint32_t *r = rs.limbs;
r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
#ifdef __CUDA_ARCH__
#pragma unroll
#endif
for (unsigned i = 1; i < (CARRY_OUT ? TLC : TLC - 1); i++)
r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
if (!CARRY_OUT) {
@@ -365,9 +353,7 @@ template <class CONFIG> class Field {
const uint32_t *y = ys.limbs;
uint32_t *r = rs.limbs;
r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
#ifdef __CUDA_ARCH__
#pragma unroll
#endif
for (unsigned i = 1; i < (CARRY_OUT ? 2 * TLC : 2 * TLC - 1); i++)
r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
if (!CARRY_OUT) {
@@ -401,6 +387,14 @@ template <class CONFIG> class Field {
return CARRY_OUT ? carry : 0;
}
// Host path of sub_limbs_partial: writes the limb-wise result of x - y over the
// first `num_limbs` limbs into r, threading one carry variable through
// host_math::carry_chain::sub. Returns that variable's final value (presumably
// the borrow out of the top limb - confirm against host_math).
static constexpr HOST_INLINE uint32_t sub_limbs_partial_host(uint32_t* x, uint32_t* y, uint32_t* r, uint32_t num_limbs) {
  host_math::carry_chain<2 * TLC, false, true> limb_chain;
  uint32_t borrow = 0;
  for (unsigned limb = 0; limb != num_limbs; ++limb) {
    r[limb] = limb_chain.sub(x[limb], y[limb], borrow);
  }
  return borrow;
}
template <bool CARRY_OUT, typename T> static constexpr HOST_DEVICE_INLINE uint32_t add_limbs(const T &xs, const T &ys, T &rs) {
#ifdef __CUDA_ARCH__
return add_sub_limbs_device<false, CARRY_OUT>(xs, ys, rs);
@@ -425,17 +419,41 @@ template <class CONFIG> class Field {
}
}
// For every second index k in [start_i, n) (k = start_i, start_i + 2, ...), stores
// ptx::mul_lo(a[k], bi) in acc[k] and ptx::mul_hi(a[k], bi) in acc[k + 1].
// Entries of acc below start_i are not touched.
static DEVICE_INLINE void mul_n_msb(uint32_t *acc, const uint32_t *a, uint32_t bi, size_t n = TLC, size_t start_i = 0) {
  #pragma unroll
  for (size_t k = start_i; k < n; k += 2) {
    uint32_t *dst = acc + k;
    dst[0] = ptx::mul_lo(a[k], bi);
    dst[1] = ptx::mul_hi(a[k], bi);
  }
}
// Chained multiply-accumulate of every second limb of `a` by the single limb `bi`
// into `acc`, for indices 0, 2, 4, ... below n: acc[i] accumulates the low half and
// acc[i + 1] the high half of a[i] * bi.
// The first instruction starts the chain (mad_lo_cc), all later ones use the
// carry-in/carry-out forms (madc_*_cc). No addc is issued here, so any carry left
// after the last step is presumably meant to be consumed by the caller.
static DEVICE_INLINE void cmad_n(uint32_t *acc, const uint32_t *a, uint32_t bi, size_t n = TLC) {
// multiply scalar by vector
// acc = acc + bi*A[::2]
acc[0] = ptx::mad_lo_cc(a[0], bi, acc[0]);
acc[1] = ptx::madc_hi_cc(a[0], bi, acc[1]);
#pragma unroll
#pragma unroll
for (size_t i = 2; i < n; i += 2) {
acc[i] = ptx::madc_lo_cc(a[i], bi, acc[i]);
acc[i + 1] = ptx::madc_hi_cc(a[i], bi, acc[i + 1]);
}
}
// Same chained multiply-accumulate as cmad_n, but starting at index `a_start_idx`
// instead of 0: indices a_start_idx, a_start_idx + 2, ... below n are processed and
// lower entries of `acc` are left untouched.
// The entry at a_start_idx is always processed, even when a_start_idx >= n.
// No addc is issued here, so any carry left after the last step is presumably
// meant to be consumed by the caller.
static DEVICE_INLINE void cmad_n_msb(uint32_t *acc, const uint32_t *a, uint32_t bi, size_t n = TLC, size_t a_start_idx=0) {
// multiply scalar by vector
// acc = acc + bi*A[::2]
acc[a_start_idx] = ptx::mad_lo_cc(a[a_start_idx], bi, acc[a_start_idx]);
acc[a_start_idx + 1] = ptx::madc_hi_cc(a[a_start_idx], bi, acc[a_start_idx + 1]);
#pragma unroll
for (size_t i = a_start_idx + 2; i < n; i += 2) {
acc[i] = ptx::madc_lo_cc(a[i], bi, acc[i]);
acc[i + 1] = ptx::madc_hi_cc(a[i], bi, acc[i + 1]);
}
}
static DEVICE_INLINE void mad_row(uint32_t *odd, uint32_t *even, const uint32_t *a, uint32_t bi, size_t n = TLC) {
// odd = odd + bi*A
// even = even + bi*A
cmad_n(odd, a + 1, bi, n - 2);
odd[n - 2] = ptx::madc_lo_cc(a[n - 1], bi, 0);
odd[n - 1] = ptx::madc_hi(a[n - 1], bi, 0);
@@ -443,6 +461,16 @@ template <class CONFIG> class Field {
odd[n - 1] = ptx::addc(odd[n - 1], 0);
}
// Accumulates one row (a * bi) into the two accumulators, skipping the low limbs:
// `odd` receives the products of a[1], a[3], ... and `even` those of a[0], a[2], ...,
// both via cmad_n_msb starting at the given offsets.
// NOTE(review): `a_start_idx - 1` is computed in size_t, so calling this with the
// default a_start_idx = 0 wraps around to SIZE_MAX; callers must pass
// a_start_idx >= 1.
static DEVICE_INLINE void mad_row_msb(uint32_t *odd, uint32_t *even, const uint32_t *a, uint32_t bi, size_t n = TLC, size_t a_start_idx = 0) {
// odd = odd + bi*A
// even = even + bi*A
cmad_n_msb(odd, a + 1, bi, n - 2, a_start_idx - 1);
// top limb of the odd part: overwrites odd[n - 2] and odd[n - 1] (addend is 0),
// continuing the carry chain started inside cmad_n_msb
odd[n - 2] = ptx::madc_lo_cc(a[n - 1], bi, 0);
odd[n - 1] = ptx::madc_hi(a[n - 1], bi, 0);
cmad_n_msb(even, a, bi, n, a_start_idx);
// fold the carry left by the even chain into the top limb of odd
odd[n - 1] = ptx::addc(odd[n - 1], 0);
}
static DEVICE_INLINE void multiply_raw_device(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
const uint32_t *a = as.limbs;
const uint32_t *b = bs.limbs;
@@ -464,13 +492,289 @@ template <class CONFIG> class Field {
even[i + 1] = ptx::addc(even[i + 1], 0);
}
// Stores the two halves of a * b as returned by ptx::mul_lo / ptx::mul_hi into
// r[0] and r[1] respectively; no carry-in or carry-out is involved.
static DEVICE_INLINE void mult_no_carry(uint32_t a, uint32_t b, uint32_t *r) {
  uint32_t *const lo = r;
  uint32_t *const hi = r + 1;
  *lo = ptx::mul_lo(a, b);
  *hi = ptx::mul_hi(a, b);
}
// Device-side full multiplication of two TLC-limb values: rs = as * bs (2*TLC limbs).
// Partial products are accumulated into two arrays, `even` (aliasing rs.limbs) and a
// local `odd`, so that each carry chain only touches one array; the two are added
// together at the end with `odd` shifted up by one limb.
// The main loop consumes two limbs of b per iteration (b[i] and b[i + 1], i odd);
// the code after the loop handles the remaining limb b[i]. This layout presumably
// assumes TLC is even - TODO confirm.
// Statement order matters: every *_cc / madc / addc call depends on the carry flag
// left by the previous one.
static DEVICE_INLINE void ingo_multiply_raw_device(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
const uint32_t *a = as.limbs;
const uint32_t *b = bs.limbs;
// NOTE(review): `r` is not used below; the result is written through `even`.
uint32_t *r = rs.limbs;
uint32_t i, j;
uint32_t *even = rs.limbs;
__align__(8) uint32_t odd[2 * TLC];
// clear both accumulators (this loop's `i` shadows the outer `i`)
for (uint32_t i = 0; i < 2 * TLC; i++)
{
even[i] = 0;
odd[i] = 0;
}
// first row special case, no carry in no carry out. split into two parts, even and odd.
for (i = 0; i < TLC - 1; i+=2 )
{
mult_no_carry(b[0], a[i], &even[i]);
mult_no_carry(b[0], a[i + 1], &odd[i]);
}
// doing two rows at one loop
for (i = 1; i < TLC - 1; i+=2)
{
// odd bi's
// multiply accumulate even part of new row with odd part prev row (needs a carry)
// j = 0, no carry in, only carry out
odd[i - 1] = ptx::mad_lo_cc(a[0], b[i], odd[i - 1]);
odd[i] = ptx::madc_hi_cc(a[0], b[i], odd[i]);
// for loop carry in carry out
for (j = 2; j < TLC; j+=2) // 2, 4, 6
{
odd[i + j - 1] = ptx::madc_lo_cc(a[j], b[i], odd[i + j - 1]);
odd[i + j] = ptx::madc_hi_cc(a[j], b[i], odd[i + j]);
}
odd[i + j - 1] = ptx::addc(odd[i + j - 1], 0); // handling last carry
// multiply accumulate odd part of new row with even part prev row (doesnt need a carry)
// j = 1, no carry in, only carry out
even[i + 1] = ptx::mad_lo_cc(a[1], b[i], even[i + 1]);
even[i + 2] = ptx::madc_hi_cc(a[1], b[i], even[i + 2]);
// for loop carry in carry out
for (j = 3; j < TLC; j+=2)
{
even[i + j] = ptx::madc_lo_cc(a[j], b[i], even[i + j]);
even[i + j + 1] = ptx::madc_hi_cc(a[j], b[i], even[i + j + 1]);
}
// even bi's
// multiply accumulate even part of new row with even part of prev row // needs a carry
// j = 0, no carry in, only carry out
even[i + 1] = ptx::mad_lo_cc(a[0], b[i + 1], even[i + 1]);
even[i + 2] = ptx::madc_hi_cc(a[0], b[i + 1], even[i + 2]);
// for loop, carry in, carry out.
for (j = 2; j < TLC; j+=2)
{
even[i + j + 1] = ptx::madc_lo_cc(a[j], b[i + 1], even[i + j + 1]);
even[i + j + 2] = ptx::madc_hi_cc(a[j], b[i + 1], even[i + j + 2]);
}
even[i + j + 1] = ptx::addc(even[i + j + 1], 0); // handling last carry
// multiply accumulate odd part of new row with odd part of prev row
// j = 1, no carry in, only carry out
odd[i + 1] = ptx::mad_lo_cc(a[1], b[i + 1], odd[i + 1]);
odd[i + 2] = ptx::madc_hi_cc(a[1], b[i + 1], odd[i + 2]);
// for loop, carry in, carry out.
for (j = 3; j < TLC; j+=2)
{
odd[i + j] = ptx::madc_lo_cc(a[j], b[i + 1], odd[i + j]);
odd[i + j + 1] = ptx::madc_hi_cc(a[j], b[i + 1], odd[i + j + 1]);
}
}
// remaining row: `i` holds the value it had when the loop above exited
odd[i - 1] = ptx::mad_lo_cc(a[0], b[i], odd[i - 1]);
odd[i] = ptx::madc_hi_cc(a[0], b[i], odd[i]);
// for loop carry in carry out
for (j = 2; j < TLC; j+=2)
{
odd[i + j - 1] = ptx::madc_lo_cc(a[j], b[i], odd[i + j - 1]);
odd[i + j] = ptx::madc_hi_cc(a[j], b[i], odd[i + j]);
}
odd[i + j - 1] = ptx::addc(odd[i + j - 1], 0); // handling last carry
// multiply accumulate odd part of new row with even part prev row
// j = 1, no carry in, only carry out
even[i + 1] = ptx::mad_lo_cc(a[1], b[i], even[i + 1]);
even[i + 2] = ptx::madc_hi_cc(a[1], b[i], even[i + 2]);
// for loop carry in carry out
for (j = 3; j < TLC; j+=2)
{
even[i + j] = ptx::madc_lo_cc(a[j], b[i], even[i + j]);
even[i + j + 1] = ptx::madc_hi_cc(a[j], b[i], even[i + j + 1]);
}
// add even and odd parts
even[1] = ptx::add_cc(even[1], odd[0]);
for (i = 1; i < 2 * TLC - 2; i++)
even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
even[i + 1] = ptx::addc(even[i + 1], 0);
}
// Device-side multiplication that skips most low-order partial products: for row
// b[i] only limbs a[j] from roughly j >= TLC - 1 - i upwards are accumulated (the
// start index is rounded to the parity each chain needs). The low limbs of rs are
// therefore not the true product; only the upper part is meant to be used -
// TODO confirm the exact number of valid limbs with the author.
// Uses the same even/odd accumulator split as ingo_multiply_raw_device: `even`
// aliases rs.limbs, `odd` is local, and both are merged at the end.
// Statement order matters: every *_cc / madc / addc call depends on the carry flag
// left by the previous one.
static DEVICE_INLINE void ingo_msb_multiply_raw_device(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
const uint32_t *a = as.limbs;
const uint32_t *b = bs.limbs;
// NOTE(review): `r` is not used below; the result is written through `even`.
uint32_t *r = rs.limbs;
uint32_t i, j;
uint32_t *even = rs.limbs;
__align__(8) uint32_t odd[2 * TLC];
// clear both accumulators (this loop's `i` shadows the outer `i`)
for (uint32_t i = 0; i < 2 * TLC; i++)
{
even[i] = 0;
odd[i] = 0;
}
// only last element from first row.
mult_no_carry(b[0], a[TLC - 1], &odd[TLC - 2]);
// doing two rows at one loop
#pragma unroll
for (i = 1; i < TLC - 1; i+=2)
{
// first limb of `a` that contributes for row b[i], and that index rounded up to
// the next odd / even value for the two chains below
const uint32_t first_active_j = TLC - 1 - i;
const uint32_t first_active_j_odd = first_active_j + (1 - (first_active_j % 2));
const uint32_t first_active_j_even = first_active_j + first_active_j % 2 ;
// odd bi's
// multiply accumulate even part of new row with odd part prev row (needs a carry)
// first active even j, no carry in, only carry out
odd[first_active_j_even + i - 1] = ptx::mad_lo_cc(a[first_active_j_even], b[i], odd[first_active_j_even + i - 1]);
odd[first_active_j_even + i] = ptx::madc_hi_cc(a[first_active_j_even], b[i], odd[first_active_j_even + i]);
// for loop carry in carry out
#pragma unroll
for (j = first_active_j_even + 2; j < TLC; j+=2)
{
odd[i + j - 1] = ptx::madc_lo_cc(a[j], b[i], odd[i + j - 1]);
odd[i + j] = ptx::madc_hi_cc(a[j], b[i], odd[i + j]);
}
odd[i + j - 1] = ptx::addc(odd[i + j - 1], 0); // handling last carry
// multiply accumulate odd part of new row with even part prev row (doesnt need a carry)
// first active odd j, no carry in, only carry out
even[i + first_active_j_odd] = ptx::mad_lo_cc(a[first_active_j_odd], b[i], even[i + first_active_j_odd]);
even[i + first_active_j_odd + 1] = ptx::madc_hi_cc(a[first_active_j_odd], b[i], even[i + first_active_j_odd + 1]);
// for loop carry in carry out
#pragma unroll
for (j = first_active_j_odd + 2; j < TLC; j+=2)
{
even[i + j] = ptx::madc_lo_cc(a[j], b[i], even[i + j]);
even[i + j + 1] = ptx::madc_hi_cc(a[j], b[i], even[i + j + 1]);
}
// even bi's
// same start-index computation for row b[i + 1]
uint32_t const first_active_j1 = TLC - 1 - (i + 1) ;
uint32_t const first_active_j_odd1 = first_active_j1 + (1 - (first_active_j1 % 2));
uint32_t const first_active_j_even1 = first_active_j1 + first_active_j1 % 2;
// multiply accumulate even part of new row with even part of prev row // needs a carry
// first active even j, no carry in, only carry out
even[first_active_j_even1 + i + 1] = ptx::mad_lo_cc(a[first_active_j_even1], b[i + 1], even[first_active_j_even1 + i + 1]);
even[first_active_j_even1 + i + 2] = ptx::madc_hi_cc(a[first_active_j_even1], b[i + 1], even[first_active_j_even1 + i + 2]);
// for loop, carry in, carry out.
#pragma unroll
for (j = first_active_j_even1 + 2; j < TLC; j+=2)
{
even[i + j + 1] = ptx::madc_lo_cc(a[j], b[i + 1], even[i + j + 1]);
even[i + j + 2] = ptx::madc_hi_cc(a[j], b[i + 1], even[i + j + 2]);
}
even[i + j + 1] = ptx::addc(even[i + j + 1], 0); // handling last carry
// multiply accumulate odd part of new row with odd part of prev row
// first active odd j, no carry in, only carry out
odd[first_active_j_odd1 + i] = ptx::mad_lo_cc(a[first_active_j_odd1], b[i + 1], odd[first_active_j_odd1 + i]);
odd[first_active_j_odd1+ i + 1] = ptx::madc_hi_cc(a[first_active_j_odd1], b[i + 1], odd[first_active_j_odd1 + i + 1]);
// for loop, carry in, carry out.
#pragma unroll
for (j = first_active_j_odd1 + 2; j < TLC; j+=2)
{
odd[i + j] = ptx::madc_lo_cc(a[j], b[i + 1], odd[i + j]);
odd[i + j + 1] = ptx::madc_hi_cc(a[j], b[i + 1], odd[i + j + 1]);
}
}
// last round, i = TLC - 1
// (this row is accumulated in full, starting from a[0])
odd[i - 1] = ptx::mad_lo_cc(a[0], b[i], odd[i - 1]);
odd[i] = ptx::madc_hi_cc(a[0], b[i], odd[i]);
// for loop carry in carry out
#pragma unroll
for (j = 2; j < TLC; j+=2)
{
odd[i + j - 1] = ptx::madc_lo_cc(a[j], b[i], odd[i + j - 1]);
odd[i + j] = ptx::madc_hi_cc(a[j], b[i], odd[i + j]);
}
odd[i + j - 1] = ptx::addc(odd[i + j - 1], 0); // handling last carry
// multiply accumulate odd part of new row with even part prev row
// j = 1, no carry in, only carry out
even[i + 1] = ptx::mad_lo_cc(a[1], b[i], even[i + 1]);
even[i + 2] = ptx::madc_hi_cc(a[1], b[i], even[i + 2]);
// for loop carry in carry out
#pragma unroll
for (j = 3; j < TLC; j+=2)
{
even[i + j] = ptx::madc_lo_cc(a[j], b[i], even[i + j]);
even[i + j + 1] = ptx::madc_hi_cc(a[j], b[i], even[i + j + 1]);
}
// add even and odd parts
even[1] = ptx::add_cc(even[1], odd[0]);
#pragma unroll
for (i = 1; i < 2 * TLC - 2; i++)
even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
even[i + 1] = ptx::addc(even[i + 1], 0);
}
// Device-side multiplication that only computes the low part of as * bs.
// Rows are accumulated with mul_n / mad_row into `even` (aliasing rs.limbs) and a
// local `odd`; from the third row on, the row length passed to mad_row shrinks with
// i (TLC - i + 2), so the high-order partial products are skipped. The final merge
// only covers limbs 1 .. TLC + 2 of `even`.
static DEVICE_INLINE void multiply_lsb_raw_device(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
// r = a * b is correct for the first TLC + 1 digits. (not computing from TLC + 1 to 2*TLC - 2).
const uint32_t *a = as.limbs;
const uint32_t *b = bs.limbs;
uint32_t *even = rs.limbs;
__align__(8) uint32_t odd[2 * TLC - 2];
// rows b[0] and b[1] are processed at full length
mul_n(even, a, b[0]);
mul_n(odd, a + 1, b[0]);
mad_row(&even[2], &odd[0], a, b[1]);
size_t i;
#pragma unroll
for (i = 2; i < TLC - 1; i += 2) {
// two rows per iteration, swapping the roles of the accumulators between them
mad_row(&odd[i], &even[i], a, b[i], TLC - i + 2);
mad_row(&even[i + 2], &odd[i], a, b[i + 1], TLC - i + 2);
}
// merge |even| and |odd|
even[1] = ptx::add_cc(even[1], odd[0]);
for (i = 1; i < TLC + 1; i++)
even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
even[i + 1] = ptx::addc(even[i + 1], 0);
}
// Device-side multiplication aimed at the high part of as * bs.
// The first two rows (b[0], b[1]) are accumulated only from the limb offsets passed
// to mul_n_msb / mad_row_msb; all later rows are accumulated in full with mad_row.
// `even` aliases rs.limbs and `odd` is a local accumulator; both are merged at the
// end. Which low limbs of rs are valid is not established here - TODO confirm with
// the author before relying on them.
static DEVICE_INLINE void multiply_msb_raw_device(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
  const uint32_t *a = as.limbs;
  const uint32_t *b = bs.limbs;
  uint32_t *even = rs.limbs;
  __align__(8) uint32_t odd[2 * TLC - 2];
  // Clear even[0 .. 2*TLC - 2], exactly as before.
  for (unsigned k = 0; k < 2 * TLC - 1; k++)
    even[k] = 0;
  // `odd` holds only 2*TLC - 2 entries. The previous combined loop also wrote
  // odd[2*TLC - 2], one element past the end of the array.
  for (unsigned k = 0; k < 2 * TLC - 2; k++)
    odd[k] = 0;
  uint32_t min_indexes_sum = TLC - 1;
  // only diagonal
  mul_n_msb(even, a, b[0], TLC, min_indexes_sum);
  mul_n_msb(odd, a + 1, b[0], TLC, min_indexes_sum - 1);
  mad_row_msb(&even[2], &odd[0], a, b[1], TLC, min_indexes_sum - 1);
  size_t i;
  #pragma unroll
  for (i = 2; i < TLC - 1; i += 2) {
    mad_row(&odd[i], &even[i], a, b[i]);
    mad_row(&even[i + 2], &odd[i], a, b[i + 1]);
  }
  // merge |even| and |odd|
  even[1] = ptx::add_cc(even[1], odd[0]);
  for (i = 1; i < 2 * TLC - 2; i++)
    even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
  even[i + 1] = ptx::addc(even[i + 1], 0);
}
static HOST_INLINE void multiply_raw_host(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
const uint32_t *a = as.limbs;
const uint32_t *b = bs.limbs;
uint32_t *r = rs.limbs;
for (unsigned i = 0; i < TLC; i++) {
uint32_t carry = 0;
for (unsigned j = 0; j < TLC; j++)
for (unsigned j = 0; j < TLC; j++)
r[j + i] = host_math::madc_cc(a[j], b[i], r[j + i], carry);
r[TLC + i] = carry;
}
@@ -484,6 +788,22 @@ template <class CONFIG> class Field {
#endif
}
// Low-part multiplication dispatcher: device compilation uses
// multiply_lsb_raw_device, host compilation falls back to the full
// multiply_raw_host.
static HOST_DEVICE_INLINE void multiply_raw_lsb(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
#ifndef __CUDA_ARCH__
  multiply_raw_host(as, bs, rs);
#else
  multiply_lsb_raw_device(as, bs, rs);
#endif
}
// High-part multiplication dispatcher: device compilation uses
// ingo_msb_multiply_raw_device, host compilation falls back to the full
// multiply_raw_host.
static HOST_DEVICE_INLINE void multiply_raw_msb(const ff_storage &as, const ff_storage &bs, ff_wide_storage &rs) {
#ifndef __CUDA_ARCH__
  multiply_raw_host(as, bs, rs);
#else
  ingo_msb_multiply_raw_device(as, bs, rs);
#endif
}
public:
ff_storage limbs_storage;
@@ -495,6 +815,8 @@ template <class CONFIG> class Field {
const uint32_t limb_lsb_idx = (digit_num*digit_width) / 32;
const uint32_t shift_bits = (digit_num*digit_width) % 32;
unsigned rv = limbs_storage.limbs[limb_lsb_idx] >> shift_bits;
// printf("get_scalar_func digit %u rv %u\n",digit_num,rv);
// if (shift_bits + digit_width > 32) {
if ((shift_bits + digit_width > 32) && (limb_lsb_idx+1 < TLC)) {
rv += limbs_storage.limbs[limb_lsb_idx + 1] << (32 - shift_bits);
}
@@ -505,7 +827,6 @@ template <class CONFIG> class Field {
static HOST_INLINE Field rand_host() {
std::random_device rd;
std::mt19937_64 generator(rd());
// std::mt19937_64 generator(0);
std::uniform_int_distribution<unsigned> distribution;
Field value{};
for (unsigned i = 0; i < TLC; i++)
@@ -531,13 +852,13 @@ template <class CONFIG> class Field {
return os;
}
friend HOST_DEVICE_INLINE Field operator+(Field xs, const Field& ys) {
friend HOST_DEVICE_INLINE Field operator+(Field xs, const Field& ys) {
Field rs = {};
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
return reduce<1>(rs);
}
friend HOST_DEVICE_INLINE Field operator-(Field xs, const Field& ys) {
friend HOST_DEVICE_INLINE Field operator-(Field xs, const Field& ys) {
Field rs = {};
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
if (carry == 0)
@@ -548,26 +869,53 @@ template <class CONFIG> class Field {
}
template <unsigned MODULUS_MULTIPLE = 1>
static constexpr HOST_DEVICE_INLINE Wide mul_wide(const Field& xs, const Field& ys) {
Wide rs = {};
static constexpr HOST_DEVICE_INLINE wide mul_wide(const Field& xs, const Field& ys) {
wide rs = {};
multiply_raw(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
return rs;
}
// Device-side limb-wise subtraction r = x - y over the first `num_limbs` 32-bit limbs.
//   x, y      - input limb arrays; at least `num_limbs` entries are read from each.
//   r         - output limb array; `num_limbs` entries are written.
//   num_limbs - number of limbs to process (limb 0 is always processed, even if this is 0).
// Returns the value of ptx::subc(0, 0) issued after the last limb; the caller in operator* treats
// a return of 0 as "no borrow" and keeps the subtracted value.
// The statements must stay in this exact order: presumably each ptx call consumes the borrow flag
// left by the previous one (sub_cc starts the chain, subc_cc continues it) — confirm against ptx.cuh.
static constexpr DEVICE_INLINE uint32_t sub_limbs_partial_device(uint32_t *x, uint32_t *y, uint32_t *r, uint32_t num_limbs) {
r[0] = ptx::sub_cc(x[0], y[0]);
#pragma unroll
for (unsigned i = 1; i < num_limbs; i++)
r[i] = ptx::subc_cc(x[i], y[i]);
return ptx::subc(0, 0);
}
// Subtracts the first `num_limbs` limbs of `y` from `x` into `r` and returns the borrow indicator
// produced by the selected implementation.
// Device builds (__CUDA_ARCH__ defined) use sub_limbs_partial_device; host builds use
// sub_limbs_partial_host.
// NOTE(review): callers compare the result against 0 ("no borrow"); verify that the host and device
// variants agree on the non-zero encoding if anything ever depends on it.
static constexpr HOST_DEVICE_INLINE uint32_t sub_limbs_partial(uint32_t *x, uint32_t *y, uint32_t *r, uint32_t num_limbs) {
#ifdef __CUDA_ARCH__
return sub_limbs_partial_device(x, y, r, num_limbs);
#else
return sub_limbs_partial_host(x, y, r, num_limbs);
#endif
}
friend HOST_DEVICE_INLINE Field operator*(const Field& xs, const Field& ys) {
Wide xy = mul_wide(xs, ys);
Field xy_hi = xy.get_higher_with_slack();
Wide l = {};
multiply_raw(xy_hi.limbs_storage, get_m(), l.limbs_storage);
//printf("operator* called \n");
wide xy = mul_wide(xs, ys); // full mult
Field xy_hi = xy.get_higher_with_slack(); // xy << slack_bits
wide l = {};
multiply_raw_msb(xy_hi.limbs_storage, get_m(), l.limbs_storage); // MSB mult
Field l_hi = l.get_higher_with_slack();
Wide lp = {};
multiply_raw(l_hi.limbs_storage, get_modulus(), lp.limbs_storage);
Wide r_wide = xy - lp;
Wide r_wide_reduced = {};
uint32_t reduced = sub_limbs<true>(r_wide.limbs_storage, modulus_wide(), r_wide_reduced.limbs_storage);
r_wide = reduced ? r_wide : r_wide_reduced;
wide lp = {};
multiply_raw_lsb(l_hi.limbs_storage, get_modulus(), lp.limbs_storage); // LSB mult
wide r_wide = xy - lp;
wide r_wide_reduced = {};
// uint32_t reduced = sub_limbs<true>(r_wide.limbs_storage, modulus_wide(), r_wide_reduced.limbs_storage);
// r_wide = reduced ? r_wide : r_wide_reduced;
for (unsigned i = 0; i < TLC + 1; i++)
{
uint32_t carry = sub_limbs_partial(r_wide.limbs_storage.limbs, modulus_wide().limbs, r_wide_reduced.limbs_storage.limbs, TLC + 1);
if (carry == 0) // continue to reduce
r_wide = r_wide_reduced;
else // done
break;
}
// number of wrap around is bounded by TLC + 1 times.
Field r = r_wide.get_lower();
return reduce<1>(r);
return (r);
}
friend HOST_DEVICE_INLINE bool operator==(const Field& xs, const Field& ys) {
@@ -591,24 +939,22 @@ template <class CONFIG> class Field {
return !(xs == ys);
}
template <const Field& multiplier, class T> static constexpr HOST_DEVICE_INLINE T mul_const(const T &xs) {
return mul_unsigned<multiplier.limbs_storage.limbs[0], T>(xs);
}
template <uint32_t mutliplier, class T, unsigned REDUCTION_SIZE = 1>
static constexpr HOST_DEVICE_INLINE T mul_unsigned(const T &xs) {
T rs = {};
T temp = xs;
template <unsigned REDUCTION_SIZE = 1>
static constexpr HOST_DEVICE_INLINE Field mul(const unsigned scalar, const Field &xs) {
Field rs = {};
Field temp = xs;
unsigned l = scalar;
bool is_zero = true;
#ifdef __CUDA_ARCH__
#pragma unroll
#endif
for (unsigned i = 0; i < 32; i++) {
if (mutliplier & (1 << i)) {
if (l & 1) {
rs = is_zero ? temp : (rs + temp);
is_zero = false;
}
if (mutliplier & ((1 << (31 - i) - 1) << (i + 1)))
l >>= 1;
if (l == 0)
break;
temp = temp + temp;
}
@@ -616,7 +962,7 @@ template <class CONFIG> class Field {
}
template <unsigned MODULUS_MULTIPLE = 1>
static constexpr HOST_DEVICE_INLINE Wide sqr_wide(const Field& xs) {
static constexpr HOST_DEVICE_INLINE wide sqr_wide(const Field& xs) {
// TODO: change to a more efficient squaring
return mul_wide<MODULUS_MULTIPLE>(xs, xs);
}

View File

@@ -1,49 +1,8 @@
#include <cuda.h>
#include "../curves/bls12_381/curve_config.cuh"
#include "../curves/bls12_377/curve_config.cuh"
#include "../curves/bn254/curve_config.cuh"
#include "../curves/curve_config.cuh"
#include "projective.cuh"
extern "C" bool eq_bls12_381(BLS12_381::projective_t *point1, BLS12_381::projective_t *point2)
extern "C" bool eq(projective_t *point1, projective_t *point2, size_t device_id = 0)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_381::point_field_t::zero()) && (point1->y == BLS12_381::point_field_t::zero()) && (point1->z == BLS12_381::point_field_t::zero())) &&
!((point2->x == BLS12_381::point_field_t::zero()) && (point2->y == BLS12_381::point_field_t::zero()) && (point2->z == BLS12_381::point_field_t::zero()));
}
// C-ABI equality check for BLS12-377 projective points.
// Returns true only when the points compare equal and neither of them is the all-zero
// coordinate triple (x, y, z) = (0, 0, 0).
extern "C" bool eq_bls12_377(BLS12_377::projective_t *point1, BLS12_377::projective_t *point2)
{
  typedef BLS12_377::point_field_t coord_t;

  if (!(*point1 == *point2))
    return false;

  // An all-zero triple is rejected on either side.
  if ((point1->x == coord_t::zero()) && (point1->y == coord_t::zero()) && (point1->z == coord_t::zero()))
    return false;

  return !((point2->x == coord_t::zero()) && (point2->y == coord_t::zero()) && (point2->z == coord_t::zero()));
}
// C-ABI equality check for BN254 projective points.
// Returns true only when the points compare equal and neither of them is the all-zero
// coordinate triple (x, y, z) = (0, 0, 0).
extern "C" bool eq_bn254(BN254::projective_t *point1, BN254::projective_t *point2)
{
  typedef BN254::point_field_t coord_t;

  if (!(*point1 == *point2))
    return false;

  // An all-zero triple is rejected on either side.
  if ((point1->x == coord_t::zero()) && (point1->y == coord_t::zero()) && (point1->z == coord_t::zero()))
    return false;

  return !((point2->x == coord_t::zero()) && (point2->y == coord_t::zero()) && (point2->z == coord_t::zero()));
}
#if defined(G2_DEFINED)
// C-ABI equality check for BLS12-381 G2 projective points.
// Returns true only when the points compare equal and neither of them is the all-zero
// coordinate triple (x, y, z) = (0, 0, 0).
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t *point1, BLS12_381::g2_projective_t *point2)
{
  typedef BLS12_381::g2_point_field_t coord_t;

  if (!(*point1 == *point2))
    return false;

  // An all-zero triple is rejected on either side.
  if ((point1->x == coord_t::zero()) && (point1->y == coord_t::zero()) && (point1->z == coord_t::zero()))
    return false;

  return !((point2->x == coord_t::zero()) && (point2->y == coord_t::zero()) && (point2->z == coord_t::zero()));
}
// C-ABI equality check for BLS12-377 G2 projective points.
// Returns true only when the points compare equal and neither of them is the all-zero
// coordinate triple (x, y, z) = (0, 0, 0).
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t *point1, BLS12_377::g2_projective_t *point2)
{
  typedef BLS12_377::g2_point_field_t coord_t;

  if (!(*point1 == *point2))
    return false;

  // An all-zero triple is rejected on either side.
  if ((point1->x == coord_t::zero()) && (point1->y == coord_t::zero()) && (point1->z == coord_t::zero()))
    return false;

  return !((point2->x == coord_t::zero()) && (point2->y == coord_t::zero()) && (point2->z == coord_t::zero()));
}
// C-ABI equality check for BN254 G2 projective points.
// Returns true only when the points compare equal and neither of them is the all-zero
// coordinate triple (x, y, z) = (0, 0, 0).
extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective_t *point2)
{
  typedef BN254::g2_point_field_t coord_t;

  if (!(*point1 == *point2))
    return false;

  // An all-zero triple is rejected on either side.
  if ((point1->x == coord_t::zero()) && (point1->y == coord_t::zero()) && (point1->z == coord_t::zero()))
    return false;

  return !((point2->x == coord_t::zero()) && (point2->y == coord_t::zero()) && (point2->z == coord_t::zero()));
}
#endif
return (*point1 == *point2);
}

View File

@@ -2,7 +2,7 @@
#include "affine.cuh"
template <typename FF, class SCALAR_FF, const FF& B_VALUE>
template <class FF, class SCALAR_FF, class GEN, unsigned B_VALUE>
class Projective {
friend Affine<FF>;
@@ -11,6 +11,10 @@ class Projective {
FF y;
FF z;
// Builds the projective point (GEN::generator_x, GEN::generator_y, 1) from the constants
// supplied by the GEN template parameter.
static HOST_DEVICE_INLINE Projective generator() {
  const FF gen_x = FF { GEN::generator_x };
  const FF gen_y = FF { GEN::generator_y };
  return {gen_x, gen_y, FF::one()};
}
// Returns the point with coordinates (0, 1, 0); scalar multiplication in this class starts its
// accumulator from this value.
static HOST_DEVICE_INLINE Projective zero() {
  const FF zero_coord = FF::zero();
  return {zero_coord, FF::one(), zero_coord};
}
@@ -24,56 +28,50 @@ class Projective {
return {point.x, point.y, FF::one()};
}
// Builds the projective point (FF::generator_x(), FF::generator_y(), 1) from the generator
// coordinates exposed by the field type.
static HOST_DEVICE_INLINE Projective generator() {
  const FF gen_x = FF::generator_x();
  const FF gen_y = FF::generator_y();
  return {gen_x, gen_y, FF::one()};
}
// Returns `point` with its y coordinate replaced by FF::neg(y); x and z are copied unchanged.
static HOST_DEVICE_INLINE Projective neg(const Projective &point) {
  const FF negated_y = FF::neg(point.y);
  return {point.x, negated_y, point.z};
}
// Adds two projective points and returns the sum as (X3, Y3, Z3).
// In the step comments, `b3` denotes 3 * B_VALUE (see t20 and t23), "·" is a field multiplication
// and "−" a field subtraction. The trailing `< k` annotations are kept from the original source;
// presumably they bound each value in multiples of the modulus — TODO confirm.
// NOTE(review): the operation sequence looks like the complete addition formulas for short
// Weierstrass curves with a = 0 (Renes–Costello–Batina) — confirm against the paper before
// relying on exceptional-case behavior.
friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Projective& p2) {
const FF X1 = p1.x; // < 2
const FF Y1 = p1.y; // < 2
const FF Z1 = p1.z; // < 2
const FF X2 = p2.x; // < 2
const FF Y2 = p2.y; // < 2
const FF Z2 = p2.z; // < 2
const FF t00 = X1 * X2; // t00 ← X1 · X2 < 2
const FF t01 = Y1 * Y2; // t01 ← Y1 · Y2 < 2
const FF t02 = Z1 * Z2; // t02 ← Z1 · Z2 < 2
const FF t03 = X1 + Y1; // t03 ← X1 + Y1 < 4
const FF t04 = X2 + Y2; // t04 ← X2 + Y2 < 4
const FF t05 = t03 * t04; // t05 ← t03 · t04 < 3
const FF t06 = t00 + t01; // t06 ← t00 + t01 < 4
const FF t07 = t05 - t06; // t07 ← t05 − t06 < 2
const FF t08 = Y1 + Z1; // t08 ← Y1 + Z1 < 4
const FF t09 = Y2 + Z2; // t09 ← Y2 + Z2 < 4
const FF t10 = t08 * t09; // t10 ← t08 · t09 < 3
const FF t11 = t01 + t02; // t11 ← t01 + t02 < 4
const FF t12 = t10 - t11; // t12 ← t10 − t11 < 2
const FF t13 = X1 + Z1; // t13 ← X1 + Z1 < 4
const FF t14 = X2 + Z2; // t14 ← X2 + Z2 < 4
const FF t15 = t13 * t14; // t15 ← t13 · t14 < 3
const FF t16 = t00 + t02; // t16 ← t00 + t02 < 4
const FF t17 = t15 - t16; // t17 ← t15 − t16 < 2
const FF t18 = t00 + t00; // t18 ← t00 + t00 < 2
const FF t19 = t18 + t00; // t19 ← t18 + t00 < 2
const FF t20 = FF::template mul_unsigned<3>(
FF::template mul_const<B_VALUE>(t02)); // t20 ← b3 · t02 < 2
const FF t21 = t01 + t20; // t21 ← t01 + t20 < 2
const FF t22 = t01 - t20; // t22 ← t01 − t20 < 2
const FF t23 = FF::template mul_unsigned<3>(
FF::template mul_const<B_VALUE>(t17)); // t23 ← b3 · t17 < 2
const FF t24 = t12 * t23; // t24 ← t12 · t23 < 2
const FF t25 = t07 * t22; // t25 ← t07 · t22 < 2
const FF X3 = t25 - t24; // X3 ← t25 − t24 < 2
const FF t27 = t23 * t19; // t27 ← t23 · t19 < 2
const FF t28 = t22 * t21; // t28 ← t22 · t21 < 2
const FF Y3 = t28 + t27; // Y3 ← t28 + t27 < 2
const FF t30 = t19 * t07; // t30 ← t19 · t07 < 2
const FF t31 = t21 * t12; // t31 ← t21 · t12 < 2
const FF Z3 = t31 + t30; // Z3 ← t31 + t30 < 2
const FF X1 = p1.x; // < 2
const FF Y1 = p1.y; // < 2
const FF Z1 = p1.z; // < 2
const FF X2 = p2.x; // < 2
const FF Y2 = p2.y; // < 2
const FF Z2 = p2.z; // < 2
const FF t00 = X1 * X2; // t00 ← X1 · X2 < 2
const FF t01 = Y1 * Y2; // t01 ← Y1 · Y2 < 2
const FF t02 = Z1 * Z2; // t02 ← Z1 · Z2 < 2
const FF t03 = X1 + Y1; // t03 ← X1 + Y1 < 4
const FF t04 = X2 + Y2; // t04 ← X2 + Y2 < 4
const FF t05 = t03 * t04; // t05 ← t03 · t04 < 3
const FF t06 = t00 + t01; // t06 ← t00 + t01 < 4
const FF t07 = t05 - t06; // t07 ← t05 − t06 < 2
const FF t08 = Y1 + Z1; // t08 ← Y1 + Z1 < 4
const FF t09 = Y2 + Z2; // t09 ← Y2 + Z2 < 4
const FF t10 = t08 * t09; // t10 ← t08 · t09 < 3
const FF t11 = t01 + t02; // t11 ← t01 + t02 < 4
const FF t12 = t10 - t11; // t12 ← t10 − t11 < 2
const FF t13 = X1 + Z1; // t13 ← X1 + Z1 < 4
const FF t14 = X2 + Z2; // t14 ← X2 + Z2 < 4
const FF t15 = t13 * t14; // t15 ← t13 · t14 < 3
const FF t16 = t00 + t02; // t16 ← t00 + t02 < 4
const FF t17 = t15 - t16; // t17 ← t15 − t16 < 2
const FF t18 = t00 + t00; // t18 ← t00 + t00 < 2
const FF t19 = t18 + t00; // t19 ← t18 + t00 < 2
const FF t20 = FF::mul(3 * B_VALUE, t02); // t20 ← b3 · t02 < 2
const FF t21 = t01 + t20; // t21 ← t01 + t20 < 2
const FF t22 = t01 - t20; // t22 ← t01 − t20 < 2
const FF t23 = FF::mul(3 * B_VALUE, t17); // t23 ← b3 · t17 < 2
const FF t24 = t12 * t23; // t24 ← t12 · t23 < 2
const FF t25 = t07 * t22; // t25 ← t07 · t22 < 2
const FF X3 = t25 - t24; // X3 ← t25 − t24 < 2
const FF t27 = t23 * t19; // t27 ← t23 · t19 < 2
const FF t28 = t22 * t21; // t28 ← t22 · t21 < 2
const FF Y3 = t28 + t27; // Y3 ← t28 + t27 < 2
const FF t30 = t19 * t07; // t30 ← t19 · t07 < 2
const FF t31 = t21 * t12; // t31 ← t21 · t12 < 2
const FF Z3 = t31 + t30; // Z3 ← t31 + t30 < 2
return {X3, Y3, Z3};
}
@@ -87,7 +85,7 @@ class Projective {
}
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Projective& point) {
os << "Point { x: " << point.x << "; y: " << point.y << "; z: " << point.z << " }";
os << "x: " << point.x << "; y: " << point.y << "; z: " << point.z;
return os;
}
@@ -97,7 +95,7 @@ class Projective {
friend HOST_DEVICE_INLINE Projective operator*(SCALAR_FF scalar, const Projective& point) {
Projective res = zero();
#ifdef __CUDA_ARCH__
#ifdef CUDA_ARCH
#pragma unroll
#endif
for (int i = 0; i < SCALAR_FF::NBITS; i++) {
@@ -122,7 +120,7 @@ class Projective {
static HOST_DEVICE_INLINE bool is_on_curve(const Projective &point) {
if (is_zero(point))
return true;
bool eq_holds = (FF::template mul_const<B_VALUE>(FF::sqr(point.z) * point.z) + FF::sqr(point.x) * point.x == point.z * FF::sqr(point.y));
bool eq_holds = (FF::mul(B_VALUE, FF::sqr(point.z) * point.z) + FF::sqr(point.x) * point.x == point.z * FF::sqr(point.y));
return point.z != FF::zero() && eq_holds;
}

View File

@@ -1,8 +1,9 @@
#include <cuda_runtime.h>
#include <gtest/gtest.h>
#include "test_kernels.cuh"
#include <iostream>
#include <boost/multiprecision/cpp_int.hpp>
namespace mp = boost::multiprecision;
template <class T>
int device_populate_random(T* d_elements, unsigned n) {
@@ -20,92 +21,90 @@ int device_set(T* d_elements, T el, unsigned n) {
return cudaMemcpy(d_elements, h_elements, sizeof(T) * n, cudaMemcpyHostToDevice);
}
// Assembles `length` 32-bit limbs, least-significant limb first, into a single
// boost::multiprecision 1024-bit integer. An empty input yields 0.
mp::int1024_t convert_to_boost_mp(uint32_t *a, uint32_t length)
{
  mp::int1024_t value = 0;
  // Horner-style accumulation from the most-significant limb down:
  // value = value * 2^32 + limb, which equals the sum of a[i] * 2^(32 * i).
  for (uint32_t remaining = length; remaining > 0; remaining--)
  {
    value <<= 32;
    value += a[remaining - 1];
  }
  return value;
}
class PrimitivesTest : public ::testing::Test {
protected:
static const unsigned n = 1 << 5;
static const unsigned n = 1 << 10;
proj *points1{};
proj *points2{};
g2_proj *g2_points1{};
g2_proj *g2_points2{};
scalar_field *scalars1{};
scalar_field *scalars2{};
proj *zero_points{};
g2_proj *g2_zero_points{};
scalar_field *zero_scalars{};
scalar_field *one_scalars{};
affine *aff_points{};
g2_affine *g2_aff_points{};
proj *res_points1{};
proj *res_points2{};
g2_proj *g2_res_points1{};
g2_proj *g2_res_points2{};
scalar_field *res_scalars1{};
scalar_field *res_scalars2{};
scalar_field::wide *res_scalars_wide{};
scalar_field::wide *res_scalars_wide_full{};
PrimitivesTest() {
assert(!cudaDeviceReset());
assert(!cudaMallocManaged(&points1, n * sizeof(proj)));
assert(!cudaMallocManaged(&points2, n * sizeof(proj)));
assert(!cudaMallocManaged(&g2_points1, n * sizeof(g2_proj)));
assert(!cudaMallocManaged(&g2_points2, n * sizeof(g2_proj)));
assert(!cudaMallocManaged(&scalars1, n * sizeof(scalar_field)));
assert(!cudaMallocManaged(&scalars2, n * sizeof(scalar_field)));
assert(!cudaMallocManaged(&zero_points, n * sizeof(proj)));
assert(!cudaMallocManaged(&g2_zero_points, n * sizeof(g2_proj)));
assert(!cudaMallocManaged(&zero_scalars, n * sizeof(scalar_field)));
assert(!cudaMallocManaged(&one_scalars, n * sizeof(scalar_field)));
assert(!cudaMallocManaged(&aff_points, n * sizeof(affine)));
assert(!cudaMallocManaged(&g2_aff_points, n * sizeof(g2_affine)));
assert(!cudaMallocManaged(&res_points1, n * sizeof(proj)));
assert(!cudaMallocManaged(&res_points2, n * sizeof(proj)));
assert(!cudaMallocManaged(&g2_res_points1, n * sizeof(g2_proj)));
assert(!cudaMallocManaged(&g2_res_points2, n * sizeof(g2_proj)));
assert(!cudaMallocManaged(&res_scalars1, n * sizeof(scalar_field)));
assert(!cudaMallocManaged(&res_scalars2, n * sizeof(scalar_field)));
assert(!cudaMallocManaged(&res_scalars_wide, n * sizeof(scalar_field::wide)));
assert(!cudaMallocManaged(&res_scalars_wide_full, n * sizeof(scalar_field::wide)));
}
~PrimitivesTest() override {
cudaFree(points1);
cudaFree(points2);
cudaFree(g2_points1);
cudaFree(g2_points2);
cudaFree(scalars1);
cudaFree(scalars2);
cudaFree(zero_points);
cudaFree(g2_zero_points);
cudaFree(zero_scalars);
cudaFree(one_scalars);
cudaFree(aff_points);
cudaFree(g2_aff_points);
cudaFree(res_points1);
cudaFree(res_points2);
cudaFree(g2_res_points1);
cudaFree(g2_res_points2);
cudaFree(res_scalars1);
cudaFree(res_scalars2);
cudaFree(res_scalars_wide);
cudaFree(res_scalars_wide_full);
cudaDeviceReset();
}
void SetUp() override {
ASSERT_EQ(device_populate_random<proj>(points1, n), cudaSuccess);
ASSERT_EQ(device_populate_random<proj>(points2, n), cudaSuccess);
ASSERT_EQ(device_populate_random<g2_proj>(g2_points1, n), cudaSuccess);
ASSERT_EQ(device_populate_random<g2_proj>(g2_points2, n), cudaSuccess);
ASSERT_EQ(device_populate_random<scalar_field>(scalars1, n), cudaSuccess);
ASSERT_EQ(device_populate_random<scalar_field>(scalars2, n), cudaSuccess);
ASSERT_EQ(device_set<proj>(zero_points, proj::zero(), n), cudaSuccess);
ASSERT_EQ(device_set<g2_proj>(g2_zero_points, g2_proj::zero(), n), cudaSuccess);
ASSERT_EQ(device_set<scalar_field>(zero_scalars, scalar_field::zero(), n), cudaSuccess);
ASSERT_EQ(device_set<scalar_field>(one_scalars, scalar_field::one(), n), cudaSuccess);
ASSERT_EQ(cudaMemset(aff_points, 0, n * sizeof(affine)), cudaSuccess);
ASSERT_EQ(cudaMemset(g2_aff_points, 0, n * sizeof(g2_affine)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_points1, 0, n * sizeof(proj)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_points2, 0, n * sizeof(proj)), cudaSuccess);
ASSERT_EQ(cudaMemset(g2_res_points1, 0, n * sizeof(g2_proj)), cudaSuccess);
ASSERT_EQ(cudaMemset(g2_res_points2, 0, n * sizeof(g2_proj)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars1, 0, n * sizeof(scalar_field)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars2, 0, n * sizeof(scalar_field)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars_wide, 0, n * sizeof(scalar_field::wide)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars_wide_full, 0, n * sizeof(scalar_field::wide)), cudaSuccess);
}
};
@@ -279,103 +278,188 @@ TEST_F(PrimitivesTest, ECMixedAdditionOfNegatedPointEqSubtraction) {
ASSERT_EQ(res_points1[i], points1[i] + res_points2[i]);
}
TEST_F(PrimitivesTest, G2ECRandomPointsAreOnCurve) {
for (unsigned i = 0; i < 2; i++)
ASSERT_PRED1(g2_proj::is_on_curve, g2_points1[i]);
// Checks the device LSB multiplier against the device full multiplier: for every element, the
// lowest TLC + 1 limbs of both wide results must be identical.
// NOTE(review): as currently written, mp_lsb_mult and mp_mult launch a single block of 32 threads,
// while both result buffers are zero-filled in SetUp — so for j >= 32 this compares 0 with 0 and
// only the first 32 elements are really exercised.
TEST_F(PrimitivesTest, MP_LSB_MULT) {
// LSB multiply, check correctness of first TLC + 1 digits result.
ASSERT_EQ(mp_lsb_mult(scalars1, scalars2, res_scalars_wide), cudaSuccess);
// Debug print of element 0, limbs in index order (limb 0 first). std::hex is sticky on std::cout
// and limbs are not zero-padded, so the output is a visual aid rather than a parseable number.
std::cout << "first GPU lsb mult output = 0x";
for (int i=0; i<2*scalar_field::TLC; i++)
{
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i];
}
std::cout << std::endl;
// Reference: full-width product of the same inputs.
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
std::cout << "first GPU full mult output = 0x";
for (int i=0; i<2*scalar_field::TLC; i++)
{
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i];
}
std::cout << std::endl;
// Only limbs [0, TLC] are compared; the higher limbs of the LSB result are not checked.
for (int j = 0; j < n; j++)
{
for (int i=0; i<scalar_field::TLC + 1; i++)
{
ASSERT_EQ(res_scalars_wide_full[j].limbs_storage.limbs[i], res_scalars_wide[j].limbs_storage.limbs[i]);
}
}
}
TEST_F(PrimitivesTest, G2ECPointAdditionSubtractionCancel) {
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points1, n), cudaSuccess);
ASSERT_EQ(vec_sub(g2_res_points1, g2_points2, g2_res_points2, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_points1[i], g2_res_points2[i]);
// Smoke test for the device MSB multiplier: runs it next to the full multiplier and prints, for
// element 0 only, which limb indices happen to match.
// NOTE(review): apart from the two launch-status checks this test asserts nothing about the
// numerical result, and mp_msb_mult as currently written launches a single thread, so only
// element 0 is computed.
TEST_F(PrimitivesTest, MP_MSB_MULT) {
// MSB multiply of the inputs; the result is only printed below, not asserted on.
ASSERT_EQ(mp_msb_mult(scalars1, scalars2, res_scalars_wide), cudaSuccess);
// Debug print of element 0, most-significant limb first (std::hex stays set on std::cout).
std::cout << "first GPU msb mult output = 0x";
for (int i=2*scalar_field::TLC - 1; i >=0 ; i--)
{
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i] << " ";
}
std::cout << std::endl;
// Reference: full-width product of the same inputs.
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
std::cout << "first GPU full mult output = 0x";
for (int i=2*scalar_field::TLC - 1; i >=0 ; i--)
{
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i] << " ";
}
std::cout << std::endl;
// Informational only: report matching limbs of element 0 (the top limb is not inspected).
for (int i=0; i < 2*scalar_field::TLC - 1; i++)
{
if (res_scalars_wide_full[0].limbs_storage.limbs[i] == res_scalars_wide[0].limbs_storage.limbs[i])
std::cout << "matched word idx = " << i << std::endl;
}
}
TEST_F(PrimitivesTest, G2ECPointZeroAddition) {
ASSERT_EQ(vec_add(g2_points1, g2_zero_points, g2_res_points1, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_points1[i], g2_res_points1[i]);
// Checks ingo_mp_mult (printed as "INGO") against mp_mult (printed as "ZKSYNC"): for every element,
// limbs [0, 2*TLC - 2] of the two wide products must be identical. The top limb (index 2*TLC - 1)
// is not compared.
// NOTE(review): both launchers as currently written use a single block of 32 threads and both
// result buffers are zero-filled in SetUp, so for j >= 32 the comparison is 0 against 0.
TEST_F(PrimitivesTest, INGO_MP_MULT) {
// Full-width multiply under test (this is an exact comparison, not an MSB error-bound check).
ASSERT_EQ(ingo_mp_mult(scalars1, scalars2, res_scalars_wide), cudaSuccess);
// Debug print of element 0, limb 0 first (std::hex stays set on std::cout).
std::cout << "INGO = 0x";
for (int i=0; i < 2*scalar_field::TLC ; i++)
{
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i] << " ";
}
std::cout << std::endl;
// Reference product of the same inputs.
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
std::cout << "ZKSYNC = 0x";
for (int i=0; i < 2*scalar_field::TLC ; i++)
{
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i] << " ";
}
std::cout << std::endl;
// Informational only: report matching limbs of element 0.
for (int i=0; i < 2*scalar_field::TLC - 1; i++)
{
if (res_scalars_wide_full[0].limbs_storage.limbs[i] == res_scalars_wide[0].limbs_storage.limbs[i])
std::cout << "matched word idx = " << i << std::endl;
}
// The actual assertion: limb-by-limb equality for all elements.
for (int j=0; j<n; j++)
{
for (int i=0; i < 2*scalar_field::TLC - 1; i++)
{
ASSERT_EQ(res_scalars_wide_full[j].limbs_storage.limbs[i], res_scalars_wide[j].limbs_storage.limbs[i]);
}
}
}
TEST_F(PrimitivesTest, G2ECPointAdditionHostDeviceEq) {
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points1, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_points1[i] + g2_points2[i], g2_res_points1[i]);
// Checks the approximate MSB multiplier against an exact host-side product computed with
// boost::multiprecision. For every element, the upper TLC limbs of the GPU result (res_gpu) must
// satisfy  res_mp - num_limbs < res_gpu <= res_mp,  where res_mp is the exact product shifted
// right by num_limbs * 32 bits. In other words the GPU value may undershoot by less than
// num_limbs, and must never overshoot.
TEST_F(PrimitivesTest, INGO_MP_MSB_MULT) {
// MSB multiply of all n elements (this launcher takes the element count).
ASSERT_EQ(ingo_mp_msb_mult(scalars1, scalars2, res_scalars_wide, n), cudaSuccess);
// Debug print of element 0, most-significant limb first (std::hex stays set on std::cout, so the
// boost values printed further down are rendered in hex as well).
std::cout << "INGO MSB = 0x";
for (int i=2*scalar_field::TLC - 1; i >= 0 ; i--)
{
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i] << " ";
}
std::cout << std::endl;
// Full product, used for the debug print only; the assertions below use the boost reference.
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
std::cout << "ZKSYNC = 0x";
for (int i=2*scalar_field::TLC - 1; i >= 0 ; i--)
{
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i] << " ";
}
std::cout << std::endl;
// for (int i=scalar_field::TLC; i < 2*scalar_field::TLC - 1; i++)
// {
// ASSERT_EQ(in_bound, true);
// }
// for (int j=0; j<n; j++)
// {
// for (int i=0; i < 2*scalar_field::TLC - 1; i++)
// {
// ASSERT_EQ(res_scalars_wide_full[j].limbs_storage.limbs[i], res_scalars_wide[j].limbs_storage.limbs[i]);
// }
// }
// mp testing
mp::int1024_t scalar_1_mp = 0;
mp::int1024_t scalar_2_mp = 0;
mp::int1024_t res_mp = 0;
mp::int1024_t res_gpu = 0;
uint32_t num_limbs = scalar_field::TLC;
for (int j=0; j<n; j++)
{
uint32_t* scalar1_limbs = scalars1[j].limbs_storage.limbs;
uint32_t* scalar2_limbs = scalars2[j].limbs_storage.limbs;
scalar_1_mp = convert_to_boost_mp(scalar1_limbs, num_limbs);
scalar_2_mp = convert_to_boost_mp(scalar2_limbs, num_limbs);
// Exact reference: upper half of the 2*num_limbs-limb product.
res_mp = scalar_1_mp * scalar_2_mp;
res_mp = res_mp >> (num_limbs * 32);
// GPU value: limbs [num_limbs, 2*num_limbs) of the wide result.
res_gpu = convert_to_boost_mp(&(res_scalars_wide[j]).limbs_storage.limbs[num_limbs], num_limbs);
std::cout << "res mp = " << res_mp << std::endl;
std::cout << "res gpu = " << res_gpu << std::endl;
std::cout << "error = " << res_mp - res_gpu << std::endl;
// Accept 0 <= res_mp - res_gpu < num_limbs.
bool upper_bound = res_gpu <= res_mp;
bool lower_bound = res_gpu > (res_mp - num_limbs);
bool in_bound = upper_bound && lower_bound;
ASSERT_EQ(in_bound, true);
}
}
TEST_F(PrimitivesTest, G2ECScalarMultiplicationHostDeviceEq) {
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(scalars1[i] * g2_points1[i], g2_res_points1[i]);
// End-to-end check of the field multiplication operator on the device: for every element, the GPU
// result of scalars1[j] * scalars2[j] must equal (scalars1[j] * scalars2[j]) mod p computed on the
// host with boost::multiprecision, where p is read from scalar_field::get_modulus().
TEST_F(PrimitivesTest, INGO_MP_MOD_MULT) {
std::cout << " taking num limbs " << std::endl;
uint32_t num_limbs = scalar_field::TLC;
std::cout << " calling gpu... = " << std::endl;
ASSERT_EQ(ingo_mp_mod_mult(scalars1, scalars2, res_scalars1, n), cudaSuccess);
std::cout << " gpu call done " << std::endl;
// mp testing
mp::int1024_t scalar_1_mp = 0;
mp::int1024_t scalar_2_mp = 0;
mp::int1024_t res_mp = 0;
mp::int1024_t res_gpu = 0;
// Modulus as a boost integer, assembled from its limbs.
mp::int1024_t p = convert_to_boost_mp(scalar_field::get_modulus().limbs, num_limbs);
std::cout << " p = " << p << std::endl;
for (int j=0; j<n; j++)
{
uint32_t* scalar1_limbs = scalars1[j].limbs_storage.limbs;
uint32_t* scalar2_limbs = scalars2[j].limbs_storage.limbs;
scalar_1_mp = convert_to_boost_mp(scalar1_limbs, num_limbs);
scalar_2_mp = convert_to_boost_mp(scalar2_limbs, num_limbs);
// std::cout << " s1 = " << scalar_1_mp << std::endl;
// std::cout << " s2 = " << scalar_2_mp << std::endl;
// Host reference; the GPU result must be the fully reduced representative to compare equal.
res_mp = (scalar_1_mp * scalar_2_mp) % p;
res_gpu = convert_to_boost_mp((res_scalars1[j]).limbs_storage.limbs, num_limbs);
std::cout << "res mp = " << res_mp << std::endl;
std::cout << "res gpu = " << res_gpu << std::endl;
std::cout << "error = " << res_mp - res_gpu << std::endl;
ASSERT_EQ(res_gpu, res_mp);
}
}
// Multiplying a G2 point by the scalar 1 on the device must return the same point.
// The multiplication has to run on the G2 arrays: the previous version multiplied the G1 arrays
// (points1 -> res_points1) and then asserted on g2_res_points1, which was never written and still
// held the zero fill from SetUp.
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByOne) {
  ASSERT_EQ(vec_mul(one_scalars, g2_points1, g2_res_points1, n), cudaSuccess);
  for (unsigned i = 0; i < n; i++)
    ASSERT_EQ(g2_points1[i], g2_res_points1[i]);
}
// (-1) * P computed through scalar multiplication must equal the point negation of P.
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByMinusOne) {
  // res_scalars1 = -1
  ASSERT_EQ(vec_neg(one_scalars, res_scalars1, n), cudaSuccess);
  // g2_res_points1 = (-1) * P
  ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
  // g2_res_points2 = -P
  ASSERT_EQ(vec_neg(g2_points1, g2_res_points2, n), cudaSuccess);
  for (unsigned idx = 0; idx < n; ++idx) {
    ASSERT_EQ(g2_res_points1[idx], g2_res_points2[idx]);
  }
}
// Device result of (1 + 1) * P must equal the host result of the same expression.
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByTwo) {
  // res_scalars1 = 1 + 1, computed on the device
  ASSERT_EQ(vec_add(one_scalars, one_scalars, res_scalars1, n), cudaSuccess);
  ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
  for (unsigned idx = 0; idx < n; ++idx) {
    ASSERT_EQ((one_scalars[idx] + one_scalars[idx]) * g2_points1[idx], g2_res_points1[idx]);
  }
}
// Multiplying by s and then by the field inverse of s must give back the original point.
TEST_F(PrimitivesTest, G2ECScalarMultiplicationInverseCancel) {
  // g2_res_points1 = s * P
  ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
  // res_scalars1 = inverse of s
  ASSERT_EQ(field_vec_inv(scalars1, res_scalars1, n), cudaSuccess);
  // g2_res_points2 = inverse(s) * (s * P)
  ASSERT_EQ(vec_mul(res_scalars1, g2_res_points1, g2_res_points2, n), cudaSuccess);
  for (unsigned idx = 0; idx < n; ++idx) {
    ASSERT_EQ(g2_points1[idx], g2_res_points2[idx]);
  }
}
// s2 * (s1 * P) must equal (s1 * s2) * P.
TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverMultiplication) {
  // g2_res_points2 = s2 * (s1 * P)
  ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
  ASSERT_EQ(vec_mul(scalars2, g2_res_points1, g2_res_points2, n), cudaSuccess);
  // g2_res_points1 is reused for (s1 * s2) * P
  ASSERT_EQ(vec_mul(scalars1, scalars2, res_scalars1, n), cudaSuccess);
  ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
  for (unsigned idx = 0; idx < n; ++idx) {
    ASSERT_EQ(g2_res_points1[idx], g2_res_points2[idx]);
  }
}
// (s1 + s2) * P must equal s1 * P + s2 * P; the left side is evaluated on the host from the
// device-computed scalar sum.
TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverAddition) {
  ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
  ASSERT_EQ(vec_mul(scalars2, g2_points1, g2_res_points2, n), cudaSuccess);
  ASSERT_EQ(vec_add(scalars1, scalars2, res_scalars1, n), cudaSuccess);
  for (unsigned idx = 0; idx < n; ++idx) {
    ASSERT_EQ(res_scalars1[idx] * g2_points1[idx], g2_res_points1[idx] + g2_res_points2[idx]);
  }
}
// Converting to affine on the device and back to projective on the host must compare equal to
// the original point.
TEST_F(PrimitivesTest, G2ECProjectiveToAffine) {
  ASSERT_EQ(point_vec_to_affine(g2_points1, g2_aff_points, n), cudaSuccess);
  for (unsigned idx = 0; idx < n; ++idx) {
    ASSERT_EQ(g2_points1[idx], g2_proj::from_affine(g2_aff_points[idx]));
  }
}
// Projective + affine addition must agree with projective + projective addition of the same points.
TEST_F(PrimitivesTest, G2ECMixedPointAddition) {
  // Affine copy of the second operand.
  ASSERT_EQ(point_vec_to_affine(g2_points2, g2_aff_points, n), cudaSuccess);
  // Mixed addition vs. the all-projective reference.
  ASSERT_EQ(vec_add(g2_points1, g2_aff_points, g2_res_points1, n), cudaSuccess);
  ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points2, n), cudaSuccess);
  for (unsigned idx = 0; idx < n; ++idx) {
    ASSERT_EQ(g2_res_points1[idx], g2_res_points2[idx]);
  }
}
// P1 - affine(P2) computed on the device must equal P1 + (-P2) computed on the host.
TEST_F(PrimitivesTest, G2ECMixedAdditionOfNegatedPointEqSubtraction) {
  // Affine copy of the second operand.
  ASSERT_EQ(point_vec_to_affine(g2_points2, g2_aff_points, n), cudaSuccess);
  // g2_res_points1 = P1 - affine(P2)
  ASSERT_EQ(vec_sub(g2_points1, g2_aff_points, g2_res_points1, n), cudaSuccess);
  // g2_res_points2 = -P2
  ASSERT_EQ(vec_neg(g2_points2, g2_res_points2, n), cudaSuccess);
  for (unsigned idx = 0; idx < n; ++idx) {
    ASSERT_EQ(g2_res_points1[idx], g2_points1[idx] + g2_res_points2[idx]);
  }
}
int main(int argc, char **argv) {

View File

@@ -3,17 +3,12 @@
// TODO: change the curve depending on env variable
#include "../curves/bls12_381.cuh"
#include "projective.cuh"
#include "extension_field.cuh"
#include "field.cuh"
typedef Field<fp_config> scalar_field;
typedef Field<fq_config> base_field;
typedef Affine<base_field> affine;
static constexpr base_field b = base_field{ weierstrass_b };
typedef Projective<base_field, scalar_field, b> proj;
typedef ExtensionField<fq_config> base_extension_field;
typedef Affine<base_extension_field> g2_affine;
static constexpr base_extension_field b2 = base_extension_field{ base_field {b_re}, base_field {b_im}};
typedef Projective<base_extension_field, scalar_field, b2> g2_proj;
typedef Projective<base_field, scalar_field, group_generator, weierstrass_b> proj;
template <class T1, class T2>
@@ -98,16 +93,99 @@ int field_vec_sqr(const scalar_field *x, scalar_field *result, const unsigned co
return error ? error : cudaDeviceSynchronize();
}
template <class P, class A>
__global__ void to_affine_points_kernel(const P *x, A *result, const unsigned count) {
__global__ void to_affine_points_kernel(const proj *x, affine *result, const unsigned count) {
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
if (gid >= count)
return;
result[gid] = P::to_affine(x[gid]);
result[gid] = proj::to_affine(x[gid]);
}
template <class P, class A> int point_vec_to_affine(const P *x, A *result, const unsigned count) {
to_affine_points_kernel<P, A><<<(count - 1) / 32 + 1, 32>>>(x, result, count);
int point_vec_to_affine(const proj *x, affine *result, const unsigned count) {
to_affine_points_kernel<<<(count - 1) / 32 + 1, 32>>>(x, result, count);
int error = cudaGetLastError();
return error ? error : cudaDeviceSynchronize();
}
// One thread per element: writes the raw (unreduced) wide product of x[i] and y[i] to result[i].
// There is no bounds check, so the launch must not cover more threads than the arrays have elements.
__global__ void mp_mult_kernel(const scalar_field *x, const scalar_field *y, scalar_field::wide *result) {
  const unsigned tid = threadIdx.x + blockIdx.x * blockDim.x;
  scalar_field::multiply_raw_device(x[tid].limbs_storage, y[tid].limbs_storage, result[tid].limbs_storage);
}
// Launches mp_mult_kernel on a single block of 32 threads, i.e. on the first 32 elements only.
// Returns the launch error if there is one, otherwise the result of cudaDeviceSynchronize().
int mp_mult(const scalar_field *x, scalar_field *y, scalar_field::wide *result)
{
  mp_mult_kernel<<<1, 32>>>(x, y, result);
  const int launch_status = cudaGetLastError();
  if (launch_status)
    return launch_status;
  return cudaDeviceSynchronize();
}
// One thread per element: runs the LSB multiplier on x[i] and y[i] and stores the output in result[i].
// There is no bounds check, so the launch must not cover more threads than the arrays have elements.
__global__ void mp_lsb_mult_kernel(const scalar_field *x, const scalar_field *y, scalar_field::wide *result) {
  const unsigned tid = threadIdx.x + blockIdx.x * blockDim.x;
  scalar_field::multiply_lsb_raw_device(x[tid].limbs_storage, y[tid].limbs_storage, result[tid].limbs_storage);
}
// Launches mp_lsb_mult_kernel on a single block of 32 threads, i.e. on the first 32 elements only.
// Returns the launch error if there is one, otherwise the result of cudaDeviceSynchronize().
int mp_lsb_mult(const scalar_field *x, scalar_field *y, scalar_field::wide *result)
{
  mp_lsb_mult_kernel<<<1, 32>>>(x, y, result);
  const int launch_status = cudaGetLastError();
  if (launch_status)
    return launch_status;
  return cudaDeviceSynchronize();
}
// One thread per element: runs the MSB multiplier on x[i] and y[i] and stores the output in result[i].
// There is no bounds check, so the launch must not cover more threads than the arrays have elements.
__global__ void mp_msb_mult_kernel(const scalar_field *x, const scalar_field *y, scalar_field::wide *result) {
  const unsigned tid = threadIdx.x + blockIdx.x * blockDim.x;
  scalar_field::multiply_msb_raw_device(x[tid].limbs_storage, y[tid].limbs_storage, result[tid].limbs_storage);
}
// Launches mp_msb_mult_kernel with a single thread, i.e. on element 0 only.
// Returns the launch error if there is one, otherwise the result of cudaDeviceSynchronize().
int mp_msb_mult(const scalar_field *x, scalar_field *y, scalar_field::wide *result)
{
  mp_msb_mult_kernel<<<1, 1>>>(x, y, result);
  const int launch_status = cudaGetLastError();
  if (launch_status)
    return launch_status;
  return cudaDeviceSynchronize();
}
// One thread per element: runs ingo_multiply_raw_device on x[i] and y[i] and stores the wide
// product in result[i]. There is no bounds check, so the launch must not cover more threads than
// the arrays have elements.
__global__ void ingo_mp_mult_kernel(const scalar_field *x, const scalar_field *y, scalar_field::wide *result) {
  const unsigned tid = threadIdx.x + blockIdx.x * blockDim.x;
  scalar_field::ingo_multiply_raw_device(x[tid].limbs_storage, y[tid].limbs_storage, result[tid].limbs_storage);
}
// Launches ingo_mp_mult_kernel on a single block of 32 threads, i.e. on the first 32 elements only.
// Returns the launch error if there is one, otherwise the result of cudaDeviceSynchronize().
int ingo_mp_mult(const scalar_field *x, scalar_field *y, scalar_field::wide *result)
{
  ingo_mp_mult_kernel<<<1, 32>>>(x, y, result);
  const int launch_status = cudaGetLastError();
  if (launch_status)
    return launch_status;
  return cudaDeviceSynchronize();
}
__global__ void ingo_mp_msb_mult_kernel(const scalar_field *x, const scalar_field *y, scalar_field::wide *result) {
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
scalar_field::ingo_msb_multiply_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
}
int ingo_mp_msb_mult(const scalar_field *x, scalar_field *y, scalar_field::wide *result, const unsigned n)
{
ingo_mp_msb_mult_kernel<<<1, n>>>(x, y, result);
int error = cudaGetLastError();
return error ? error : cudaDeviceSynchronize();
}
__global__ void ingo_mp_mod_mult_kernel(const scalar_field *x, const scalar_field *y, scalar_field *result) {
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
result[gid] = x[gid] * y[gid];
}
int ingo_mp_mod_mult(const scalar_field *x, scalar_field *y, scalar_field *result, const unsigned n)
{
ingo_mp_mod_mult_kernel<<<1, n>>>(x, y, result);
int error = cudaGetLastError();
return error ? error : cudaDeviceSynchronize();
}

View File

@@ -1,334 +0,0 @@
use std::ffi::c_uint;
use ark_CURVE_NAME_L::{Fq as Fq_CURVE_NAME_U, Fr as Fr_CURVE_NAME_U, G1Affine as G1Affine_CURVE_NAME_U, G1Projective as G1Projective_CURVE_NAME_U};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger_limbs_q, BigInteger_limbs_p, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
/// Field element stored as `NUM_LIMBS` 32-bit limbs.
///
/// `one()` puts the value 1 in limb 0, so limb 0 is the least-significant limb.
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field_CURVE_NAME_U<const NUM_LIMBS: usize> {
    pub s: [u32; NUM_LIMBS],
}

// SAFETY: the struct holds only a `[u32; NUM_LIMBS]` array, with no pointers or references.
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_CURVE_NAME_U<NUM_LIMBS> {}

impl<const NUM_LIMBS: usize> Default for Field_CURVE_NAME_U<NUM_LIMBS> {
    /// The default element is zero.
    fn default() -> Self {
        Self::zero()
    }
}
impl<const NUM_LIMBS: usize> Field_CURVE_NAME_U<NUM_LIMBS> {
    /// Returns the element whose limbs are all zero.
    pub fn zero() -> Self {
        Field_CURVE_NAME_U {
            s: [0u32; NUM_LIMBS],
        }
    }

    /// Returns the element with limb 0 set to `1` and every other limb zero.
    ///
    /// # Panics
    /// Panics if `NUM_LIMBS` is 0, because limb 0 does not exist.
    pub fn one() -> Self {
        let mut s = [0u32; NUM_LIMBS];
        s[0] = 1;
        Field_CURVE_NAME_U { s }
    }

    /// Serializes the limbs into `4 * NUM_LIMBS` bytes: limb 0 first, each
    /// limb in little-endian byte order.
    fn to_bytes_le(&self) -> Vec<u8> {
        // A single up-front allocation replaces the temporary `Vec` that was
        // previously built for every limb.
        let mut bytes = Vec::with_capacity(NUM_LIMBS * 4);
        for limb in &self.s {
            bytes.extend_from_slice(&limb.to_le_bytes());
        }
        bytes
    }
}
// Number of 32-bit limbs per base-field / scalar-field element.
// NOTE(review): `limbs_q` / `limbs_p` look like generator placeholders that are
// substituted with concrete numbers per curve — confirm against the generator.
pub const BASE_LIMBS_CURVE_NAME_U: usize = limbs_q;
pub const SCALAR_LIMBS_CURVE_NAME_U: usize = limbs_p;
// Concrete field types: `Field_CURVE_NAME_U` fixed to the limb counts above.
pub type BaseField_CURVE_NAME_U = Field_CURVE_NAME_U<BASE_LIMBS_CURVE_NAME_U>;
pub type ScalarField_CURVE_NAME_U = Field_CURVE_NAME_U<SCALAR_LIMBS_CURVE_NAME_U>;
/// Copies `val` into a fixed-size limb array, zero-filling the limbs after
/// the end of `val`.
///
/// # Panics
/// Panics with "slice has too many elements" when `val` holds more than
/// `NUM_LIMBS` entries.
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    let count = val.len();
    if count > NUM_LIMBS {
        panic!("slice has too many elements");
    }
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..count].copy_from_slice(val);
    limbs
}
//
impl BaseField_CURVE_NAME_U {
    /// Returns a copy of the raw 32-bit limbs.
    pub fn limbs(&self) -> [u32; BASE_LIMBS_CURVE_NAME_U] {
        self.s
    }

    /// Builds an element from at most `BASE_LIMBS_CURVE_NAME_U` limbs,
    /// zero-filling the missing ones. Panics if `value` is longer than that.
    pub fn from_limbs(value: &[u32]) -> Self {
        let s = get_fixed_limbs(value);
        Self { s }
    }

    /// Converts to the arkworks big integer by regrouping the limbs with
    /// `u32_vec_to_u64_vec`. Panics if the regrouped vector does not have the
    /// length the big integer expects.
    pub fn to_ark(&self) -> BigInteger_limbs_q {
        let limbs64 = u32_vec_to_u64_vec(&self.limbs());
        BigInteger_limbs_q::new(limbs64.try_into().unwrap())
    }

    /// Converts from the arkworks big integer by regrouping its `u64` words
    /// with `u64_vec_to_u32_vec`.
    pub fn from_ark(ark: BigInteger_limbs_q) -> Self {
        let limbs32 = u64_vec_to_u32_vec(&ark.0);
        Self::from_limbs(&limbs32)
    }
}
//
impl ScalarField_CURVE_NAME_U {
    /// Returns a copy of the raw 32-bit limbs.
    pub fn limbs(&self) -> [u32; SCALAR_LIMBS_CURVE_NAME_U] {
        self.s
    }

    /// Converts to the arkworks big integer by regrouping the limbs with
    /// `u32_vec_to_u64_vec`. Panics if the regrouped vector does not have the
    /// length the big integer expects.
    pub fn to_ark(&self) -> BigInteger_limbs_p {
        let limbs64 = u32_vec_to_u64_vec(&self.limbs());
        BigInteger_limbs_p::new(limbs64.try_into().unwrap())
    }

    /// Converts from the arkworks big integer by regrouping its `u64` words
    /// with `u64_vec_to_u32_vec`.
    pub fn from_ark(ark: BigInteger_limbs_p) -> Self {
        let limbs32 = u64_vec_to_u32_vec(&ark.0);
        Self::from_limbs(&limbs32)
    }

    /// Reinterprets the limb array as an arkworks big integer bit-for-bit.
    pub fn to_ark_transmute(&self) -> BigInteger_limbs_p {
        // SAFETY: `transmute` only compiles when both types have the same size.
        // NOTE(review): equality with `to_ark` additionally assumes the big
        // integer is a plain `u64` array wrapper and a little-endian host — confirm.
        unsafe { transmute::<Self, BigInteger_limbs_p>(*self) }
    }

    /// Reinterprets an arkworks big integer as a limb array bit-for-bit.
    pub fn from_ark_transmute(v: BigInteger_limbs_p) -> ScalarField_CURVE_NAME_U {
        // SAFETY: same size requirement and layout assumption as `to_ark_transmute`.
        unsafe { transmute::<BigInteger_limbs_p, Self>(v) }
    }
}
/// Projective point `(x, y, z)` over the base field.
///
/// `to_ark_affine` recovers the affine coordinates as `x / z` and `y / z`;
/// `zero()` encodes the point at infinity as `(0, 1, 0)`.
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Point_CURVE_NAME_U {
    pub x: BaseField_CURVE_NAME_U,
    pub y: BaseField_CURVE_NAME_U,
    pub z: BaseField_CURVE_NAME_U,
}

impl Default for Point_CURVE_NAME_U {
    /// The default point is `zero()`.
    fn default() -> Self {
        Self::zero()
    }
}
impl Point_CURVE_NAME_U {
    /// Returns the point at infinity, encoded as `(0, 1, 0)`.
    pub fn zero() -> Self {
        Point_CURVE_NAME_U {
            x: BaseField_CURVE_NAME_U::zero(),
            y: BaseField_CURVE_NAME_U::one(),
            z: BaseField_CURVE_NAME_U::zero(),
        }
    }

    /// Alias for [`Self::zero`].
    pub fn infinity() -> Self {
        Self::zero()
    }

    /// Converts to the arkworks projective type by way of [`Self::to_ark_affine`].
    pub fn to_ark(&self) -> G1Projective_CURVE_NAME_U {
        //TODO: generic conversion
        self.to_ark_affine().into_projective()
    }

    /// Converts to an arkworks affine point by dividing `x` and `y` by `z`.
    ///
    /// A point with `z == 0` (the point at infinity, e.g. [`Self::zero`]) has
    /// no inverse for `z`; it maps to the arkworks default affine point
    /// (its identity) instead of panicking.
    pub fn to_ark_affine(&self) -> G1Affine_CURVE_NAME_U {
        //TODO: generic conversion
        use ark_ff::Field;
        use std::ops::Mul;
        let proj_x_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.x.to_bytes_le());
        let proj_y_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.y.to_bytes_le());
        let proj_z_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.z.to_bytes_le());
        match proj_z_field.inverse() {
            Some(inverse_z) => {
                let aff_x = proj_x_field.mul(inverse_z);
                let aff_y = proj_y_field.mul(inverse_z);
                G1Affine_CURVE_NAME_U::new(aff_x, aff_y, false)
            }
            // z == 0: point at infinity.
            None => G1Affine_CURVE_NAME_U::default(),
        }
    }

    /// Converts from an arkworks projective point, normalising it to `z == 1`
    /// with `x / z^2` and `y / z^3`.
    ///
    /// An input with `z == 0` (the identity) maps to [`Self::zero`] instead of
    /// panicking.
    pub fn from_ark(ark: G1Projective_CURVE_NAME_U) -> Point_CURVE_NAME_U {
        use ark_ff::Field;
        match ark.z.inverse() {
            Some(z_inv) => {
                let z_invsq = z_inv * z_inv;
                let z_invq3 = z_invsq * z_inv;
                Point_CURVE_NAME_U {
                    x: BaseField_CURVE_NAME_U::from_ark((ark.x * z_invsq).into_repr()),
                    y: BaseField_CURVE_NAME_U::from_ark((ark.y * z_invq3).into_repr()),
                    z: BaseField_CURVE_NAME_U::one(),
                }
            }
            // z == 0: identity element.
            None => Self::zero(),
        }
    }
}
extern "C" {
    // Native point comparison; the `PartialEq` impl below treats any non-zero
    // return value as "equal".
    fn eq_CURVE_NAME_L(point1: *const Point_CURVE_NAME_U, point2: *const Point_CURVE_NAME_U) -> c_uint;
}

impl PartialEq for Point_CURVE_NAME_U {
    /// Delegates the comparison to the native `eq_CURVE_NAME_L` routine.
    fn eq(&self, other: &Self) -> bool {
        // SAFETY: both pointers are derived from live references to
        // `#[repr(C)]` values that stay valid for the duration of the call.
        let verdict = unsafe { eq_CURVE_NAME_L(self, other) };
        verdict != 0
    }
}
/// Affine point `(x, y)`; the type has no field for encoding the point at infinity.
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinity_CURVE_NAME_U {
    pub x: BaseField_CURVE_NAME_U,
    pub y: BaseField_CURVE_NAME_U,
}

impl Default for PointAffineNoInfinity_CURVE_NAME_U {
    /// Defaults to `(0, 0)`.
    fn default() -> Self {
        let origin = BaseField_CURVE_NAME_U::zero();
        Self { x: origin, y: origin }
    }
}
impl PointAffineNoInfinity_CURVE_NAME_U {
    // TODO: generics
    /// Builds a point from the u32 limbs of `x` and `y`; short slices are
    /// zero-filled, over-long slices panic.
    pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
        Self {
            x: BaseField_CURVE_NAME_U::from_limbs(x),
            y: BaseField_CURVE_NAME_U::from_limbs(y),
        }
    }

    /// Returns the limbs of `x` followed by the limbs of `y`.
    pub fn limbs(&self) -> Vec<u32> {
        let mut all = self.x.limbs().to_vec();
        all.extend_from_slice(&self.y.limbs());
        all
    }

    /// Lifts the point to projective coordinates with `z = 1`.
    pub fn to_projective(&self) -> Point_CURVE_NAME_U {
        let z = BaseField_CURVE_NAME_U::one();
        Point_CURVE_NAME_U { x: self.x, y: self.y, z }
    }

    /// Converts to an arkworks affine point through `Fq::new`.
    // NOTE(review): `Fq::new` appears to take the big integer as the internal
    // representation without conversion, unlike `from_repr` in `to_ark_repr` — confirm.
    pub fn to_ark(&self) -> G1Affine_CURVE_NAME_U {
        let x = Fq_CURVE_NAME_U::new(self.x.to_ark());
        let y = Fq_CURVE_NAME_U::new(self.y.to_ark());
        G1Affine_CURVE_NAME_U::new(x, y, false)
    }

    /// Converts to an arkworks affine point through `Fq::from_repr`.
    /// Panics if `from_repr` rejects either coordinate.
    pub fn to_ark_repr(&self) -> G1Affine_CURVE_NAME_U {
        let x = Fq_CURVE_NAME_U::from_repr(self.x.to_ark()).unwrap();
        let y = Fq_CURVE_NAME_U::from_repr(self.y.to_ark()).unwrap();
        G1Affine_CURVE_NAME_U::new(x, y, false)
    }

    /// Copies the coordinates of an arkworks affine point; its `infinity`
    /// flag is not consulted.
    pub fn from_ark(p: &G1Affine_CURVE_NAME_U) -> Self {
        Self {
            x: BaseField_CURVE_NAME_U::from_ark(p.x.into_repr()),
            y: BaseField_CURVE_NAME_U::from_ark(p.y.into_repr()),
        }
    }
}
impl Point_CURVE_NAME_U {
// TODO: generics
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
Point_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(x),
},
y: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(y),
},
z: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(z),
},
}
}
pub fn from_xy_limbs(value: &[u32]) -> Point_CURVE_NAME_U {
let l = value.len();
assert_eq!(l, 3 * BASE_LIMBS_CURVE_NAME_U, "length must be 3 * {}", BASE_LIMBS_CURVE_NAME_U);
Point_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U {
s: value[..BASE_LIMBS_CURVE_NAME_U].try_into().unwrap(),
},
y: BaseField_CURVE_NAME_U {
s: value[BASE_LIMBS_CURVE_NAME_U..BASE_LIMBS_CURVE_NAME_U * 2].try_into().unwrap(),
},
z: BaseField_CURVE_NAME_U {
s: value[BASE_LIMBS_CURVE_NAME_U * 2..].try_into().unwrap(),
},
}
}
pub fn to_affine(&self) -> PointAffineNoInfinity_CURVE_NAME_U {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U::from_ark(ark_affine.x.into_repr()),
y: BaseField_CURVE_NAME_U::from_ark(ark_affine.y.into_repr()),
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_CURVE_NAME_U {
PointAffineNoInfinity_CURVE_NAME_U {
x: self.x,
y: self.y,
}
}
}
impl ScalarField_CURVE_NAME_U {
    /// Builds a scalar from at most `SCALAR_LIMBS_CURVE_NAME_U` limbs,
    /// zero-filling the missing ones. Panics if `value` is longer than that.
    pub fn from_limbs(value: &[u32]) -> ScalarField_CURVE_NAME_U {
        let s = get_fixed_limbs(value);
        Self { s }
    }
}
// Unit tests for the scalar conversions and the native point equality.
#[cfg(test)]
mod tests {
use ark_CURVE_NAME_L::{Fr as Fr_CURVE_NAME_U};
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::CURVE_NAME_L::{Point_CURVE_NAME_U, ScalarField_CURVE_NAME_U}};
// The limb-regrouping conversion and the transmute conversion must produce
// the same big integer for an 8-limb scalar.
#[test]
fn test_ark_scalar_convert() {
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
let scalar = ScalarField_CURVE_NAME_U::from_limbs(&limbs);
assert_eq!(
scalar.to_ark(),
scalar.to_ark_transmute(),
"{:08X?} {:08X?}",
scalar.to_ark(),
scalar.to_ark_transmute()
)
}
// Exercises `PartialEq for Point`, which calls the native equality routine.
#[test]
#[allow(non_snake_case)]
fn test_point_equality() {
let left = Point_CURVE_NAME_U::zero();
let right = Point_CURVE_NAME_U::zero();
assert_eq!(left, right);
// (0, 2, 0) also has z == 0 and is expected to compare equal to zero().
let right = Point_CURVE_NAME_U::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
assert_eq!(left, right);
// A point with z == 1 must not compare equal to zero().
let right = Point_CURVE_NAME_U::from_limbs(
&[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
&[0; 12],
&[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
);
assert!(left != right);
}
}

View File

@@ -1,312 +0,0 @@
use std::ffi::c_uint;
use ark_CURVE_NAME_L::{Fq as Fq_CURVE_NAME_U, Fr as Fr_CURVE_NAME_U, G1Affine as G1Affine_CURVE_NAME_U, G1Projective as G1Projective_CURVE_NAME_U};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger_limbs_p, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
/// Field element stored as `NUM_LIMBS` 32-bit limbs.
///
/// `one()` puts the value 1 in limb 0, so limb 0 is the least-significant limb.
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field_CURVE_NAME_U<const NUM_LIMBS: usize> {
    pub s: [u32; NUM_LIMBS],
}

// SAFETY: the struct holds only a `[u32; NUM_LIMBS]` array, with no pointers or references.
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_CURVE_NAME_U<NUM_LIMBS> {}

impl<const NUM_LIMBS: usize> Default for Field_CURVE_NAME_U<NUM_LIMBS> {
    /// The default element is zero.
    fn default() -> Self {
        Self::zero()
    }
}
impl<const NUM_LIMBS: usize> Field_CURVE_NAME_U<NUM_LIMBS> {
    /// Returns the element whose limbs are all zero.
    pub fn zero() -> Self {
        Field_CURVE_NAME_U {
            s: [0u32; NUM_LIMBS],
        }
    }

    /// Returns the element with limb 0 set to `1` and every other limb zero.
    ///
    /// # Panics
    /// Panics if `NUM_LIMBS` is 0, because limb 0 does not exist.
    pub fn one() -> Self {
        let mut s = [0u32; NUM_LIMBS];
        s[0] = 1;
        Field_CURVE_NAME_U { s }
    }

    /// Serializes the limbs into `4 * NUM_LIMBS` bytes: limb 0 first, each
    /// limb in little-endian byte order.
    fn to_bytes_le(&self) -> Vec<u8> {
        // A single up-front allocation replaces the temporary `Vec` that was
        // previously built for every limb.
        let mut bytes = Vec::with_capacity(NUM_LIMBS * 4);
        for limb in &self.s {
            bytes.extend_from_slice(&limb.to_le_bytes());
        }
        bytes
    }
}
// Number of 32-bit limbs per base-field / scalar-field element. Both use the
// same value here, so the two type aliases below name the same type and the
// methods in `impl ScalarField_CURVE_NAME_U` are also available on the base field.
// NOTE(review): `limbs_p` looks like a generator placeholder that is
// substituted with a concrete number per curve — confirm against the generator.
pub const BASE_LIMBS_CURVE_NAME_U: usize = limbs_p;
pub const SCALAR_LIMBS_CURVE_NAME_U: usize = limbs_p;
pub type BaseField_CURVE_NAME_U = Field_CURVE_NAME_U<BASE_LIMBS_CURVE_NAME_U>;
pub type ScalarField_CURVE_NAME_U = Field_CURVE_NAME_U<SCALAR_LIMBS_CURVE_NAME_U>;
/// Copies `val` into a fixed-size limb array, zero-filling the limbs after
/// the end of `val`.
///
/// # Panics
/// Panics with "slice has too many elements" when `val` holds more than
/// `NUM_LIMBS` entries.
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    let count = val.len();
    if count > NUM_LIMBS {
        panic!("slice has too many elements");
    }
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..count].copy_from_slice(val);
    limbs
}
impl ScalarField_CURVE_NAME_U {
    /// Returns a copy of the raw 32-bit limbs.
    pub fn limbs(&self) -> [u32; SCALAR_LIMBS_CURVE_NAME_U] {
        self.s
    }

    /// Converts to the arkworks big integer by regrouping the limbs with
    /// `u32_vec_to_u64_vec`. Panics if the regrouped vector does not have the
    /// length the big integer expects.
    pub fn to_ark(&self) -> BigInteger_limbs_p {
        let limbs64 = u32_vec_to_u64_vec(&self.limbs());
        BigInteger_limbs_p::new(limbs64.try_into().unwrap())
    }

    /// Converts from the arkworks big integer by regrouping its `u64` words
    /// with `u64_vec_to_u32_vec`.
    pub fn from_ark(ark: BigInteger_limbs_p) -> Self {
        let limbs32 = u64_vec_to_u32_vec(&ark.0);
        Self::from_limbs(&limbs32)
    }

    /// Reinterprets the limb array as an arkworks big integer bit-for-bit.
    pub fn to_ark_transmute(&self) -> BigInteger_limbs_p {
        // SAFETY: `transmute` only compiles when both types have the same size.
        // NOTE(review): equality with `to_ark` additionally assumes the big
        // integer is a plain `u64` array wrapper and a little-endian host — confirm.
        unsafe { transmute::<Self, BigInteger_limbs_p>(*self) }
    }

    /// Reinterprets an arkworks big integer as a limb array bit-for-bit.
    pub fn from_ark_transmute(v: BigInteger_limbs_p) -> ScalarField_CURVE_NAME_U {
        // SAFETY: same size requirement and layout assumption as `to_ark_transmute`.
        unsafe { transmute::<BigInteger_limbs_p, Self>(v) }
    }
}
/// Projective point `(x, y, z)` over the base field.
///
/// `to_ark_affine` recovers the affine coordinates as `x / z` and `y / z`;
/// `zero()` encodes the point at infinity as `(0, 1, 0)`.
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Point_CURVE_NAME_U {
    pub x: BaseField_CURVE_NAME_U,
    pub y: BaseField_CURVE_NAME_U,
    pub z: BaseField_CURVE_NAME_U,
}

impl Default for Point_CURVE_NAME_U {
    /// The default point is `zero()`.
    fn default() -> Self {
        Self::zero()
    }
}
impl Point_CURVE_NAME_U {
    /// Returns the point at infinity, encoded as `(0, 1, 0)`.
    pub fn zero() -> Self {
        Point_CURVE_NAME_U {
            x: BaseField_CURVE_NAME_U::zero(),
            y: BaseField_CURVE_NAME_U::one(),
            z: BaseField_CURVE_NAME_U::zero(),
        }
    }

    /// Alias for [`Self::zero`].
    pub fn infinity() -> Self {
        Self::zero()
    }

    /// Converts to the arkworks projective type by way of [`Self::to_ark_affine`].
    pub fn to_ark(&self) -> G1Projective_CURVE_NAME_U {
        //TODO: generic conversion
        self.to_ark_affine().into_projective()
    }

    /// Converts to an arkworks affine point by dividing `x` and `y` by `z`.
    ///
    /// A point with `z == 0` (the point at infinity, e.g. [`Self::zero`]) has
    /// no inverse for `z`; it maps to the arkworks default affine point
    /// (its identity) instead of panicking.
    pub fn to_ark_affine(&self) -> G1Affine_CURVE_NAME_U {
        //TODO: generic conversion
        use ark_ff::Field;
        use std::ops::Mul;
        let proj_x_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.x.to_bytes_le());
        let proj_y_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.y.to_bytes_le());
        let proj_z_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.z.to_bytes_le());
        match proj_z_field.inverse() {
            Some(inverse_z) => {
                let aff_x = proj_x_field.mul(inverse_z);
                let aff_y = proj_y_field.mul(inverse_z);
                G1Affine_CURVE_NAME_U::new(aff_x, aff_y, false)
            }
            // z == 0: point at infinity.
            None => G1Affine_CURVE_NAME_U::default(),
        }
    }

    /// Converts from an arkworks projective point, normalising it to `z == 1`
    /// with `x / z^2` and `y / z^3`.
    ///
    /// An input with `z == 0` (the identity) maps to [`Self::zero`] instead of
    /// panicking.
    pub fn from_ark(ark: G1Projective_CURVE_NAME_U) -> Point_CURVE_NAME_U {
        use ark_ff::Field;
        match ark.z.inverse() {
            Some(z_inv) => {
                let z_invsq = z_inv * z_inv;
                let z_invq3 = z_invsq * z_inv;
                Point_CURVE_NAME_U {
                    x: BaseField_CURVE_NAME_U::from_ark((ark.x * z_invsq).into_repr()),
                    y: BaseField_CURVE_NAME_U::from_ark((ark.y * z_invq3).into_repr()),
                    z: BaseField_CURVE_NAME_U::one(),
                }
            }
            // z == 0: identity element.
            None => Self::zero(),
        }
    }
}
extern "C" {
    // Native point comparison; the `PartialEq` impl below treats any non-zero
    // return value as "equal".
    fn eq_CURVE_NAME_L(point1: *const Point_CURVE_NAME_U, point2: *const Point_CURVE_NAME_U) -> c_uint;
}

impl PartialEq for Point_CURVE_NAME_U {
    /// Delegates the comparison to the native `eq_CURVE_NAME_L` routine.
    fn eq(&self, other: &Self) -> bool {
        // SAFETY: both pointers are derived from live references to
        // `#[repr(C)]` values that stay valid for the duration of the call.
        let verdict = unsafe { eq_CURVE_NAME_L(self, other) };
        verdict != 0
    }
}
/// Affine point `(x, y)`; the type has no field for encoding the point at infinity.
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinity_CURVE_NAME_U {
    pub x: BaseField_CURVE_NAME_U,
    pub y: BaseField_CURVE_NAME_U,
}

impl Default for PointAffineNoInfinity_CURVE_NAME_U {
    /// Defaults to `(0, 0)`.
    fn default() -> Self {
        let origin = BaseField_CURVE_NAME_U::zero();
        Self { x: origin, y: origin }
    }
}
impl PointAffineNoInfinity_CURVE_NAME_U {
    // TODO: generics
    /// Builds a point from the u32 limbs of `x` and `y`; short slices are
    /// zero-filled, over-long slices panic.
    pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
        Self {
            x: BaseField_CURVE_NAME_U::from_limbs(x),
            y: BaseField_CURVE_NAME_U::from_limbs(y),
        }
    }

    /// Returns the limbs of `x` followed by the limbs of `y`.
    pub fn limbs(&self) -> Vec<u32> {
        let mut all = self.x.limbs().to_vec();
        all.extend_from_slice(&self.y.limbs());
        all
    }

    /// Lifts the point to projective coordinates with `z = 1`.
    pub fn to_projective(&self) -> Point_CURVE_NAME_U {
        let z = BaseField_CURVE_NAME_U::one();
        Point_CURVE_NAME_U { x: self.x, y: self.y, z }
    }

    /// Converts to an arkworks affine point through `Fq::new`.
    // NOTE(review): `Fq::new` appears to take the big integer as the internal
    // representation without conversion, unlike `from_repr` in `to_ark_repr` — confirm.
    pub fn to_ark(&self) -> G1Affine_CURVE_NAME_U {
        let x = Fq_CURVE_NAME_U::new(self.x.to_ark());
        let y = Fq_CURVE_NAME_U::new(self.y.to_ark());
        G1Affine_CURVE_NAME_U::new(x, y, false)
    }

    /// Converts to an arkworks affine point through `Fq::from_repr`.
    /// Panics if `from_repr` rejects either coordinate.
    pub fn to_ark_repr(&self) -> G1Affine_CURVE_NAME_U {
        let x = Fq_CURVE_NAME_U::from_repr(self.x.to_ark()).unwrap();
        let y = Fq_CURVE_NAME_U::from_repr(self.y.to_ark()).unwrap();
        G1Affine_CURVE_NAME_U::new(x, y, false)
    }

    /// Copies the coordinates of an arkworks affine point; its `infinity`
    /// flag is not consulted.
    pub fn from_ark(p: &G1Affine_CURVE_NAME_U) -> Self {
        Self {
            x: BaseField_CURVE_NAME_U::from_ark(p.x.into_repr()),
            y: BaseField_CURVE_NAME_U::from_ark(p.y.into_repr()),
        }
    }
}
impl Point_CURVE_NAME_U {
// TODO: generics
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
Point_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(x),
},
y: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(y),
},
z: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(z),
},
}
}
pub fn from_xy_limbs(value: &[u32]) -> Point_CURVE_NAME_U {
let l = value.len();
assert_eq!(l, 3 * BASE_LIMBS_CURVE_NAME_U, "length must be 3 * {}", BASE_LIMBS_CURVE_NAME_U);
Point_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U {
s: value[..BASE_LIMBS_CURVE_NAME_U].try_into().unwrap(),
},
y: BaseField_CURVE_NAME_U {
s: value[BASE_LIMBS_CURVE_NAME_U..BASE_LIMBS_CURVE_NAME_U * 2].try_into().unwrap(),
},
z: BaseField_CURVE_NAME_U {
s: value[BASE_LIMBS_CURVE_NAME_U * 2..].try_into().unwrap(),
},
}
}
pub fn to_affine(&self) -> PointAffineNoInfinity_CURVE_NAME_U {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U::from_ark(ark_affine.x.into_repr()),
y: BaseField_CURVE_NAME_U::from_ark(ark_affine.y.into_repr()),
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_CURVE_NAME_U {
PointAffineNoInfinity_CURVE_NAME_U {
x: self.x,
y: self.y,
}
}
}
impl ScalarField_CURVE_NAME_U {
    /// Builds a scalar from at most `SCALAR_LIMBS_CURVE_NAME_U` limbs,
    /// zero-filling the missing ones. Panics if `value` is longer than that.
    pub fn from_limbs(value: &[u32]) -> ScalarField_CURVE_NAME_U {
        let s = get_fixed_limbs(value);
        Self { s }
    }
}
// Unit tests for the scalar conversions and the native point equality.
#[cfg(test)]
mod tests {
use ark_CURVE_NAME_L::{Fr as Fr_CURVE_NAME_U};
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::CURVE_NAME_L::{Point_CURVE_NAME_U, ScalarField_CURVE_NAME_U}};
// The limb-regrouping conversion and the transmute conversion must produce
// the same big integer for an 8-limb scalar.
#[test]
fn test_ark_scalar_convert() {
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
let scalar = ScalarField_CURVE_NAME_U::from_limbs(&limbs);
assert_eq!(
scalar.to_ark(),
scalar.to_ark_transmute(),
"{:08X?} {:08X?}",
scalar.to_ark(),
scalar.to_ark_transmute()
)
}
// Exercises `PartialEq for Point`, which calls the native equality routine.
#[test]
#[allow(non_snake_case)]
fn test_point_equality() {
let left = Point_CURVE_NAME_U::zero();
let right = Point_CURVE_NAME_U::zero();
assert_eq!(left, right);
// (0, 2, 0) also has z == 0 and is expected to compare equal to zero().
let right = Point_CURVE_NAME_U::from_limbs(&[0; 8], &[2, 0, 0, 0, 0, 0, 0, 0], &[0; 8]);
assert_eq!(left, right);
// A point with z == 1 must not compare equal to zero().
let right = Point_CURVE_NAME_U::from_limbs(
&[2, 0, 0, 0, 0, 0, 0, 0],
&[0; 8],
&[1, 0, 0, 0, 0, 0, 0, 0],
);
assert!(left != right);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,332 +0,0 @@
use std::ffi::c_uint;
use ark_bls12_377::{Fq as Fq_BLS12_377, Fr as Fr_BLS12_377, G1Affine as G1Affine_BLS12_377, G1Projective as G1Projective_BLS12_377};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
/// Field element stored as `NUM_LIMBS` 32-bit limbs.
///
/// `one()` puts the value 1 in limb 0, so limb 0 is the least-significant limb.
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field_BLS12_377<const NUM_LIMBS: usize> {
    pub s: [u32; NUM_LIMBS],
}

// SAFETY: the struct holds only a `[u32; NUM_LIMBS]` array, with no pointers or references.
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_BLS12_377<NUM_LIMBS> {}

impl<const NUM_LIMBS: usize> Default for Field_BLS12_377<NUM_LIMBS> {
    /// The default element is zero.
    fn default() -> Self {
        Self::zero()
    }
}
impl<const NUM_LIMBS: usize> Field_BLS12_377<NUM_LIMBS> {
    /// Returns the element whose limbs are all zero.
    pub fn zero() -> Self {
        Field_BLS12_377 {
            s: [0u32; NUM_LIMBS],
        }
    }

    /// Returns the element with limb 0 set to `1` and every other limb zero.
    ///
    /// # Panics
    /// Panics if `NUM_LIMBS` is 0, because limb 0 does not exist.
    pub fn one() -> Self {
        let mut s = [0u32; NUM_LIMBS];
        s[0] = 1;
        Field_BLS12_377 { s }
    }

    /// Serializes the limbs into `4 * NUM_LIMBS` bytes: limb 0 first, each
    /// limb in little-endian byte order.
    fn to_bytes_le(&self) -> Vec<u8> {
        // A single up-front allocation replaces the temporary `Vec` that was
        // previously built for every limb.
        let mut bytes = Vec::with_capacity(NUM_LIMBS * 4);
        for limb in &self.s {
            bytes.extend_from_slice(&limb.to_le_bytes());
        }
        bytes
    }
}
// Base-field elements use 12 32-bit limbs (384 bits of storage).
pub const BASE_LIMBS_BLS12_377: usize = 12;
// Scalar-field elements use 8 32-bit limbs (256 bits of storage).
pub const SCALAR_LIMBS_BLS12_377: usize = 8;
// Concrete field types: `Field_BLS12_377` fixed to the limb counts above.
pub type BaseField_BLS12_377 = Field_BLS12_377<BASE_LIMBS_BLS12_377>;
pub type ScalarField_BLS12_377 = Field_BLS12_377<SCALAR_LIMBS_BLS12_377>;
/// Copies `val` into a fixed-size limb array, zero-filling the limbs after
/// the end of `val`.
///
/// # Panics
/// Panics with "slice has too many elements" when `val` holds more than
/// `NUM_LIMBS` entries.
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    let count = val.len();
    if count > NUM_LIMBS {
        panic!("slice has too many elements");
    }
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..count].copy_from_slice(val);
    limbs
}
impl BaseField_BLS12_377 {
    /// Returns a copy of the raw 32-bit limbs.
    pub fn limbs(&self) -> [u32; BASE_LIMBS_BLS12_377] {
        self.s
    }

    /// Builds an element from at most `BASE_LIMBS_BLS12_377` limbs,
    /// zero-filling the missing ones. Panics if `value` is longer than that.
    pub fn from_limbs(value: &[u32]) -> Self {
        let s = get_fixed_limbs(value);
        Self { s }
    }

    /// Converts to `BigInteger384` by regrouping the limbs with
    /// `u32_vec_to_u64_vec`. Panics if the regrouped vector does not have the
    /// length `BigInteger384::new` expects.
    pub fn to_ark(&self) -> BigInteger384 {
        let limbs64 = u32_vec_to_u64_vec(&self.limbs());
        BigInteger384::new(limbs64.try_into().unwrap())
    }

    /// Converts from `BigInteger384` by regrouping its `u64` words with
    /// `u64_vec_to_u32_vec`.
    pub fn from_ark(ark: BigInteger384) -> Self {
        let limbs32 = u64_vec_to_u32_vec(&ark.0);
        Self::from_limbs(&limbs32)
    }
}
impl ScalarField_BLS12_377 {
    /// Returns a copy of the raw 32-bit limbs.
    pub fn limbs(&self) -> [u32; SCALAR_LIMBS_BLS12_377] {
        self.s
    }

    /// Converts to `BigInteger256` by regrouping the limbs with
    /// `u32_vec_to_u64_vec`. Panics if the regrouped vector does not have the
    /// length `BigInteger256::new` expects.
    pub fn to_ark(&self) -> BigInteger256 {
        let limbs64 = u32_vec_to_u64_vec(&self.limbs());
        BigInteger256::new(limbs64.try_into().unwrap())
    }

    /// Converts from `BigInteger256` by regrouping its `u64` words with
    /// `u64_vec_to_u32_vec`.
    pub fn from_ark(ark: BigInteger256) -> Self {
        let limbs32 = u64_vec_to_u32_vec(&ark.0);
        Self::from_limbs(&limbs32)
    }

    /// Reinterprets the limb array as a `BigInteger256` bit-for-bit.
    pub fn to_ark_transmute(&self) -> BigInteger256 {
        // SAFETY: `transmute` only compiles when both types have the same size.
        // NOTE(review): equality with `to_ark` additionally assumes
        // `BigInteger256` is a plain `u64` array wrapper and a little-endian host — confirm.
        unsafe { transmute::<Self, BigInteger256>(*self) }
    }

    /// Reinterprets a `BigInteger256` as a limb array bit-for-bit.
    pub fn from_ark_transmute(v: BigInteger256) -> ScalarField_BLS12_377 {
        // SAFETY: same size requirement and layout assumption as `to_ark_transmute`.
        unsafe { transmute::<BigInteger256, Self>(v) }
    }
}
/// Projective point `(x, y, z)` over the base field.
///
/// `to_ark_affine` recovers the affine coordinates as `x / z` and `y / z`;
/// `zero()` encodes the point at infinity as `(0, 1, 0)`.
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Point_BLS12_377 {
    pub x: BaseField_BLS12_377,
    pub y: BaseField_BLS12_377,
    pub z: BaseField_BLS12_377,
}

impl Default for Point_BLS12_377 {
    /// The default point is `zero()`.
    fn default() -> Self {
        Self::zero()
    }
}
impl Point_BLS12_377 {
    /// Returns the point at infinity, encoded as `(0, 1, 0)`.
    pub fn zero() -> Self {
        Point_BLS12_377 {
            x: BaseField_BLS12_377::zero(),
            y: BaseField_BLS12_377::one(),
            z: BaseField_BLS12_377::zero(),
        }
    }

    /// Alias for [`Self::zero`].
    pub fn infinity() -> Self {
        Self::zero()
    }

    /// Converts to the arkworks projective type by way of [`Self::to_ark_affine`].
    pub fn to_ark(&self) -> G1Projective_BLS12_377 {
        //TODO: generic conversion
        self.to_ark_affine().into_projective()
    }

    /// Converts to an arkworks affine point by dividing `x` and `y` by `z`.
    ///
    /// A point with `z == 0` (the point at infinity, e.g. [`Self::zero`]) has
    /// no inverse for `z`; it maps to the arkworks default affine point
    /// (its identity) instead of panicking.
    pub fn to_ark_affine(&self) -> G1Affine_BLS12_377 {
        //TODO: generic conversion
        use ark_ff::Field;
        use std::ops::Mul;
        let proj_x_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.x.to_bytes_le());
        let proj_y_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.y.to_bytes_le());
        let proj_z_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.z.to_bytes_le());
        match proj_z_field.inverse() {
            Some(inverse_z) => {
                let aff_x = proj_x_field.mul(inverse_z);
                let aff_y = proj_y_field.mul(inverse_z);
                G1Affine_BLS12_377::new(aff_x, aff_y, false)
            }
            // z == 0: point at infinity.
            None => G1Affine_BLS12_377::default(),
        }
    }

    /// Converts from an arkworks projective point, normalising it to `z == 1`
    /// with `x / z^2` and `y / z^3`.
    ///
    /// An input with `z == 0` (the identity) maps to [`Self::zero`] instead of
    /// panicking.
    pub fn from_ark(ark: G1Projective_BLS12_377) -> Point_BLS12_377 {
        use ark_ff::Field;
        match ark.z.inverse() {
            Some(z_inv) => {
                let z_invsq = z_inv * z_inv;
                let z_invq3 = z_invsq * z_inv;
                Point_BLS12_377 {
                    x: BaseField_BLS12_377::from_ark((ark.x * z_invsq).into_repr()),
                    y: BaseField_BLS12_377::from_ark((ark.y * z_invq3).into_repr()),
                    z: BaseField_BLS12_377::one(),
                }
            }
            // z == 0: identity element.
            None => Self::zero(),
        }
    }
}
extern "C" {
    // Native point comparison; the `PartialEq` impl below treats any non-zero
    // return value as "equal".
    fn eq_bls12_377(point1: *const Point_BLS12_377, point2: *const Point_BLS12_377) -> c_uint;
}

impl PartialEq for Point_BLS12_377 {
    /// Delegates the comparison to the native `eq_bls12_377` routine.
    fn eq(&self, other: &Self) -> bool {
        // SAFETY: both pointers are derived from live references to
        // `#[repr(C)]` values that stay valid for the duration of the call.
        let verdict = unsafe { eq_bls12_377(self, other) };
        verdict != 0
    }
}
/// Affine point `(x, y)`; the type has no field for encoding the point at infinity.
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinity_BLS12_377 {
    pub x: BaseField_BLS12_377,
    pub y: BaseField_BLS12_377,
}

impl Default for PointAffineNoInfinity_BLS12_377 {
    /// Defaults to `(0, 0)`.
    fn default() -> Self {
        let origin = BaseField_BLS12_377::zero();
        Self { x: origin, y: origin }
    }
}
impl PointAffineNoInfinity_BLS12_377 {
    // TODO: generics
    /// Builds a point from the u32 limbs of `x` and `y`; short slices are
    /// zero-filled, over-long slices panic.
    pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
        Self {
            x: BaseField_BLS12_377::from_limbs(x),
            y: BaseField_BLS12_377::from_limbs(y),
        }
    }

    /// Returns the limbs of `x` followed by the limbs of `y`.
    pub fn limbs(&self) -> Vec<u32> {
        let mut all = self.x.limbs().to_vec();
        all.extend_from_slice(&self.y.limbs());
        all
    }

    /// Lifts the point to projective coordinates with `z = 1`.
    pub fn to_projective(&self) -> Point_BLS12_377 {
        let z = BaseField_BLS12_377::one();
        Point_BLS12_377 { x: self.x, y: self.y, z }
    }

    /// Converts to an arkworks affine point through `Fq::new`.
    // NOTE(review): `Fq::new` appears to take the big integer as the internal
    // representation without conversion, unlike `from_repr` in `to_ark_repr` — confirm.
    pub fn to_ark(&self) -> G1Affine_BLS12_377 {
        let x = Fq_BLS12_377::new(self.x.to_ark());
        let y = Fq_BLS12_377::new(self.y.to_ark());
        G1Affine_BLS12_377::new(x, y, false)
    }

    /// Converts to an arkworks affine point through `Fq::from_repr`.
    /// Panics if `from_repr` rejects either coordinate.
    pub fn to_ark_repr(&self) -> G1Affine_BLS12_377 {
        let x = Fq_BLS12_377::from_repr(self.x.to_ark()).unwrap();
        let y = Fq_BLS12_377::from_repr(self.y.to_ark()).unwrap();
        G1Affine_BLS12_377::new(x, y, false)
    }

    /// Copies the coordinates of an arkworks affine point; its `infinity`
    /// flag is not consulted.
    pub fn from_ark(p: &G1Affine_BLS12_377) -> Self {
        Self {
            x: BaseField_BLS12_377::from_ark(p.x.into_repr()),
            y: BaseField_BLS12_377::from_ark(p.y.into_repr()),
        }
    }
}
impl Point_BLS12_377 {
// TODO: generics
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
Point_BLS12_377 {
x: BaseField_BLS12_377 {
s: get_fixed_limbs(x),
},
y: BaseField_BLS12_377 {
s: get_fixed_limbs(y),
},
z: BaseField_BLS12_377 {
s: get_fixed_limbs(z),
},
}
}
pub fn from_xy_limbs(value: &[u32]) -> Point_BLS12_377 {
let l = value.len();
assert_eq!(l, 3 * BASE_LIMBS_BLS12_377, "length must be 3 * {}", BASE_LIMBS_BLS12_377);
Point_BLS12_377 {
x: BaseField_BLS12_377 {
s: value[..BASE_LIMBS_BLS12_377].try_into().unwrap(),
},
y: BaseField_BLS12_377 {
s: value[BASE_LIMBS_BLS12_377..BASE_LIMBS_BLS12_377 * 2].try_into().unwrap(),
},
z: BaseField_BLS12_377 {
s: value[BASE_LIMBS_BLS12_377 * 2..].try_into().unwrap(),
},
}
}
pub fn to_affine(&self) -> PointAffineNoInfinity_BLS12_377 {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity_BLS12_377 {
x: BaseField_BLS12_377::from_ark(ark_affine.x.into_repr()),
y: BaseField_BLS12_377::from_ark(ark_affine.y.into_repr()),
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_BLS12_377 {
PointAffineNoInfinity_BLS12_377 {
x: self.x,
y: self.y,
}
}
}
impl ScalarField_BLS12_377 {
    /// Builds a scalar from at most `SCALAR_LIMBS_BLS12_377` limbs,
    /// zero-filling the missing ones. Panics if `value` is longer than that.
    pub fn from_limbs(value: &[u32]) -> ScalarField_BLS12_377 {
        let s = get_fixed_limbs(value);
        Self { s }
    }
}
// Unit tests for the scalar conversions and the native point equality.
#[cfg(test)]
mod tests {
use ark_bls12_377::{Fr as Fr_BLS12_377};
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::bls12_377::{Point_BLS12_377, ScalarField_BLS12_377}};
// The limb-regrouping conversion and the transmute conversion must produce
// the same big integer for an 8-limb scalar.
#[test]
fn test_ark_scalar_convert() {
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
let scalar = ScalarField_BLS12_377::from_limbs(&limbs);
assert_eq!(
scalar.to_ark(),
scalar.to_ark_transmute(),
"{:08X?} {:08X?}",
scalar.to_ark(),
scalar.to_ark_transmute()
)
}
// Exercises `PartialEq for Point_BLS12_377`, which calls the native equality routine.
#[test]
#[allow(non_snake_case)]
fn test_point_equality() {
let left = Point_BLS12_377::zero();
let right = Point_BLS12_377::zero();
assert_eq!(left, right);
// (0, 2, 0) also has z == 0 and is expected to compare equal to zero().
let right = Point_BLS12_377::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
assert_eq!(left, right);
// A point with z == 1 must not compare equal to zero().
let right = Point_BLS12_377::from_limbs(
&[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
&[0; 12],
&[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
);
assert!(left != right);
}
}

View File

@@ -1,332 +0,0 @@
use std::ffi::c_uint;
use ark_bls12_381::{Fq as Fq_BLS12_381, Fr as Fr_BLS12_381, G1Affine as G1Affine_BLS12_381, G1Projective as G1Projective_BLS12_381};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
/// Field element stored as `NUM_LIMBS` 32-bit limbs.
///
/// `one()` puts the value 1 in limb 0, so limb 0 is the least-significant limb.
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field_BLS12_381<const NUM_LIMBS: usize> {
    pub s: [u32; NUM_LIMBS],
}

// SAFETY: the struct holds only a `[u32; NUM_LIMBS]` array, with no pointers or references.
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_BLS12_381<NUM_LIMBS> {}

impl<const NUM_LIMBS: usize> Default for Field_BLS12_381<NUM_LIMBS> {
    /// The default element is zero.
    fn default() -> Self {
        Self::zero()
    }
}
impl<const NUM_LIMBS: usize> Field_BLS12_381<NUM_LIMBS> {
    /// Returns the element whose limbs are all zero.
    pub fn zero() -> Self {
        Field_BLS12_381 {
            s: [0u32; NUM_LIMBS],
        }
    }

    /// Returns the element with limb 0 set to `1` and every other limb zero.
    ///
    /// # Panics
    /// Panics if `NUM_LIMBS` is 0, because limb 0 does not exist.
    pub fn one() -> Self {
        let mut s = [0u32; NUM_LIMBS];
        s[0] = 1;
        Field_BLS12_381 { s }
    }

    /// Serializes the limbs into `4 * NUM_LIMBS` bytes: limb 0 first, each
    /// limb in little-endian byte order.
    fn to_bytes_le(&self) -> Vec<u8> {
        // A single up-front allocation replaces the temporary `Vec` that was
        // previously built for every limb.
        let mut bytes = Vec::with_capacity(NUM_LIMBS * 4);
        for limb in &self.s {
            bytes.extend_from_slice(&limb.to_le_bytes());
        }
        bytes
    }
}
// Base-field elements use 12 32-bit limbs (384 bits of storage).
pub const BASE_LIMBS_BLS12_381: usize = 12;
// Scalar-field elements use 8 32-bit limbs (256 bits of storage).
pub const SCALAR_LIMBS_BLS12_381: usize = 8;
// Concrete field types: `Field_BLS12_381` fixed to the limb counts above.
pub type BaseField_BLS12_381 = Field_BLS12_381<BASE_LIMBS_BLS12_381>;
pub type ScalarField_BLS12_381 = Field_BLS12_381<SCALAR_LIMBS_BLS12_381>;
/// Copies `val` into a fixed-size limb array, zero-filling the limbs after
/// the end of `val`.
///
/// # Panics
/// Panics with "slice has too many elements" when `val` holds more than
/// `NUM_LIMBS` entries.
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    let count = val.len();
    if count > NUM_LIMBS {
        panic!("slice has too many elements");
    }
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..count].copy_from_slice(val);
    limbs
}
impl BaseField_BLS12_381 {
    /// Returns a copy of the raw 32-bit limbs.
    pub fn limbs(&self) -> [u32; BASE_LIMBS_BLS12_381] {
        self.s
    }

    /// Builds an element from at most `BASE_LIMBS_BLS12_381` limbs,
    /// zero-filling the missing ones. Panics if `value` is longer than that.
    pub fn from_limbs(value: &[u32]) -> Self {
        let s = get_fixed_limbs(value);
        Self { s }
    }

    /// Converts to `BigInteger384` by regrouping the limbs with
    /// `u32_vec_to_u64_vec`. Panics if the regrouped vector does not have the
    /// length `BigInteger384::new` expects.
    pub fn to_ark(&self) -> BigInteger384 {
        let limbs64 = u32_vec_to_u64_vec(&self.limbs());
        BigInteger384::new(limbs64.try_into().unwrap())
    }

    /// Converts from `BigInteger384` by regrouping its `u64` words with
    /// `u64_vec_to_u32_vec`.
    pub fn from_ark(ark: BigInteger384) -> Self {
        let limbs32 = u64_vec_to_u32_vec(&ark.0);
        Self::from_limbs(&limbs32)
    }
}
impl ScalarField_BLS12_381 {
    /// Returns a copy of the raw 32-bit limbs.
    pub fn limbs(&self) -> [u32; SCALAR_LIMBS_BLS12_381] {
        self.s
    }

    /// Converts to `BigInteger256` by regrouping the limbs with
    /// `u32_vec_to_u64_vec`. Panics if the regrouped vector does not have the
    /// length `BigInteger256::new` expects.
    pub fn to_ark(&self) -> BigInteger256 {
        let limbs64 = u32_vec_to_u64_vec(&self.limbs());
        BigInteger256::new(limbs64.try_into().unwrap())
    }

    /// Converts from `BigInteger256` by regrouping its `u64` words with
    /// `u64_vec_to_u32_vec`.
    pub fn from_ark(ark: BigInteger256) -> Self {
        let limbs32 = u64_vec_to_u32_vec(&ark.0);
        Self::from_limbs(&limbs32)
    }

    /// Reinterprets the limb array as a `BigInteger256` bit-for-bit.
    pub fn to_ark_transmute(&self) -> BigInteger256 {
        // SAFETY: `transmute` only compiles when both types have the same size.
        // NOTE(review): equality with `to_ark` additionally assumes
        // `BigInteger256` is a plain `u64` array wrapper and a little-endian host — confirm.
        unsafe { transmute::<Self, BigInteger256>(*self) }
    }

    /// Reinterprets a `BigInteger256` as a limb array bit-for-bit.
    pub fn from_ark_transmute(v: BigInteger256) -> ScalarField_BLS12_381 {
        // SAFETY: same size requirement and layout assumption as `to_ark_transmute`.
        unsafe { transmute::<BigInteger256, Self>(v) }
    }
}
/// Projective point `(x, y, z)` over the base field.
///
/// `to_ark_affine` recovers the affine coordinates as `x / z` and `y / z`;
/// `zero()` encodes the point at infinity as `(0, 1, 0)`.
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Point_BLS12_381 {
    pub x: BaseField_BLS12_381,
    pub y: BaseField_BLS12_381,
    pub z: BaseField_BLS12_381,
}

impl Default for Point_BLS12_381 {
    /// The default point is `zero()`.
    fn default() -> Self {
        Self::zero()
    }
}
impl Point_BLS12_381 {
    /// Returns the point at infinity, encoded as `(0, 1, 0)`.
    pub fn zero() -> Self {
        Point_BLS12_381 {
            x: BaseField_BLS12_381::zero(),
            y: BaseField_BLS12_381::one(),
            z: BaseField_BLS12_381::zero(),
        }
    }

    /// Alias for [`Self::zero`].
    pub fn infinity() -> Self {
        Self::zero()
    }

    /// Converts to the arkworks projective type by way of [`Self::to_ark_affine`].
    pub fn to_ark(&self) -> G1Projective_BLS12_381 {
        //TODO: generic conversion
        self.to_ark_affine().into_projective()
    }

    /// Converts to an arkworks affine point by dividing `x` and `y` by `z`.
    ///
    /// A point with `z == 0` (the point at infinity, e.g. [`Self::zero`]) has
    /// no inverse for `z`; it maps to the arkworks default affine point
    /// (its identity) instead of panicking.
    pub fn to_ark_affine(&self) -> G1Affine_BLS12_381 {
        //TODO: generic conversion
        use ark_ff::Field;
        use std::ops::Mul;
        let proj_x_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.x.to_bytes_le());
        let proj_y_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.y.to_bytes_le());
        let proj_z_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.z.to_bytes_le());
        match proj_z_field.inverse() {
            Some(inverse_z) => {
                let aff_x = proj_x_field.mul(inverse_z);
                let aff_y = proj_y_field.mul(inverse_z);
                G1Affine_BLS12_381::new(aff_x, aff_y, false)
            }
            // z == 0: point at infinity.
            None => G1Affine_BLS12_381::default(),
        }
    }

    /// Converts from an arkworks projective point, normalising it to `z == 1`
    /// with `x / z^2` and `y / z^3`.
    ///
    /// An input with `z == 0` (the identity) maps to [`Self::zero`] instead of
    /// panicking.
    pub fn from_ark(ark: G1Projective_BLS12_381) -> Point_BLS12_381 {
        use ark_ff::Field;
        match ark.z.inverse() {
            Some(z_inv) => {
                let z_invsq = z_inv * z_inv;
                let z_invq3 = z_invsq * z_inv;
                Point_BLS12_381 {
                    x: BaseField_BLS12_381::from_ark((ark.x * z_invsq).into_repr()),
                    y: BaseField_BLS12_381::from_ark((ark.y * z_invq3).into_repr()),
                    z: BaseField_BLS12_381::one(),
                }
            }
            // z == 0: identity element.
            None => Self::zero(),
        }
    }
}
extern "C" {
    // Native point comparison; the `PartialEq` impl below treats any non-zero
    // return value as "equal".
    fn eq_bls12_381(point1: *const Point_BLS12_381, point2: *const Point_BLS12_381) -> c_uint;
}

impl PartialEq for Point_BLS12_381 {
    /// Delegates the comparison to the native `eq_bls12_381` routine.
    fn eq(&self, other: &Self) -> bool {
        // SAFETY: both pointers are derived from live references to
        // `#[repr(C)]` values that stay valid for the duration of the call.
        let verdict = unsafe { eq_bls12_381(self, other) };
        verdict != 0
    }
}
/// Affine point `(x, y)`; the type has no field for encoding the point at infinity.
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinity_BLS12_381 {
    pub x: BaseField_BLS12_381,
    pub y: BaseField_BLS12_381,
}

impl Default for PointAffineNoInfinity_BLS12_381 {
    /// Defaults to `(0, 0)`.
    fn default() -> Self {
        let origin = BaseField_BLS12_381::zero();
        Self { x: origin, y: origin }
    }
}
impl PointAffineNoInfinity_BLS12_381 {
    // TODO: generics
    /// Builds a point from the u32 limbs of `x` and `y`; short slices are
    /// zero-filled, over-long slices panic.
    pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
        Self {
            x: BaseField_BLS12_381::from_limbs(x),
            y: BaseField_BLS12_381::from_limbs(y),
        }
    }

    /// Returns the limbs of `x` followed by the limbs of `y`.
    pub fn limbs(&self) -> Vec<u32> {
        let mut all = self.x.limbs().to_vec();
        all.extend_from_slice(&self.y.limbs());
        all
    }

    /// Lifts the point to projective coordinates with `z = 1`.
    pub fn to_projective(&self) -> Point_BLS12_381 {
        let z = BaseField_BLS12_381::one();
        Point_BLS12_381 { x: self.x, y: self.y, z }
    }

    /// Converts to an arkworks affine point through `Fq::new`.
    // NOTE(review): `Fq::new` appears to take the big integer as the internal
    // representation without conversion, unlike `from_repr` in `to_ark_repr` — confirm.
    pub fn to_ark(&self) -> G1Affine_BLS12_381 {
        let x = Fq_BLS12_381::new(self.x.to_ark());
        let y = Fq_BLS12_381::new(self.y.to_ark());
        G1Affine_BLS12_381::new(x, y, false)
    }

    /// Converts to an arkworks affine point through `Fq::from_repr`.
    /// Panics if `from_repr` rejects either coordinate.
    pub fn to_ark_repr(&self) -> G1Affine_BLS12_381 {
        let x = Fq_BLS12_381::from_repr(self.x.to_ark()).unwrap();
        let y = Fq_BLS12_381::from_repr(self.y.to_ark()).unwrap();
        G1Affine_BLS12_381::new(x, y, false)
    }

    /// Copies the coordinates of an arkworks affine point; its `infinity`
    /// flag is not consulted.
    pub fn from_ark(p: &G1Affine_BLS12_381) -> Self {
        Self {
            x: BaseField_BLS12_381::from_ark(p.x.into_repr()),
            y: BaseField_BLS12_381::from_ark(p.y.into_repr()),
        }
    }
}
impl Point_BLS12_381 {
// TODO: generics
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
Point_BLS12_381 {
x: BaseField_BLS12_381 {
s: get_fixed_limbs(x),
},
y: BaseField_BLS12_381 {
s: get_fixed_limbs(y),
},
z: BaseField_BLS12_381 {
s: get_fixed_limbs(z),
},
}
}
pub fn from_xy_limbs(value: &[u32]) -> Point_BLS12_381 {
let l = value.len();
assert_eq!(l, 3 * BASE_LIMBS_BLS12_381, "length must be 3 * {}", BASE_LIMBS_BLS12_381);
Point_BLS12_381 {
x: BaseField_BLS12_381 {
s: value[..BASE_LIMBS_BLS12_381].try_into().unwrap(),
},
y: BaseField_BLS12_381 {
s: value[BASE_LIMBS_BLS12_381..BASE_LIMBS_BLS12_381 * 2].try_into().unwrap(),
},
z: BaseField_BLS12_381 {
s: value[BASE_LIMBS_BLS12_381 * 2..].try_into().unwrap(),
},
}
}
pub fn to_affine(&self) -> PointAffineNoInfinity_BLS12_381 {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity_BLS12_381 {
x: BaseField_BLS12_381::from_ark(ark_affine.x.into_repr()),
y: BaseField_BLS12_381::from_ark(ark_affine.y.into_repr()),
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_BLS12_381 {
PointAffineNoInfinity_BLS12_381 {
x: self.x,
y: self.y,
}
}
}
impl ScalarField_BLS12_381 {
    /// Builds a scalar from at most `SCALAR_LIMBS_BLS12_381` limbs,
    /// zero-filling the missing ones. Panics if `value` is longer than that.
    pub fn from_limbs(value: &[u32]) -> ScalarField_BLS12_381 {
        let s = get_fixed_limbs(value);
        Self { s }
    }
}
// Unit tests for the scalar conversions and the native point equality.
#[cfg(test)]
mod tests {
use ark_bls12_381::{Fr as Fr_BLS12_381};
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::bls12_381::{Point_BLS12_381, ScalarField_BLS12_381}};
// The limb-regrouping conversion and the transmute conversion must produce
// the same big integer for an 8-limb scalar.
#[test]
fn test_ark_scalar_convert() {
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
let scalar = ScalarField_BLS12_381::from_limbs(&limbs);
assert_eq!(
scalar.to_ark(),
scalar.to_ark_transmute(),
"{:08X?} {:08X?}",
scalar.to_ark(),
scalar.to_ark_transmute()
)
}
// Exercises `PartialEq for Point_BLS12_381`, which calls the native equality routine.
#[test]
#[allow(non_snake_case)]
fn test_point_equality() {
let left = Point_BLS12_381::zero();
let right = Point_BLS12_381::zero();
assert_eq!(left, right);
// (0, 2, 0) also has z == 0 and is expected to compare equal to zero().
let right = Point_BLS12_381::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
assert_eq!(left, right);
// A point with z == 1 must not compare equal to zero().
let right = Point_BLS12_381::from_limbs(
&[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
&[0; 12],
&[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
);
assert!(left != right);
}
}

View File

@@ -1,312 +0,0 @@
use std::ffi::c_uint;
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger256, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
/// Field element stored as `NUM_LIMBS` 32-bit limbs.
///
/// `one()` puts the value 1 in limb 0, so limb 0 is the least-significant limb.
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field_BN254<const NUM_LIMBS: usize> {
    pub s: [u32; NUM_LIMBS],
}

// SAFETY: the struct holds only a `[u32; NUM_LIMBS]` array, with no pointers or references.
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_BN254<NUM_LIMBS> {}

impl<const NUM_LIMBS: usize> Default for Field_BN254<NUM_LIMBS> {
    /// The default element is zero.
    fn default() -> Self {
        Self::zero()
    }
}
impl<const NUM_LIMBS: usize> Field_BN254<NUM_LIMBS> {
pub fn zero() -> Self {
Field_BN254 {
s: [0u32; NUM_LIMBS],
}
}
pub fn one() -> Self {
let mut s = [0u32; NUM_LIMBS];
s[0] = 1;
Field_BN254 { s }
}
fn to_bytes_le(&self) -> Vec<u8> {
self.s
.iter()
.map(|s| s.to_le_bytes().to_vec())
.flatten()
.collect::<Vec<_>>()
}
}
/// Number of `u32` limbs in a BN254 base-field element (8 * 32 = 256 bits).
pub const BASE_LIMBS_BN254: usize = 8;
/// Number of `u32` limbs in a BN254 scalar-field element (8 * 32 = 256 bits).
pub const SCALAR_LIMBS_BN254: usize = 8;
// Both aliases resolve to the same concrete type because the two limb counts
// are equal, so inherent methods defined on one are available on the other.
/// Base-field element (point coordinates).
pub type BaseField_BN254 = Field_BN254<BASE_LIMBS_BN254>;
/// Scalar-field element.
pub type ScalarField_BN254 = Field_BN254<SCALAR_LIMBS_BN254>;
/// Copies `val` into a fixed-size limb array, zero-filling the tail when the
/// slice is shorter than `NUM_LIMBS`.
///
/// # Panics
/// Panics with "slice has too many elements" when `val` is longer than
/// `NUM_LIMBS`.
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    assert!(val.len() <= NUM_LIMBS, "slice has too many elements");
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..val.len()].copy_from_slice(val);
    limbs
}
impl ScalarField_BN254 {
    /// Returns a copy of the raw `u32` limbs.
    pub fn limbs(&self) -> [u32; SCALAR_LIMBS_BN254] {
        self.s
    }

    /// Converts to an arkworks `BigInteger256` by packing the `u32` limbs
    /// into `u64` words with `u32_vec_to_u64_vec`.
    pub fn to_ark(&self) -> BigInteger256 {
        let words = u32_vec_to_u64_vec(&self.s);
        BigInteger256::new(words.try_into().unwrap())
    }

    /// Builds an element from an arkworks `BigInteger256` by splitting its
    /// `u64` words into `u32` limbs with `u64_vec_to_u32_vec`.
    pub fn from_ark(ark: BigInteger256) -> Self {
        let limbs = u64_vec_to_u32_vec(&ark.0);
        Self::from_limbs(&limbs)
    }

    /// Reinterprets the limb array as a `BigInteger256` without repacking.
    pub fn to_ark_transmute(&self) -> BigInteger256 {
        // SAFETY: `transmute` checks at compile time that both types have the
        // same size, and both consist solely of plain integers, so every bit
        // pattern is valid.
        // NOTE(review): the result matches `to_ark` only on little-endian
        // targets, and relies on `BigInteger256` being laid out like its
        // inner `[u64; 4]` - confirm.
        unsafe { transmute::<Self, BigInteger256>(*self) }
    }

    /// Reinterprets a `BigInteger256` as a limb array without repacking.
    pub fn from_ark_transmute(v: BigInteger256) -> ScalarField_BN254 {
        // SAFETY: same reasoning as `to_ark_transmute`, in the opposite
        // direction.
        unsafe { transmute::<BigInteger256, Self>(v) }
    }
}
/// Curve point stored as three base-field coordinates `(x, y, z)`.
///
/// `to_ark_affine` recovers affine coordinates as `x / z` and `y / z`, and
/// `zero()` encodes the identity as `(0, 1, 0)`. The layout is `#[repr(C)]`
/// because values are handed by pointer to the native `eq_bn254` routine.
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Point_BN254 {
pub x: BaseField_BN254,
pub y: BaseField_BN254,
pub z: BaseField_BN254,
}
impl Default for Point_BN254 {
fn default() -> Self {
Point_BN254::zero()
}
}
impl Point_BN254 {
pub fn zero() -> Self {
Point_BN254 {
x: BaseField_BN254::zero(),
y: BaseField_BN254::one(),
z: BaseField_BN254::zero(),
}
}
pub fn infinity() -> Self {
Self::zero()
}
pub fn to_ark(&self) -> G1Projective_BN254 {
//TODO: generic conversion
self.to_ark_affine().into_projective()
}
pub fn to_ark_affine(&self) -> G1Affine_BN254 {
//TODO: generic conversion
use ark_ff::Field;
use std::ops::Mul;
let proj_x_field = Fq_BN254::from_le_bytes_mod_order(&self.x.to_bytes_le());
let proj_y_field = Fq_BN254::from_le_bytes_mod_order(&self.y.to_bytes_le());
let proj_z_field = Fq_BN254::from_le_bytes_mod_order(&self.z.to_bytes_le());
let inverse_z = proj_z_field.inverse().unwrap();
let aff_x = proj_x_field.mul(inverse_z);
let aff_y = proj_y_field.mul(inverse_z);
G1Affine_BN254::new(aff_x, aff_y, false)
}
pub fn from_ark(ark: G1Projective_BN254) -> Point_BN254 {
use ark_ff::Field;
let z_inv = ark.z.inverse().unwrap();
let z_invsq = z_inv * z_inv;
let z_invq3 = z_invsq * z_inv;
Point_BN254 {
x: BaseField_BN254::from_ark((ark.x * z_invsq).into_repr()),
y: BaseField_BN254::from_ark((ark.y * z_invq3).into_repr()),
z: BaseField_BN254::one(),
}
}
}
// Foreign point-equality routine resolved at link time; a non-zero return
// value is treated as "equal" by the `PartialEq` impl below.
// NOTE(review): presumably provided by the crate's native (CUDA/C++) library
// and expected to read `Point_BN254` with the same `#[repr(C)]` layout -
// confirm against the native source.
extern "C" {
fn eq_bn254(point1: *const Point_BN254, point2: *const Point_BN254) -> c_uint;
}
impl PartialEq for Point_BN254 {
    /// Delegates the comparison to the foreign `eq_bn254` routine; any
    /// non-zero result counts as equal.
    fn eq(&self, other: &Self) -> bool {
        let lhs: *const Point_BN254 = self;
        let rhs: *const Point_BN254 = other;
        // SAFETY: both pointers are derived from live references and stay
        // valid for the duration of the call.
        let verdict = unsafe { eq_bn254(lhs, rhs) };
        verdict != 0
    }
}
/// Affine point stored as just `(x, y)`.
///
/// There is no flag or third coordinate, so the point at infinity has no
/// dedicated encoding in this type. Equality is the derived field-wise
/// comparison.
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinity_BN254 {
pub x: BaseField_BN254,
pub y: BaseField_BN254,
}
impl Default for PointAffineNoInfinity_BN254 {
    /// The default value has both coordinates set to zero.
    fn default() -> Self {
        let zero = BaseField_BN254::zero();
        Self { x: zero, y: zero }
    }
}
impl PointAffineNoInfinity_BN254 {
// TODO: generics
///From u32 limbs x,y
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
PointAffineNoInfinity_BN254 {
x: BaseField_BN254 {
s: get_fixed_limbs(x),
},
y: BaseField_BN254 {
s: get_fixed_limbs(y),
},
}
}
pub fn limbs(&self) -> Vec<u32> {
[self.x.limbs(), self.y.limbs()].concat()
}
pub fn to_projective(&self) -> Point_BN254 {
Point_BN254 {
x: self.x,
y: self.y,
z: BaseField_BN254::one(),
}
}
pub fn to_ark(&self) -> G1Affine_BN254 {
G1Affine_BN254::new(Fq_BN254::new(self.x.to_ark()), Fq_BN254::new(self.y.to_ark()), false)
}
pub fn to_ark_repr(&self) -> G1Affine_BN254 {
G1Affine_BN254::new(
Fq_BN254::from_repr(self.x.to_ark()).unwrap(),
Fq_BN254::from_repr(self.y.to_ark()).unwrap(),
false,
)
}
pub fn from_ark(p: &G1Affine_BN254) -> Self {
PointAffineNoInfinity_BN254 {
x: BaseField_BN254::from_ark(p.x.into_repr()),
y: BaseField_BN254::from_ark(p.y.into_repr()),
}
}
}
impl Point_BN254 {
// TODO: generics
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
Point_BN254 {
x: BaseField_BN254 {
s: get_fixed_limbs(x),
},
y: BaseField_BN254 {
s: get_fixed_limbs(y),
},
z: BaseField_BN254 {
s: get_fixed_limbs(z),
},
}
}
pub fn from_xy_limbs(value: &[u32]) -> Point_BN254 {
let l = value.len();
assert_eq!(l, 3 * BASE_LIMBS_BN254, "length must be 3 * {}", BASE_LIMBS_BN254);
Point_BN254 {
x: BaseField_BN254 {
s: value[..BASE_LIMBS_BN254].try_into().unwrap(),
},
y: BaseField_BN254 {
s: value[BASE_LIMBS_BN254..BASE_LIMBS_BN254 * 2].try_into().unwrap(),
},
z: BaseField_BN254 {
s: value[BASE_LIMBS_BN254 * 2..].try_into().unwrap(),
},
}
}
pub fn to_affine(&self) -> PointAffineNoInfinity_BN254 {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity_BN254 {
x: BaseField_BN254::from_ark(ark_affine.x.into_repr()),
y: BaseField_BN254::from_ark(ark_affine.y.into_repr()),
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_BN254 {
PointAffineNoInfinity_BN254 {
x: self.x,
y: self.y,
}
}
}
impl ScalarField_BN254 {
    /// Builds a scalar from a slice of `u32` limbs.
    ///
    /// Short slices are zero-padded; a slice longer than the scalar width
    /// makes `get_fixed_limbs` panic.
    pub fn from_limbs(value: &[u32]) -> ScalarField_BN254 {
        let s = get_fixed_limbs(value);
        Self { s }
    }
}
#[cfg(test)]
mod tests {
    use crate::curves::bn254::{Point_BN254, ScalarField_BN254};

    /// The limb-by-limb conversion and the raw transmute must yield the same
    /// arkworks big integer.
    #[test]
    fn test_ark_scalar_convert() {
        let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
        let scalar = ScalarField_BN254::from_limbs(&limbs);
        assert_eq!(
            scalar.to_ark(),
            scalar.to_ark_transmute(),
            "{:08X?} {:08X?}",
            scalar.to_ark(),
            scalar.to_ark_transmute()
        );
    }

    /// Point equality is checked on the identity, on another `z = 0` encoding
    /// and on a point with `z = 1`.
    #[test]
    fn test_point_equality() {
        let left = Point_BN254::zero();
        let right = Point_BN254::zero();
        assert_eq!(left, right);

        // A different `y` with `z = 0` must still compare equal to the identity.
        let right = Point_BN254::from_limbs(&[0; 8], &[2, 0, 0, 0, 0, 0, 0, 0], &[0; 8]);
        assert_eq!(left, right);

        // A point with `z = 1` must not compare equal to the identity.
        let right = Point_BN254::from_limbs(
            &[2, 0, 0, 0, 0, 0, 0, 0],
            &[0; 8],
            &[1, 0, 0, 0, 0, 0, 0, 0],
        );
        assert!(left != right);
    }
}

View File

@@ -1,3 +0,0 @@
pub mod bls12_381;
pub mod bls12_377;
pub mod bn254;

336
src/field.rs Normal file
View File

@@ -0,0 +1,336 @@
use std::ffi::c_uint;
use std::mem::transmute;
use ark_bls12_381::{Fq, G1Affine, G1Projective};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
/// Fixed-width integer made of `NUM_LIMBS` 32-bit limbs.
///
/// Limb 0 is the least significant one: `one()` sets `s[0]` and
/// `to_bytes_le()` emits the limbs in index order as little-endian bytes.
/// `#[repr(C)]` pins the layout to that of a plain `[u32; NUM_LIMBS]`.
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field<const NUM_LIMBS: usize> {
/// Raw limbs, least significant first.
pub s: [u32; NUM_LIMBS],
}
// SAFETY: the struct is a `#[repr(C)]` wrapper around a fixed-size array of
// `u32` and holds no pointers or references.
// NOTE(review): confirm this meets every requirement in rustacuda's
// `DeviceCopy` documentation.
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field<NUM_LIMBS> {}
impl<const NUM_LIMBS: usize> Default for Field<NUM_LIMBS> {
fn default() -> Self {
Field::zero()
}
}
impl<const NUM_LIMBS: usize> Field<NUM_LIMBS> {
pub fn zero() -> Self {
Field {
s: [0u32; NUM_LIMBS],
}
}
pub fn one() -> Self {
let mut s = [0u32; NUM_LIMBS];
s[0] = 1;
Field { s }
}
fn to_bytes_le(&self) -> Vec<u8> {
self.s
.iter()
.map(|s| s.to_le_bytes().to_vec())
.flatten()
.collect::<Vec<_>>()
}
}
// The default (non-bn254) widths are gated with `cfg(not(...))` so that
// enabling the `bn254` feature selects the alternative values instead of
// defining each constant twice, which would not compile.
/// Number of `u32` limbs in a base-field element (12 * 32 = 384 bits).
#[cfg(not(feature = "bn254"))]
pub const BASE_LIMBS: usize = 12;
/// Number of `u32` limbs in a scalar-field element (8 * 32 = 256 bits).
#[cfg(not(feature = "bn254"))]
pub const SCALAR_LIMBS: usize = 8;
/// Number of `u32` limbs in a base-field element when `bn254` is enabled.
#[cfg(feature = "bn254")]
pub const BASE_LIMBS: usize = 8;
/// Number of `u32` limbs in a scalar-field element when `bn254` is enabled.
#[cfg(feature = "bn254")]
pub const SCALAR_LIMBS: usize = 8;
/// Base-field element (point coordinates), `BASE_LIMBS` limbs wide.
pub type BaseField = Field<BASE_LIMBS>;
/// Scalar-field element, `SCALAR_LIMBS` limbs wide.
pub type ScalarField = Field<SCALAR_LIMBS>;
/// Copies `val` into a fixed-size limb array, zero-filling the tail when the
/// slice is shorter than `NUM_LIMBS`.
///
/// # Panics
/// Panics with "slice has too many elements" when `val` is longer than
/// `NUM_LIMBS`.
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
    assert!(val.len() <= NUM_LIMBS, "slice has too many elements");
    let mut limbs = [0u32; NUM_LIMBS];
    limbs[..val.len()].copy_from_slice(val);
    limbs
}
impl BaseField {
    /// Returns a copy of the raw `u32` limbs.
    pub fn limbs(&self) -> [u32; BASE_LIMBS] {
        self.s
    }

    /// Builds an element from a slice of `u32` limbs.
    ///
    /// Short slices are zero-padded; a slice longer than `BASE_LIMBS` makes
    /// `get_fixed_limbs` panic.
    pub fn from_limbs(value: &[u32]) -> Self {
        let s = get_fixed_limbs(value);
        Self { s }
    }

    /// Converts to an arkworks `BigInteger384` by packing the `u32` limbs
    /// into `u64` words with `u32_vec_to_u64_vec`.
    pub fn to_ark(&self) -> BigInteger384 {
        let words = u32_vec_to_u64_vec(&self.s);
        BigInteger384::new(words.try_into().unwrap())
    }

    /// Builds an element from an arkworks `BigInteger384` by splitting its
    /// `u64` words into `u32` limbs with `u64_vec_to_u32_vec`.
    pub fn from_ark(ark: BigInteger384) -> Self {
        let limbs = u64_vec_to_u32_vec(&ark.0);
        Self::from_limbs(&limbs)
    }
}
impl ScalarField {
    /// Returns a copy of the raw `u32` limbs.
    pub fn limbs(&self) -> [u32; SCALAR_LIMBS] {
        self.s
    }

    /// Converts to an arkworks `BigInteger256` by packing the `u32` limbs
    /// into `u64` words with `u32_vec_to_u64_vec`.
    pub fn to_ark(&self) -> BigInteger256 {
        let words = u32_vec_to_u64_vec(&self.s);
        BigInteger256::new(words.try_into().unwrap())
    }

    /// Builds an element from an arkworks `BigInteger256` by splitting its
    /// `u64` words into `u32` limbs with `u64_vec_to_u32_vec`.
    pub fn from_ark(ark: BigInteger256) -> Self {
        let limbs = u64_vec_to_u32_vec(&ark.0);
        Self::from_limbs(&limbs)
    }

    /// Reinterprets the limb array as a `BigInteger256` without repacking.
    pub fn to_ark_transmute(&self) -> BigInteger256 {
        // SAFETY: `transmute` checks at compile time that both types have the
        // same size, and both consist solely of plain integers, so every bit
        // pattern is valid.
        // NOTE(review): the result matches `to_ark` only on little-endian
        // targets, and relies on `BigInteger256` being laid out like its
        // inner `[u64; 4]` - confirm.
        unsafe { transmute::<Self, BigInteger256>(*self) }
    }

    /// Reinterprets a `BigInteger256` as a limb array without repacking.
    pub fn from_ark_transmute(v: BigInteger256) -> ScalarField {
        // SAFETY: same reasoning as `to_ark_transmute`, in the opposite
        // direction.
        unsafe { transmute::<BigInteger256, Self>(v) }
    }
}
/// Curve point stored as three base-field coordinates `(x, y, z)`.
///
/// `to_ark_affine` recovers affine coordinates as `x / z` and `y / z`, and
/// `zero()` encodes the identity as `(0, 1, 0)`. The layout is `#[repr(C)]`
/// because values are handed by pointer to the native `eq` routine.
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Point {
pub x: BaseField,
pub y: BaseField,
pub z: BaseField,
}
impl Default for Point {
fn default() -> Self {
Point::zero()
}
}
impl Point {
pub fn zero() -> Self {
Point {
x: BaseField::zero(),
y: BaseField::one(),
z: BaseField::zero(),
}
}
pub fn infinity() -> Self {
Self::zero()
}
pub fn to_ark(&self) -> G1Projective {
//TODO: generic conversion
self.to_ark_affine().into_projective()
}
pub fn to_ark_affine(&self) -> G1Affine {
//TODO: generic conversion
use ark_ff::Field;
use std::ops::Mul;
let proj_x_field = Fq::from_le_bytes_mod_order(&self.x.to_bytes_le());
let proj_y_field = Fq::from_le_bytes_mod_order(&self.y.to_bytes_le());
let proj_z_field = Fq::from_le_bytes_mod_order(&self.z.to_bytes_le());
let inverse_z = proj_z_field.inverse().unwrap();
let aff_x = proj_x_field.mul(inverse_z);
let aff_y = proj_y_field.mul(inverse_z);
G1Affine::new(aff_x, aff_y, false)
}
pub fn from_ark(ark: G1Projective) -> Point {
use ark_ff::Field;
let z_inv = ark.z.inverse().unwrap();
let z_invsq = z_inv * z_inv;
let z_invq3 = z_invsq * z_inv;
Point {
x: BaseField::from_ark((ark.x * z_invsq).into_repr()),
y: BaseField::from_ark((ark.y * z_invq3).into_repr()),
z: BaseField::one(),
}
}
}
// Foreign point-equality routine resolved at link time; a non-zero return
// value is treated as "equal" by the `PartialEq` impl below.
// NOTE(review): presumably provided by the crate's native (CUDA/C++) library
// and expected to read `Point` with the same `#[repr(C)]` layout - confirm
// against the native source.
extern "C" {
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
}
impl PartialEq for Point {
    /// Delegates the comparison to the foreign `eq` routine; any non-zero
    /// result counts as equal.
    fn eq(&self, other: &Self) -> bool {
        let lhs: *const Point = self;
        let rhs: *const Point = other;
        // The bare `eq` below is the module-level extern function, not this
        // method, so there is no recursion.
        // SAFETY: both pointers are derived from live references and stay
        // valid for the duration of the call.
        let verdict = unsafe { eq(lhs, rhs) };
        verdict != 0
    }
}
/// Affine point stored as just `(x, y)`.
///
/// There is no flag or third coordinate, so the point at infinity has no
/// dedicated encoding in this type. Equality is the derived field-wise
/// comparison.
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinity {
pub x: BaseField,
pub y: BaseField,
}
impl Default for PointAffineNoInfinity {
    /// The default value has both coordinates set to zero.
    fn default() -> Self {
        let zero = BaseField::zero();
        Self { x: zero, y: zero }
    }
}
impl PointAffineNoInfinity {
// TODO: generics
///From u32 limbs x,y
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
PointAffineNoInfinity {
x: BaseField {
s: get_fixed_limbs(x),
},
y: BaseField {
s: get_fixed_limbs(y),
},
}
}
pub fn limbs(&self) -> Vec<u32> {
[self.x.limbs(), self.y.limbs()].concat()
}
pub fn to_projective(&self) -> Point {
Point {
x: self.x,
y: self.y,
z: BaseField::one(),
}
}
pub fn to_ark(&self) -> G1Affine {
G1Affine::new(Fq::new(self.x.to_ark()), Fq::new(self.y.to_ark()), false)
}
pub fn to_ark_repr(&self) -> G1Affine {
G1Affine::new(
Fq::from_repr(self.x.to_ark()).unwrap(),
Fq::from_repr(self.y.to_ark()).unwrap(),
false,
)
}
pub fn from_ark(p: &G1Affine) -> Self {
PointAffineNoInfinity {
x: BaseField::from_ark(p.x.into_repr()),
y: BaseField::from_ark(p.y.into_repr()),
}
}
}
impl Point {
// TODO: generics
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
Point {
x: BaseField {
s: get_fixed_limbs(x),
},
y: BaseField {
s: get_fixed_limbs(y),
},
z: BaseField {
s: get_fixed_limbs(z),
},
}
}
pub fn from_xy_limbs(value: &[u32]) -> Point {
let l = value.len();
assert_eq!(l, 3 * BASE_LIMBS, "length must be 3 * {}", BASE_LIMBS);
Point {
x: BaseField {
s: value[..BASE_LIMBS].try_into().unwrap(),
},
y: BaseField {
s: value[BASE_LIMBS..BASE_LIMBS * 2].try_into().unwrap(),
},
z: BaseField {
s: value[BASE_LIMBS * 2..].try_into().unwrap(),
},
}
}
pub fn to_affine(&self) -> PointAffineNoInfinity {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity {
x: BaseField::from_ark(ark_affine.x.into_repr()),
y: BaseField::from_ark(ark_affine.y.into_repr()),
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity {
PointAffineNoInfinity {
x: self.x,
y: self.y,
}
}
}
impl ScalarField {
    /// Builds a scalar from a slice of `u32` limbs.
    ///
    /// Short slices are zero-padded; a slice longer than `SCALAR_LIMBS` makes
    /// `get_fixed_limbs` panic.
    pub fn from_limbs(value: &[u32]) -> ScalarField {
        let s = get_fixed_limbs(value);
        Self { s }
    }
}
#[cfg(test)]
mod tests {
    use crate::field::{Point, ScalarField};

    /// The limb-by-limb conversion and the raw transmute must yield the same
    /// arkworks big integer.
    #[test]
    fn test_ark_scalar_convert() {
        let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
        let scalar = ScalarField::from_limbs(&limbs);
        assert_eq!(
            scalar.to_ark(),
            scalar.to_ark_transmute(),
            "{:08X?} {:08X?}",
            scalar.to_ark(),
            scalar.to_ark_transmute()
        );
    }

    /// Point equality is checked on the identity, on another `z = 0` encoding
    /// and on a point with `z = 1`.
    #[test]
    fn test_point_equality() {
        let left = Point::zero();
        let right = Point::zero();
        assert_eq!(left, right);

        // A different `y` with `z = 0` must still compare equal to the identity.
        let right = Point::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
        assert_eq!(left, right);

        // A point with `z = 1` must not compare equal to the identity.
        let right = Point::from_limbs(
            &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            &[0; 12],
            &[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        );
        assert!(left != right);
    }
}

1488
src/lib.rs

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -33,7 +33,7 @@ pub fn u64_vec_to_u32_vec(arr_u64: &[u64]) -> Vec<u32> {
mod tests {
use ark_ff::BigInteger256;
use crate::curves::bls12_381::{ScalarField_BLS12_381 as ScalarField};
use crate::field::ScalarField;
use super::*;
@@ -46,7 +46,7 @@ mod tests {
))
.limbs();
assert_eq!(arr_u32.to_vec(), s);
assert_eq!(arr_u32, s);
let arr_u64_expected = [
0x0FFFFFFF00000001,