Compare commits

..

2 Commits

Author SHA1 Message Date
Vitalii
ae5c0681ad batch mult 2023-07-12 17:42:12 +02:00
Vitalii
58a2492d60 enforce c++17 standard 2023-06-26 21:10:07 +02:00
72 changed files with 2052 additions and 11822 deletions

1
.gitignore vendored
View File

@@ -11,4 +11,3 @@
**/__pycache__/
**/.DS_Store
**/Cargo.lock
**/icicle/build/

View File

@@ -3,43 +3,26 @@ name = "icicle-utils"
version = "0.1.0"
edition = "2021"
authors = [ "Ingonyama" ]
description = "An implementation of the Ingonyama CUDA Library"
description = "An implementation of the Ingonyama Cuda Library"
homepage = "https://www.ingonyama.com"
repository = "https://github.com/ingonyama-zk/icicle"
[[bench]]
name = "ntt"
path = "benches/ntt.rs"
harness = false
[[bench]]
name = "msm"
path = "benches/msm.rs"
harness = false
[dependencies]
hex = "*"
hex="*"
ark-std = "0.3.0"
ark-ff = "0.3.0"
ark-poly = "0.3.0"
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
ark-bls12-381 = "0.3.0"
ark-bls12-377 = "0.3.0"
ark-bn254 = "0.3.0"
ark-bls12-381 = { version = "0.3.0", optional = true }
rustacuda = "0.1"
rustacuda_core = "0.1"
rustacuda_derive = "0.1"
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
rand="*" #TODO: sort dependencies that are not required by the release
[build-dependencies]
cc = { version = "1.0", features = ["parallel"] }
[dev-dependencies]
"criterion" = "0.4.0"
rand="*"
[features]
default = ["bls12_381"]
bls12_381 = ["ark-bls12-381/curve"]
g2 = []

View File

@@ -23,8 +23,8 @@ ICICLE is a CUDA implementation of general functions widely used in ZKP. ICICLE
- Affine: {x, y}
- Curves
- [BLS12-381]
- [BLS12-377]
- [BN254]
> NOTE: _Support for BN254 and BLS12-377 are planned_
## Build and usage
@@ -41,7 +41,7 @@ nvcc -o build/<ENTER_DIR_NAME> ./icicle/appUtils/ntt/ntt.cu ./icicle/appUtils/ms
### Testing the CUDA code
We are using [googletest] library for testing. To build and run [the test suite](./icicle/README.md) for finite field and elliptic curve arithmetic, run from the `icicle` folder:
We are using [googletest] library for testing. To build and run the test suite for finite field and elliptic curve arithmetic, run from the `icicle` folder:
```sh
mkdir -p build
@@ -76,76 +76,14 @@ cargo build --release
You'll find a release ready library at `target/release/libicicle_utils.rlib`.
To benchmark and test the functionality available in RUST, run:
```
cargo bench
cargo test -- --test-threads=1
```
The flag `--test-threads=1` is needed because currently some tests might interfere with one another inside the GPU.
### Example Usage
An example of using the Rust bindings library can be found in our [fast-danksharding implementation][FDI]
### Supporting Additional Curves
Supporting additional curves can be done as follows:
Create a JSON file with the curve parameters. The curve is defined by the following parameters:
- ``curve_name`` - e.g. ``bls12_381``.
- ``modolus_p`` - scalar field modolus (in decimal).
- ``bit_count_p`` - number of bits needed to represent `` modolus_p`` .
- ``limb_p`` - number of bytes needed to represent `` modolus_p`` (rounded).
- ``ntt_size`` - log of the maximal size subgroup of the scalar field.
- ``modolus_q`` - base field modulus (in decimal).
- ``bit_count_q`` - number of bits needed to represent `` modolus_q`` .
- ``limb_q`` number of bytes needed to represent `` modolus_p`` (rounded).
- ``weierstrass_b`` - Weierstrauss constant of the curve.
- ``gen_x`` - x-value of a generator element for the curve.
- ``gen_y`` - y-value of a generator element for the curve.
Here's an example for BLS12-381.
```
{
"curve_name" : "bls12_381",
"modolus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
"bit_count_p" : 255,
"limb_p" : 8,
"ntt_size" : 32,
"modolus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
"bit_count_q" : 381,
"limb_q" : 12,
"weierstrass_b" : 4,
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569
}
```
Save the parameters JSON file in ``curve_parameters``.
Then run the Python script ``new_curve_script.py `` from the main icicle folder:
```
python3 ./curve_parameters/new_curve_script_rust.py ./curve_parameters/bls12_381.json
```
The script does the following:
- Creates a folder in ``icicle/curves`` with the curve name, which contains all of the files needed for the supported operations in cuda.
- Adds the curve exported operations to ``icicle/curves/index.cu``.
- Creates a file with the curve name in ``src/curves`` with the relevant objects for the curve.
- Creates a test file with the curve name in ``src``.
Testing the new curve could be done by running the tests in ``tests_curve_name`` (e.g. ``tests_bls12_381``).
## Contributions
Join our [Discord Server](https://discord.gg/Y4SkbDf2Ff) and find us on the icicle channel. We will be happy to work together to support your use case and talk features, bugs and design.
### Hall of Fame
- [Robik](https://github.com/robik75), for his on-going support and mentorship
## License
ICICLE is distributed under the terms of the MIT License.

View File

@@ -1,50 +0,0 @@
extern crate criterion;
use criterion::{criterion_group, criterion_main, Criterion};
use icicle_utils::{set_up_scalars, generate_random_points, commit_batch, get_rng, field::BaseField};
#[cfg(feature = "g2")]
use icicle_utils::{commit_batch_g2, field::ExtensionField};
use rustacuda::prelude::*;
const LOG_MSM_SIZES: [usize; 1] = [12];
const BATCH_SIZES: [usize; 2] = [128, 256];
fn bench_msm(c: &mut Criterion) {
let mut group = c.benchmark_group("MSM");
for log_msm_size in LOG_MSM_SIZES {
for batch_size in BATCH_SIZES {
let msm_size = 1 << log_msm_size;
let (scalars, _, _) = set_up_scalars(msm_size, 0, false);
let batch_scalars = vec![scalars; batch_size].concat();
let mut d_scalars = DeviceBuffer::from_slice(&batch_scalars[..]).unwrap();
let points = generate_random_points::<BaseField>(msm_size, get_rng(None));
let batch_points = vec![points; batch_size].concat();
let mut d_points = DeviceBuffer::from_slice(&batch_points[..]).unwrap();
#[cfg(feature = "g2")]
let g2_points = generate_random_points::<ExtensionField>(msm_size, get_rng(None));
#[cfg(feature = "g2")]
let g2_batch_points = vec![g2_points; batch_size].concat();
#[cfg(feature = "g2")]
let mut d_g2_points = DeviceBuffer::from_slice(&g2_batch_points[..]).unwrap();
group.sample_size(30).bench_function(
&format!("MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|b| b.iter(|| commit_batch(&mut d_points, &mut d_scalars, batch_size))
);
#[cfg(feature = "g2")]
group.sample_size(10).bench_function(
&format!("G2 MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|b| b.iter(|| commit_batch_g2(&mut d_g2_points, &mut d_scalars, batch_size))
);
}
}
}
criterion_group!(msm_benches, bench_msm);
criterion_main!(msm_benches);

View File

@@ -1,33 +0,0 @@
extern crate criterion;
use criterion::{criterion_group, criterion_main, Criterion};
use icicle_utils::{interpolate_scalars_batch, interpolate_points_batch, set_up_scalars, set_up_points};
const LOG_NTT_SIZES: [usize; 1] = [15];
const BATCH_SIZES: [usize; 2] = [8, 16];
fn bench_ntt(c: &mut Criterion) {
let mut group = c.benchmark_group("NTT");
for log_ntt_size in LOG_NTT_SIZES {
for batch_size in BATCH_SIZES {
let ntt_size = 1 << log_ntt_size;
let (_, mut d_evals, mut d_domain) = set_up_scalars(ntt_size * batch_size, log_ntt_size, true);
let (_, mut d_points_evals, _) = set_up_points(ntt_size * batch_size, log_ntt_size, true);
group.sample_size(100).bench_function(
&format!("Scalar NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size))
);
group.sample_size(10).bench_function(
&format!("EC NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| interpolate_points_batch(&mut d_points_evals, &mut d_domain, batch_size))
);
}
}
}
criterion_group!(ntt_benches, bench_ntt);
criterion_main!(ntt_benches);

View File

@@ -7,7 +7,8 @@ fn main() {
println!("cargo:rerun-if-env-changed=CXXFLAGS");
println!("cargo:rerun-if-changed=./icicle");
let arch_type = env::var("ARCH_TYPE").unwrap_or(String::from("native"));
let arch_type = env::var("ARCH_TYPE")
.unwrap_or(String::from("native"));
let mut arch = String::from("-arch=");
arch.push_str(&arch_type);
@@ -16,14 +17,15 @@ fn main() {
println!("Compiling icicle library using arch: {}", &arch);
if cfg!(feature = "g2") {
nvcc.define("G2_DEFINED", None);
}
nvcc.cuda(true);
nvcc.debug(false);
nvcc.flag(&arch);
nvcc.flag("--std=c++17");
nvcc.files([
"./icicle/curves/index.cu",
"./icicle/appUtils/ntt/ntt.cu",
"./icicle/appUtils/msm/msm.cu",
"./icicle/appUtils/vector_manipulation/ve_mod_mult.cu",
"./icicle/primitives/projective.cu",
]);
nvcc.compile("ingo_icicle"); //TODO: extension??
}

View File

@@ -1,13 +0,0 @@
{
"curve_name" : "bls12_377",
"modolus_p" : 8444461749428370424248824938781546531375899335154063827935233455917409239041,
"bit_count_p" : 253,
"limb_p" : 8,
"ntt_size" : 32,
"modolus_q" : 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177,
"bit_count_q" : 377,
"limb_q" : 12,
"weierstrass_b" : 1,
"gen_x" : 81937999373150964239938255573465948239988671502647976594219695644855304257327692006745978603320413799295628339695,
"gen_y" : 241266749859715473739788878240585681733927191168601896383759122102112907357779751001206799952863815012735208165030
}

View File

@@ -1,13 +0,0 @@
{
"curve_name" : "bls12_381",
"modolus_p" : 52435875175126190479447740508185965837690552500527637822603658699938581184513,
"bit_count_p" : 255,
"limb_p" : 8,
"ntt_size" : 32,
"modolus_q" : 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787,
"bit_count_q" : 381,
"limb_q" : 12,
"weierstrass_b" : 4,
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569
}

View File

@@ -1,13 +0,0 @@
{
"curve_name" : "bn254",
"modolus_p" : 21888242871839275222246405745257275088548364400416034343698204186575808495617,
"bit_count_p" : 254,
"limb_p" : 8,
"ntt_size" : 16,
"modolus_q" : 21888242871839275222246405745257275088696311157297823662689037894645226208583,
"bit_count_q" : 254,
"limb_q" : 8,
"weierstrass_b" : 3,
"gen_x" : 1,
"gen_y" : 2
}

View File

@@ -1,203 +0,0 @@
import json
import math
import os
from sympy.ntheory import isprime, primitive_root
import subprocess
import random
import sys
data = None
with open(sys.argv[1]) as json_file:
data = json.load(json_file)
curve_name = data["curve_name"]
modolus_p = data["modolus_p"]
bit_count_p = data["bit_count_p"]
limb_p = data["limb_p"]
ntt_size = data["ntt_size"]
modolus_q = data["modolus_q"]
bit_count_q = data["bit_count_q"]
limb_q = data["limb_q"]
weierstrass_b = data["weierstrass_b"]
gen_x = data["gen_x"]
gen_y = data["gen_y"]
def to_hex(val, length):
x = str(hex(val))[2:]
if len(x) % 8 != 0:
x = "0" * (8-len(x) % 8) + x
if len(x) != length:
x = "0" * (length-len(x)) + x
n = 8
chunks = [x[i:i+n] for i in range(0, len(x), n)][::-1]
s = ""
for c in chunks:
s += "0x" + c + ", "
return s
def get_root_of_unity(order: int) -> int:
assert (modolus_p - 1) % order == 0
return pow(5, (modolus_p - 1) // order, modolus_p)
def create_field_parameters_struct(modulus, modulus_bits_count,limbs,ntt,size,name):
s = " struct "+name+"{\n"
s += " static constexpr unsigned limbs_count = " + str(limbs)+";\n"
s += " static constexpr storage<limbs_count> modulus = {"+to_hex(modulus,8*limbs)[:-2]+"};\n"
s += " static constexpr storage<limbs_count> modulus_2 = {"+to_hex(modulus*2,8*limbs)[:-2]+"};\n"
s += " static constexpr storage<limbs_count> modulus_4 = {"+to_hex(modulus*4,8*limbs)[:-2]+"};\n"
s += " static constexpr storage<2*limbs_count> modulus_wide = {"+to_hex(modulus,8*limbs*2)[:-2]+"};\n"
s += " static constexpr storage<2*limbs_count> modulus_sqared = {"+to_hex(modulus*modulus,8*limbs)[:-2]+"};\n"
s += " static constexpr storage<2*limbs_count> modulus_sqared_2 = {"+to_hex(modulus*modulus*2,8*limbs)[:-2]+"};\n"
s += " static constexpr storage<2*limbs_count> modulus_sqared_4 = {"+to_hex(modulus*modulus*2*2,8*limbs)[:-2]+"};\n"
s += " static constexpr unsigned modulus_bits_count = "+str(modulus_bits_count)+";\n"
m = int(math.floor(int(pow(2,2*modulus_bits_count) // modulus)))
s += " static constexpr storage<limbs_count> m = {"+ to_hex(m,8*limbs)[:-2] +"};\n"
s += " static constexpr storage<limbs_count> one = {"+ to_hex(1,8*limbs)[:-2] +"};\n"
s += " static constexpr storage<limbs_count> zero = {"+ to_hex(0,8*limbs)[:-2] +"};\n"
if ntt:
for k in range(size):
omega = get_root_of_unity(int(pow(2,k+1)))
s += " static constexpr storage<limbs_count> omega"+str(k+1)+"= {"+ to_hex(omega,8*limbs)[:-2]+"};\n"
for k in range(size):
omega = get_root_of_unity(int(pow(2,k+1)))
s += " static constexpr storage<limbs_count> omega_inv"+str(k+1)+"= {"+ to_hex(pow(omega, -1, modulus),8*limbs)[:-2]+"};\n"
for k in range(size):
s += " static constexpr storage<limbs_count> inv"+str(k+1)+"= {"+ to_hex(pow(int(pow(2,k+1)), -1, modulus),8*limbs)[:-2]+"};\n"
s+=" };\n"
return s
def create_gen():
s = " struct group_generator {\n"
s += " static constexpr storage<fq_config::limbs_count> generator_x = {"+to_hex(gen_x,8*limb_q)[:-2]+ "};\n"
s += " static constexpr storage<fq_config::limbs_count> generator_y = {"+to_hex(gen_y,8*limb_q)[:-2]+ "};\n"
s+=" };\n"
return s
def get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b):
file_content = ""
file_content += "#pragma once\n#include \"../../utils/storage.cuh\"\n"
file_content += "namespace PARAMS_"+curve_name.upper()+"{\n"
file_content += create_field_parameters_struct(modolus_p,bit_count_p,limb_p,True,ntt_size,"fp_config")
file_content += create_field_parameters_struct(modolus_q,bit_count_q,limb_q,False,0,"fq_config")
file_content += " static constexpr unsigned weierstrass_b = " + str(weierstrass_b)+ ";\n"
file_content += create_gen()
file_content+="}\n"
return file_content
# Create Cuda interface
newpath = "./icicle/curves/"+curve_name
if not os.path.exists(newpath):
os.makedirs(newpath)
fc = get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b)
text_file = open("./icicle/curves/"+curve_name+"/params.cuh", "w")
n = text_file.write(fc)
text_file.close()
with open("./icicle/curves/curve_template/lde.cu", "r") as lde_file:
content = lde_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./icicle/curves/"+curve_name+"/lde.cu", "w")
n = text_file.write(content)
text_file.close()
with open("./icicle/curves/curve_template/msm.cu", "r") as msm_file:
content = msm_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./icicle/curves/"+curve_name+"/msm.cu", "w")
n = text_file.write(content)
text_file.close()
with open("./icicle/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_file:
content = ve_mod_mult_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./icicle/curves/"+curve_name+"/ve_mod_mult.cu", "w")
n = text_file.write(content)
text_file.close()
namespace = '#include "params.cuh"\n'+'''namespace CURVE_NAME_U {
typedef Field<PARAMS_CURVE_NAME_U::fp_config> scalar_field_t;\
typedef scalar_field_t scalar_t;\
typedef Field<PARAMS_CURVE_NAME_U::fq_config> point_field_t;
typedef Projective<point_field_t, scalar_field_t, PARAMS_CURVE_NAME_U::group_generator, PARAMS_CURVE_NAME_U::weierstrass_b> projective_t;
typedef Affine<point_field_t> affine_t;
}'''
with open('./icicle/curves/'+curve_name+'/curve_config.cuh', 'w') as f:
f.write(namespace.replace("CURVE_NAME_U",curve_name.upper()))
eq = '''
#include <cuda.h>\n
#include "curve_config.cuh"\n
#include "../../primitives/projective.cuh"\n
extern "C" bool eq_CURVE_NAME_L(CURVE_NAME_U::projective_t *point1, CURVE_NAME_U::projective_t *point2)
{
return (*point1 == *point2);
}'''
with open('./icicle/curves/'+curve_name+'/projective.cu', 'w') as f:
f.write(eq.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
supported_operations = '''
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"
'''
with open('./icicle/curves/'+curve_name+'/supported_operations.cu', 'w') as f:
f.write(supported_operations.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
with open('./icicle/curves/index.cu', 'a') as f:
f.write('\n#include "'+curve_name.lower()+'/supported_operations.cu"')
# Create Rust interface and tests
if limb_p == limb_q:
with open("./src/curve_templates/curve_same_limbs.rs", "r") as curve_file:
content = curve_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
content = content.replace("_limbs_p",str(limb_p * 8 * 4))
content = content.replace("limbs_p",str(limb_p))
text_file = open("./src/curves/"+curve_name+".rs", "w")
n = text_file.write(content)
text_file.close()
else:
with open("./src/curve_templates/curve_different_limbs.rs", "r") as curve_file:
content = curve_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
content = content.replace("_limbs_p",str(limb_p * 8 * 4))
content = content.replace("limbs_p",str(limb_p))
content = content.replace("_limbs_q",str(limb_q * 8 * 4))
content = content.replace("limbs_q",str(limb_q))
text_file = open("./src/curves/"+curve_name+".rs", "w")
n = text_file.write(content)
text_file.close()
with open("./src/curve_templates/test.rs", "r") as test_file:
content = test_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./src/test_"+curve_name+".rs", "w")
n = text_file.write(content)
text_file.close()
with open('./src/curves/mod.rs', 'a') as f:
f.write('\n pub mod ' + curve_name + ';')
with open('./src/lib.rs', 'a') as f:
f.write('\npub mod ' + curve_name + ';')

View File

@@ -1,4 +1,5 @@
cmake_minimum_required(VERSION 3.16)
cmake_minimum_required(VERSION 3.14)
project(icicle)
# GoogleTest requires at least C++14
set(CMAKE_CXX_STANDARD 17)
@@ -8,9 +9,9 @@ set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
# add the target cuda architectures
# each additional architecture increases the compilation time and output file size
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
set(CMAKE_CUDA_ARCHITECTURES 80)
endif ()
project(icicle LANGUAGES CUDA CXX)
project(bellman-cuda LANGUAGES CUDA CXX)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
set(CMAKE_CUDA_FLAGS_RELEASE "")

View File

@@ -1,81 +0,0 @@
# Tests
```sh
mkdir -p build; cmake -S . -B build; cmake --build build; cd build && ctest; cd ..
```
## Prerequisites on Ubuntu
Before proceeding, make sure the following software installed:
1. CMake at least version 3.16, which can be downloaded from [cmake.org](https://cmake.org/files/)
It is recommended to have the latest version installed.
2. [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu) version 12.0 or newer.
3. GCC - version 9 or newer recommended.
## Troubleshooting
In case you encounter problems during the build, please follow the points below to troubleshoot:
### 1 - Check CMake log files
If there are issues with the CMake configuration, please check the logs which are located in the `./build/CMakeFiles` directory. Depending on the version of CMake, the log file may have a different name. For example, for CMake version 3.20, one of log files is called `CMakeConfigureLog.yaml`.
### 2 - Check for conflicting GCC versions
Make sure that there are no conflicting versions of GCC installed. You can use the following commands to install and switch between different versions:
```sh
sudo update-alternatives --install /usr/bin/gcc gcc /home/linuxbrew/.linuxbrew/bin/gcc-12 12
sudo update-alternatives --install /usr/bin/g++ g++ /home/linuxbrew/.linuxbrew/bin/g++-12 12
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 9
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 9
```
Then you can select with the following command
```sh
sudo update-alternatives --config gcc
```
### 3 - Check for conflicting binaries in PATH
Make sure that there are no conflicting binaries in the PATH environment variable. For example, if `/home/linuxbrew/.linuxbrew/bin` precedes `/usr/bin/` in the PATH, it will override the `update-alternatives` settings.
### 4 - Add nvvm path to PATH
If you encounter the error `cicc not found`, make sure to add the nvvm path to PATH. For example, for CUDA version 12.1, the nvvm path is `/usr/local/cuda-12.1/nvvm/bin`.
### 5 - Add CUDA libraries to the project
If you encounter the error `Failed to extract nvcc implicit link line`, add the following code to the CMakeLists.txt file after enabling CUDA:
```c
check_language(CUDA)
if(CMAKE_CUDA_COMPILER)
enable_language(CUDA)
find_package(CUDAToolkit)
target_link_libraries(project CUDA::cudart)
target_link_libraries(project CUDA::cuda_driver)
else()
message(STATUS "No CUDA compiler found")
endif()
```
### 6 - Fix update alternatives
If the `update-alternatives` settings are broken, you can try to fix them with the following command:
`yes '' | update-alternatives --force --all`
### 7 - ..bin/crt/link.stub: No such file or directory
If you encounter the error, check if the `$CUDA_HOME/bin/crt/link.stub` file is available.
Othrewise create a symlink. For example, if the CUDA toolkit is installed with apt-get to the default path, you can create a symlink with the following command:
`ln -sf /usr/local/cuda-12.1/bin/crt/link.stub /usr/lib/nvidia-cuda-toolkit/bin/crt/link.stub`
Alternatively, you can replace the old CUDA root with a symlink to the new CUDA installation with the following command:
`ln -sf /usr/local/cuda-12.1/ /usr/lib/nvidia-cuda-toolkit/`

View File

@@ -1,4 +0,0 @@
test_msm:
mkdir -p work
nvcc -o work/test_msm -I. tests/msm_test.cu
work/test_msm

View File

@@ -1,9 +1,3 @@
#ifndef MSM
#define MSM
#pragma once
#include <stdexcept>
#include <cuda.h>
#include "../../primitives/affine.cuh"
#include <iostream>
#include <vector>
#include <cub/device/device_radix_sort.cuh>
@@ -12,6 +6,7 @@
#include "../../utils/cuda_utils.cuh"
#include "../../primitives/projective.cuh"
#include "../../primitives/field.cuh"
#include "../../curves/curve_config.cuh"
#include "msm.cuh"
@@ -83,17 +78,16 @@ __global__ void split_scalars_kernel(unsigned *buckets_indices, unsigned *point_
//this kernel adds up the points in each bucket
template <typename P, typename A>
// __global__ void accumulate_buckets_kernel(P *__restrict__ buckets, unsigned *__restrict__ bucket_offsets,
// unsigned *__restrict__ bucket_sizes, unsigned *__restrict__ single_bucket_indices, unsigned *__restrict__ point_indices, A *__restrict__ points, unsigned nof_buckets, unsigned batch_size, unsigned msm_idx_shift){
__global__ void accumulate_buckets_kernel(P *buckets, unsigned *bucket_offsets, unsigned *bucket_sizes, unsigned *single_bucket_indices, unsigned *point_indices, A *points, unsigned nof_buckets, unsigned *nof_buckets_to_compute, unsigned msm_idx_shift){
__global__ void accumulate_buckets_kernel(P *__restrict__ buckets, unsigned *__restrict__ bucket_offsets,
unsigned *__restrict__ bucket_sizes, unsigned *__restrict__ single_bucket_indices, unsigned *__restrict__ point_indices, A *__restrict__ points, unsigned nof_buckets, unsigned batch_size, unsigned msm_idx_shift){
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid ==0) printf("nof buckets to comp: %u", *nof_buckets_to_compute);
if (tid>=*nof_buckets_to_compute){
return;
}
unsigned msm_index = single_bucket_indices[tid]>>msm_idx_shift;
unsigned bucket_index = msm_index * nof_buckets + (single_bucket_indices[tid]&((1<<msm_idx_shift)-1));
unsigned bucket_size = bucket_sizes[tid];
if (tid>=nof_buckets*batch_size || bucket_size == 0){ //if the bucket is empty we don't need to continue
return;
}
unsigned bucket_offset = bucket_offsets[tid];
for (unsigned i = 0; i < bucket_sizes[tid]; i++) //add the relevant points starting from the relevant offset up to the bucket size
{
@@ -107,8 +101,7 @@ template <typename P>
__global__ void big_triangle_sum_kernel(P* buckets, P* final_sums, unsigned nof_bms, unsigned c){
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid>=nof_bms) return;
// printf("%u",tid);
if (tid>nof_bms) return;
P line_sum = buckets[(tid+1)*(1<<c)-1];
final_sums[tid] = line_sum;
for (unsigned i = (1<<c)-2; i >0; i--)
@@ -154,21 +147,16 @@ __global__ void final_accumulation_kernel(P* final_sums, P* final_results, unsig
//this function computes msm using the bucket method
template <typename S, typename P, typename A>
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device) {
void bucket_method_msm(unsigned bitsize, unsigned c, S *h_scalars, A *h_points, unsigned size, P*h_final_result){
S *d_scalars;
A *d_points;
if (!on_device) {
//copy scalars and point to gpu
cudaMalloc(&d_scalars, sizeof(S) * size);
cudaMalloc(&d_points, sizeof(A) * size);
cudaMemcpy(d_scalars, scalars, sizeof(S) * size, cudaMemcpyHostToDevice);
cudaMemcpy(d_points, points, sizeof(A) * size, cudaMemcpyHostToDevice);
}
else {
d_scalars = scalars;
d_points = points;
}
//copy scalars and point to gpu
S *scalars;
A *points;
cudaMalloc(&scalars, sizeof(S) * size);
cudaMalloc(&points, sizeof(A) * size);
cudaMemcpy(scalars, h_scalars, sizeof(S) * size, cudaMemcpyHostToDevice);
cudaMemcpy(points, h_points, sizeof(A) * size, cudaMemcpyHostToDevice);
P *buckets;
//compute number of bucket modules and number of buckets in each module
@@ -180,7 +168,7 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
nof_bms++;
}
unsigned nof_buckets = nof_bms<<c;
cudaMalloc(&buckets, sizeof(P) * nof_buckets);
cudaMalloc(&buckets, sizeof(P) * nof_buckets);
// launch the bucket initialization kernel with maximum threads
unsigned NUM_THREADS = 1 << 10;
@@ -195,15 +183,13 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
//split scalars into digits
NUM_THREADS = 1 << 10;
NUM_BLOCKS = (size * (nof_bms+1) + NUM_THREADS - 1) / NUM_THREADS;
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(bucket_indices + size, point_indices + size, d_scalars, size, msm_log_size,
nof_bms, bm_bitsize, c); //+size - leaving the first bm free for the out of place sort later
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(bucket_indices + size, point_indices + size, scalars, size, msm_log_size, nof_bms, bm_bitsize, c); //+size - leaving the first bm free for the out of place sort later
//sort indices - the indices are sorted from smallest to largest in order to group together the points that belong to each bucket
unsigned *sort_indices_temp_storage{};
size_t sort_indices_temp_storage_bytes;
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + size, bucket_indices,
point_indices + size, point_indices, size);
cudaMalloc(&sort_indices_temp_storage, sort_indices_temp_storage_bytes);
for (unsigned i = 0; i < nof_bms; i++) {
unsigned offset_out = i * size;
@@ -239,10 +225,10 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, nof_buckets);
cudaFree(offsets_temp_storage);
NUM_THREADS = 1 << 8;
//launch the accumulation kernel with maximum threads
NUM_THREADS = 1 << 8;
NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, point_indices,
d_points, nof_buckets, nof_buckets_to_compute, c+bm_bitsize);
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, point_indices, points, nof_buckets, 1, c+bm_bitsize);
#ifdef SSM_SUM
//sum each bucket
@@ -267,26 +253,19 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, final_results, nof_bms, c);
#endif
P* d_final_result;
if (!on_device)
cudaMalloc(&d_final_result, sizeof(P));
P* final_result;
cudaMalloc(&final_result, sizeof(P));
//launch the double and add kernel, a single thread
final_accumulation_kernel<P, S><<<1,1>>>(final_results, on_device ? final_result : d_final_result, 1, nof_bms, c);
final_accumulation_kernel<P, S><<<1,1>>>(final_results, final_result,1, nof_bms, c);
//copy final result to host
cudaDeviceSynchronize();
if (!on_device)
cudaMemcpy(final_result, d_final_result, sizeof(P), cudaMemcpyDeviceToHost);
std::cout<<"final res "<<(*final_result)<<std::endl;
cudaMemcpy(h_final_result, final_result, sizeof(P), cudaMemcpyDeviceToHost);
//free memory
if (!on_device) {
cudaFree(d_points);
cudaFree(d_scalars);
cudaFree(d_final_result);
}
cudaFree(buckets);
cudaFree(points);
cudaFree(scalars);
cudaFree(bucket_indices);
cudaFree(point_indices);
cudaFree(single_bucket_indices);
@@ -294,26 +273,23 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
cudaFree(nof_buckets_to_compute);
cudaFree(bucket_offsets);
cudaFree(final_results);
cudaFree(final_result);
}
//this function computes msm using the bucket method
template <typename S, typename P, typename A>
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device){
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *h_scalars, A *h_points, unsigned batch_size, unsigned msm_size, P*h_final_results){
//copy scalars and point to gpu
S *scalars;
A *points;
unsigned total_size = batch_size * msm_size;
S *d_scalars;
A *d_points;
if (!on_device) {
//copy scalars and point to gpu
cudaMalloc(&d_scalars, sizeof(S) * total_size);
cudaMalloc(&d_points, sizeof(A) * total_size);
cudaMemcpy(d_scalars, scalars, sizeof(S) * total_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_points, points, sizeof(A) * total_size, cudaMemcpyHostToDevice);
}
else {
d_scalars = scalars;
d_points = points;
}
cudaMalloc(&scalars, sizeof(S) * total_size);
cudaMalloc(&points, sizeof(A) * total_size);
cudaMemcpy(scalars, h_scalars, sizeof(S) * total_size, cudaMemcpyHostToDevice);
cudaMemcpy(points, h_points, sizeof(A) * total_size, cudaMemcpyHostToDevice);
P *buckets;
//compute number of bucket modules and number of buckets in each module
@@ -340,8 +316,7 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
//split scalars into digits
NUM_THREADS = 1 << 8;
NUM_BLOCKS = (total_size * nof_bms + msm_size + NUM_THREADS - 1) / NUM_THREADS;
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(bucket_indices + msm_size, point_indices + msm_size, d_scalars, total_size,
msm_log_size, nof_bms, bm_bitsize, c); //+size - leaving the first bm free for the out of place sort later
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(bucket_indices + msm_size, point_indices + msm_size, scalars, total_size, msm_log_size, nof_bms, bm_bitsize, c); //+size - leaving the first bm free for the out of place sort later
//sort indices - the indices are sorted from smallest to largest in order to group together the points that belong to each bucket
unsigned *sorted_bucket_indices;
@@ -354,6 +329,12 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + msm_size, sorted_bucket_indices,
point_indices + msm_size, sorted_point_indices, total_size * nof_bms);
cudaMalloc(&sort_indices_temp_storage, sort_indices_temp_storage_bytes);
// for (unsigned i = 0; i < nof_bms*batch_size; i++) {
// unsigned offset_out = i * msm_size;
// unsigned offset_in = offset_out + msm_size;
// cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + offset_in,
// bucket_indices + offset_out, point_indices + offset_in, point_indices + offset_out, msm_size);
// }
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + msm_size, sorted_bucket_indices,
point_indices + msm_size, sorted_point_indices, total_size * nof_bms);
cudaFree(sort_indices_temp_storage);
@@ -387,8 +368,7 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
//launch the accumulation kernel with maximum threads
NUM_THREADS = 1 << 8;
NUM_BLOCKS = (total_nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, sorted_point_indices,
d_points, nof_buckets, total_nof_buckets_to_compute, c+bm_bitsize);
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, sorted_point_indices, points, nof_buckets, batch_size, c+bm_bitsize);
#ifdef SSM_SUM
//sum each bucket
@@ -413,27 +393,21 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bm_sums, nof_bms*batch_size, c);
#endif
P* d_final_results;
if (!on_device)
cudaMalloc(&d_final_results, sizeof(P)*batch_size);
P* final_results;
cudaMalloc(&final_results, sizeof(P)*batch_size);
//launch the double and add kernel, a single thread for each msm
NUM_THREADS = 1<<8;
NUM_BLOCKS = (batch_size + NUM_THREADS - 1) / NUM_THREADS;
final_accumulation_kernel<P, S><<<NUM_BLOCKS,NUM_THREADS>>>(bm_sums, on_device ? final_results : d_final_results, batch_size, nof_bms, c);
final_accumulation_kernel<P, S><<<NUM_BLOCKS,NUM_THREADS>>>(bm_sums, final_results, batch_size, nof_bms, c);
//copy final result to host
cudaDeviceSynchronize();
if (!on_device)
cudaMemcpy(final_results, d_final_results, sizeof(P)*batch_size, cudaMemcpyDeviceToHost);
cudaMemcpy(h_final_results, final_results, sizeof(P)*batch_size, cudaMemcpyDeviceToHost);
//free memory
if (!on_device) {
cudaFree(d_points);
cudaFree(d_scalars);
cudaFree(d_final_results);
}
cudaFree(buckets);
cudaFree(points);
cudaFree(scalars);
cudaFree(bucket_indices);
cudaFree(point_indices);
cudaFree(sorted_bucket_indices);
@@ -443,6 +417,7 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
cudaFree(total_nof_buckets_to_compute);
cudaFree(bucket_offsets);
cudaFree(bm_sums);
cudaFree(final_results);
}
@@ -459,6 +434,7 @@ __global__ void to_proj_kernel(A* affine_points, P* proj_points, unsigned N){
//the function computes msm using ssm
template <typename S, typename P, typename A>
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result){ //works up to 2^8
S *scalars;
A *a_points;
P *p_points;
@@ -503,12 +479,12 @@ void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result){ //w
template <typename A, typename S, typename P>
void reference_msm(S* scalars, A* a_points, unsigned size){
P *points = new P[size];
// P points[size];
P points[size];
for (unsigned i = 0; i < size ; i++)
{
points[i] = P::from_affine(a_points[i]);
}
P res = P::zero();
@@ -522,30 +498,60 @@ void reference_msm(S* scalars, A* a_points, unsigned size){
}
unsigned get_optimal_c(const unsigned size) {
if (size < 17)
return 1;
// return 15;
return ceil(log2(size))-4;
}
//this function is used to compute msms of size larger than 256
template <typename S, typename P, typename A>
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device){
unsigned c = get_optimal_c(size);
void large_msm(S* scalars, A* points, unsigned size, P* result){
unsigned c = 10;
// unsigned c = 6;
// unsigned bitsize = 32;
unsigned bitsize = 255;
bucket_method_msm(bitsize, c, scalars, points, size, result, on_device);
bucket_method_msm(bitsize, c, scalars, points, size, result);
}
// this function is used to compute a batches of msms of size larger than 256
template <typename S, typename P, typename A>
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device){
unsigned c = get_optimal_c(msm_size);
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result){
unsigned c = 10;
// unsigned c = 6;
// unsigned bitsize = 32;
unsigned bitsize = 255;
batched_bucket_method_msm(bitsize, c, scalars, points, batch_size, msm_size, result, on_device);
batched_bucket_method_msm(bitsize, c, scalars, points, batch_size, msm_size, result);
}
extern "C"
int msm_cuda(projective_t *out, affine_t points[],
scalar_t scalars[], size_t count, size_t device_id = 0)
{
try
{
if (count>256){
large_msm<scalar_t, projective_t, affine_t>(scalars, points, count, out);
}
else{
short_msm<scalar_t, projective_t, affine_t>(scalars, points, count, out);
}
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int msm_batch_cuda(projective_t* out, affine_t points[],
scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
try
{
batched_large_msm<scalar_t, projective_t, affine_t>(scalars, points, batch_size, msm_size, out);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif

View File

@@ -1,22 +1,16 @@
#ifndef MSM_H
#define MSM_H
#pragma once
#include <stdexcept>
#include <cuda.h>
#include "../../primitives/projective.cuh"
#include "../../primitives/affine.cuh"
#include "../../curves/curve_config.cuh"
template <typename S, typename P, typename A>
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device);
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result);
template <typename S, typename P, typename A>
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device);
void large_msm(S* scalars, A* points, unsigned size, P* result);
template <typename S, typename P, typename A>
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device);
template <typename S, typename P, typename A>
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device);
template <typename S, typename P, typename A>
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, bool on_device);
template <typename A, typename S, typename P>
void reference_msm(S* scalars, A* a_points, unsigned size);
#endif
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result);

View File

@@ -1,256 +0,0 @@
#include <iostream>
#include <chrono>
#include <vector>
#include "msm.cu"
#include "../../utils/cuda_utils.cuh"
#include "../../primitives/projective.cuh"
#include "../../primitives/field.cuh"
#include "../../curves/bls12_381/curve_config.cuh"
using namespace BLS12_381;
struct fake_point
{
unsigned val = 0;
__host__ __device__ inline fake_point operator+(fake_point fp) {
return {val+fp.val};
}
__host__ __device__ fake_point zero() {
fake_point p;
return p;
}
};
std::ostream& operator<<(std::ostream &strm, const fake_point &a) {
return strm <<a.val;
}
struct fake_scalar
{
unsigned val = 0;
unsigned bitsize = 32;
// __host__ __device__ unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width){
// return (val>>(digit_num*digit_width))&((1<<digit_width)-1);
// }
__host__ __device__ int get_scalar_digit(int digit_num, int digit_width){
return (val>>(digit_num*digit_width))&((1<<digit_width)-1);
}
__host__ __device__ inline fake_point operator*(fake_point fp) {
fake_point p1;
fake_point p2;
unsigned x = val;
if (x == 0) return fake_point().zero();
unsigned i = 1;
unsigned c_bit = (x & (1<<(bitsize-1)))>>(bitsize-1);
while (c_bit==0 && i<bitsize){
i++;
c_bit = (x & (1<<(bitsize-i)))>>(bitsize-i);
}
p1 = fp;
p2 = p1+p1;
while (i<bitsize){
i++;
c_bit = (x & (1<<(bitsize-i)))>>(bitsize-i);
if (c_bit){
p1 = p1 + p2;
p2 = p2 + p2;
}
else {
p2 = p1 + p2;
p1 = p1 + p1;
}
}
return p1;
}
};
class Dummy_Scalar {
public:
static constexpr unsigned NBITS = 32;
unsigned x;
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar) {
os << scalar.x;
return os;
}
HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width) {
return (x>>(digit_num*digit_width))&((1<<digit_width)-1);
}
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2) {
return {p1.x+p2.x};
}
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) {
return (p1.x == p2.x);
}
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) {
return (p1.x == p2);
}
// static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar &scalar) {
// return {Dummy_Scalar::neg(point.x)};
// }
static HOST_INLINE Dummy_Scalar rand_host() {
return {(unsigned)rand()};
}
};
class Dummy_Projective {
public:
Dummy_Scalar x;
static HOST_DEVICE_INLINE Dummy_Projective zero() {
return {0};
}
static HOST_DEVICE_INLINE Dummy_Projective to_affine(const Dummy_Projective &point) {
return {point.x};
}
static HOST_DEVICE_INLINE Dummy_Projective from_affine(const Dummy_Projective &point) {
return {point.x};
}
// static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
// return {Dummy_Scalar::neg(point.x)};
// }
friend HOST_DEVICE_INLINE Dummy_Projective operator+(Dummy_Projective p1, const Dummy_Projective& p2) {
return {p1.x+p2.x};
}
// friend HOST_DEVICE_INLINE Dummy_Projective operator-(Dummy_Projective p1, const Dummy_Projective& p2) {
// return p1 + neg(p2);
// }
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Projective& point) {
os << point.x;
return os;
}
friend HOST_DEVICE_INLINE Dummy_Projective operator*(Dummy_Scalar scalar, const Dummy_Projective& point) {
Dummy_Projective res = zero();
#ifdef CUDA_ARCH
#pragma unroll
#endif
for (int i = 0; i < Dummy_Scalar::NBITS; i++) {
if (i > 0) {
res = res + res;
}
if (scalar.get_scalar_digit(Dummy_Scalar::NBITS - i - 1, 1)) {
res = res + point;
}
}
return res;
}
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Projective& p1, const Dummy_Projective& p2) {
return (p1.x == p2.x);
}
static HOST_DEVICE_INLINE bool is_zero(const Dummy_Projective &point) {
return point.x == 0;
}
static HOST_INLINE Dummy_Projective rand_host() {
return {(unsigned)rand()};
}
};
//switch between dummy and real:
typedef scalar_t test_scalar;
typedef projective_t test_projective;
typedef affine_t test_affine;
// typedef Dummy_Scalar test_scalar;
// typedef Dummy_Projective test_projective;
// typedef Dummy_Projective test_affine;
int main()
{
unsigned batch_size = 4;
unsigned msm_size = 1<<15;
unsigned N = batch_size*msm_size;
test_scalar *scalars = new test_scalar[N];
test_affine *points = new test_affine[N];
for (unsigned i=0;i<N;i++){
scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
points[i] = (i%msm_size < 10)? test_projective::to_affine(test_projective::rand_host()): points[i-10];
// scalars[i] = test_scalar::rand_host();
// points[i] = test_projective::to_affine(test_projective::rand_host());
}
std::cout<<"finished generating"<<std::endl;
// projective_t *short_res = (projective_t*)malloc(sizeof(projective_t));
// test_projective *large_res = (test_projective*)malloc(sizeof(test_projective));
test_projective large_res[batch_size];
test_projective batched_large_res[batch_size];
// fake_point *large_res = (fake_point*)malloc(sizeof(fake_point));
// fake_point batched_large_res[256];
// short_msm<scalar_t, projective_t, affine_t>(scalars, points, N, short_res);
for (unsigned i=0;i<batch_size;i++){
large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
// std::cout<<"final result large"<<std::endl;
// std::cout<<test_projective::to_affine(*large_res)<<std::endl;
}
auto begin = std::chrono::high_resolution_clock::now();
batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
// large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false);
auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
std::cout<<"final results batched large"<<std::endl;
bool success = true;
for (unsigned i = 0; i < batch_size; i++)
{
std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
std::cout<<"good"<<std::endl;
}
else{
std::cout<<"miss"<<std::endl;
std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
success = false;
}
}
if (success){
std::cout<<"success!"<<std::endl;
}
// std::cout<<batched_large_res[0]<<std::endl;
// std::cout<<batched_large_res[1]<<std::endl;
// std::cout<<projective_t::to_affine(batched_large_res[0])<<std::endl;
// std::cout<<projective_t::to_affine(batched_large_res[1])<<std::endl;
// std::cout<<"final result short"<<std::endl;
// std::cout<<pr<<std::endl;
return 0;
}

View File

@@ -1,182 +0,0 @@
#ifndef LDE
#define LDE
#include <cuda.h>
#include "ntt.cuh"
#include "lde.cuh"
#include "../vector_manipulation/ve_mod_mult.cuh"
/**
* Interpolate a batch of polynomials from their evaluations on the same subgroup.
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs.
* @param d_out The variable to write coefficients of the resulting polynomials into (the coefficients are in bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
* @param d_evaluations Input array of evaluations of all polynomials of type E (elements).
* @param d_domain Domain on which the polynomials are evaluated. Must be a subgroup.
* @param n Length of `d_domain` array, also equal to the number of evaluations of each polynomial.
* @param batch_size The size of the batch; the length of `d_evaluations` is `n` * `batch_size`.
*/
template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluations, S * d_domain, unsigned n, unsigned batch_size) {
uint32_t logn = uint32_t(log(n) / log(2));
cudaMemcpy(d_out, d_evaluations, sizeof(E) * n * batch_size, cudaMemcpyDeviceToDevice);
int NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
int NUM_BLOCKS = batch_size * max(int((n / 2) / NUM_THREADS), 1);
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
{
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>>(d_out, n, d_domain, n, NUM_BLOCKS, s, false);
}
NUM_BLOCKS = (n * batch_size + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>> (d_out, n * batch_size, S::inv_log_size(logn));
return 0;
}
/**
* Interpolate a polynomial from its evaluations on a subgroup.
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs.
* @param d_out The variable to write coefficients of the resulting polynomial into (the coefficients are in bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
* @param d_evaluations Input array of evaluations that have type E (elements).
* @param d_domain Domain on which the polynomial is evaluated. Must be a subgroup.
* @param n Length of `d_evaluations` and the size `d_domain` arrays (they should have equal length).
*/
template <typename E, typename S> int interpolate(E * d_out, E * d_evaluations, S * d_domain, unsigned n) {
return interpolate_batch <E, S> (d_out, d_evaluations, d_domain, n, 1);
}
template < typename E > __global__ void fill_array(E * arr, E val, uint32_t n) {
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid < n) {
arr[tid] = val;
}
}
/**
* Evaluate a batch of polynomials on the same coset.
* @param d_out The evaluations of the polynomials on coset `u` * `d_domain`.
* @param d_coefficients Input array of coefficients of all polynomials of type E (elements) to be evaluated in-place on a coset.
* @param d_domain Domain on which the polynomials are evaluated (see `coset` flag). Must be a subgroup.
* @param domain_size Length of `d_domain` array, on which the polynomial is computed.
* @param n The number of coefficients, which might be different from `domain_size`.
* @param batch_size The size of the batch; the length of `d_coefficients` is `n` * `batch_size`.
* @param coset The flag that indicates whether to evaluate on a coset. If false, evaluate on a subgroup `d_domain`.
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
*/
template <typename E, typename S>
int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, unsigned batch_size, bool coset, S * coset_powers) {
uint32_t logn = uint32_t(log(domain_size) / log(2));
if (domain_size > n) {
// allocate and initialize an array of stream handles to parallelize data copying across batches
cudaStream_t *memcpy_streams = (cudaStream_t *) malloc(batch_size * sizeof(cudaStream_t));
for (unsigned i = 0; i < batch_size; i++)
{
cudaStreamCreate(&(memcpy_streams[i]));
cudaMemcpyAsync(&d_out[i * domain_size], &d_coefficients[i * n], n * sizeof(E), cudaMemcpyDeviceToDevice, memcpy_streams[i]);
uint32_t NUM_THREADS = MAX_THREADS_BATCH;
uint32_t NUM_BLOCKS = (domain_size - n + NUM_THREADS - 1) / NUM_THREADS;
fill_array <E> <<<NUM_BLOCKS, NUM_THREADS, 0, memcpy_streams[i]>>> (&d_out[i * domain_size + n], E::zero(), domain_size - n);
cudaStreamSynchronize(memcpy_streams[i]);
cudaStreamDestroy(memcpy_streams[i]);
}
} else
cudaMemcpy(d_out, d_coefficients, sizeof(E) * domain_size * batch_size, cudaMemcpyDeviceToDevice);
if (coset)
batch_vector_mult(coset_powers, d_out, domain_size, batch_size);
int NUM_THREADS = min(domain_size / 2, MAX_THREADS_BATCH);
int chunks = max(int((domain_size / 2) / NUM_THREADS), 1);
int NUM_BLOCKS = batch_size * chunks;
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
{
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>>(d_out, domain_size, d_domain, domain_size, batch_size * chunks, logn - s - 1, true);
}
return 0;
}
/**
* Evaluate a polynomial on a coset.
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs, so the order of outputs is bit-reversed.
* @param d_out The evaluations of the polynomial on coset `u` * `d_domain`.
* @param d_coefficients Input array of coefficients of a polynomial of type E (elements).
* @param d_domain Domain on which the polynomial is evaluated (see `coset` flag). Must be a subgroup.
* @param domain_size Length of `d_domain` array, on which the polynomial is computed.
* @param n The number of coefficients, which might be different from `domain_size`.
* @param coset The flag that indicates whether to evaluate on a coset. If false, evaluate on a subgroup `d_domain`.
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
*/
template <typename E, typename S>
int evaluate(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, bool coset, S * coset_powers) {
return evaluate_batch <E, S> (d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers);
}
template <typename S>
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n) {
return interpolate(d_out, d_evaluations, d_domain, n);
}
template <typename S>
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
template <typename E, typename S>
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n) {
return interpolate(d_out, d_evaluations, d_domain, n);
}
template <typename E, typename S>
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
template <typename S>
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n) {
S* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
template <typename S>
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size) {
S* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
template <typename E, typename S>
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n) {
S* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
template <typename E, typename S>
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, unsigned batch_size) {
S* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
template <typename S>
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers) {
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
template <typename E, typename S>
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers) {
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
template <typename E, typename S>
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers) {
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
template <typename E, typename S>
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers) {
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
#endif

View File

@@ -1,46 +0,0 @@
#ifndef LDE_H
#define LDE_H
#pragma once
template <typename S>
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n);
template <typename S>
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size);
template <typename E, typename S>
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n);
template <typename E, typename S>
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size);
template <typename S>
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n);
template <typename S>
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size);
template <typename E, typename S>
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n);
template <typename E, typename S>
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, unsigned batch_size);
template <typename S>
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers);
template <typename S>
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers);
template <typename E, typename S>
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers);
template <typename E, typename S>
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers);
#endif

View File

@@ -0,0 +1,55 @@
#include <cuda.h>
#include "ntt.cuh"
extern "C" int ntt_cuda(scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_cuda(projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ecntt_end2end(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ntt_batch_cuda(scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_batch_cuda(projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ecntt_end2end_batch(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}

View File

@@ -1,10 +1,22 @@
#ifndef NTT
#define NTT
#pragma once
#include "../../curves/curve_config.cuh"
const uint32_t MAX_NUM_THREADS = 1024;
const uint32_t MAX_THREADS_BATCH = 256;
/**
* Copy twiddle factors array to device (returns a pointer to the device allocated array).
* @param twiddles input empty array.
* @param n_twiddles length of twiddle factors.
*/
scalar_t * copy_twiddle_factors_to_device(scalar_t * twiddles, uint32_t n_twiddles) {
size_t size_twiddles = n_twiddles * sizeof(scalar_t);
scalar_t * d_twiddles;
cudaMalloc( & d_twiddles, size_twiddles);
cudaMemcpy(d_twiddles, twiddles, size_twiddles, cudaMemcpyHostToDevice);
return d_twiddles;
}
/**
* Computes the twiddle factors.
* Outputs: d_twiddles[i] = omega^i.
@@ -12,11 +24,11 @@ const uint32_t MAX_THREADS_BATCH = 256;
* @param n_twiddles number of twiddle factors.
* @param omega multiplying factor.
*/
template < typename S > __global__ void twiddle_factors_kernel(S * d_twiddles, uint32_t n_twiddles, S omega) {
__global__ void twiddle_factors_kernel(scalar_t * d_twiddles, uint32_t n_twiddles, scalar_t omega) {
for (uint32_t i = 0; i < n_twiddles; i++) {
d_twiddles[i] = S::zero();
d_twiddles[i] = scalar_t::zero();
}
d_twiddles[0] = S::one();
d_twiddles[0] = scalar_t::one();
for (uint32_t i = 0; i < n_twiddles - 1; i++) {
d_twiddles[i + 1] = omega * d_twiddles[i];
}
@@ -28,36 +40,37 @@ const uint32_t MAX_THREADS_BATCH = 256;
* @param n_twiddles number of twiddle factors.
* @param omega multiplying factor.
*/
template < typename S > S * fill_twiddle_factors_array(uint32_t n_twiddles, S omega) {
size_t size_twiddles = n_twiddles * sizeof(S);
S * d_twiddles;
scalar_t * fill_twiddle_factors_array(uint32_t n_twiddles, scalar_t omega) {
size_t size_twiddles = n_twiddles * sizeof(scalar_t);
scalar_t * d_twiddles;
cudaMalloc( & d_twiddles, size_twiddles);
twiddle_factors_kernel<S> <<< 1, 1 >>> (d_twiddles, n_twiddles, omega);
twiddle_factors_kernel <<< 1, 1 >>> (d_twiddles, n_twiddles, omega);
return d_twiddles;
}
/**
* Returns the bit reversed order of a number.
* Returens the bit reversed order of a number.
* for example: on inputs num = 6 (110 in binary) and logn = 3
* the function should return 3 (011 in binary.)
* @param num some number with bit representation of size logn.
* @param logn length of bit representation of `num`.
* @return bit reveresed order or `num`.
* @param logn length of bit representation of num.
* @return bit reveresed order or num.
*/
__device__ __host__ uint32_t reverseBits(uint32_t num, uint32_t logn) {
unsigned int reverse_num = 0;
for (uint32_t i = 0; i < logn; i++) {
int i;
for (i = 0; i < logn; i++) {
if ((num & (1 << i))) reverse_num |= 1 << ((logn - 1) - i);
}
return reverse_num;
}
/**
* Returns the bit reversal ordering of the input array.
* Returens the bit reversal ordering of the input array.
* for example: on input ([a[0],a[1],a[2],a[3]], 4, 2) it returns
* [a[0],a[3],a[2],a[1]] (elements in indices 3,1 swhich places).
* @param arr array of some object of type T of size which is a power of 2.
* @param n length of `arr`.
* @param n length of arr.
* @param logn log(n).
* @return A new array which is the bit reversed version of input array.
*/
@@ -70,55 +83,14 @@ template < typename T > T * template_reverse_order(T * arr, uint32_t n, uint32_t
return arrReversed;
}
template < typename T > __global__ void reverse_order_kernel(T* arr, T* arr_reversed, uint32_t n, uint32_t logn, uint32_t batch_size) {
int threadId = (blockIdx.x * blockDim.x) + threadIdx.x;
if (threadId < n * batch_size) {
int idx = threadId % n;
int batch_idx = threadId / n;
int idx_reversed = __brev(idx) >> (32 - logn);
arr_reversed[batch_idx * n + idx_reversed] = arr[batch_idx * n + idx];
}
}
/**
* Bit-reverses a batch of input arrays in-place inside GPU.
* for example: on input array ([a[0],a[1],a[2],a[3]], 4, 2) it returns
* [a[0],a[3],a[2],a[1]] (elements at indices 3 and 1 swhich places).
* @param arr batch of arrays of some object of type T. Should be on GPU.
* @param n length of `arr`.
* @param logn log(n).
* @param batch_size the size of the batch.
*/
template < typename T > void reverse_order_batch(T* arr, uint32_t n, uint32_t logn, uint32_t batch_size) {
T* arr_reversed;
cudaMalloc(&arr_reversed, n * batch_size * sizeof(T));
int number_of_threads = MAX_THREADS_BATCH;
int number_of_blocks = (n * batch_size + number_of_threads - 1) / number_of_threads;
reverse_order_kernel <<<number_of_blocks, number_of_threads>>> (arr, arr_reversed, n, logn, batch_size);
cudaMemcpy(arr, arr_reversed, n * batch_size * sizeof(T), cudaMemcpyDeviceToDevice);
cudaFree(arr_reversed);
}
/**
* Bit-reverses an input array in-place inside GPU.
* for example: on array ([a[0],a[1],a[2],a[3]], 4, 2) it returns
* [a[0],a[3],a[2],a[1]] (elements at indices 3 and 1 swhich places).
* @param arr array of some object of type T of size which is a power of 2. Should be on GPU.
* @param n length of `arr`.
* @param logn log(n).
*/
template < typename T > void reverse_order(T* arr, uint32_t n, uint32_t logn) {
reverse_order_batch(arr, n, logn, 1);
}
/**
* Cooley-Tukey butterfly kernel.
* Cooley-Tuckey butterfly kernel.
* @param arr array of objects of type E (elements).
* @param twiddles array of twiddle factors of type S (scalars).
* @param n size of arr.
* @param n_twiddles size of omegas.
* @param m "pair distance" - indicate distance of butterflies inputs.
* @param i Cooley-Tukey FFT stage number.
* @param i Cooley-TUckey FFT stage number.
* @param max_thread_num maximal number of threads in stage.
*/
template < typename E, typename S > __global__ void template_butterfly_kernel(E * arr, S * twiddles, uint32_t n, uint32_t n_twiddles, uint32_t m, uint32_t i, uint32_t max_thread_num) {
@@ -134,34 +106,35 @@ template < typename E, typename S > __global__ void template_butterfly_kernel(E
}
/**
* Multiply the elements of an input array by a scalar in-place.
* @param arr input array.
* @param n size of arr.
* @param n_inv scalar of type S (scalar).
* Set the elements of arr to be the elements of res multiplied by some scalar.
* @param arr input array.
* @param res result array.
* @param n size of arr.
* @param n_inv scalar of type S (scalar).
*/
template < typename E, typename S > __global__ void template_normalize_kernel(E * arr, uint32_t n, S scalar) {
template < typename E, typename S > __global__ void template_normalize_kernel(E * arr, E * res, uint32_t n, S scalar) {
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid < n) {
arr[tid] = scalar * arr[tid];
res[tid] = scalar * arr[tid];
}
}
/**
* Cooley-Tukey NTT.
* Cooley-Tuckey NTT.
* NOTE! this function assumes that d_arr and d_twiddles are located in the device memory.
* @param d_arr input array of type E (elements) allocated on the device memory.
* @param d_arr input array of type E (elements) allocated on the device memory.
* @param n length of d_arr.
* @param logn log(n).
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
* @param n_twiddles length of d_twiddles.
* @param logn log(n).
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
* @param n_twiddles length of d_twiddles.
*/
template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr, uint32_t n, uint32_t logn, S * d_twiddles, uint32_t n_twiddles) {
uint32_t m = 2;
for (uint32_t s = 0; s < logn; s++) {
for (uint32_t i = 0; i < n; i += m) {
uint32_t shifted_m = m >> 1;
uint32_t number_of_threads = MAX_NUM_THREADS ^ ((shifted_m ^ MAX_NUM_THREADS) & -(shifted_m < MAX_NUM_THREADS));
uint32_t number_of_blocks = shifted_m / MAX_NUM_THREADS + 1;
int shifted_m = m >> 1;
int number_of_threads = MAX_NUM_THREADS ^ ((shifted_m ^ MAX_NUM_THREADS) & -(shifted_m < MAX_NUM_THREADS));
int number_of_blocks = shifted_m / MAX_NUM_THREADS + 1;
template_butterfly_kernel < E, S > <<< number_of_threads, number_of_blocks >>> (d_arr, d_twiddles, n, n_twiddles, m, i, m >> 1);
}
m <<= 1;
@@ -169,7 +142,7 @@ template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr
}
/**
* Cooley-Tukey NTT.
* Cooley-Tuckey NTT.
* NOTE! this function assumes that d_twiddles are located in the device memory.
* @param arr input array of type E (elements).
* @param n length of d_arr.
@@ -185,10 +158,10 @@ template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_
cudaMalloc( & d_arrReversed, size_E);
cudaMemcpy(d_arrReversed, arrReversed, size_E, cudaMemcpyHostToDevice);
template_ntt_on_device_memory < E, S > (d_arrReversed, n, logn, d_twiddles, n_twiddles);
if (inverse) {
if (inverse == true) {
int NUM_THREADS = MAX_NUM_THREADS;
int NUM_BLOCKS = (n + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arrReversed, n, S::inv_log_size(logn));
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arrReversed, d_arrReversed, n, S::inv_log_size(logn));
}
cudaMemcpy(arrReversed, d_arrReversed, size_E, cudaMemcpyDeviceToHost);
cudaFree(d_arrReversed);
@@ -196,22 +169,73 @@ template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_
}
/**
* Cooley-Tukey (scalar) NTT.
* @param arr input array of type E (element).
* Cooley-Tuckey Elliptic Curve NTT.
* NOTE! this function assumes that d_twiddles are located in the device memory.
* @param arr input array of type projective_t.
* @param n length of d_arr.
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
* @param n_twiddles length of d_twiddles.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
projective_t * ecntt(projective_t * arr, uint32_t n, scalar_t * d_twiddles, uint32_t n_twiddles, bool inverse) {
return ntt_template < projective_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
}
/**
* Cooley-Tuckey (scalar) NTT.
* NOTE! this function assumes that d_twiddles are located in the device memory.
* @param arr input array of type scalar_t.
* @param n length of d_arr.
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
* @param n_twiddles length of d_twiddles.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
scalar_t * ntt(scalar_t * arr, uint32_t n, scalar_t * d_twiddles, uint32_t n_twiddles, bool inverse) {
return ntt_template < scalar_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
}
/**
* Cooley-Tuckey (scalar) NTT.
* @param arr input array of type scalar_t.
* @param n length of d_arr.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
template<typename E,typename S> uint32_t ntt_end2end_template(E * arr, uint32_t n, bool inverse) {
extern "C" uint32_t ntt_end2end(scalar_t * arr, uint32_t n, bool inverse) {
uint32_t logn = uint32_t(log(n) / log(2));
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as scalar_t::omega() is of that order.
scalar_t * d_twiddles;
if (inverse){
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
} else{
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
}
scalar_t * result = ntt_template < scalar_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
for(int i = 0; i < n; i++){
arr[i] = result[i];
}
cudaFree(d_twiddles);
return 0;
}
/**
* Cooley-Tuckey (scalar) NTT.
* @param arr input array of type projective_t.
* @param n length of d_arr.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
extern "C" uint32_t ecntt_end2end(projective_t * arr, uint32_t n, bool inverse) {
uint32_t logn = uint32_t(log(n) / log(2));
uint32_t n_twiddles = n;
S * twiddles = new S[n_twiddles];
S * d_twiddles;
scalar_t * twiddles = new scalar_t[n_twiddles];
scalar_t * d_twiddles;
if (inverse){
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn));
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
} else{
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn));
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
}
E * result = ntt_template < E, S > (arr, n, d_twiddles, n_twiddles, inverse);
projective_t * result = ntt_template < projective_t, scalar_t > (arr, n, d_twiddles, n_twiddles, inverse);
for(int i = 0; i < n; i++){
arr[i] = result[i];
}
@@ -229,20 +253,20 @@ template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_
* @param logn log(n).
* @param task log(n).
*/
template < typename T > __device__ __host__ void reverseOrder_batch(T * arr, uint32_t n, uint32_t logn, uint32_t task) {
template < typename T > __device__ __host__ void reverseOrder_batch(T * arr, uint32_t n, uint32_t logn, uint32_t task) {
for (uint32_t i = 0; i < n; i++) {
uint32_t reversed = reverseBits(i, logn);
if (reversed > i) {
T tmp = arr[task * n + i];
arr[task * n + i] = arr[task * n + reversed];
arr[task * n + reversed] = tmp;
}
uint32_t reversed = reverseBits(i, logn);
if (reversed > i) {
T tmp = arr[task * n + i];
arr[task * n + i] = arr[task * n + reversed];
arr[task * n + reversed] = tmp;
}
}
}
/**
* Cooley-Tukey butterfly kernel.
* Cooley-Tuckey butterfly kernel.
* @param arr array of objects of type E (elements).
* @param twiddles array of twiddle factors of type S (scalars).
* @param n size of arr.
@@ -261,7 +285,7 @@ template < typename E, typename S > __device__ __host__ void butterfly(E * arrRe
}
/**
* Cooley-Tukey NTT.
* Cooley-Tuckey NTT.
* NOTE! this function assumes that d_twiddles are located in the device memory.
* @param arr input array of type E (elements).
* @param n length of d_arr.
@@ -271,7 +295,7 @@ template < typename E, typename S > __device__ __host__ void butterfly(E * arrRe
* @param s log2(n) loop index.
*/
template <typename E, typename S>
__global__ void ntt_template_kernel(E *arr, uint32_t n, S *twiddles, uint32_t n_twiddles, uint32_t max_task, uint32_t s, bool rev)
__global__ void ntt_template_kernel(E *arr, uint32_t n, S *twiddles, uint32_t n_twiddles, uint32_t max_task, uint32_t s)
{
int task = blockIdx.x;
int chunks = n / (blockDim.x * 2);
@@ -298,18 +322,16 @@ __global__ void ntt_template_kernel(E *arr, uint32_t n, S *twiddles, uint32_t n_
uint32_t offset = (task / chunks) * n;
E u = arr[offset + i + j];
E v = rev ? arr[offset + k] : twiddles[j * n_twiddles_div] * arr[offset + k];
E v = twiddles[j * n_twiddles_div] * arr[offset + k];
arr[offset + i + j] = u + v;
arr[offset + k] = u - v;
if (rev)
arr[offset + k] = twiddles[j * n_twiddles_div] * arr[offset + k];
}
}
}
/**
* Cooley-Tukey NTT.
* Cooley-Tuckey NTT.
* NOTE! this function assumes that d_twiddles are located in the device memory.
* @param arr input array of type E (elements).
* @param n length of arr.
@@ -327,32 +349,34 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
}
}
//TODO: batch ntt and ecntt can be unified into batch_template
/**
* Cooley-Tukey (scalar) NTT.
* Cooley-Tuckey (scalar) NTT.
* This is a bached version - meaning it assumes than the input array
* consists of N arrays of size n. The function performs n-size NTT on each small array.
* @param arr input array of type BLS12_381::scalar_t.
* @param arr input array of type scalar_t.
* @param arr_size number of total elements = n * N.
* @param n size of batch.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
template <typename E, typename S> uint32_t ntt_end2end_batch_template(E * arr, uint32_t arr_size, uint32_t n, bool inverse) {
extern "C" uint32_t ntt_end2end_batch(scalar_t * arr, uint32_t arr_size, uint32_t n, bool inverse) {
int batches = int(arr_size / n);
uint32_t logn = uint32_t(log(n) / log(2));
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as BLS12_381::scalar_t::omega() is of that order.
size_t size_E = arr_size * sizeof(E);
S * d_twiddles;
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as scalar_t::omega() is of that order.
size_t size_E = arr_size * sizeof(scalar_t);
scalar_t * d_twiddles;
if (inverse){
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn));
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
} else{
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn));
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
}
E * d_arr;
scalar_t * d_arr;
cudaMalloc( & d_arr, size_E);
cudaMemcpy(d_arr, arr, size_E, cudaMemcpyHostToDevice);
int NUM_THREADS = MAX_THREADS_BATCH;
int NUM_BLOCKS = (batches + NUM_THREADS - 1) / NUM_THREADS;
ntt_template_kernel_rev_ord<E, S><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
ntt_template_kernel_rev_ord<scalar_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
int chunks = max(int((n / 2) / NUM_THREADS), 1);
@@ -361,13 +385,14 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
{
ntt_template_kernel<E, S><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
ntt_template_kernel<scalar_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s);
}
if (inverse == true)
{
NUM_THREADS = MAX_NUM_THREADS;
NUM_BLOCKS = (arr_size + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arr, arr_size, S::inv_log_size(logn));
template_normalize_kernel < scalar_t, scalar_t > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arr, d_arr, arr_size, scalar_t::inv_log_size(logn));
}
cudaMemcpy(arr, d_arr, size_E, cudaMemcpyDeviceToHost);
cudaFree(d_arr);
@@ -375,4 +400,51 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
return 0;
}
#endif
/**
* Cooley-Tuckey (scalar) NTT.
* This is a bached version - meaning it assumes than the input array
* consists of N arrays of size n. The function performs n-size NTT on each small array.
* @param arr input array of type scalar_t.
* @param arr_size number of total elements = n * N.
* @param n size of batch.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
extern "C" uint32_t ecntt_end2end_batch(projective_t * arr, uint32_t arr_size, uint32_t n, bool inverse) {
int batches = int(arr_size / n);
uint32_t logn = uint32_t(log(n) / log(2));
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as scalar_t::omega() is of that order.
size_t size_E = arr_size * sizeof(projective_t);
scalar_t * d_twiddles;
if (inverse){
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega_inv(logn));
} else{
d_twiddles = fill_twiddle_factors_array(n_twiddles, scalar_t::omega(logn));
}
projective_t * d_arr;
cudaMalloc( & d_arr, size_E);
cudaMemcpy(d_arr, arr, size_E, cudaMemcpyHostToDevice);
int NUM_THREADS = MAX_THREADS_BATCH;
int NUM_BLOCKS = (batches + NUM_THREADS - 1) / NUM_THREADS;
ntt_template_kernel_rev_ord<projective_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
int chunks = max(int((n / 2) / NUM_THREADS), 1);
int total_tasks = batches * chunks;
NUM_BLOCKS = total_tasks;
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
{
ntt_template_kernel<projective_t, scalar_t><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s);
}
if (inverse == true)
{
NUM_THREADS = MAX_NUM_THREADS;
NUM_BLOCKS = (arr_size + NUM_THREADS - 1) / NUM_THREADS;
// TODO: no correctnes when swapped NUM_THREADS, NUM_BLOCKS
template_normalize_kernel < projective_t, scalar_t > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arr, d_arr, arr_size, scalar_t::inv_log_size(logn));
}
cudaMemcpy(arr, d_arr, size_E, cudaMemcpyDeviceToHost);
cudaFree(d_arr);
cudaFree(d_twiddles);
return 0;
}

View File

@@ -0,0 +1,226 @@
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "../../curves/curve_config.cuh"
#include "ve_mod_mult.cuh"
#define MAX_THREADS_PER_BLOCK 256
// TODO: headers for prototypes and .c .cpp .cu files for implementations
template <typename E, typename S>
__global__ void vectorModMult(S *scalar_vec, E *element_vec, E *result, size_t n_elments)
{
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < n_elments)
{
result[tid] = scalar_vec[tid] * element_vec[tid];
}
}
template <typename E, typename S>
int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments) // TODO: in place so no need for third result vector
{
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)n_elments / MAX_THREADS_PER_BLOCK);
int threads_per_block = MAX_THREADS_PER_BLOCK;
// Allocate memory on the device for the input vectors, the output vector, and the modulus
S *d_vec_a;
E *d_vec_b, *d_result;
cudaMalloc(&d_vec_a, n_elments * sizeof(S));
cudaMalloc(&d_vec_b, n_elments * sizeof(E));
cudaMalloc(&d_result, n_elments * sizeof(E));
// Copy the input vectors and the modulus from the host to the device
cudaMemcpy(d_vec_a, vec_a, n_elments * sizeof(S), cudaMemcpyHostToDevice);
cudaMemcpy(d_vec_b, vec_b, n_elments * sizeof(E), cudaMemcpyHostToDevice);
// Call the kernel to perform element-wise modular multiplication
vectorModMult<<<num_blocks, threads_per_block>>>(d_vec_a, d_vec_b, d_result, n_elments);
cudaMemcpy(result, d_result, n_elments * sizeof(E), cudaMemcpyDeviceToHost);
cudaFree(d_vec_a);
cudaFree(d_vec_b);
cudaFree(d_result);
return 0;
}
template <typename E>
__global__ void matrixVectorMult(E *matrix_elements, E *vector_elements, E *result, size_t dim)
{
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < dim)
{
result[tid] = E::zero();
for (int i = 0; i < dim; i++)
result[tid] = result[tid] + matrix_elements[tid * dim + i] * vector_elements[i];
}
}
template <typename E>
int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t dim)
{
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)dim / MAX_THREADS_PER_BLOCK);
int threads_per_block = MAX_THREADS_PER_BLOCK;
// Allocate memory on the device for the input vectors, the output vector, and the modulus
E *d_matrix, *d_vector, *d_result;
cudaMalloc(&d_matrix, (dim * dim) * sizeof(E));
cudaMalloc(&d_vector, dim * sizeof(E));
cudaMalloc(&d_result, dim * sizeof(E));
// Copy the input vectors and the modulus from the host to the device
cudaMemcpy(d_matrix, matrix_elements, (dim * dim) * sizeof(E), cudaMemcpyHostToDevice);
cudaMemcpy(d_vector, vector_elements, dim * sizeof(E), cudaMemcpyHostToDevice);
// Call the kernel to perform element-wise modular multiplication
matrixVectorMult<<<num_blocks, threads_per_block>>>(d_matrix, d_vector, d_result, dim);
cudaMemcpy(result, d_result, dim * sizeof(E), cudaMemcpyDeviceToHost);
cudaFree(d_matrix);
cudaFree(d_vector);
cudaFree(d_result);
return 0;
}
template <typename E, typename S>
__global__ void batch_vector_mult_kernel(E *element_vec, S *mult_vec, unsigned n_mult, unsigned batch_size)
{
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < n_mult * batch_size)
{
int mult_id = tid % n_mult;
element_vec[tid] = mult_vec[mult_id] * element_vec[tid];
}
}
template <typename E, typename S>
int batch_vector_mult_template(E *element_vec, S *mult_vec, unsigned n_mult, unsigned batch_size)
{
// Set the grid and block dimensions
int NUM_THREADS = MAX_THREADS_PER_BLOCK;
int NUM_BLOCKS = (n_mult * batch_size + NUM_THREADS - 1) / NUM_THREADS;
// Allocate memory on the device for the input vectors, the output vector, and the modulus
S *d_mult_vec;
E *d_element_vec;
size_t n_mult_size = n_mult * sizeof(S);
size_t full_size = n_mult * batch_size * sizeof(E);
cudaMalloc(&d_mult_vec, n_mult_size);
cudaMalloc(&d_element_vec, full_size);
// Copy the input vectors and the modulus from the host to the device
cudaMemcpy(d_mult_vec, mult_vec, n_mult_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_element_vec, element_vec, full_size, cudaMemcpyHostToDevice);
batch_vector_mult_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(d_element_vec, d_mult_vec, n_mult, batch_size);
cudaMemcpy(element_vec, d_element_vec, full_size, cudaMemcpyDeviceToHost);
cudaFree(d_mult_vec);
cudaFree(d_element_vec);
return 0;
}
extern "C" int32_t batch_vector_mult_proj_cuda(projective_t *inout,
scalar_t *scalar_vec,
size_t n_scalars,
size_t batch_size,
size_t device_id)
{
try
{
// TODO: device_id
batch_vector_mult_template(inout, scalar_vec, n_scalars, batch_size);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t batch_vector_mult_scalar_cuda(scalar_t *inout,
scalar_t *mult_vec,
size_t n_mult,
size_t batch_size,
size_t device_id)
{
try
{
// TODO: device_id
batch_vector_mult_template(inout, mult_vec, n_mult, batch_size);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t vec_mod_mult_point(projective_t *inout,
scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<projective_t, scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t vec_mod_mult_scalar(scalar_t *inout,
scalar_t *scalar_vec,
size_t n_elments,
size_t device_id) //TODO: unify with batch mult as batch_size=1
{
try
{
// TODO: device_id
vector_mod_mult<scalar_t, scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t matrix_vec_mod_mult(scalar_t *matrix_flattened,
scalar_t *input,
scalar_t *output,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
matrix_mod_mult<scalar_t>(matrix_flattened, input, output, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}

View File

@@ -1,113 +1,15 @@
#ifndef VEC_MULT
#define VEC_MULT
#pragma once
#include <stdexcept>
#include <cuda.h>
#include "../../primitives/field.cuh"
#include "../../curves/curve_config.cuh"
#include "../../primitives/projective.cuh"
#define MAX_THREADS_PER_BLOCK 256
// TODO: headers for prototypes and .c .cpp .cu files for implementations
template <typename E, typename S>
__global__ void vectorModMult(S *scalar_vec, E *element_vec, E *result, size_t n_elments)
{
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < n_elments)
{
result[tid] = scalar_vec[tid] * element_vec[tid];
}
}
template <typename E, typename S>
int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments) // TODO: in place so no need for third result vector
{
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)n_elments / MAX_THREADS_PER_BLOCK);
int threads_per_block = MAX_THREADS_PER_BLOCK;
// Allocate memory on the device for the input vectors, the output vector, and the modulus
S *d_vec_a;
E *d_vec_b, *d_result;
cudaMalloc(&d_vec_a, n_elments * sizeof(S));
cudaMalloc(&d_vec_b, n_elments * sizeof(E));
cudaMalloc(&d_result, n_elments * sizeof(E));
// Copy the input vectors and the modulus from the host to the device
cudaMemcpy(d_vec_a, vec_a, n_elments * sizeof(S), cudaMemcpyHostToDevice);
cudaMemcpy(d_vec_b, vec_b, n_elments * sizeof(E), cudaMemcpyHostToDevice);
// Call the kernel to perform element-wise modular multiplication
vectorModMult<<<num_blocks, threads_per_block>>>(d_vec_a, d_vec_b, d_result, n_elments);
cudaMemcpy(result, d_result, n_elments * sizeof(E), cudaMemcpyDeviceToHost);
cudaFree(d_vec_a);
cudaFree(d_vec_b);
cudaFree(d_result);
return 0;
}
template <typename E, typename S>
__global__ void batchVectorMult(S *scalar_vec, E *element_vec, unsigned n_scalars, unsigned batch_size)
{
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < n_scalars * batch_size)
{
int scalar_id = tid % n_scalars;
element_vec[tid] = scalar_vec[scalar_id] * element_vec[tid];
}
}
template <typename E, typename S>
int batch_vector_mult(S *scalar_vec, E *element_vec, unsigned n_scalars, unsigned batch_size)
{
// Set the grid and block dimensions
int NUM_THREADS = MAX_THREADS_PER_BLOCK;
int NUM_BLOCKS = (n_scalars * batch_size + NUM_THREADS - 1) / NUM_THREADS;
batchVectorMult<<<NUM_BLOCKS, NUM_THREADS>>>(scalar_vec, element_vec, n_scalars, batch_size);
return 0;
}
int vector_mod_mult(S *scalar_vec, E *element_vec, E *result, size_t n_elments);
template <typename E>
__global__ void matrixVectorMult(E *matrix_elements, E *vector_elements, E *result, size_t dim)
{
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < dim)
{
result[tid] = E::zero();
for (int i = 0; i < dim; i++)
result[tid] = result[tid] + matrix_elements[tid * dim + i] * vector_elements[i];
}
}
template <typename E>
int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t dim)
{
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)dim / MAX_THREADS_PER_BLOCK);
int threads_per_block = MAX_THREADS_PER_BLOCK;
// Allocate memory on the device for the input vectors, the output vector, and the modulus
E *d_matrix, *d_vector, *d_result;
cudaMalloc(&d_matrix, (dim * dim) * sizeof(E));
cudaMalloc(&d_vector, dim * sizeof(E));
cudaMalloc(&d_result, dim * sizeof(E));
// Copy the input vectors and the modulus from the host to the device
cudaMemcpy(d_matrix, matrix_elements, (dim * dim) * sizeof(E), cudaMemcpyHostToDevice);
cudaMemcpy(d_vector, vector_elements, dim * sizeof(E), cudaMemcpyHostToDevice);
// Call the kernel to perform element-wise modular multiplication
matrixVectorMult<<<num_blocks, threads_per_block>>>(d_matrix, d_vector, d_result, dim);
cudaMemcpy(result, d_result, dim * sizeof(E), cudaMemcpyDeviceToHost);
cudaFree(d_matrix);
cudaFree(d_vector);
cudaFree(d_result);
return 0;
}
#endif
int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t dim);

View File

@@ -1,22 +0,0 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
namespace BLS12_377 {
typedef Field<PARAMS_BLS12_377::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_BLS12_377::fq_config> point_field_t;
static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_377::weierstrass_b };
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_BLS12_377::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_re },
point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_im }};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
}

View File

@@ -1,308 +0,0 @@
#ifndef _BLS12_377_LDE
#define _BLS12_377_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" BLS12_377::scalar_t* build_domain_cuda_bls12_377(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
try
{
if (inverse) {
return fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega_inv(logn));
} else {
return fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega(logn));
}
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return nullptr;
}
}
extern "C" int ntt_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ntt_batch_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_batch_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_evaluations, BLS12_377::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_evaluations, BLS12_377::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_377::scalar_t *coset_powers, unsigned device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_cuda_bls12_377(BLS12_377::projective_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_batch_cuda_bls12_377(BLS12_377::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif

View File

@@ -1,87 +0,0 @@
#ifndef _BLS12_377_MSM
#define _BLS12_377_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
extern "C"
int msm_cuda_bls12_377(BLS12_377::projective_t *out, BLS12_377::affine_t points[],
BLS12_377::scalar_t scalars[], size_t count, size_t device_id = 0)
{
try
{
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int msm_batch_cuda_bls12_377(BLS12_377::projective_t* out, BLS12_377::affine_t points[],
BLS12_377::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
try
{
batched_large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, batch_size, msm_size, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a polynomial using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Ouptut point to write the result to.
* @param d_scalars Scalars for the MSM. Must be on device.
* @param d_points Points for the MSM. Must be on device.
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t device_id = 0)
{
try
{
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(d_scalars, d_points, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a batch of polynomials using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Ouptut point to write the results to.
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
try
{
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif

View File

@@ -1,155 +0,0 @@
#pragma once
#include "../../utils/storage.cuh"
namespace PARAMS_BLS12_377{
struct fp_config{
static constexpr unsigned limbs_count = 8;
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd, 0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb, 0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3, 0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7, 0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f, 0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
static constexpr unsigned modulus_bits_count = 253;
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b, 0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> omega4= {0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765, 0x970dec00, 0x23ed1347, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega5= {0x0405f600, 0xfa8e7081, 0xf8a89660, 0x38b1c291, 0x6bda5fce, 0xefab9005, 0x92a3c754, 0x0b6b0756};
static constexpr storage<limbs_count> omega6= {0xaf0a50c8, 0xc5b2c78e, 0x4636deb3, 0x72e32a34, 0xb6f97778, 0x3d775d15, 0x2b16be6e, 0x0c4c070d};
static constexpr storage<limbs_count> omega7= {0x7a1ade2c, 0x3f5a4e73, 0x0120d1db, 0x71e5bca1, 0x3b2866fd, 0xbcb44162, 0x89c38db1, 0x06ed1a90};
static constexpr storage<limbs_count> omega8= {0xbd2cd25e, 0x61c5510e, 0x2b0d531c, 0xe2d70111, 0x94c3bd4b, 0x738f9894, 0x53182695, 0x0b1e0f1d};
static constexpr storage<limbs_count> omega9= {0x8cb9508c, 0xcfb2f75e, 0xf491e401, 0x4c14f244, 0x23c16afb, 0xc8f5265f, 0x70f3ff2a, 0x0cda7e27};
static constexpr storage<limbs_count> omega10= {0x0bdc32ee, 0xca77feb9, 0xd957f5a9, 0xf36ddfd4, 0x61ba14c4, 0x491c58f5, 0x93e8f339, 0x0618d3c9};
static constexpr storage<limbs_count> omega11= {0x2d89d82f, 0x68c3242e, 0x832a3729, 0xf9559645, 0xbceb62cc, 0x5c803c5e, 0x99ffa2f8, 0x1177cf5d};
static constexpr storage<limbs_count> omega12= {0x6932851a, 0xb6ed40f2, 0x1e0da12e, 0x79cbe7fb, 0x2a7d8f87, 0x8d408575, 0x7505d049, 0x11867341};
static constexpr storage<limbs_count> omega13= {0x07146cbf, 0x8cf7d87a, 0x109c4d23, 0x14ac37dc, 0x883e9660, 0x082d15f0, 0xad9ea9b8, 0x003719b1};
static constexpr storage<limbs_count> omega14= {0xfd0aee77, 0x2260e0dd, 0x1e33b6db, 0xc0cbbc3f, 0xfe7e1b36, 0xc8bf6747, 0x4cb802c1, 0x129e4fd5};
static constexpr storage<limbs_count> omega15= {0x8ac75741, 0x22f6fca2, 0xdd37b519, 0x8101b557, 0x1036226a, 0xf493bb8a, 0xfce05c2c, 0x06dbad6c};
static constexpr storage<limbs_count> omega16= {0x56733f8b, 0x7d246c24, 0xff70b46a, 0xbc3c4112, 0x6f13530b, 0x2c159b40, 0xc55d287b, 0x0c13137a};
static constexpr storage<limbs_count> omega17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> omega18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> omega19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
static constexpr storage<limbs_count> omega20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
static constexpr storage<limbs_count> omega21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
static constexpr storage<limbs_count> omega22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
static constexpr storage<limbs_count> omega23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
static constexpr storage<limbs_count> omega24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
static constexpr storage<limbs_count> omega25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
static constexpr storage<limbs_count> omega26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
static constexpr storage<limbs_count> omega27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
static constexpr storage<limbs_count> omega28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
static constexpr storage<limbs_count> omega29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
static constexpr storage<limbs_count> omega_inv1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> omega_inv4= {0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99, 0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> omega_inv5= {0x29f04fbb, 0x401766f3, 0x0a4b98b2, 0x7e4e5f63, 0x9fbc28da, 0x35887f12, 0xdabe3b97, 0x045cb225};
static constexpr storage<limbs_count> omega_inv6= {0xac4ce534, 0xf3883827, 0x7c4940f0, 0x9f9a114f, 0x32cc3182, 0xe48527ee, 0x2877f4c2, 0x02d4450c};
static constexpr storage<limbs_count> omega_inv7= {0x4afbf0bb, 0xd2533833, 0x1d646d56, 0x20987ba6, 0xb8ae7d61, 0xf2c34c11, 0xb53ae995, 0x09962e74};
static constexpr storage<limbs_count> omega_inv8= {0x34f5271a, 0xd6aeb755, 0x493bb125, 0xc0e24cfd, 0x35cf1879, 0xc9d2a1ad, 0x19000e58, 0x0f3570fa};
static constexpr storage<limbs_count> omega_inv9= {0xbec3ee61, 0x2601423e, 0xb5252af1, 0x94f5ab4b, 0x205d09ca, 0xa1184628, 0x82a1fba2, 0x0e305e1e};
static constexpr storage<limbs_count> omega_inv10= {0x7e3320f2, 0x3cbad3a7, 0x4269c624, 0x7866653a, 0xa2fc13a2, 0xaf6d742d, 0xfe24db2a, 0x03ed8246};
static constexpr storage<limbs_count> omega_inv11= {0x30cff7d3, 0xcb6ab09e, 0xd88db7e6, 0x29949e69, 0x24db3cd4, 0xb9117dc6, 0xca8d11b5, 0x01b2aadd};
static constexpr storage<limbs_count> omega_inv12= {0x433b851c, 0x1c8fbc5d, 0x545e622f, 0x0ccc3b8c, 0x5c624e0f, 0x0fba9df2, 0x0496ddf9, 0x02d54c5d};
static constexpr storage<limbs_count> omega_inv13= {0x0a176838, 0x2ddbbfdd, 0xc4c77f0f, 0xb7a1e4f3, 0x41cad032, 0x645b4383, 0xbfb123c4, 0x0f3fe2e3};
static constexpr storage<limbs_count> omega_inv14= {0x9ff30538, 0x1d6d50fe, 0x8576b6fa, 0xca07f2d2, 0x720da6d2, 0x587839fa, 0xe9ebd753, 0x0038d5aa};
static constexpr storage<limbs_count> omega_inv15= {0x8e30fb24, 0xaeac713d, 0x21906459, 0xd004e9e3, 0xa60b0a33, 0x2fc54303, 0x14e545a6, 0x039063f8};
static constexpr storage<limbs_count> omega_inv16= {0x74d36c47, 0x112559bd, 0x4154b77a, 0x87db7016, 0x3843df80, 0x9e779ae5, 0x297077d0, 0x024424f2};
static constexpr storage<limbs_count> omega_inv17= {0x65953c15, 0xd649ae5e, 0x56accc60, 0x879fe571, 0xa3ba1e39, 0xba914f52, 0xd6ea78a2, 0x01b74920};
static constexpr storage<limbs_count> omega_inv18= {0x3d8a82b4, 0x319dea45, 0x8fc703de, 0x49468894, 0xc6b00817, 0x703f710f, 0xe862bc53, 0x007762fd};
static constexpr storage<limbs_count> omega_inv19= {0x5bae083f, 0x4f433336, 0x27612fe3, 0x485e079c, 0x7f8f0a07, 0xf83b6572, 0xca91a4d4, 0x06bdcaaf};
static constexpr storage<limbs_count> omega_inv20= {0xb2fb63eb, 0x4a0bf5e7, 0x996004d9, 0x6f64f8ec, 0x67519c5e, 0x0fecd781, 0x1cab2760, 0x04475eb3};
static constexpr storage<limbs_count> omega_inv21= {0xcd83d14f, 0xadbd6ce4, 0x750b194a, 0xc664d3bc, 0x89c9f437, 0x3034dfed, 0xcc2e643b, 0x03d502b8};
static constexpr storage<limbs_count> omega_inv22= {0x2272320b, 0xf89478a9, 0xd2e658b7, 0x3adac024, 0x94b25831, 0xf38d840f, 0x37dc6c4c, 0x04540b1f};
static constexpr storage<limbs_count> omega_inv23= {0xa6d411fe, 0x19d969b1, 0xf544a648, 0x973f00f7, 0xc9ed9f93, 0xb18f166c, 0xe7f21124, 0x02fba68e};
static constexpr storage<limbs_count> omega_inv24= {0x94921227, 0x78b96b20, 0x23b35b65, 0x07cd90db, 0xc843f1c3, 0x111f4fd9, 0xff729f23, 0x0ec4b820};
static constexpr storage<limbs_count> omega_inv25= {0x4879d823, 0x53eb200b, 0x93095f4a, 0x1971fac3, 0x86989a58, 0x8467ffe6, 0x306ed29d, 0x0af20231};
static constexpr storage<limbs_count> omega_inv26= {0xd4793454, 0x71c907bd, 0x7700defb, 0xc11aa47e, 0xbac11769, 0xf03e0873, 0x97419136, 0x0353190d};
static constexpr storage<limbs_count> omega_inv27= {0xa81a701c, 0x61a3deb6, 0x91bbbecf, 0xd8a4eda1, 0x6feb65df, 0x3f5339b1, 0x8b5421f2, 0x108adc5b};
static constexpr storage<limbs_count> omega_inv28= {0xe7bf5a41, 0x7d6c573a, 0xfa83b1f7, 0x8038b697, 0xa6718ce9, 0x2a988bee, 0x1239b708, 0x0846f362};
static constexpr storage<limbs_count> omega_inv29= {0xe3373548, 0x89a068a4, 0x78a6c4e5, 0xf31284cf, 0x6e9396d6, 0x9eed5c8d, 0x7e4342f9, 0x01643c65};
static constexpr storage<limbs_count> omega_inv30= {0x123a81f6, 0xc03a3272, 0x115b15e8, 0x377e6d2f, 0x2d6d7206, 0xed5575e4, 0x714004f2, 0x0b1e37e4};
static constexpr storage<limbs_count> omega_inv31= {0xdde8ffc5, 0x62a29589, 0x618c5d62, 0xfb6716e8, 0x88d61f25, 0x787e561c, 0xd2b21c7e, 0x0e351761};
static constexpr storage<limbs_count> omega_inv32= {0x7aca7fbe, 0xc9fea0e9, 0xb41a8854, 0x965ff314, 0x810eea7e, 0x743415d4, 0x8275bbd1, 0x0431c01b};
static constexpr storage<limbs_count> inv1= {0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af};
static constexpr storage<limbs_count> inv2= {0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06};
static constexpr storage<limbs_count> inv3= {0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2};
static constexpr storage<limbs_count> inv4= {0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e, 0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08};
static constexpr storage<limbs_count> inv5= {0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346, 0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33};
static constexpr storage<limbs_count> inv6= {0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22, 0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9};
static constexpr storage<limbs_count> inv7= {0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210, 0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93};
static constexpr storage<limbs_count> inv8= {0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87, 0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9};
static constexpr storage<limbs_count> inv9= {0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3, 0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab};
static constexpr storage<limbs_count> inv10= {0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61, 0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85};
static constexpr storage<limbs_count> inv11= {0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af, 0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1};
static constexpr storage<limbs_count> inv12= {0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57, 0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8};
static constexpr storage<limbs_count> inv13= {0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab, 0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003};
static constexpr storage<limbs_count> inv14= {0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054, 0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1};
static constexpr storage<limbs_count> inv15= {0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9, 0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007};
static constexpr storage<limbs_count> inv16= {0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54, 0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3};
static constexpr storage<limbs_count> inv17= {0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29, 0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08};
static constexpr storage<limbs_count> inv18= {0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094, 0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3};
static constexpr storage<limbs_count> inv19= {0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9, 0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309};
static constexpr storage<limbs_count> inv20= {0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164, 0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433};
static constexpr storage<limbs_count> inv21= {0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31, 0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9};
static constexpr storage<limbs_count> inv22= {0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198, 0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513};
static constexpr storage<limbs_count> inv23= {0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b, 0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539};
static constexpr storage<limbs_count> inv24= {0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5, 0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b};
static constexpr storage<limbs_count> inv25= {0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651, 0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555};
static constexpr storage<limbs_count> inv26= {0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8, 0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559};
static constexpr storage<limbs_count> inv27= {0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3, 0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c};
static constexpr storage<limbs_count> inv28= {0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9, 0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d};
static constexpr storage<limbs_count> inv29= {0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4, 0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e};
static constexpr storage<limbs_count> inv30= {0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e};
static constexpr storage<limbs_count> inv31= {0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e};
static constexpr storage<limbs_count> inv32= {0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e};
};
struct fq_config{
static constexpr unsigned limbs_count = 12;
static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88, 0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6, 0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510, 0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc, 0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2, 0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af, 0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4, 0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f, 0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48, 0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be, 0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
static constexpr unsigned modulus_bits_count = 377;
static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488, 0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// i^2, the square of the imaginary unit for the extension field
static constexpr uint32_t i_squared = 1;
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
static constexpr storage<limbs_count> generator_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512, 0xbd37cb5c, 0x188282c8,
0xaa9d41bb, 0x85951e2c, 0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
static constexpr storage<limbs_count> generator_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36, 0x4fb82305, 0x6d182ad4,
0xca3e52d9, 0xbd7fb348, 0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
static constexpr storage<limbs_count> generator_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0xbac0326, 0x7ae3d177, 0xb4510b64,
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x24aa2b2};
static constexpr storage<limbs_count> generator_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
static constexpr storage<limbs_count> generator_y_re = {0x8b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0xce5d527};
static constexpr storage<limbs_count> generator_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x606c4a0};
};
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// TODO: correct parameters for G2 here
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
}

View File

@@ -1,22 +0,0 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
extern "C" bool eq_bls12_377(BLS12_377::projective_t *point1, BLS12_377::projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_377::point_field_t::zero()) && (point1->y == BLS12_377::point_field_t::zero()) && (point1->z == BLS12_377::point_field_t::zero())) &&
!((point2->x == BLS12_377::point_field_t::zero()) && (point2->y == BLS12_377::point_field_t::zero()) && (point2->z == BLS12_377::point_field_t::zero()));
}
#if defined(G2_DEFINED)
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t *point1, BLS12_377::g2_projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_377::g2_point_field_t::zero()) && (point1->y == BLS12_377::g2_point_field_t::zero()) && (point1->z == BLS12_377::g2_point_field_t::zero())) &&
!((point2->x == BLS12_377::g2_point_field_t::zero()) && (point2->y == BLS12_377::g2_point_field_t::zero()) && (point2->z == BLS12_377::g2_point_field_t::zero()));
}
#endif

View File

@@ -1,5 +0,0 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"

View File

@@ -1,66 +0,0 @@
#ifndef _BLS12_377_VEC_MULT
#define _BLS12_377_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
extern "C" int32_t vec_mod_mult_point_bls12_377(BLS12_377::projective_t *inout,
BLS12_377::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_377::projective_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t vec_mod_mult_scalar_bls12_377(BLS12_377::scalar_t *inout,
BLS12_377::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_377::scalar_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t matrix_vec_mod_mult_bls12_377(BLS12_377::scalar_t *matrix_flattened,
BLS12_377::scalar_t *input,
BLS12_377::scalar_t *output,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
matrix_mod_mult<BLS12_377::scalar_t>(matrix_flattened, input, output, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
#endif

176
icicle/curves/bls12_381.cuh Normal file
View File

@@ -0,0 +1,176 @@
#pragma once
#include "../utils/storage.cuh"
struct fp_config {
// field structure size = 8 * 32 bit
static constexpr unsigned limbs_count = 8;
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
// 2*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
static constexpr unsigned modulus_bits_count = 255;
// m = floor(2^(2*modulus_bits_count) / modulus)
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// static constexpr storage<limbs_count> omega[32]= { {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000}, {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d}, {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e}, {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb}, {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac}, {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802}, {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59}, {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667}, {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098}, {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b}, {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0}, {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8}, {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8}, {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911}, {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd}, {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333}, {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db}, {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83}, {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f}, {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5}, {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3}, {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd}, {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc}, {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd}, {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580}, {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d}, {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d}, {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f}, {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b}, {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72}, {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}};
// Quick fix for linking issue
static constexpr storage<limbs_count> omega1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
static constexpr storage<limbs_count> omega2= {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega3= {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d};
static constexpr storage<limbs_count> omega4= {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e};
static constexpr storage<limbs_count> omega5= {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb};
static constexpr storage<limbs_count> omega6= {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac};
static constexpr storage<limbs_count> omega7= {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802};
static constexpr storage<limbs_count> omega8= {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59};
static constexpr storage<limbs_count> omega9= {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667};
static constexpr storage<limbs_count> omega10= {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098};
static constexpr storage<limbs_count> omega11= {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b};
static constexpr storage<limbs_count> omega12= {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0};
static constexpr storage<limbs_count> omega13= {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8};
static constexpr storage<limbs_count> omega14= {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8};
static constexpr storage<limbs_count> omega15= {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911};
static constexpr storage<limbs_count> omega16= {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd};
static constexpr storage<limbs_count> omega17= {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333};
static constexpr storage<limbs_count> omega18= {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db};
static constexpr storage<limbs_count> omega19= {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83};
static constexpr storage<limbs_count> omega20= {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f};
static constexpr storage<limbs_count> omega21= {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5};
static constexpr storage<limbs_count> omega22= {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3};
static constexpr storage<limbs_count> omega23= {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd};
static constexpr storage<limbs_count> omega24= {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc};
static constexpr storage<limbs_count> omega25= {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd};
static constexpr storage<limbs_count> omega26= {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580};
static constexpr storage<limbs_count> omega27= {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d};
static constexpr storage<limbs_count> omega28= {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d};
static constexpr storage<limbs_count> omega29= {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f};
static constexpr storage<limbs_count> omega30= {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
static constexpr storage<limbs_count> omega31= {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
static constexpr storage<limbs_count> omega32= {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
// static constexpr storage<limbs_count> omega_inv[32]={ {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753}, {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e}, {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4}, {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c}, {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee}, {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d}, {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25}, {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e}, {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508}, {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d}, {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63}, {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7}, {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1}, {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac}, {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003}, {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c}, {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7}, {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950}, {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2}, {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6}, {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5}, {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9}, {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960}, {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6}, {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf}, {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f}, {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533}, {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff}, {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287}, {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6}, {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}};
// Quick fix for linking issue
static constexpr storage<limbs_count> omega_inv1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753};
static constexpr storage<limbs_count> omega_inv3= {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e};
static constexpr storage<limbs_count> omega_inv4= {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4};
static constexpr storage<limbs_count> omega_inv5= {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c};
static constexpr storage<limbs_count> omega_inv6= {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee};
static constexpr storage<limbs_count> omega_inv7= {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d};
static constexpr storage<limbs_count> omega_inv8= {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25};
static constexpr storage<limbs_count> omega_inv9= {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e};
static constexpr storage<limbs_count> omega_inv10= {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508};
static constexpr storage<limbs_count> omega_inv11= {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d};
static constexpr storage<limbs_count> omega_inv12= {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63};
static constexpr storage<limbs_count> omega_inv13= {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7};
static constexpr storage<limbs_count> omega_inv14= {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1};
static constexpr storage<limbs_count> omega_inv15= {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac};
static constexpr storage<limbs_count> omega_inv16= {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003};
static constexpr storage<limbs_count> omega_inv17= {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c};
static constexpr storage<limbs_count> omega_inv18= {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7};
static constexpr storage<limbs_count> omega_inv19= {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950};
static constexpr storage<limbs_count> omega_inv20= {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2};
static constexpr storage<limbs_count> omega_inv21= {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6};
static constexpr storage<limbs_count> omega_inv22= {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5};
static constexpr storage<limbs_count> omega_inv23= {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9};
static constexpr storage<limbs_count> omega_inv24= {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960};
static constexpr storage<limbs_count> omega_inv25= {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6};
static constexpr storage<limbs_count> omega_inv26= {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf};
static constexpr storage<limbs_count> omega_inv27= {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f};
static constexpr storage<limbs_count> omega_inv28= {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533};
static constexpr storage<limbs_count> omega_inv29= {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff};
static constexpr storage<limbs_count> omega_inv30= {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287};
static constexpr storage<limbs_count> omega_inv31= {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
static constexpr storage<limbs_count> omega_inv32= {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};
// static constexpr storage<limbs_count> inv[32]={ {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9}, {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e}, {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268}, {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd}, {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18}, {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5}, {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04}, {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab}, {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f}, {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9}, {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e}, {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878}, {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5}, {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c}, {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77}, {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365}, {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c}, {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57}, {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5}, {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014}, {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3}, {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583}, {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b}, {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df}, {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719}, {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736}, {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744}, {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b}, {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f}, {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751}, {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752}, {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}};
// Quick fix for linking issue
static constexpr storage<limbs_count> inv1= {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
static constexpr storage<limbs_count> inv2= {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
static constexpr storage<limbs_count> inv3= {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268};
static constexpr storage<limbs_count> inv4= {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd};
static constexpr storage<limbs_count> inv5= {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18};
static constexpr storage<limbs_count> inv6= {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5};
static constexpr storage<limbs_count> inv7= {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04};
static constexpr storage<limbs_count> inv8= {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab};
static constexpr storage<limbs_count> inv9= {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f};
static constexpr storage<limbs_count> inv10= {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9};
static constexpr storage<limbs_count> inv11= {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e};
static constexpr storage<limbs_count> inv12= {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878};
static constexpr storage<limbs_count> inv13= {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5};
static constexpr storage<limbs_count> inv14= {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c};
static constexpr storage<limbs_count> inv15= {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77};
static constexpr storage<limbs_count> inv16= {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365};
static constexpr storage<limbs_count> inv17= {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c};
static constexpr storage<limbs_count> inv18= {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57};
static constexpr storage<limbs_count> inv19= {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5};
static constexpr storage<limbs_count> inv20= {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014};
static constexpr storage<limbs_count> inv21= {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3};
static constexpr storage<limbs_count> inv22= {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583};
static constexpr storage<limbs_count> inv23= {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b};
static constexpr storage<limbs_count> inv24= {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df};
static constexpr storage<limbs_count> inv25= {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719};
static constexpr storage<limbs_count> inv26= {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736};
static constexpr storage<limbs_count> inv27= {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744};
static constexpr storage<limbs_count> inv28= {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b};
static constexpr storage<limbs_count> inv29= {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f};
static constexpr storage<limbs_count> inv30= {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
static constexpr storage<limbs_count> inv31= {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
static constexpr storage<limbs_count> inv32= {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
};
struct fq_config {
// field structure size = 12 * 32 bit
static constexpr unsigned limbs_count = 12;
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
static constexpr storage<2*limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2*limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
// 2*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
// 4*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
static constexpr unsigned modulus_bits_count = 381;
// m = floor(2^(2*modulus_bits_count) / modulus)
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
};
struct group_generator {
static constexpr storage<fq_config::limbs_count> generator_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
0x9774b905, 0xc3688c4f, 0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
static constexpr storage<fq_config::limbs_count> generator_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
0xd5d00af6, 0xfcf5e095, 0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
};
static constexpr unsigned weierstrass_b = 4;

View File

@@ -1,22 +0,0 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
namespace BLS12_381 {
typedef Field<PARAMS_BLS12_381::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_BLS12_381::fq_config> point_field_t;
static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_381::weierstrass_b };
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_BLS12_381::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BLS12_381::weierstrass_b_g2_re },
point_field_t{ PARAMS_BLS12_381::weierstrass_b_g2_im }};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
}

View File

@@ -1,308 +0,0 @@
#ifndef _BLS12_381_LDE
#define _BLS12_381_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" BLS12_381::scalar_t* build_domain_cuda_bls12_381(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
try
{
if (inverse) {
return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega_inv(logn));
} else {
return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega(logn));
}
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return nullptr;
}
}
extern "C" int ntt_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ntt_batch_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_batch_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_evaluations, BLS12_381::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_evaluations, BLS12_381::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0)
{
try
{
BLS12_381::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
BLS12_381::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0)
{
try
{
BLS12_381::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
BLS12_381::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_381::scalar_t *coset_powers, unsigned device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_cuda_bls12_381(BLS12_381::projective_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_batch_cuda_bls12_381(BLS12_381::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif

View File

@@ -1,89 +0,0 @@
#ifndef _BLS12_381_MSM
#define _BLS12_381_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
extern "C"
int msm_cuda_bls12_381(BLS12_381::projective_t *out, BLS12_381::affine_t points[],
BLS12_381::scalar_t scalars[], size_t count, size_t device_id = 0)
{
try
{
large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, count, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int msm_batch_cuda_bls12_381(BLS12_381::projective_t* out, BLS12_381::affine_t points[],
BLS12_381::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
try
{
batched_large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, batch_size, msm_size, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a polynomial using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Ouptut point to write the result to.
* @param d_scalars Scalars for the MSM. Must be on device.
* @param d_points Points for the MSM. Must be on device.
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t device_id = 0)
{
try
{
large_msm(d_scalars, d_points, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a batch of polynomials using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Ouptut point to write the results to.
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
try
{
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif

View File

@@ -1,193 +0,0 @@
#pragma once
#include "../../utils/storage.cuh"
namespace PARAMS_BLS12_381{
struct fp_config {
// field structure size = 8 * 32 bit
static constexpr unsigned limbs_count = 8;
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2*limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
// 2*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
static constexpr unsigned modulus_bits_count = 255;
// m = floor(2^(2*modulus_bits_count) / modulus)
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// static constexpr storage<limbs_count> omega[32]= { {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000}, {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d}, {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e}, {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb}, {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac}, {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802}, {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59}, {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667}, {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098}, {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b}, {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0}, {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8}, {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8}, {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911}, {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd}, {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333}, {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db}, {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83}, {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f}, {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5}, {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3}, {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd}, {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc}, {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd}, {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580}, {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d}, {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d}, {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f}, {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b}, {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72}, {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}};
// Quick fix for linking issue
static constexpr storage<limbs_count> omega1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
static constexpr storage<limbs_count> omega2= {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega3= {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d};
static constexpr storage<limbs_count> omega4= {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e};
static constexpr storage<limbs_count> omega5= {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb};
static constexpr storage<limbs_count> omega6= {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac};
static constexpr storage<limbs_count> omega7= {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802};
static constexpr storage<limbs_count> omega8= {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59};
static constexpr storage<limbs_count> omega9= {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667};
static constexpr storage<limbs_count> omega10= {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098};
static constexpr storage<limbs_count> omega11= {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b};
static constexpr storage<limbs_count> omega12= {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0};
static constexpr storage<limbs_count> omega13= {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8};
static constexpr storage<limbs_count> omega14= {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8};
static constexpr storage<limbs_count> omega15= {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911};
static constexpr storage<limbs_count> omega16= {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd};
static constexpr storage<limbs_count> omega17= {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333};
static constexpr storage<limbs_count> omega18= {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db};
static constexpr storage<limbs_count> omega19= {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83};
static constexpr storage<limbs_count> omega20= {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f};
static constexpr storage<limbs_count> omega21= {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5};
static constexpr storage<limbs_count> omega22= {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3};
static constexpr storage<limbs_count> omega23= {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd};
static constexpr storage<limbs_count> omega24= {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc};
static constexpr storage<limbs_count> omega25= {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd};
static constexpr storage<limbs_count> omega26= {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580};
static constexpr storage<limbs_count> omega27= {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d};
static constexpr storage<limbs_count> omega28= {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d};
static constexpr storage<limbs_count> omega29= {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f};
static constexpr storage<limbs_count> omega30= {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
static constexpr storage<limbs_count> omega31= {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
static constexpr storage<limbs_count> omega32= {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
// static constexpr storage<limbs_count> omega_inv[32]={ {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753}, {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e}, {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4}, {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c}, {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee}, {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d}, {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25}, {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e}, {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508}, {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d}, {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63}, {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7}, {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1}, {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac}, {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003}, {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c}, {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7}, {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950}, {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2}, {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6}, {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5}, {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9}, {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960}, {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6}, {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf}, {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f}, {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533}, {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff}, {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287}, {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6}, {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}};
// Quick fix for linking issue
static constexpr storage<limbs_count> omega_inv1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753};
static constexpr storage<limbs_count> omega_inv3= {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e};
static constexpr storage<limbs_count> omega_inv4= {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4};
static constexpr storage<limbs_count> omega_inv5= {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c};
static constexpr storage<limbs_count> omega_inv6= {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee};
static constexpr storage<limbs_count> omega_inv7= {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d};
static constexpr storage<limbs_count> omega_inv8= {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25};
static constexpr storage<limbs_count> omega_inv9= {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e};
static constexpr storage<limbs_count> omega_inv10= {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508};
static constexpr storage<limbs_count> omega_inv11= {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d};
static constexpr storage<limbs_count> omega_inv12= {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63};
static constexpr storage<limbs_count> omega_inv13= {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7};
static constexpr storage<limbs_count> omega_inv14= {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1};
static constexpr storage<limbs_count> omega_inv15= {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac};
static constexpr storage<limbs_count> omega_inv16= {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003};
static constexpr storage<limbs_count> omega_inv17= {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c};
static constexpr storage<limbs_count> omega_inv18= {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7};
static constexpr storage<limbs_count> omega_inv19= {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950};
static constexpr storage<limbs_count> omega_inv20= {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2};
static constexpr storage<limbs_count> omega_inv21= {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6};
static constexpr storage<limbs_count> omega_inv22= {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5};
static constexpr storage<limbs_count> omega_inv23= {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9};
static constexpr storage<limbs_count> omega_inv24= {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960};
static constexpr storage<limbs_count> omega_inv25= {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6};
static constexpr storage<limbs_count> omega_inv26= {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf};
static constexpr storage<limbs_count> omega_inv27= {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f};
static constexpr storage<limbs_count> omega_inv28= {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533};
static constexpr storage<limbs_count> omega_inv29= {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff};
static constexpr storage<limbs_count> omega_inv30= {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287};
static constexpr storage<limbs_count> omega_inv31= {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
static constexpr storage<limbs_count> omega_inv32= {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};
// static constexpr storage<limbs_count> inv[32]={ {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9}, {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e}, {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268}, {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd}, {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18}, {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5}, {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04}, {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab}, {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f}, {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9}, {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e}, {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878}, {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5}, {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c}, {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77}, {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365}, {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c}, {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57}, {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5}, {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014}, {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3}, {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583}, {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b}, {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df}, {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719}, {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736}, {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744}, {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b}, {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f}, {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751}, {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752}, {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}};
// Quick fix for linking issue
static constexpr storage<limbs_count> inv1= {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
static constexpr storage<limbs_count> inv2= {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
static constexpr storage<limbs_count> inv3= {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268};
static constexpr storage<limbs_count> inv4= {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd};
static constexpr storage<limbs_count> inv5= {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18};
static constexpr storage<limbs_count> inv6= {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5};
static constexpr storage<limbs_count> inv7= {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04};
static constexpr storage<limbs_count> inv8= {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab};
static constexpr storage<limbs_count> inv9= {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f};
static constexpr storage<limbs_count> inv10= {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9};
static constexpr storage<limbs_count> inv11= {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e};
static constexpr storage<limbs_count> inv12= {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878};
static constexpr storage<limbs_count> inv13= {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5};
static constexpr storage<limbs_count> inv14= {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c};
static constexpr storage<limbs_count> inv15= {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77};
static constexpr storage<limbs_count> inv16= {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365};
static constexpr storage<limbs_count> inv17= {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c};
static constexpr storage<limbs_count> inv18= {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57};
static constexpr storage<limbs_count> inv19= {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5};
static constexpr storage<limbs_count> inv20= {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014};
static constexpr storage<limbs_count> inv21= {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3};
static constexpr storage<limbs_count> inv22= {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583};
static constexpr storage<limbs_count> inv23= {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b};
static constexpr storage<limbs_count> inv24= {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df};
static constexpr storage<limbs_count> inv25= {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719};
static constexpr storage<limbs_count> inv26= {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736};
static constexpr storage<limbs_count> inv27= {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744};
static constexpr storage<limbs_count> inv28= {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b};
static constexpr storage<limbs_count> inv29= {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f};
static constexpr storage<limbs_count> inv30= {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
static constexpr storage<limbs_count> inv31= {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
static constexpr storage<limbs_count> inv32= {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
};
struct fq_config {
// field structure size = 12 * 32 bit
static constexpr unsigned limbs_count = 12;
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
static constexpr storage<2*limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2*limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
// 2*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
// 4*modulus^2
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
static constexpr unsigned modulus_bits_count = 381;
// m = floor(2^(2*modulus_bits_count) / modulus)
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// i^2, the square of the imaginary unit for the extension field
static constexpr uint32_t i_squared = 1;
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
static constexpr storage<limbs_count> generator_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
0x9774b905, 0xc3688c4f, 0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
static constexpr storage<limbs_count> generator_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
0xd5d00af6, 0xfcf5e095, 0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
static constexpr storage<limbs_count> generator_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0xbac0326, 0x7ae3d177, 0xb4510b64,
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x24aa2b2};
static constexpr storage<limbs_count> generator_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
static constexpr storage<limbs_count> generator_y_re = {0x8b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0xce5d527};
static constexpr storage<limbs_count> generator_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x606c4a0};
};
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
}

View File

@@ -1,19 +0,0 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
extern "C" bool eq_bls12_381(BLS12_381::projective_t *point1, BLS12_381::projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_381::point_field_t::zero()) && (point1->y == BLS12_381::point_field_t::zero()) && (point1->z == BLS12_381::point_field_t::zero())) &&
!((point2->x == BLS12_381::point_field_t::zero()) && (point2->y == BLS12_381::point_field_t::zero()) && (point2->z == BLS12_381::point_field_t::zero()));
}
#if defined(G2_DEFINED)
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t *point1, BLS12_381::g2_projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_381::g2_point_field_t::zero()) && (point1->y == BLS12_381::g2_point_field_t::zero()) && (point1->z == BLS12_381::g2_point_field_t::zero())) &&
!((point2->x == BLS12_381::g2_point_field_t::zero()) && (point2->y == BLS12_381::g2_point_field_t::zero()) && (point2->z == BLS12_381::g2_point_field_t::zero()));
}
#endif

View File

@@ -1,4 +0,0 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"

View File

@@ -1,65 +0,0 @@
#ifndef _BLS12_381_VEC_MULT
#define _BLS12_381_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
extern "C" int32_t vec_mod_mult_point_bls12_381(BLS12_381::projective_t *inout,
BLS12_381::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_381::projective_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t vec_mod_mult_scalar_bls12_381(BLS12_381::scalar_t *inout,
BLS12_381::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_381::scalar_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t matrix_vec_mod_mult_bls12_381(BLS12_381::scalar_t *matrix_flattened,
BLS12_381::scalar_t *input,
BLS12_381::scalar_t *output,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
matrix_mod_mult<BLS12_381::scalar_t>(matrix_flattened, input, output, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
#endif

View File

@@ -1,22 +0,0 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
namespace BN254 {
typedef Field<PARAMS_BN254::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_BN254::fq_config> point_field_t;
static constexpr point_field_t b = point_field_t{ PARAMS_BN254::weierstrass_b };
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_BN254::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BN254::weierstrass_b_g2_re },
point_field_t{ PARAMS_BN254::weierstrass_b_g2_im }};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
}

View File

@@ -1,308 +0,0 @@
#ifndef _BN254_LDE
#define _BN254_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" BN254::scalar_t* build_domain_cuda_bn254(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
try
{
if (inverse) {
return fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega_inv(logn));
} else {
return fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega(logn));
}
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return nullptr;
}
}
extern "C" int ntt_cuda_bn254(BN254::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<BN254::scalar_t,BN254::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_cuda_bn254(BN254::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<BN254::projective_t,BN254::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ntt_batch_cuda_bn254(BN254::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<BN254::scalar_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_batch_cuda_bn254(BN254::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<BN254::projective_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_evaluations, BN254::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_evaluations, BN254::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_coefficients, BN254::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0)
{
try
{
BN254::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
BN254::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_coefficients, BN254::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0)
{
try
{
BN254::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
BN254::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_coefficients, BN254::scalar_t *d_domain, unsigned domain_size,
unsigned n, BN254::scalar_t *coset_powers, unsigned device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_coefficients, BN254::scalar_t *d_domain, unsigned domain_size,
unsigned n, BN254::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_cuda_bn254(BN254::scalar_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_batch_cuda_bn254(BN254::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_cuda_bn254(BN254::projective_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_batch_cuda_bn254(BN254::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif

View File

@@ -1,87 +0,0 @@
#ifndef _BN254_MSM
#define _BN254_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
extern "C"
int msm_cuda_bn254(BN254::projective_t *out, BN254::affine_t points[],
BN254::scalar_t scalars[], size_t count, size_t device_id = 0)
{
try
{
large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, count, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int msm_batch_cuda_bn254(BN254::projective_t* out, BN254::affine_t points[],
BN254::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
try
{
batched_large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, batch_size, msm_size, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a polynomial using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Ouptut point to write the result to.
* @param d_scalars Scalars for the MSM. Must be on device.
* @param d_points Points for the MSM. Must be on device.
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t device_id = 0)
{
try
{
large_msm(d_scalars, d_points, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a batch of polynomials using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Ouptut point to write the results to.
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
try
{
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif

View File

@@ -1,155 +0,0 @@
#pragma once
#include "../../utils/storage.cuh"
namespace PARAMS_BN254{
struct fp_config{
static constexpr unsigned limbs_count = 8;
////
static constexpr storage<limbs_count> omega17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> omega18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> omega19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
static constexpr storage<limbs_count> omega20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
static constexpr storage<limbs_count> omega21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
static constexpr storage<limbs_count> omega22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
static constexpr storage<limbs_count> omega23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
static constexpr storage<limbs_count> omega24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
static constexpr storage<limbs_count> omega25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
static constexpr storage<limbs_count> omega26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
static constexpr storage<limbs_count> omega27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
static constexpr storage<limbs_count> omega28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
static constexpr storage<limbs_count> omega29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
////
static constexpr storage<limbs_count> omega_inv17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> omega_inv18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> omega_inv19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
static constexpr storage<limbs_count> omega_inv20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
static constexpr storage<limbs_count> omega_inv21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
static constexpr storage<limbs_count> omega_inv22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
static constexpr storage<limbs_count> omega_inv23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
static constexpr storage<limbs_count> omega_inv24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
static constexpr storage<limbs_count> omega_inv25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
static constexpr storage<limbs_count> omega_inv26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
static constexpr storage<limbs_count> omega_inv27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
static constexpr storage<limbs_count> omega_inv28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
static constexpr storage<limbs_count> omega_inv29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
static constexpr storage<limbs_count> omega_inv30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> omega_inv31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> omega_inv32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
////
////
static constexpr storage<limbs_count> inv17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> inv18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> inv19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
static constexpr storage<limbs_count> inv20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
static constexpr storage<limbs_count> inv21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
static constexpr storage<limbs_count> inv22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
static constexpr storage<limbs_count> inv23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
static constexpr storage<limbs_count> inv24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
static constexpr storage<limbs_count> inv25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
static constexpr storage<limbs_count> inv26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
static constexpr storage<limbs_count> inv27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
static constexpr storage<limbs_count> inv28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
static constexpr storage<limbs_count> inv29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
static constexpr storage<limbs_count> inv30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> inv31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> inv32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
////
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090, 0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121, 0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
static constexpr storage<2*limbs_count> modulus_wide = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_sqared = {0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975, 0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb, 0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7, 0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
static constexpr unsigned modulus_bits_count = 254;
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520, 0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> omega2= {0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> omega3= {0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1};
static constexpr storage<limbs_count> omega4= {0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2};
static constexpr storage<limbs_count> omega5= {0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6};
static constexpr storage<limbs_count> omega6= {0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d};
static constexpr storage<limbs_count> omega7= {0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd};
static constexpr storage<limbs_count> omega8= {0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561};
static constexpr storage<limbs_count> omega9= {0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e};
static constexpr storage<limbs_count> omega10= {0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1};
static constexpr storage<limbs_count> omega11= {0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584};
static constexpr storage<limbs_count> omega12= {0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596};
static constexpr storage<limbs_count> omega13= {0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49};
static constexpr storage<limbs_count> omega14= {0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651};
static constexpr storage<limbs_count> omega15= {0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f};
static constexpr storage<limbs_count> omega16= {0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb};
static constexpr storage<limbs_count> omega_inv1= {0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> omega_inv2= {0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv3= {0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711};
static constexpr storage<limbs_count> omega_inv4= {0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf};
static constexpr storage<limbs_count> omega_inv5= {0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136};
static constexpr storage<limbs_count> omega_inv6= {0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2};
static constexpr storage<limbs_count> omega_inv7= {0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053};
static constexpr storage<limbs_count> omega_inv8= {0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327};
static constexpr storage<limbs_count> omega_inv9= {0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821};
static constexpr storage<limbs_count> omega_inv10= {0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7};
static constexpr storage<limbs_count> omega_inv11= {0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21};
static constexpr storage<limbs_count> omega_inv12= {0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2};
static constexpr storage<limbs_count> omega_inv13= {0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58};
static constexpr storage<limbs_count> omega_inv14= {0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f};
static constexpr storage<limbs_count> omega_inv15= {0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766};
static constexpr storage<limbs_count> omega_inv16= {0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7};
static constexpr storage<limbs_count> inv1= {0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739};
static constexpr storage<limbs_count> inv2= {0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6};
static constexpr storage<limbs_count> inv3= {0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4};
static constexpr storage<limbs_count> inv4= {0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b};
static constexpr storage<limbs_count> inv5= {0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff};
static constexpr storage<limbs_count> inv6= {0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39};
static constexpr storage<limbs_count> inv7= {0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5};
static constexpr storage<limbs_count> inv8= {0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24};
static constexpr storage<limbs_count> inv9= {0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b};
static constexpr storage<limbs_count> inv10= {0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f};
static constexpr storage<limbs_count> inv11= {0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9};
static constexpr storage<limbs_count> inv12= {0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d};
static constexpr storage<limbs_count> inv13= {0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50};
static constexpr storage<limbs_count> inv14= {0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1};
static constexpr storage<limbs_count> inv15= {0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa};
static constexpr storage<limbs_count> inv16= {0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e};
};
struct fq_config{
static constexpr unsigned limbs_count = 8;
static constexpr storage<limbs_count> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522, 0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45, 0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
static constexpr storage<2*limbs_count> modulus_wide = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_sqared = {0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95, 0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
static constexpr storage<2*limbs_count> modulus_sqared_2 = {0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a, 0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
static constexpr storage<2*limbs_count> modulus_sqared_4 = {0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55, 0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
static constexpr unsigned modulus_bits_count = 254;
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17, 0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// i^2, the square of the imaginary unit for the extension field
static constexpr uint32_t i_squared = 1;
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
static constexpr storage<limbs_count> generator_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_x_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_x_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_y_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> generator_y_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
};
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// TODO: correct parameters for G2 here
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
}

View File

@@ -1,19 +0,0 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
extern "C" bool eq_bn254(BN254::projective_t *point1, BN254::projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BN254::point_field_t::zero()) && (point1->y == BN254::point_field_t::zero()) && (point1->z == BN254::point_field_t::zero())) &&
!((point2->x == BN254::point_field_t::zero()) && (point2->y == BN254::point_field_t::zero()) && (point2->z == BN254::point_field_t::zero()));
}
#if defined(G2_DEFINED)
extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) && (point1->z == BN254::g2_point_field_t::zero())) &&
!((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) && (point2->z == BN254::g2_point_field_t::zero()));
}
#endif

View File

@@ -1,4 +0,0 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"

View File

@@ -1,72 +0,0 @@
#ifndef _BN254_VEC_MULT
#define _BN254_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
extern "C" int32_t vec_mod_mult_point_bn254(BN254::projective_t *inout,
BN254::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try
{
// TODO: device_id
vector_mod_mult<BN254::projective_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t vec_mod_mult_scalar_bn254(BN254::scalar_t *inout,
BN254::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try
{
// TODO: device_id
vector_mod_mult<BN254::scalar_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t matrix_vec_mod_mult_bn254(BN254::scalar_t *matrix_flattened,
BN254::scalar_t *input,
BN254::scalar_t *output,
size_t n_elments,
size_t device_id)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try
{
// TODO: device_id
matrix_mod_mult<BN254::scalar_t>(matrix_flattened, input, output, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
#endif

View File

@@ -0,0 +1,14 @@
#pragma once
#include "../primitives/field.cuh"
#include "../primitives/projective.cuh"
#include "bls12_381.cuh"
// #include "bn254.cuh"
typedef Field<fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<fq_config> point_field_t;
typedef Projective<point_field_t, scalar_field_t, group_generator, weierstrass_b> projective_t;
typedef Affine<point_field_t> affine_t;

View File

@@ -0,0 +1,86 @@
#pragma once
#include "../utils/storage.cuh"
// y^2 = weierstrass_a * x^3 + weierstrass_b
static constexpr unsigned weierstrass_b = 4;
// a generator of the elliptic curve group
struct group_generator {
static constexpr storage<fq_config::limbs_count> generator_x = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> generator_y = {0x4abe706c, 0x5ea93e35, 0x00e1de5d, 0x6346b8ed, 0x92848344, 0xda9dd85e,
0xc9926b26, 0xc760f988, 0xf3763e9b, 0xb33cffc3, 0xd40d6212, 0x0a989bad};
};
/// SCALAR FIELD
struct fp_config {
// field structure size = 8 * 32 bit
static constexpr unsigned limbs_count = 8; // array size of 32bit int fo form a field element
static constexpr unsigned modulus_bits_count = 255; // field bit size
// field modulus split into array, ordered in Little-Endian
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513 -> 0x73eda753299d7d483339d80809a1d80553bda402fffe5bfeffffffff00000001
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2 * limbs_count> modulus_sqared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
// 2*modulus^2
static constexpr storage<2 * limbs_count> modulus_sqared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
// m = floor(2^(2*modulus_bits_count) / modulus)
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// Scalar specific
static constexpr storage<limbs_count> omega = {0xa5d36306, 0xe206da11, 0x378fbf96, 0x0ad1347b, 0xe0f8245f, 0xfc3e8acf, 0xa0f704f4, 0x564c0a11};
static constexpr storage<limbs_count> omega_inv = {3629396834, 2518295853, 1679307267, 1346818424, 3118225798, 1256349690, 3322524792, 958081110};
static constexpr storage<limbs_count> inv_2 = {2147483649,2147483647,2147429887,2849952257,80800770,429714436,2496577188,972477353};
static constexpr storage<limbs_count> inv_4 = {1073741825,1073741823,1073661183,4274928386,121201155,644571654,1597382134,1458716030};
static constexpr storage<limbs_count> inv_256 = {16777217,16777215,4244528547,1315563102,26752557,3943079472,3597918154,1937357227};
static constexpr storage<limbs_count> inv_512 = {8388609,8388607,4269694161,1360250160,94177049,2401254172,2148052617,1941155967};
static constexpr storage<limbs_count> inv_4096 = {1048577,1074790399,3217972249,3546834984,2300657127,1589027946,3026903920,1944479864};
};
/// BASE FIELD
struct fq_config {
// field structure size = 12 * 32 bit
static constexpr unsigned limbs_count = 12; // array size of 32bit int fo form a field element
static constexpr unsigned modulus_bits_count = 381; // field bit size
// field modulus split into array, ordered in Little-Endian
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787 -> 0x1a0111ea397fe69a4b1ba7b6434bacd764774b84f38512bf6730d2a0f6b0f6241eabfffeb153ffffb9feffffffffaaab
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
static constexpr storage<2 * limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2 * limbs_count> modulus_sqared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
// 2*modulus^2
static constexpr storage<2 * limbs_count> modulus_sqared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
// 4*modulus^2
static constexpr storage<2 * limbs_count> modulus_sqared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
// m = floor(2^(2*modulus_bits_count) / modulus)
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
};

View File

@@ -1,14 +0,0 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
namespace BN254 {
typedef Field<CURVE_NAME_U::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<CURVE_NAME_U::fq_config> point_field_t;
typedef Projective<point_field_t, scalar_field_t, CURVE_NAME_U::group_generator, CURVE_NAME_U::weierstrass_b> projective_t;
typedef Affine<point_field_t> affine_t;
}

View File

@@ -1,308 +0,0 @@
#ifndef _CURVE_NAME_U_LDE
#define _CURVE_NAME_U_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" CURVE_NAME_U::scalar_t* build_domain_cuda_CURVE_NAME_L(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
try
{
if (inverse) {
return fill_twiddle_factors_array(domain_size, CURVE_NAME_U::scalar_t::omega_inv(logn));
} else {
return fill_twiddle_factors_array(domain_size, CURVE_NAME_U::scalar_t::omega(logn));
}
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return nullptr;
}
}
extern "C" int ntt_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<CURVE_NAME_U::scalar_t,CURVE_NAME_U::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<CURVE_NAME_U::projective_t,CURVE_NAME_U::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ntt_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<CURVE_NAME_U::scalar_t,CURVE_NAME_U::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<CURVE_NAME_U::projective_t,CURVE_NAME_U::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_evaluations, CURVE_NAME_U::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_evaluations, CURVE_NAME_U::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_evaluations, CURVE_NAME_U::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_evaluations, CURVE_NAME_U::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0)
{
try
{
CURVE_NAME_U::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
CURVE_NAME_U::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0)
{
try
{
CURVE_NAME_U::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
CURVE_NAME_U::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain, unsigned domain_size,
unsigned n, CURVE_NAME_U::scalar_t *coset_powers, unsigned device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain, unsigned domain_size,
unsigned n, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif

View File

@@ -1,94 +0,0 @@
#ifndef _CURVE_NAME_U_MSM
#define _CURVE_NAME_U_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
extern "C"
int msm_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *out, CURVE_NAME_U::affine_t points[],
CURVE_NAME_U::scalar_t scalars[], size_t count, size_t device_id = 0)
{
try
{
if (count>256){
large_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, count, out, false);
}
else{
short_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, count, out, false);
}
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int msm_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* out, CURVE_NAME_U::affine_t points[],
CURVE_NAME_U::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
try
{
batched_large_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, batch_size, msm_size, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a polynomial using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Ouptut point to write the result to.
* @param d_scalars Scalars for the MSM. Must be on device.
* @param d_points Points for the MSM. Must be on device.
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::scalar_t* d_scalars, CURVE_NAME_U::affine_t* d_points, size_t count, size_t device_id = 0)
{
try
{
large_msm(d_scalars, d_points, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a batch of polynomials using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Ouptut point to write the results to.
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::scalar_t* d_scalars, CURVE_NAME_U::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
try
{
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif

View File

@@ -1,8 +0,0 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
extern "C" bool eq_CURVE_NAME_L(CURVE_NAME_U::projective_t *point1, CURVE_NAME_U::projective_t *point2, size_t device_id = 0)
{
return (*point1 == *point2);
}

View File

@@ -1,4 +0,0 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"

View File

@@ -1,66 +0,0 @@
#ifndef _CURVE_NAME_U_VEC_MULT
#define _CURVE_NAME_U_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
extern "C" int32_t vec_mod_mult_point_CURVE_NAME_L(CURVE_NAME_U::projective_t *inout,
CURVE_NAME_U::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<CURVE_NAME_U::projective_t, CURVE_NAME_U::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t vec_mod_mult_scalar_CURVE_NAME_L(CURVE_NAME_U::scalar_t *inout,
CURVE_NAME_U::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<CURVE_NAME_U::scalar_t, CURVE_NAME_U::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t matrix_vec_mod_mult_CURVE_NAME_L(CURVE_NAME_U::scalar_t *matrix_flattened,
CURVE_NAME_U::scalar_t *input,
CURVE_NAME_U::scalar_t *output,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
matrix_mod_mult<CURVE_NAME_U::scalar_t>(matrix_flattened, input, output, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
#endif

View File

@@ -1,3 +0,0 @@
#include "bls12_381/supported_operations.cu"
#include "bls12_377/supported_operations.cu"
#include "bn254/supported_operations.cu"

View File

@@ -1,149 +0,0 @@
#pragma once
#include "field.cuh"
#define HOST_INLINE __host__ __forceinline__
#define DEVICE_INLINE __device__ __forceinline__
#define HOST_DEVICE_INLINE __host__ __device__ __forceinline__
template <typename CONFIG> class ExtensionField {
private:
typedef typename Field<CONFIG>::Wide FWide;
struct ExtensionWide {
FWide real;
FWide imaginary;
ExtensionField HOST_DEVICE_INLINE get_lower() {
return ExtensionField { real.get_lower(), imaginary.get_lower() };
}
ExtensionField HOST_DEVICE_INLINE get_higher_with_slack() {
return ExtensionField { real.get_higher_with_slack(), imaginary.get_higher_with_slack() };
}
};
friend HOST_DEVICE_INLINE ExtensionWide operator+(ExtensionWide xs, const ExtensionWide& ys) {
return ExtensionField { xs.real + ys.real, xs.imaginary + ys.imaginary };
}
// an incomplete impl that assumes that xs > ys
friend HOST_DEVICE_INLINE ExtensionWide operator-(ExtensionWide xs, const ExtensionWide& ys) {
return ExtensionField { xs.real - ys.real, xs.imaginary - ys.imaginary };
}
public:
typedef Field<CONFIG> FF;
static constexpr unsigned TLC = 2 * CONFIG::limbs_count;
FF real;
FF imaginary;
static constexpr HOST_DEVICE_INLINE ExtensionField zero() {
return ExtensionField { FF::zero(), FF::zero() };
}
static constexpr HOST_DEVICE_INLINE ExtensionField one() {
return ExtensionField { FF::one(), FF::zero() };
}
static constexpr HOST_DEVICE_INLINE ExtensionField generator_x() {
return ExtensionField { FF { CONFIG::generator_x_re }, FF { CONFIG::generator_x_im } };
}
static constexpr HOST_DEVICE_INLINE ExtensionField generator_y() {
return ExtensionField { FF { CONFIG::generator_y_re }, FF { CONFIG::generator_y_im } };
}
static HOST_INLINE ExtensionField rand_host() {
return ExtensionField { FF::rand_host(), FF::rand_host() };
}
template <unsigned REDUCTION_SIZE = 1> static constexpr HOST_DEVICE_INLINE ExtensionField reduce(const ExtensionField &xs) {
return ExtensionField { FF::reduce<REDUCTION_SIZE>(&xs.real), FF::reduce<REDUCTION_SIZE>(&xs.imaginary) };
}
friend std::ostream& operator<<(std::ostream& os, const ExtensionField& xs) {
os << "{ Real: " << xs.real << " }; { Imaginary: " << xs.imaginary << " }";
return os;
}
friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const ExtensionField& ys) {
return ExtensionField { xs.real + ys.real, xs.imaginary + ys.imaginary };
}
friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const ExtensionField& ys) {
return ExtensionField { xs.real - ys.real, xs.imaginary - ys.imaginary };
}
template <unsigned MODULUS_MULTIPLE = 1>
static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const ExtensionField& ys) {
FWide real_prod = FF::mul_wide(xs.real * ys.real);
FWide imaginary_prod = FF::mul_wide(xs.imaginary * ys.imaginary);
FWide prod_of_sums = FF::mul_wide(xs.real + xs.imaginary, ys.real + ys.imaginary);
FWide i_sq_times_im = FF::mul_unsigned<CONFIG::i_squared>(imaginary_prod);
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
return ExtensionField { real_prod + i_sq_times_im, prod_of_sums - real_prod - imaginary_prod };
}
friend HOST_DEVICE_INLINE ExtensionField operator*(const ExtensionField& xs, const ExtensionField& ys) {
FF real_prod = xs.real * ys.real;
FF imaginary_prod = xs.imaginary * ys.imaginary;
FF prod_of_sums = (xs.real + xs.imaginary) * (ys.real + ys.imaginary);
FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
return ExtensionField { real_prod + i_sq_times_im, prod_of_sums - real_prod - imaginary_prod };
}
friend HOST_DEVICE_INLINE bool operator==(const ExtensionField& xs, const ExtensionField& ys) {
return (xs.real == ys.real) && (xs.imaginary == ys.imaginary);
}
friend HOST_DEVICE_INLINE bool operator!=(const ExtensionField& xs, const ExtensionField& ys) {
return !(xs == ys);
}
template <const ExtensionField& mutliplier>
static constexpr HOST_DEVICE_INLINE ExtensionField mul_const(const ExtensionField &xs) {
constexpr uint32_t mul_real = mutliplier.real.limbs_storage.limbs[0];
constexpr uint32_t mul_imaginary = mutliplier.imaginary.limbs_storage.limbs[0];
FF real_prod = FF::template mul_unsigned<mul_real>(xs.real);
FF imaginary_prod = FF::template mul_unsigned<mul_imaginary>(xs.imaginary);
FF re_im = FF::template mul_unsigned<mul_real>(xs.imaginary);
FF im_re = FF::template mul_unsigned<mul_imaginary>(xs.real);
FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
return ExtensionField { real_prod + i_sq_times_im, re_im + im_re };
}
template <uint32_t mutliplier, unsigned REDUCTION_SIZE = 1>
static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField &xs) {
return { FF::template mul_unsigned<mutliplier>(xs.real), FF::template mul_unsigned<mutliplier>(xs.imaginary) };
}
template <unsigned MODULUS_MULTIPLE = 1>
static constexpr HOST_DEVICE_INLINE ExtensionWide sqr_wide(const ExtensionField& xs) {
// TODO: change to a more efficient squaring
return mul_wide<MODULUS_MULTIPLE>(xs, xs);
}
template <unsigned MODULUS_MULTIPLE = 1>
static constexpr HOST_DEVICE_INLINE ExtensionField sqr(const ExtensionField& xs) {
// TODO: change to a more efficient squaring
return xs * xs;
}
template <unsigned MODULUS_MULTIPLE = 1>
static constexpr HOST_DEVICE_INLINE ExtensionField neg(const ExtensionField& xs) {
return ExtensionField { FF::neg(xs.real), FF::neg(xs.imaginary) };
}
// inverse assumes that xs is nonzero
static constexpr HOST_DEVICE_INLINE ExtensionField inverse(const ExtensionField& xs) {
ExtensionField xs_conjugate = { xs.real, FF::neg(xs.imaginary) };
// TODO: wide here
FF xs_norm_squared = FF::sqr(xs.real) + FF::sqr(xs.imaginary);
return xs_conjugate * ExtensionField { FF::inverse(xs_norm_squared), FF::zero() };
}
};

View File

@@ -23,19 +23,9 @@ template <class CONFIG> class Field {
return Field { CONFIG::one };
}
static constexpr HOST_DEVICE_INLINE Field generator_x() {
return Field { CONFIG::generator_x };
}
static constexpr HOST_DEVICE_INLINE Field generator_y() {
return Field { CONFIG::generator_y };
}
static constexpr HOST_INLINE Field omega(uint32_t log_size) {
// Quick fix to linking issue, permanent fix will follow
switch (log_size) {
case 0:
return Field { CONFIG::one };
case 1:
return Field { CONFIG::omega1 };
case 2:
@@ -101,15 +91,12 @@ template <class CONFIG> class Field {
case 32:
return Field { CONFIG::omega32 };
}
return Field { CONFIG::one };
// return Field { CONFIG::omega[log_size-1] };
}
static constexpr HOST_INLINE Field omega_inv(uint32_t log_size) {
// Quick fix to linking issue, permanent fix will follow
switch (log_size) {
case 0:
return Field { CONFIG::one };
case 1:
return Field { CONFIG::omega_inv1 };
case 2:
@@ -175,7 +162,6 @@ template <class CONFIG> class Field {
case 32:
return Field { CONFIG::omega_inv32 };
}
return Field { CONFIG::one };
// return Field { CONFIG::omega_inv[log_size-1] };
}
@@ -247,7 +233,6 @@ template <class CONFIG> class Field {
case 32:
return Field { CONFIG::inv32 };
}
return Field { CONFIG::one };
// return Field { CONFIG::inv[log_size-1] };
}
@@ -255,13 +240,14 @@ template <class CONFIG> class Field {
return Field { CONFIG::modulus };
}
// private:
private:
typedef storage<TLC> ff_storage;
typedef storage<2*TLC> ff_wide_storage;
static constexpr unsigned slack_bits = 32 * TLC - NBITS;
struct Wide {
struct wide {
ff_wide_storage limbs_storage;
Field HOST_DEVICE_INLINE get_lower() {
@@ -290,15 +276,15 @@ template <class CONFIG> class Field {
}
};
friend HOST_DEVICE_INLINE Wide operator+(Wide xs, const Wide& ys) {
Wide rs = {};
friend HOST_DEVICE_INLINE wide operator+(wide xs, const wide& ys) {
wide rs = {};
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
return rs;
}
// an incomplete impl that assumes that xs > ys
friend HOST_DEVICE_INLINE Wide operator-(Wide xs, const Wide& ys) {
Wide rs = {};
friend HOST_DEVICE_INLINE wide operator-(wide xs, const wide& ys) {
wide rs = {};
sub_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
return rs;
}
@@ -347,9 +333,7 @@ template <class CONFIG> class Field {
const uint32_t *y = ys.limbs;
uint32_t *r = rs.limbs;
r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
#ifdef __CUDA_ARCH__
#pragma unroll
#endif
for (unsigned i = 1; i < (CARRY_OUT ? TLC : TLC - 1); i++)
r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
if (!CARRY_OUT) {
@@ -365,9 +349,7 @@ template <class CONFIG> class Field {
const uint32_t *y = ys.limbs;
uint32_t *r = rs.limbs;
r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
#ifdef __CUDA_ARCH__
#pragma unroll
#endif
for (unsigned i = 1; i < (CARRY_OUT ? 2 * TLC : 2 * TLC - 1); i++)
r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
if (!CARRY_OUT) {
@@ -428,7 +410,7 @@ template <class CONFIG> class Field {
static DEVICE_INLINE void cmad_n(uint32_t *acc, const uint32_t *a, uint32_t bi, size_t n = TLC) {
acc[0] = ptx::mad_lo_cc(a[0], bi, acc[0]);
acc[1] = ptx::madc_hi_cc(a[0], bi, acc[1]);
#pragma unroll
#pragma unroll
for (size_t i = 2; i < n; i += 2) {
acc[i] = ptx::madc_lo_cc(a[i], bi, acc[i]);
acc[i + 1] = ptx::madc_hi_cc(a[i], bi, acc[i + 1]);
@@ -495,6 +477,8 @@ template <class CONFIG> class Field {
const uint32_t limb_lsb_idx = (digit_num*digit_width) / 32;
const uint32_t shift_bits = (digit_num*digit_width) % 32;
unsigned rv = limbs_storage.limbs[limb_lsb_idx] >> shift_bits;
// printf("get_scalar_func digit %u rv %u\n",digit_num,rv);
// if (shift_bits + digit_width > 32) {
if ((shift_bits + digit_width > 32) && (limb_lsb_idx+1 < TLC)) {
rv += limbs_storage.limbs[limb_lsb_idx + 1] << (32 - shift_bits);
}
@@ -505,7 +489,6 @@ template <class CONFIG> class Field {
static HOST_INLINE Field rand_host() {
std::random_device rd;
std::mt19937_64 generator(rd());
// std::mt19937_64 generator(0);
std::uniform_int_distribution<unsigned> distribution;
Field value{};
for (unsigned i = 0; i < TLC; i++)
@@ -531,13 +514,13 @@ template <class CONFIG> class Field {
return os;
}
friend HOST_DEVICE_INLINE Field operator+(Field xs, const Field& ys) {
friend HOST_DEVICE_INLINE Field operator+(Field xs, const Field& ys) {
Field rs = {};
add_limbs<false>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
return reduce<1>(rs);
}
friend HOST_DEVICE_INLINE Field operator-(Field xs, const Field& ys) {
friend HOST_DEVICE_INLINE Field operator-(Field xs, const Field& ys) {
Field rs = {};
uint32_t carry = sub_limbs<true>(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
if (carry == 0)
@@ -548,22 +531,22 @@ template <class CONFIG> class Field {
}
template <unsigned MODULUS_MULTIPLE = 1>
static constexpr HOST_DEVICE_INLINE Wide mul_wide(const Field& xs, const Field& ys) {
Wide rs = {};
static constexpr HOST_DEVICE_INLINE wide mul_wide(const Field& xs, const Field& ys) {
wide rs = {};
multiply_raw(xs.limbs_storage, ys.limbs_storage, rs.limbs_storage);
return rs;
}
friend HOST_DEVICE_INLINE Field operator*(const Field& xs, const Field& ys) {
Wide xy = mul_wide(xs, ys);
wide xy = mul_wide(xs, ys);
Field xy_hi = xy.get_higher_with_slack();
Wide l = {};
wide l = {};
multiply_raw(xy_hi.limbs_storage, get_m(), l.limbs_storage);
Field l_hi = l.get_higher_with_slack();
Wide lp = {};
wide lp = {};
multiply_raw(l_hi.limbs_storage, get_modulus(), lp.limbs_storage);
Wide r_wide = xy - lp;
Wide r_wide_reduced = {};
wide r_wide = xy - lp;
wide r_wide_reduced = {};
uint32_t reduced = sub_limbs<true>(r_wide.limbs_storage, modulus_wide(), r_wide_reduced.limbs_storage);
r_wide = reduced ? r_wide : r_wide_reduced;
Field r = r_wide.get_lower();
@@ -591,24 +574,22 @@ template <class CONFIG> class Field {
return !(xs == ys);
}
template <const Field& multiplier, class T> static constexpr HOST_DEVICE_INLINE T mul_const(const T &xs) {
return mul_unsigned<multiplier.limbs_storage.limbs[0], T>(xs);
}
template <uint32_t mutliplier, class T, unsigned REDUCTION_SIZE = 1>
static constexpr HOST_DEVICE_INLINE T mul_unsigned(const T &xs) {
T rs = {};
T temp = xs;
template <unsigned REDUCTION_SIZE = 1>
static constexpr HOST_DEVICE_INLINE Field mul(const unsigned scalar, const Field &xs) {
Field rs = {};
Field temp = xs;
unsigned l = scalar;
bool is_zero = true;
#ifdef __CUDA_ARCH__
#pragma unroll
#endif
for (unsigned i = 0; i < 32; i++) {
if (mutliplier & (1 << i)) {
if (l & 1) {
rs = is_zero ? temp : (rs + temp);
is_zero = false;
}
if (mutliplier & ((1 << (31 - i) - 1) << (i + 1)))
l >>= 1;
if (l == 0)
break;
temp = temp + temp;
}
@@ -616,7 +597,7 @@ template <class CONFIG> class Field {
}
template <unsigned MODULUS_MULTIPLE = 1>
static constexpr HOST_DEVICE_INLINE Wide sqr_wide(const Field& xs) {
static constexpr HOST_DEVICE_INLINE wide sqr_wide(const Field& xs) {
// TODO: change to a more efficient squaring
return mul_wide<MODULUS_MULTIPLE>(xs, xs);
}

View File

@@ -1,49 +1,8 @@
#include <cuda.h>
#include "../curves/bls12_381/curve_config.cuh"
#include "../curves/bls12_377/curve_config.cuh"
#include "../curves/bn254/curve_config.cuh"
#include "../curves/curve_config.cuh"
#include "projective.cuh"
extern "C" bool eq_bls12_381(BLS12_381::projective_t *point1, BLS12_381::projective_t *point2)
extern "C" bool eq(projective_t *point1, projective_t *point2, size_t device_id = 0)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_381::point_field_t::zero()) && (point1->y == BLS12_381::point_field_t::zero()) && (point1->z == BLS12_381::point_field_t::zero())) &&
!((point2->x == BLS12_381::point_field_t::zero()) && (point2->y == BLS12_381::point_field_t::zero()) && (point2->z == BLS12_381::point_field_t::zero()));
}
extern "C" bool eq_bls12_377(BLS12_377::projective_t *point1, BLS12_377::projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_377::point_field_t::zero()) && (point1->y == BLS12_377::point_field_t::zero()) && (point1->z == BLS12_377::point_field_t::zero())) &&
!((point2->x == BLS12_377::point_field_t::zero()) && (point2->y == BLS12_377::point_field_t::zero()) && (point2->z == BLS12_377::point_field_t::zero()));
}
extern "C" bool eq_bn254(BN254::projective_t *point1, BN254::projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BN254::point_field_t::zero()) && (point1->y == BN254::point_field_t::zero()) && (point1->z == BN254::point_field_t::zero())) &&
!((point2->x == BN254::point_field_t::zero()) && (point2->y == BN254::point_field_t::zero()) && (point2->z == BN254::point_field_t::zero()));
}
#if defined(G2_DEFINED)
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t *point1, BLS12_381::g2_projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_381::g2_point_field_t::zero()) && (point1->y == BLS12_381::g2_point_field_t::zero()) && (point1->z == BLS12_381::g2_point_field_t::zero())) &&
!((point2->x == BLS12_381::g2_point_field_t::zero()) && (point2->y == BLS12_381::g2_point_field_t::zero()) && (point2->z == BLS12_381::g2_point_field_t::zero()));
}
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t *point1, BLS12_377::g2_projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_377::g2_point_field_t::zero()) && (point1->y == BLS12_377::g2_point_field_t::zero()) && (point1->z == BLS12_377::g2_point_field_t::zero())) &&
!((point2->x == BLS12_377::g2_point_field_t::zero()) && (point2->y == BLS12_377::g2_point_field_t::zero()) && (point2->z == BLS12_377::g2_point_field_t::zero()));
}
extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) && (point1->z == BN254::g2_point_field_t::zero())) &&
!((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) && (point2->z == BN254::g2_point_field_t::zero()));
}
#endif
return (*point1 == *point2);
}

View File

@@ -2,7 +2,7 @@
#include "affine.cuh"
template <typename FF, class SCALAR_FF, const FF& B_VALUE>
template <class FF, class SCALAR_FF, class GEN, unsigned B_VALUE>
class Projective {
friend Affine<FF>;
@@ -11,6 +11,10 @@ class Projective {
FF y;
FF z;
static HOST_DEVICE_INLINE Projective generator() {
return { FF { GEN::generator_x }, FF { GEN::generator_y }, FF::one()};
}
static HOST_DEVICE_INLINE Projective zero() {
return {FF::zero(), FF::one(), FF::zero()};
}
@@ -24,56 +28,50 @@ class Projective {
return {point.x, point.y, FF::one()};
}
static HOST_DEVICE_INLINE Projective generator() {
return {FF::generator_x(), FF::generator_y(), FF::one()};
}
static HOST_DEVICE_INLINE Projective neg(const Projective &point) {
return {point.x, FF::neg(point.y), point.z};
}
friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Projective& p2) {
const FF X1 = p1.x; // < 2
const FF Y1 = p1.y; // < 2
const FF Z1 = p1.z; // < 2
const FF X2 = p2.x; // < 2
const FF Y2 = p2.y; // < 2
const FF Z2 = p2.z; // < 2
const FF t00 = X1 * X2; // t00 ← X1 · X2 < 2
const FF t01 = Y1 * Y2; // t01 ← Y1 · Y2 < 2
const FF t02 = Z1 * Z2; // t02 ← Z1 · Z2 < 2
const FF t03 = X1 + Y1; // t03 ← X1 + Y1 < 4
const FF t04 = X2 + Y2; // t04 ← X2 + Y2 < 4
const FF t05 = t03 * t04; // t03 ← t03 · t04 < 3
const FF t06 = t00 + t01; // t06 ← t00 + t01 < 4
const FF t07 = t05 - t06; // t05 ← t05 t06 < 2
const FF t08 = Y1 + Z1; // t08 ← Y1 + Z1 < 4
const FF t09 = Y2 + Z2; // t09 ← Y2 + Z2 < 4
const FF t10 = t08 * t09; // t10 ← t08 · t09 < 3
const FF t11 = t01 + t02; // t11 ← t01 + t02 < 4
const FF t12 = t10 - t11; // t12 ← t10 t11 < 2
const FF t13 = X1 + Z1; // t13 ← X1 + Z1 < 4
const FF t14 = X2 + Z2; // t14 ← X2 + Z2 < 4
const FF t15 = t13 * t14; // t15 ← t13 · t14 < 3
const FF t16 = t00 + t02; // t16 ← t00 + t02 < 4
const FF t17 = t15 - t16; // t17 ← t15 t16 < 2
const FF t18 = t00 + t00; // t18 ← t00 + t00 < 2
const FF t19 = t18 + t00; // t19 ← t18 + t00 < 2
const FF t20 = FF::template mul_unsigned<3>(
FF::template mul_const<B_VALUE>(t02)); // t20b3 · t02 < 2
const FF t21 = t01 + t20; // t21 ← t01 + t20 < 2
const FF t22 = t01 - t20; // t22t01 t20 < 2
const FF t23 = FF::template mul_unsigned<3>(
FF::template mul_const<B_VALUE>(t17)); // t23b3 · t17 < 2
const FF t24 = t12 * t23; // t24 ← t12 · t23 < 2
const FF t25 = t07 * t22; // t25 ← t07 · t22 < 2
const FF X3 = t25 - t24; // X3 ← t25 t24 < 2
const FF t27 = t23 * t19; // t27 ← t23 · t19 < 2
const FF t28 = t22 * t21; // t28 ← t22 · t21 < 2
const FF Y3 = t28 + t27; // Y3 ← t28 + t27 < 2
const FF t30 = t19 * t07; // t30 ← t19 · t07 < 2
const FF t31 = t21 * t12; // t31 ← t21 · t12 < 2
const FF Z3 = t31 + t30; // Z3 ← t31 + t30 < 2
const FF X1 = p1.x; // < 2
const FF Y1 = p1.y; // < 2
const FF Z1 = p1.z; // < 2
const FF X2 = p2.x; // < 2
const FF Y2 = p2.y; // < 2
const FF Z2 = p2.z; // < 2
const FF t00 = X1 * X2; // t00 ← X1 · X2 < 2
const FF t01 = Y1 * Y2; // t01 ← Y1 · Y2 < 2
const FF t02 = Z1 * Z2; // t02 ← Z1 · Z2 < 2
const FF t03 = X1 + Y1; // t03 ← X1 + Y1 < 4
const FF t04 = X2 + Y2; // t04 ← X2 + Y2 < 4
const FF t05 = t03 * t04; // t03 ← t03 · t04 < 3
const FF t06 = t00 + t01; // t06 ← t00 + t01 < 4
const FF t07 = t05 - t06; // t05 ← t05 t06 < 2
const FF t08 = Y1 + Z1; // t08 ← Y1 + Z1 < 4
const FF t09 = Y2 + Z2; // t09 ← Y2 + Z2 < 4
const FF t10 = t08 * t09; // t10 ← t08 · t09 < 3
const FF t11 = t01 + t02; // t11 ← t01 + t02 < 4
const FF t12 = t10 - t11; // t12 ← t10 t11 < 2
const FF t13 = X1 + Z1; // t13 ← X1 + Z1 < 4
const FF t14 = X2 + Z2; // t14 ← X2 + Z2 < 4
const FF t15 = t13 * t14; // t15 ← t13 · t14 < 3
const FF t16 = t00 + t02; // t16 ← t00 + t02 < 4
const FF t17 = t15 - t16; // t17 ← t15 t16 < 2
const FF t18 = t00 + t00; // t18 ← t00 + t00 < 2
const FF t19 = t18 + t00; // t19 ← t18 + t00 < 2
const FF t20 = FF::mul(3 * B_VALUE, t02); // t20 ← b3 · t02 < 2
const FF t21 = t01 + t20; // t21t01 + t20 < 2
const FF t22 = t01 - t20; // t22 ← t01 t20 < 2
const FF t23 = FF::mul(3 * B_VALUE, t17); // t23b3 · t17 < 2
const FF t24 = t12 * t23; // t24 ← t12 · t23 < 2
const FF t25 = t07 * t22; // t25t07 · t22 < 2
const FF X3 = t25 - t24; // X3 ← t25 t24 < 2
const FF t27 = t23 * t19; // t27 ← t23 · t19 < 2
const FF t28 = t22 * t21; // t28 ← t22 · t21 < 2
const FF Y3 = t28 + t27; // Y3 ← t28 + t27 < 2
const FF t30 = t19 * t07; // t30 ← t19 · t07 < 2
const FF t31 = t21 * t12; // t31 ← t21 · t12 < 2
const FF Z3 = t31 + t30; // Z3 ← t31 + t30 < 2
return {X3, Y3, Z3};
}
@@ -87,7 +85,7 @@ class Projective {
}
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Projective& point) {
os << "Point { x: " << point.x << "; y: " << point.y << "; z: " << point.z << " }";
os << "x: " << point.x << "; y: " << point.y << "; z: " << point.z;
return os;
}
@@ -97,7 +95,7 @@ class Projective {
friend HOST_DEVICE_INLINE Projective operator*(SCALAR_FF scalar, const Projective& point) {
Projective res = zero();
#ifdef __CUDA_ARCH__
#ifdef CUDA_ARCH
#pragma unroll
#endif
for (int i = 0; i < SCALAR_FF::NBITS; i++) {
@@ -122,7 +120,7 @@ class Projective {
static HOST_DEVICE_INLINE bool is_on_curve(const Projective &point) {
if (is_zero(point))
return true;
bool eq_holds = (FF::template mul_const<B_VALUE>(FF::sqr(point.z) * point.z) + FF::sqr(point.x) * point.x == point.z * FF::sqr(point.y));
bool eq_holds = (FF::mul(B_VALUE, FF::sqr(point.z) * point.z) + FF::sqr(point.x) * point.x == point.z * FF::sqr(point.y));
return point.z != FF::zero() && eq_holds;
}

View File

@@ -26,20 +26,14 @@ protected:
proj *points1{};
proj *points2{};
g2_proj *g2_points1{};
g2_proj *g2_points2{};
scalar_field *scalars1{};
scalar_field *scalars2{};
proj *zero_points{};
g2_proj *g2_zero_points{};
scalar_field *zero_scalars{};
scalar_field *one_scalars{};
affine *aff_points{};
g2_affine *g2_aff_points{};
proj *res_points1{};
proj *res_points2{};
g2_proj *g2_res_points1{};
g2_proj *g2_res_points2{};
scalar_field *res_scalars1{};
scalar_field *res_scalars2{};
@@ -47,20 +41,14 @@ protected:
assert(!cudaDeviceReset());
assert(!cudaMallocManaged(&points1, n * sizeof(proj)));
assert(!cudaMallocManaged(&points2, n * sizeof(proj)));
assert(!cudaMallocManaged(&g2_points1, n * sizeof(g2_proj)));
assert(!cudaMallocManaged(&g2_points2, n * sizeof(g2_proj)));
assert(!cudaMallocManaged(&scalars1, n * sizeof(scalar_field)));
assert(!cudaMallocManaged(&scalars2, n * sizeof(scalar_field)));
assert(!cudaMallocManaged(&zero_points, n * sizeof(proj)));
assert(!cudaMallocManaged(&g2_zero_points, n * sizeof(g2_proj)));
assert(!cudaMallocManaged(&zero_scalars, n * sizeof(scalar_field)));
assert(!cudaMallocManaged(&one_scalars, n * sizeof(scalar_field)));
assert(!cudaMallocManaged(&aff_points, n * sizeof(affine)));
assert(!cudaMallocManaged(&g2_aff_points, n * sizeof(g2_affine)));
assert(!cudaMallocManaged(&res_points1, n * sizeof(proj)));
assert(!cudaMallocManaged(&res_points2, n * sizeof(proj)));
assert(!cudaMallocManaged(&g2_res_points1, n * sizeof(g2_proj)));
assert(!cudaMallocManaged(&g2_res_points2, n * sizeof(g2_proj)));
assert(!cudaMallocManaged(&res_scalars1, n * sizeof(scalar_field)));
assert(!cudaMallocManaged(&res_scalars2, n * sizeof(scalar_field)));
}
@@ -68,20 +56,14 @@ protected:
~PrimitivesTest() override {
cudaFree(points1);
cudaFree(points2);
cudaFree(g2_points1);
cudaFree(g2_points2);
cudaFree(scalars1);
cudaFree(scalars2);
cudaFree(zero_points);
cudaFree(g2_zero_points);
cudaFree(zero_scalars);
cudaFree(one_scalars);
cudaFree(aff_points);
cudaFree(g2_aff_points);
cudaFree(res_points1);
cudaFree(res_points2);
cudaFree(g2_res_points1);
cudaFree(g2_res_points2);
cudaFree(res_scalars1);
cudaFree(res_scalars2);
cudaDeviceReset();
@@ -90,20 +72,14 @@ protected:
void SetUp() override {
ASSERT_EQ(device_populate_random<proj>(points1, n), cudaSuccess);
ASSERT_EQ(device_populate_random<proj>(points2, n), cudaSuccess);
ASSERT_EQ(device_populate_random<g2_proj>(g2_points1, n), cudaSuccess);
ASSERT_EQ(device_populate_random<g2_proj>(g2_points2, n), cudaSuccess);
ASSERT_EQ(device_populate_random<scalar_field>(scalars1, n), cudaSuccess);
ASSERT_EQ(device_populate_random<scalar_field>(scalars2, n), cudaSuccess);
ASSERT_EQ(device_set<proj>(zero_points, proj::zero(), n), cudaSuccess);
ASSERT_EQ(device_set<g2_proj>(g2_zero_points, g2_proj::zero(), n), cudaSuccess);
ASSERT_EQ(device_set<scalar_field>(zero_scalars, scalar_field::zero(), n), cudaSuccess);
ASSERT_EQ(device_set<scalar_field>(one_scalars, scalar_field::one(), n), cudaSuccess);
ASSERT_EQ(cudaMemset(aff_points, 0, n * sizeof(affine)), cudaSuccess);
ASSERT_EQ(cudaMemset(g2_aff_points, 0, n * sizeof(g2_affine)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_points1, 0, n * sizeof(proj)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_points2, 0, n * sizeof(proj)), cudaSuccess);
ASSERT_EQ(cudaMemset(g2_res_points1, 0, n * sizeof(g2_proj)), cudaSuccess);
ASSERT_EQ(cudaMemset(g2_res_points2, 0, n * sizeof(g2_proj)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars1, 0, n * sizeof(scalar_field)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars2, 0, n * sizeof(scalar_field)), cudaSuccess);
}
@@ -279,104 +255,6 @@ TEST_F(PrimitivesTest, ECMixedAdditionOfNegatedPointEqSubtraction) {
ASSERT_EQ(res_points1[i], points1[i] + res_points2[i]);
}
TEST_F(PrimitivesTest, G2ECRandomPointsAreOnCurve) {
for (unsigned i = 0; i < 2; i++)
ASSERT_PRED1(g2_proj::is_on_curve, g2_points1[i]);
}
TEST_F(PrimitivesTest, G2ECPointAdditionSubtractionCancel) {
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points1, n), cudaSuccess);
ASSERT_EQ(vec_sub(g2_res_points1, g2_points2, g2_res_points2, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_points1[i], g2_res_points2[i]);
}
TEST_F(PrimitivesTest, G2ECPointZeroAddition) {
ASSERT_EQ(vec_add(g2_points1, g2_zero_points, g2_res_points1, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_points1[i], g2_res_points1[i]);
}
TEST_F(PrimitivesTest, G2ECPointAdditionHostDeviceEq) {
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points1, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_points1[i] + g2_points2[i], g2_res_points1[i]);
}
TEST_F(PrimitivesTest, G2ECScalarMultiplicationHostDeviceEq) {
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(scalars1[i] * g2_points1[i], g2_res_points1[i]);
}
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByOne) {
ASSERT_EQ(vec_mul(one_scalars, points1, res_points1, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_points1[i], g2_res_points1[i]);
}
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByMinusOne) {
ASSERT_EQ(vec_neg(one_scalars, res_scalars1, n), cudaSuccess);
ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
ASSERT_EQ(vec_neg(g2_points1, g2_res_points2, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_res_points1[i], g2_res_points2[i]);
}
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByTwo) {
ASSERT_EQ(vec_add(one_scalars, one_scalars, res_scalars1, n), cudaSuccess);
ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ((one_scalars[i] + one_scalars[i]) * g2_points1[i], g2_res_points1[i]);
}
TEST_F(PrimitivesTest, G2ECScalarMultiplicationInverseCancel) {
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
ASSERT_EQ(field_vec_inv(scalars1, res_scalars1, n), cudaSuccess);
ASSERT_EQ(vec_mul(res_scalars1, g2_res_points1, g2_res_points2, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_points1[i], g2_res_points2[i]);
}
TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverMultiplication) {
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
ASSERT_EQ(vec_mul(scalars2, g2_res_points1, g2_res_points2, n), cudaSuccess);
ASSERT_EQ(vec_mul(scalars1, scalars2, res_scalars1, n), cudaSuccess);
ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_res_points1[i], g2_res_points2[i]);
}
TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverAddition) {
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
ASSERT_EQ(vec_mul(scalars2, g2_points1, g2_res_points2, n), cudaSuccess);
ASSERT_EQ(vec_add(scalars1, scalars2, res_scalars1, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(res_scalars1[i] * g2_points1[i], g2_res_points1[i] + g2_res_points2[i]);
}
TEST_F(PrimitivesTest, G2ECProjectiveToAffine) {
ASSERT_EQ(point_vec_to_affine(g2_points1, g2_aff_points, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_points1[i], g2_proj::from_affine(g2_aff_points[i]));
}
TEST_F(PrimitivesTest, G2ECMixedPointAddition) {
ASSERT_EQ(point_vec_to_affine(g2_points2, g2_aff_points, n), cudaSuccess);
ASSERT_EQ(vec_add(g2_points1, g2_aff_points, g2_res_points1, n), cudaSuccess);
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points2, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_res_points1[i], g2_res_points2[i]);
}
TEST_F(PrimitivesTest, G2ECMixedAdditionOfNegatedPointEqSubtraction) {
ASSERT_EQ(point_vec_to_affine(g2_points2, g2_aff_points, n), cudaSuccess);
ASSERT_EQ(vec_sub(g2_points1, g2_aff_points, g2_res_points1, n), cudaSuccess);
ASSERT_EQ(vec_neg(g2_points2, g2_res_points2, n), cudaSuccess);
for (unsigned i = 0; i < n; i++)
ASSERT_EQ(g2_res_points1[i], g2_points1[i] + g2_res_points2[i]);
}
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);

View File

@@ -3,17 +3,12 @@
// TODO: change the curve depending on env variable
#include "../curves/bls12_381.cuh"
#include "projective.cuh"
#include "extension_field.cuh"
#include "field.cuh"
typedef Field<fp_config> scalar_field;
typedef Field<fq_config> base_field;
typedef Affine<base_field> affine;
static constexpr base_field b = base_field{ weierstrass_b };
typedef Projective<base_field, scalar_field, b> proj;
typedef ExtensionField<fq_config> base_extension_field;
typedef Affine<base_extension_field> g2_affine;
static constexpr base_extension_field b2 = base_extension_field{ base_field {b_re}, base_field {b_im}};
typedef Projective<base_extension_field, scalar_field, b2> g2_proj;
typedef Projective<base_field, scalar_field, group_generator, weierstrass_b> proj;
template <class T1, class T2>
@@ -98,16 +93,15 @@ int field_vec_sqr(const scalar_field *x, scalar_field *result, const unsigned co
return error ? error : cudaDeviceSynchronize();
}
template <class P, class A>
__global__ void to_affine_points_kernel(const P *x, A *result, const unsigned count) {
__global__ void to_affine_points_kernel(const proj *x, affine *result, const unsigned count) {
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
if (gid >= count)
return;
result[gid] = P::to_affine(x[gid]);
result[gid] = proj::to_affine(x[gid]);
}
template <class P, class A> int point_vec_to_affine(const P *x, A *result, const unsigned count) {
to_affine_points_kernel<P, A><<<(count - 1) / 32 + 1, 32>>>(x, result, count);
int point_vec_to_affine(const proj *x, affine *result, const unsigned count) {
to_affine_points_kernel<<<(count - 1) / 32 + 1, 32>>>(x, result, count);
int error = cudaGetLastError();
return error ? error : cudaDeviceSynchronize();
}

View File

@@ -1,334 +0,0 @@
use std::ffi::c_uint;
use ark_CURVE_NAME_L::{Fq as Fq_CURVE_NAME_U, Fr as Fr_CURVE_NAME_U, G1Affine as G1Affine_CURVE_NAME_U, G1Projective as G1Projective_CURVE_NAME_U};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger_limbs_q, BigInteger_limbs_p, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field_CURVE_NAME_U<const NUM_LIMBS: usize> {
pub s: [u32; NUM_LIMBS],
}
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_CURVE_NAME_U<NUM_LIMBS> {}
impl<const NUM_LIMBS: usize> Default for Field_CURVE_NAME_U<NUM_LIMBS> {
fn default() -> Self {
Field_CURVE_NAME_U::zero()
}
}
impl<const NUM_LIMBS: usize> Field_CURVE_NAME_U<NUM_LIMBS> {
pub fn zero() -> Self {
Field_CURVE_NAME_U {
s: [0u32; NUM_LIMBS],
}
}
pub fn one() -> Self {
let mut s = [0u32; NUM_LIMBS];
s[0] = 1;
Field_CURVE_NAME_U { s }
}
fn to_bytes_le(&self) -> Vec<u8> {
self.s
.iter()
.map(|s| s.to_le_bytes().to_vec())
.flatten()
.collect::<Vec<_>>()
}
}
pub const BASE_LIMBS_CURVE_NAME_U: usize = limbs_q;
pub const SCALAR_LIMBS_CURVE_NAME_U: usize = limbs_p;
pub type BaseField_CURVE_NAME_U = Field_CURVE_NAME_U<BASE_LIMBS_CURVE_NAME_U>;
pub type ScalarField_CURVE_NAME_U = Field_CURVE_NAME_U<SCALAR_LIMBS_CURVE_NAME_U>;
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
match val.len() {
n if n < NUM_LIMBS => {
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
padded[..val.len()].copy_from_slice(&val);
padded
}
n if n == NUM_LIMBS => val.try_into().unwrap(),
_ => panic!("slice has too many elements"),
}
}
//
impl BaseField_CURVE_NAME_U {
pub fn limbs(&self) -> [u32; BASE_LIMBS_CURVE_NAME_U] {
self.s
}
pub fn from_limbs(value: &[u32]) -> Self {
Self {
s: get_fixed_limbs(value),
}
}
pub fn to_ark(&self) -> BigInteger_limbs_q {
BigInteger_limbs_q::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger_limbs_q) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
}
//
impl ScalarField_CURVE_NAME_U {
pub fn limbs(&self) -> [u32; SCALAR_LIMBS_CURVE_NAME_U] {
self.s
}
pub fn to_ark(&self) -> BigInteger_limbs_p {
BigInteger_limbs_p::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger_limbs_p) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
pub fn to_ark_transmute(&self) -> BigInteger_limbs_p {
unsafe { transmute(*self) }
}
pub fn from_ark_transmute(v: BigInteger_limbs_p) -> ScalarField_CURVE_NAME_U {
unsafe { transmute(v) }
}
}
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Point_CURVE_NAME_U {
pub x: BaseField_CURVE_NAME_U,
pub y: BaseField_CURVE_NAME_U,
pub z: BaseField_CURVE_NAME_U,
}
impl Default for Point_CURVE_NAME_U {
fn default() -> Self {
Point_CURVE_NAME_U::zero()
}
}
impl Point_CURVE_NAME_U {
pub fn zero() -> Self {
Point_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U::zero(),
y: BaseField_CURVE_NAME_U::one(),
z: BaseField_CURVE_NAME_U::zero(),
}
}
pub fn infinity() -> Self {
Self::zero()
}
pub fn to_ark(&self) -> G1Projective_CURVE_NAME_U {
//TODO: generic conversion
self.to_ark_affine().into_projective()
}
pub fn to_ark_affine(&self) -> G1Affine_CURVE_NAME_U {
//TODO: generic conversion
use ark_ff::Field;
use std::ops::Mul;
let proj_x_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.x.to_bytes_le());
let proj_y_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.y.to_bytes_le());
let proj_z_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.z.to_bytes_le());
let inverse_z = proj_z_field.inverse().unwrap();
let aff_x = proj_x_field.mul(inverse_z);
let aff_y = proj_y_field.mul(inverse_z);
G1Affine_CURVE_NAME_U::new(aff_x, aff_y, false)
}
pub fn from_ark(ark: G1Projective_CURVE_NAME_U) -> Point_CURVE_NAME_U {
use ark_ff::Field;
let z_inv = ark.z.inverse().unwrap();
let z_invsq = z_inv * z_inv;
let z_invq3 = z_invsq * z_inv;
Point_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U::from_ark((ark.x * z_invsq).into_repr()),
y: BaseField_CURVE_NAME_U::from_ark((ark.y * z_invq3).into_repr()),
z: BaseField_CURVE_NAME_U::one(),
}
}
}
extern "C" {
fn eq_CURVE_NAME_L(point1: *const Point_CURVE_NAME_U, point2: *const Point_CURVE_NAME_U) -> c_uint;
}
impl PartialEq for Point_CURVE_NAME_U {
fn eq(&self, other: &Self) -> bool {
unsafe { eq_CURVE_NAME_L(self, other) != 0 }
}
}
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinity_CURVE_NAME_U {
pub x: BaseField_CURVE_NAME_U,
pub y: BaseField_CURVE_NAME_U,
}
impl Default for PointAffineNoInfinity_CURVE_NAME_U {
fn default() -> Self {
PointAffineNoInfinity_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U::zero(),
y: BaseField_CURVE_NAME_U::zero(),
}
}
}
impl PointAffineNoInfinity_CURVE_NAME_U {
// TODO: generics
///From u32 limbs x,y
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
PointAffineNoInfinity_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(x),
},
y: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(y),
},
}
}
pub fn limbs(&self) -> Vec<u32> {
[self.x.limbs(), self.y.limbs()].concat()
}
pub fn to_projective(&self) -> Point_CURVE_NAME_U {
Point_CURVE_NAME_U {
x: self.x,
y: self.y,
z: BaseField_CURVE_NAME_U::one(),
}
}
pub fn to_ark(&self) -> G1Affine_CURVE_NAME_U {
G1Affine_CURVE_NAME_U::new(Fq_CURVE_NAME_U::new(self.x.to_ark()), Fq_CURVE_NAME_U::new(self.y.to_ark()), false)
}
pub fn to_ark_repr(&self) -> G1Affine_CURVE_NAME_U {
G1Affine_CURVE_NAME_U::new(
Fq_CURVE_NAME_U::from_repr(self.x.to_ark()).unwrap(),
Fq_CURVE_NAME_U::from_repr(self.y.to_ark()).unwrap(),
false,
)
}
pub fn from_ark(p: &G1Affine_CURVE_NAME_U) -> Self {
PointAffineNoInfinity_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U::from_ark(p.x.into_repr()),
y: BaseField_CURVE_NAME_U::from_ark(p.y.into_repr()),
}
}
}
impl Point_CURVE_NAME_U {
// TODO: generics
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
Point_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(x),
},
y: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(y),
},
z: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(z),
},
}
}
pub fn from_xy_limbs(value: &[u32]) -> Point_CURVE_NAME_U {
let l = value.len();
assert_eq!(l, 3 * BASE_LIMBS_CURVE_NAME_U, "length must be 3 * {}", BASE_LIMBS_CURVE_NAME_U);
Point_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U {
s: value[..BASE_LIMBS_CURVE_NAME_U].try_into().unwrap(),
},
y: BaseField_CURVE_NAME_U {
s: value[BASE_LIMBS_CURVE_NAME_U..BASE_LIMBS_CURVE_NAME_U * 2].try_into().unwrap(),
},
z: BaseField_CURVE_NAME_U {
s: value[BASE_LIMBS_CURVE_NAME_U * 2..].try_into().unwrap(),
},
}
}
pub fn to_affine(&self) -> PointAffineNoInfinity_CURVE_NAME_U {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U::from_ark(ark_affine.x.into_repr()),
y: BaseField_CURVE_NAME_U::from_ark(ark_affine.y.into_repr()),
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_CURVE_NAME_U {
PointAffineNoInfinity_CURVE_NAME_U {
x: self.x,
y: self.y,
}
}
}
impl ScalarField_CURVE_NAME_U {
pub fn from_limbs(value: &[u32]) -> ScalarField_CURVE_NAME_U {
ScalarField_CURVE_NAME_U {
s: get_fixed_limbs(value),
}
}
}
#[cfg(test)]
mod tests {
use ark_CURVE_NAME_L::{Fr as Fr_CURVE_NAME_U};
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::CURVE_NAME_L::{Point_CURVE_NAME_U, ScalarField_CURVE_NAME_U}};
#[test]
fn test_ark_scalar_convert() {
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
let scalar = ScalarField_CURVE_NAME_U::from_limbs(&limbs);
assert_eq!(
scalar.to_ark(),
scalar.to_ark_transmute(),
"{:08X?} {:08X?}",
scalar.to_ark(),
scalar.to_ark_transmute()
)
}
#[test]
#[allow(non_snake_case)]
fn test_point_equality() {
let left = Point_CURVE_NAME_U::zero();
let right = Point_CURVE_NAME_U::zero();
assert_eq!(left, right);
let right = Point_CURVE_NAME_U::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
assert_eq!(left, right);
let right = Point_CURVE_NAME_U::from_limbs(
&[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
&[0; 12],
&[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
);
assert!(left != right);
}
}

View File

@@ -1,312 +0,0 @@
use std::ffi::c_uint;
use ark_CURVE_NAME_L::{Fq as Fq_CURVE_NAME_U, Fr as Fr_CURVE_NAME_U, G1Affine as G1Affine_CURVE_NAME_U, G1Projective as G1Projective_CURVE_NAME_U};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger_limbs_p, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field_CURVE_NAME_U<const NUM_LIMBS: usize> {
pub s: [u32; NUM_LIMBS],
}
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_CURVE_NAME_U<NUM_LIMBS> {}
impl<const NUM_LIMBS: usize> Default for Field_CURVE_NAME_U<NUM_LIMBS> {
fn default() -> Self {
Field_CURVE_NAME_U::zero()
}
}
impl<const NUM_LIMBS: usize> Field_CURVE_NAME_U<NUM_LIMBS> {
pub fn zero() -> Self {
Field_CURVE_NAME_U {
s: [0u32; NUM_LIMBS],
}
}
pub fn one() -> Self {
let mut s = [0u32; NUM_LIMBS];
s[0] = 1;
Field_CURVE_NAME_U { s }
}
fn to_bytes_le(&self) -> Vec<u8> {
self.s
.iter()
.map(|s| s.to_le_bytes().to_vec())
.flatten()
.collect::<Vec<_>>()
}
}
pub const BASE_LIMBS_CURVE_NAME_U: usize = limbs_p;
pub const SCALAR_LIMBS_CURVE_NAME_U: usize = limbs_p;
pub type BaseField_CURVE_NAME_U = Field_CURVE_NAME_U<BASE_LIMBS_CURVE_NAME_U>;
pub type ScalarField_CURVE_NAME_U = Field_CURVE_NAME_U<SCALAR_LIMBS_CURVE_NAME_U>;
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
match val.len() {
n if n < NUM_LIMBS => {
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
padded[..val.len()].copy_from_slice(&val);
padded
}
n if n == NUM_LIMBS => val.try_into().unwrap(),
_ => panic!("slice has too many elements"),
}
}
impl ScalarField_CURVE_NAME_U {
pub fn limbs(&self) -> [u32; SCALAR_LIMBS_CURVE_NAME_U] {
self.s
}
pub fn to_ark(&self) -> BigInteger_limbs_p {
BigInteger_limbs_p::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger_limbs_p) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
pub fn to_ark_transmute(&self) -> BigInteger_limbs_p {
unsafe { transmute(*self) }
}
pub fn from_ark_transmute(v: BigInteger_limbs_p) -> ScalarField_CURVE_NAME_U {
unsafe { transmute(v) }
}
}
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Point_CURVE_NAME_U {
pub x: BaseField_CURVE_NAME_U,
pub y: BaseField_CURVE_NAME_U,
pub z: BaseField_CURVE_NAME_U,
}
impl Default for Point_CURVE_NAME_U {
fn default() -> Self {
Point_CURVE_NAME_U::zero()
}
}
impl Point_CURVE_NAME_U {
pub fn zero() -> Self {
Point_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U::zero(),
y: BaseField_CURVE_NAME_U::one(),
z: BaseField_CURVE_NAME_U::zero(),
}
}
pub fn infinity() -> Self {
Self::zero()
}
pub fn to_ark(&self) -> G1Projective_CURVE_NAME_U {
//TODO: generic conversion
self.to_ark_affine().into_projective()
}
pub fn to_ark_affine(&self) -> G1Affine_CURVE_NAME_U {
//TODO: generic conversion
use ark_ff::Field;
use std::ops::Mul;
let proj_x_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.x.to_bytes_le());
let proj_y_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.y.to_bytes_le());
let proj_z_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.z.to_bytes_le());
let inverse_z = proj_z_field.inverse().unwrap();
let aff_x = proj_x_field.mul(inverse_z);
let aff_y = proj_y_field.mul(inverse_z);
G1Affine_CURVE_NAME_U::new(aff_x, aff_y, false)
}
pub fn from_ark(ark: G1Projective_CURVE_NAME_U) -> Point_CURVE_NAME_U {
use ark_ff::Field;
let z_inv = ark.z.inverse().unwrap();
let z_invsq = z_inv * z_inv;
let z_invq3 = z_invsq * z_inv;
Point_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U::from_ark((ark.x * z_invsq).into_repr()),
y: BaseField_CURVE_NAME_U::from_ark((ark.y * z_invq3).into_repr()),
z: BaseField_CURVE_NAME_U::one(),
}
}
}
extern "C" {
fn eq_CURVE_NAME_L(point1: *const Point_CURVE_NAME_U, point2: *const Point_CURVE_NAME_U) -> c_uint;
}
impl PartialEq for Point_CURVE_NAME_U {
fn eq(&self, other: &Self) -> bool {
unsafe { eq_CURVE_NAME_L(self, other) != 0 }
}
}
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinity_CURVE_NAME_U {
pub x: BaseField_CURVE_NAME_U,
pub y: BaseField_CURVE_NAME_U,
}
impl Default for PointAffineNoInfinity_CURVE_NAME_U {
fn default() -> Self {
PointAffineNoInfinity_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U::zero(),
y: BaseField_CURVE_NAME_U::zero(),
}
}
}
impl PointAffineNoInfinity_CURVE_NAME_U {
// TODO: generics
///From u32 limbs x,y
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
PointAffineNoInfinity_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(x),
},
y: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(y),
},
}
}
pub fn limbs(&self) -> Vec<u32> {
[self.x.limbs(), self.y.limbs()].concat()
}
pub fn to_projective(&self) -> Point_CURVE_NAME_U {
Point_CURVE_NAME_U {
x: self.x,
y: self.y,
z: BaseField_CURVE_NAME_U::one(),
}
}
pub fn to_ark(&self) -> G1Affine_CURVE_NAME_U {
G1Affine_CURVE_NAME_U::new(Fq_CURVE_NAME_U::new(self.x.to_ark()), Fq_CURVE_NAME_U::new(self.y.to_ark()), false)
}
pub fn to_ark_repr(&self) -> G1Affine_CURVE_NAME_U {
G1Affine_CURVE_NAME_U::new(
Fq_CURVE_NAME_U::from_repr(self.x.to_ark()).unwrap(),
Fq_CURVE_NAME_U::from_repr(self.y.to_ark()).unwrap(),
false,
)
}
pub fn from_ark(p: &G1Affine_CURVE_NAME_U) -> Self {
PointAffineNoInfinity_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U::from_ark(p.x.into_repr()),
y: BaseField_CURVE_NAME_U::from_ark(p.y.into_repr()),
}
}
}
impl Point_CURVE_NAME_U {
// TODO: generics
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
Point_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(x),
},
y: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(y),
},
z: BaseField_CURVE_NAME_U {
s: get_fixed_limbs(z),
},
}
}
pub fn from_xy_limbs(value: &[u32]) -> Point_CURVE_NAME_U {
let l = value.len();
assert_eq!(l, 3 * BASE_LIMBS_CURVE_NAME_U, "length must be 3 * {}", BASE_LIMBS_CURVE_NAME_U);
Point_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U {
s: value[..BASE_LIMBS_CURVE_NAME_U].try_into().unwrap(),
},
y: BaseField_CURVE_NAME_U {
s: value[BASE_LIMBS_CURVE_NAME_U..BASE_LIMBS_CURVE_NAME_U * 2].try_into().unwrap(),
},
z: BaseField_CURVE_NAME_U {
s: value[BASE_LIMBS_CURVE_NAME_U * 2..].try_into().unwrap(),
},
}
}
pub fn to_affine(&self) -> PointAffineNoInfinity_CURVE_NAME_U {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity_CURVE_NAME_U {
x: BaseField_CURVE_NAME_U::from_ark(ark_affine.x.into_repr()),
y: BaseField_CURVE_NAME_U::from_ark(ark_affine.y.into_repr()),
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_CURVE_NAME_U {
PointAffineNoInfinity_CURVE_NAME_U {
x: self.x,
y: self.y,
}
}
}
impl ScalarField_CURVE_NAME_U {
pub fn from_limbs(value: &[u32]) -> ScalarField_CURVE_NAME_U {
ScalarField_CURVE_NAME_U {
s: get_fixed_limbs(value),
}
}
}
#[cfg(test)]
mod tests {
use ark_CURVE_NAME_L::{Fr as Fr_CURVE_NAME_U};
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::CURVE_NAME_L::{Point_CURVE_NAME_U, ScalarField_CURVE_NAME_U}};
#[test]
fn test_ark_scalar_convert() {
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
let scalar = ScalarField_CURVE_NAME_U::from_limbs(&limbs);
assert_eq!(
scalar.to_ark(),
scalar.to_ark_transmute(),
"{:08X?} {:08X?}",
scalar.to_ark(),
scalar.to_ark_transmute()
)
}
#[test]
#[allow(non_snake_case)]
fn test_point_equality() {
let left = Point_CURVE_NAME_U::zero();
let right = Point_CURVE_NAME_U::zero();
assert_eq!(left, right);
let right = Point_CURVE_NAME_U::from_limbs(&[0; 8], &[2, 0, 0, 0, 0, 0, 0, 0], &[0; 8]);
assert_eq!(left, right);
let right = Point_CURVE_NAME_U::from_limbs(
&[2, 0, 0, 0, 0, 0, 0, 0],
&[0; 8],
&[1, 0, 0, 0, 0, 0, 0, 0],
);
assert!(left != right);
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,332 +0,0 @@
use std::ffi::c_uint;
use ark_bls12_377::{Fq as Fq_BLS12_377, Fr as Fr_BLS12_377, G1Affine as G1Affine_BLS12_377, G1Projective as G1Projective_BLS12_377};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field_BLS12_377<const NUM_LIMBS: usize> {
pub s: [u32; NUM_LIMBS],
}
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_BLS12_377<NUM_LIMBS> {}
impl<const NUM_LIMBS: usize> Default for Field_BLS12_377<NUM_LIMBS> {
fn default() -> Self {
Field_BLS12_377::zero()
}
}
impl<const NUM_LIMBS: usize> Field_BLS12_377<NUM_LIMBS> {
pub fn zero() -> Self {
Field_BLS12_377 {
s: [0u32; NUM_LIMBS],
}
}
pub fn one() -> Self {
let mut s = [0u32; NUM_LIMBS];
s[0] = 1;
Field_BLS12_377 { s }
}
fn to_bytes_le(&self) -> Vec<u8> {
self.s
.iter()
.map(|s| s.to_le_bytes().to_vec())
.flatten()
.collect::<Vec<_>>()
}
}
pub const BASE_LIMBS_BLS12_377: usize = 12;
pub const SCALAR_LIMBS_BLS12_377: usize = 8;
pub type BaseField_BLS12_377 = Field_BLS12_377<BASE_LIMBS_BLS12_377>;
pub type ScalarField_BLS12_377 = Field_BLS12_377<SCALAR_LIMBS_BLS12_377>;
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
match val.len() {
n if n < NUM_LIMBS => {
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
padded[..val.len()].copy_from_slice(&val);
padded
}
n if n == NUM_LIMBS => val.try_into().unwrap(),
_ => panic!("slice has too many elements"),
}
}
impl BaseField_BLS12_377 {
pub fn limbs(&self) -> [u32; BASE_LIMBS_BLS12_377] {
self.s
}
pub fn from_limbs(value: &[u32]) -> Self {
Self {
s: get_fixed_limbs(value),
}
}
pub fn to_ark(&self) -> BigInteger384 {
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger384) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
}
impl ScalarField_BLS12_377 {
pub fn limbs(&self) -> [u32; SCALAR_LIMBS_BLS12_377] {
self.s
}
pub fn to_ark(&self) -> BigInteger256 {
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger256) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
pub fn to_ark_transmute(&self) -> BigInteger256 {
unsafe { transmute(*self) }
}
pub fn from_ark_transmute(v: BigInteger256) -> ScalarField_BLS12_377 {
unsafe { transmute(v) }
}
}
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Point_BLS12_377 {
pub x: BaseField_BLS12_377,
pub y: BaseField_BLS12_377,
pub z: BaseField_BLS12_377,
}
impl Default for Point_BLS12_377 {
fn default() -> Self {
Point_BLS12_377::zero()
}
}
impl Point_BLS12_377 {
pub fn zero() -> Self {
Point_BLS12_377 {
x: BaseField_BLS12_377::zero(),
y: BaseField_BLS12_377::one(),
z: BaseField_BLS12_377::zero(),
}
}
pub fn infinity() -> Self {
Self::zero()
}
pub fn to_ark(&self) -> G1Projective_BLS12_377 {
//TODO: generic conversion
self.to_ark_affine().into_projective()
}
pub fn to_ark_affine(&self) -> G1Affine_BLS12_377 {
//TODO: generic conversion
use ark_ff::Field;
use std::ops::Mul;
let proj_x_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.x.to_bytes_le());
let proj_y_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.y.to_bytes_le());
let proj_z_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.z.to_bytes_le());
let inverse_z = proj_z_field.inverse().unwrap();
let aff_x = proj_x_field.mul(inverse_z);
let aff_y = proj_y_field.mul(inverse_z);
G1Affine_BLS12_377::new(aff_x, aff_y, false)
}
pub fn from_ark(ark: G1Projective_BLS12_377) -> Point_BLS12_377 {
use ark_ff::Field;
let z_inv = ark.z.inverse().unwrap();
let z_invsq = z_inv * z_inv;
let z_invq3 = z_invsq * z_inv;
Point_BLS12_377 {
x: BaseField_BLS12_377::from_ark((ark.x * z_invsq).into_repr()),
y: BaseField_BLS12_377::from_ark((ark.y * z_invq3).into_repr()),
z: BaseField_BLS12_377::one(),
}
}
}
extern "C" {
fn eq_bls12_377(point1: *const Point_BLS12_377, point2: *const Point_BLS12_377) -> c_uint;
}
impl PartialEq for Point_BLS12_377 {
fn eq(&self, other: &Self) -> bool {
unsafe { eq_bls12_377(self, other) != 0 }
}
}
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinity_BLS12_377 {
pub x: BaseField_BLS12_377,
pub y: BaseField_BLS12_377,
}
impl Default for PointAffineNoInfinity_BLS12_377 {
fn default() -> Self {
PointAffineNoInfinity_BLS12_377 {
x: BaseField_BLS12_377::zero(),
y: BaseField_BLS12_377::zero(),
}
}
}
impl PointAffineNoInfinity_BLS12_377 {
// TODO: generics
///From u32 limbs x,y
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
PointAffineNoInfinity_BLS12_377 {
x: BaseField_BLS12_377 {
s: get_fixed_limbs(x),
},
y: BaseField_BLS12_377 {
s: get_fixed_limbs(y),
},
}
}
pub fn limbs(&self) -> Vec<u32> {
[self.x.limbs(), self.y.limbs()].concat()
}
pub fn to_projective(&self) -> Point_BLS12_377 {
Point_BLS12_377 {
x: self.x,
y: self.y,
z: BaseField_BLS12_377::one(),
}
}
pub fn to_ark(&self) -> G1Affine_BLS12_377 {
G1Affine_BLS12_377::new(Fq_BLS12_377::new(self.x.to_ark()), Fq_BLS12_377::new(self.y.to_ark()), false)
}
pub fn to_ark_repr(&self) -> G1Affine_BLS12_377 {
G1Affine_BLS12_377::new(
Fq_BLS12_377::from_repr(self.x.to_ark()).unwrap(),
Fq_BLS12_377::from_repr(self.y.to_ark()).unwrap(),
false,
)
}
pub fn from_ark(p: &G1Affine_BLS12_377) -> Self {
PointAffineNoInfinity_BLS12_377 {
x: BaseField_BLS12_377::from_ark(p.x.into_repr()),
y: BaseField_BLS12_377::from_ark(p.y.into_repr()),
}
}
}
impl Point_BLS12_377 {
// TODO: generics
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
Point_BLS12_377 {
x: BaseField_BLS12_377 {
s: get_fixed_limbs(x),
},
y: BaseField_BLS12_377 {
s: get_fixed_limbs(y),
},
z: BaseField_BLS12_377 {
s: get_fixed_limbs(z),
},
}
}
pub fn from_xy_limbs(value: &[u32]) -> Point_BLS12_377 {
let l = value.len();
assert_eq!(l, 3 * BASE_LIMBS_BLS12_377, "length must be 3 * {}", BASE_LIMBS_BLS12_377);
Point_BLS12_377 {
x: BaseField_BLS12_377 {
s: value[..BASE_LIMBS_BLS12_377].try_into().unwrap(),
},
y: BaseField_BLS12_377 {
s: value[BASE_LIMBS_BLS12_377..BASE_LIMBS_BLS12_377 * 2].try_into().unwrap(),
},
z: BaseField_BLS12_377 {
s: value[BASE_LIMBS_BLS12_377 * 2..].try_into().unwrap(),
},
}
}
pub fn to_affine(&self) -> PointAffineNoInfinity_BLS12_377 {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity_BLS12_377 {
x: BaseField_BLS12_377::from_ark(ark_affine.x.into_repr()),
y: BaseField_BLS12_377::from_ark(ark_affine.y.into_repr()),
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_BLS12_377 {
PointAffineNoInfinity_BLS12_377 {
x: self.x,
y: self.y,
}
}
}
impl ScalarField_BLS12_377 {
pub fn from_limbs(value: &[u32]) -> ScalarField_BLS12_377 {
ScalarField_BLS12_377 {
s: get_fixed_limbs(value),
}
}
}
#[cfg(test)]
mod tests {
use ark_bls12_377::{Fr as Fr_BLS12_377};
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::bls12_377::{Point_BLS12_377, ScalarField_BLS12_377}};
#[test]
fn test_ark_scalar_convert() {
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
let scalar = ScalarField_BLS12_377::from_limbs(&limbs);
assert_eq!(
scalar.to_ark(),
scalar.to_ark_transmute(),
"{:08X?} {:08X?}",
scalar.to_ark(),
scalar.to_ark_transmute()
)
}
#[test]
#[allow(non_snake_case)]
fn test_point_equality() {
let left = Point_BLS12_377::zero();
let right = Point_BLS12_377::zero();
assert_eq!(left, right);
let right = Point_BLS12_377::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
assert_eq!(left, right);
let right = Point_BLS12_377::from_limbs(
&[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
&[0; 12],
&[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
);
assert!(left != right);
}
}

View File

@@ -1,332 +0,0 @@
use std::ffi::c_uint;
use ark_bls12_381::{Fq as Fq_BLS12_381, Fr as Fr_BLS12_381, G1Affine as G1Affine_BLS12_381, G1Projective as G1Projective_BLS12_381};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field_BLS12_381<const NUM_LIMBS: usize> {
pub s: [u32; NUM_LIMBS],
}
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_BLS12_381<NUM_LIMBS> {}
impl<const NUM_LIMBS: usize> Default for Field_BLS12_381<NUM_LIMBS> {
fn default() -> Self {
Field_BLS12_381::zero()
}
}
impl<const NUM_LIMBS: usize> Field_BLS12_381<NUM_LIMBS> {
pub fn zero() -> Self {
Field_BLS12_381 {
s: [0u32; NUM_LIMBS],
}
}
pub fn one() -> Self {
let mut s = [0u32; NUM_LIMBS];
s[0] = 1;
Field_BLS12_381 { s }
}
fn to_bytes_le(&self) -> Vec<u8> {
self.s
.iter()
.map(|s| s.to_le_bytes().to_vec())
.flatten()
.collect::<Vec<_>>()
}
}
pub const BASE_LIMBS_BLS12_381: usize = 12;
pub const SCALAR_LIMBS_BLS12_381: usize = 8;
pub type BaseField_BLS12_381 = Field_BLS12_381<BASE_LIMBS_BLS12_381>;
pub type ScalarField_BLS12_381 = Field_BLS12_381<SCALAR_LIMBS_BLS12_381>;
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
match val.len() {
n if n < NUM_LIMBS => {
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
padded[..val.len()].copy_from_slice(&val);
padded
}
n if n == NUM_LIMBS => val.try_into().unwrap(),
_ => panic!("slice has too many elements"),
}
}
impl BaseField_BLS12_381 {
pub fn limbs(&self) -> [u32; BASE_LIMBS_BLS12_381] {
self.s
}
pub fn from_limbs(value: &[u32]) -> Self {
Self {
s: get_fixed_limbs(value),
}
}
pub fn to_ark(&self) -> BigInteger384 {
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger384) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
}
impl ScalarField_BLS12_381 {
pub fn limbs(&self) -> [u32; SCALAR_LIMBS_BLS12_381] {
self.s
}
pub fn to_ark(&self) -> BigInteger256 {
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger256) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
pub fn to_ark_transmute(&self) -> BigInteger256 {
unsafe { transmute(*self) }
}
pub fn from_ark_transmute(v: BigInteger256) -> ScalarField_BLS12_381 {
unsafe { transmute(v) }
}
}
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Point_BLS12_381 {
pub x: BaseField_BLS12_381,
pub y: BaseField_BLS12_381,
pub z: BaseField_BLS12_381,
}
impl Default for Point_BLS12_381 {
fn default() -> Self {
Point_BLS12_381::zero()
}
}
impl Point_BLS12_381 {
pub fn zero() -> Self {
Point_BLS12_381 {
x: BaseField_BLS12_381::zero(),
y: BaseField_BLS12_381::one(),
z: BaseField_BLS12_381::zero(),
}
}
pub fn infinity() -> Self {
Self::zero()
}
pub fn to_ark(&self) -> G1Projective_BLS12_381 {
//TODO: generic conversion
self.to_ark_affine().into_projective()
}
pub fn to_ark_affine(&self) -> G1Affine_BLS12_381 {
//TODO: generic conversion
use ark_ff::Field;
use std::ops::Mul;
let proj_x_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.x.to_bytes_le());
let proj_y_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.y.to_bytes_le());
let proj_z_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.z.to_bytes_le());
let inverse_z = proj_z_field.inverse().unwrap();
let aff_x = proj_x_field.mul(inverse_z);
let aff_y = proj_y_field.mul(inverse_z);
G1Affine_BLS12_381::new(aff_x, aff_y, false)
}
pub fn from_ark(ark: G1Projective_BLS12_381) -> Point_BLS12_381 {
use ark_ff::Field;
let z_inv = ark.z.inverse().unwrap();
let z_invsq = z_inv * z_inv;
let z_invq3 = z_invsq * z_inv;
Point_BLS12_381 {
x: BaseField_BLS12_381::from_ark((ark.x * z_invsq).into_repr()),
y: BaseField_BLS12_381::from_ark((ark.y * z_invq3).into_repr()),
z: BaseField_BLS12_381::one(),
}
}
}
extern "C" {
fn eq_bls12_381(point1: *const Point_BLS12_381, point2: *const Point_BLS12_381) -> c_uint;
}
impl PartialEq for Point_BLS12_381 {
fn eq(&self, other: &Self) -> bool {
unsafe { eq_bls12_381(self, other) != 0 }
}
}
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinity_BLS12_381 {
pub x: BaseField_BLS12_381,
pub y: BaseField_BLS12_381,
}
impl Default for PointAffineNoInfinity_BLS12_381 {
fn default() -> Self {
PointAffineNoInfinity_BLS12_381 {
x: BaseField_BLS12_381::zero(),
y: BaseField_BLS12_381::zero(),
}
}
}
impl PointAffineNoInfinity_BLS12_381 {
// TODO: generics
///From u32 limbs x,y
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
PointAffineNoInfinity_BLS12_381 {
x: BaseField_BLS12_381 {
s: get_fixed_limbs(x),
},
y: BaseField_BLS12_381 {
s: get_fixed_limbs(y),
},
}
}
pub fn limbs(&self) -> Vec<u32> {
[self.x.limbs(), self.y.limbs()].concat()
}
pub fn to_projective(&self) -> Point_BLS12_381 {
Point_BLS12_381 {
x: self.x,
y: self.y,
z: BaseField_BLS12_381::one(),
}
}
pub fn to_ark(&self) -> G1Affine_BLS12_381 {
G1Affine_BLS12_381::new(Fq_BLS12_381::new(self.x.to_ark()), Fq_BLS12_381::new(self.y.to_ark()), false)
}
pub fn to_ark_repr(&self) -> G1Affine_BLS12_381 {
G1Affine_BLS12_381::new(
Fq_BLS12_381::from_repr(self.x.to_ark()).unwrap(),
Fq_BLS12_381::from_repr(self.y.to_ark()).unwrap(),
false,
)
}
pub fn from_ark(p: &G1Affine_BLS12_381) -> Self {
PointAffineNoInfinity_BLS12_381 {
x: BaseField_BLS12_381::from_ark(p.x.into_repr()),
y: BaseField_BLS12_381::from_ark(p.y.into_repr()),
}
}
}
impl Point_BLS12_381 {
// TODO: generics
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
Point_BLS12_381 {
x: BaseField_BLS12_381 {
s: get_fixed_limbs(x),
},
y: BaseField_BLS12_381 {
s: get_fixed_limbs(y),
},
z: BaseField_BLS12_381 {
s: get_fixed_limbs(z),
},
}
}
pub fn from_xy_limbs(value: &[u32]) -> Point_BLS12_381 {
let l = value.len();
assert_eq!(l, 3 * BASE_LIMBS_BLS12_381, "length must be 3 * {}", BASE_LIMBS_BLS12_381);
Point_BLS12_381 {
x: BaseField_BLS12_381 {
s: value[..BASE_LIMBS_BLS12_381].try_into().unwrap(),
},
y: BaseField_BLS12_381 {
s: value[BASE_LIMBS_BLS12_381..BASE_LIMBS_BLS12_381 * 2].try_into().unwrap(),
},
z: BaseField_BLS12_381 {
s: value[BASE_LIMBS_BLS12_381 * 2..].try_into().unwrap(),
},
}
}
pub fn to_affine(&self) -> PointAffineNoInfinity_BLS12_381 {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity_BLS12_381 {
x: BaseField_BLS12_381::from_ark(ark_affine.x.into_repr()),
y: BaseField_BLS12_381::from_ark(ark_affine.y.into_repr()),
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_BLS12_381 {
PointAffineNoInfinity_BLS12_381 {
x: self.x,
y: self.y,
}
}
}
impl ScalarField_BLS12_381 {
pub fn from_limbs(value: &[u32]) -> ScalarField_BLS12_381 {
ScalarField_BLS12_381 {
s: get_fixed_limbs(value),
}
}
}
#[cfg(test)]
mod tests {
use ark_bls12_381::{Fr as Fr_BLS12_381};
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::bls12_381::{Point_BLS12_381, ScalarField_BLS12_381}};
#[test]
fn test_ark_scalar_convert() {
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
let scalar = ScalarField_BLS12_381::from_limbs(&limbs);
assert_eq!(
scalar.to_ark(),
scalar.to_ark_transmute(),
"{:08X?} {:08X?}",
scalar.to_ark(),
scalar.to_ark_transmute()
)
}
#[test]
#[allow(non_snake_case)]
fn test_point_equality() {
let left = Point_BLS12_381::zero();
let right = Point_BLS12_381::zero();
assert_eq!(left, right);
let right = Point_BLS12_381::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
assert_eq!(left, right);
let right = Point_BLS12_381::from_limbs(
&[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
&[0; 12],
&[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
);
assert!(left != right);
}
}

View File

@@ -1,312 +0,0 @@
use std::ffi::c_uint;
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger256, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field_BN254<const NUM_LIMBS: usize> {
pub s: [u32; NUM_LIMBS],
}
unsafe impl<const NUM_LIMBS: usize> DeviceCopy for Field_BN254<NUM_LIMBS> {}
impl<const NUM_LIMBS: usize> Default for Field_BN254<NUM_LIMBS> {
fn default() -> Self {
Field_BN254::zero()
}
}
impl<const NUM_LIMBS: usize> Field_BN254<NUM_LIMBS> {
pub fn zero() -> Self {
Field_BN254 {
s: [0u32; NUM_LIMBS],
}
}
pub fn one() -> Self {
let mut s = [0u32; NUM_LIMBS];
s[0] = 1;
Field_BN254 { s }
}
fn to_bytes_le(&self) -> Vec<u8> {
self.s
.iter()
.map(|s| s.to_le_bytes().to_vec())
.flatten()
.collect::<Vec<_>>()
}
}
pub const BASE_LIMBS_BN254: usize = 8;
pub const SCALAR_LIMBS_BN254: usize = 8;
pub type BaseField_BN254 = Field_BN254<BASE_LIMBS_BN254>;
pub type ScalarField_BN254 = Field_BN254<SCALAR_LIMBS_BN254>;
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
match val.len() {
n if n < NUM_LIMBS => {
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
padded[..val.len()].copy_from_slice(&val);
padded
}
n if n == NUM_LIMBS => val.try_into().unwrap(),
_ => panic!("slice has too many elements"),
}
}
impl ScalarField_BN254 {
pub fn limbs(&self) -> [u32; SCALAR_LIMBS_BN254] {
self.s
}
pub fn to_ark(&self) -> BigInteger256 {
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger256) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
pub fn to_ark_transmute(&self) -> BigInteger256 {
unsafe { transmute(*self) }
}
pub fn from_ark_transmute(v: BigInteger256) -> ScalarField_BN254 {
unsafe { transmute(v) }
}
}
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct Point_BN254 {
pub x: BaseField_BN254,
pub y: BaseField_BN254,
pub z: BaseField_BN254,
}
impl Default for Point_BN254 {
fn default() -> Self {
Point_BN254::zero()
}
}
impl Point_BN254 {
pub fn zero() -> Self {
Point_BN254 {
x: BaseField_BN254::zero(),
y: BaseField_BN254::one(),
z: BaseField_BN254::zero(),
}
}
pub fn infinity() -> Self {
Self::zero()
}
pub fn to_ark(&self) -> G1Projective_BN254 {
//TODO: generic conversion
self.to_ark_affine().into_projective()
}
pub fn to_ark_affine(&self) -> G1Affine_BN254 {
//TODO: generic conversion
use ark_ff::Field;
use std::ops::Mul;
let proj_x_field = Fq_BN254::from_le_bytes_mod_order(&self.x.to_bytes_le());
let proj_y_field = Fq_BN254::from_le_bytes_mod_order(&self.y.to_bytes_le());
let proj_z_field = Fq_BN254::from_le_bytes_mod_order(&self.z.to_bytes_le());
let inverse_z = proj_z_field.inverse().unwrap();
let aff_x = proj_x_field.mul(inverse_z);
let aff_y = proj_y_field.mul(inverse_z);
G1Affine_BN254::new(aff_x, aff_y, false)
}
pub fn from_ark(ark: G1Projective_BN254) -> Point_BN254 {
use ark_ff::Field;
let z_inv = ark.z.inverse().unwrap();
let z_invsq = z_inv * z_inv;
let z_invq3 = z_invsq * z_inv;
Point_BN254 {
x: BaseField_BN254::from_ark((ark.x * z_invsq).into_repr()),
y: BaseField_BN254::from_ark((ark.y * z_invq3).into_repr()),
z: BaseField_BN254::one(),
}
}
}
extern "C" {
fn eq_bn254(point1: *const Point_BN254, point2: *const Point_BN254) -> c_uint;
}
impl PartialEq for Point_BN254 {
fn eq(&self, other: &Self) -> bool {
unsafe { eq_bn254(self, other) != 0 }
}
}
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinity_BN254 {
pub x: BaseField_BN254,
pub y: BaseField_BN254,
}
impl Default for PointAffineNoInfinity_BN254 {
fn default() -> Self {
PointAffineNoInfinity_BN254 {
x: BaseField_BN254::zero(),
y: BaseField_BN254::zero(),
}
}
}
impl PointAffineNoInfinity_BN254 {
// TODO: generics
///From u32 limbs x,y
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
PointAffineNoInfinity_BN254 {
x: BaseField_BN254 {
s: get_fixed_limbs(x),
},
y: BaseField_BN254 {
s: get_fixed_limbs(y),
},
}
}
pub fn limbs(&self) -> Vec<u32> {
[self.x.limbs(), self.y.limbs()].concat()
}
pub fn to_projective(&self) -> Point_BN254 {
Point_BN254 {
x: self.x,
y: self.y,
z: BaseField_BN254::one(),
}
}
pub fn to_ark(&self) -> G1Affine_BN254 {
G1Affine_BN254::new(Fq_BN254::new(self.x.to_ark()), Fq_BN254::new(self.y.to_ark()), false)
}
pub fn to_ark_repr(&self) -> G1Affine_BN254 {
G1Affine_BN254::new(
Fq_BN254::from_repr(self.x.to_ark()).unwrap(),
Fq_BN254::from_repr(self.y.to_ark()).unwrap(),
false,
)
}
pub fn from_ark(p: &G1Affine_BN254) -> Self {
PointAffineNoInfinity_BN254 {
x: BaseField_BN254::from_ark(p.x.into_repr()),
y: BaseField_BN254::from_ark(p.y.into_repr()),
}
}
}
impl Point_BN254 {
// TODO: generics
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
Point_BN254 {
x: BaseField_BN254 {
s: get_fixed_limbs(x),
},
y: BaseField_BN254 {
s: get_fixed_limbs(y),
},
z: BaseField_BN254 {
s: get_fixed_limbs(z),
},
}
}
pub fn from_xy_limbs(value: &[u32]) -> Point_BN254 {
let l = value.len();
assert_eq!(l, 3 * BASE_LIMBS_BN254, "length must be 3 * {}", BASE_LIMBS_BN254);
Point_BN254 {
x: BaseField_BN254 {
s: value[..BASE_LIMBS_BN254].try_into().unwrap(),
},
y: BaseField_BN254 {
s: value[BASE_LIMBS_BN254..BASE_LIMBS_BN254 * 2].try_into().unwrap(),
},
z: BaseField_BN254 {
s: value[BASE_LIMBS_BN254 * 2..].try_into().unwrap(),
},
}
}
pub fn to_affine(&self) -> PointAffineNoInfinity_BN254 {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity_BN254 {
x: BaseField_BN254::from_ark(ark_affine.x.into_repr()),
y: BaseField_BN254::from_ark(ark_affine.y.into_repr()),
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_BN254 {
PointAffineNoInfinity_BN254 {
x: self.x,
y: self.y,
}
}
}
impl ScalarField_BN254 {
pub fn from_limbs(value: &[u32]) -> ScalarField_BN254 {
ScalarField_BN254 {
s: get_fixed_limbs(value),
}
}
}
#[cfg(test)]
mod tests {
use ark_bn254::{Fr as Fr_BN254};
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::bn254::{Point_BN254, ScalarField_BN254}};
#[test]
fn test_ark_scalar_convert() {
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
let scalar = ScalarField_BN254::from_limbs(&limbs);
assert_eq!(
scalar.to_ark(),
scalar.to_ark_transmute(),
"{:08X?} {:08X?}",
scalar.to_ark(),
scalar.to_ark_transmute()
)
}
#[test]
#[allow(non_snake_case)]
fn test_point_equality() {
let left = Point_BN254::zero();
let right = Point_BN254::zero();
assert_eq!(left, right);
let right = Point_BN254::from_limbs(&[0; 8], &[2, 0, 0, 0, 0, 0, 0, 0], &[0; 8]);
assert_eq!(left, right);
let right = Point_BN254::from_limbs(
&[2, 0, 0, 0, 0, 0, 0, 0],
&[0; 8],
&[1, 0, 0, 0, 0, 0, 0, 0],
);
assert!(left != right);
}
}

View File

@@ -1,3 +0,0 @@
pub mod bls12_381;
pub mod bls12_377;
pub mod bn254;

404
src/field.rs Normal file
View File

@@ -0,0 +1,404 @@
use std::ffi::c_uint;
use ark_bls12_381::{Fq, G1Affine, G1Projective};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, PrimeField};
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
#[derive(Debug, PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field<const NUM_LIMBS: usize> {
pub s: [u32; NUM_LIMBS],
}
impl<const NUM_LIMBS: usize> Default for Field<NUM_LIMBS> {
fn default() -> Self {
Field::zero()
}
}
impl<const NUM_LIMBS: usize> Field<NUM_LIMBS> {
pub fn zero() -> Self {
Field {
s: [0u32; NUM_LIMBS],
}
}
pub fn one() -> Self {
let mut s = [0u32; NUM_LIMBS];
s[0] = 1;
Field { s }
}
fn to_bytes_le(&self) -> Vec<u8> {
self.s
.iter()
.map(|s| s.to_le_bytes().to_vec())
.flatten()
.collect::<Vec<_>>()
}
}
pub const BASE_LIMBS: usize = 12;
pub const SCALAR_LIMBS: usize = 8;
#[cfg(feature = "bn254")]
pub const BASE_LIMBS: usize = 8;
#[cfg(feature = "bn254")]
pub const SCALAR_LIMBS: usize = 8;
pub type BaseField = Field<BASE_LIMBS>;
pub type ScalarField = Field<SCALAR_LIMBS>;
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
match val.len() {
n if n < NUM_LIMBS => {
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
padded[..val.len()].copy_from_slice(&val);
padded
}
n if n == NUM_LIMBS => val.try_into().unwrap(),
_ => panic!("slice has too many elements"),
}
}
impl BaseField {
pub fn limbs(&self) -> [u32; BASE_LIMBS] {
self.s
}
pub fn from_limbs(value: &[u32]) -> Self {
Self {
s: get_fixed_limbs(value),
}
}
pub fn to_ark(&self) -> BigInteger384 {
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger384) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
}
impl ScalarField {
pub fn limbs(&self) -> [u32; SCALAR_LIMBS] {
self.s
}
}
#[derive(Debug, Clone, Copy)]
#[repr(C)]
pub struct Point {
pub x: BaseField,
pub y: BaseField,
pub z: BaseField,
}
impl Default for Point {
fn default() -> Self {
Point::zero()
}
}
impl Point {
pub fn zero() -> Self {
Point {
x: BaseField::zero(),
y: BaseField::one(),
z: BaseField::zero(),
}
}
pub fn infinity() -> Self {
Self::zero()
}
pub fn to_ark(&self) -> G1Projective {
//TODO: generic conversion
self.to_ark_affine().into_projective()
}
pub fn to_ark_affine(&self) -> G1Affine {
//TODO: generic conversion
use ark_ff::Field;
use std::ops::Mul;
let proj_x_field = Fq::from_le_bytes_mod_order(&self.x.to_bytes_le());
let proj_y_field = Fq::from_le_bytes_mod_order(&self.y.to_bytes_le());
let proj_z_field = Fq::from_le_bytes_mod_order(&self.z.to_bytes_le());
let inverse_z = proj_z_field.inverse().unwrap();
let aff_x = proj_x_field.mul(inverse_z);
let aff_y = proj_y_field.mul(inverse_z);
G1Affine::new(aff_x, aff_y, false)
}
pub fn from_ark(ark: G1Projective) -> Point {
use ark_ff::Field;
let z_inv = ark.z.inverse().unwrap();
let z_invsq = z_inv * z_inv;
let z_invq3 = z_invsq * z_inv;
Point {
x: BaseField::from_ark((ark.x * z_invsq).into_repr()),
y: BaseField::from_ark((ark.y * z_invq3).into_repr()),
z: BaseField::one(),
}
}
}
extern "C" {
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
}
impl PartialEq for Point {
fn eq(&self, other: &Self) -> bool {
unsafe { eq(self, other) != 0 }
}
}
#[derive(Debug, PartialEq, Clone, Copy)]
#[repr(C)]
pub struct PointAffineNoInfinity {
pub x: BaseField,
pub y: BaseField,
}
impl Default for PointAffineNoInfinity {
fn default() -> Self {
PointAffineNoInfinity {
x: BaseField::zero(),
y: BaseField::zero(),
}
}
}
impl PointAffineNoInfinity {
// TODO: generics
///From u32 limbs x,y
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
PointAffineNoInfinity {
x: BaseField {
s: get_fixed_limbs(x),
},
y: BaseField {
s: get_fixed_limbs(y),
},
}
}
pub fn limbs(&self) -> Vec<u32> {
[self.x.limbs(), self.y.limbs()].concat()
}
pub fn to_projective(&self) -> Point {
Point {
x: self.x,
y: self.y,
z: BaseField::one(),
}
}
pub fn to_ark(&self) -> G1Affine {
G1Affine::new(Fq::new(self.x.to_ark()), Fq::new(self.y.to_ark()), false)
}
pub fn to_ark_repr(&self) -> G1Affine {
G1Affine::new(
Fq::from_repr(self.x.to_ark()).unwrap(),
Fq::from_repr(self.y.to_ark()).unwrap(),
false,
)
}
pub fn from_ark(p: &G1Affine) -> Self {
PointAffineNoInfinity {
x: BaseField::from_ark(p.x.into_repr()),
y: BaseField::from_ark(p.y.into_repr()),
}
}
}
impl Point {
// TODO: generics
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
Point {
x: BaseField {
s: get_fixed_limbs(x),
},
y: BaseField {
s: get_fixed_limbs(y),
},
z: BaseField {
s: get_fixed_limbs(z),
},
}
}
pub fn from_xy_limbs(value: &[u32]) -> Point {
let l = value.len();
assert_eq!(l, 3 * BASE_LIMBS, "length must be 3 * {}", BASE_LIMBS);
Point {
x: BaseField {
s: value[..BASE_LIMBS].try_into().unwrap(),
},
y: BaseField {
s: value[BASE_LIMBS..BASE_LIMBS * 2].try_into().unwrap(),
},
z: BaseField {
s: value[BASE_LIMBS * 2..].try_into().unwrap(),
},
}
}
pub fn to_affine(&self) -> PointAffineNoInfinity {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity {
x: BaseField::from_ark(ark_affine.x.into_repr()),
y: BaseField::from_ark(ark_affine.y.into_repr()),
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity {
PointAffineNoInfinity {
x: self.x,
y: self.y,
}
}
}
impl ScalarField {
pub fn from_limbs(value: &[u32]) -> ScalarField {
ScalarField {
s: get_fixed_limbs(value),
}
}
}
#[derive(Debug, PartialEq, Clone, Copy)]
#[repr(C)]
pub struct Scalar {
pub s: ScalarField, //TODO: do we need this wrapping struct?
}
impl Scalar {
pub fn from_limbs_le(value: &[u32]) -> Scalar {
Scalar {
s: ScalarField::from_limbs(value),
}
}
pub fn from_limbs_be(value: &[u32]) -> Scalar {
let mut value = value.to_vec();
value.reverse();
Self::from_limbs_le(&value)
}
pub fn limbs(&self) -> Vec<u32> {
self.s.limbs().to_vec()
}
pub fn one() -> Self {
Scalar {
s: ScalarField::one(),
}
}
pub fn zero() -> Self {
Scalar {
s: ScalarField::zero(),
}
}
}
impl Default for Scalar {
fn default() -> Self {
Scalar {
s: ScalarField::zero(),
}
}
}
#[cfg(test)]
mod tests {
use std::mem::transmute;
use ark_bls12_381::Fr;
use ark_ff::{BigInteger256, PrimeField};
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, field::Point};
use super::{Scalar, ScalarField};
impl ScalarField {
pub fn to_ark(&self) -> BigInteger256 {
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger256) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
pub fn to_ark_transmute(&self) -> BigInteger256 {
unsafe { transmute(*self) }
}
pub fn from_ark_transmute(v: BigInteger256) -> ScalarField {
unsafe { transmute(v) }
}
}
impl Scalar {
pub fn to_ark_mod_p(&self) -> Fr {
Fr::new(self.s.to_ark())
}
pub fn to_ark_repr(&self) -> Fr {
Fr::from_repr(self.s.to_ark()).unwrap()
}
pub fn to_ark_transmute(&self) -> Fr {
unsafe { std::mem::transmute(*self) }
}
pub fn from_ark_transmute(v: &Fr) -> Scalar {
unsafe { std::mem::transmute_copy(v) }
}
pub fn from_ark(v: &Fr) -> Scalar {
Scalar {
s: ScalarField::from_ark(v.into_repr()),
}
}
}
#[test]
fn test_ark_scalar_convert() {
let limbs = [0x0fffffff, 1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7];
let scalar = ScalarField::from_limbs(&limbs);
assert_eq!(
scalar.to_ark(),
scalar.to_ark_transmute(),
"{:08X?} {:08X?}",
scalar.to_ark(),
scalar.to_ark_transmute()
)
}
#[test]
#[allow(non_snake_case)]
fn test_point_equality() {
let left = Point::zero();
let right = Point::zero();
assert_eq!(left, right);
let right = Point::from_limbs(&[0; 12], &[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], &[0; 12]);
assert_eq!(left, right);
let right = Point::from_limbs(
&[2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
&[0; 12],
&[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
);
assert!(left != right);
}
}

View File

@@ -1,5 +1,686 @@
pub mod test_bls12_377;
pub mod test_bls12_381;
pub mod test_bn254;
use std::ffi::{c_int, c_uint};
use field::*;
pub mod field;
pub mod utils;
pub mod curves;
extern "C" {
fn msm_cuda(
out: *mut Point,
points: *const PointAffineNoInfinity,
scalars: *const ScalarField,
count: usize,
device_id: usize,
) -> c_uint;
fn msm_batch_cuda(
out: *mut Point,
points: *const PointAffineNoInfinity,
scalars: *const ScalarField,
batch_size: usize,
msm_size: usize,
device_id: usize,
) -> c_uint;
fn ntt_cuda(inout: *mut ScalarField, n: usize, inverse: bool, device_id: usize) -> c_int;
fn ecntt_cuda(inout: *mut Point, n: usize, inverse: bool, device_id: usize) -> c_int;
fn ntt_end2end_batch(
inout: *mut ScalarField,
arr_size: usize,
n: usize,
inverse: bool,
) -> c_int;
fn ecntt_end2end_batch(inout: *mut Point, arr_size: usize, n: usize, inverse: bool) -> c_int;
fn vec_mod_mult_point(
inout: *mut Point,
scalars: *const ScalarField,
n_elements: usize,
device_id: usize,
) -> c_int;
fn vec_mod_mult_scalar(
inout: *mut ScalarField,
scalars: *const ScalarField,
n_elements: usize,
device_id: usize,
) -> c_int;
fn batch_vector_mult_proj_cuda(
inout: *mut Point,
scalars: *const ScalarField,
n_scalars: usize,
batch_size: usize,
device_id: usize,
) -> c_int;
fn batch_vector_mult_scalar_cuda(
inout: *mut ScalarField,
scalars: *const ScalarField,
n_mult: usize,
batch_size: usize,
device_id: usize,
) -> c_int;
fn matrix_vec_mod_mult(
matrix_flattened: *const ScalarField,
input: *const ScalarField,
output: *mut ScalarField,
n_elements: usize,
device_id: usize,
) -> c_int;
}
pub fn msm(points: &[PointAffineNoInfinity], scalars: &[Scalar], device_id: usize) -> Point {
let count = points.len();
if count != scalars.len() {
todo!("variable length")
}
let mut ret = Point::zero();
unsafe {
msm_cuda(
&mut ret as *mut _ as *mut Point,
points as *const _ as *const PointAffineNoInfinity,
scalars as *const _ as *const ScalarField,
scalars.len(),
device_id,
)
};
ret
}
pub fn msm_batch(
points: &[PointAffineNoInfinity],
scalars: &[Scalar],
batch_size: usize,
device_id: usize,
) -> Vec<Point> {
let count = points.len();
if count != scalars.len() {
todo!("variable length")
}
let mut ret = vec![Point::zero(); batch_size];
unsafe {
msm_batch_cuda(
&mut ret[0] as *mut _ as *mut Point,
points as *const _ as *const PointAffineNoInfinity,
scalars as *const _ as *const ScalarField,
batch_size,
count / batch_size,
device_id,
)
};
ret
}
/// Compute an in-place ECNTT on the input data.
fn ntt_internal(values: &mut [Scalar], device_id: usize, inverse: bool) -> i32 {
let ret_code = unsafe {
ntt_cuda(
values as *mut _ as *mut ScalarField,
values.len(),
inverse,
device_id,
)
};
ret_code
}
pub fn ntt(values: &mut [Scalar], device_id: usize) {
ntt_internal(values, device_id, false);
}
pub fn intt(values: &mut [Scalar], device_id: usize) {
ntt_internal(values, device_id, true);
}
/// Compute an in-place ECNTT on the input data.
fn ntt_internal_batch(
values: &mut [Scalar],
device_id: usize,
batch_size: usize,
inverse: bool,
) -> i32 {
unsafe {
ntt_end2end_batch(
values as *mut _ as *mut ScalarField,
values.len(),
batch_size,
inverse,
)
}
}
pub fn ntt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
ntt_internal_batch(values, 0, batch_size, false);
}
pub fn intt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
ntt_internal_batch(values, 0, batch_size, true);
}
/// Compute an in-place ECNTT on the input data.
fn ecntt_internal(values: &mut [Point], inverse: bool, device_id: usize) -> i32 {
unsafe {
ecntt_cuda(
values as *mut _ as *mut Point,
values.len(),
inverse,
device_id,
)
}
}
pub fn ecntt(values: &mut [Point], device_id: usize) {
ecntt_internal(values, false, device_id);
}
/// Compute an in-place iECNTT on the input data.
pub fn iecntt(values: &mut [Point], device_id: usize) {
ecntt_internal(values, true, device_id);
}
/// Compute an in-place ECNTT on the input data.
fn ecntt_internal_batch(
values: &mut [Point],
device_id: usize,
batch_size: usize,
inverse: bool,
) -> i32 {
unsafe {
ecntt_end2end_batch(
values as *mut _ as *mut Point,
values.len(),
batch_size,
inverse,
)
}
}
pub fn ecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
ecntt_internal_batch(values, 0, batch_size, false);
}
/// Compute an in-place iECNTT on the input data.
pub fn iecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
ecntt_internal_batch(values, 0, batch_size, true);
}
pub fn multp_vec(a: &mut [Point], b: &[Scalar], device_id: usize) {
assert_eq!(a.len(), b.len());
unsafe {
vec_mod_mult_point(
a as *mut _ as *mut Point,
b as *const _ as *const ScalarField,
a.len(),
device_id,
);
}
}
pub fn mult_sc_vec(a: &mut [Scalar], b: &[Scalar], device_id: usize) {
assert_eq!(a.len(), b.len());
unsafe {
vec_mod_mult_scalar(
a as *mut _ as *mut ScalarField,
b as *const _ as *const ScalarField,
a.len(),
device_id,
);
}
}
pub fn mult_p_batch_vec(a: &mut [Point], b: &[Scalar], device_id: usize) {
assert_eq!(a.len() % b.len(), 0);
unsafe {
batch_vector_mult_proj_cuda(
a as *mut _ as *mut Point,
b as *const _ as *const ScalarField,
b.len(),
a.len() / b.len(),
device_id,
);
}
}
pub fn mult_sc_batch_vec(a: &mut [Scalar], b: &[Scalar], device_id: usize) {
assert_eq!(a.len() % b.len(), 0);
unsafe {
batch_vector_mult_scalar_cuda(
a as *mut _ as *mut ScalarField,
b as *const _ as *const ScalarField,
b.len(),
a.len() / b.len(),
device_id,
);
}
}
// Multiply a matrix by a scalar:
// `a` - flattenned matrix;
// `b` - vector to multiply `a` by;
pub fn mult_matrix_by_vec(a: &[Scalar], b: &[Scalar], device_id: usize) -> Vec<Scalar> {
let mut c = Vec::with_capacity(b.len());
for i in 0..b.len() {
c.push(Scalar::zero());
}
unsafe {
matrix_vec_mod_mult(
a as *const _ as *const ScalarField,
b as *const _ as *const ScalarField,
c.as_mut_slice() as *mut _ as *mut ScalarField,
b.len(),
device_id,
);
}
c
}
#[cfg(test)]
mod tests {
use std::ops::Add;
use ark_bls12_381::{Fr, G1Affine, G1Projective};
use ark_ec::{msm::VariableBaseMSM, AffineCurve, ProjectiveCurve};
use ark_ff::{FftField, Field, Zero};
use ark_std::UniformRand;
use rand::{rngs::StdRng, RngCore, SeedableRng};
use crate::{field::*, *};
fn random_points_ark_proj(nof_elements: usize) -> Vec<G1Projective> {
let mut rng = ark_std::rand::thread_rng();
let mut points_ga: Vec<G1Projective> = Vec::new();
for _ in 0..nof_elements {
let aff = G1Projective::rand(&mut rng);
points_ga.push(aff);
}
points_ga
}
fn ecntt_arc_naive(
points: &Vec<G1Projective>,
size: usize,
inverse: bool,
) -> Vec<G1Projective> {
let mut result: Vec<G1Projective> = Vec::new();
for _ in 0..size {
result.push(G1Projective::zero());
}
let rou: Fr;
if !inverse {
rou = Fr::get_root_of_unity(size).unwrap();
} else {
rou = Fr::inverse(&Fr::get_root_of_unity(size).unwrap()).unwrap();
}
for k in 0..size {
for l in 0..size {
let pow: [u64; 1] = [(l * k).try_into().unwrap()];
let mul_rou = Fr::pow(&rou, &pow);
result[k] = result[k].add(points[l].into_affine().mul(mul_rou));
}
}
if inverse {
let size2 = size as u64;
for k in 0..size {
let multfactor = Fr::inverse(&Fr::from(size2)).unwrap();
result[k] = result[k].into_affine().mul(multfactor);
}
}
return result;
}
fn check_eq(points: &Vec<G1Projective>, points2: &Vec<G1Projective>) -> bool {
let mut eq = true;
for i in 0..points.len() {
if points2[i].ne(&points[i]) {
eq = false;
break;
}
}
return eq;
}
fn test_naive_ark_ecntt(size: usize) {
let points = random_points_ark_proj(size);
let result1: Vec<G1Projective> = ecntt_arc_naive(&points, size, false);
let result2: Vec<G1Projective> = ecntt_arc_naive(&result1, size, true);
assert!(!check_eq(&result2, &result1));
assert!(check_eq(&result2, &points));
}
fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> {
let rng: Box<dyn RngCore> = match seed {
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
None => Box::new(rand::thread_rng()),
};
rng
}
fn generate_random_points(
count: usize,
mut rng: Box<dyn RngCore>,
) -> Vec<PointAffineNoInfinity> {
(0..count)
.map(|_| Point::from_ark(G1Projective::rand(&mut rng)).to_xy_strip_z())
.collect()
}
fn generate_random_points_proj(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Point> {
(0..count)
.map(|_| Point::from_ark(G1Projective::rand(&mut rng)))
.collect()
}
fn generate_random_scalars(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Scalar> {
(0..count)
.map(|_| Scalar::from_ark(&Fr::rand(&mut rng)))
.collect()
}
#[test]
fn test_msm() {
let test_sizes = [7, 8, 9, 12];
for pow2 in test_sizes {
let count = 1 << pow2;
let seed = None; //set Some to provide seed
let points = generate_random_points(count, get_rng(seed));
let scalars = generate_random_scalars(count, get_rng(seed));
let msm_result = msm(&points, &scalars, 0);
let point_r_ark: Vec<_> = points.iter().map(|x| x.to_ark_repr()).collect();
let scalars_r_ark: Vec<_> = scalars.iter().map(|x| x.to_ark_mod_p().0).collect();
let msm_result_ark = VariableBaseMSM::multi_scalar_mul(&point_r_ark, &scalars_r_ark);
assert_eq!(msm_result.to_ark_affine(), msm_result_ark);
assert_eq!(msm_result.to_ark(), msm_result_ark);
assert_eq!(
msm_result.to_ark_affine(),
Point::from_ark(msm_result_ark).to_ark_affine()
);
}
}
#[test]
fn test_batch_msm() {
for batch_pow2 in [5, 6, 8, 9] {
for pow2 in [9, 12] {
let msm_size = 1 << pow2;
let batch_size = 1 << batch_pow2;
let seed = None; //set Some to provide seed
let points_batch = generate_random_points(msm_size * batch_size, get_rng(seed));
let scalars_batch = generate_random_scalars(msm_size * batch_size, get_rng(seed));
let point_r_ark: Vec<_> = points_batch.iter().map(|x| x.to_ark_repr()).collect();
let scalars_r_ark: Vec<_> =
scalars_batch.iter().map(|x| x.to_ark_mod_p().0).collect();
let expected: Vec<_> = point_r_ark
.chunks(msm_size)
.zip(scalars_r_ark.chunks(msm_size))
.map(|p| Point::from_ark(VariableBaseMSM::multi_scalar_mul(p.0, p.1)))
.collect();
let result = msm_batch(&points_batch, &scalars_batch, batch_size, 0);
assert_eq!(result, expected);
}
}
}
#[test]
fn test_ntt() {
//NTT
let seed = None; //some value to fix the rng
let test_size = 1 << 5;
let scalars = generate_random_scalars(test_size, get_rng(seed));
let mut ntt_result = scalars.clone();
ntt(&mut ntt_result, 0);
assert_ne!(ntt_result, scalars);
let mut intt_result = ntt_result.clone();
intt(&mut intt_result, 0);
assert_eq!(intt_result, scalars);
//ECNTT
let points_proj = generate_random_points_proj(test_size, get_rng(seed));
test_naive_ark_ecntt(test_size);
assert!(points_proj[0].to_ark().into_affine().is_on_curve());
//naive ark
let points_proj_ark = points_proj
.iter()
.map(|p| p.to_ark())
.collect::<Vec<G1Projective>>();
let ecntt_result_naive = ecntt_arc_naive(&points_proj_ark, points_proj_ark.len(), false);
let iecntt_result_naive = ecntt_arc_naive(&ecntt_result_naive, points_proj_ark.len(), true);
assert_eq!(points_proj_ark, iecntt_result_naive);
//ingo gpu
let mut ecntt_result = points_proj.to_vec();
ecntt(&mut ecntt_result, 0);
assert_ne!(ecntt_result, points_proj);
let mut iecntt_result = ecntt_result.clone();
iecntt(&mut iecntt_result, 0);
assert_eq!(
iecntt_result_naive,
points_proj
.iter()
.map(|p| p.to_ark_affine())
.collect::<Vec<G1Affine>>()
);
assert_eq!(
iecntt_result
.iter()
.map(|p| p.to_ark_affine())
.collect::<Vec<G1Affine>>(),
points_proj
.iter()
.map(|p| p.to_ark_affine())
.collect::<Vec<G1Affine>>()
);
}
#[test]
fn test_ntt_batch() {
//NTT
let seed = None; //some value to fix the rng
let test_size = 1 << 9;
let batches = 4;
let scalars_batch: Vec<Scalar> =
generate_random_scalars(test_size * batches, get_rng(seed));
let mut scalar_vec_of_vec: Vec<Vec<Scalar>> = Vec::new();
for i in 0..batches {
scalar_vec_of_vec.push(scalars_batch[i * test_size..(i + 1) * test_size].to_vec());
}
let mut ntt_result = scalars_batch.clone();
// do batch ntt
ntt_batch(&mut ntt_result, test_size, 0);
let mut ntt_result_vec_of_vec = Vec::new();
// do ntt for every chunk
for i in 0..batches {
ntt_result_vec_of_vec.push(scalar_vec_of_vec[i].clone());
ntt(&mut ntt_result_vec_of_vec[i], 0);
}
// check that the ntt of each vec of scalars is equal to the intt of the specific batch
for i in 0..batches {
assert_eq!(
ntt_result_vec_of_vec[i],
ntt_result[i * test_size..(i + 1) * test_size]
);
}
// check that ntt output is different from input
assert_ne!(ntt_result, scalars_batch);
let mut intt_result = ntt_result.clone();
// do batch intt
intt_batch(&mut intt_result, test_size, 0);
let mut intt_result_vec_of_vec = Vec::new();
// do intt for every chunk
for i in 0..batches {
intt_result_vec_of_vec.push(ntt_result_vec_of_vec[i].clone());
intt(&mut intt_result_vec_of_vec[i], 0);
}
// check that the intt of each vec of scalars is equal to the intt of the specific batch
for i in 0..batches {
assert_eq!(
intt_result_vec_of_vec[i],
intt_result[i * test_size..(i + 1) * test_size]
);
}
assert_eq!(intt_result, scalars_batch);
// //ECNTT
let points_proj = generate_random_points_proj(test_size * batches, get_rng(seed));
let mut points_vec_of_vec: Vec<Vec<Point>> = Vec::new();
for i in 0..batches {
points_vec_of_vec.push(points_proj[i * test_size..(i + 1) * test_size].to_vec());
}
let mut ntt_result_points = points_proj.clone();
// do batch ecintt
ecntt_batch(&mut ntt_result_points, test_size, 0);
let mut ntt_result_points_vec_of_vec = Vec::new();
for i in 0..batches {
ntt_result_points_vec_of_vec.push(points_vec_of_vec[i].clone());
ecntt(&mut ntt_result_points_vec_of_vec[i], 0);
}
for i in 0..batches {
assert_eq!(
ntt_result_points_vec_of_vec[i],
ntt_result_points[i * test_size..(i + 1) * test_size]
);
}
assert_ne!(ntt_result_points, points_proj);
let mut intt_result_points = ntt_result_points.clone();
// do batch ecintt
iecntt_batch(&mut intt_result_points, test_size, 0);
let mut intt_result_points_vec_of_vec = Vec::new();
// do ecintt for every chunk
for i in 0..batches {
intt_result_points_vec_of_vec.push(ntt_result_points_vec_of_vec[i].clone());
iecntt(&mut intt_result_points_vec_of_vec[i], 0);
}
// check that the ecintt of each vec of scalars is equal to the intt of the specific batch
for i in 0..batches {
assert_eq!(
intt_result_points_vec_of_vec[i],
intt_result_points[i * test_size..(i + 1) * test_size]
);
}
assert_eq!(intt_result_points, points_proj);
}
// testing matrix multiplication by comparing the result of FFT with the naive multiplication by the DFT matrix
#[test]
fn test_matrix_multiplication() {
let seed = None; // some value to fix the rng
let test_size = 1 << 5;
let rou = Fr::get_root_of_unity(test_size).unwrap();
let matrix_flattened: Vec<Scalar> = (0..test_size).map(
|row_num| { (0..test_size).map(
|col_num| {
let pow: [u64; 1] = [(row_num * col_num).try_into().unwrap()];
Scalar::from_ark(&Fr::pow(&rou, &pow))
}).collect::<Vec<Scalar>>()
}).flatten().collect::<Vec<_>>();
let vector: Vec<Scalar> = generate_random_scalars(test_size, get_rng(seed));
let result = mult_matrix_by_vec(&matrix_flattened, &vector, 0);
let mut ntt_result = vector.clone();
ntt(&mut ntt_result, 0);
// we don't use the same roots of unity as arkworks, so the results are permutations
// of one another and the only guaranteed fixed scalars are the following ones:
assert_eq!(result[0], ntt_result[0]);
assert_eq!(result[test_size >> 1], ntt_result[test_size >> 1]);
}
#[test]
#[allow(non_snake_case)]
fn test_vec_scalar_mul() {
let mut intoo = [Scalar::one(), Scalar::one(), Scalar::zero()];
let expected = [Scalar::one(), Scalar::zero(), Scalar::zero()];
mult_sc_vec(&mut intoo, &expected, 0);
assert_eq!(intoo, expected);
}
#[test]
#[allow(non_snake_case)]
fn test_vec_point_mul() {
let dummy_one = Point {
x: BaseField::one(),
y: BaseField::zero(),
z: BaseField::one(),
};
let mut inout = [dummy_one, dummy_one, Point::zero()];
let scalars = [Scalar::one(), Scalar::zero(), Scalar::zero()];
let expected = [
Point::zero(),
Point {
x: BaseField::zero(),
y: BaseField::one(),
z: BaseField::zero(),
},
Point {
x: BaseField::zero(),
y: BaseField::one(),
z: BaseField::zero(),
},
];
multp_vec(&mut inout, &scalars, 0);
assert_eq!(inout, expected);
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -33,7 +33,7 @@ pub fn u64_vec_to_u32_vec(arr_u64: &[u64]) -> Vec<u32> {
mod tests {
use ark_ff::BigInteger256;
use crate::curves::bls12_381::{ScalarField_BLS12_381 as ScalarField};
use crate::field::ScalarField;
use super::*;
@@ -46,7 +46,7 @@ mod tests {
))
.limbs();
assert_eq!(arr_u32.to_vec(), s);
assert_eq!(arr_u32, s);
let arr_u64_expected = [
0x0FFFFFFF00000001,
@@ -65,12 +65,7 @@ mod tests {
#[test]
fn test_u64_vec_to_u32_vec() {
let arr_u64 = [
0x2FFFFFFF00000001,
0x4FFFFFFF00000003,
0x6FFFFFFF00000005,
0x8FFFFFFF00000007,
];
let arr_u64 = [0x2FFFFFFF00000001, 0x4FFFFFFF00000003, 0x6FFFFFFF00000005, 0x8FFFFFFF00000007];
let arr_u32_expected = [1, 0x2fffffff, 3, 0x4fffffff, 5, 0x6fffffff, 7, 0x8fffffff];