Support bw6-761 (#188)

Resolves #191 and #113

---------

Co-authored-by: DmytroTym <dmytrotym1@gmail.com>
Co-authored-by: ImmanuelSegol <3ditds@gmail.com>
This commit is contained in:
liuxiao
2023-10-21 23:49:06 +08:00
committed by GitHub
parent 09d8c5da6a
commit fd62fe5ae8
64 changed files with 4253 additions and 686 deletions

View File

@@ -36,6 +36,7 @@ ICICLE is a CUDA implementation of general functions widely used in ZKP. ICICLE
- [BLS12-381]
- [BLS12-377]
- [BN254]
- [BW6-761][BW6-671]
## Build and usage
@@ -117,20 +118,21 @@ Create a JSON file with the curve parameters. The curve is defined by the follow
- ``curve_name`` - e.g. ``bls12_381``.
- ``modulus_p`` - scalar field modulus (in decimal).
- ``bit_count_p`` - number of bits needed to represent `` modulus_p`` .
- ``limb_p`` - number of bytes needed to represent `` modulus_p`` (rounded).
- ``ntt_size`` - log of the maximal size subgroup of the scalar field.
- ``limb_p`` - number of (32-bit) limbs needed to represent `` modulus_p`` (rounded up).
- ``ntt_size`` - log of the maximal size subgroup of the scalar field.
- ``modulus_q`` - base field modulus (in decimal).
- ``bit_count_q`` - number of bits needed to represent `` modulus_q`` .
- ``limb_q`` number of bytes needed to represent `` modulus_p`` (rounded).
- ``weierstrass_b`` - Weierstrauss constant of the curve.
- ``weierstrass_b_g2_re`` - Weierstrauss real constant of the g2 curve.
- ``weierstrass_b_g2_im`` - Weierstrauss imaginary constant of the g2 curve.
- ``gen_x`` - x-value of a generator element for the curve.
- ``gen_y`` - y-value of a generator element for the curve.
- ``gen_x_re`` - real x-value of a generator element for the g2 curve.
- ``gen_x_im`` - imaginary x-value of a generator element for the g2 curve.
- ``gen_y_re`` - real y-value of a generator element for the g2 curve.
- ``gen_y_im`` - imaginary y-value of a generator element for the g2 curve.
- ``limb_q`` - number of (32-bit) limbs needed to represent `` modulus_q`` (rounded up).
- ``weierstrass_b`` - `b` of the curve in Weierstrass form.
- ``weierstrass_b_g2_re`` - real part of the `b` value of the g2 curve in Weierstrass form.
- ``weierstrass_b_g2_im`` - imaginary part of the `b` value of the g2 curve in Weierstrass form.
- ``gen_x`` - `x` coordinate of a generator element for the curve.
- ``gen_y`` - `y` coordinate of a generator element for the curve.
- ``gen_x_re`` - real part of the `x` coordinate of generator element for the g2 curve.
- ``gen_x_im`` - imaginary part of the `x` coordinate of generator element for the g2 curve.
- ``gen_y_re`` - real part of the `y` coordinate of generator element for the g2 curve.
- ``gen_y_im`` - imaginary part of the `y` coordinate of generator element for the g2 curve.
- ``nonresidue`` - nonresidue, or `i^2`, or `u^2` - square of the element that generates quadratic extension field of the base field.
Here's an example for BLS12-381.
```
@@ -144,14 +146,15 @@ Here's an example for BLS12-381.
"bit_count_q" : 381,
"limb_q" : 12,
"weierstrass_b" : 4,
"weierstrass_b_g2_re":4,
"weierstrass_b_g2_im":4,
"weierstrass_b_g2_re" : 4,
"weierstrass_b_g2_im" : 4,
"gen_x" : 3685416753713387016781088315183077757961620795782546409894578378688607592378376318836054947676345821548104185464507,
"gen_y" : 1339506544944476473020471379941921221584933875938349620426543736416511423956333506472724655353366534992391756441569,
"gen_x_re" : 352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160,
"gen_x_im" : 3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758,
"gen_y_re" : 1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905,
"gen_y_im" : 927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582
"gen_y_im" : 927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582,
"nonresidue" : -1
}
```
@@ -214,6 +217,7 @@ See [LICENSE-MIT][LMIT] for details.
[BLS12-381]: ./icicle/curves/bls12_381/supported_operations.cu
[BLS12-377]: ./icicle/curves/bls12_377/supported_operations.cu
[BN254]: ./icicle/curves/bn254/supported_operations.cu
[BW6-671]: ./icicle/curves/bw6_761/supported_operations.cu
[NVCC]: https://docs.nvidia.com/cuda/#installation-guides
[CRV_TEMPLATE]: ./icicle/curves/curve_template/
[CRV_CONFIG]: ./icicle/curves/index.cu

View File

@@ -3,7 +3,7 @@
"modulus_p" : 8444461749428370424248824938781546531375899335154063827935233455917409239041,
"bit_count_p" : 253,
"limb_p" : 8,
"ntt_size" : 32,
"ntt_size" : 47,
"modulus_q" : 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177,
"bit_count_q" : 377,
"limb_q" : 12,
@@ -16,5 +16,6 @@
"g2_gen_x_re" : 233578398248691099356572568220835526895379068987715365179118596935057653620464273615301663571204657964920925606294,
"g2_gen_x_im" : 140913150380207355837477652521042157274541796891053068589147167627541651775299824604154852141315666357241556069118,
"g2_gen_y_re" : 63160294768292073209381361943935198908131692476676907196754037919244929611450776219210369229519898517858833747423,
"g2_gen_y_im" : 149157405641012693445398062341192467754805999074082136895788947234480009303640899064710353187729182149407503257491
"g2_gen_y_im" : 149157405641012693445398062341192467754805999074082136895788947234480009303640899064710353187729182149407503257491,
"nonresidue" : -5
}

View File

@@ -16,5 +16,6 @@
"g2_gen_x_re" : 352701069587466618187139116011060144890029952792775240219908644239793785735715026873347600343865175952761926303160,
"g2_gen_x_im" : 3059144344244213709971259814753781636986470325476647558659373206291635324768958432433509563104347017837885763365758,
"g2_gen_y_re" : 1985150602287291935568054521177171638300868978215655730859378665066344726373823718423869104263333984641494340347905,
"g2_gen_y_im" : 927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582
"g2_gen_y_im" : 927553665492332455747201965776037880757740193453592970025027978793976877002675564980949289727957565575433344219582,
"nonresidue" : -1
}

View File

@@ -16,5 +16,6 @@
"g2_gen_x_re" : 10857046999023057135944570762232829481370756359578518086990519993285655852781,
"g2_gen_x_im" : 11559732032986387107991004021392285783925812861821192530917403151452391805634,
"g2_gen_y_re" : 8495653923123431417604973247489272438418190587263600148770280649306958101930,
"g2_gen_y_im" : 4082367875863433681332203403145435568316851327593401208105741076214120093531
"g2_gen_y_im" : 4082367875863433681332203403145435568316851327593401208105741076214120093531,
"nonresidue" : -1
}

View File

@@ -0,0 +1,21 @@
{
"curve_name" : "bw6_761",
"modulus_p" : 258664426012969094010652733694893533536393512754914660539884262666720468348340822774968888139573360124440321458177,
"bit_count_p" : 377,
"limb_p" : 12,
"ntt_size" : 46,
"modulus_q" : 6891450384315732539396789682275657542479668912536150109513790160209623422243491736087683183289411687640864567753786613451161759120554247759349511699125301598951605099378508850372543631423596795951899700429969112842764913119068299,
"bit_count_q" : 761,
"limb_q" : 24,
"root_of_unity" : 32863578547254505029601261939868325669770508939375122462904745766352256812585773382134936404344547323199885654433,
"weierstrass_b" : 6891450384315732539396789682275657542479668912536150109513790160209623422243491736087683183289411687640864567753786613451161759120554247759349511699125301598951605099378508850372543631423596795951899700429969112842764913119068298,
"weierstrass_b_g2_re" : 4,
"weierstrass_b_g2_im" : 0,
"g1_gen_x" : 6238772257594679368032145693622812838779005809760824733138787810501188623461307351759238099287535516224314149266511977132140828635950940021790489507611754366317801811090811367945064510304504157188661901055903167026722666149426237,
"g1_gen_y" : 2101735126520897423911504562215834951148127555913367997162789335052900271653517958562461315794228241561913734371411178226936527683203879553093934185950470971848972085321797958124416462268292467002957525517188485984766314758624099,
"g2_gen_x_re" : 6445332910596979336035888152774071626898886139774101364933948236926875073754470830732273879639675437155036544153105017729592600560631678554299562762294743927912429096636156401171909259073181112518725201388196280039960074422214428,
"g2_gen_x_im" : 1,
"g2_gen_y_re" : 562923658089539719386922163444547387757586534741080263946953401595155211934630598999300396317104182598044793758153214972605680357108252243146746187917218885078195819486220416605630144001533548163105316661692978285266378674355041,
"g2_gen_y_im" : 1,
"nonresidue" : -1
}

View File

@@ -17,7 +17,7 @@ def to_hex(val: int, length):
n = 8
chunks = [x[i:i+n] for i in range(0, len(x), n)][::-1]
s = ""
for c in chunks:
for c in chunks[:length // n]:
s += f'0x{c}, '
return s[:-2]
@@ -30,15 +30,15 @@ def compute_values(modulus, modulus_bit_count, limbs):
modulus_2 = to_hex(modulus*2,limb_size)
modulus_4 = to_hex(modulus*4,limb_size)
modulus_wide = to_hex(modulus,limb_size*2)
modulus_squared = to_hex(modulus*modulus,limb_size)
modulus_squared_2 = to_hex(modulus*modulus*2,limb_size)
modulus_squared_4 = to_hex(modulus*modulus*4,limb_size)
modulus_squared = to_hex(modulus*modulus,limb_size*2)
modulus_squared_2 = to_hex(modulus*modulus*2,limb_size*2)
modulus_squared_4 = to_hex(modulus*modulus*4,limb_size*2)
m_raw = int(math.floor(int(pow(2,2*modulus_bit_count) // modulus)))
m = to_hex(m_raw,limb_size)
one = to_hex(1,limb_size)
zero = to_hex(0,limb_size)
montgomery_r = to_hex((2 ** bit_size) % modulus, limb_size)
montgomery_r_inv = to_hex(((modulus+1)//2)**bit_size % modulus, limb_size)
montgomery_r = to_hex(pow(2,bit_size,modulus),limb_size)
montgomery_r_inv = to_hex(pow(2,-bit_size,modulus),limb_size)
return (
modulus_,
@@ -56,7 +56,7 @@ def compute_values(modulus, modulus_bit_count, limbs):
)
def get_fq_params(modulus, modulus_bit_count, limbs, g1_gen_x, g1_gen_y, g2_gen_x_re, g2_gen_x_im, g2_gen_y_re, g2_gen_y_im):
def get_fq_params(modulus, modulus_bit_count, limbs, nonresidue):
(
modulus,
modulus_2,
@@ -73,6 +73,8 @@ def get_fq_params(modulus, modulus_bit_count, limbs, g1_gen_x, g1_gen_y, g2_gen_
) = compute_values(modulus, modulus_bit_count, limbs)
limb_size = 8*limbs
nonresidue_is_negative = str(nonresidue < 0).lower()
nonresidue = abs(nonresidue)
return {
'fq_modulus': modulus,
'fq_modulus_2': modulus_2,
@@ -86,12 +88,8 @@ def get_fq_params(modulus, modulus_bit_count, limbs, g1_gen_x, g1_gen_y, g2_gen_
'fq_zero': zero,
'fq_montgomery_r': montgomery_r,
'fq_montgomery_r_inv': montgomery_r_inv,
'fq_gen_x': to_hex(g1_gen_x, limb_size),
'fq_gen_y': to_hex(g1_gen_y, limb_size),
'fq_gen_x_re': to_hex(g2_gen_x_re, limb_size),
'fq_gen_x_im': to_hex(g2_gen_x_im, limb_size),
'fq_gen_y_re': to_hex(g2_gen_y_re, limb_size),
'fq_gen_y_im': to_hex(g2_gen_y_im, limb_size)
'nonresidue': nonresidue,
'nonresidue_is_negative': nonresidue_is_negative
}
@@ -151,6 +149,18 @@ def get_fp_params(modulus, modulus_bit_count, limbs, root_of_unity, size=0):
}
def get_generators(g1_gen_x, g1_gen_y, g2_gen_x_re, g2_gen_x_im, g2_gen_y_re, g2_gen_y_im, size):
    """Format the G1 and G2 generator coordinates with ``to_hex``.

    Every coordinate is passed through ``to_hex(value, size)`` and stored
    under its ``fq_gen_*`` key; the key order matches the argument order.
    """
    coordinates = (
        ('fq_gen_x', g1_gen_x),
        ('fq_gen_y', g1_gen_y),
        ('fq_gen_x_re', g2_gen_x_re),
        ('fq_gen_x_im', g2_gen_x_im),
        ('fq_gen_y_re', g2_gen_y_re),
        ('fq_gen_y_im', g2_gen_y_im),
    )
    return {key: to_hex(value, size) for key, value in coordinates}
def get_weier_params(weierstrass_b, weierstrass_b_g2_re, weierstrass_b_g2_im, size):
return {
@@ -171,6 +181,7 @@ def get_params(config):
bit_count_q = config["bit_count_q"]
limb_q = config["limb_q"]
root_of_unity = config["root_of_unity"]
nonresidue = config["nonresidue"]
if root_of_unity == modulus_p:
sys.exit("Invalid root_of_unity value; please update in curve parameters")
@@ -194,13 +205,15 @@ def get_params(config):
}
fp_params = get_fp_params(modulus_p, bit_count_p, limb_p, root_of_unity, ntt_size)
fq_params = get_fq_params(modulus_q, bit_count_q, limb_q, g1_gen_x, g1_gen_y, g2_generator_x_re, g2_generator_x_im, g2_generator_y_re, g2_generator_y_im)
fq_params = get_fq_params(modulus_q, bit_count_q, limb_q, nonresidue)
generators = get_generators(g1_gen_x, g1_gen_y, g2_generator_x_re, g2_generator_x_im, g2_generator_y_re, g2_generator_y_im, 8*limb_q)
weier_params = get_weier_params(weierstrass_b, weierstrass_b_g2_re, weierstrass_b_g2_im, 8*limb_q)
return {
**params,
**fp_params,
**fq_params,
**generators,
**weier_params
}

View File

@@ -5,20 +5,25 @@ LDFLAGS = -shared
FEATURES = -DG2_DEFINED
TARGET_BN254 = libbn254.so
TARGET_BW6761 = libbw6761.so
TARGET_BLS12_381 = libbls12_381.so
TARGET_BLS12_377 = libbls12_377.so
VPATH = ../icicle/curves/bn254:../icicle/curves/bls12_377:../icicle/curves/bls12_381
VPATH = ../icicle/curves/bn254:../icicle/curves/bls12_377:../icicle/curves/bls12_381:../icicle/curves/bw6_761
SRCS_BN254 = lde.cu msm.cu projective.cu ve_mod_mult.cu
SRCS_BW6761 = lde.cu msm.cu projective.cu ve_mod_mult.cu
SRCS_BLS12_381 = lde.cu msm.cu projective.cu ve_mod_mult.cu poseidon.cu
SRCS_BLS12_377 = lde.cu msm.cu projective.cu ve_mod_mult.cu
all: $(TARGET_BN254) $(TARGET_BLS12_381) $(TARGET_BLS12_377)
all: $(TARGET_BN254) $(TARGET_BLS12_381) $(TARGET_BLS12_377) $(TARGET_BW6761)
$(TARGET_BN254):
$(NVCC) $(FEATURES) $(CFLAGS) $(LDFLAGS) $(addprefix ../icicle/curves/bn254/, $(SRCS_BN254)) -o $@
$(TARGET_BW6761):
$(NVCC) $(FEATURES) $(CFLAGS) $(LDFLAGS) $(addprefix ../icicle/curves/bw6_761/, $(SRCS_BW6761)) -o $@
$(TARGET_BLS12_381):
$(NVCC) $(FEATURES) $(CFLAGS) $(LDFLAGS) $(addprefix ../icicle/curves/bls12_381/, $(SRCS_BLS12_381)) -o $@
@@ -26,4 +31,4 @@ $(TARGET_BLS12_377):
$(NVCC) $(FEATURES) $(CFLAGS) $(LDFLAGS) $(addprefix ../icicle/curves/bls12_377/, $(SRCS_BLS12_377)) -o $@
clean:
rm -f $(TARGET_BN254) $(TARGET_BLS12_381) $(TARGET_BLS12_377)
rm -f $(TARGET_BN254) $(TARGET_BLS12_381) $(TARGET_BLS12_377) $(TARGET_BW6761)

View File

@@ -11,13 +11,13 @@ To compile the CUDA files, you will need:
## Structure of the Makefile
The Makefile is designed to compile CUDA files for three curves: BN254, BLS12_381, and BLS12_377. The source files are located in the `icicle/curves/` directory.
The Makefile is designed to compile CUDA files for four curves: BN254, BLS12_381, BLS12_377 and BW6_761. The source files are located in the `icicle/curves/` directory.
## Compiling CUDA Code
1. Navigate to the directory containing the Makefile in your terminal.
2. To compile all curve libraries, use the `make all` command. This will create three shared libraries: `libbn254.so`, `libbls12_381.so`, and `libbls12_377.so`.
3. If you want to compile a specific curve, you can do so by specifying the target. For example, to compile only the BN254 curve, use `make libbn254.so`. Replace `libbn254.so` with `libbls12_381.so` or `libbls12_377.so` to compile those curves instead.
2. To compile all curve libraries, use the `make all` command. This will create four shared libraries: `libbn254.so`, `libbls12_381.so`, `libbls12_377.so` and `libbw6761.so`.
3. If you want to compile a specific curve, you can do so by specifying the target. For example, to compile only the BN254 curve, use `make libbn254.so`. Replace `libbn254.so` with `libbls12_381.so`, `libbls12_377.so` or `libbw6761.so` to compile those curves instead.
The resulting `.so` files are the compiled shared libraries for each curve.
@@ -25,13 +25,13 @@ The resulting `.so` files are the compiled shared libraries for each curve.
The shared libraries produced from the CUDA code compilation are used to bind Golang to ICICLE's CUDA code.
1. These shared libraries (`libbn254.so`, `libbls12_381.so`, `libbls12_377.so`) can be imported in your Go project to leverage the GPU accelerated functionalities provided by ICICLE.
1. These shared libraries (`libbn254.so`, `libbls12_381.so`, `libbls12_377.so`, `libbw6761.so`) can be imported in your Go project to leverage the GPU accelerated functionalities provided by ICICLE.
2. In your Go project, you can use `cgo` to link these shared libraries. Here's a basic example on how you can use `cgo` to link these libraries:
```go
/*
#cgo LDFLAGS: -L/path/to/shared/libs -lbn254 -lbls12_381 -lbls12_377
#cgo LDFLAGS: -L/path/to/shared/libs -lbn254 -lbls12_381 -lbls12_377 -lbw6761
#include "icicle.h" // make sure you use the correct header file(s)
*/
import "C"
@@ -46,7 +46,7 @@ Replace `/path/to/shared/libs` with the actual path where the shared libraries a
## Cleaning up
If you want to remove the compiled files, you can use the `make clean` command. This will remove the `libbn254.so`, `libbls12_381.so`, and `libbls12_377.so` files.
If you want to remove the compiled files, you can use the `make clean` command. This will remove the `libbn254.so`, `libbls12_381.so`, `libbls12_377.so` and `libbw6761.so` files.
## Common issues

View File

@@ -84,17 +84,6 @@ func (f *G2Element) ToBytesLe() []byte {
return bytes
}
func (p *G2PointAffine) ToProjective() G2Point {
return G2Point{
X: p.X,
Y: p.Y,
Z: ExtentionField{
A0: G2Element{1, 0, 0, 0},
A1: G2Element{0, 0, 0, 0},
},
}
}
func (p *G2PointAffine) FromProjective(projective *G2Point) *G2PointAffine {
out := (*C.BLS12_377_g2_affine_t)(unsafe.Pointer(p))
in := (*C.BLS12_377_g2_projective_t)(unsafe.Pointer(projective))

View File

@@ -71,7 +71,8 @@ func TestG2ShouldConvertToProjective(t *testing.T) {
var pointAffine G2PointAffine
pointAffine.FromProjective(&pointProjective)
proj := pointAffine.ToProjective()
var proj G2Point
proj.FromAffine(&pointAffine)
assert.True(t, proj.IsOnCurve())
assert.True(t, pointProjective.Eq(&proj))

View File

@@ -84,17 +84,6 @@ func (f *G2Element) ToBytesLe() []byte {
return bytes
}
func (p *G2PointAffine) ToProjective() G2Point {
return G2Point{
X: p.X,
Y: p.Y,
Z: ExtentionField{
A0: G2Element{1, 0, 0, 0},
A1: G2Element{0, 0, 0, 0},
},
}
}
func (p *G2PointAffine) FromProjective(projective *G2Point) *G2PointAffine {
out := (*C.BLS12_381_g2_affine_t)(unsafe.Pointer(p))
in := (*C.BLS12_381_g2_projective_t)(unsafe.Pointer(projective))

View File

@@ -71,7 +71,8 @@ func TestG2ShouldConvertToProjective(t *testing.T) {
var pointAffine G2PointAffine
pointAffine.FromProjective(&pointProjective)
proj := pointAffine.ToProjective()
var proj G2Point
proj.FromAffine(&pointAffine)
assert.True(t, proj.IsOnCurve())
assert.True(t, pointProjective.Eq(&proj))

View File

@@ -84,17 +84,6 @@ func (f *G2Element) ToBytesLe() []byte {
return bytes
}
func (p *G2PointAffine) ToProjective() G2Point {
return G2Point{
X: p.X,
Y: p.Y,
Z: ExtentionField{
A0: G2Element{1, 0, 0, 0},
A1: G2Element{0, 0, 0, 0},
},
}
}
func (p *G2PointAffine) FromProjective(projective *G2Point) *G2PointAffine {
out := (*C.BN254_g2_affine_t)(unsafe.Pointer(p))
in := (*C.BN254_g2_projective_t)(unsafe.Pointer(projective))

View File

@@ -71,7 +71,8 @@ func TestG2ShouldConvertToProjective(t *testing.T) {
var pointAffine G2PointAffine
pointAffine.FromProjective(&pointProjective)
proj := pointAffine.ToProjective()
var proj G2Point
proj.FromAffine(&pointAffine)
assert.True(t, proj.IsOnCurve())
assert.True(t, pointProjective.Eq(&proj))

View File

@@ -0,0 +1,328 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bw6761
import (
"unsafe"
"encoding/binary"
)
// #cgo CFLAGS: -I./include/
// #cgo CFLAGS: -I/usr/local/cuda/include
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbw6761
// #include "projective.h"
// #include "ve_mod_mult.h"
import "C"
const SCALAR_SIZE = 12
const BASE_SIZE = 24
// G1ScalarField stores one scalar-field element as SCALAR_SIZE 32-bit limbs.
// S[0] is the limb that SetOne sets to 1.
type G1ScalarField struct {
S [SCALAR_SIZE]uint32
}
// G1BaseField stores one base-field element as BASE_SIZE 32-bit limbs.
// S[0] is the limb that SetOne sets to 1.
type G1BaseField struct {
S [BASE_SIZE]uint32
}
/*
* BaseField Constructors
*/
// SetZero overwrites every limb of f with zero and returns f so calls can be chained.
func (f *G1BaseField) SetZero() *G1BaseField {
	f.S = [BASE_SIZE]uint32{}
	return f
}
// SetOne stores 1 in limb 0, zero in every other limb, and returns f.
func (f *G1BaseField) SetOne() *G1BaseField {
	f.S = [BASE_SIZE]uint32{0: 1}
	return f
}
// FromAffine fills p by calling the native projective_from_affine_bw6_761
// routine with p as the output and affine as the input, then returns p.
// Both Go structs are reinterpreted as their C counterparts through
// unsafe.Pointer, so this relies on the two layouts being identical.
func (p *G1ProjectivePoint) FromAffine(affine *G1PointAffine) *G1ProjectivePoint {
	dst := (*C.BW6761_projective_t)(unsafe.Pointer(p))
	src := (*C.BW6761_affine_t)(unsafe.Pointer(affine))

	C.projective_from_affine_bw6_761(dst, src)
	return p
}
// FromLimbs copies all BASE_SIZE limbs into f and returns f.
func (f *G1BaseField) FromLimbs(limbs [BASE_SIZE]uint32) *G1BaseField {
	// Array assignment copies element by element, same as copy over full slices.
	f.S = limbs
	return f
}
/*
* BaseField methods
*/
// Limbs returns a copy of the element's limb array (arrays are returned by value).
func (f *G1BaseField) Limbs() [BASE_SIZE]uint32 {
return f.S
}
// ToBytesLe serialises the limbs in index order, each as 4 little-endian
// bytes, and returns the resulting slice of len(f.S)*4 bytes.
func (f *G1BaseField) ToBytesLe() []byte {
	const limbBytes = 4

	out := make([]byte, limbBytes*len(f.S))
	for idx := 0; idx < len(f.S); idx++ {
		binary.LittleEndian.PutUint32(out[idx*limbBytes:], f.S[idx])
	}
	return out
}
/*
* ScalarField methods
*/
// Random lets the native random_scalar_bw6_761 routine write into p and
// returns p. The Go struct is handed over reinterpreted as C.BW6761_scalar_t.
func (p *G1ScalarField) Random() *G1ScalarField {
	C.random_scalar_bw6_761((*C.BW6761_scalar_t)(unsafe.Pointer(p)))
	return p
}
// SetZero overwrites every limb of f with zero and returns f so calls can be chained.
func (f *G1ScalarField) SetZero() *G1ScalarField {
	f.S = [SCALAR_SIZE]uint32{}
	return f
}
// SetOne stores 1 in limb 0, zero in every other limb, and returns f.
func (f *G1ScalarField) SetOne() *G1ScalarField {
	f.S = [SCALAR_SIZE]uint32{0: 1}
	return f
}
// Eq reports whether a and b hold exactly the same limbs.
// This is a raw limb comparison; no field reduction is performed here.
func (a *G1ScalarField) Eq(b *G1ScalarField) bool {
	// Go arrays are comparable: == is true only when every element matches.
	return a.S == b.S
}
/*
* ScalarField methods
*/
// Limbs returns a copy of the element's limb array (arrays are returned by value).
func (f *G1ScalarField) Limbs() [SCALAR_SIZE]uint32 {
return f.S
}
// ToBytesLe serialises the limbs in index order, each as 4 little-endian
// bytes, and returns the resulting slice of len(f.S)*4 bytes.
func (f *G1ScalarField) ToBytesLe() []byte {
	const limbBytes = 4

	out := make([]byte, limbBytes*len(f.S))
	for idx := 0; idx < len(f.S); idx++ {
		binary.LittleEndian.PutUint32(out[idx*limbBytes:], f.S[idx])
	}
	return out
}
/*
* PointBW6761
*/
// G1ProjectivePoint holds the three base-field coordinates X, Y and Z of a
// point. Pointers to it are cast to *C.BW6761_projective_t when calling into
// the native library, so the field order here must not change.
type G1ProjectivePoint struct {
X, Y, Z G1BaseField
}
// SetZero sets the point to (X, Y, Z) = (0, 1, 0) and returns it.
func (f *G1ProjectivePoint) SetZero() *G1ProjectivePoint {
	f.X.SetZero()
	f.Y.SetOne()
	f.Z.SetZero()
	return f
}
// Eq returns the result of the native eq_bw6_761 comparison of p and pCompare.
func (p *G1ProjectivePoint) Eq(pCompare *G1ProjectivePoint) bool {
	// Go has no direct conversion between unrelated pointer types, so both
	// operands are reinterpreted as *C.BW6761_projective_t via unsafe.Pointer.
	// This relies on the Go and C struct layouts being identical.
	lhs := (*C.BW6761_projective_t)(unsafe.Pointer(p))
	rhs := (*C.BW6761_projective_t)(unsafe.Pointer(pCompare))

	equal := C.eq_bw6_761(lhs, rhs)
	return bool(equal)
}
// IsOnCurve returns the verdict of the native projective_is_on_curve_bw6_761
// check for p.
func (p *G1ProjectivePoint) IsOnCurve() bool {
	cPoint := (*C.BW6761_projective_t)(unsafe.Pointer(p))
	return bool(C.projective_is_on_curve_bw6_761(cPoint))
}
// Random lets the native random_projective_bw6_761 routine write into p and
// returns p.
func (p *G1ProjectivePoint) Random() *G1ProjectivePoint {
	C.random_projective_bw6_761((*C.BW6761_projective_t)(unsafe.Pointer(p)))
	return p
}
// StripZ returns a new affine point that carries p's X and Y unchanged.
// Z is simply dropped; the coordinates are not divided by it.
func (p *G1ProjectivePoint) StripZ() *G1PointAffine {
	stripped := new(G1PointAffine)
	stripped.X = p.X
	stripped.Y = p.Y
	return stripped
}
// FromLimbs sets X, Y and Z from the given limb slices and returns p.
// Each slice goes through GetFixedLimbs, which zero-pads short input and
// panics when a slice has more than BASE_SIZE elements.
func (p *G1ProjectivePoint) FromLimbs(x, y, z *[]uint32) *G1ProjectivePoint {
	// The literal is fully evaluated (x, then y, then z) before p is written,
	// so a panic in any conversion leaves p untouched.
	*p = G1ProjectivePoint{
		X: G1BaseField{S: GetFixedLimbs(x)},
		Y: G1BaseField{S: GetFixedLimbs(y)},
		Z: G1BaseField{S: GetFixedLimbs(z)},
	}
	return p
}
/*
* PointAffineNoInfinityBW6761
*/
// G1PointAffine holds the two base-field coordinates X and Y of a point.
// Pointers to it are cast to *C.BW6761_affine_t when calling into the native
// library, so the field order here must not change.
type G1PointAffine struct {
X, Y G1BaseField
}
// FromProjective fills p by calling the native projective_to_affine_bw6_761
// routine with p as the output and projective as the input, then returns p.
func (p *G1PointAffine) FromProjective(projective *G1ProjectivePoint) *G1PointAffine {
	src := (*C.BW6761_projective_t)(unsafe.Pointer(projective))
	dst := (*C.BW6761_affine_t)(unsafe.Pointer(p))

	C.projective_to_affine_bw6_761(dst, src)
	return p
}
// ToProjective returns a new projective point with p's X and Y and with Z
// set by G1BaseField.SetOne. It is computed purely in Go.
func (p *G1PointAffine) ToProjective() *G1ProjectivePoint {
	lifted := &G1ProjectivePoint{X: p.X, Y: p.Y}
	lifted.Z.SetOne()
	return lifted
}
// FromLimbs sets X and Y from the given limb slices and returns p.
// Each slice goes through GetFixedLimbs, which zero-pads short input and
// panics when a slice has more than BASE_SIZE elements.
func (p *G1PointAffine) FromLimbs(X, Y *[]uint32) *G1PointAffine {
	// Both conversions complete before p is written, so a panic leaves p untouched.
	*p = G1PointAffine{
		X: G1BaseField{S: GetFixedLimbs(X)},
		Y: G1BaseField{S: GetFixedLimbs(Y)},
	}
	return p
}
/*
* Multiplication
*/
// MultiplyVec passes the points in a and the scalars in b to the native
// vec_mod_mult_point_bw6_761 routine, together with the element count and
// deviceID.
//
// It panics when a and b differ in length. Empty input returns immediately:
// there is no first element whose address could be handed to the native code.
//
// NOTE(review): presumably the native routine writes its result back into a
// in place — confirm against the C API.
func MultiplyVec(a []G1ProjectivePoint, b []G1ScalarField, deviceID int) {
	if len(a) != len(b) {
		panic("a and b have different lengths")
	}
	if len(a) == 0 {
		// &a[0] would panic with "index out of range" on an empty slice.
		return
	}

	pointsC := (*C.BW6761_projective_t)(unsafe.Pointer(&a[0]))
	scalarsC := (*C.BW6761_scalar_t)(unsafe.Pointer(&b[0]))
	deviceIdC := C.size_t(deviceID)
	nElementsC := C.size_t(len(a))

	C.vec_mod_mult_point_bw6_761(pointsC, scalarsC, nElementsC, deviceIdC)
}
// MultiplyScalar passes the scalar vectors a and b to the native
// vec_mod_mult_scalar_bw6_761 routine, together with the element count and
// deviceID.
//
// It panics when a and b differ in length. Empty input returns immediately:
// there is no first element whose address could be handed to the native code.
//
// NOTE(review): presumably the native routine writes its result back into a
// in place — confirm against the C API.
func MultiplyScalar(a []G1ScalarField, b []G1ScalarField, deviceID int) {
	if len(a) != len(b) {
		panic("a and b have different lengths")
	}
	if len(a) == 0 {
		// &a[0] would panic with "index out of range" on an empty slice.
		return
	}

	aC := (*C.BW6761_scalar_t)(unsafe.Pointer(&a[0]))
	bC := (*C.BW6761_scalar_t)(unsafe.Pointer(&b[0]))
	deviceIdC := C.size_t(deviceID)
	nElementsC := C.size_t(len(a))

	C.vec_mod_mult_scalar_bw6_761(aC, bC, nElementsC, deviceIdC)
}
// MultiplyMatrix multiplies a matrix by a vector through the native
// matrix_vec_mod_mult_bw6_761 routine:
//
//	`a` - flattened matrix;
//	`b` - vector to multiply `a` by;
//
// Empty input returns immediately, because taking the address of the first
// element of an empty slice would panic.
//
// NOTE(review): the output buffer is local to this function and is never
// returned or copied back, so callers cannot observe the product. Fixing that
// needs a signature change — confirm the intended contract.
// NOTE(review): the element count passed to C is len(a), not len(b) — confirm
// which of the two the native routine expects.
func MultiplyMatrix(a []G1ScalarField, b []G1ScalarField, deviceID int) {
	if len(a) == 0 || len(b) == 0 {
		return
	}

	// make zero-initialises every element, which is what SetZero would store,
	// so no explicit zeroing pass is needed.
	c := make([]G1ScalarField, len(b))

	aC := (*C.BW6761_scalar_t)(unsafe.Pointer(&a[0]))
	bC := (*C.BW6761_scalar_t)(unsafe.Pointer(&b[0]))
	cC := (*C.BW6761_scalar_t)(unsafe.Pointer(&c[0]))
	deviceIdC := C.size_t(deviceID)
	nElementsC := C.size_t(len(a))

	C.matrix_vec_mod_mult_bw6_761(aC, bC, cC, nElementsC, deviceIdC)
}
/*
* Utils
*/
// GetFixedLimbs copies *slice into a BASE_SIZE-element array, leaving any
// remaining trailing elements zero. It panics when the slice holds more than
// BASE_SIZE elements.
func GetFixedLimbs(slice *[]uint32) [BASE_SIZE]uint32 {
	src := *slice
	if len(src) > BASE_SIZE {
		panic("slice has too many elements")
	}

	var fixed [BASE_SIZE]uint32
	copy(fixed[:], src)
	return fixed
}

View File

@@ -0,0 +1,212 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bw6761
import (
"encoding/binary"
"fmt"
"testing"
"github.com/stretchr/testify/assert"
)
// generateUint32Array returns a slice of the given length. With isZero set
// every element is 0; otherwise element i holds i+1.
func generateUint32Array(length int, isZero bool) []uint32 {
	out := make([]uint32, length)
	if isZero {
		// make already zero-fills the slice.
		return out
	}
	for idx := range out {
		out[idx] = uint32(idx + 1)
	}
	return out
}
// TestNewFieldBW6761One checks that SetOne yields limb 0 == 1 and all other limbs zero.
func TestNewFieldBW6761One(t *testing.T) {
	// Unlisted array elements default to zero.
	expected := [24]uint32{0x1}

	var one G1BaseField
	one.SetOne()

	assert.Equal(t, one.S, expected)
}
// TestNewFieldBW6761Zero checks that SetZero yields an all-zero limb array.
func TestNewFieldBW6761Zero(t *testing.T) {
	var expected [24]uint32

	var zero G1BaseField
	zero.SetZero()

	assert.Equal(t, zero.S, expected)
}
// TestFieldBW6761ToBytesLe checks that ToBytesLe emits each limb as 4
// little-endian bytes, 96 bytes in total.
func TestFieldBW6761ToBytesLe(t *testing.T) {
	var point G1ProjectivePoint
	point.Random()

	limbs := point.X.S
	want := make([]byte, 4*len(limbs)) // each uint32 takes 4 bytes
	for idx := range limbs {
		binary.LittleEndian.PutUint32(want[4*idx:], limbs[idx])
	}

	got := point.X.ToBytesLe()
	assert.Equal(t, got, want)
	assert.Equal(t, len(got), 96)
}
// TestNewPointBW6761Zero checks that SetZero on a projective point produces (0, 1, 0).
func TestNewPointBW6761Zero(t *testing.T) {
	var origin G1ProjectivePoint
	origin.SetZero()

	var one, zero G1BaseField
	one.SetOne()
	zero.SetZero()

	assert.Equal(t, origin.X, zero)
	assert.Equal(t, origin.Y, one)
	assert.Equal(t, origin.Z, zero)
}
// TestFromProjectiveToAffine converts a random projective point to affine and
// back, then checks that both projective points are on the curve and that the
// native equality check treats them as equal.
func TestFromProjectiveToAffine(t *testing.T) {
// Keep this call first: per the original note, removing it makes the test hang.
fmt.Print() // this prevents the test from hanging. TODO: figure out why
var projective G1ProjectivePoint
var affine G1PointAffine
projective.Random()
affine.FromProjective(&projective)
// Rebuild a projective point from the affine form.
var projective2 G1ProjectivePoint
projective2.FromAffine(&affine)
assert.True(t, projective.IsOnCurve())
assert.True(t, projective2.IsOnCurve())
assert.True(t, projective.Eq(&projective2))
}
// TestBW6761Eq checks that a random point equals itself and differs from a
// second, independently drawn random point.
func TestBW6761Eq(t *testing.T) {
	var first, second G1ProjectivePoint
	first.Random()
	second.Random()

	assert.Equal(t, first.Eq(&first), true)
	assert.Equal(t, first.Eq(&second), false)
}
// TestBW6761StripZ checks that StripZ returns a G1PointAffine carrying the
// original X and Y unchanged.
func TestBW6761StripZ(t *testing.T) {
	var point G1ProjectivePoint
	point.Random()

	stripped := point.StripZ()

	assert.IsType(t, G1PointAffine{}, *stripped)
	assert.Equal(t, point.X, stripped.X)
	assert.Equal(t, point.Y, stripped.Y)
}
// TestPointBW6761fromLimbs checks that a point rebuilt from the limbs of a
// random point equals that point.
func TestPointBW6761fromLimbs(t *testing.T) {
	var want G1ProjectivePoint
	want.Random()

	xs, ys, zs := want.X.Limbs(), want.Y.Limbs(), want.Z.Limbs()
	xSlice, ySlice, zSlice := xs[:], ys[:], zs[:]

	var got G1ProjectivePoint
	got.FromLimbs(&xSlice, &ySlice, &zSlice)

	assert.Equal(t, got, want)
}
func TestNewPointAffineNoInfinityBW6761Zero(t *testing.T) {
var zeroP G1PointAffine
var zeroSanity G1BaseField
zeroSanity.SetZero()
assert.Equal(t, zeroP.X, zeroSanity)
assert.Equal(t, zeroP.Y, zeroSanity)
}
// TestPointAffineNoInfinityBW6761FromLimbs checks that G1PointAffine.FromLimbs
// yields the same point as filling X and Y through G1BaseField.FromLimbs.
func TestPointAffineNoInfinityBW6761FromLimbs(t *testing.T) {
	// Both coordinates use the same 24-limb pattern.
	pattern := [24]uint32{1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8}
	x, y := pattern, pattern
	xSlice, ySlice := x[:], y[:]

	// Point under test, built from slices.
	var got G1PointAffine
	got.FromLimbs(&xSlice, &ySlice)

	// Reference point, built coordinate by coordinate from the arrays.
	var want G1PointAffine
	want.X.FromLimbs(x)
	want.Y.FromLimbs(y)

	assert.Equal(t, want, got)
}
func TestGetFixedLimbs(t *testing.T) {
t.Run("case of valid input of length less than 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7}
expected := [24]uint32{1, 2, 3, 4, 5, 6, 7, 0}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of valid input of length 24", func(t *testing.T) {
slice := generateUint32Array(24, false)
expected := [24]uint32(generateUint32Array(24, false))
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of empty input", func(t *testing.T) {
slice := []uint32{}
expected := [24]uint32(generateUint32Array(24, true))
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of input length greater than 24", func(t *testing.T) {
slice := generateUint32Array(25, false)
defer func() {
if r := recover(); r == nil {
t.Errorf("the code did not panic")
}
}()
GetFixedLimbs(&slice)
})
}

View File

@@ -0,0 +1,98 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bw6761
import (
"encoding/binary"
"unsafe"
)
// #cgo CFLAGS: -I./include/
// #cgo CFLAGS: -I/usr/local/cuda/include
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbw6761
// #include "projective.h"
// #include "ve_mod_mult.h"
import "C"
// G2 extension field
// G2Element stores one G2 coordinate as twelve 64-bit words.
type G2Element [12]uint64
// G2PointAffine holds the X and Y coordinates of a G2 point. Pointers to it
// are cast to *C.BW6761_g2_affine_t when calling into the native library, so
// the field order here must not change.
type G2PointAffine struct {
X, Y G2Element
}
// G2Point holds the X, Y and Z coordinates of a G2 point. Pointers to it are
// cast to *C.BW6761_g2_projective_t when calling into the native library, so
// the field order here must not change.
type G2Point struct {
X, Y, Z G2Element
}
// Random lets the native random_g2_projective_bw6_761 routine write into p
// and returns p.
func (p *G2Point) Random() *G2Point {
	C.random_g2_projective_bw6_761((*C.BW6761_g2_projective_t)(unsafe.Pointer(p)))
	return p
}
// FromAffine fills p by calling the native g2_projective_from_affine_bw6_761
// routine with p as the output and affine as the input, then returns p.
func (p *G2Point) FromAffine(affine *G2PointAffine) *G2Point {
	dst := (*C.BW6761_g2_projective_t)(unsafe.Pointer(p))
	src := (*C.BW6761_g2_affine_t)(unsafe.Pointer(affine))

	C.g2_projective_from_affine_bw6_761(dst, src)
	return p
}
// Eq returns the result of the native eq_g2_bw6_761 comparison of p and pCompare.
func (p *G2Point) Eq(pCompare *G2Point) bool {
	// Go has no direct conversion between unrelated pointer types, so both
	// operands are reinterpreted as *C.BW6761_g2_projective_t via
	// unsafe.Pointer. This relies on the Go and C layouts being identical.
	lhs := (*C.BW6761_g2_projective_t)(unsafe.Pointer(p))
	rhs := (*C.BW6761_g2_projective_t)(unsafe.Pointer(pCompare))

	equal := C.eq_g2_bw6_761(lhs, rhs)
	return bool(equal)
}
// ToBytesLe serialises the element word by word, each 64-bit word written in
// little-endian byte order, and returns the resulting 8*len(f) bytes.
func (f *G2Element) ToBytesLe() []byte {
	const wordSize = 8 // bytes in one uint64
	out := make([]byte, wordSize*len(f))
	for i := range f {
		binary.LittleEndian.PutUint64(out[i*wordSize:], f[i])
	}
	return out
}
// FromProjective writes into p the result of g2_projective_to_affine_bw6_761
// applied to projective, and returns p.
func (p *G2PointAffine) FromProjective(projective *G2Point) *G2PointAffine {
	src := (*C.BW6761_g2_projective_t)(unsafe.Pointer(projective))
	dst := (*C.BW6761_g2_affine_t)(unsafe.Pointer(p))
	C.g2_projective_to_affine_bw6_761(dst, src)
	return p
}
// IsOnCurve returns the verdict of the native
// g2_projective_is_on_curve_bw6_761 check for p.
func (p *G2Point) IsOnCurve() bool {
	// No copy is made: the C side reads p's memory directly.
	onCurve := C.g2_projective_is_on_curve_bw6_761((*C.BW6761_g2_projective_t)(unsafe.Pointer(p)))
	return bool(onCurve)
}

View File

@@ -0,0 +1,83 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bw6761
import (
"fmt"
"testing"
"unsafe"
"github.com/stretchr/testify/assert"
)
// TestG2Eqg2 checks that a random G2 point compares equal to itself.
func TestG2Eqg2(t *testing.T) {
	p := new(G2Point).Random()
	assert.True(t, p.Eq(p))
}
// TestG2FromProjectiveToAffine round-trips a random point through the affine
// form and expects both projective points to be on the curve and equal.
func TestG2FromProjectiveToAffine(t *testing.T) {
	fmt.Print() // this prevents the test from hanging. TODO: figure out why

	original := new(G2Point).Random()
	affine := new(G2PointAffine).FromProjective(original)
	restored := new(G2Point).FromAffine(affine)

	assert.True(t, original.IsOnCurve())
	assert.True(t, restored.IsOnCurve())
	assert.True(t, original.Eq(restored))
}
// TestG2Eqg2NotEqual checks that two independently generated random points do
// not compare equal.
func TestG2Eqg2NotEqual(t *testing.T) {
	first := new(G2Point).Random()
	second := new(G2Point).Random()
	assert.False(t, first.Eq(second))
}
// TestG2ToBytes checks that ToBytesLe emits exactly as many bytes as a
// G2Element occupies in memory.
func TestG2ToBytes(t *testing.T) {
	p := new(G2Point).Random()
	encoded := p.X.ToBytesLe()
	assert.Equal(t, len(encoded), int(unsafe.Sizeof(G2Element{})))
}
// TestG2ShouldConvertToProjective converts a random point to affine and back,
// then expects the result to be on the curve and equal to the starting point.
func TestG2ShouldConvertToProjective(t *testing.T) {
	fmt.Print() // this prevents the test from hanging. TODO: figure out why

	start := new(G2Point).Random()
	affine := new(G2PointAffine).FromProjective(start)
	back := new(G2Point).FromAffine(affine)

	assert.True(t, back.IsOnCurve())
	assert.True(t, start.Eq(back))
}

View File

@@ -0,0 +1,101 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdbool.h>
#include <stddef.h> /* size_t is used by every prototype below */
// msm.h
#ifndef _BW6761_MSM_H
#define _BW6761_MSM_H
#ifdef __cplusplus
extern "C" {
#endif

// Incomplete declaration of BW6761 projective and affine structs.
// Callers only ever hold pointers to these types; their layout is defined by
// the library implementation, not by this header.
typedef struct BW6761_projective_t BW6761_projective_t;
typedef struct BW6761_g2_projective_t BW6761_g2_projective_t;
typedef struct BW6761_affine_t BW6761_affine_t;
typedef struct BW6761_g2_affine_t BW6761_g2_affine_t;
typedef struct BW6761_scalar_t BW6761_scalar_t;
typedef cudaStream_t CudaStream_t;

// All functions return an int status; the Go bindings treat 0 as success.
// NOTE(review): parameter meanings below are taken from their names only, the
// implementation is not part of this header.

// G1, single and batched. The Go bindings pass ordinary Go slices here.
int msm_cuda_bw6_761(
  BW6761_projective_t* out, BW6761_affine_t* points, BW6761_scalar_t* scalars, size_t count, size_t device_id);
int msm_batch_cuda_bw6_761(
  BW6761_projective_t* out,
  BW6761_affine_t* points,
  BW6761_scalar_t* scalars,
  size_t batch_size,
  size_t msm_size,
  size_t device_id);

// G1 "commit" variants. The Go tests pass buffers obtained from CudaMalloc.
// Note the argument order: scalars come before points, unlike msm_*.
int commit_cuda_bw6_761(
  BW6761_projective_t* d_out,
  BW6761_scalar_t* d_scalars,
  BW6761_affine_t* d_points,
  size_t count,
  unsigned large_bucket_factor,
  size_t device_id);
int commit_batch_cuda_bw6_761(
  BW6761_projective_t* d_out,
  BW6761_scalar_t* d_scalars,
  BW6761_affine_t* d_points,
  size_t count,
  size_t batch_size,
  size_t device_id);

// G2 counterparts of the functions above.
int msm_g2_cuda_bw6_761(
  BW6761_g2_projective_t* out,
  BW6761_g2_affine_t* points,
  BW6761_scalar_t* scalars,
  size_t count,
  size_t device_id);
int msm_batch_g2_cuda_bw6_761(
  BW6761_g2_projective_t* out,
  BW6761_g2_affine_t* points,
  BW6761_scalar_t* scalars,
  size_t batch_size,
  size_t msm_size,
  size_t device_id);
int commit_g2_cuda_bw6_761(
  BW6761_g2_projective_t* d_out,
  BW6761_scalar_t* d_scalars,
  BW6761_g2_affine_t* d_points,
  size_t count,
  unsigned large_bucket_factor,
  size_t device_id);
// NOTE(review): this is the only prototype in the header that takes a
// cudaStream_t; commit_batch_cuda_bw6_761 has no such parameter. Confirm
// against the implementation which of the two signatures is intended.
int commit_batch_g2_cuda_bw6_761(
  BW6761_g2_projective_t* d_out,
  BW6761_scalar_t* d_scalars,
  BW6761_g2_affine_t* d_points,
  size_t count,
  size_t batch_size,
  size_t device_id,
  cudaStream_t stream);

#ifdef __cplusplus
}
#endif
#endif /* _BW6761_MSM_H */

View File

@@ -0,0 +1,198 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <stdbool.h>
#include <stddef.h> /* size_t */
#include <stdint.h> /* uint32_t */
// ntt.h
#ifndef _BW6761_NTT_H
#define _BW6761_NTT_H
#ifdef __cplusplus
extern "C" {
#endif

// Incomplete declaration of BW6761 projective and affine structs.
// Only pointers to these types cross the C boundary.
typedef struct BW6761_projective_t BW6761_projective_t;
typedef struct BW6761_affine_t BW6761_affine_t;
typedef struct BW6761_scalar_t BW6761_scalar_t;
typedef struct BW6761_g2_projective_t BW6761_g2_projective_t;
typedef struct BW6761_g2_affine_t BW6761_g2_affine_t;

// NOTE(review): across this header `device_id` is sometimes `unsigned` and
// sometimes `size_t`, and `stream` is passed as `size_t`. The types are kept
// exactly as declared so that the ABI does not change; confirm against the
// implementation before unifying them.

// NTT over scalars and over projective points, single and batched.
int ntt_cuda_bw6_761(BW6761_scalar_t* arr, uint32_t n, bool inverse, size_t device_id);
int ntt_batch_cuda_bw6_761(
  BW6761_scalar_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
int ecntt_cuda_bw6_761(BW6761_projective_t* arr, uint32_t n, bool inverse, size_t device_id);
int ecntt_batch_cuda_bw6_761(
  BW6761_projective_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);

// Returns a pointer to the built domain; the Go binding treats a null result
// as failure.
BW6761_scalar_t*
build_domain_cuda_bw6_761(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);

// Interpolation.
int interpolate_scalars_cuda_bw6_761(
  BW6761_scalar_t* d_out,
  BW6761_scalar_t* d_evaluations,
  BW6761_scalar_t* d_domain,
  unsigned n,
  unsigned device_id,
  size_t stream);
int interpolate_scalars_batch_cuda_bw6_761(
  BW6761_scalar_t* d_out,
  BW6761_scalar_t* d_evaluations,
  BW6761_scalar_t* d_domain,
  unsigned n,
  unsigned batch_size,
  size_t device_id,
  size_t stream);
int interpolate_points_cuda_bw6_761(
  BW6761_projective_t* d_out,
  BW6761_projective_t* d_evaluations,
  BW6761_scalar_t* d_domain,
  unsigned n,
  size_t device_id,
  size_t stream);
int interpolate_points_batch_cuda_bw6_761(
  BW6761_projective_t* d_out,
  BW6761_projective_t* d_evaluations,
  BW6761_scalar_t* d_domain,
  unsigned n,
  unsigned batch_size,
  size_t device_id,
  size_t stream);
int interpolate_scalars_on_coset_cuda_bw6_761(
  BW6761_scalar_t* d_out,
  BW6761_scalar_t* d_evaluations,
  BW6761_scalar_t* d_domain,
  unsigned n,
  BW6761_scalar_t* coset_powers,
  size_t device_id,
  size_t stream);
int interpolate_scalars_batch_on_coset_cuda_bw6_761(
  BW6761_scalar_t* d_out,
  BW6761_scalar_t* d_evaluations,
  BW6761_scalar_t* d_domain,
  unsigned n,
  unsigned batch_size,
  BW6761_scalar_t* coset_powers,
  size_t device_id,
  size_t stream);

// Evaluation.
int evaluate_scalars_cuda_bw6_761(
  BW6761_scalar_t* d_out,
  BW6761_scalar_t* d_coefficients,
  BW6761_scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned device_id,
  size_t stream);
int evaluate_scalars_batch_cuda_bw6_761(
  BW6761_scalar_t* d_out,
  BW6761_scalar_t* d_coefficients,
  BW6761_scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned batch_size,
  size_t device_id,
  size_t stream);
int evaluate_points_cuda_bw6_761(
  BW6761_projective_t* d_out,
  BW6761_projective_t* d_coefficients,
  BW6761_scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  size_t device_id,
  size_t stream);
int evaluate_points_batch_cuda_bw6_761(
  BW6761_projective_t* d_out,
  BW6761_projective_t* d_coefficients,
  BW6761_scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned batch_size,
  size_t device_id,
  size_t stream);
int evaluate_scalars_on_coset_cuda_bw6_761(
  BW6761_scalar_t* d_out,
  BW6761_scalar_t* d_coefficients,
  BW6761_scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  BW6761_scalar_t* coset_powers,
  unsigned device_id,
  size_t stream);
int evaluate_scalars_on_coset_batch_cuda_bw6_761(
  BW6761_scalar_t* d_out,
  BW6761_scalar_t* d_coefficients,
  BW6761_scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned batch_size,
  BW6761_scalar_t* coset_powers,
  size_t device_id,
  size_t stream);
int evaluate_points_on_coset_cuda_bw6_761(
  BW6761_projective_t* d_out,
  BW6761_projective_t* d_coefficients,
  BW6761_scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  BW6761_scalar_t* coset_powers,
  size_t device_id,
  size_t stream);
int evaluate_points_on_coset_batch_cuda_bw6_761(
  BW6761_projective_t* d_out,
  BW6761_projective_t* d_coefficients,
  BW6761_scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned batch_size,
  BW6761_scalar_t* coset_powers,
  size_t device_id,
  size_t stream);

// In-place reordering.
int reverse_order_scalars_cuda_bw6_761(BW6761_scalar_t* arr, int n, size_t device_id, size_t stream);
int reverse_order_scalars_batch_cuda_bw6_761(
  BW6761_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
int reverse_order_points_cuda_bw6_761(BW6761_projective_t* arr, int n, size_t device_id, size_t stream);
int reverse_order_points_batch_cuda_bw6_761(
  BW6761_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);

// Element-wise scalar arithmetic.
int add_scalars_cuda_bw6_761(
  BW6761_scalar_t* d_out, BW6761_scalar_t* d_in1, BW6761_scalar_t* d_in2, unsigned n, size_t stream);
int sub_scalars_cuda_bw6_761(
  BW6761_scalar_t* d_out, BW6761_scalar_t* d_in1, BW6761_scalar_t* d_in2, unsigned n, size_t stream);

// Montgomery-form conversions, in place.
int to_montgomery_scalars_cuda_bw6_761(BW6761_scalar_t* d_inout, unsigned n, size_t stream);
int from_montgomery_scalars_cuda_bw6_761(BW6761_scalar_t* d_inout, unsigned n, size_t stream);
// points g1
int to_montgomery_proj_points_cuda_bw6_761(BW6761_projective_t* d_inout, unsigned n, size_t stream);
int from_montgomery_proj_points_cuda_bw6_761(BW6761_projective_t* d_inout, unsigned n, size_t stream);
int to_montgomery_aff_points_cuda_bw6_761(BW6761_affine_t* d_inout, unsigned n, size_t stream);
int from_montgomery_aff_points_cuda_bw6_761(BW6761_affine_t* d_inout, unsigned n, size_t stream);
// points g2
int to_montgomery_proj_points_g2_cuda_bw6_761(BW6761_g2_projective_t* d_inout, unsigned n, size_t stream);
int from_montgomery_proj_points_g2_cuda_bw6_761(BW6761_g2_projective_t* d_inout, unsigned n, size_t stream);
int to_montgomery_aff_points_g2_cuda_bw6_761(BW6761_g2_affine_t* d_inout, unsigned n, size_t stream);
int from_montgomery_aff_points_g2_cuda_bw6_761(BW6761_g2_affine_t* d_inout, unsigned n, size_t stream);

#ifdef __cplusplus
}
#endif
#endif /* _BW6761_NTT_H */

View File

@@ -0,0 +1,50 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <stdbool.h>
// projective.h
// Include guard added for consistency with the other BW6761 headers, so that
// including this file more than once in a translation unit is harmless.
#ifndef _BW6761_PROJECTIVE_H
#define _BW6761_PROJECTIVE_H
#ifdef __cplusplus
extern "C" {
#endif

// Opaque BW6761 types; only pointers to them cross the C boundary.
typedef struct BW6761_projective_t BW6761_projective_t;
typedef struct BW6761_g2_projective_t BW6761_g2_projective_t;
typedef struct BW6761_affine_t BW6761_affine_t;
typedef struct BW6761_g2_affine_t BW6761_g2_affine_t;
typedef struct BW6761_scalar_t BW6761_scalar_t;

// G1 helpers.
bool projective_is_on_curve_bw6_761(BW6761_projective_t* point1);
int random_scalar_bw6_761(BW6761_scalar_t* out);
int random_projective_bw6_761(BW6761_projective_t* out);
BW6761_projective_t* projective_zero_bw6_761();
int projective_to_affine_bw6_761(BW6761_affine_t* out, BW6761_projective_t* point1);
int projective_from_affine_bw6_761(BW6761_projective_t* out, BW6761_affine_t* point1);

// G2 helpers.
int random_g2_projective_bw6_761(BW6761_g2_projective_t* out);
int g2_projective_to_affine_bw6_761(BW6761_g2_affine_t* out, BW6761_g2_projective_t* point1);
int g2_projective_from_affine_bw6_761(BW6761_g2_projective_t* out, BW6761_g2_affine_t* point1);
bool g2_projective_is_on_curve_bw6_761(BW6761_g2_projective_t* point1);

// Equality of two projective points.
bool eq_bw6_761(BW6761_projective_t* point1, BW6761_projective_t* point2);
bool eq_g2_bw6_761(BW6761_g2_projective_t* point1, BW6761_g2_projective_t* point2);

#ifdef __cplusplus
}
#endif
#endif /* _BW6761_PROJECTIVE_H */

View File

@@ -0,0 +1,49 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <stdbool.h>
#include <stddef.h> /* size_t */
#include <stdint.h> /* int32_t */
#include <cuda.h>
// ve_mod_mult.h
#ifndef _BW6761_VEC_MULT_H
#define _BW6761_VEC_MULT_H
#ifdef __cplusplus
extern "C" {
#endif

// Opaque BW6761 types; only pointers to them cross the C boundary.
typedef struct BW6761_projective_t BW6761_projective_t;
typedef struct BW6761_scalar_t BW6761_scalar_t;

// Each function returns an int32_t status.
// NOTE(review): the `inout` name suggests the first argument is overwritten
// with the result — confirm against the implementation.
int32_t vec_mod_mult_point_bw6_761(
  BW6761_projective_t* inout, BW6761_scalar_t* scalar_vec, size_t n_elements, size_t device_id);
int32_t vec_mod_mult_scalar_bw6_761(
  BW6761_scalar_t* inout, BW6761_scalar_t* scalar_vec, size_t n_elements, size_t device_id);
int32_t vec_mod_mult_device_scalar_bw6_761(
  BW6761_scalar_t* inout, BW6761_scalar_t* scalar_vec, size_t n_elements, size_t device_id);
int32_t matrix_vec_mod_mult_bw6_761(
  BW6761_scalar_t* matrix_flattened,
  BW6761_scalar_t* input,
  BW6761_scalar_t* output,
  size_t n_elements,
  size_t device_id);

#ifdef __cplusplus
}
#endif
#endif /* _BW6761_VEC_MULT_H */

View File

@@ -0,0 +1,209 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bw6761
import (
"errors"
"fmt"
"unsafe"
)
// #cgo CFLAGS: -I./include/
// #cgo CFLAGS: -I/usr/local/cuda/include
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbw6761
// #include "msm.h"
import "C"
// Msm runs msm_cuda_bw6_761 over points and scalars on the given device and
// stores the result in out.
//
// points and scalars must be non-empty and of equal length, and out must be
// non-nil; otherwise an error is returned and the native library is not
// called. A non-zero native status is reported as an error. On success the
// same out pointer is returned.
func Msm(out *G1ProjectivePoint, points []G1PointAffine, scalars []G1ScalarField, device_id int) (*G1ProjectivePoint, error) {
	if out == nil {
		return nil, errors.New("out is nil")
	}
	if len(points) != len(scalars) {
		return nil, errors.New("error on: len(points) != len(scalars)")
	}
	// Taking &points[0] on an empty slice would panic, so reject it up front.
	if len(points) == 0 {
		return nil, errors.New("points or scalars is empty")
	}

	pointsC := (*C.BW6761_affine_t)(unsafe.Pointer(&points[0]))
	scalarsC := (*C.BW6761_scalar_t)(unsafe.Pointer(&scalars[0]))
	outC := (*C.BW6761_projective_t)(unsafe.Pointer(out))

	ret := C.msm_cuda_bw6_761(outC, pointsC, scalarsC, C.size_t(len(points)), C.size_t(device_id))
	if ret != 0 {
		return nil, fmt.Errorf("msm_cuda_bw6_761 returned error code: %d", ret)
	}
	return out, nil
}
// MsmG2 runs msm_g2_cuda_bw6_761 over points and scalars on the given device
// and stores the result in out.
//
// points and scalars must be non-empty and of equal length, and out must be
// non-nil; otherwise an error is returned and the native library is not
// called. A non-zero native status is reported as an error. On success the
// same out pointer is returned.
func MsmG2(out *G2Point, points []G2PointAffine, scalars []G1ScalarField, device_id int) (*G2Point, error) {
	if out == nil {
		return nil, errors.New("out is nil")
	}
	if len(points) != len(scalars) {
		return nil, errors.New("error on: len(points) != len(scalars)")
	}
	// Taking &points[0] on an empty slice would panic, so reject it up front.
	if len(points) == 0 {
		return nil, errors.New("points or scalars is empty")
	}

	pointsC := (*C.BW6761_g2_affine_t)(unsafe.Pointer(&points[0]))
	scalarsC := (*C.BW6761_scalar_t)(unsafe.Pointer(&scalars[0]))
	outC := (*C.BW6761_g2_projective_t)(unsafe.Pointer(out))

	ret := C.msm_g2_cuda_bw6_761(outC, pointsC, scalarsC, C.size_t(len(points)), C.size_t(device_id))
	if ret != 0 {
		return nil, fmt.Errorf("msm_g2_cuda_bw6_761 returned error code: %d", ret)
	}
	return out, nil
}
// MsmBatch runs msm_batch_cuda_bw6_761 with batchSize MSMs, each of size
// len(*points)/batchSize, and returns one result per batch entry.
//
// An error is returned when either pointer is nil, the slices differ in
// length or are empty, batchSize is not positive, or the native call reports
// a non-zero status.
func MsmBatch(points *[]G1PointAffine, scalars *[]G1ScalarField, batchSize, deviceId int) ([]G1ProjectivePoint, error) {
	switch {
	case points == nil || scalars == nil:
		return nil, errors.New("points or scalars is nil")
	case len(*points) != len(*scalars):
		return nil, errors.New("error on: len(points) != len(scalars)")
	case len(*points) == 0 || len(*scalars) == 0:
		return nil, errors.New("points or scalars is empty")
	case batchSize <= 0:
		return nil, errors.New("error on: batchSize must be greater than zero")
	}

	// Every output slot starts out as the zero point.
	results := make([]G1ProjectivePoint, batchSize)
	for i := range results {
		results[i].SetZero()
	}

	status := C.msm_batch_cuda_bw6_761(
		(*C.BW6761_projective_t)(unsafe.Pointer(&results[0])),
		(*C.BW6761_affine_t)(unsafe.Pointer(&(*points)[0])),
		(*C.BW6761_scalar_t)(unsafe.Pointer(&(*scalars)[0])),
		C.size_t(batchSize),
		C.size_t(len(*points)/batchSize),
		C.size_t(deviceId),
	)
	if status != 0 {
		return nil, fmt.Errorf("msm_batch_cuda_bw6_761 returned error code: %d", status)
	}
	return results, nil
}
// MsmG2Batch runs msm_batch_g2_cuda_bw6_761 with batchSize MSMs, each of size
// len(*points)/batchSize, and returns one G2 result per batch entry.
//
// An error is returned when either pointer is nil, the slices differ in
// length or are empty, batchSize is not positive, or the native call reports
// a non-zero status.
func MsmG2Batch(points *[]G2PointAffine, scalars *[]G1ScalarField, batchSize, deviceId int) ([]G2Point, error) {
	// Check for nil pointers
	if points == nil || scalars == nil {
		return nil, errors.New("points or scalars is nil")
	}
	if len(*points) != len(*scalars) {
		return nil, errors.New("error on: len(points) != len(scalars)")
	}
	// Check for empty slices
	if len(*points) == 0 || len(*scalars) == 0 {
		return nil, errors.New("points or scalars is empty")
	}
	// Check for zero batchSize
	if batchSize <= 0 {
		return nil, errors.New("error on: batchSize must be greater than zero")
	}

	out := make([]G2Point, batchSize)
	outC := (*C.BW6761_g2_projective_t)(unsafe.Pointer(&out[0]))
	pointsC := (*C.BW6761_g2_affine_t)(unsafe.Pointer(&(*points)[0]))
	scalarsC := (*C.BW6761_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
	msmSizeC := C.size_t(len(*points) / batchSize)
	deviceIdC := C.size_t(deviceId)
	batchSizeC := C.size_t(batchSize)

	ret := C.msm_batch_g2_cuda_bw6_761(outC, pointsC, scalarsC, batchSizeC, msmSizeC, deviceIdC)
	if ret != 0 {
		// Name the function that was actually called so failures are not
		// mistaken for G1 batch failures.
		return nil, fmt.Errorf("msm_batch_g2_cuda_bw6_761 returned error code: %d", ret)
	}
	return out, nil
}
// Commit forwards the three raw pointers to commit_cuda_bw6_761 on device 0.
// It returns 0 when the native call reports success and -1 otherwise.
func Commit(d_out, d_scalars, d_points unsafe.Pointer, count, bucketFactor int) int {
	status := C.commit_cuda_bw6_761(
		(*C.BW6761_projective_t)(d_out),
		(*C.BW6761_scalar_t)(d_scalars),
		(*C.BW6761_affine_t)(d_points),
		(C.size_t)(count),
		C.uint(bucketFactor),
		0, // device_id
	)
	if status == 0 {
		return 0
	}
	return -1
}
// CommitG2 forwards the three raw pointers to commit_g2_cuda_bw6_761 on
// device 0. It returns 0 when the native call reports success and -1
// otherwise.
func CommitG2(d_out, d_scalars, d_points unsafe.Pointer, count, bucketFactor int) int {
	status := C.commit_g2_cuda_bw6_761(
		(*C.BW6761_g2_projective_t)(d_out),
		(*C.BW6761_scalar_t)(d_scalars),
		(*C.BW6761_g2_affine_t)(d_points),
		(C.size_t)(count),
		C.uint(bucketFactor),
		0, // device_id
	)
	if status == 0 {
		return 0
	}
	return -1
}
// CommitBatch forwards the three raw pointers to commit_batch_cuda_bw6_761 on
// device 0. It returns 0 when the native call reports success and -1
// otherwise.
func CommitBatch(d_out, d_scalars, d_points unsafe.Pointer, count, batch_size int) int {
	status := C.commit_batch_cuda_bw6_761(
		(*C.BW6761_projective_t)(d_out),
		(*C.BW6761_scalar_t)(d_scalars),
		(*C.BW6761_affine_t)(d_points),
		(C.size_t)(count),
		(C.size_t)(batch_size),
		0, // device_id
	)
	if status == 0 {
		return 0
	}
	return -1
}
// CommitG2Batch forwards the three raw pointers to
// commit_batch_g2_cuda_bw6_761 on device 0 with a nil stream. It returns 0
// when the native call reports success and -1 otherwise.
//
// The previous implementation called msm_batch_g2_cuda_bw6_761 instead, whose
// parameters are (out, points, scalars, batch_size, msm_size, device_id): that
// passed count as the batch size and batch_size as the MSM size. Calling the
// commit variant makes this function the G2 counterpart of CommitBatch.
func CommitG2Batch(d_out, d_scalars, d_points unsafe.Pointer, count, batch_size int) int {
	d_outC := (*C.BW6761_g2_projective_t)(d_out)
	scalarsC := (*C.BW6761_scalar_t)(d_scalars)
	pointsC := (*C.BW6761_g2_affine_t)(d_points)
	countC := (C.size_t)(count)
	batch_sizeC := (C.size_t)(batch_size)

	// msm.h declares a trailing cudaStream_t for this function; nil is passed
	// for it here. NOTE(review): confirm the header matches the implementation.
	ret := C.commit_batch_g2_cuda_bw6_761(d_outC, scalarsC, pointsC, countC, batch_sizeC, 0, nil)
	if ret != 0 {
		return -1
	}
	return 0
}

View File

@@ -0,0 +1,367 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bw6761
import (
"fmt"
"math"
"testing"
"time"
"unsafe"
"github.com/ingonyama-zk/icicle/goicicle"
"github.com/stretchr/testify/assert"
)
// GeneratePoints returns count affine G1 points. Only ten distinct random
// points are generated; the slice is then doubled until it is long enough and
// truncated to count, so the values repeat with period ten.
func GeneratePoints(count int) []G1PointAffine {
	const seedCount = 10

	var points []G1PointAffine
	for n := 0; n < seedCount; n++ {
		var proj G1ProjectivePoint
		proj.Random()
		var aff G1PointAffine
		aff.FromProjective(&proj)
		points = append(points, aff)
	}

	// Number of doublings needed to grow from seedCount to at least count.
	doublings := int(math.Ceil(math.Log2(float64(count)) - math.Log2(seedCount)))
	for ; doublings > 0; doublings-- {
		points = append(points, points...)
	}
	return points[:count]
}
// GeneratePointsProj returns count projective G1 points, each generated
// independently with Random.
func GeneratePointsProj(count int) []G1ProjectivePoint {
	var points []G1ProjectivePoint
	for remaining := count; remaining > 0; remaining-- {
		var next G1ProjectivePoint
		next.Random()
		points = append(points, next)
	}
	return points
}
// GenerateScalars returns count scalars.
//
// When skewed is set and count exceeds 1,200,000, the last 1,200,000 entries
// are 600,000 copies of one random scalar, then 400,000 zeros, then 200,000
// ones; everything before them is random. In every other case all entries are
// random.
func GenerateScalars(count int, skewed bool) []G1ScalarField {
	const (
		repeatedCount = 600_000
		zeroCount     = 400_000
		oneCount      = 200_000
		skewedTail    = repeatedCount + zeroCount + oneCount
	)

	var (
		scalars                     []G1ScalarField
		sample, zero, one, repeated G1ScalarField
	)
	zero.SetZero()
	one.SetOne()
	repeated.Random()

	appendRandom := func(n int) {
		for i := 0; i < n; i++ {
			sample.Random()
			scalars = append(scalars, sample)
		}
	}
	appendCopies := func(value G1ScalarField, n int) {
		for i := 0; i < n; i++ {
			scalars = append(scalars, value)
		}
	}

	if !skewed || count <= skewedTail {
		appendRandom(count)
		return scalars[:count]
	}

	appendRandom(count - skewedTail)
	appendCopies(repeated, repeatedCount)
	appendCopies(zero, zeroCount)
	appendCopies(one, oneCount)
	return scalars[:count]
}
// TestMSM runs a 2^8-element MSM through Msm and expects no error and a
// result that lies on the curve.
func TestMSM(t *testing.T) {
	fmt.Print() // this prevents the test from hanging. TODO: figure out why

	for _, logSize := range []int{8} {
		size := 1 << logSize

		points := GeneratePoints(size)
		fmt.Print("Finished generating points\n")
		scalars := GenerateScalars(size, false)
		fmt.Print("Finished generating scalars\n")

		result := new(G1ProjectivePoint)
		begin := time.Now()
		_, err := Msm(result, points, scalars, 0) // non mont
		fmt.Printf("icicle MSM took: %d ms\n", time.Since(begin).Milliseconds())

		assert.Equal(t, err, nil, "error should be nil")
		assert.True(t, result.IsOnCurve())
	}
}
// TestCommitMSM copies 2^8-1 points and scalars into CudaMalloc'ed buffers,
// runs Commit on them, copies the result back and expects status 0 and a
// result on the curve. Allocation failures abort the test instead of being
// silently ignored.
func TestCommitMSM(t *testing.T) {
	for _, v := range []int{8} {
		count := 1<<v - 1
		fmt.Print("Started generating points and scalars\n")
		points := GeneratePoints(count)
		scalars := GenerateScalars(count, false)
		fmt.Print("Finished generating points and scalars\n")

		// Buffer sizes are derived from the Go types so that they follow the
		// curve's element sizes.
		var sizeOutD G1ProjectivePoint
		outBytes := int(unsafe.Sizeof(sizeOutD))
		out_d, err := goicicle.CudaMalloc(outBytes)
		if err != nil {
			t.Fatalf("CudaMalloc for the result failed: %v", err)
		}

		var sizePoints G1PointAffine
		pointsBytes := count * int(unsafe.Sizeof(sizePoints))
		points_d, err := goicicle.CudaMalloc(pointsBytes)
		if err != nil {
			t.Fatalf("CudaMalloc for the points failed: %v", err)
		}
		goicicle.CudaMemCpyHtoD[G1PointAffine](points_d, points, pointsBytes)

		var sizeScalar G1ScalarField
		scalarBytes := count * int(unsafe.Sizeof(sizeScalar))
		scalars_d, err := goicicle.CudaMalloc(scalarBytes)
		if err != nil {
			t.Fatalf("CudaMalloc for the scalars failed: %v", err)
		}
		goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)

		startTime := time.Now()
		e := Commit(out_d, scalars_d, points_d, count, 10)
		fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())

		outHost := make([]G1ProjectivePoint, 1)
		goicicle.CudaMemCpyDtoH[G1ProjectivePoint](outHost, out_d, outBytes)

		assert.Equal(t, e, 0, "error should be 0")
		assert.True(t, outHost[0].IsOnCurve())
	}
}
func BenchmarkCommit(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GeneratePoints(msmSize)
scalars := GenerateScalars(msmSize, false)
out_d, _ := goicicle.CudaMalloc(96)
pointsBytes := msmSize * 64
points_d, _ := goicicle.CudaMalloc(pointsBytes)
goicicle.CudaMemCpyHtoD[G1PointAffine](points_d, points, pointsBytes)
scalarBytes := msmSize * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
b.Run(fmt.Sprintf("MSM %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
e := Commit(out_d, scalars_d, points_d, msmSize, 10)
assert.Equal(b, e, 0, "error should be 0")
outHost := make([]G1ProjectivePoint, 1)
goicicle.CudaMemCpyDtoH[G1ProjectivePoint](outHost, out_d, 288)
assert.True(b, outHost[0].IsOnCurve())
if e != 0 {
panic("Error occured")
}
}
})
}
}
// TestBatchMSM runs MsmBatch for batch sizes 2^2 and 2^4 combined with MSM
// sizes 2^4 and 2^6, and expects one on-curve result per batch entry.
func TestBatchMSM(t *testing.T) {
	batchExponents := []int{2, 4}
	sizeExponents := []int{4, 6}

	for _, batchExp := range batchExponents {
		for _, sizeExp := range sizeExponents {
			perMsm := 1 << sizeExp
			batches := 1 << batchExp
			total := perMsm * batches

			points := GeneratePoints(total)
			scalars := GenerateScalars(total, false)

			results, err := MsmBatch(&points, &scalars, batches, 0)
			if err != nil {
				t.Errorf("MsmBatchBW6761 returned an error: %v", err)
			}
			if got := len(results); got != batches {
				t.Errorf("Expected length %d, but got %d", batches, got)
			}
			for i := range results {
				assert.True(t, results[i].IsOnCurve())
			}
		}
	}
}
// BenchmarkMSM measures Msm for MSM sizes 2^20 through 2^26. Input generation
// happens outside the timed sub-benchmark.
func BenchmarkMSM(b *testing.B) {
	for _, logSize := range []int{20, 21, 22, 23, 24, 25, 26} {
		size := 1 << logSize
		points := GeneratePoints(size)
		scalars := GenerateScalars(size, false)

		name := fmt.Sprintf("MSM %d", logSize)
		b.Run(name, func(b *testing.B) {
			for iter := 0; iter < b.N; iter++ {
				if _, err := Msm(new(G1ProjectivePoint), points, scalars, 0); err != nil {
					panic("Error occured")
				}
			}
		})
	}
}
// G2

// GenerateG2Points returns count affine G2 points. Only ten distinct random
// points are generated; the slice is then doubled until it is long enough and
// truncated to count, so the values repeat with period ten.
func GenerateG2Points(count int) []G2PointAffine {
	const seedCount = 10

	var points []G2PointAffine
	for n := 0; n < seedCount; n++ {
		fmt.Print() // this prevents the test from hanging. TODO: figure out why
		var proj G2Point
		proj.Random()
		var aff G2PointAffine
		aff.FromProjective(&proj)
		points = append(points, aff)
	}

	// Number of doublings needed to grow from seedCount to at least count.
	doublings := int(math.Ceil(math.Log2(float64(count)) - math.Log2(seedCount)))
	for ; doublings > 0; doublings-- {
		points = append(points, points...)
	}
	return points[:count]
}
// TestMsmG2BW6761 runs a 2^8-element G2 MSM through MsmG2 and expects no
// error and a result that lies on the curve.
func TestMsmG2BW6761(t *testing.T) {
	for _, logSize := range []int{8} {
		size := 1 << logSize

		points := GenerateG2Points(size)
		fmt.Print("Finished generating points\n")
		scalars := GenerateScalars(size, false)
		fmt.Print("Finished generating scalars\n")

		result := new(G2Point)
		_, err := MsmG2(result, points, scalars, 0)

		assert.Equal(t, err, nil, "error should be nil")
		assert.True(t, result.IsOnCurve())
	}
}
// BenchmarkMsmG2BW6761 measures MsmG2 for MSM sizes 2^20 through 2^26. Input
// generation happens outside the timed sub-benchmark.
func BenchmarkMsmG2BW6761(b *testing.B) {
	for _, logSize := range []int{20, 21, 22, 23, 24, 25, 26} {
		size := 1 << logSize
		points := GenerateG2Points(size)
		scalars := GenerateScalars(size, false)

		name := fmt.Sprintf("MSM G2 %d", logSize)
		b.Run(name, func(b *testing.B) {
			for iter := 0; iter < b.N; iter++ {
				if _, err := MsmG2(new(G2Point), points, scalars, 0); err != nil {
					panic("Error occured")
				}
			}
		})
	}
}
// TestCommitG2MSM copies 2^8 G2 points and scalars into CudaMalloc'ed
// buffers, runs CommitG2 on them, copies the result back and expects status 0
// and a result on the curve.
//
// The scalar buffer size is derived from G1ScalarField with unsafe.Sizeof
// rather than a hard-coded 32 bytes per scalar, matching how TestCommitMSM
// sizes the same buffer. Allocation failures abort the test.
func TestCommitG2MSM(t *testing.T) {
	for _, v := range []int{8} {
		count := 1 << v
		points := GenerateG2Points(count)
		fmt.Print("Finished generating points\n")
		scalars := GenerateScalars(count, false)
		fmt.Print("Finished generating scalars\n")

		var sizeCheckG2PointAffine G2PointAffine
		inputPointsBytes := count * int(unsafe.Sizeof(sizeCheckG2PointAffine))

		var sizeCheckG2Point G2Point
		outBytes := int(unsafe.Sizeof(sizeCheckG2Point))
		out_d, err := goicicle.CudaMalloc(outBytes)
		if err != nil {
			t.Fatalf("CudaMalloc for the result failed: %v", err)
		}

		points_d, err := goicicle.CudaMalloc(inputPointsBytes)
		if err != nil {
			t.Fatalf("CudaMalloc for the points failed: %v", err)
		}
		goicicle.CudaMemCpyHtoD[G2PointAffine](points_d, points, inputPointsBytes)

		var sizeCheckScalar G1ScalarField
		scalarBytes := count * int(unsafe.Sizeof(sizeCheckScalar))
		scalars_d, err := goicicle.CudaMalloc(scalarBytes)
		if err != nil {
			t.Fatalf("CudaMalloc for the scalars failed: %v", err)
		}
		goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)

		startTime := time.Now()
		e := CommitG2(out_d, scalars_d, points_d, count, 10)
		fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())

		outHost := make([]G2Point, 1)
		goicicle.CudaMemCpyDtoH[G2Point](outHost, out_d, outBytes)

		assert.Equal(t, e, 0, "error should be 0")
		assert.Equal(t, len(outHost), 1)
		result := outHost[0]
		assert.True(t, result.IsOnCurve())
	}
}
// TestBatchG2MSM runs MsmG2Batch for batch sizes 2^2 and 2^4 combined with
// MSM sizes 2^4 and 2^6, and expects one on-curve result per batch entry.
func TestBatchG2MSM(t *testing.T) {
	batchExponents := []int{2, 4}
	sizeExponents := []int{4, 6}

	for _, batchExp := range batchExponents {
		for _, sizeExp := range sizeExponents {
			perMsm := 1 << sizeExp
			batches := 1 << batchExp
			total := perMsm * batches

			points := GenerateG2Points(total)
			scalars := GenerateScalars(total, false)

			results, err := MsmG2Batch(&points, &scalars, batches, 0)
			if err != nil {
				t.Errorf("MsmBatchBW6761 returned an error: %v", err)
			}
			if got := len(results); got != batches {
				t.Errorf("Expected length %d, but got %d", batches, got)
			}
			for i := range results {
				assert.True(t, results[i].IsOnCurve())
			}
		}
	}
}

View File

@@ -0,0 +1,222 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bw6761
import (
"errors"
"fmt"
"unsafe"
"github.com/ingonyama-zk/icicle/goicicle"
)
// #cgo CFLAGS: -I./include/
// #cgo CFLAGS: -I/usr/local/cuda/include
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbw6761
// #include "ntt.h"
import "C"
// Untyped integer selectors. Their consumers are not visible in this part of
// the file.
// NOTE(review): DIF and DIT presumably stand for decimation-in-frequency and
// decimation-in-time — confirm against the code that uses them.
const (
NONE = 0
DIF = 1
DIT = 2
)
// Ntt passes the whole scalars slice to ntt_cuda_bw6_761 and returns the
// native status converted to uint64. The slice must be non-empty, because the
// address of its first element is taken.
func Ntt(scalars *[]G1ScalarField, isInverse bool, deviceId int) uint64 {
	data := *scalars
	status := C.ntt_cuda_bw6_761(
		(*C.BW6761_scalar_t)(unsafe.Pointer(&data[0])),
		C.uint32_t(len(data)),
		C.bool(isInverse),
		C.size_t(deviceId),
	)
	return uint64(status)
}
// NttBatch hands the slice behind scalars to the native ntt_batch_cuda_bw6_761
// routine together with batchSize and returns the native status code widened
// to uint64.
//
// The address of the first element is taken, so an empty slice panics.
// NOTE(review): the exact meaning of batchSize (number of batches vs. length
// of each batch) is defined by the native routine — confirm against ntt.h.
func NttBatch(scalars *[]G1ScalarField, isInverse bool, batchSize, deviceId int) uint64 {
	data := *scalars
	first := (*C.BW6761_scalar_t)(unsafe.Pointer(&data[0]))
	status := C.ntt_batch_cuda_bw6_761(
		first,
		C.uint32_t(len(data)),
		C.uint32_t(batchSize),
		C.bool(isInverse),
		C.size_t(deviceId),
	)
	return uint64(status)
}
// EcNtt hands the projective points behind values to the native
// ecntt_cuda_bw6_761 routine and returns its status code widened to uint64.
//
// The address of the first element is taken, so an empty slice panics.
func EcNtt(values *[]G1ProjectivePoint, isInverse bool, deviceId int) uint64 {
	pts := *values
	first := (*C.BW6761_projective_t)(unsafe.Pointer(&pts[0]))
	status := C.ecntt_cuda_bw6_761(first, C.uint32_t(len(pts)), C.bool(isInverse), C.size_t(deviceId))
	return uint64(status)
}
// EcNttBatch hands the projective points behind values to the native
// ecntt_batch_cuda_bw6_761 routine together with batchSize and returns the
// native status code widened to uint64.
//
// The address of the first element is taken, so an empty slice panics.
func EcNttBatch(values *[]G1ProjectivePoint, isInverse bool, batchSize, deviceId int) uint64 {
	pts := *values
	first := (*C.BW6761_projective_t)(unsafe.Pointer(&pts[0]))
	status := C.ecntt_batch_cuda_bw6_761(
		first,
		C.uint32_t(len(pts)),
		C.uint32_t(batchSize),
		C.bool(isInverse),
		C.size_t(deviceId),
	)
	return uint64(status)
}
// GenerateTwiddles calls the native build_domain_cuda_bw6_761 routine with the
// given domain size, its logarithm and the inverse flag (the two trailing
// native arguments are always 0) and returns the resulting pointer.
//
// A nil pointer from the native side is reported as an error.
func GenerateTwiddles(d_size int, log_d_size int, inverse bool) (up unsafe.Pointer, err error) {
	domain := C.build_domain_cuda_bw6_761(C.uint32_t(d_size), C.uint32_t(log_d_size), C.bool(inverse), 0, 0)
	if domain != nil {
		return unsafe.Pointer(domain), nil
	}
	return unsafe.Pointer(nil), errors.New("nullptr returned from generating twiddles")
}
// ReverseScalars reorders d_scalars in place through the native
// reverse_order_scalars_cuda_bw6_761 routine (trailing native arguments are
// always 0). It returns (0, nil) when the native status is zero and
// (-1, error) otherwise.
func ReverseScalars(d_scalars unsafe.Pointer, len int) (int, error) {
	status := C.reverse_order_scalars_cuda_bw6_761((*C.BW6761_scalar_t)(d_scalars), C.int(len), 0, 0)
	if status == 0 {
		return 0, nil
	}
	return -1, errors.New("reversing failed")
}
// Interpolate allocates an output buffer with goicicle.CudaMalloc (48 bytes
// per element), runs the native interpolate_scalars routine into it — the
// coset variant, which additionally receives cosetPowers, when isCoset is
// true — and returns the buffer.
//
// nil is returned only when the allocation fails.
// NOTE(review): a non-zero status from the native call is only printed; the
// freshly allocated buffer is still returned to the caller in that case.
func Interpolate(scalars, twiddles, cosetPowers unsafe.Pointer, size int, isCoset bool) unsafe.Pointer {
	outRaw, allocErr := goicicle.CudaMalloc(size * 48)
	if allocErr != nil {
		return nil
	}

	out := (*C.BW6761_scalar_t)(outRaw)
	in := (*C.BW6761_scalar_t)(scalars)
	tw := (*C.BW6761_scalar_t)(twiddles)
	coset := (*C.BW6761_scalar_t)(cosetPowers)
	n := C.uint(size)

	var status C.int
	switch {
	case isCoset:
		status = C.interpolate_scalars_on_coset_cuda_bw6_761(out, in, tw, n, coset, 0, 0)
	default:
		status = C.interpolate_scalars_cuda_bw6_761(out, in, tw, n, 0, 0)
	}

	if status != 0 {
		fmt.Print("error interpolating")
	}
	return unsafe.Pointer(out)
}
// Evaluate runs the native evaluate_scalars routine, writing into scalars_out.
// When isCoset is true the coset variant is used and coset_powers is passed
// along; otherwise coset_powers is ignored. The two trailing native arguments
// are always 0.
//
// It returns 0 when the native status is zero and -1 (after printing a
// message) otherwise.
func Evaluate(scalars_out, scalars, twiddles, coset_powers unsafe.Pointer, scalars_size, twiddles_size int, isCoset bool) int {
	scalars_outC := (*C.BW6761_scalar_t)(scalars_out)
	scalarsC := (*C.BW6761_scalar_t)(scalars)
	twiddlesC := (*C.BW6761_scalar_t)(twiddles)
	coset_powersC := (*C.BW6761_scalar_t)(coset_powers)
	sizeC := C.uint(scalars_size)
	twiddlesC_size := C.uint(twiddles_size)

	var ret C.int
	if isCoset {
		ret = C.evaluate_scalars_on_coset_cuda_bw6_761(scalars_outC, scalarsC, twiddlesC, twiddlesC_size, sizeC, coset_powersC, 0, 0)
	} else {
		ret = C.evaluate_scalars_cuda_bw6_761(scalars_outC, scalarsC, twiddlesC, twiddlesC_size, sizeC, 0, 0)
	}

	if ret != 0 {
		// This is the evaluation path; the message previously claimed interpolation.
		fmt.Print("error evaluating")
		return -1
	}
	return 0
}
// VecScalarAdd calls the native add_scalars_cuda_bw6_761 routine on two scalar
// vectors of the given size. in1_d is passed as both the first and the second
// native argument — presumably output and left operand, i.e. the sum replaces
// in1_d; confirm against the header.
//
// It returns 0 when the native status is zero and -1 (after printing a
// message) otherwise.
func VecScalarAdd(in1_d, in2_d unsafe.Pointer, size int) int {
	lhs := (*C.BW6761_scalar_t)(in1_d)
	rhs := (*C.BW6761_scalar_t)(in2_d)
	if status := C.add_scalars_cuda_bw6_761(lhs, lhs, rhs, C.uint(size), 0); status != 0 {
		fmt.Print("error adding scalar vectors")
		return -1
	}
	return 0
}
// VecScalarSub calls the native sub_scalars_cuda_bw6_761 routine on two scalar
// vectors of the given size. in1_d is passed as both the first and the second
// native argument — presumably output and left operand, i.e. the difference
// replaces in1_d; confirm against the header.
//
// It returns 0 when the native status is zero and -1 (after printing a
// message) otherwise.
func VecScalarSub(in1_d, in2_d unsafe.Pointer, size int) int {
	lhs := (*C.BW6761_scalar_t)(in1_d)
	rhs := (*C.BW6761_scalar_t)(in2_d)
	if status := C.sub_scalars_cuda_bw6_761(lhs, lhs, rhs, C.uint(size), 0); status != 0 {
		fmt.Print("error subtracting scalar vectors")
		return -1
	}
	return 0
}
// ToMontgomery calls the native to_montgomery_scalars_cuda_bw6_761 routine on
// len scalars starting at d_scalars. It returns (0, nil) when the native
// status is zero and (-1, error) otherwise.
func ToMontgomery(d_scalars unsafe.Pointer, len int) (int, error) {
	scalarsC := (*C.BW6761_scalar_t)(d_scalars)
	lenC := C.uint(len)
	if success := C.to_montgomery_scalars_cuda_bw6_761(scalarsC, lenC, 0); success != 0 {
		// The error previously read "reversing failed", copied from ReverseScalars.
		return -1, errors.New("converting scalars to montgomery form failed")
	}
	return 0, nil
}
// FromMontgomery calls the native from_montgomery_scalars_cuda_bw6_761 routine
// on len scalars starting at d_scalars. It returns (0, nil) when the native
// status is zero and (-1, error) otherwise.
func FromMontgomery(d_scalars unsafe.Pointer, len int) (int, error) {
	scalarsC := (*C.BW6761_scalar_t)(d_scalars)
	lenC := C.uint(len)
	if success := C.from_montgomery_scalars_cuda_bw6_761(scalarsC, lenC, 0); success != 0 {
		// The error previously read "reversing failed", copied from ReverseScalars.
		return -1, errors.New("converting scalars from montgomery form failed")
	}
	return 0, nil
}
// AffinePointFromMontgomery calls the native
// from_montgomery_aff_points_cuda_bw6_761 routine on len affine points
// starting at d_points. It returns (0, nil) when the native status is zero and
// (-1, error) otherwise.
func AffinePointFromMontgomery(d_points unsafe.Pointer, len int) (int, error) {
	pointsC := (*C.BW6761_affine_t)(d_points)
	lenC := C.uint(len)
	if success := C.from_montgomery_aff_points_cuda_bw6_761(pointsC, lenC, 0); success != 0 {
		// The error previously read "reversing failed", copied from ReverseScalars.
		return -1, errors.New("converting affine points from montgomery form failed")
	}
	return 0, nil
}
// G2AffinePointFromMontgomery calls the native
// from_montgomery_aff_points_g2_cuda_bw6_761 routine on len G2 affine points
// starting at d_points. It returns (0, nil) when the native status is zero and
// (-1, error) otherwise.
func G2AffinePointFromMontgomery(d_points unsafe.Pointer, len int) (int, error) {
	pointsC := (*C.BW6761_g2_affine_t)(d_points)
	lenC := C.uint(len)
	if success := C.from_montgomery_aff_points_g2_cuda_bw6_761(pointsC, lenC, 0); success != 0 {
		// The error previously read "reversing failed", copied from ReverseScalars.
		return -1, errors.New("converting g2 affine points from montgomery form failed")
	}
	return 0, nil
}

View File

@@ -0,0 +1,148 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bw6761
import (
"fmt"
"github.com/stretchr/testify/assert"
"reflect"
"testing"
)
// TestNttBW6761Batch copies 2^20 generated scalars, runs NttBatch on the copy
// and checks that the copy no longer equals the original input.
func TestNttBW6761Batch(t *testing.T) {
	count := 1 << 20
	scalars := GenerateScalars(count, false)

	nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
	copy(nttResult, scalars)
	assert.Equal(t, nttResult, scalars)

	NttBatch(&nttResult, false, count, 0)
	// A former self-comparison of nttResult could never fail and was dropped.
	assert.NotEqual(t, nttResult, scalars)
}
// TestNttBW6761CompareToGnarkDIF copies 4 generated scalars, runs a forward
// Ntt on the copy and checks that the copy no longer equals the input.
//
// NOTE(review): despite the name, no comparison against gnark is performed.
func TestNttBW6761CompareToGnarkDIF(t *testing.T) {
	count := 1 << 2
	scalars := GenerateScalars(count, false)

	nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
	copy(nttResult, scalars)
	assert.Equal(t, nttResult, scalars)

	Ntt(&nttResult, false, 0)
	// A former self-comparison of nttResult could never fail and was dropped.
	assert.NotEqual(t, nttResult, scalars)
}
// TestINttBW6761CompareToGnarkDIT copies 8 generated scalars, runs an inverse
// Ntt on the copy and checks that the copy no longer equals the input.
//
// NOTE(review): despite the name, no comparison against gnark is performed.
func TestINttBW6761CompareToGnarkDIT(t *testing.T) {
	count := 1 << 3
	scalars := GenerateScalars(count, false)

	nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
	copy(nttResult, scalars)
	assert.Equal(t, nttResult, scalars)

	Ntt(&nttResult, true, 0)
	// A former self-comparison of nttResult could never fail and was dropped.
	assert.NotEqual(t, nttResult, scalars)
}
// TestNttBW6761 is a round-trip check: a forward Ntt must change the input,
// and an inverse Ntt applied to that result must restore the original scalars.
func TestNttBW6761(t *testing.T) {
	size := 1 << 3
	original := GenerateScalars(size, false)

	// Forward transform on a private copy.
	forward := make([]G1ScalarField, len(original))
	copy(forward, original)
	assert.Equal(t, forward, original)
	Ntt(&forward, false, 0)
	assert.NotEqual(t, forward, original)

	// Inverse transform on a copy of the forward result.
	roundTrip := make([]G1ScalarField, len(forward))
	copy(roundTrip, forward)
	assert.Equal(t, roundTrip, forward)
	Ntt(&roundTrip, true, 0)
	assert.Equal(t, roundTrip, original)
}
// TestNttBatchBW6761 checks that one NttBatch call over `batches` consecutive
// chunks of `count` scalars yields, chunk by chunk, the same values as a
// separate forward Ntt on each chunk.
func TestNttBatchBW6761(t *testing.T) {
	count := 1 << 5
	batches := 4
	scalars := GenerateScalars(count*batches, false)

	// Batched transform over a copy of the whole input; count (the length of
	// each chunk) is what this test passes as NttBatch's batchSize argument.
	nttBatchResult := make([]G1ScalarField, len(scalars))
	copy(nttBatchResult, scalars)
	NttBatch(&nttBatchResult, false, count, 0)
	assert.NotEqual(t, nttBatchResult, scalars)

	// Check that the ntt of each chunk equals the matching slice of the batched ntt.
	for i := 0; i < batches; i++ {
		start, end := i*count, (i+1)*count

		// Transform a private copy so the reference input stays untouched.
		single := make([]G1ScalarField, count)
		copy(single, scalars[start:end])
		Ntt(&single, false, 0)

		if !reflect.DeepEqual(single, nttBatchResult[start:end]) {
			t.Errorf("batch %d: ntt of vec of scalars not equal to ntt of specific batch", i)
		}
	}
}
// BenchmarkNTT measures a forward Ntt for a range of log2 input sizes, one
// sub-benchmark per size. The same buffer is transformed repeatedly.
func BenchmarkNTT(b *testing.B) {
	LOG_NTT_SIZES := []int{12, 15, 20, 21, 22, 23, 24, 25, 26}

	for _, logNTTSize := range LOG_NTT_SIZES {
		nttSize := 1 << logNTTSize
		b.Run(fmt.Sprintf("NTT %d", logNTTSize), func(b *testing.B) {
			scalars := GenerateScalars(nttSize, false)

			nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
			copy(nttResult, scalars)

			// Keep input generation and the copy out of the measured time.
			b.ResetTimer()
			for n := 0; n < b.N; n++ {
				Ntt(&nttResult, false, 0)
			}
		})
	}
}

View File

@@ -0,0 +1,42 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bw6761
// #cgo CFLAGS: -I./include/
// #cgo CFLAGS: -I/usr/local/cuda/include
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbw6761
// #include "ve_mod_mult.h"
import "C"
import (
"fmt"
"unsafe"
)
// VecScalarMulMod calls the native vec_mod_mult_device_scalar_bw6_761 routine
// with the two scalar vectors and their size (the trailing native argument is
// always 0).
//
// It returns 0 when the native status is zero and -1 (after printing a
// message) otherwise.
func VecScalarMulMod(scalarVec1, scalarVec2 unsafe.Pointer, size int) int {
	lhs := (*C.BW6761_scalar_t)(scalarVec1)
	rhs := (*C.BW6761_scalar_t)(scalarVec2)
	if status := C.vec_mod_mult_device_scalar_bw6_761(lhs, rhs, C.size_t(size), 0); status != 0 {
		fmt.Print("error multiplying scalar vectors")
		return -1
	}
	return 0
}

View File

@@ -10,6 +10,7 @@ fi
TARGET_BN254="libbn254.so"
TARGET_BLS12_381="libbls12_381.so"
TARGET_BLS12_377="libbls12_377.so"
# The Go bindings link with -lbw6761, so the shared object to look for is libbw6761.so.
TARGET_BW6_671="libbw6761.so"
MAKE_FAIL=0
@@ -23,6 +24,7 @@ fi
TARGET_BN254_PATH=$(dirname "$(find `pwd` -name $TARGET_BN254 -print -quit)")/
TARGET_BLS12_381_PATH=$(dirname "$(find `pwd` -name $TARGET_BLS12_381 -print -quit)")/
TARGET_BLS12_377_PATH=$(dirname "$(find `pwd` -name $TARGET_BLS12_377 -print -quit)")/
TARGET_BW6_671_PATH=$(dirname "$(find `pwd` -name $TARGET_BW6_671 -print -quit)")/
if [[ "$TARGET_BLS12_377_PATH" != "" ]]; then
@@ -36,6 +38,11 @@ if [[ "$TARGET_BN254_PATH" != "" ]]; then
fi
if [[ "$TARGET_BLS12_381_PATH" != "" ]]; then
echo "BLS12_381_PATH found @ $TARGET_BLS12_381_PATH"
echo "BLS12_381 found @ $TARGET_BLS12_381_PATH"
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$TARGET_BLS12_381_PATH
fi
if [[ "$TARGET_BW6_671_PATH" != "" ]]; then
echo "BW6_671 found @ $TARGET_BW6_671_PATH"
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$TARGET_BW6_671_PATH
fi

View File

@@ -11,6 +11,16 @@ type Curve struct {
G2ElementSize int
}
// BW6_761 is the generator configuration for the bw6-761 curve bindings.
var BW6_761 = Curve{
// Go package name of the generated bindings.
PackageName: "bw6761",
// Spelling used in C type names such as BW6761_scalar_t.
CurveNameUpperCase: "BW6761",
// Spelling used as the suffix of C function names such as ntt_cuda_bw6_761.
CurveNameLowerCase: "bw6_761",
// Linker flag emitted into the generated cgo LDFLAGS line.
SharedLib: "-lbw6761",
// NOTE(review): presumably limb counts of the scalar and base field elements — confirm limb width against the templates.
ScalarSize: 12,
BaseSize: 24,
// NOTE(review): presumably the limb count of one G2Element — confirm against g2.go.tmpl.
G2ElementSize: 6,
}
var BN_254 = Curve{
PackageName: "bn254",
CurveNameUpperCase: "BN254",

View File

@@ -33,14 +33,6 @@ func (p *G2Point) Random() *G2Point {
return p
}
// FromAffine fills p from the given affine G2 point by calling the native
// g2_projective_from_affine routine of the templated curve, and returns p.
func (p *G2Point) FromAffine(affine *G2PointAffine) *G2Point {
// Reinterpret both Go structs as their C counterparts; the native call writes through out.
out := (*C.{{.CurveNameUpperCase}}_g2_projective_t)(unsafe.Pointer(p))
in := (*C.{{.CurveNameUpperCase}}_g2_affine_t)(unsafe.Pointer(affine))
C.g2_projective_from_affine_{{.CurveNameLowerCase}}(out, in)
return p
}
func (p *G2Point) Eq(pCompare *G2Point) bool {
// Cast *Point{{.CurveNameUpperCase}} to *C.{{.CurveNameUpperCase}}_projective_t
@@ -66,15 +58,13 @@ func (f *G2Element) ToBytesLe() []byte {
return bytes
}
func (p *G2PointAffine) ToProjective() G2Point {
return G2Point{
X: p.X,
Y: p.Y,
Z: ExtentionField{
A0: G2Element{1, 0, 0, 0},
A1: G2Element{0, 0, 0, 0},
},
}
// FromAffine fills p from the given affine G2 point by calling the native
// g2_projective_from_affine routine of the templated curve, and returns p.
func (p *G2Point) FromAffine(affine *G2PointAffine) *G2Point {
// Reinterpret both Go structs as their C counterparts; the native call writes through out.
out := (*C.{{.CurveNameUpperCase}}_g2_projective_t)(unsafe.Pointer(p))
in := (*C.{{.CurveNameUpperCase}}_g2_affine_t)(unsafe.Pointer(affine))
C.g2_projective_from_affine_{{.CurveNameLowerCase}}(out, in)
return p
}
func (p *G2PointAffine) FromProjective(projective *G2Point) *G2PointAffine {

View File

@@ -53,7 +53,8 @@ func TestG2ShouldConvertToProjective(t *testing.T) {
var pointAffine G2PointAffine
pointAffine.FromProjective(&pointProjective)
proj := pointAffine.ToProjective()
var proj G2Point
proj.FromAffine(&pointAffine)
assert.True(t, proj.IsOnCurve())
assert.True(t, pointProjective.Eq(&proj))

View File

@@ -31,9 +31,14 @@ func genMainFiles() {
{File: filepath.Join(baseDir, "bls12381", "g1.go"), Templates: []string{"g1.go.tmpl"}},
}
bw6761_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "g1.go"), Templates: []string{"g1.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./curves/", bls12377_entries...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./curves/", bn254_entries...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./curves/", bls12381_entries...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./curves/", bw6761_entries...))
bn254_g2_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "g2.go"), Templates: []string{"g2.go.tmpl"}},
@@ -47,9 +52,15 @@ func genMainFiles() {
{File: filepath.Join(baseDir, "bls12381", "g2.go"), Templates: []string{"g2.go.tmpl"}},
}
bw6761_g2_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "g2.go"), Templates: []string{"g2.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./curves/", bls12377_g2_entries...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./curves/", bn254_g2_entries...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./curves/", bls12381_g2_entries...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./curves/", bw6761_g2_entries...))
bn254_msm_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "msm.go"), Templates: []string{"msm.go.tmpl"}},
}
@@ -62,9 +73,14 @@ func genMainFiles() {
{File: filepath.Join(baseDir, "bls12381", "msm.go"), Templates: []string{"msm.go.tmpl"}},
}
bw6761_msm_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "msm.go"), Templates: []string{"msm.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./msm/", bls12377_msm_entries...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./msm/", bn254_msm_entries...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./msm/", bls12381_msm_entries...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./msm/", bw6761_msm_entries...))
bn254_ntt_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "ntt.go"), Templates: []string{"ntt.go.tmpl"}},
@@ -78,9 +94,14 @@ func genMainFiles() {
{File: filepath.Join(baseDir, "bls12381", "ntt.go"), Templates: []string{"ntt.go.tmpl"}},
}
bw6761_ntt_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "ntt.go"), Templates: []string{"ntt.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./ntt/", bls12377_ntt_entries...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./ntt/", bn254_ntt_entries...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./ntt/", bls12381_ntt_entries...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./ntt/", bw6761_ntt_entries...))
bn254_vec_mod_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "vec_mod.go"), Templates: []string{"vec_mod.go.tmpl"}},
@@ -94,9 +115,14 @@ func genMainFiles() {
{File: filepath.Join(baseDir, "bls12381", "vec_mod.go"), Templates: []string{"vec_mod.go.tmpl"}},
}
bw6761_vec_mod_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "vec_mod.go"), Templates: []string{"vec_mod.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./ops/", bls12377_vec_mod_entries...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./ops/", bn254_vec_mod_entries...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./ops/", bls12381_vec_mod_entries...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./ops/", bw6761_vec_mod_entries...))
h_msm_bn254 := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "include", "msm.h"), Templates: []string{"msm.h.tmpl"}},
@@ -110,9 +136,14 @@ func genMainFiles() {
{File: filepath.Join(baseDir, "bls12381", "include", "msm.h"), Templates: []string{"msm.h.tmpl"}},
}
h_msm_bw6761 := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "include", "msm.h"), Templates: []string{"msm.h.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./hfiles/", h_msm_bls12377...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./hfiles/", h_msm_bn254...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./hfiles/", h_msm_bls12381...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./hfiles/", h_msm_bw6761...))
h_ntt_bn254 := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "include", "ntt.h"), Templates: []string{"ntt.h.tmpl"}},
@@ -126,9 +157,14 @@ func genMainFiles() {
{File: filepath.Join(baseDir, "bls12381", "include", "ntt.h"), Templates: []string{"ntt.h.tmpl"}},
}
h_ntt_bw6761 := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "include", "ntt.h"), Templates: []string{"ntt.h.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./hfiles/", h_ntt_bls12377...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./hfiles/", h_ntt_bn254...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./hfiles/", h_ntt_bls12381...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./hfiles/", h_ntt_bw6761...))
ve_mod_mult_h_bn254 := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "include", "ve_mod_mult.h"), Templates: []string{"ve_mod_mult.h.tmpl"}},
@@ -142,9 +178,14 @@ func genMainFiles() {
{File: filepath.Join(baseDir, "bls12381", "include", "ve_mod_mult.h"), Templates: []string{"ve_mod_mult.h.tmpl"}},
}
ve_mod_mult_ht_bw6761 := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "include", "ve_mod_mult.h"), Templates: []string{"ve_mod_mult.h.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./hfiles/", ve_mod_mult_h_bls12377...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./hfiles/", ve_mod_mult_h_bn254...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./hfiles/", ve_mod_mult_ht_bls12381...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./hfiles/", ve_mod_mult_ht_bw6761...))
projective_bn254 := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "include", "projective.h"), Templates: []string{"projective.h.tmpl"}},
@@ -158,9 +199,14 @@ func genMainFiles() {
{File: filepath.Join(baseDir, "bls12381", "include", "projective.h"), Templates: []string{"projective.h.tmpl"}},
}
projective_bw6761 := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "include", "projective.h"), Templates: []string{"projective.h.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./hfiles/", projective_bls12377...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./hfiles/", projective_bn254...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./hfiles/", projective_bls12381...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./hfiles/", projective_bw6761...))
}
func genTestFiles() {
@@ -177,9 +223,14 @@ func genTestFiles() {
{File: filepath.Join(baseDir, "bls12381", "g1_test.go"), Templates: []string{"g1_test.go.tmpl"}},
}
bw6761_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "g1_test.go"), Templates: []string{"g1_test.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./curves/", bls12377_entries...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./curves/", bn254_entries...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./curves/", bls12381_entries...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./curves/", bw6761_entries...))
// G2 TESTS
bn254_entries_g2_test := []bavard.Entry{
@@ -194,9 +245,14 @@ func genTestFiles() {
{File: filepath.Join(baseDir, "bls12381", "g2_test.go"), Templates: []string{"g2_test.go.tmpl"}},
}
bw6761_entries_g2_test := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "g2_test.go"), Templates: []string{"g2_test.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./curves/", bls12377_entries_g2_test...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./curves/", bn254_entries_g2_test...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./curves/", bls12381_entries_g2_test...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./curves/", bw6761_entries_g2_test...))
// MSM TEST
bn254_entries_msm_test := []bavard.Entry{
@@ -211,9 +267,14 @@ func genTestFiles() {
{File: filepath.Join(baseDir, "bls12381", "msm_test.go"), Templates: []string{"msm_test.go.tmpl"}},
}
bw6761_entries_msm_test := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "msm_test.go"), Templates: []string{"msm_test.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./msm/", bls12377_entries_msm_test...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./msm/", bn254_entries_msm_test...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./msm/", bls12381_entries_msm_test...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./msm/", bw6761_entries_msm_test...))
// FFT TEST
bn254_entries_fft_test := []bavard.Entry{
@@ -228,9 +289,14 @@ func genTestFiles() {
{File: filepath.Join(baseDir, "bls12381", "ntt_test.go"), Templates: []string{"ntt_test.go.tmpl"}},
}
bw6761_entries_msm_test_entries_fft_test := []bavard.Entry{
{File: filepath.Join(baseDir, "bw6761", "ntt_test.go"), Templates: []string{"ntt_test.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./ntt/", bls12377_entries_fft_test...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./ntt/", bn254_entries_fft_test...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./ntt/", bls12381_entries_fft_test...))
assertNoError(bgen.Generate(config.BW6_761, config.BW6_761.PackageName, "./ntt/", bw6761_entries_msm_test_entries_fft_test...))
}
func main() {

View File

@@ -997,7 +997,7 @@ void batched_large_msm(
S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream)
{
unsigned c = get_optimal_c(msm_size);
unsigned bitsize = 255;
unsigned bitsize = S::NBITS;
batched_bucket_method_msm(bitsize, c, scalars, points, batch_size, msm_size, result, on_device, stream);
}
#endif

View File

@@ -5,7 +5,7 @@
#include "../../utils/sharedmem.cuh"
#include "../vector_manipulation/ve_mod_mult.cuh"
const uint32_t MAX_NUM_THREADS = 1024;
const uint32_t MAX_NUM_THREADS = 512;
const uint32_t MAX_THREADS_BATCH = 512; // TODO: allows 100% occupancy for scalar NTT for sm_86..sm_89
const uint32_t MAX_SHARED_MEM_ELEMENT_SIZE = 32; // TODO: occupancy calculator, hardcoded for sm_86..sm_89
const uint32_t MAX_SHARED_MEM = MAX_SHARED_MEM_ELEMENT_SIZE * 1024;

View File

@@ -9,17 +9,22 @@
#include "params.cuh"
namespace BLS12_377 {
typedef Field<PARAMS_BLS12_377::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_BLS12_377::fp_config> scalar_t;
typedef Field<PARAMS_BLS12_377::fq_config> point_field_t;
static constexpr point_field_t gen_x = point_field_t{PARAMS_BLS12_377::g1_gen_x};
static constexpr point_field_t gen_y = point_field_t{PARAMS_BLS12_377::g1_gen_y};
static constexpr point_field_t b = point_field_t{PARAMS_BLS12_377::weierstrass_b};
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Projective<point_field_t, scalar_t, b, gen_x, gen_y> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_BLS12_377::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{
static constexpr g2_point_field_t g2_gen_x =
g2_point_field_t{point_field_t{PARAMS_BLS12_377::g2_gen_x_re}, point_field_t{PARAMS_BLS12_377::g2_gen_x_im}};
static constexpr g2_point_field_t g2_gen_y =
g2_point_field_t{point_field_t{PARAMS_BLS12_377::g2_gen_y_re}, point_field_t{PARAMS_BLS12_377::g2_gen_y_im}};
static constexpr g2_point_field_t g2_b = g2_point_field_t{
point_field_t{PARAMS_BLS12_377::weierstrass_b_g2_re}, point_field_t{PARAMS_BLS12_377::weierstrass_b_g2_im}};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Projective<g2_point_field_t, scalar_t, g2_b, g2_gen_x, g2_gen_y> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
} // namespace BLS12_377
} // namespace BLS12_377

View File

@@ -4,7 +4,7 @@
namespace PARAMS_BLS12_377 {
struct fp_config {
static constexpr unsigned limbs_count = 8;
static constexpr unsigned omegas_count = 32;
static constexpr unsigned omegas_count = 47;
static constexpr unsigned modulus_bit_count = 253;
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
@@ -38,72 +38,102 @@ namespace PARAMS_BLS12_377 {
0x27b28e2f, 0x838557e2, 0x2290c02c, 0x07b30191};
static constexpr storage_array<omegas_count, limbs_count> omega = {
{{0xec2a895e, 0x476ef4a4, 0x63e3f04a, 0x9b506ee3, 0xd1a8a12f, 0x60c69477, 0x0cb92cc1, 0x11d4b7f6},
{0x01ab3a4d, 0x006f60fa, 0x814ba450, 0xe6600e15, 0xdf9eb147, 0xbde4df36, 0x33760d7b, 0x055d58fa},
{0xfdacff58, 0x8215b91d, 0x98331645, 0xd8d9177d, 0x439e803c, 0xe85223ad, 0xcca42c1f, 0x04aa8ef0},
{0x293f8481, 0xd52cc17a, 0x6f133205, 0x041178fb, 0xb2961832, 0xbbc70d18, 0x481760cd, 0x073d34d1},
{0x5e9020dd, 0xade9d4b4, 0x87db8813, 0x489259d2, 0x25051238, 0x5ddce740, 0xb5bc4d11, 0x0c775db1},
{0xd5fba57b, 0x90684fea, 0xe0defe98, 0xed237883, 0x030ae924, 0xc502b692, 0xe7a1ec2c, 0x08aa58e8},
{0x44ddbbdc, 0xbafb92a6, 0x26b01974, 0x63c7a02d, 0x5f28a274, 0x0ff86e13, 0x867f2e29, 0x0a7b462a},
{0x355dd694, 0x4258374d, 0x44c76a20, 0x5c31e8ac, 0xaa5fd062, 0x9b473969, 0x1a37b6b4, 0x0a693d77},
{0x22df9f13, 0x56313de8, 0x599e7536, 0xe2e75200, 0x6d163e50, 0xa1b4fce7, 0xc8111763, 0x0aec2172},
{0xf32d6bac, 0xa0b973d4, 0xf0d81b72, 0xae951889, 0x2e2daa0a, 0x51dbe098, 0x40d9af8f, 0x04679474},
{0x1b29736e, 0x8f267f19, 0x1d5a0c3a, 0xa2e04d58, 0x1ae99514, 0x76803064, 0x57f7c806, 0x12129439},
{0xbd83a3da, 0xd3b69b29, 0xe02ce197, 0x9543950f, 0xc2f87783, 0x80799665, 0xc15be215, 0x11ce8199},
{0xf284f768, 0xdeee484b, 0xe26a0475, 0x2a02e015, 0x88d968c2, 0xf0eb4925, 0x82a391c9, 0x0620ce9e},
{0xa90a2740, 0xfe3ca4f0, 0x512a7c7a, 0xd259ff36, 0xb41fe696, 0xbca3176a, 0xf33132ce, 0x05bd5ea3},
{0xb14361d4, 0x7f1db43f, 0x25ab6d51, 0x7927e578, 0x383bf21e, 0xb43e52a5, 0xd27fa99f, 0x077595e9},
{0xa9966ac4, 0x1ae0ea67, 0xda83fb3b, 0x4e2dbb1c, 0x0b51380e, 0xf77cf749, 0xb28a7670, 0x048b4b0e},
{0xa0234d2d, 0xe943054c, 0xe5f5be5e, 0x673b0ee0, 0x5048a19a, 0xcdd48e41, 0xabc3cb99, 0x0997d277},
{0x1912f7fa, 0x77d7da1d, 0x299fd7d6, 0xbcb7a5b2, 0x142a4480, 0x705e45dd, 0xb492dbd8, 0x0dc835fd},
{0x20b7298a, 0xd7652451, 0x65013b06, 0xc7c9a0b7, 0xad0d8457, 0x479b82a9, 0x0c99f5ce, 0x0bef1e5a},
{0xe5f8848a, 0x270a2326, 0xa727567d, 0x97d14afa, 0x48746fc7, 0x1a3a5a4e, 0xa42f077a, 0x0044e4b1},
{0x4dd87a5e, 0xf423a283, 0xd9a4c364, 0x1fe46601, 0xbfdc7e9b, 0xda4addbf, 0x3bf94b2b, 0x0a7f2bd8},
{0xf02ba42c, 0x553085d9, 0x1119b10d, 0x59662159, 0x6b8ea03f, 0xaa670958, 0x7ce92983, 0x066f6f5f},
{0xedc626c3, 0xf30e312d, 0xcf1f3a94, 0x8367a7ca, 0x917a1b28, 0x621e15e1, 0xf2e93b82, 0x07cd59f8},
{0xafeb494b, 0x97319dcd, 0x1d78404c, 0xab30c83e, 0xf26ffe90, 0x452d8a48, 0xa36452c7, 0x0bfc2e92},
{0xcc943028, 0xed2576ad, 0xfa4c6090, 0x846e49bc, 0x0049d8e6, 0xc74c1865, 0x665d7be5, 0x0e9c5a12},
{0xf45b9621, 0x102fbfb0, 0xf04faac0, 0xe80f4241, 0x7ca61177, 0x0b830bfd, 0x7033169d, 0x10521892},
{0x3358eb25, 0xdbc547bc, 0x722037db, 0x8909d398, 0x5e705b6d, 0x8b7075b5, 0x9bdaf407, 0x02694bb2},
{0x66a16869, 0x50c487c1, 0xd1fd4525, 0x380a66ab, 0x265e8539, 0xd455a01a, 0x064b5334, 0x0cd62875},
{0x4637701d, 0x0848f958, 0x4c8353af, 0x8a750076, 0x0ef6174a, 0x485f4e4f, 0xf38db632, 0x078d97a1},
{0x3d766f80, 0x1b4b71cf, 0x1069012d, 0x47d21195, 0x9151ebec, 0x5635235f, 0x2b13c808, 0x093f7d91},
{{0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e},
{0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765, 0x970dec00, 0x23ed1347, 0x00000000, 0x00000000},
{0xfbfa0a01, 0x0f830f7e, 0xd75769a0, 0x20f8b46c, 0xf05d5033, 0x7108bd18, 0x0788de01, 0x07405e08},
{0x60b9bdae, 0xc78085a6, 0x789094f5, 0x3116ec22, 0xce87d660, 0x0a02a81d, 0xc2a94856, 0x0ead8236},
{0x3e83a7cc, 0x6ffc39d9, 0x958a0a74, 0x117d996e, 0x0b92e8c9, 0xc242289d, 0x29d977d6, 0x0484efb4},
{0x0111ec3f, 0x15455b00, 0xc5f6be6f, 0x6b62d7af, 0x337f2d07, 0xfcba0365, 0x43fccd26, 0x0f151842},
{0xc31ec69b, 0x57951b2e, 0x2a37ce1f, 0x3e0a4be7, 0xcf3b198a, 0x960aeb4a, 0x341fd5cd, 0x04fb0673},
{0xa921851f, 0x71c1b78e, 0x7808f239, 0x3c26340c, 0x976fb990, 0xbcc8f69b, 0xe880dc71, 0x06a5edb2},
{0xc0f5679e, 0x7619eab5, 0x0dc0b9cd, 0x1f4cd10e, 0xbf6a480a, 0x7e1b70aa, 0x7f5461bb, 0x0ffc66da},
{0xec5cbab2, 0x8159806d, 0x498264a3, 0x14ea1333, 0xe3abfaa6, 0x56bbe1d8, 0x02aa031f, 0x09d2b5c4},
{0xc010c48a, 0xd2aa9562, 0x3b004b60, 0x447e5c11, 0x11e243bb, 0xd5a21c13, 0x0ab418b1, 0x01eab23e},
{0xacff6986, 0x08715ee8, 0xa93924d0, 0xab01878a, 0x6e9ae5c4, 0xbfbc5e71, 0x26b08d6e, 0x0f8000bf},
{0x3ddbc679, 0x06bc13b0, 0x615256ce, 0x7269a1f1, 0x1f5221a2, 0xf7716fbf, 0x8c66c14f, 0x0fa1f02c},
{0x906f531f, 0xdd40f131, 0x30728eff, 0xb06b29c7, 0x88839294, 0xc891fd19, 0x646978e8, 0x04e88447},
{0x6e259cdc, 0xb1e4b769, 0x00514e5e, 0xbcb0b709, 0x05113e7f, 0x74edb7c0, 0xe92e22af, 0x10c88511},
{0x240ede5b, 0xebb2e898, 0x42cd84c6, 0xc2639185, 0x9408f956, 0xf79e8391, 0x94e87a7d, 0x06872fa1},
{0x260678ff, 0xf8522249, 0xa8de9973, 0x6148cb16, 0x5a4e8d56, 0x5750f3f4, 0xbaeaf0c3, 0x0e805156},
{0x240ede5b, 0xebb2e898, 0x42cd84c6, 0xc2639185, 0x9408f956, 0xf79e8391, 0x94e87a7d, 0x06872fa1}}};
{0x3d766f80, 0x1b4b71cf, 0x1069012d, 0x47d21195, 0x9151ebec, 0x5635235f, 0x2b13c808, 0x093f7d91},
{0x4637701d, 0x0848f958, 0x4c8353af, 0x8a750076, 0x0ef6174a, 0x485f4e4f, 0xf38db632, 0x078d97a1},
{0x66a16869, 0x50c487c1, 0xd1fd4525, 0x380a66ab, 0x265e8539, 0xd455a01a, 0x064b5334, 0x0cd62875},
{0x3358eb25, 0xdbc547bc, 0x722037db, 0x8909d398, 0x5e705b6d, 0x8b7075b5, 0x9bdaf407, 0x02694bb2},
{0xf45b9621, 0x102fbfb0, 0xf04faac0, 0xe80f4241, 0x7ca61177, 0x0b830bfd, 0x7033169d, 0x10521892},
{0xcc943028, 0xed2576ad, 0xfa4c6090, 0x846e49bc, 0x0049d8e6, 0xc74c1865, 0x665d7be5, 0x0e9c5a12},
{0xafeb494b, 0x97319dcd, 0x1d78404c, 0xab30c83e, 0xf26ffe90, 0x452d8a48, 0xa36452c7, 0x0bfc2e92},
{0xedc626c3, 0xf30e312d, 0xcf1f3a94, 0x8367a7ca, 0x917a1b28, 0x621e15e1, 0xf2e93b82, 0x07cd59f8},
{0xf02ba42c, 0x553085d9, 0x1119b10d, 0x59662159, 0x6b8ea03f, 0xaa670958, 0x7ce92983, 0x066f6f5f},
{0x4dd87a5e, 0xf423a283, 0xd9a4c364, 0x1fe46601, 0xbfdc7e9b, 0xda4addbf, 0x3bf94b2b, 0x0a7f2bd8},
{0xe5f8848a, 0x270a2326, 0xa727567d, 0x97d14afa, 0x48746fc7, 0x1a3a5a4e, 0xa42f077a, 0x0044e4b1},
{0x20b7298a, 0xd7652451, 0x65013b06, 0xc7c9a0b7, 0xad0d8457, 0x479b82a9, 0x0c99f5ce, 0x0bef1e5a},
{0x1912f7fa, 0x77d7da1d, 0x299fd7d6, 0xbcb7a5b2, 0x142a4480, 0x705e45dd, 0xb492dbd8, 0x0dc835fd},
{0xa0234d2d, 0xe943054c, 0xe5f5be5e, 0x673b0ee0, 0x5048a19a, 0xcdd48e41, 0xabc3cb99, 0x0997d277},
{0xa9966ac4, 0x1ae0ea67, 0xda83fb3b, 0x4e2dbb1c, 0x0b51380e, 0xf77cf749, 0xb28a7670, 0x048b4b0e},
{0xb14361d4, 0x7f1db43f, 0x25ab6d51, 0x7927e578, 0x383bf21e, 0xb43e52a5, 0xd27fa99f, 0x077595e9},
{0xa90a2740, 0xfe3ca4f0, 0x512a7c7a, 0xd259ff36, 0xb41fe696, 0xbca3176a, 0xf33132ce, 0x05bd5ea3},
{0xf284f768, 0xdeee484b, 0xe26a0475, 0x2a02e015, 0x88d968c2, 0xf0eb4925, 0x82a391c9, 0x0620ce9e},
{0xbd83a3da, 0xd3b69b29, 0xe02ce197, 0x9543950f, 0xc2f87783, 0x80799665, 0xc15be215, 0x11ce8199},
{0x1b29736e, 0x8f267f19, 0x1d5a0c3a, 0xa2e04d58, 0x1ae99514, 0x76803064, 0x57f7c806, 0x12129439},
{0xf32d6bac, 0xa0b973d4, 0xf0d81b72, 0xae951889, 0x2e2daa0a, 0x51dbe098, 0x40d9af8f, 0x04679474},
{0x22df9f13, 0x56313de8, 0x599e7536, 0xe2e75200, 0x6d163e50, 0xa1b4fce7, 0xc8111763, 0x0aec2172},
{0x355dd694, 0x4258374d, 0x44c76a20, 0x5c31e8ac, 0xaa5fd062, 0x9b473969, 0x1a37b6b4, 0x0a693d77},
{0x44ddbbdc, 0xbafb92a6, 0x26b01974, 0x63c7a02d, 0x5f28a274, 0x0ff86e13, 0x867f2e29, 0x0a7b462a},
{0xd5fba57b, 0x90684fea, 0xe0defe98, 0xed237883, 0x030ae924, 0xc502b692, 0xe7a1ec2c, 0x08aa58e8},
{0x5e9020dd, 0xade9d4b4, 0x87db8813, 0x489259d2, 0x25051238, 0x5ddce740, 0xb5bc4d11, 0x0c775db1},
{0x293f8481, 0xd52cc17a, 0x6f133205, 0x041178fb, 0xb2961832, 0xbbc70d18, 0x481760cd, 0x073d34d1},
{0xfdacff58, 0x8215b91d, 0x98331645, 0xd8d9177d, 0x439e803c, 0xe85223ad, 0xcca42c1f, 0x04aa8ef0},
{0x01ab3a4d, 0x006f60fa, 0x814ba450, 0xe6600e15, 0xdf9eb147, 0xbde4df36, 0x33760d7b, 0x055d58fa},
{0xec2a895e, 0x476ef4a4, 0x63e3f04a, 0x9b506ee3, 0xd1a8a12f, 0x60c69477, 0x0cb92cc1, 0x11d4b7f6}}};
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
{{0xb9112c51, 0x2542c2b2, 0x6e23b3ce, 0x36ead8da, 0x76476754, 0x9a268d13, 0xa1ad7cf1, 0x121f44ad},
{0x3b3625b6, 0x1e62401f, 0x28471e5a, 0xd0692164, 0x5cad6b77, 0xb85aa9ec, 0xaa95acf2, 0x063e4b66},
{0x4ffa086a, 0xecc89610, 0xca06afc6, 0x4db82291, 0x8f3a6426, 0x9ae7c68c, 0x2a874432, 0x0b3dae8c},
{0xe3b4dc56, 0xa0594a67, 0x91b698e1, 0xc8e6b582, 0x8df78057, 0x711cadbf, 0x396466f8, 0x0049abdf},
{0x6464580f, 0x33e6c8c0, 0x3c4aa09f, 0x9d560eb3, 0xcc98f404, 0xb3f1a899, 0x8ca24b48, 0x012c1ea5},
{0xaf858193, 0x2b955be2, 0x5fb5e378, 0xa513d8be, 0xa326aeb9, 0x88c4ebeb, 0xf3d45990, 0x00c378e2},
{0x33bf2a1c, 0x842b0c9c, 0xa29b9236, 0x1fd43c95, 0xc06795d3, 0x6b37a603, 0x0c1b712a, 0x00017b17},
{0x526bf9fc, 0x023031cc, 0x79c209ba, 0x0e4136c0, 0x3ec42e5c, 0xe5234df1, 0x1d455234, 0x00cb9592},
{0xef01ed78, 0xf2828212, 0xf103c9ca, 0xa66094ac, 0x7a2d5573, 0xdceb481d, 0x8af46aab, 0x0190fcde},
{0x89b0ca6f, 0xb4d938e2, 0x2c897570, 0x0214eb59, 0x2d4cf27a, 0x56c45327, 0x3ed546a4, 0x10a2f358},
{0x78500f1a, 0x98310dd7, 0x735ccb27, 0x1c6050bf, 0xb2081df4, 0x07b6fa7f, 0xfa0f1e20, 0x003edf24},
{0xa39b02a3, 0x8a3de898, 0xdc94422c, 0x068b2992, 0xf493db31, 0x1c5f019a, 0x11b0f668, 0x066b1790},
{0xdddb58ec, 0x41f8042f, 0x10886d85, 0x7dd54384, 0x622ff4b4, 0x19544f90, 0x050cc539, 0x02f0b49a},
{0x7998b62c, 0xbb53132b, 0x22c9b4aa, 0x064a9186, 0x71d61334, 0xd56de253, 0x04e416f6, 0x10fcf25f},
{0xdf80223d, 0x55f432c9, 0x11a2fed9, 0x23daf2f6, 0x41ae8c34, 0x9e43e003, 0x95f22373, 0x0d51533b},
{0x78fd3239, 0xaf29730b, 0x40c3e723, 0xbd907ac9, 0x77f214f7, 0x5dcc0aad, 0xb05fb3a1, 0x02d958da},
{0x498fb549, 0xd5993cd5, 0x09da9272, 0x718adcee, 0x72bd5bc0, 0x9e03cbb4, 0xc592813f, 0x07206942},
{0xe978594b, 0x4ddd3320, 0x3abe3f79, 0xe5f36fbe, 0xe4dcff8e, 0x5dba9ef2, 0x7105148f, 0x0bfc27e2},
{0x3e47b53f, 0x50380ce2, 0x3a9613fc, 0x6ea3c2d3, 0x4c87ab50, 0xfe743105, 0xd192221c, 0x07871979},
{0x49c6284a, 0x9ba6aa00, 0xeacbdc63, 0x0b8429fb, 0xedafdf37, 0x9b9c6c5b, 0xad0c78c6, 0x009907e8},
{0x5d4e643c, 0x3da791ea, 0x85bff013, 0xb6a956ef, 0xd73de6a3, 0x86c629a8, 0x6b8c48a9, 0x0a5a5f55},
{0x4b9ac952, 0x3d29f5ba, 0xc8ea8f94, 0x7c7f2662, 0xcefc3052, 0x736ccb63, 0x0981f3cb, 0x04bfce2f},
{0x930cee0b, 0x432d3626, 0xf26e8ba3, 0x55ed3efb, 0x14c5457f, 0x802eebcc, 0xe2310f22, 0x00d300e3},
{0x60cf1330, 0x840f913b, 0x1df5ed87, 0x5610cde6, 0x72b36ddf, 0x858381b0, 0x6f64e0b7, 0x109bf66c},
{0x03ad3139, 0x01d3f431, 0xa137ce16, 0xe56f6002, 0x1deb42e8, 0x97f53369, 0xaa37cddd, 0x033fa9ac},
{0xc161761f, 0x271d7caf, 0xc369a371, 0xf1001d6f, 0x00e60f51, 0x65286415, 0xb74d14b8, 0x00b918f9},
{0xa26c8c12, 0xa6f4e1d1, 0xf6610f7e, 0x13571553, 0x56701caf, 0xd95e5df6, 0x2263d69d, 0x050e7b89},
{0x1d75bec9, 0xe29ef6c0, 0xd4b0183b, 0xead287a2, 0xedfd3795, 0x75a017cf, 0x64427c8e, 0x107f8d0f},
{0x00db2b48, 0xa43c0e02, 0x933d10ee, 0x76585489, 0xc0ba6a80, 0x12d64af1, 0x2fad8d8e, 0x01940f43},
{0x4b1b63a9, 0x12998cbc, 0xcf420c9f, 0x0f780c6c, 0x129289ad, 0xa5e48723, 0x240a141d, 0x0a3a1223},
{{0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e},
{0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99, 0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e},
{0xd60fb046, 0xc9fa190c, 0xc5b4674e, 0xdb5c179b, 0xbc7b8726, 0x2b2bce0b, 0xbf6e69bf, 0x0e4eb338},
{0x8ffc4ed5, 0x74732d1f, 0xb7f2eefc, 0x42d9f590, 0xa24dd4dd, 0xf70461e5, 0xef64676f, 0x03b6eba4},
{0x102bbab0, 0x5a21f98a, 0x8d8e2efb, 0xa6a147a9, 0x7612906f, 0x0eb4f005, 0x47d8d2e3, 0x0e1a5481},
{0xd01e5aa8, 0x6e509add, 0x6e3f123d, 0xe1582468, 0x8274db24, 0xbd6313ee, 0xd173a634, 0x05d5836e},
{0xe975c0cf, 0x6aab3344, 0x6f1dc38e, 0xca362e0e, 0x1dd1743a, 0x2fe72cda, 0xc1b4c4c2, 0x0c1c956e},
{0xec89a64f, 0x59fe97a0, 0xe8de5d4c, 0x579617d7, 0xc9c1ea7b, 0x256a305b, 0x53fa131b, 0x01ffae4e},
{0x29bcb088, 0x463a73ff, 0xe1438e80, 0xee9e9a5e, 0x3c9369e4, 0x2a00951f, 0x80a32052, 0x09711183},
{0x4bec8dd2, 0xa36899db, 0x96393687, 0x2946872e, 0x842df3c8, 0xd4b5734f, 0x5f5cd8fb, 0x0834098f},
{0xe3c711b9, 0x4bc485f6, 0x648d1d7e, 0xf43a2598, 0xee88abaa, 0x7f981a0e, 0xec6a3f27, 0x0c88c9c3},
{0x49046b52, 0x42bcc6c2, 0x56ab9ecc, 0xcc77294a, 0xe4df3ddd, 0x02ecb41a, 0x67f76726, 0x0e567d22},
{0x91c64fc2, 0x1cc56cc3, 0xd16a490b, 0x8cb71e65, 0x14fac366, 0x984be37e, 0xa25d7ba5, 0x0a08e032},
{0xd4f5941e, 0x966d9739, 0xe5772a73, 0x5805deb6, 0x5c1f970c, 0xe4eb0d33, 0xbdf35409, 0x039715db},
{0xcc6518ac, 0x8419686c, 0x9c7a2366, 0x96dec3a8, 0x71724384, 0xefbfcac6, 0xaf34c239, 0x0c44b99a},
{0xc18ff4fd, 0xcb66fe1b, 0x86c8d586, 0x588e18b3, 0x1dfab57c, 0xc6e6d2a3, 0x7d7d4efd, 0x10918ad2},
{0x97a18f58, 0x56d6cf22, 0xd0d7abd9, 0x11710758, 0x5eb7a9c5, 0xd1a6608b, 0xc4937e38, 0x04059bdb},
{0xc18ff4fd, 0xcb66fe1b, 0x86c8d586, 0x588e18b3, 0x1dfab57c, 0xc6e6d2a3, 0x7d7d4efd, 0x10918ad2}}};
{0x4b1b63a9, 0x12998cbc, 0xcf420c9f, 0x0f780c6c, 0x129289ad, 0xa5e48723, 0x240a141d, 0x0a3a1223},
{0x00db2b48, 0xa43c0e02, 0x933d10ee, 0x76585489, 0xc0ba6a80, 0x12d64af1, 0x2fad8d8e, 0x01940f43},
{0x1d75bec9, 0xe29ef6c0, 0xd4b0183b, 0xead287a2, 0xedfd3795, 0x75a017cf, 0x64427c8e, 0x107f8d0f},
{0xa26c8c12, 0xa6f4e1d1, 0xf6610f7e, 0x13571553, 0x56701caf, 0xd95e5df6, 0x2263d69d, 0x050e7b89},
{0xc161761f, 0x271d7caf, 0xc369a371, 0xf1001d6f, 0x00e60f51, 0x65286415, 0xb74d14b8, 0x00b918f9},
{0x03ad3139, 0x01d3f431, 0xa137ce16, 0xe56f6002, 0x1deb42e8, 0x97f53369, 0xaa37cddd, 0x033fa9ac},
{0x60cf1330, 0x840f913b, 0x1df5ed87, 0x5610cde6, 0x72b36ddf, 0x858381b0, 0x6f64e0b7, 0x109bf66c},
{0x930cee0b, 0x432d3626, 0xf26e8ba3, 0x55ed3efb, 0x14c5457f, 0x802eebcc, 0xe2310f22, 0x00d300e3},
{0x4b9ac952, 0x3d29f5ba, 0xc8ea8f94, 0x7c7f2662, 0xcefc3052, 0x736ccb63, 0x0981f3cb, 0x04bfce2f},
{0x5d4e643c, 0x3da791ea, 0x85bff013, 0xb6a956ef, 0xd73de6a3, 0x86c629a8, 0x6b8c48a9, 0x0a5a5f55},
{0x49c6284a, 0x9ba6aa00, 0xeacbdc63, 0x0b8429fb, 0xedafdf37, 0x9b9c6c5b, 0xad0c78c6, 0x009907e8},
{0x3e47b53f, 0x50380ce2, 0x3a9613fc, 0x6ea3c2d3, 0x4c87ab50, 0xfe743105, 0xd192221c, 0x07871979},
{0xe978594b, 0x4ddd3320, 0x3abe3f79, 0xe5f36fbe, 0xe4dcff8e, 0x5dba9ef2, 0x7105148f, 0x0bfc27e2},
{0x498fb549, 0xd5993cd5, 0x09da9272, 0x718adcee, 0x72bd5bc0, 0x9e03cbb4, 0xc592813f, 0x07206942},
{0x78fd3239, 0xaf29730b, 0x40c3e723, 0xbd907ac9, 0x77f214f7, 0x5dcc0aad, 0xb05fb3a1, 0x02d958da},
{0xdf80223d, 0x55f432c9, 0x11a2fed9, 0x23daf2f6, 0x41ae8c34, 0x9e43e003, 0x95f22373, 0x0d51533b},
{0x7998b62c, 0xbb53132b, 0x22c9b4aa, 0x064a9186, 0x71d61334, 0xd56de253, 0x04e416f6, 0x10fcf25f},
{0xdddb58ec, 0x41f8042f, 0x10886d85, 0x7dd54384, 0x622ff4b4, 0x19544f90, 0x050cc539, 0x02f0b49a},
{0xa39b02a3, 0x8a3de898, 0xdc94422c, 0x068b2992, 0xf493db31, 0x1c5f019a, 0x11b0f668, 0x066b1790},
{0x78500f1a, 0x98310dd7, 0x735ccb27, 0x1c6050bf, 0xb2081df4, 0x07b6fa7f, 0xfa0f1e20, 0x003edf24},
{0x89b0ca6f, 0xb4d938e2, 0x2c897570, 0x0214eb59, 0x2d4cf27a, 0x56c45327, 0x3ed546a4, 0x10a2f358},
{0xef01ed78, 0xf2828212, 0xf103c9ca, 0xa66094ac, 0x7a2d5573, 0xdceb481d, 0x8af46aab, 0x0190fcde},
{0x526bf9fc, 0x023031cc, 0x79c209ba, 0x0e4136c0, 0x3ec42e5c, 0xe5234df1, 0x1d455234, 0x00cb9592},
{0x33bf2a1c, 0x842b0c9c, 0xa29b9236, 0x1fd43c95, 0xc06795d3, 0x6b37a603, 0x0c1b712a, 0x00017b17},
{0xaf858193, 0x2b955be2, 0x5fb5e378, 0xa513d8be, 0xa326aeb9, 0x88c4ebeb, 0xf3d45990, 0x00c378e2},
{0x6464580f, 0x33e6c8c0, 0x3c4aa09f, 0x9d560eb3, 0xcc98f404, 0xb3f1a899, 0x8ca24b48, 0x012c1ea5},
{0xe3b4dc56, 0xa0594a67, 0x91b698e1, 0xc8e6b582, 0x8df78057, 0x711cadbf, 0x396466f8, 0x0049abdf},
{0x4ffa086a, 0xecc89610, 0xca06afc6, 0x4db82291, 0x8f3a6426, 0x9ae7c68c, 0x2a874432, 0x0b3dae8c},
{0x3b3625b6, 0x1e62401f, 0x28471e5a, 0xd0692164, 0x5cad6b77, 0xb85aa9ec, 0xaa95acf2, 0x063e4b66},
{0xb9112c51, 0x2542c2b2, 0x6e23b3ce, 0x36ead8da, 0x76476754, 0x9a268d13, 0xa1ad7cf1, 0x121f44ad}}};
static constexpr storage_array<omegas_count, limbs_count> inv = {
{{0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af},
@@ -137,11 +167,27 @@ namespace PARAMS_BLS12_377 {
{0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4, 0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e},
{0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e},
{0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e},
{0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e}}};
{0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e},
{0x7af74001, 0xa2117fff, 0x232ac481, 0x2b8e9efe, 0x2bdd8972, 0x139dfa73, 0x90d6f2a7, 0x12ab655e},
{0xbd7ba001, 0x56117fff, 0x79956241, 0xc29c8afe, 0xc40a9cb9, 0xba2923c8, 0x9581cbfe, 0x12ab655e},
{0xdebdd001, 0x30117fff, 0xa4cab121, 0x8e2380fe, 0x9021265d, 0x8d6eb873, 0x97d738aa, 0x12ab655e},
{0xef5ee801, 0x1d117fff, 0xba655891, 0x73e6fbfe, 0xf62c6b2f, 0x771182c8, 0x9901ef00, 0x12ab655e},
{0xf7af7401, 0x13917fff, 0xc532ac49, 0x66c8b97e, 0xa9320d98, 0x6be2e7f3, 0x99974a2b, 0x12ab655e},
{0xfbd7ba01, 0x0ed17fff, 0xca995625, 0xe039983e, 0x02b4decc, 0xe64b9a89, 0x99e1f7c0, 0x12ab655e},
{0xfdebdd01, 0x0c717fff, 0xcd4cab13, 0x1cf2079e, 0xaf764767, 0xa37ff3d3, 0x9a074e8b, 0x12ab655e},
{0xfef5ee81, 0x0b417fff, 0xcea6558a, 0x3b4e3f4e, 0x05d6fbb4, 0x021a2079, 0x9a19f9f1, 0x12ab655e},
{0xff7af741, 0x8aa97fff, 0xcf532ac5, 0xca7c5b26, 0xb10755da, 0xb16736cb, 0x9a234fa3, 0x12ab655e},
{0xffbd7ba1, 0x4a5d7fff, 0xcfa99563, 0x12136912, 0x069f82ee, 0x090dc1f5, 0x9a27fa7d, 0x12ab655e},
{0xffdebdd1, 0x2a377fff, 0xcfd4cab2, 0xb5def008, 0xb16b9977, 0xb4e10789, 0x9a2a4fe9, 0x12ab655e},
{0xffef5ee9, 0x9a247fff, 0xcfea6559, 0x87c4b383, 0x06d1a4bc, 0x0acaaa54, 0x9a2b7aa0, 0x12ab655e},
{0xfff7af75, 0x521affff, 0x4ff532ad, 0xf0b79541, 0x3184aa5e, 0x35bf7bb9, 0x9a2c0ffb, 0x12ab655e},
{0xfffbd7bb, 0x2e163fff, 0x0ffa9957, 0x25310620, 0xc6de2d30, 0xcb39e46b, 0x9a2c5aa8, 0x12ab655e},
{0xfffdebde, 0x1c13dfff, 0x6ffd4cac, 0xbf6dbe8f, 0x118aee98, 0x95f718c5, 0x9a2c7fff, 0x12ab655e}}};
};
struct fq_config {
// Number of limbs in one field element. The constants in this struct are written as
// 32-bit words, so 12 limbs give 384 bits of storage for the 377-bit modulus.
static constexpr unsigned limbs_count = 12;
// Declared row count of the omega / omega_inv / inv tables.
// NOTE(review): the three tables in this struct each list 46 rows, not 48 - confirm that
// the last two entries are never indexed, or that this count is intentional.
static constexpr unsigned omegas_count = 48;
// Bit length of the modulus.
static constexpr unsigned modulus_bit_count = 377;
static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44,
0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
@@ -176,37 +222,321 @@ namespace PARAMS_BLS12_377 {
// The zero element: all 12 limbs are 0.
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> montgomery_r = {0xffffff, 0xf73fffff, 0xffffff7a, 0xf4a2bbcf,
0xf6b7ffe8, 0x0c9dd045, 0x0aec70e1, 0xdd260cff,
0x5eb6c4e5, 0xc4fa3f93, 0x3aef1539, 0x51c5b9e8};
static constexpr storage<limbs_count> montgomery_r_inv = {0x934f3a1, 0xb0909a28, 0xc1cfac62, 0x3264aa55,
0x2a491ae8, 0xaccd49ca, 0xe80e9a61, 0x28b2dce9,
0x26f7c08a, 0x4d313ea1, 0x36254563, 0x161de1ee};
// Montgomery-arithmetic helper constants, 12 limbs each.
// NOTE(review): presumably R mod p and R^-1 mod p with R = 2^(32 * limbs_count) - confirm
// against the script that generates this file.
static constexpr storage<limbs_count> montgomery_r = {0xffffff68, 0x02cdffff, 0x7fffffb1, 0x51409f83,
0x8a7d3ff2, 0x9f7db3a9, 0x6e7c6305, 0x7b4e97b7,
0x803c84e8, 0x4cf495bf, 0xe2fdf49a, 0x008d6661};
static constexpr storage<limbs_count> montgomery_r_inv = {0x451269e8, 0xef129093, 0xe65839f5, 0x6e20bbcd,
0xa5582c93, 0x852e3c88, 0xf7f2e657, 0xeeaaf41d,
0xa4c49351, 0xeb89746c, 0x436b0736, 0x014212fc};
static constexpr storage_array<omegas_count, limbs_count> omega = {
{{0x00000000, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b,
0xc63b05c0, 0x17c510ea, 0x01ae3a46},
{0xf1391c63, 0x6e76d5ec, 0xbff27d8e, 0x99588459, 0x436b0f62, 0xbce649cf, 0x0ad1dec1, 0x400398f5, 0x1a79beb1,
0xc0c534db, 0x796537ca, 0x01680a40},
{0x554c85ba, 0x6cbff0e3, 0x0be8ff9d, 0xc07c7a91, 0x9dde4fa2, 0xc3c79f67, 0xb5726bde, 0x44bc6d1a, 0x76d6d607,
0xad812919, 0x95e8fd0e, 0x001bc0c2},
{0x6d5db237, 0xb8c206b0, 0xcabde6ba, 0x08fed85d, 0xcd92eb6f, 0xf2f54ffc, 0xe39c1788, 0xee81121f, 0x88e82edb,
0x852def4d, 0xb95fdb80, 0x00bf1268},
{0x192bf14f, 0x3663c26a, 0xe6351854, 0x99c859be, 0x159361b8, 0xf9430828, 0xfbe33d7d, 0x478ed715, 0xdb79c984,
0x41e220cf, 0xd961f2be, 0x00cedb38},
{0xcc724685, 0xb99caa69, 0x1388a46d, 0xc24087ba, 0x08f03491, 0xeb13a05a, 0x98fb0ff7, 0x558ab21e, 0x86bbd802,
0x0166d08d, 0xf5b5728a, 0x00d1dec9},
{0x92db32a2, 0x2e3951fe, 0x6014b201, 0x8f5a16c9, 0xa91fbb38, 0xa9e942b9, 0x17b4dbd2, 0xf7bf5b43, 0x81325c7d,
0x57f3934a, 0x615ad019, 0x012be78e},
{0xdce33f04, 0xb42b84a2, 0x0db0b91c, 0x7a0c1423, 0x88d9f8c8, 0xaed11a0c, 0xd484c501, 0x712d6bc0, 0xfa3f7633,
0x50aca1e5, 0xb90f34d0, 0x01002f29},
{0xf012f6a0, 0xbc3db054, 0x0d332ea7, 0x00d66897, 0xfd416167, 0x8278ef44, 0x20268e84, 0x1a1a3c4d, 0x4b11d215,
0x7c976aa6, 0x63b6e925, 0x00949581},
{0x339637c6, 0x9d73cf29, 0xa5642677, 0x8257d1a2, 0xcafd597c, 0xcb48f07f, 0x081435a3, 0x7a505010, 0xacbb9c39,
0xaaa45ce1, 0x7431b9c8, 0x013f2b13},
{0xd4710c0b, 0x9ef8bddb, 0x85047671, 0xb4c73188, 0x134695ba, 0x87a51d65, 0x022416dd, 0x67f3bc43, 0xcb2a157b,
0x21d965b2, 0x5ce4195d, 0x013a57e4},
{0xd2461368, 0xf2db3a9f, 0x3802aef2, 0x0595c232, 0x5ea85bd6, 0xa53d621a, 0xa34ee943, 0xce930fbc, 0x6b372bee,
0x1d216665, 0xa4535740, 0x009f0159},
{0x656bf68d, 0x73cf953a, 0xeac5c1d7, 0x50a5a5b5, 0xaa5355a9, 0x2697b2e1, 0x08de37d2, 0x6be70306, 0x44c5afab,
0x907f6976, 0xd4ec46b1, 0x0155cfa2},
{0x090e3e20, 0x034160c4, 0xf77a6fbb, 0xbc73cc59, 0x188e54f6, 0x437cd23b, 0x17e42614, 0x5a788edd, 0xebdc8eae,
0xf1ad4f54, 0x2f129bcd, 0x005d1440},
{0x4e269ee5, 0x5626c031, 0x0d1501ec, 0x5f97673e, 0x86d31c18, 0x4fe089bd, 0x62d1259a, 0x3e9fffcb, 0x1ff89d01,
0xe1898f32, 0x59d01a38, 0x00fa1331},
{0x38d427b1, 0xda80661b, 0xa814f14b, 0x1913027d, 0xcda4061d, 0xd3f61e24, 0x5da8fcb2, 0x9509e69d, 0x1f05e6d3,
0x0e7493a5, 0xa5c6bd06, 0x00dcb8db},
{0x61cff9ed, 0x88499d0a, 0x53718444, 0x0b317da2, 0x4b7eec5f, 0xc1624bfd, 0x5af10e6f, 0x6ffc3241, 0xd6c66ff2,
0x27d0edf3, 0x73ab0f4a, 0x013019b5},
{0x06027b24, 0x42dc7673, 0x3341b9e7, 0x018f8bbd, 0xa435f7e2, 0xd3b389d9, 0xea031176, 0x279739a5, 0x74c35801,
0x3555ca51, 0x049dcf87, 0x00748c30},
{0x81fe14de, 0x731b16f0, 0x333cc61a, 0x528d6ada, 0x5736dc15, 0x7ae87278, 0xc8bfd40c, 0xa94b9fd2, 0x299b0487,
0x714dd8ed, 0xf1a53233, 0x00642b62},
{0x5bc45170, 0x31270ddf, 0x7f72c758, 0x7efb6b06, 0xcf4973a8, 0x2eb9f2aa, 0xe556d234, 0xdcb534c9, 0x0e043fef,
0xf0b1a210, 0x54dda04e, 0x00e79c44},
{0x2d5f1bc2, 0x213b3f52, 0xfd933428, 0x9e115ba7, 0x434c9e2a, 0x7f77d57e, 0xcdb944ef, 0x47a78418, 0x699aa559,
0x8cb01cbb, 0xb064c4d7, 0x0075bf81},
{0x3fbfc66c, 0x0b6c2e65, 0x6fcab2f8, 0x7bece031, 0xb79dcd4d, 0x2ba7e325, 0xa5c6881b, 0x8c18f66a, 0x7283805a,
0x4d893e5a, 0xfc296bfe, 0x0107d3c5},
{0x948c881a, 0x53fbdbb4, 0x16803d18, 0xf27a9c14, 0xeddfafef, 0x8490f6c5, 0x3e57fa15, 0xfe068e1d, 0xd26b296b,
0xbe923119, 0x9fa377a1, 0x00d56016},
{0x6f5b2ad1, 0xb3bbaeb3, 0x11886a1c, 0x0efd4ba9, 0xdedb7083, 0x5911498f, 0x5bd0a90f, 0x0921fe19, 0x83d379cb,
0x38e05d4e, 0xb7ba3c73, 0x006b39e2},
{0xa55550ba, 0x61b560e4, 0xe7288461, 0xd9ac545b, 0xc6e3e282, 0xde8d2826, 0x7e49dd2c, 0x9e87a310, 0xc43080b7,
0xf2edfc44, 0x95b7d300, 0x012b4875},
{0x27591e60, 0x4048ddc3, 0xc5d21791, 0xb77c9738, 0x49826bea, 0xf2f82033, 0x42f97e95, 0xf60bb703, 0x5966139d,
0xef8f6f16, 0xc0e95e39, 0x00327618},
{0x441e395f, 0xf9059c8f, 0xbd087238, 0x29eab35f, 0x7dee5ff1, 0x5d4abeff, 0x771e60e9, 0x7222499b, 0x7ac324a2,
0xb70c1ea3, 0x0da51ce8, 0x015b3af9},
{0xe9a70026, 0xf7aa576b, 0x01c4a126, 0xb28733ef, 0xa3307647, 0x06b8e768, 0xe12588ce, 0x115500e1, 0x6c9f9b1d,
0x7e8dd6b9, 0x6ec020b3, 0x014d091e},
{0x8e5bbc8d, 0xd318265d, 0x141bee9b, 0x70b460ba, 0x1aa9df5b, 0x145dd6a6, 0xe3478cb3, 0xd9da2548, 0x7b509387,
0x47250509, 0xe967973c, 0x00de53d3},
{0xd2aa57b8, 0x5ff4399c, 0xa6ae9b07, 0x90360194, 0x6cfcdb7a, 0x68979991, 0x64e56abb, 0xf517467c, 0xad7a6573,
0x44227491, 0xa35ebf55, 0x0001da0b},
{0x4d80f6da, 0xd8b22d5a, 0x10ee1a06, 0x6e7b2bfb, 0x17faeac0, 0xac8d97e5, 0x7a12c923, 0x8b75540b, 0x5b42ce02,
0xa2787368, 0xe98d9998, 0x008d30a5},
{0x9dc292bb, 0xee29c02a, 0xc5b7e1c9, 0x9e7ea016, 0x9a908e5f, 0x62daf95d, 0x3e98eae9, 0x80a71c61, 0xfdda3bba,
0x2d514723, 0x068ef829, 0x00f65844},
{0x185b1ad6, 0xf62fdfa4, 0xf90ccbe6, 0x2ae7f104, 0x972ce78e, 0xfa435fb6, 0x45e59f91, 0x53a75d3c, 0x2f320b7a,
0x7290cac2, 0xe7cb5108, 0x01a2022a},
{0xd59dda24, 0xcf0a15be, 0xf2ec72b4, 0xbc77f6d4, 0x96c31202, 0xa8df0caf, 0xbb4f8842, 0xb95429c0, 0xd0087306,
0xb989b210, 0x5571e9f0, 0x002b1694},
{0x67ae536e, 0x7e84d4b5, 0xc8fb9b80, 0x3a920871, 0x1948ee86, 0x1a82df2b, 0xb3c66ed3, 0xdef79467, 0xef64d05a,
0x58fd84f2, 0xd999f400, 0x00c6d5b7},
{0x81ee0d53, 0x7639f9a2, 0xb5747565, 0x8ade807d, 0xe6235609, 0xfd9d6266, 0x53730f18, 0xea1948a3, 0xd890142e,
0xa356108a, 0xe3e8a723, 0x00a48ac6},
{0xd0ca5e04, 0x531c4b83, 0x2ba0a328, 0xff35ced6, 0xa4e563aa, 0x01613079, 0x1442dcd1, 0x6f52b3a3, 0x9e19b0a6,
0x813b4616, 0x9536db26, 0x004828c5},
{0x0bce1b4e, 0x8a9321a9, 0xae85d6ff, 0xb9759dbe, 0x5cb206e0, 0x1ce1d522, 0x35a1607a, 0x87df044f, 0x94e1329a,
0x2ebabee7, 0x73586cc9, 0x01a73170},
{0x3dd667f3, 0x69824754, 0x28fd63a2, 0x61a081a7, 0x99499385, 0x0b9f6d2e, 0x5c253e16, 0x6d45622b, 0x765a7f5f,
0xcd672e4d, 0x7150d847, 0x01182798},
{0x2742d2f6, 0x0af0bfd2, 0x3a02631d, 0x93616956, 0xac8a2203, 0x32dae751, 0x85cf4e2d, 0xea4ffbe7, 0x7dba6eb9,
0x673424f4, 0x61f4060d, 0x002ec230},
{0x5a5b5c2b, 0x226293ca, 0x0684dbc9, 0xbc0ca23e, 0x7d637c4f, 0x4510cf3a, 0x9b2f4a52, 0x7869c488, 0x2fd73a53,
0xec009b90, 0xa8c99cca, 0x003499d6},
{0xfd745afc, 0x9da60b0a, 0x41c5362e, 0xff0769ec, 0xfa9fd8ee, 0x487621e9, 0xab04558f, 0x138910d1, 0xc1ed03ce,
0x870903cf, 0xed3ffb51, 0x002c1cfa},
{0x42870c46, 0x271b1ff3, 0x13b4b491, 0x1e0a9cd1, 0x3c55c65e, 0x2d58cb1a, 0x74756f6e, 0xa6e12c32, 0x2e313bc4,
0xf774a43d, 0xcc386ffc, 0x00ca156d},
{0x4a67741c, 0x588f79b6, 0xc3590b63, 0xc0ae78b5, 0xc3576385, 0xad0bb97d, 0xb8473137, 0x0583dd49, 0x515d8604,
0xb31d9631, 0xd3ba3b12, 0x015337bc},
{0x8a458e8c, 0x976a14f5, 0xc3a26ae8, 0xc90809b4, 0x089acf15, 0x270a1575, 0x5013d4b1, 0x614a0d25, 0x6d09901e,
0x1314e076, 0xf208945e, 0x0022f414},
{0xc563b9a1, 0x7eca603c, 0x06fe0bc3, 0x06df0a43, 0x0ddff8c6, 0xb44d994a, 0x4512a3d4, 0x40fbe05b, 0x8aeffc9b,
0x30f15248, 0x05198a80, 0x0036a92e}}};
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
{{0x00000000, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b,
0xc63b05c0, 0x17c510ea, 0x01ae3a46},
{0x0ec6e39e, 0x1691ea13, 0x700d8272, 0x7db2d8ea, 0x769e389d, 0x620d1860, 0xf62334cd, 0xda1f40fd, 0x52278a89,
0x0575d0e5, 0x9e5fd920, 0x00463005},
{0x93997f11, 0x9403412c, 0xdfb2323f, 0x845557b3, 0x2d50c7fc, 0x66f2eaaa, 0xc103f92f, 0x992358fb, 0x5d7a3179,
0x01d60217, 0xd2af5da0, 0x0077b354},
{0xc1000ea4, 0x7ac2ca7a, 0x7f8d9495, 0x937db751, 0x0de62931, 0x401b3873, 0x980129ba, 0x59be7529, 0xa545a303,
0x2ba8f85d, 0xb6705512, 0x00573e3a},
{0x2c1b22e6, 0xb55712f9, 0x0f91cddd, 0x66cfc0f3, 0x8bb345d8, 0x8d5fcd42, 0x86c0abc3, 0x61e4cf98, 0x432fe8f3,
0x93556354, 0xad005fb6, 0x00ff87d5},
{0x7aba560e, 0x05065a97, 0x7918b9db, 0x333ff005, 0xdf6be708, 0x03938ae1, 0x7410a77b, 0x922d3376, 0x03a15063,
0xa5aeaa56, 0x4aea89e5, 0x01542cb6},
{0xe4d6a772, 0x61a6a2d6, 0x6e6239a7, 0xc18c9ef7, 0x04cac70f, 0x8772bb3f, 0x16c5916b, 0x8bbb4185, 0x46335dc0,
0x4aa656e2, 0x842c1664, 0x008187ac},
{0xdd4e93c5, 0xa002ea0a, 0x07458704, 0xb40a45e8, 0xbaa65f2a, 0xee9ee3ea, 0x8f3b8a87, 0xeffa4f9e, 0x95b5feba,
0xb6e03897, 0x81751c63, 0x003c41de},
{0x13043a4a, 0x50221a3b, 0xda73331a, 0x6537fca8, 0x8e85077c, 0x8b74cef4, 0x0e5bbe67, 0x65705341, 0xefa22d23,
0xf0f56caa, 0xd1865d98, 0x001f8eb5},
{0x3e26a605, 0xd9af8944, 0x6970166f, 0xad0efb6e, 0x2c7464ec, 0xc16d7972, 0xf788281b, 0xe0de4b04, 0xaa878b0e,
0x0c049e55, 0x63e2e7cd, 0x0135383a},
{0x6f6893f7, 0x6b12c42e, 0x44bbbf63, 0x831f38c0, 0x191be6c9, 0xa57797d4, 0x447475cb, 0x6af7f695, 0x4b8be189,
0x3295e9e7, 0x350d0aad, 0x00a9a32b},
{0x7656ef1d, 0xc2243f86, 0xf4211219, 0x3e4c3bc3, 0x3c9a3d21, 0xaa4db6e0, 0xe8a4c946, 0x29ac638a, 0xa4cf856e,
0x21449f8b, 0x7d4c9c67, 0x018cf097},
{0x6a8e0139, 0x18e472a2, 0xd6b1c835, 0xcc7c80fd, 0x6546fc0a, 0x1f760883, 0x4ea3417c, 0x5bcfc1fb, 0xe9acb8b0,
0x52c9a29b, 0xd9f265a2, 0x01a6d8b2},
{0xebb83ac0, 0x95eb1dc8, 0x9f390cf2, 0x1e8d70f5, 0xb0d85145, 0xf9e4955d, 0x89720ee1, 0xe9690d30, 0x50fc879f,
0x629972a5, 0x69ccd670, 0x00456e23},
{0x83f38be4, 0xfbfb11a1, 0x388e6726, 0xb90a19b9, 0xc860d62c, 0x3fc10bc7, 0xc3c4e575, 0xc9fe043e, 0x7396d780,
0x67aeff74, 0x01cadaee, 0x019059fa},
{0xfd581be8, 0x43506d6e, 0x018b1b76, 0xf09563e6, 0xe87f9d80, 0x5cd193b2, 0x0a933402, 0x18ba3260, 0x50524c77,
0x4de839d9, 0xd90315ce, 0x0018c2ed},
{0xa737701d, 0xf900eb81, 0x995e6672, 0x6874c90e, 0xa495900b, 0x69ade94a, 0xd07bd4b1, 0xd5f358e7, 0x6f88e8e4,
0xbd437e9d, 0x1d6b88cf, 0x0130d706},
{0xfc29b95f, 0x064629bd, 0xb546585c, 0x0a897bff, 0x54a80d9a, 0x856c8d4f, 0x944568ff, 0x85410cc4, 0x59fc4370,
0xc1978c65, 0xc668dc52, 0x017c86c8},
{0xf6109131, 0x65cecd55, 0x7d2f52e5, 0x6d7e892e, 0xb90b2403, 0xe9a09007, 0xae0a060d, 0x92ca9aac, 0xa22b1e96,
0x5ce1cc4f, 0x45201e6f, 0x012eb33c},
{0x20d1aac5, 0x9d2cb4cf, 0xded22997, 0x3e4a1e77, 0x07fae2e2, 0x09d692f7, 0xd49bdcbe, 0x6a6aa4f8, 0x09c01cab,
0xa8e21ead, 0x6b03b72e, 0x01a19e81},
{0x935650ca, 0xf3d94623, 0x2ffd937e, 0x4a688a46, 0xa622b139, 0xf55fd53a, 0x7a1a1e40, 0x227406aa, 0x9a3fea60,
0x40dd4504, 0x1edbb584, 0x00fc2332},
{0xf28db3fc, 0x9707402f, 0xc28593f1, 0x3d898bd7, 0xb30effcd, 0xcaee2dfd, 0x4fb6ec9d, 0xff1b0790, 0x09ed1120,
0x9cb0597e, 0xb78d15e9, 0x005c73a5},
{0xb0a8a3b9, 0x739a4c2e, 0xc57196ae, 0x083bde21, 0xba602f29, 0x247eb070, 0x1c2c7132, 0x4ba1dd6a, 0xe2187c6c,
0x4ce59fb6, 0x606880b1, 0x0014a7b5},
{0x484baf56, 0xdd0eccab, 0x4541b101, 0xe6c80eaf, 0xf7964f64, 0x35b8a558, 0xc50ccf94, 0xb3b824d4, 0x21c71aeb,
0xe1f6b4c8, 0x23031df0, 0x01a8a647},
{0x592a9620, 0x5338dc01, 0xd94a401b, 0xb217f96d, 0xf830b00e, 0xfefb6601, 0xafd3dee4, 0x1ec061b5, 0x05a199bd,
0x0d5d4d3c, 0xc8489913, 0x0196c768},
{0x1f980ca0, 0x4acb430e, 0x71c6821c, 0x8973a3cc, 0xb3e9aa75, 0x74414c20, 0x0c13f042, 0x79212a5f, 0x375c705b,
0x5c44d226, 0x29439af2, 0x000a2fdd},
{0xa387b60c, 0xf01901e6, 0x4561ff3d, 0xa7b1b7dc, 0x0558e085, 0x5d82d374, 0xf2bc1d29, 0x519298e5, 0x3d332207,
0x0ad719a8, 0xea19a807, 0x0150a138},
{0x9deb8e06, 0x7c6b3eb1, 0x28206b6c, 0x3a8f53c4, 0x7fed1065, 0x039f575f, 0x40c1f898, 0x31be74ba, 0x790ac003,
0x76db938e, 0x5508c5e4, 0x0096d5e1},
{0xb83f8358, 0x3e940e0e, 0x372a4b8b, 0x204d80e0, 0xa820b2ec, 0x956454b2, 0x2cc8078c, 0x8e2cb3d4, 0xc6f81363,
0xdd0d3e12, 0x49041a64, 0x0052f327},
{0x2aec0be2, 0x37ca2eb7, 0x555cc652, 0x05093570, 0xd2588d31, 0xe62f1adb, 0x798be240, 0x2fd2518e, 0x0ff6b579,
0x9302d4e3, 0x6ee95e5d, 0x0025ca57},
{0x233eed68, 0xcc664858, 0xece3a327, 0x600ca1ac, 0x93a2e34f, 0x330d1102, 0xdb5e3bb4, 0xc84ab55f, 0xe4d5576e,
0x5179c101, 0x0938f714, 0x00efb20e},
{0xfdddaf5c, 0x907f96e7, 0x1ffe49da, 0x348dab77, 0xc14ab779, 0x3eca44ad, 0x4cdc5d98, 0xe9b10b2e, 0xa95c5a36,
0x65a25d16, 0x6e616518, 0x00c9f759},
{0x7a5aff62, 0x9497d331, 0xb57cd01d, 0x21896195, 0x6c7ba745, 0xe09e22f7, 0x5a7acff0, 0xcc9f1064, 0xc93c46b0,
0x7b867cdf, 0x23eba5ae, 0x01a05dcb},
{0x4dcc71f4, 0xa56a8e33, 0xcbebdba2, 0xc480b083, 0x36ea43af, 0x748448fa, 0xe7859f3c, 0xee9b4b0e, 0x5af41919,
0x9ab2bb09, 0x65caa0ea, 0x0127262d},
{0x352a05cc, 0x77c7d12f, 0xdc7160c9, 0xb91ca5be, 0x5a3feda0, 0x245106da, 0x7669f7cd, 0xfd45012d, 0xdc5489fa,
0xc4774629, 0x2872daa0, 0x00241273},
{0x0d3e0b0b, 0x1838ae6f, 0xff67fc2c, 0x7fcc9b21, 0x23956100, 0xaedca59e, 0x1e79aa4b, 0x572ed634, 0xc7f0673c,
0xaeeda160, 0xc8047256, 0x00360e2c},
{0xe05044f9, 0xec5e4514, 0x7ec9b4ef, 0xe915b7e7, 0x9c4bec48, 0x9fb78cd8, 0xa38d95a3, 0xd7b84113, 0xb86fd119,
0x7be64440, 0xe4f9e70a, 0x009e3a60},
{0xc7435591, 0xc61cc546, 0xe5e94dc4, 0xea99a96f, 0xdb8ff17d, 0x5b10e2b4, 0x3dd0ff10, 0x13f8fb9d, 0xe118b9e9,
0xcbb1c0ce, 0x7ebf8a0d, 0x00b37258},
{0xce5943e7, 0xd44fdb9d, 0x79fa927a, 0xcb7d41ea, 0xdcee72ca, 0x9a4bcebf, 0x11634905, 0x2317799d, 0x584055ac,
0x3f1c302e, 0xdc2d0017, 0x013ef021},
{0xa78a1578, 0x345cb052, 0x5961b8fe, 0x1ed4d48a, 0x74a5e2af, 0x5858e93c, 0x0fd17e9f, 0xaf643f0a, 0x79d94009,
0x61530753, 0xde7b2f53, 0x010a3393},
{0x813925df, 0x548b1d28, 0xca3e79b6, 0xabab3a4e, 0x7e51071a, 0xb3c9c068, 0x6c5fcedb, 0x8014e879, 0x95d9facc,
0x3ba5db77, 0x7f5c3d2f, 0x0105c419},
{0x26bc1104, 0xbb9cbd28, 0xe03cc852, 0x27f09abb, 0x22e5be61, 0x02763b4a, 0xb94fa254, 0xa3940542, 0xff34c35f,
0xcf058850, 0x1482533c, 0x019f538f},
{0xb3f42de9, 0xf2126047, 0xbeb0a1b8, 0xdb0451c4, 0x9aabc291, 0x1a945bc0, 0x7fe3a6f2, 0x13d08312, 0x390e1c07,
0xd8fb13f1, 0x6b30562b, 0x005a41c4},
{0xe8b3d5dd, 0x1c60fcc5, 0x75b3a464, 0x5d7babba, 0xf3989910, 0x0d9f52c7, 0x9beec571, 0x464a2840, 0x79689d4b,
0x139c496f, 0x099e64c4, 0x0022c6a3},
{0x023e0cd1, 0x9df6c2d5, 0xa6b747de, 0x8e23def9, 0x90da6876, 0x7bc83eee, 0xc88bb007, 0xdaeac352, 0x68bb6a7f,
0x45cabb6f, 0x94697b34, 0x001e7154},
{0x0203d905, 0xffcee91d, 0xc99df56d, 0xd878ee01, 0x210d754c, 0xa0e882f9, 0x7d0aec6a, 0x26c96db8, 0x8ff7afe4,
0x46e2e145, 0x54749283, 0x015cd1b0}}};
static constexpr storage_array<omegas_count, limbs_count> inv = {
{{0x00000001, 0x42846000, 0x18000000, 0x0b85aea2, 0xdd04a400, 0x8f79b117, 0x807a89c7, 0x8d116cf9, 0x3650a49d,
0x631d82e0, 0x0be28875, 0x00d71d23},
{0x00000001, 0x63c69000, 0x24000000, 0x114885f3, 0xcb86f600, 0x573689a3, 0x40b7ceab, 0x539a2376, 0x5178f6ec,
0x14ac4450, 0x91d3ccb0, 0x0142abb4},
{0x00000001, 0x7467a800, 0xaa000000, 0x1429f19b, 0xc2c81f00, 0x3b14f5e9, 0xa0d6711d, 0xb6de7eb4, 0x5f0d2013,
0x6d73a508, 0x54cc6ecd, 0x017872fd},
{0x00000001, 0x7cb83400, 0xed000000, 0x159aa76f, 0xbe68b380, 0x2d042c0c, 0xd0e5c256, 0x6880ac53, 0x65d734a7,
0x19d75564, 0xb648bfdc, 0x019356a1},
{0x00000001, 0x80e07a00, 0x0e800000, 0x1653025a, 0x3c38fdc0, 0xa5fbc71e, 0x68ed6af2, 0x4151c323, 0x693c3ef1,
0x70092d92, 0xe706e863, 0x01a0c873},
{0x00000001, 0x82f49d00, 0x1f400000, 0x16af2fcf, 0xfb2122e0, 0xe27794a6, 0x34f13f40, 0x2dba4e8b, 0x6aeec416,
0x1b2219a9, 0xff65fca7, 0x01a7815c},
{0x00000001, 0x83feae80, 0xa7a00000, 0x16dd4689, 0x5a953570, 0x00b57b6b, 0x1af32968, 0xa3ee943f, 0xebc806a8,
0xf0ae8fb4, 0x8b9586c8, 0x01aaddd1},
{0x00000001, 0x8483b740, 0xebd00000, 0x16f451e6, 0x8a4f3eb8, 0x8fd46ecd, 0x0df41e7b, 0xdf08b719, 0xac34a7f1,
0xdb74caba, 0xd1ad4bd9, 0x01ac8c0b},
{0x00000001, 0x84c63ba0, 0x8de80000, 0x16ffd795, 0xa22c435c, 0x5763e87e, 0x07749905, 0x7c95c886, 0x8c6af896,
0x50d7e83d, 0xf4b92e62, 0x01ad6328},
{0x00000001, 0x84e77dd0, 0xdef40000, 0x17059a6c, 0x2e1ac5ae, 0x3b2ba557, 0x8434d64a, 0xcb5c513c, 0xfc8620e8,
0x8b8976fe, 0x863f1fa6, 0x01adceb7},
{0x00000001, 0x84f81ee8, 0x877a0000, 0x17087bd8, 0x741206d7, 0xad0f83c3, 0xc294f4ec, 0xf2bf9597, 0xb493b511,
0xa8e23e5f, 0xcf021848, 0x01ae047e},
{0x00000001, 0x85006f74, 0x5bbd0000, 0x9709ec8e, 0x970da76b, 0xe60172f9, 0x61c5043d, 0x867137c5, 0x109a7f26,
0xb78ea210, 0x73639499, 0x01ae1f62},
{0x00000001, 0x850497ba, 0x45de8000, 0xd70aa4e9, 0xa88b77b5, 0x827a6a94, 0x315d0be6, 0xd04a08dc, 0x3e9de430,
0x3ee4d3e8, 0x459452c2, 0x01ae2cd4},
{0x00000001, 0x8506abdd, 0xbaef4000, 0xf70b0116, 0x314a5fda, 0xd0b6e662, 0x99290fba, 0xf5367167, 0x559f96b5,
0x828fecd4, 0x2eacb1d6, 0x01ae338d},
{0x80000001, 0x8507b5ee, 0x7577a000, 0x870b2f2d, 0xf5a9d3ed, 0xf7d52448, 0x4d0f11a4, 0x87aca5ad, 0x61206ff8,
0xa465794a, 0xa338e160, 0x01ae36e9},
{0x40000001, 0x85083af7, 0xd2bbd000, 0xcf0b4638, 0x57d98df6, 0x0b64433c, 0x2702129a, 0xd0e7bfd0, 0x66e0dc99,
0xb5503f85, 0xdd7ef925, 0x01ae3897},
{0xa0000001, 0x85087d7b, 0x815de800, 0x730b51be, 0x08f16afb, 0x952bd2b6, 0x93fb9314, 0x75854ce1, 0xe9c112ea,
0x3dc5a2a2, 0xfaa20508, 0x01ae396e},
{0xd0000001, 0x85089ebd, 0x58aef400, 0xc50b5781, 0xe17d597d, 0xda0f9a72, 0x4a785351, 0xc7d4136a, 0xab312e12,
0x82005431, 0x89338af9, 0x01ae39da},
{0xe8000001, 0x8508af5e, 0xc4577a00, 0xee0b5a62, 0x4dc350be, 0x7c817e51, 0xa5b6b370, 0xf0fb76ae, 0x0be93ba6,
0x241dacf9, 0x507c4df2, 0x01ae3a10},
{0x74000001, 0x8508b7af, 0x7a2bbd00, 0x828b5bd3, 0x83e64c5f, 0xcdba7040, 0xd355e37f, 0x058f2850, 0xbc454271,
0x752c595c, 0x3420af6e, 0x01ae3a2b},
{0xba000001, 0x8508bbd7, 0xd515de80, 0xcccb5c8b, 0x1ef7ca2f, 0x7656e938, 0xea257b87, 0x0fd90121, 0x947345d6,
0x9db3af8e, 0xa5f2e02c, 0x01ae3a38},
{0xdd000001, 0x8508bdeb, 0x028aef40, 0xf1eb5ce8, 0xec808917, 0x4aa525b3, 0x758d478b, 0x94fded8a, 0x808a4788,
0xb1f75aa7, 0x5edbf88b, 0x01ae3a3f},
{0xee800001, 0x8508bef5, 0x194577a0, 0x047b5d16, 0xd344e88c, 0x34cc43f1, 0xbb412d8d, 0xd79063be, 0xf695c861,
0x3c193033, 0xbb5084bb, 0x01ae3a42},
{0xf7400001, 0x8508bf7a, 0x24a2bbd0, 0x0dc35d2d, 0xc6a71846, 0x29dfd310, 0xde1b208e, 0x78d99ed8, 0x319b88ce,
0x012a1afa, 0x698acad3, 0x01ae3a44},
{0x7ba00001, 0x8508bfbd, 0xaa515de8, 0x12675d38, 0x40583023, 0xa4699aa0, 0xef881a0e, 0xc97e3c65, 0x4f1e6904,
0xe3b2905d, 0x40a7edde, 0x01ae3a45},
{0xbdd00001, 0x8508bfde, 0x6d28aef4, 0x94b95d3e, 0xfd30bc11, 0xe1ae7e67, 0x783e96ce, 0xf1d08b2c, 0xdddfd91f,
0xd4f6cb0e, 0xac367f64, 0x01ae3a45},
{0x5ee80001, 0x8508bfef, 0x4e94577a, 0xd5e25d41, 0xdb9d0208, 0x0050f04b, 0xbc99d52f, 0x85f9b28f, 0xa540912d,
0xcd98e867, 0xe1fdc827, 0x01ae3a45},
{0xaf740001, 0x8508bff7, 0xbf4a2bbd, 0x7676dd42, 0xcad32504, 0x0fa2293d, 0x5ec7745f, 0x500e4641, 0x08f0ed34,
0x49e9f714, 0xfce16c89, 0x01ae3a45},
{0xd7ba0001, 0x0508bffb, 0x77a515df, 0x46c11d43, 0xc26e3682, 0x174ac5b6, 0x2fde43f7, 0xb518901a, 0x3ac91b37,
0x08127e6a, 0x0a533eba, 0x01ae3a46},
{0xebdd0001, 0xc508bffd, 0xd3d28aef, 0x2ee63d43, 0x3e3bbf41, 0x1b1f13f3, 0x9869abc3, 0x679db506, 0x53b53239,
0x6726c215, 0x110c27d2, 0x01ae3a46},
{0xf5ee8001, 0x2508bffe, 0x01e94578, 0xa2f8cd44, 0x7c2283a0, 0x1d093b11, 0xccaf5fa9, 0x40e0477c, 0xe02b3dba,
0x96b0e3ea, 0x14689c5e, 0x01ae3a46},
{0x7af74001, 0x5508bfff, 0x18f4a2bc, 0x5d021544, 0x9b15e5d0, 0x1dfe4ea0, 0xe6d2399c, 0xad8190b7, 0xa666437a,
0xae75f4d5, 0x1616d6a4, 0x01ae3a46},
{0xbd7ba001, 0x6d08bfff, 0x247a515e, 0x3a06b944, 0x2a8f96e8, 0x9e78d868, 0x73e3a695, 0xe3d23555, 0x0983c65a,
0xba587d4b, 0x16edf3c7, 0x01ae3a46},
{0xdebdd001, 0x7908bfff, 0x2a3d28af, 0x28890b44, 0xf24c6f74, 0x5eb61d4b, 0x3a6c5d12, 0xfefa87a4, 0xbb1287ca,
0x4049c185, 0x17598259, 0x01ae3a46},
{0xef5ee801, 0xff08bfff, 0x2d1e9457, 0x1fca3444, 0xd62adbba, 0xbed4bfbd, 0x9db0b850, 0x0c8eb0cb, 0x13d9e883,
0x034263a3, 0x178f49a2, 0x01ae3a46},
{0xf7af7401, 0x4208bfff, 0x2e8f4a2c, 0x1b6ac8c4, 0xc81a11dd, 0xeee410f6, 0x4f52e5ef, 0x1358c55f, 0xc03d98df,
0x64beb4b1, 0x17aa2d46, 0x01ae3a46},
{0xfbd7ba01, 0x6388bfff, 0x2f47a516, 0x993b1304, 0x4111acee, 0x86ebb993, 0x2823fcbf, 0x16bdcfa9, 0x166f710d,
0x957cdd39, 0x17b79f18, 0x01ae3a46},
{0xfdebdd01, 0x7448bfff, 0x2fa3d28b, 0x58233824, 0x7d8d7a77, 0x52ef8de1, 0x148c8827, 0x187054ce, 0xc1885d24,
0xaddbf17c, 0x17be5801, 0x01ae3a46},
{0xfef5ee81, 0xfca8bfff, 0x2fd1e945, 0xb7974ab4, 0x9bcb613b, 0x38f17808, 0x8ac0cddb, 0x99499760, 0x9714d32f,
0x3a0b7b9e, 0x17c1b476, 0x01ae3a46},
{0xff7af741, 0x40d8bfff, 0x2fe8f4a3, 0xe75153fc, 0x2aea549d, 0x2bf26d1c, 0xc5daf0b5, 0x59b638a9, 0x81db0e35,
0x802340af, 0x17c362b0, 0x01ae3a46},
{0xffbd7ba1, 0xe2f0bfff, 0x2ff47a51, 0xff2e58a0, 0xf279ce4e, 0x2572e7a5, 0x63680222, 0x39ec894e, 0xf73e2bb8,
0xa32f2337, 0x17c439cd, 0x01ae3a46},
{0xffdebdd1, 0x33fcbfff, 0x2ffa3d29, 0x8b1cdaf2, 0xd6418b27, 0xa23324ea, 0xb22e8ad8, 0xaa07b1a0, 0x31efba79,
0x34b5147c, 0x17c4a55c, 0x01ae3a46},
{0xffef5ee9, 0xdc82bfff, 0x2ffd1e94, 0xd1141c1b, 0x48256993, 0xe093438d, 0xd991cf33, 0x621545c9, 0x4f4881da,
0x7d780d1e, 0x17c4db23, 0x01ae3a46},
{0xfff7af75, 0xb0c5bfff, 0xaffe8f4a, 0xf40fbcaf, 0x811758c9, 0x7fc352de, 0x6d437161, 0xbe1c0fde, 0x5df4e58a,
0x21d9896f, 0x17c4f607, 0x01ae3a46},
{0xfffbd7bb, 0x9ae73fff, 0xefff47a5, 0x058d8cf9, 0x1d905065, 0x4f5b5a87, 0xb71c4278, 0xec1f74e8, 0xe54b1762,
0xf40a4797, 0x17c50378, 0x01ae3a46},
{0xfffdebde, 0x0ff7ffff, 0x0fffa3d3, 0x8e4c751f, 0x6bcccc32, 0xb7275e5b, 0xdc08ab03, 0x0321276d, 0x28f6304f,
0xdd22a6ac, 0x17c50a31, 0x01ae3a46}}};
// i^2, the square of the imaginary unit for the extension field.
// Only the magnitude is stored here; the sign is carried by i_squared_is_negative.
static constexpr uint32_t i_squared = 5;
// true if i^2 is negative; together with i_squared = 5 this encodes i^2 = -5
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
// Each coordinate is one field element of limbs_count (12) limbs.
// The G2 coordinates are split into a real (_re) and an imaginary (_im) component.
static constexpr storage<limbs_count> g1_gen_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512,
0xbd37cb5c, 0x188282c8, 0xaa9d41bb, 0x85951e2c,
0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
static constexpr storage<limbs_count> g1_gen_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36,
0x4fb82305, 0x6d182ad4, 0xca3e52d9, 0xbd7fb348,
0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
static constexpr storage<limbs_count> g2_gen_x_re = {0x7c005196, 0x74e3e48f, 0xbb535402, 0x71889f52,
0x57db6b9b, 0x7ea501f5, 0x203e5031, 0xc565f071,
0xa3841d01, 0xc89630a2, 0x71c785fe, 0x018480be};
static constexpr storage<limbs_count> g2_gen_x_im = {0x6ea16afe, 0xb26bfefa, 0xbff76fe6, 0x5cf89984,
0x0799c9de, 0xe7223ece, 0x6651cecb, 0x532777ee,
0xb1b140d5, 0x70dc5a51, 0xe7004031, 0x00ea6040};
static constexpr storage<limbs_count> g2_gen_y_re = {0x09fd4ddf, 0xf0940944, 0x6d8c7c2e, 0xf2cf8888,
0xf832d204, 0xe458c282, 0x74b49a58, 0xde03ed72,
0xcbb2efb4, 0xd960736b, 0x5d446f7b, 0x00690d66};
static constexpr storage<limbs_count> g2_gen_y_im = {0x85eb8f93, 0xd9a1cdd1, 0x5e52270b, 0x4279b83f,
0xcee304c2, 0x2463b01a, 0x3d591bf1, 0x61ef11ac,
0x151a70aa, 0x9e549da3, 0xd2835518, 0x00f8169f};
};
// G1 and G2 generators
// Every constant below is a storage<fq_config::limbs_count> initialised with twelve words.
// NOTE(review): weierstrass_b has only its first word set, which suggests the words are listed
// least-significant first - confirm against the storage<> definition.

// G1 generator, x coordinate.
static constexpr storage<fq_config::limbs_count> g1_gen_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512,
0xbd37cb5c, 0x188282c8, 0xaa9d41bb, 0x85951e2c,
0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
// G1 generator, y coordinate.
static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36,
0x4fb82305, 0x6d182ad4, 0xca3e52d9, 0xbd7fb348,
0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
// G2 generator, x coordinate: real (_re) and imaginary (_im) components of the extension-field element.
static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {0x7c005196, 0x74e3e48f, 0xbb535402, 0x71889f52,
0x57db6b9b, 0x7ea501f5, 0x203e5031, 0xc565f071,
0xa3841d01, 0xc89630a2, 0x71c785fe, 0x018480be};
static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {0x6ea16afe, 0xb26bfefa, 0xbff76fe6, 0x5cf89984,
0x0799c9de, 0xe7223ece, 0x6651cecb, 0x532777ee,
0xb1b140d5, 0x70dc5a51, 0xe7004031, 0x00ea6040};
// G2 generator, y coordinate: real (_re) and imaginary (_im) components of the extension-field element.
static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {0x09fd4ddf, 0xf0940944, 0x6d8c7c2e, 0xf2cf8888,
0xf832d204, 0xe458c282, 0x74b49a58, 0xde03ed72,
0xcbb2efb4, 0xd960736b, 0x5d446f7b, 0x00690d66};
static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {0x85eb8f93, 0xd9a1cdd1, 0x5e52270b, 0x4279b83f,
0xcee304c2, 0x2463b01a, 0x3d591bf1, 0x61ef11ac,
0x151a70aa, 0x9e549da3, 0xd2835518, 0x00f8169f};
// Constant `b` of the curve in Weierstrass form.
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};

View File

@@ -42,10 +42,10 @@ extern "C" int projective_from_affine_bls12_377(BLS12_377::projective_t* out, BL
}
}
extern "C" int random_scalar_bls12_377(BLS12_377::scalar_field_t* out)
extern "C" int random_scalar_bls12_377(BLS12_377::scalar_t* out)
{
try {
out[0] = BLS12_377::scalar_field_t::rand_host();
out[0] = BLS12_377::scalar_t::rand_host();
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());

View File

@@ -9,17 +9,22 @@
#include "params.cuh"
namespace BLS12_381 {
typedef Field<PARAMS_BLS12_381::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_BLS12_381::fp_config> scalar_t;
typedef Field<PARAMS_BLS12_381::fq_config> point_field_t;
static constexpr point_field_t gen_x = point_field_t{PARAMS_BLS12_381::g1_gen_x};
static constexpr point_field_t gen_y = point_field_t{PARAMS_BLS12_381::g1_gen_y};
static constexpr point_field_t b = point_field_t{PARAMS_BLS12_381::weierstrass_b};
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Projective<point_field_t, scalar_t, b, gen_x, gen_y> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_BLS12_381::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{
static constexpr g2_point_field_t g2_gen_x =
g2_point_field_t{point_field_t{PARAMS_BLS12_381::g2_gen_x_re}, point_field_t{PARAMS_BLS12_381::g2_gen_x_im}};
static constexpr g2_point_field_t g2_gen_y =
g2_point_field_t{point_field_t{PARAMS_BLS12_381::g2_gen_y_re}, point_field_t{PARAMS_BLS12_381::g2_gen_y_im}};
static constexpr g2_point_field_t g2_b = g2_point_field_t{
point_field_t{PARAMS_BLS12_381::weierstrass_b_g2_re}, point_field_t{PARAMS_BLS12_381::weierstrass_b_g2_im}};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Projective<g2_point_field_t, scalar_t, g2_b, g2_gen_x, g2_gen_y> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
} // namespace BLS12_381
} // namespace BLS12_381

View File

@@ -122,6 +122,42 @@ extern "C" int interpolate_scalars_batch_cuda_bls12_381(
}
}
/**
 * C entry point that forwards to `interpolate` with its fifth argument fixed to `true`
 * (presumably the "on coset" switch - confirm against the `interpolate` declaration).
 *
 * @param d_out         Output buffer handed to `interpolate`.
 * @param d_evaluations Input evaluations handed to `interpolate`.
 * @param d_domain      Domain values handed to `interpolate`.
 * @param n             Size argument handed to `interpolate`.
 * @param coset_powers  Coset data handed to `interpolate`.
 * @param device_id     Not used by this wrapper.
 * @param stream        Forwarded to `interpolate` unchanged.
 * @return Whatever `interpolate` returns, or -1 if it throws `std::runtime_error`.
 */
extern "C" int interpolate_scalars_on_coset_cuda_bls12_381(
  BLS12_381::scalar_t* d_out,
  BLS12_381::scalar_t* d_evaluations,
  BLS12_381::scalar_t* d_domain,
  unsigned n,
  BLS12_381::scalar_t* coset_powers,
  unsigned device_id = 0,
  cudaStream_t stream = 0)
{
  try {
    const auto status = interpolate(d_out, d_evaluations, d_domain, n, true, coset_powers, stream);
    return status;
  } catch (const std::runtime_error& failure) {
    // Report the failure on stdout; the -1 below is the error code seen by the caller.
    printf("error %s", failure.what());
  }
  return -1;
}
/**
 * C entry point that forwards to `interpolate_batch` with its sixth argument fixed to `true`
 * (presumably the "on coset" switch - confirm against the `interpolate_batch` declaration).
 *
 * The work runs on a CUDA stream created inside this function. That stream is now destroyed
 * before returning (previously it was leaked on every call), and a failure to create it is
 * reported instead of being ignored.
 *
 * @param d_out         Output buffer handed to `interpolate_batch`.
 * @param d_evaluations Input evaluations handed to `interpolate_batch`.
 * @param d_domain      Domain values handed to `interpolate_batch`.
 * @param n             Size argument handed to `interpolate_batch`.
 * @param batch_size    Batch-size argument handed to `interpolate_batch`.
 * @param coset_powers  Coset data handed to `interpolate_batch`.
 * @param device_id     Not used by this wrapper.
 * @param stream        Ignored on input: the value is overwritten by `cudaStreamCreate`.
 * @return Whatever `interpolate_batch` returns, or -1 if the stream cannot be created or
 *         `interpolate_batch` throws `std::runtime_error`.
 */
extern "C" int interpolate_scalars_batch_on_coset_cuda_bls12_381(
  BLS12_381::scalar_t* d_out,
  BLS12_381::scalar_t* d_evaluations,
  BLS12_381::scalar_t* d_domain,
  unsigned n,
  unsigned batch_size,
  BLS12_381::scalar_t* coset_powers,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  cudaError_t create_status = cudaStreamCreate(&stream);
  if (create_status != cudaSuccess) {
    printf("error %s", cudaGetErrorString(create_status));
    return -1;
  }

  int result = -1;
  try {
    result = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, true, coset_powers, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    result = -1;
  }

  // cudaStreamDestroy returns immediately; the runtime releases the stream once queued work has finished.
  cudaStreamDestroy(stream);
  return result;
}
extern "C" int interpolate_points_cuda_bls12_381(
BLS12_381::projective_t* d_out,
BLS12_381::projective_t* d_evaluations,
@@ -190,9 +226,7 @@ extern "C" int evaluate_scalars_batch_cuda_bls12_381(
try {
BLS12_381::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
auto result_code = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, 0);
cudaStreamDestroy(stream);
return result_code;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
@@ -231,10 +265,7 @@ extern "C" int evaluate_points_batch_cuda_bls12_381(
try {
BLS12_381::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
auto result_code =
evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
cudaStreamDestroy(stream);
return result_code;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
@@ -291,8 +322,7 @@ extern "C" int evaluate_points_on_coset_cuda_bls12_381(
cudaStream_t stream = 0)
{
try {
cudaStreamCreate(
&stream); // TODO: don't create if default was passed, destroy what was created, same applies to all calls
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
@@ -340,42 +370,21 @@ extern "C" int ntt_inplace_batch_cuda_bls12_381(
}
}
extern "C" int
reverse_order_scalars_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int ntt_inplace_coset_batch_cuda_bls12_381(
BLS12_381::scalar_t* d_inout,
BLS12_381::scalar_t* d_twiddles,
unsigned n,
unsigned batch_size,
bool inverse,
bool is_coset,
BLS12_381::scalar_t* coset,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try {
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_batch_cuda_bls12_381(
BLS12_381::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try {
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int
reverse_order_points_cuda_bls12_381(BLS12_381::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try {
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
ntt_inplace_batch_template(d_inout, d_twiddles, n, batch_size, inverse, is_coset, coset, stream, true);
return CUDA_SUCCESS; // TODO: we should implement this https://leimao.github.io/blog/Proper-CUDA-Error-Checking/
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
@@ -534,6 +543,49 @@ from_montgomery_aff_points_g2_cuda_bls12_381(BLS12_381::g2_affine_t* d_inout, un
}
#endif
/**
 * C entry point that runs `reverse_order` on `arr` using a CUDA stream created inside this function,
 * waits for that stream to finish, and then destroys it.
 *
 * Changes compared with the previous version: floor(log2(n)) is computed with integer shifts instead
 * of `log(n) / log(2)` (no floating-point rounding, and no undefined conversion for n <= 0), the
 * stream is no longer leaked, and CUDA errors from stream creation/synchronisation are reported.
 *
 * @param arr       Array handed to `reverse_order` (presumably device memory reordered in place - TODO confirm).
 * @param n         Number of elements; must be positive.
 * @param device_id Not used by this function.
 * @param stream    Ignored on input: the value is overwritten by `cudaStreamCreate`.
 * @return 0 on success; -1 if `n` is not positive, a CUDA call fails, or `reverse_order` throws
 *         `std::runtime_error`.
 */
extern "C" int
reverse_order_scalars_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  if (n <= 0) {
    printf("error %s", "n must be positive");
    return -1;
  }

  // Integer floor(log2(n)).
  uint32_t logn = 0;
  for (unsigned remaining = unsigned(n) >> 1; remaining != 0; remaining >>= 1) {
    ++logn;
  }

  cudaError_t cuda_status = cudaStreamCreate(&stream);
  if (cuda_status != cudaSuccess) {
    printf("error %s", cudaGetErrorString(cuda_status));
    return -1;
  }

  int result = 0;
  try {
    reverse_order(arr, n, logn, stream);
    cuda_status = cudaStreamSynchronize(stream);
    if (cuda_status != cudaSuccess) {
      printf("error %s", cudaGetErrorString(cuda_status));
      result = -1;
    }
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    result = -1;
  }

  // Release the private stream on every path, including after an exception.
  cudaStreamDestroy(stream);
  return result;
}
/**
 * C entry point that runs `reverse_order_batch` on `arr` using a CUDA stream created inside this
 * function, waits for that stream to finish, and then destroys it.
 *
 * Changes compared with the previous version: floor(log2(n)) is computed with integer shifts instead
 * of `log(n) / log(2)` (no floating-point rounding, and no undefined conversion for n <= 0), the
 * stream is no longer leaked, the function synchronises before returning (as
 * `reverse_order_scalars_cuda_bls12_381` does), and CUDA errors are reported.
 *
 * @param arr        Array handed to `reverse_order_batch` (presumably device memory holding
 *                   `batch_size` arrays of `n` elements - TODO confirm).
 * @param n          Size argument handed to `reverse_order_batch`; must be positive.
 * @param batch_size Batch-size argument handed to `reverse_order_batch`.
 * @param device_id  Not used by this function.
 * @param stream     Ignored on input: the value is overwritten by `cudaStreamCreate`.
 * @return 0 on success; -1 if `n` is not positive, a CUDA call fails, or `reverse_order_batch`
 *         throws `std::runtime_error`.
 */
extern "C" int reverse_order_scalars_batch_cuda_bls12_381(
  BLS12_381::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  if (n <= 0) {
    printf("error %s", "n must be positive");
    return -1;
  }

  // Integer floor(log2(n)).
  uint32_t logn = 0;
  for (unsigned remaining = unsigned(n) >> 1; remaining != 0; remaining >>= 1) {
    ++logn;
  }

  cudaError_t cuda_status = cudaStreamCreate(&stream);
  if (cuda_status != cudaSuccess) {
    printf("error %s", cudaGetErrorString(cuda_status));
    return -1;
  }

  int result = 0;
  try {
    reverse_order_batch(arr, n, logn, batch_size, stream);
    cuda_status = cudaStreamSynchronize(stream);
    if (cuda_status != cudaSuccess) {
      printf("error %s", cudaGetErrorString(cuda_status));
      result = -1;
    }
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    result = -1;
  }

  // Release the private stream on every path, including after an exception.
  cudaStreamDestroy(stream);
  return result;
}
/**
 * C entry point that runs `reverse_order` on an array of projective points using a CUDA stream
 * created inside this function, waits for that stream to finish, and then destroys it.
 *
 * Changes compared with the previous version: floor(log2(n)) is computed with integer shifts instead
 * of `log(n) / log(2)` (no floating-point rounding, and no undefined conversion for n <= 0), the
 * stream is no longer leaked, the function synchronises before returning (as
 * `reverse_order_scalars_cuda_bls12_381` does), and CUDA errors are reported.
 *
 * @param arr       Array handed to `reverse_order` (presumably device memory reordered in place - TODO confirm).
 * @param n         Number of elements; must be positive.
 * @param device_id Not used by this function.
 * @param stream    Ignored on input: the value is overwritten by `cudaStreamCreate`.
 * @return 0 on success; -1 if `n` is not positive, a CUDA call fails, or `reverse_order` throws
 *         `std::runtime_error`.
 */
extern "C" int
reverse_order_points_cuda_bls12_381(BLS12_381::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  if (n <= 0) {
    printf("error %s", "n must be positive");
    return -1;
  }

  // Integer floor(log2(n)).
  uint32_t logn = 0;
  for (unsigned remaining = unsigned(n) >> 1; remaining != 0; remaining >>= 1) {
    ++logn;
  }

  cudaError_t cuda_status = cudaStreamCreate(&stream);
  if (cuda_status != cudaSuccess) {
    printf("error %s", cudaGetErrorString(cuda_status));
    return -1;
  }

  int result = 0;
  try {
    reverse_order(arr, n, logn, stream);
    cuda_status = cudaStreamSynchronize(stream);
    if (cuda_status != cudaSuccess) {
      printf("error %s", cudaGetErrorString(cuda_status));
      result = -1;
    }
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    result = -1;
  }

  // Release the private stream on every path, including after an exception.
  cudaStreamDestroy(stream);
  return result;
}
extern "C" int reverse_order_points_batch_cuda_bls12_381(
BLS12_381::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
@@ -547,4 +599,4 @@ extern "C" int reverse_order_points_batch_cuda_bls12_381(
return -1;
}
}
#endif
#endif

View File

@@ -12,7 +12,7 @@ extern "C" int msm_cuda_bls12_381(
size_t count,
unsigned large_bucket_factor,
size_t device_id = 0,
cudaStream_t stream = 0) // TODO: unify parameter types size_t/unsigned etc
cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);

View File

@@ -3,38 +3,31 @@
namespace PARAMS_BLS12_381 {
struct fp_config {
// field structure size = 8 * 32 bit
static constexpr unsigned limbs_count = 8;
static constexpr unsigned omegas_count = 32;
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513
static constexpr unsigned modulus_bit_count = 255;
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402,
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805,
0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0xfffffffc, 0xfff96ffb, 0x4ef6900b,
0x26876015, 0xcce76020, 0xa675f520, 0xcfb69d4c};
static constexpr storage<2 * limbs_count> modulus_wide = {
0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2 * limbs_count> modulus_squared = {
0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
// 2*modulus^2
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
// note: doesn't actually fit into 384 bits, and shouldn't be used! It is added only for compilation
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
static constexpr unsigned modulus_bit_count = 255;
// m = floor(2^(2*modulus_bit_count) / modulus)
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad,
0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
0x00000004, 0xfffffff8, 0xfff2dffb, 0x9dfa401f, 0x736ec016, 0xa62b8008, 0x50cfdee1, 0x22a90579,
0x2aa71985, 0x09847dbd, 0x664d2877, 0x3a564fe5, 0xbcb3083c, 0x48304f6f, 0xd2f09de1, 0xd1fd83cf};
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad,
0xc1f823b4, 0x0e2d772d, 0x7fb78ddf, 0x8d54253b};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
@@ -44,322 +37,137 @@ namespace PARAMS_BLS12_381 {
static constexpr storage<limbs_count> montgomery_r_inv = {0xfe75c040, 0x13f75b69, 0x09dc705f, 0xab6fca8f,
0x4f77266a, 0x7204078a, 0x30009d57, 0x1bbe8693};
// static constexpr storage<limbs_count> omega[32]= { {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805,
// 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce,
// 0x00000000, 0x00000000}, {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660,
// 0x3f96405d}, {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e},
// {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb}, {0xac5db47f,
// 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac}, {0xab28e208, 0xb750da4c,
// 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802}, {0x2fe322b8, 0x2cabadec, 0x15412560,
// 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59}, {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c,
// 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667}, {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d,
// 0x3ed6d55a, 0x58f43cef, 0x2f27b098}, {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a,
// 0xca252472, 0x43527a8b}, {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b,
// 0x110cebd0}, {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8},
// {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8}, {0xa97eccd4,
// 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911}, {0xcfc35f7a, 0x137b458a,
// 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd}, {0x8831e03e, 0x10251f7d, 0x7ff858ec,
// 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333}, {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d,
// 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db}, {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253,
// 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83}, {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6,
// 0x5f686d91, 0x3436287f}, {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0,
// 0x6eee34d5}, {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3},
// {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd}, {0x1ab70e2c,
// 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc}, {0x59a2e8eb, 0x801c894c,
// 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd}, {0xcca1d8be, 0x810fa372, 0x82e0bfa7,
// 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580}, {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f,
// 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d}, {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a,
// 0x97ae418d, 0x5e3e7682, 0x2967385d}, {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73,
// 0x4a939684, 0x705aba4f}, {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29,
// 0x086d072b}, {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72},
// {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}}; Quick fix for
// linking issue
static constexpr storage<limbs_count> omega1 = {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402,
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
static constexpr storage<limbs_count> omega2 = {0x00000000, 0x00010000, 0x76030000, 0xec030002,
0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega3 = {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240,
0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d};
static constexpr storage<limbs_count> omega4 = {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672,
0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e};
static constexpr storage<limbs_count> omega5 = {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c,
0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb};
static constexpr storage<limbs_count> omega6 = {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6,
0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac};
static constexpr storage<limbs_count> omega7 = {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64,
0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802};
static constexpr storage<limbs_count> omega8 = {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3,
0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59};
static constexpr storage<limbs_count> omega9 = {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c,
0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667};
static constexpr storage<limbs_count> omega10 = {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0,
0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098};
static constexpr storage<limbs_count> omega11 = {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14,
0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b};
static constexpr storage<limbs_count> omega12 = {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171,
0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0};
static constexpr storage<limbs_count> omega13 = {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce,
0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8};
static constexpr storage<limbs_count> omega14 = {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727,
0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8};
static constexpr storage<limbs_count> omega15 = {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e,
0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911};
static constexpr storage<limbs_count> omega16 = {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a,
0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd};
static constexpr storage<limbs_count> omega17 = {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93,
0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333};
static constexpr storage<limbs_count> omega18 = {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d,
0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db};
static constexpr storage<limbs_count> omega19 = {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673,
0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83};
static constexpr storage<limbs_count> omega20 = {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa,
0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f};
static constexpr storage<limbs_count> omega21 = {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f,
0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5};
static constexpr storage<limbs_count> omega22 = {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42,
0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3};
static constexpr storage<limbs_count> omega23 = {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e,
0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd};
static constexpr storage<limbs_count> omega24 = {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31,
0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc};
static constexpr storage<limbs_count> omega25 = {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c,
0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd};
static constexpr storage<limbs_count> omega26 = {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28,
0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580};
static constexpr storage<limbs_count> omega27 = {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f,
0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d};
static constexpr storage<limbs_count> omega28 = {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a,
0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d};
static constexpr storage<limbs_count> omega29 = {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157,
0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f};
static constexpr storage<limbs_count> omega30 = {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e,
0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
static constexpr storage<limbs_count> omega31 = {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9,
0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
static constexpr storage<limbs_count> omega32 = {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2,
0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
static constexpr storage_array<omegas_count, limbs_count> omega = {
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8, omega9, omega10, omega11,
omega12, omega13, omega14, omega15, omega16, omega17, omega18, omega19, omega20, omega21, omega22,
omega23, omega24, omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
};
// static constexpr storage<limbs_count> omega_inv[32]={ {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402,
// 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334,
// 0xa5e80b39, 0x299d7d47, 0x73eda753}, {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff,
// 0x5ce11044, 0x1333b22e}, {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b,
// 0x551115b4}, {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c},
// {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee}, {0xcf28601b,
// 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d}, {0x6a2f777a, 0xe9561c17,
// 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25}, {0xf02a116e, 0xfb350dbe, 0xb4543a3e,
// 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e}, {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41,
// 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508}, {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1,
// 0xdeae67bc, 0x65ba213e, 0x394fda0d}, {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340,
// 0x6d174692, 0x58c3ba63}, {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f,
// 0x044107b7}, {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1},
// {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac}, {0x9ed57ae5,
// 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003}, {0x645e1cfa, 0x903a0a0c,
// 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c}, {0x14b1ba04, 0xb49d6b05, 0xf00b84f2,
// 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7}, {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b,
// 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950}, {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3,
// 0x167fce38, 0x6f5d6dfa, 0x545ad9b2}, {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b,
// 0x6fa2672c, 0x156cd7f6}, {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503,
// 0x47880cd5}, {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9},
// {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960}, {0x20238f62,
// 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6}, {0xe8bff41e, 0x65b09c73,
// 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf}, {0xd5fdb757, 0x8480c0e7, 0x365bf9fd,
// 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f}, {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d,
// 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533}, {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9,
// 0x2d44da3b, 0xfd09be59, 0x092778ff}, {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724,
// 0xf386c0d2, 0x24e5d287}, {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f,
// 0x0158abd6}, {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}};
// Quick fix for linking issue
static constexpr storage<limbs_count> omega_inv1 = {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402,
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
static constexpr storage<limbs_count> omega_inv2 = {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400,
0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753};
static constexpr storage<limbs_count> omega_inv3 = {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036,
0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e};
static constexpr storage<limbs_count> omega_inv4 = {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896,
0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4};
static constexpr storage<limbs_count> omega_inv5 = {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f,
0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c};
static constexpr storage<limbs_count> omega_inv6 = {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501,
0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee};
static constexpr storage<limbs_count> omega_inv7 = {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582,
0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d};
static constexpr storage<limbs_count> omega_inv8 = {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03,
0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25};
static constexpr storage<limbs_count> omega_inv9 = {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf,
0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e};
static constexpr storage<limbs_count> omega_inv10 = {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41,
0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508};
static constexpr storage<limbs_count> omega_inv11 = {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32,
0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d};
static constexpr storage<limbs_count> omega_inv12 = {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e,
0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63};
static constexpr storage<limbs_count> omega_inv13 = {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6,
0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7};
static constexpr storage<limbs_count> omega_inv14 = {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4,
0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1};
static constexpr storage<limbs_count> omega_inv15 = {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d,
0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac};
static constexpr storage<limbs_count> omega_inv16 = {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a,
0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003};
static constexpr storage<limbs_count> omega_inv17 = {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb,
0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c};
static constexpr storage<limbs_count> omega_inv18 = {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4,
0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7};
static constexpr storage<limbs_count> omega_inv19 = {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b,
0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950};
static constexpr storage<limbs_count> omega_inv20 = {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a,
0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2};
static constexpr storage<limbs_count> omega_inv21 = {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e,
0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6};
static constexpr storage<limbs_count> omega_inv22 = {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab,
0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5};
static constexpr storage<limbs_count> omega_inv23 = {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673,
0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9};
static constexpr storage<limbs_count> omega_inv24 = {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a,
0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960};
static constexpr storage<limbs_count> omega_inv25 = {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097,
0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6};
static constexpr storage<limbs_count> omega_inv26 = {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8,
0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf};
static constexpr storage<limbs_count> omega_inv27 = {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0,
0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f};
static constexpr storage<limbs_count> omega_inv28 = {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d,
0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533};
static constexpr storage<limbs_count> omega_inv29 = {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0,
0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff};
static constexpr storage<limbs_count> omega_inv30 = {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9,
0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287};
static constexpr storage<limbs_count> omega_inv31 = {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5,
0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
static constexpr storage<limbs_count> omega_inv32 = {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d,
0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};
{{0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753},
{0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000},
{0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d},
{0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e},
{0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb},
{0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac},
{0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802},
{0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59},
{0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667},
{0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098},
{0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b},
{0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0},
{0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8},
{0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8},
{0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911},
{0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd},
{0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333},
{0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db},
{0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83},
{0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f},
{0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5},
{0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3},
{0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd},
{0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc},
{0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd},
{0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580},
{0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d},
{0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d},
{0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f},
{0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b},
{0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72},
{0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}}};
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
};
// Quick fix for linking issue
static constexpr storage<limbs_count> inv1 = {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201,
0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
static constexpr storage<limbs_count> inv2 = {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02,
0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
static constexpr storage<limbs_count> inv3 = {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82,
0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268};
static constexpr storage<limbs_count> inv4 = {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2,
0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd};
static constexpr storage<limbs_count> inv5 = {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2,
0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18};
static constexpr storage<limbs_count> inv6 = {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72,
0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5};
static constexpr storage<limbs_count> inv7 = {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba,
0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04};
static constexpr storage<limbs_count> inv8 = {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e,
0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab};
static constexpr storage<limbs_count> inv9 = {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530,
0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f};
static constexpr storage<limbs_count> inv10 = {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499,
0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9};
static constexpr storage<limbs_count> inv11 = {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e,
0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e};
static constexpr storage<limbs_count> inv12 = {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828,
0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878};
static constexpr storage<limbs_count> inv13 = {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615,
0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5};
static constexpr storage<limbs_count> inv14 = {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c,
0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c};
static constexpr storage<limbs_count> inv15 = {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87,
0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77};
static constexpr storage<limbs_count> inv16 = {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045,
0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365};
static constexpr storage<limbs_count> inv17 = {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24,
0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c};
static constexpr storage<limbs_count> inv18 = {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13,
0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57};
static constexpr storage<limbs_count> inv19 = {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b,
0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5};
static constexpr storage<limbs_count> inv20 = {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7,
0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014};
static constexpr storage<limbs_count> inv21 = {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965,
0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3};
static constexpr storage<limbs_count> inv22 = {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4,
0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583};
static constexpr storage<limbs_count> inv23 = {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b,
0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b};
static constexpr storage<limbs_count> inv24 = {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf,
0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df};
static constexpr storage<limbs_count> inv25 = {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159,
0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719};
static constexpr storage<limbs_count> inv26 = {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae,
0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736};
static constexpr storage<limbs_count> inv27 = {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358,
0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744};
static constexpr storage<limbs_count> inv28 = {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad,
0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b};
static constexpr storage<limbs_count> inv29 = {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8,
0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f};
static constexpr storage<limbs_count> inv30 = {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed,
0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
static constexpr storage<limbs_count> inv31 = {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8,
0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
static constexpr storage<limbs_count> inv32 = {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd,
0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
{{0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753},
{0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753},
{0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e},
{0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4},
{0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c},
{0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee},
{0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d},
{0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25},
{0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e},
{0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508},
{0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d},
{0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63},
{0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7},
{0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1},
{0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac},
{0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003},
{0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c},
{0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7},
{0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950},
{0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2},
{0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6},
{0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5},
{0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9},
{0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960},
{0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6},
{0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf},
{0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f},
{0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533},
{0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff},
{0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287},
{0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6},
{0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}}};
static constexpr storage_array<omegas_count, limbs_count> inv = {
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8, inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24, inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
};
{{0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9},
{0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e},
{0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268},
{0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd},
{0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18},
{0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5},
{0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04},
{0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab},
{0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f},
{0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9},
{0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e},
{0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878},
{0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5},
{0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c},
{0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77},
{0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365},
{0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c},
{0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57},
{0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5},
{0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014},
{0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3},
{0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583},
{0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b},
{0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df},
{0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719},
{0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736},
{0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744},
{0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b},
{0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f},
{0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751},
{0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752},
{0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}}};
};
struct fq_config {
// field structure size = 12 * 32 bit
static constexpr unsigned limbs_count = 12;
// modulus =
// 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787
static constexpr unsigned modulus_bit_count = 381;
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe,
0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
// modulus*2 =
// 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd,
0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709,
0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
// modulus*4 =
// 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa,
0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13,
0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
0x0d2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
static constexpr storage<2 * limbs_count> modulus_wide = {
0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2
static constexpr storage<2 * limbs_count> modulus_squared = {
0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
// 2*modulus^2
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
// 4*modulus^2
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
static constexpr unsigned modulus_bit_count = 381;
// m = floor(2^(2*modulus_bit_count) / modulus)
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7,
0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
@@ -368,37 +176,38 @@ namespace PARAMS_BLS12_381 {
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> montgomery_r = {0x0005555, 0x60100000, 0xeac00004, 0x15400014,
0x94f09dbe, 0x8cf2d5f0, 0xc7aed409, 0xb88b47b0,
0xcb453289, 0x4e45849b, 0x6801965b, 0x5feee15c};
static constexpr storage<limbs_count> montgomery_r_inv = {0x05c40fe, 0xaa212c9c, 0xccfd7e14, 0x70093ae9,
0xc85a96b4, 0x6d05c02d, 0x025fecd3, 0x1f193851,
0xeb48f4c6, 0x84d32f44, 0xed8ffb1a, 0xbefcc91e};
static constexpr storage<limbs_count> montgomery_r = {0x0002fffd, 0x76090000, 0xc40c0002, 0xebf4000b,
0x53c758ba, 0x5f489857, 0x70525745, 0x77ce5853,
0xa256ec6d, 0x5c071a97, 0xfa80e493, 0x15f65ec3};
static constexpr storage<limbs_count> montgomery_r_inv = {0x380b4820, 0xf4d38259, 0xd898fafb, 0x7fe11274,
0x14956dc8, 0x343ea979, 0x58a88de9, 0x1797ab14,
0x3c4f538b, 0xed5e6427, 0xe8fb0ce9, 0x14fec701};
// i^2, the square of the imaginary unit for the extension field
static constexpr uint32_t i_squared = 1;
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
static constexpr storage<limbs_count> g1_gen_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f,
0x171bac58, 0xa14e3a3f, 0x9774b905, 0xc3688c4f,
0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
static constexpr storage<limbs_count> g1_gen_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744,
0x2c04b3ed, 0x00db18cb, 0xd5d00af6, 0xfcf5e095,
0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
static constexpr storage<limbs_count> g2_gen_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0x0bac0326,
0x7ae3d177, 0xb4510b64, 0xfa403b02, 0xc6e47ad4,
0x2dc51051, 0x26080527, 0xf08f0a91, 0x024aa2b2};
static constexpr storage<limbs_count> g2_gen_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112,
0xdc7f5049, 0xb5da61bb, 0x9920b61a, 0x596bd0d0,
0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
static constexpr storage<limbs_count> g2_gen_y_re = {0x08b82801, 0xe1935486, 0x3baca289, 0x923ac9cc,
0x5160d12c, 0x6d429a69, 0x8cbdd3a7, 0xadfd9baa,
0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0x0ce5d527};
static constexpr storage<limbs_count> g2_gen_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27,
0x572e99ab, 0x267492ab, 0x85a763af, 0xcb3e287e,
0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x0606c4a0};
};
// G1 and G2 generators
static constexpr storage<fq_config::limbs_count> g1_gen_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f,
0x171bac58, 0xa14e3a3f, 0x9774b905, 0xc3688c4f,
0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744,
0x2c04b3ed, 0x00db18cb, 0xd5d00af6, 0xfcf5e095,
0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0x0bac0326,
0x7ae3d177, 0xb4510b64, 0xfa403b02, 0xc6e47ad4,
0x2dc51051, 0x26080527, 0xf08f0a91, 0x024aa2b2};
static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112,
0xdc7f5049, 0xb5da61bb, 0x9920b61a, 0x596bd0d0,
0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {0x08b82801, 0xe1935486, 0x3baca289, 0x923ac9cc,
0x5160d12c, 0x6d429a69, 0x8cbdd3a7, 0xadfd9baa,
0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0x0ce5d527};
static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27,
0x572e99ab, 0x267492ab, 0x85a763af, 0xcb3e287e,
0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x0606c4a0};
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000004, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};

View File

@@ -42,10 +42,10 @@ extern "C" int projective_from_affine_bls12_381(BLS12_381::projective_t* out, BL
}
}
extern "C" int random_scalar_bls12_381(BLS12_381::scalar_field_t* out)
extern "C" int random_scalar_bls12_381(BLS12_381::scalar_t* out)
{
try {
out[0] = BLS12_381::scalar_field_t::rand_host();
out[0] = BLS12_381::scalar_t::rand_host();
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());

View File

@@ -2,4 +2,4 @@
#include "msm.cu"
#include "poseidon.cu"
#include "projective.cu"
#include "ve_mod_mult.cu"
#include "ve_mod_mult.cu"

View File

@@ -15,6 +15,8 @@ extern "C" int32_t vec_mod_mult_point_bls12_381(
size_t device_id,
cudaStream_t stream = 0)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try {
// TODO: device_id
vector_mod_mult<BLS12_381::projective_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
@@ -32,6 +34,8 @@ extern "C" int32_t vec_mod_mult_scalar_bls12_381(
size_t device_id,
cudaStream_t stream = 0)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try {
// TODO: device_id
vector_mod_mult<BLS12_381::scalar_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
@@ -42,6 +46,18 @@ extern "C" int32_t vec_mod_mult_scalar_bls12_381(
}
}
/**
 * C entry point that forwards to vector_mod_mult_device for BLS12_381 scalars.
 *
 * `inout` is passed both as the second operand and as the destination of
 * vector_mod_mult_device, so the result overwrites `inout`.
 *
 * @param inout      operand that also receives the result.
 * @param scalar_vec first operand handed to vector_mod_mult_device.
 * @param n_elements element count forwarded to vector_mod_mult_device.
 * @param device_id  currently unused.
 * @return CUDA_SUCCESS on success, -1 if a std::runtime_error was caught.
 */
extern "C" int32_t vec_mod_mult_device_scalar_bls12_381(
  BLS12_381::scalar_t* inout, BLS12_381::scalar_t* scalar_vec, size_t n_elements, size_t device_id)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  try {
    vector_mod_mult_device<BLS12_381::scalar_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elements);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what()); // TODO: error code and message
    return -1;
  }
}
extern "C" int32_t matrix_vec_mod_mult_bls12_381(
BLS12_381::scalar_t* matrix_flattened,
BLS12_381::scalar_t* input,
@@ -50,6 +66,8 @@ extern "C" int32_t matrix_vec_mod_mult_bls12_381(
size_t device_id,
cudaStream_t stream = 0)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try {
// TODO: device_id
matrix_mod_mult<BLS12_381::scalar_t>(matrix_flattened, input, output, n_elments, stream);
@@ -59,4 +77,4 @@ extern "C" int32_t matrix_vec_mod_mult_bls12_381(
return -1;
}
}
#endif
#endif

View File

@@ -9,17 +9,22 @@
#include "params.cuh"
namespace BN254 {
typedef Field<PARAMS_BN254::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_BN254::fp_config> scalar_t;
typedef Field<PARAMS_BN254::fq_config> point_field_t;
static constexpr point_field_t gen_x = point_field_t{PARAMS_BN254::g1_gen_x};
static constexpr point_field_t gen_y = point_field_t{PARAMS_BN254::g1_gen_y};
static constexpr point_field_t b = point_field_t{PARAMS_BN254::weierstrass_b};
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Projective<point_field_t, scalar_t, b, gen_x, gen_y> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_BN254::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{
static constexpr g2_point_field_t g2_gen_x =
g2_point_field_t{point_field_t{PARAMS_BN254::g2_gen_x_re}, point_field_t{PARAMS_BN254::g2_gen_x_im}};
static constexpr g2_point_field_t g2_gen_y =
g2_point_field_t{point_field_t{PARAMS_BN254::g2_gen_y_re}, point_field_t{PARAMS_BN254::g2_gen_y_im}};
static constexpr g2_point_field_t g2_b = g2_point_field_t{
point_field_t{PARAMS_BN254::weierstrass_b_g2_re}, point_field_t{PARAMS_BN254::weierstrass_b_g2_im}};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Projective<g2_point_field_t, scalar_t, g2_b, g2_gen_x, g2_gen_y> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
} // namespace BN254
} // namespace BN254

View File

@@ -33,8 +33,8 @@ namespace PARAMS_BN254 {
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> montgomery_r = {0x4ffffffb, 0xac96341c, 0x9f60cd29, 0x36fc7695,
0x7879462e, 0x666ea36f, 0x9a07df2f, 0xe0a77c1};
static constexpr storage<limbs_count> montgomery_r_inv = {0x6db1194e, 0xdc5ba005, 0xe111ec87, 0x90ef5a9,
0x7879462e, 0x666ea36f, 0x9a07df2f, 0x0e0a77c1};
static constexpr storage<limbs_count> montgomery_r_inv = {0x6db1194e, 0xdc5ba005, 0xe111ec87, 0x090ef5a9,
0xaeb85d5d, 0xc8260de4, 0x82c5551c, 0x15ebf951};
static constexpr storage_array<omegas_count, limbs_count> omega = {
@@ -155,30 +155,30 @@ namespace PARAMS_BN254 {
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> montgomery_r = {0xc58f0d9d, 0xd35d438d, 0xf5c70b3d, 0xa78eb28,
0x7879462c, 0x666ea36f, 0x9a07df2f, 0xe0a77c1};
static constexpr storage<limbs_count> montgomery_r_inv = {0x14afa37, 0xed84884a, 0x278edf8, 0xeb202285,
static constexpr storage<limbs_count> montgomery_r = {0xc58f0d9d, 0xd35d438d, 0xf5c70b3d, 0x0a78eb28,
0x7879462c, 0x666ea36f, 0x9a07df2f, 0x0e0a77c1};
static constexpr storage<limbs_count> montgomery_r_inv = {0x014afa37, 0xed84884a, 0x0278edf8, 0xeb202285,
0xb74492d9, 0xcf63e9cf, 0x59e5c639, 0x2e671571};
// i^2, the square of the imaginary unit for the extension field
static constexpr uint32_t i_squared = 1;
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
static constexpr storage<limbs_count> g1_gen_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> g1_gen_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> g2_gen_x_re = {0xd992f6ed, 0x46debd5c, 0xf75edadd, 0x674322d4,
0x5e5c4479, 0x426a0066, 0x121f1e76, 0x1800deef};
static constexpr storage<limbs_count> g2_gen_x_im = {0xaef312c2, 0x97e485b7, 0x35a9e712, 0xf1aa4933,
0x31fb5d25, 0x7260bfb7, 0x920d483a, 0x198e9393};
static constexpr storage<limbs_count> g2_gen_y_re = {0x66fa7daa, 0x4ce6cc01, 0x0c43d37b, 0xe3d1e769,
0x8dcb408f, 0x4aab7180, 0xdb8c6deb, 0x12c85ea5};
static constexpr storage<limbs_count> g2_gen_y_im = {0xd122975b, 0x55acdadc, 0x70b38ef3, 0xbc4b3133,
0x690c3395, 0xec9e99ad, 0x585ff075, 0x090689d0};
};
// G1 and G2 generators
static constexpr storage<fq_config::limbs_count> g1_gen_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {0xd992f6ed, 0x46debd5c, 0xf75edadd, 0x674322d4,
0x5e5c4479, 0x426a0066, 0x121f1e76, 0x1800deef};
static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {0xaef312c2, 0x97e485b7, 0x35a9e712, 0xf1aa4933,
0x31fb5d25, 0x7260bfb7, 0x920d483a, 0x198e9393};
static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {0x66fa7daa, 0x4ce6cc01, 0x0c43d37b, 0xe3d1e769,
0x8dcb408f, 0x4aab7180, 0xdb8c6deb, 0x12c85ea5};
static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {0xd122975b, 0x55acdadc, 0x70b38ef3, 0xbc4b3133,
0x690c3395, 0xec9e99ad, 0x585ff075, 0x090689d0};
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000003, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {

View File

@@ -42,10 +42,10 @@ extern "C" int projective_from_affine_bn254(BN254::projective_t* out, BN254::aff
}
}
extern "C" int random_scalar_bn254(BN254::scalar_field_t* out)
extern "C" int random_scalar_bn254(BN254::scalar_t* out)
{
try {
out[0] = BN254::scalar_field_t::rand_host();
out[0] = BN254::scalar_t::rand_host();
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());

View File

@@ -0,0 +1,30 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#if defined(G2_DEFINED)
#undef G2_DEFINED
#include "../bls12_377/params.cuh"
#define G2_DEFINED
#else
#include "../bls12_377/params.cuh"
#endif
#include "params.cuh"
// Type aliases and curve constants for BW6-761.
namespace BW6_761 {
  // The scalar field is instantiated from the bls12_377 fq_config.
  using scalar_t = Field<PARAMS_BLS12_377::fq_config>;
  // Field used for point coordinates.
  using point_field_t = Field<PARAMS_BW6_761::fq_config>;

  // G1 generator coordinates and the Weierstrass `b` constant, wrapped as field elements.
  static constexpr point_field_t gen_x = point_field_t{PARAMS_BW6_761::g1_gen_x};
  static constexpr point_field_t gen_y = point_field_t{PARAMS_BW6_761::g1_gen_y};
  static constexpr point_field_t b = point_field_t{PARAMS_BW6_761::weierstrass_b};

  using projective_t = Projective<point_field_t, scalar_t, b, gen_x, gen_y>;
  using affine_t = Affine<point_field_t>;

#if defined(G2_DEFINED)
  // G2 here is defined over the same point_field_t as G1 (no extension field type is used).
  static constexpr point_field_t g2_gen_x = point_field_t{PARAMS_BW6_761::g2_gen_x};
  static constexpr point_field_t g2_gen_y = point_field_t{PARAMS_BW6_761::g2_gen_y};
  static constexpr point_field_t g2_b = point_field_t{PARAMS_BW6_761::g2_weierstrass_b};

  using g2_projective_t = Projective<point_field_t, scalar_t, g2_b, g2_gen_x, g2_gen_y>;
  using g2_affine_t = Affine<point_field_t>;
#endif
} // namespace BW6_761

View File

@@ -0,0 +1,591 @@
#ifndef _BW6_761_LDE
#define _BW6_761_LDE
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "../../utils/mont.cuh"
#include "curve_config.cuh"
#include <cuda.h>
/**
 * Builds a twiddle-factor array for BW6_761 scalars via fill_twiddle_factors_array.
 *
 * @param domain_size number of twiddle factors requested.
 * @param logn        argument given to scalar_t::omega / scalar_t::omega_inv
 *                    (presumably log2 of the domain size - confirm against callers).
 * @param inverse     selects omega_inv(logn) when true, omega(logn) otherwise.
 * @param device_id   currently unused.
 * @param stream      NOTE: the incoming value is overwritten; the work runs on a
 *                    stream created (and released) inside this call.
 * @return the pointer returned by fill_twiddle_factors_array, or nullptr on failure.
 */
extern "C" BW6_761::scalar_t* build_domain_cuda_bw6_761(
  uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  bool stream_created = false;
  try {
    const cudaError_t err = cudaStreamCreate(&stream);
    if (err != cudaSuccess) {
      printf("error %s", cudaGetErrorString(err));
      return nullptr;
    }
    stream_created = true;

    BW6_761::scalar_t* twiddles = nullptr;
    if (inverse) {
      twiddles = fill_twiddle_factors_array(domain_size, BW6_761::scalar_t::omega_inv(logn), stream);
    } else {
      twiddles = fill_twiddle_factors_array(domain_size, BW6_761::scalar_t::omega(logn), stream);
    }
    // Release the per-call stream; cudaStreamDestroy does not wait for pending work,
    // the stream's resources are freed once that work has completed.
    cudaStreamDestroy(stream);
    return twiddles;
  } catch (const std::runtime_error& ex) {
    if (stream_created) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return nullptr;
  }
}
/**
 * C entry point running ntt_end2end_template on an array of BW6_761 scalars.
 *
 * @param arr       array handed to ntt_end2end_template.
 * @param n         element count forwarded to ntt_end2end_template.
 * @param inverse   forwarded as the inverse flag.
 * @param device_id currently unused.
 * @param stream    NOTE: the incoming value is overwritten; the work runs on a
 *                  stream created (and released) inside this call.
 * @return the value returned by ntt_end2end_template, or -1 on failure.
 */
extern "C" int
ntt_cuda_bw6_761(BW6_761::scalar_t* arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  bool stream_created = false;
  try {
    const cudaError_t err = cudaStreamCreate(&stream);
    if (err != cudaSuccess) {
      printf("error %s", cudaGetErrorString(err));
      return -1;
    }
    stream_created = true;

    const int status = ntt_end2end_template<BW6_761::scalar_t, BW6_761::scalar_t>(arr, n, inverse, stream);
    // Release the per-call stream; cudaStreamDestroy does not wait for pending work.
    cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (stream_created) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point running ntt_end2end_template on an array of BW6_761 projective
 * points, with BW6_761 scalars as the second template argument.
 *
 * @param arr       array handed to ntt_end2end_template.
 * @param n         element count forwarded to ntt_end2end_template.
 * @param inverse   forwarded as the inverse flag.
 * @param device_id currently unused.
 * @param stream    NOTE: the incoming value is overwritten; the work runs on a
 *                  stream created (and released) inside this call.
 * @return the value returned by ntt_end2end_template, or -1 on failure.
 */
extern "C" int
ecntt_cuda_bw6_761(BW6_761::projective_t* arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  bool stream_created = false;
  try {
    const cudaError_t err = cudaStreamCreate(&stream);
    if (err != cudaSuccess) {
      printf("error %s", cudaGetErrorString(err));
      return -1;
    }
    stream_created = true;

    const int status = ntt_end2end_template<BW6_761::projective_t, BW6_761::scalar_t>(arr, n, inverse, stream);
    // Release the per-call stream; cudaStreamDestroy does not wait for pending work.
    cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (stream_created) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point running ntt_end2end_batch_template on BW6_761 scalars.
 *
 * @param arr        array handed to ntt_end2end_batch_template.
 * @param arr_size   forwarded as the total array size.
 * @param batch_size forwarded as the third argument of ntt_end2end_batch_template.
 * @param inverse    forwarded as the inverse flag.
 * @param device_id  currently unused.
 * @param stream     NOTE: the incoming value is overwritten; the work runs on a
 *                   stream created (and released) inside this call.
 * @return the value returned by ntt_end2end_batch_template, or -1 on failure.
 */
extern "C" int ntt_batch_cuda_bw6_761(
  BW6_761::scalar_t* arr,
  uint32_t arr_size,
  uint32_t batch_size,
  bool inverse,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  bool stream_created = false;
  try {
    const cudaError_t err = cudaStreamCreate(&stream);
    if (err != cudaSuccess) {
      printf("error %s", cudaGetErrorString(err));
      return -1;
    }
    stream_created = true;

    const int status =
      ntt_end2end_batch_template<BW6_761::scalar_t, BW6_761::scalar_t>(arr, arr_size, batch_size, inverse, stream);
    // Release the per-call stream; cudaStreamDestroy does not wait for pending work.
    cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (stream_created) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point running ntt_end2end_batch_template on BW6_761 projective points,
 * with BW6_761 scalars as the second template argument.
 *
 * @param arr        array handed to ntt_end2end_batch_template.
 * @param arr_size   forwarded as the total array size.
 * @param batch_size forwarded as the third argument of ntt_end2end_batch_template.
 * @param inverse    forwarded as the inverse flag.
 * @param device_id  currently unused.
 * @param stream     NOTE: the incoming value is overwritten; the work runs on a
 *                   stream created (and released) inside this call.
 * @return the value returned by ntt_end2end_batch_template, or -1 on failure.
 */
extern "C" int ecntt_batch_cuda_bw6_761(
  BW6_761::projective_t* arr,
  uint32_t arr_size,
  uint32_t batch_size,
  bool inverse,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  bool stream_created = false;
  try {
    const cudaError_t err = cudaStreamCreate(&stream);
    if (err != cudaSuccess) {
      printf("error %s", cudaGetErrorString(err));
      return -1;
    }
    stream_created = true;

    const int status =
      ntt_end2end_batch_template<BW6_761::projective_t, BW6_761::scalar_t>(arr, arr_size, batch_size, inverse, stream);
    // Release the per-call stream; cudaStreamDestroy does not wait for pending work.
    cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (stream_created) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point forwarding to interpolate() for BW6_761 scalars, with the
 * boolean argument set to false and a null scalar pointer in the slot that follows it.
 *
 * @param d_out         output pointer forwarded to interpolate.
 * @param d_evaluations evaluations pointer forwarded to interpolate.
 * @param d_domain      domain pointer forwarded to interpolate.
 * @param n             size forwarded to interpolate.
 * @param device_id     not used by this wrapper.
 * @param stream        stream forwarded to interpolate.
 * @return the value returned by interpolate, or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_scalars_cuda_bw6_761(
  BW6_761::scalar_t* d_out,
  BW6_761::scalar_t* d_evaluations,
  BW6_761::scalar_t* d_domain,
  unsigned n,
  unsigned device_id = 0,
  cudaStream_t stream = 0)
{
  try {
    // A typed null pointer is needed so the argument carries the scalar pointer type.
    BW6_761::scalar_t* no_scalars = nullptr;
    const int status = interpolate(d_out, d_evaluations, d_domain, n, false, no_scalars, stream);
    return status;
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point forwarding to interpolate_batch() for BW6_761 scalars, with the
 * boolean argument set to false and a null scalar pointer in the slot that follows it.
 *
 * @param d_out         output pointer forwarded to interpolate_batch.
 * @param d_evaluations evaluations pointer forwarded to interpolate_batch.
 * @param d_domain      domain pointer forwarded to interpolate_batch.
 * @param n             size forwarded to interpolate_batch.
 * @param batch_size    batch size forwarded to interpolate_batch.
 * @param device_id     currently unused.
 * @param stream        NOTE: the incoming value is overwritten; the work runs on a
 *                      stream created (and released) inside this call.
 * @return the value returned by interpolate_batch, or -1 on failure.
 */
extern "C" int interpolate_scalars_batch_cuda_bw6_761(
  BW6_761::scalar_t* d_out,
  BW6_761::scalar_t* d_evaluations,
  BW6_761::scalar_t* d_domain,
  unsigned n,
  unsigned batch_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  bool stream_created = false;
  try {
    BW6_761::scalar_t* _null = nullptr;
    const cudaError_t err = cudaStreamCreate(&stream);
    if (err != cudaSuccess) {
      printf("error %s", cudaGetErrorString(err));
      return -1;
    }
    stream_created = true;

    const int status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
    // Release the per-call stream; cudaStreamDestroy does not wait for pending work.
    cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (stream_created) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point forwarding to interpolate() for BW6_761 scalars, with the
 * boolean argument set to true and `coset_powers` supplied.
 *
 * @param d_out         output pointer forwarded to interpolate.
 * @param d_evaluations evaluations pointer forwarded to interpolate.
 * @param d_domain      domain pointer forwarded to interpolate.
 * @param n             size forwarded to interpolate.
 * @param coset_powers  scalar pointer forwarded to interpolate after the `true` flag.
 * @param device_id     not used by this wrapper.
 * @param stream        stream forwarded to interpolate.
 * @return the value returned by interpolate, or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_scalars_on_coset_cuda_bw6_761(
  BW6_761::scalar_t* d_out,
  BW6_761::scalar_t* d_evaluations,
  BW6_761::scalar_t* d_domain,
  unsigned n,
  BW6_761::scalar_t* coset_powers,
  unsigned device_id = 0,
  cudaStream_t stream = 0)
{
  try {
    const int status = interpolate(d_out, d_evaluations, d_domain, n, true, coset_powers, stream);
    return status;
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point forwarding to interpolate_batch() for BW6_761 scalars, with the
 * boolean argument set to true and `coset_powers` supplied.
 *
 * @param d_out         output pointer forwarded to interpolate_batch.
 * @param d_evaluations evaluations pointer forwarded to interpolate_batch.
 * @param d_domain      domain pointer forwarded to interpolate_batch.
 * @param n             size forwarded to interpolate_batch.
 * @param batch_size    batch size forwarded to interpolate_batch.
 * @param coset_powers  scalar pointer forwarded after the `true` flag.
 * @param device_id     currently unused.
 * @param stream        NOTE: the incoming value is overwritten; the work runs on a
 *                      stream created (and released) inside this call.
 * @return the value returned by interpolate_batch, or -1 on failure.
 */
extern "C" int interpolate_scalars_batch_on_coset_cuda_bw6_761(
  BW6_761::scalar_t* d_out,
  BW6_761::scalar_t* d_evaluations,
  BW6_761::scalar_t* d_domain,
  unsigned n,
  unsigned batch_size,
  BW6_761::scalar_t* coset_powers,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  bool stream_created = false;
  try {
    const cudaError_t err = cudaStreamCreate(&stream);
    if (err != cudaSuccess) {
      printf("error %s", cudaGetErrorString(err));
      return -1;
    }
    stream_created = true;

    const int status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, true, coset_powers, stream);
    // Release the per-call stream; cudaStreamDestroy does not wait for pending work.
    cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (stream_created) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C entry point forwarding to interpolate() for BW6_761 projective points, with the
 * boolean argument set to false and a null scalar pointer in the slot that follows it.
 *
 * @param d_out         output pointer forwarded to interpolate.
 * @param d_evaluations evaluations pointer forwarded to interpolate.
 * @param d_domain      scalar domain pointer forwarded to interpolate.
 * @param n             size forwarded to interpolate.
 * @param device_id     not used by this wrapper.
 * @param stream        stream forwarded to interpolate.
 * @return the value returned by interpolate, or -1 if a std::runtime_error was caught.
 */
extern "C" int interpolate_points_cuda_bw6_761(
  BW6_761::projective_t* d_out,
  BW6_761::projective_t* d_evaluations,
  BW6_761::scalar_t* d_domain,
  unsigned n,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  try {
    // A typed null pointer is needed so the argument carries the scalar pointer type.
    BW6_761::scalar_t* no_scalars = nullptr;
    const int status = interpolate(d_out, d_evaluations, d_domain, n, false, no_scalars, stream);
    return status;
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that forwards BW6-761 projective-point buffers and a scalar domain to `interpolate_batch`,
 * passing `false` for the boolean argument and a null coset-powers pointer.
 * `device_id` is accepted but not used here.
 * @return The value produced by `interpolate_batch`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int interpolate_points_batch_cuda_bw6_761(
  BW6_761::projective_t* d_out,
  BW6_761::projective_t* d_evaluations,
  BW6_761::scalar_t* d_domain,
  unsigned n,
  unsigned batch_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    BW6_761::scalar_t* _null = nullptr;
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that forwards BW6-761 scalar buffers to `evaluate`, passing `false` for the boolean
 * argument and a null coset-powers pointer.
 * `device_id` is accepted but not used here.
 * @return The value produced by `evaluate`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_scalars_cuda_bw6_761(
  BW6_761::scalar_t* d_out,
  BW6_761::scalar_t* d_coefficients,
  BW6_761::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    BW6_761::scalar_t* _null = nullptr;
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that forwards BW6-761 scalar buffers to `evaluate_batch`, passing `false` for the boolean
 * argument and a null coset-powers pointer.
 * `device_id` is accepted but not used here.
 * @return The value produced by `evaluate_batch`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_scalars_batch_cuda_bw6_761(
  BW6_761::scalar_t* d_out,
  BW6_761::scalar_t* d_coefficients,
  BW6_761::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned batch_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    BW6_761::scalar_t* _null = nullptr;
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that forwards BW6-761 projective-point buffers and a scalar domain to `evaluate`,
 * passing `false` for the boolean argument and a null coset-powers pointer.
 * `device_id` is accepted but not used here.
 * @return The value produced by `evaluate`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_points_cuda_bw6_761(
  BW6_761::projective_t* d_out,
  BW6_761::projective_t* d_coefficients,
  BW6_761::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    BW6_761::scalar_t* _null = nullptr;
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that forwards BW6-761 projective-point buffers and a scalar domain to `evaluate_batch`,
 * passing `false` for the boolean argument and a null coset-powers pointer.
 * `device_id` is accepted but not used here.
 * @return The value produced by `evaluate_batch`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_points_batch_cuda_bw6_761(
  BW6_761::projective_t* d_out,
  BW6_761::projective_t* d_coefficients,
  BW6_761::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned batch_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    BW6_761::scalar_t* _null = nullptr;
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that forwards BW6-761 scalar buffers to `evaluate`, passing `true` for the boolean
 * argument together with the caller's `coset_powers`.
 * `device_id` is accepted but not used here.
 * @return The value produced by `evaluate`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_scalars_on_coset_cuda_bw6_761(
  BW6_761::scalar_t* d_out,
  BW6_761::scalar_t* d_coefficients,
  BW6_761::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  BW6_761::scalar_t* coset_powers,
  unsigned device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that forwards BW6-761 scalar buffers to `evaluate_batch`, passing `true` for the boolean
 * argument together with the caller's `coset_powers`.
 * `device_id` is accepted but not used here.
 * @return The value produced by `evaluate_batch`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda_bw6_761(
  BW6_761::scalar_t* d_out,
  BW6_761::scalar_t* d_coefficients,
  BW6_761::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned batch_size,
  BW6_761::scalar_t* coset_powers,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status =
      evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that forwards BW6-761 projective-point buffers and a scalar domain to `evaluate`,
 * passing `true` for the boolean argument together with the caller's `coset_powers`.
 * `device_id` is accepted but not used here.
 * @return The value produced by `evaluate`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_points_on_coset_cuda_bw6_761(
  BW6_761::projective_t* d_out,
  BW6_761::projective_t* d_coefficients,
  BW6_761::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  BW6_761::scalar_t* coset_powers,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that forwards BW6-761 projective-point buffers and a scalar domain to `evaluate_batch`,
 * passing `true` for the boolean argument together with the caller's `coset_powers`.
 * `device_id` is accepted but not used here.
 * @return The value produced by `evaluate_batch`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_points_on_coset_batch_cuda_bw6_761(
  BW6_761::projective_t* d_out,
  BW6_761::projective_t* d_coefficients,
  BW6_761::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned batch_size,
  BW6_761::scalar_t* coset_powers,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status =
      evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper around `ntt_inplace_batch_template` for BW6-761 scalars, called with the coset flag set to
 * `false`, a null coset pointer and `true` as the final argument.
 * `device_id` is accepted but not used here.
 * @return `CUDA_SUCCESS` when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int ntt_inplace_batch_cuda_bw6_761(
  BW6_761::scalar_t* d_inout,
  BW6_761::scalar_t* d_twiddles,
  unsigned n,
  unsigned batch_size,
  bool inverse,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    BW6_761::scalar_t* _null = nullptr;
    ntt_inplace_batch_template(d_inout, d_twiddles, n, batch_size, inverse, false, _null, stream, true);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS; // TODO: we should implement this https://leimao.github.io/blog/Proper-CUDA-Error-Checking/
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper around `ntt_inplace_batch_template` for BW6-761 scalars that passes the caller's `is_coset`
 * and `coset` through, with `true` as the final argument.
 * `device_id` is accepted but not used here.
 * @return `CUDA_SUCCESS` when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int ntt_inplace_coset_batch_cuda_bw6_761(
  BW6_761::scalar_t* d_inout,
  BW6_761::scalar_t* d_twiddles,
  unsigned n,
  unsigned batch_size,
  bool inverse,
  bool is_coset,
  BW6_761::scalar_t* coset,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    ntt_inplace_batch_template(d_inout, d_twiddles, n, batch_size, inverse, is_coset, coset, stream, true);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS; // TODO: we should implement this https://leimao.github.io/blog/Proper-CUDA-Error-Checking/
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that forwards two BW6-761 scalar inputs and an output buffer to `sub_polys`.
 * @return The value produced by `sub_polys`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int sub_scalars_cuda_bw6_761(
  BW6_761::scalar_t* d_out, BW6_761::scalar_t* d_in1, BW6_761::scalar_t* d_in2, unsigned n, cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = sub_polys(d_out, d_in1, d_in2, n, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that forwards two BW6-761 scalar inputs and an output buffer to `add_polys`.
 * @return The value produced by `add_polys`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int add_scalars_cuda_bw6_761(
  BW6_761::scalar_t* d_out, BW6_761::scalar_t* d_in1, BW6_761::scalar_t* d_in2, unsigned n, cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = add_polys(d_out, d_in1, d_in2, n, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that passes `n` BW6-761 scalars to `to_montgomery` for in-place processing.
 * @return The value produced by `to_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int to_montgomery_scalars_cuda_bw6_761(BW6_761::scalar_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = to_montgomery(d_inout, n, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that passes `n` BW6-761 scalars to `from_montgomery` for in-place processing.
 * @return The value produced by `from_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int from_montgomery_scalars_cuda_bw6_761(BW6_761::scalar_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = from_montgomery(d_inout, n, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that reinterprets `n` projective points as `3 * n` base-field elements and passes them to
 * `to_montgomery` for in-place processing.
 * @return The value produced by `to_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int
to_montgomery_proj_points_cuda_bw6_761(BW6_761::projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = to_montgomery((BW6_761::point_field_t*)d_inout, 3 * n, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that reinterprets `n` projective points as `3 * n` base-field elements and passes them to
 * `from_montgomery` for in-place processing.
 * @return The value produced by `from_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int
from_montgomery_proj_points_cuda_bw6_761(BW6_761::projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = from_montgomery((BW6_761::point_field_t*)d_inout, 3 * n, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that reinterprets `n` affine points as `2 * n` base-field elements and passes them to
 * `to_montgomery` for in-place processing.
 * @return The value produced by `to_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int to_montgomery_aff_points_cuda_bw6_761(BW6_761::affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = to_montgomery((BW6_761::point_field_t*)d_inout, 2 * n, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that reinterprets `n` affine points as `2 * n` base-field elements and passes them to
 * `from_montgomery` for in-place processing.
 * @return The value produced by `from_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int from_montgomery_aff_points_cuda_bw6_761(BW6_761::affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = from_montgomery((BW6_761::point_field_t*)d_inout, 2 * n, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
#if defined(G2_DEFINED)
/**
 * C-linkage wrapper that reinterprets `n` G2 projective points as a flat array of base-field elements and passes
 * them to `to_montgomery` for in-place processing.
 * @return The value produced by `to_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int
to_montgomery_proj_points_g2_cuda_bw6_761(BW6_761::g2_projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Derive the per-point element count from the type sizes instead of hard-coding 6: that constant is only right
  // when G2 coordinates are extension-field pairs and would overrun the buffer if they are plain base-field
  // elements. NOTE(review): assumes the point struct holds nothing but its coordinates — confirm.
  const unsigned fields_per_point = sizeof(BW6_761::g2_projective_t) / sizeof(BW6_761::point_field_t);
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = to_montgomery((BW6_761::point_field_t*)d_inout, fields_per_point * n, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that reinterprets `n` G2 projective points as a flat array of base-field elements and passes
 * them to `from_montgomery` for in-place processing.
 * @return The value produced by `from_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int
from_montgomery_proj_points_g2_cuda_bw6_761(BW6_761::g2_projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Derive the per-point element count from the type sizes instead of hard-coding 6: that constant is only right
  // when G2 coordinates are extension-field pairs and would overrun the buffer if they are plain base-field
  // elements. NOTE(review): assumes the point struct holds nothing but its coordinates — confirm.
  const unsigned fields_per_point = sizeof(BW6_761::g2_projective_t) / sizeof(BW6_761::point_field_t);
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = from_montgomery((BW6_761::point_field_t*)d_inout, fields_per_point * n, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that reinterprets `n` G2 affine points as a flat array of base-field elements and passes
 * them to `to_montgomery` for in-place processing.
 * @return The value produced by `to_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int
to_montgomery_aff_points_g2_cuda_bw6_761(BW6_761::g2_affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Derive the per-point element count from the type sizes instead of hard-coding 4: that constant is only right
  // when G2 coordinates are extension-field pairs and would overrun the buffer if they are plain base-field
  // elements. NOTE(review): assumes the point struct holds nothing but its coordinates — confirm.
  const unsigned fields_per_point = sizeof(BW6_761::g2_affine_t) / sizeof(BW6_761::point_field_t);
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = to_montgomery((BW6_761::point_field_t*)d_inout, fields_per_point * n, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that reinterprets `n` G2 affine points as a flat array of base-field elements and passes
 * them to `from_montgomery` for in-place processing.
 * @return The value produced by `from_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int
from_montgomery_aff_points_g2_cuda_bw6_761(BW6_761::g2_affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Derive the per-point element count from the type sizes instead of hard-coding 4: that constant is only right
  // when G2 coordinates are extension-field pairs and would overrun the buffer if they are plain base-field
  // elements. NOTE(review): assumes the point struct holds nothing but its coordinates — confirm.
  const unsigned fields_per_point = sizeof(BW6_761::g2_affine_t) / sizeof(BW6_761::point_field_t);
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    int status = from_montgomery((BW6_761::point_field_t*)d_inout, fields_per_point * n, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return status;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
#endif
/**
 * C-linkage wrapper that calls `reverse_order` on `n` BW6-761 scalars with `logn = floor(log2(n))`, then waits
 * for the stream to finish.
 * `device_id` is accepted but not used here.
 * @return 0 when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int
reverse_order_scalars_cuda_bw6_761(BW6_761::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // Integer floor(log2(n)): exact for powers of two (no floating-point rounding) and well defined for
    // n <= 0, where it yields 0.
    uint32_t logn = 0;
    for (unsigned rest = (n > 1) ? unsigned(n) : 1u; rest > 1; rest >>= 1)
      ++logn;
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    reverse_order(arr, n, logn, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return 0;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that calls `reverse_order_batch` on BW6-761 scalars with `logn = floor(log2(n))`.
 * `device_id` is accepted but not used here.
 * @return 0 when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int reverse_order_scalars_batch_cuda_bw6_761(
  BW6_761::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // Integer floor(log2(n)): exact for powers of two (no floating-point rounding) and well defined for
    // n <= 0, where it yields 0.
    uint32_t logn = 0;
    for (unsigned rest = (n > 1) ? unsigned(n) : 1u; rest > 1; rest >>= 1)
      ++logn;
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    reverse_order_batch(arr, n, logn, batch_size, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return 0;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that calls `reverse_order` on `n` BW6-761 projective points with `logn = floor(log2(n))`.
 * `device_id` is accepted but not used here.
 * @return 0 when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int
reverse_order_points_cuda_bw6_761(BW6_761::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // Integer floor(log2(n)): exact for powers of two (no floating-point rounding) and well defined for
    // n <= 0, where it yields 0.
    uint32_t logn = 0;
    for (unsigned rest = (n > 1) ? unsigned(n) : 1u; rest > 1; rest >>= 1)
      ++logn;
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    reverse_order(arr, n, logn, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return 0;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that calls `reverse_order_batch` on BW6-761 projective points with
 * `logn = floor(log2(n))`.
 * `device_id` is accepted but not used here.
 * @return 0 when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int reverse_order_points_batch_cuda_bw6_761(
  BW6_761::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // Integer floor(log2(n)): exact for powers of two (no floating-point rounding) and well defined for
    // n <= 0, where it yields 0.
    uint32_t logn = 0;
    for (unsigned rest = (n > 1) ? unsigned(n) : 1u; rest > 1; rest >>= 1)
      ++logn;
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    reverse_order_batch(arr, n, logn, batch_size, stream);
    // Destroying a stream with queued work is allowed: CUDA releases it once that work completes.
    if (owns_stream) cudaStreamDestroy(stream);
    return 0;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
#endif

View File

@@ -0,0 +1,216 @@
#ifndef _BW6_761_MSM
#define _BW6_761_MSM
#include "../../appUtils/msm/msm.cu"
#include "curve_config.cuh"
#include <cuda.h>
#include <stdexcept>
/**
 * C-linkage wrapper that runs `large_msm` over `count` BW6-761 affine points and scalars, passing `false` for
 * both boolean arguments, and waits for the stream to finish before returning.
 * `device_id` is accepted but not used here.
 * @return `CUDA_SUCCESS` when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int msm_cuda_bw6_761(
  BW6_761::projective_t* out,
  BW6_761::affine_t points[],
  BW6_761::scalar_t scalars[],
  size_t count,
  unsigned large_bucket_factor,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    large_msm<BW6_761::scalar_t, BW6_761::projective_t, BW6_761::affine_t>(
      scalars, points, count, out, false, false, large_bucket_factor, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that runs `batched_large_msm` over BW6-761 affine points and scalars, passing `false` for
 * the boolean argument, and waits for the stream to finish before returning.
 * `device_id` is accepted but not used here.
 * @return `CUDA_SUCCESS` when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int msm_batch_cuda_bw6_761(
  BW6_761::projective_t* out,
  BW6_761::affine_t points[],
  BW6_761::scalar_t scalars[],
  size_t batch_size,
  size_t msm_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    batched_large_msm<BW6_761::scalar_t, BW6_761::projective_t, BW6_761::affine_t>(
      scalars, points, batch_size, msm_size, out, false, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * Commit to a polynomial using the MSM.
 * Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
 * points.
 * @param d_out Output point to write the result to.
 * @param d_scalars Scalars for the MSM. Must be on device.
 * @param d_points Points for the MSM. Must be on device.
 * @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
 * @param large_bucket_factor Forwarded unchanged to `large_msm`.
 * @param device_id Accepted but not used here.
 * @param stream NOTE(review): overwritten by a freshly created stream inside the function, so the caller's value
 * is effectively ignored.
 * @return `CUDA_SUCCESS` when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int commit_cuda_bw6_761(
  BW6_761::projective_t* d_out,
  BW6_761::scalar_t* d_scalars,
  BW6_761::affine_t* d_points,
  size_t count,
  unsigned large_bucket_factor,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * Commit to a batch of polynomials using the MSM.
 * Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
 * points.
 * @param d_out Output points to write the results to.
 * @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
 * @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
 * @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
 * @param batch_size Size of the batch.
 * @param device_id Accepted but not used here.
 * @param stream NOTE(review): overwritten by a freshly created stream inside the function, so the caller's value
 * is effectively ignored.
 * @return `CUDA_SUCCESS` when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int commit_batch_cuda_bw6_761(
  BW6_761::projective_t* d_out,
  BW6_761::scalar_t* d_scalars,
  BW6_761::affine_t* d_points,
  size_t count,
  size_t batch_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
#if defined(G2_DEFINED)
/**
 * C-linkage wrapper that runs `large_msm` over `count` BW6-761 G2 affine points and scalars, passing `false`
 * for both boolean arguments, and waits for the stream to finish before returning.
 * `device_id` is accepted but not used here.
 * @return `CUDA_SUCCESS` when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int msm_g2_cuda_bw6_761(
  BW6_761::g2_projective_t* out,
  BW6_761::g2_affine_t points[],
  BW6_761::scalar_t scalars[],
  size_t count,
  unsigned large_bucket_factor,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    large_msm<BW6_761::scalar_t, BW6_761::g2_projective_t, BW6_761::g2_affine_t>(
      scalars, points, count, out, false, false, large_bucket_factor, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * C-linkage wrapper that runs `batched_large_msm` over BW6-761 G2 affine points and scalars, passing `false`
 * for the boolean argument, and waits for the stream to finish before returning.
 * `device_id` is accepted but not used here.
 * @return `CUDA_SUCCESS` when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int msm_batch_g2_cuda_bw6_761(
  BW6_761::g2_projective_t* out,
  BW6_761::g2_affine_t points[],
  BW6_761::scalar_t scalars[],
  size_t batch_size,
  size_t msm_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    // NOTE(review): this overwrites the caller-supplied `stream`, so that argument is effectively ignored.
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    batched_large_msm<BW6_761::scalar_t, BW6_761::g2_projective_t, BW6_761::g2_affine_t>(
      scalars, points, batch_size, msm_size, out, false, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * Commit to a polynomial using the MSM in G2 group.
 * Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
 * points.
 * @param d_out Output G2 point to write the result to.
 * @param d_scalars Scalars for the MSM. Must be on device.
 * @param d_points G2 affine points for the MSM. Must be on device.
 * @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
 * @param large_bucket_factor Forwarded unchanged to `large_msm`.
 * @param device_id Accepted but not used here.
 * @param stream NOTE(review): overwritten by a freshly created stream inside the function, so the caller's value
 * is effectively ignored.
 * @return `CUDA_SUCCESS` when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int commit_g2_cuda_bw6_761(
  BW6_761::g2_projective_t* d_out,
  BW6_761::scalar_t* d_scalars,
  BW6_761::g2_affine_t* d_points,
  size_t count,
  unsigned large_bucket_factor,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
/**
 * Commit to a batch of polynomials using the MSM in G2 group.
 * Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
 * points.
 * @param d_out Output G2 points to write the results to.
 * @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
 * @param d_points G2 affine points for the MSMs. Must be on device. It is assumed that this set of bases is used for
 * each MSM.
 * @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
 * @param batch_size Size of the batch.
 * @param device_id Accepted but not used here.
 * @param stream NOTE(review): overwritten by a freshly created stream inside the function, so the caller's value
 * is effectively ignored.
 * @return `CUDA_SUCCESS` when the call returns normally, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int commit_batch_g2_cuda_bw6_761(
  BW6_761::g2_projective_t* d_out,
  BW6_761::scalar_t* d_scalars,
  BW6_761::g2_affine_t* d_points,
  size_t count,
  size_t batch_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  // True only when the stream below was created by this call and therefore must be released by it.
  bool owns_stream = false;
  try {
    owns_stream = (cudaStreamCreate(&stream) == cudaSuccess);
    batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    if (owns_stream) cudaStreamDestroy(stream);
    printf("error %s", ex.what());
    return -1;
  }
}
#endif
#endif

View File

@@ -0,0 +1,96 @@
#pragma once
#include "../../utils/storage.cuh"
// Compile-time constants for the BW6-761 curve. Every multi-limb value below is listed least-significant limb
// first (e.g. `modulus_2` starts with 0x116 = 2 * 0x8b, with carries propagating towards the later limbs).
namespace PARAMS_BW6_761 {
// Base-field (fq) configuration: 24 limbs written as 32-bit words (24 * 32 = 768 >= 761 bits).
struct fq_config {
static constexpr unsigned limbs_count = 24;
static constexpr unsigned modulus_bit_count = 761;
// The field modulus.
static constexpr storage<limbs_count> modulus = {
0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
0x8087be41, 0x528275ef, 0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824};
// 2 * modulus.
static constexpr storage<limbs_count> modulus_2 = {
0x00000116, 0xe93a0000, 0xe0000105, 0xcd227cd0, 0xd5e1486f, 0x2c19f15d, 0xaccf51f0, 0x31422d84,
0xe7d7fe5d, 0xe3b9a7b8, 0x25f3fb20, 0x0d1391da, 0x4b684609, 0x079d75fe, 0xcb09d232, 0xe0f74c71,
0x010f7c82, 0xa504ebdf, 0x03a28d10, 0x724c30d5, 0x09f5fe7d, 0xa30f9280, 0xf7079c15, 0x0245d049};
// 4 * modulus.
static constexpr storage<limbs_count> modulus_4 = {
0x0000022c, 0xd2740000, 0xc000020b, 0x9a44f9a1, 0xabc290df, 0x5833e2bb, 0x599ea3e0, 0x62845b09,
0xcfaffcba, 0xc7734f71, 0x4be7f641, 0x1a2723b4, 0x96d08c12, 0x0f3aebfc, 0x9613a464, 0xc1ee98e3,
0x021ef905, 0x4a09d7be, 0x07451a21, 0xe49861aa, 0x13ebfcfa, 0x461f2500, 0xee0f382b, 0x048ba093};
// The modulus zero-extended to double width (2 * limbs_count limbs).
static constexpr storage<2 * limbs_count> modulus_wide = {
0x0000008b, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
0x8087be41, 0x528275ef, 0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// modulus^2 at double width (lowest limb 0x4b79 = 0x8b * 0x8b).
static constexpr storage<2 * limbs_count> modulus_squared = {
0x00004b79, 0xa27e0000, 0xa0008e35, 0xbae96db2, 0x82ebf7b1, 0x4aaf1d22, 0x7224cb3d, 0x7908fd92,
0x29b17ed1, 0x6fe68290, 0xafc968db, 0xfe1b7282, 0x9028bbf0, 0xe1e548cb, 0x3a8ffc03, 0x09094ed6,
0x61e9cf95, 0xd63ea631, 0x54918abf, 0xe834ca62, 0x52aa651e, 0xe52594ed, 0xb4c46a4f, 0xe2423252,
0x6c09aae4, 0xa8cf17d8, 0xc5f5cee5, 0x2d80ffb0, 0x55bbc10d, 0x2dede100, 0xe2360382, 0x1f4e7a7c,
0xae2fe433, 0x586c3847, 0x78eadae1, 0x915c56e1, 0x69a5ce00, 0xa35b2945, 0x767c08ca, 0x9d66e7fe,
0xd8b88c77, 0x7e44cf6a, 0x67c9c873, 0xb29bfc93, 0xbbc80af9, 0x6a24005a, 0xc64ce3d5, 0x00014a92};
// 2 * modulus^2.
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
0x000096f2, 0x44fc0000, 0x40011c6b, 0x75d2db65, 0x05d7ef63, 0x955e3a45, 0xe449967a, 0xf211fb24,
0x5362fda2, 0xdfcd0520, 0x5f92d1b6, 0xfc36e505, 0x205177e1, 0xc3ca9197, 0x751ff807, 0x12129dac,
0xc3d39f2a, 0xac7d4c62, 0xa923157f, 0xd06994c4, 0xa554ca3d, 0xca4b29da, 0x6988d49f, 0xc48464a5,
0xd81355c9, 0x519e2fb0, 0x8beb9dcb, 0x5b01ff61, 0xab77821a, 0x5bdbc200, 0xc46c0704, 0x3e9cf4f9,
0x5c5fc866, 0xb0d8708f, 0xf1d5b5c2, 0x22b8adc2, 0xd34b9c01, 0x46b6528a, 0xecf81195, 0x3acdcffc,
0xb17118ef, 0xfc899ed5, 0xcf9390e6, 0x6537f926, 0x779015f3, 0xd44800b5, 0x8c99c7aa, 0x00029525};
// 4 * modulus^2.
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
0x00012de4, 0x89f80000, 0x800238d6, 0xeba5b6ca, 0x0bafdec6, 0x2abc748a, 0xc8932cf5, 0xe423f649,
0xa6c5fb45, 0xbf9a0a40, 0xbf25a36d, 0xf86dca0a, 0x40a2efc3, 0x8795232e, 0xea3ff00f, 0x24253b58,
0x87a73e54, 0x58fa98c5, 0x52462aff, 0xa0d32989, 0x4aa9947b, 0x949653b5, 0xd311a93f, 0x8908c94a,
0xb026ab93, 0xa33c5f61, 0x17d73b96, 0xb603fec3, 0x56ef0434, 0xb7b78401, 0x88d80e08, 0x7d39e9f3,
0xb8bf90cc, 0x61b0e11e, 0xe3ab6b85, 0x45715b85, 0xa6973802, 0x8d6ca515, 0xd9f0232a, 0x759b9ff9,
0x62e231de, 0xf9133dab, 0x9f2721cd, 0xca6ff24d, 0xef202be6, 0xa890016a, 0x19338f55, 0x00052a4b};
// NOTE(review): presumably the precomputed constant used by the field's modular reduction — confirm against the
// Field implementation.
static constexpr storage<limbs_count> m = {0x2507e899, 0x11629ccd, 0x2e4424dd, 0xab1eef5b, 0x481d2cfa, 0xb82146a9,
0x34e4227b, 0xf3182afa, 0xbeb25621, 0xf615fdb5, 0xccc261d6, 0xc4d8988c,
0xaaf4fab0, 0x3590d652, 0x2ab9ff30, 0x9c5d0a04, 0x6ec3f460, 0xf6e8534f,
0x88075ab4, 0xe8d78b06, 0x6f3fc8fe, 0xa8d3675b, 0x7bc5cd4b, 0x03852086};
// The integer 1.
static constexpr storage<limbs_count> one = {
0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// The integer 0.
static constexpr storage<limbs_count> zero = {
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
// NOTE(review): presumably the Montgomery radix R reduced modulo the modulus — confirm.
static constexpr storage<limbs_count> montgomery_r = {
0xffff85d5, 0x0202ffff, 0x8fff8ce7, 0x5a582635, 0x827faade, 0x9e996e43, 0x0ee47df4, 0xda6aff32,
0x1d94b80b, 0xece9cb3e, 0x5248240b, 0xc0e667a2, 0xdcad3905, 0xa74da5bf, 0x462f2103, 0x2352e7fe,
0x08b1c87c, 0x7b565880, 0xe711022f, 0x45848a63, 0x9f65a9df, 0xd7a81ebb, 0xf127e87d, 0x0051f77e};
// NOTE(review): presumably the modular inverse of `montgomery_r` — confirm.
static constexpr storage<limbs_count> montgomery_r_inv = {
0x181fa3f1, 0x27c2b2a0, 0x25a0e1b8, 0x7d9ca9f9, 0x0a004a5d, 0x35a910f0, 0xdb6b8539, 0x54655b3f,
0x7695ef18, 0x5e763565, 0x4fae56bb, 0x226022c2, 0xb70d7652, 0x80e7f067, 0x72116b89, 0x435a8b4a,
0x5d84e0d4, 0xac258fd6, 0x4427c7b2, 0x47ee8ac5, 0xd04e621b, 0x478c4048, 0x2add3e93, 0x00e0aa7d};
};
// G1 and G2 generators
// Each G2 coordinate here is a single base-field-sized limb array, like the G1 coordinates.
static constexpr storage<fq_config::limbs_count> g1_gen_x = {
0x66e5b43d, 0x4088f3af, 0xa6af603f, 0x055928ac, 0x56133e82, 0x6750dd03, 0x280ca27f, 0x03758f9a,
0xc9ea0971, 0x5bd71fa0, 0x47729b90, 0xa17a54ce, 0x94c2e746, 0x11dbfcd2, 0xc15520ac, 0x79017ffa,
0x85f56fc7, 0xee05c54b, 0x551b27f0, 0xe6a0cfb7, 0xa477beae, 0xb277ce98, 0x0ea190c8, 0x01075b02};
static constexpr storage<fq_config::limbs_count> g1_gen_y = {
0xb4e95363, 0xbafc8f2d, 0x0b20d2a1, 0xad1cb2be, 0xcad0fb93, 0xb2b08119, 0xb3053253, 0x9f9df141,
0x6fc2cdd4, 0xbe3fb90b, 0x717a4c55, 0xcc685d31, 0x71b5b806, 0xc5b8fa17, 0xaf7e0dba, 0x265909f1,
0xa2e573a3, 0x1a7348d2, 0x884c9ec6, 0x0f952589, 0x45cc2a42, 0xe6fd637b, 0x0a6fc574, 0x0058b84e};
static constexpr storage<fq_config::limbs_count> g2_gen_x = {
0xcd025f1c, 0xa830c194, 0xe1bf995b, 0x6410cf4f, 0xc2ad54b0, 0x00e96efb, 0x3cd208d7, 0xce6948cb,
0x00e1b6ba, 0x963317a3, 0xac70e7c7, 0xc5bbcae9, 0xf09feb58, 0x734ec3f1, 0xab3da268, 0x26b41c5d,
0x13890f6d, 0x4c062010, 0xc5a7115f, 0xd61053aa, 0x69d660f9, 0xc852a82e, 0x41d9b816, 0x01101332};
static constexpr storage<fq_config::limbs_count> g2_gen_y = {
0x28c73b61, 0xeb70a167, 0xf9eac689, 0x91ec0594, 0x3c5a02a5, 0x58aa2d3a, 0x504affc7, 0x3ea96fcd,
0xffa82300, 0x8906c170, 0xd2c712b8, 0x64f293db, 0x33293fef, 0x94c97eb7, 0x0b95a59c, 0x0a1d86c8,
0x53ffe316, 0x81a78e27, 0xcec2181c, 0x26b7cf9a, 0xe4b6d2dc, 0x8179eb10, 0x7761369f, 0x0017c335};
// Weierstrass `b` for G1: equals modulus - 1 (only the lowest limb differs, 0x8a vs 0x8b), i.e. -1 mod the modulus.
static constexpr storage<fq_config::limbs_count> weierstrass_b = {
0x0000008a, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
0x8087be41, 0x528275ef, 0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824};
// Weierstrass `b` for G2: the integer 4.
static constexpr storage<fq_config::limbs_count> g2_weierstrass_b = {
0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
} // namespace PARAMS_BW6_761

View File

@@ -0,0 +1,113 @@
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include <cuda.h>
/**
 * Stores the result of `BW6_761::projective_t::rand_host()` in `*out`.
 * @return `CUDA_SUCCESS` on normal completion, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int random_projective_bw6_761(BW6_761::projective_t* out)
{
  int status = CUDA_SUCCESS;
  try {
    *out = BW6_761::projective_t::rand_host();
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  return status;
}
/** Returns the value of `BW6_761::projective_t::zero()` by value. */
extern "C" BW6_761::projective_t projective_zero_bw6_761()
{
  BW6_761::projective_t zero_point = BW6_761::projective_t::zero();
  return zero_point;
}
/** Returns the result of `BW6_761::projective_t::is_on_curve` for the point that `point1` points to. */
extern "C" bool projective_is_on_curve_bw6_761(BW6_761::projective_t* point1)
{
  BW6_761::projective_t& candidate = *point1;
  const bool on_curve = BW6_761::projective_t::is_on_curve(candidate);
  return on_curve;
}
/**
 * Converts `*point1` with `BW6_761::projective_t::to_affine` and stores the result in `*out`.
 * @return `CUDA_SUCCESS` on normal completion, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int projective_to_affine_bw6_761(BW6_761::affine_t* out, BW6_761::projective_t* point1)
{
  int status = CUDA_SUCCESS;
  try {
    *out = BW6_761::projective_t::to_affine(*point1);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  return status;
}
/**
 * Converts `*point1` with `BW6_761::projective_t::from_affine` and stores the result in `*out`.
 * @return `CUDA_SUCCESS` on normal completion, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int projective_from_affine_bw6_761(BW6_761::projective_t* out, BW6_761::affine_t* point1)
{
  int status = CUDA_SUCCESS;
  try {
    *out = BW6_761::projective_t::from_affine(*point1);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  return status;
}
/**
 * Stores the result of `BW6_761::scalar_t::rand_host()` in `*out`.
 * @return `CUDA_SUCCESS` on normal completion, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int random_scalar_bw6_761(BW6_761::scalar_t* out)
{
  int status = CUDA_SUCCESS;
  try {
    *out = BW6_761::scalar_t::rand_host();
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  return status;
}
/**
 * Compares two projective points.
 * Returns true only when `*point1 == *point2` and neither point has all three coordinates equal to the field
 * zero; an all-zero (x, y, z) triple never compares equal, even to itself.
 */
extern "C" bool eq_bw6_761(BW6_761::projective_t* point1, BW6_761::projective_t* point2)
{
  if (!(*point1 == *point2)) return false;

  const bool first_all_zero = (point1->x == BW6_761::point_field_t::zero()) &&
                              (point1->y == BW6_761::point_field_t::zero()) &&
                              (point1->z == BW6_761::point_field_t::zero());
  if (first_all_zero) return false;

  const bool second_all_zero = (point2->x == BW6_761::point_field_t::zero()) &&
                               (point2->y == BW6_761::point_field_t::zero()) &&
                               (point2->z == BW6_761::point_field_t::zero());
  return !second_all_zero;
}
#if defined(G2_DEFINED)
/**
 * Compares two G2 projective points.
 * Returns true only when `*point1 == *point2` and neither point has all three coordinates equal to
 * `BW6_761::point_field_t::zero()`; an all-zero (x, y, z) triple never compares equal, even to itself.
 */
extern "C" bool eq_g2_bw6_761(BW6_761::g2_projective_t* point1, BW6_761::g2_projective_t* point2)
{
  if (!(*point1 == *point2)) return false;

  const bool first_all_zero = (point1->x == BW6_761::point_field_t::zero()) &&
                              (point1->y == BW6_761::point_field_t::zero()) &&
                              (point1->z == BW6_761::point_field_t::zero());
  if (first_all_zero) return false;

  const bool second_all_zero = (point2->x == BW6_761::point_field_t::zero()) &&
                               (point2->y == BW6_761::point_field_t::zero()) &&
                               (point2->z == BW6_761::point_field_t::zero());
  return !second_all_zero;
}
/**
 * Stores the result of `BW6_761::g2_projective_t::rand_host()` in `*out`.
 * @return `CUDA_SUCCESS` on normal completion, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int random_g2_projective_bw6_761(BW6_761::g2_projective_t* out)
{
  int status = CUDA_SUCCESS;
  try {
    *out = BW6_761::g2_projective_t::rand_host();
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  return status;
}
/**
 * Converts `*point1` with `BW6_761::g2_projective_t::to_affine` and stores the result in `*out`.
 * @return `CUDA_SUCCESS` on normal completion, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int g2_projective_to_affine_bw6_761(BW6_761::g2_affine_t* out, BW6_761::g2_projective_t* point1)
{
  int status = CUDA_SUCCESS;
  try {
    *out = BW6_761::g2_projective_t::to_affine(*point1);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  return status;
}
/**
 * Converts `*point1` with `BW6_761::g2_projective_t::from_affine` and stores the result in `*out`.
 * @return `CUDA_SUCCESS` on normal completion, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int g2_projective_from_affine_bw6_761(BW6_761::g2_projective_t* out, BW6_761::g2_affine_t* point1)
{
  int status = CUDA_SUCCESS;
  try {
    *out = BW6_761::g2_projective_t::from_affine(*point1);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  return status;
}
/** Returns the result of `BW6_761::g2_projective_t::is_on_curve` for the point that `point1` points to. */
extern "C" bool g2_projective_is_on_curve_bw6_761(BW6_761::g2_projective_t* point1)
{
  BW6_761::g2_projective_t& candidate = *point1;
  const bool on_curve = BW6_761::g2_projective_t::is_on_curve(candidate);
  return on_curve;
}
#endif

View File

@@ -0,0 +1,4 @@
#include "lde.cu"
#include "msm.cu"
#include "projective.cu"
#include "ve_mod_mult.cu"

View File

@@ -0,0 +1,76 @@
#ifndef _BW6_761_VEC_MULT
#define _BW6_761_VEC_MULT
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "../../utils/storage.cuh"
#include "curve_config.cuh"
#include <iostream>
#include <stdio.h>
/**
 * C-linkage wrapper around `vector_mod_mult<projective_t, scalar_t>`: `inout` is passed as both the second
 * and third argument, after `scalar_vec`.
 * `device_id` is accepted but not used yet.
 * @return `CUDA_SUCCESS` on normal completion, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int32_t vec_mod_mult_point_bw6_761(
  BW6_761::projective_t* inout,
  BW6_761::scalar_t* scalar_vec,
  size_t n_elments,
  size_t device_id,
  cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  int32_t status = CUDA_SUCCESS;
  try {
    // TODO: device_id
    vector_mod_mult<BW6_761::projective_t, BW6_761::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what()); // TODO: error code and message
    status = -1;
  }
  return status;
}
/**
 * C-linkage wrapper around `vector_mod_mult<scalar_t, scalar_t>`: `inout` is passed as both the second and
 * third argument, after `scalar_vec`.
 * `device_id` is accepted but not used yet.
 * @return `CUDA_SUCCESS` on normal completion, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int32_t vec_mod_mult_scalar_bw6_761(
  BW6_761::scalar_t* inout, BW6_761::scalar_t* scalar_vec, size_t n_elments, size_t device_id, cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  int32_t status = CUDA_SUCCESS;
  try {
    // TODO: device_id
    vector_mod_mult<BW6_761::scalar_t, BW6_761::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what()); // TODO: error code and message
    status = -1;
  }
  return status;
}
/**
 * C-linkage wrapper around `vector_mod_mult_device<scalar_t, scalar_t>`: `inout` is passed as both the second
 * and third argument, after `scalar_vec`. No stream is involved.
 * `device_id` is accepted but not used.
 * @return `CUDA_SUCCESS` on normal completion, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int32_t vec_mod_mult_device_scalar_bw6_761(
  BW6_761::scalar_t* inout, BW6_761::scalar_t* scalar_vec, size_t n_elements, size_t device_id)
{
  int32_t status = CUDA_SUCCESS;
  try {
    vector_mod_mult_device<BW6_761::scalar_t, BW6_761::scalar_t>(scalar_vec, inout, inout, n_elements);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what()); // TODO: error code and message
    status = -1;
  }
  return status;
}
/**
 * C-linkage wrapper around `matrix_mod_mult<scalar_t>`, forwarding the flattened matrix, the input and output
 * buffers, the element count and the stream unchanged.
 * `device_id` is accepted but not used yet.
 * @return `CUDA_SUCCESS` on normal completion, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int32_t matrix_vec_mod_mult_bw6_761(
  BW6_761::scalar_t* matrix_flattened,
  BW6_761::scalar_t* input,
  BW6_761::scalar_t* output,
  size_t n_elments,
  size_t device_id,
  cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  int32_t status = CUDA_SUCCESS;
  try {
    // TODO: device_id
    matrix_mod_mult<BW6_761::scalar_t>(matrix_flattened, input, output, n_elments, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what()); // TODO: error code and message
    status = -1;
  }
  return status;
}
#endif

View File

@@ -9,17 +9,22 @@
#include "params.cuh"
namespace ${CURVE_NAME_U} {
typedef Field<PARAMS_${CURVE_NAME_U}::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_${CURVE_NAME_U}::fp_config> scalar_t;
typedef Field<PARAMS_${CURVE_NAME_U}::fq_config> point_field_t;
static constexpr point_field_t gen_x = point_field_t{PARAMS_${CURVE_NAME_U}::g1_gen_x};
static constexpr point_field_t gen_y = point_field_t{PARAMS_${CURVE_NAME_U}::g1_gen_y};
static constexpr point_field_t b = point_field_t{PARAMS_${CURVE_NAME_U}::weierstrass_b};
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Projective<point_field_t, scalar_t, b, gen_x, gen_y> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_${CURVE_NAME_U}::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{
static constexpr g2_point_field_t g2_gen_x = g2_point_field_t{
point_field_t{PARAMS_${CURVE_NAME_U}::g2_gen_x_re}, point_field_t{PARAMS_${CURVE_NAME_U}::g2_gen_x_im}};
static constexpr g2_point_field_t g2_gen_y = g2_point_field_t{
point_field_t{PARAMS_${CURVE_NAME_U}::g2_gen_y_re}, point_field_t{PARAMS_${CURVE_NAME_U}::g2_gen_y_im}};
static constexpr g2_point_field_t g2_b = g2_point_field_t{
point_field_t{PARAMS_${CURVE_NAME_U}::weierstrass_b_g2_re}, point_field_t{PARAMS_${CURVE_NAME_U}::weierstrass_b_g2_im}};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Projective<g2_point_field_t, scalar_t, g2_b, g2_gen_x, g2_gen_y> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
}
} // namespace ${CURVE_NAME_U}

View File

@@ -20,7 +20,6 @@ namespace PARAMS_${curve_name_U} {
static constexpr storage<limbs_count> zero = {${fp_zero}};
static constexpr storage<limbs_count> montgomery_r = {${fp_montgomery_r}};
static constexpr storage<limbs_count> montgomery_r_inv = {${fp_montgomery_r_inv}};
static constexpr storage_array<omegas_count, limbs_count> omega = { {
${omega}
@@ -53,18 +52,19 @@ namespace PARAMS_${curve_name_U} {
static constexpr storage<limbs_count> montgomery_r = {${fq_montgomery_r}};
static constexpr storage<limbs_count> montgomery_r_inv = {${fq_montgomery_r_inv}};
// i^2, the square of the imaginary unit for the extension field
static constexpr uint32_t i_squared = 1;
static constexpr uint32_t i_squared = ${nonresidue};
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
static constexpr storage<limbs_count> g1_gen_x = {${fq_gen_x}};
static constexpr storage<limbs_count> g1_gen_y = {${fq_gen_y}};
static constexpr storage<limbs_count> g2_gen_x_re = {${fq_gen_x_re}};
static constexpr storage<limbs_count> g2_gen_x_im = {${fq_gen_x_im}};
static constexpr storage<limbs_count> g2_gen_y_re = {${fq_gen_y_re}};
static constexpr storage<limbs_count> g2_gen_y_im = {${fq_gen_y_im}};
static constexpr bool i_squared_is_negative = ${nonresidue_is_negative};
};
// G1 and G2 generators
static constexpr storage<fq_config::limbs_count> g1_gen_x = {${fq_gen_x}};
static constexpr storage<fq_config::limbs_count> g1_gen_y = {${fq_gen_y}};
static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {${fq_gen_x_re}};
static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {${fq_gen_x_im}};
static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {${fq_gen_y_re}};
static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {${fq_gen_y_im}};
static constexpr storage<fq_config::limbs_count> weierstrass_b = {${weier_b}};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {${weier_b_g2_re}};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {${weier_b_g2_im}};

View File

@@ -41,9 +41,9 @@ extern "C" int projective_from_affine_${CURVE_NAME_L}(${CURVE_NAME_U}::projectiv
}
}
extern "C" int random_scalar_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_field_t* out) {
extern "C" int random_scalar_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* out) {
try {
out[0] = ${CURVE_NAME_U}::scalar_field_t::rand_host();
out[0] = ${CURVE_NAME_U}::scalar_t::rand_host();
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());

View File

@@ -1,3 +1,4 @@
#include "bls12_377/supported_operations.cu"
#include "bls12_381/supported_operations.cu"
#include "bn254/supported_operations.cu"
#include "bn254/supported_operations.cu"
// #include "bw6_761/supported_operations.cu"

View File

@@ -34,10 +34,6 @@ public:
return Field{scalar};
}
static constexpr HOST_DEVICE_INLINE Field generator_x() { return Field{CONFIG::g1_gen_x}; }
static constexpr HOST_DEVICE_INLINE Field generator_y() { return Field{CONFIG::g1_gen_y}; }
static HOST_INLINE Field omega(uint32_t logn)
{
if (logn == 0) { return Field{CONFIG::one}; }

View File

@@ -2,7 +2,7 @@
#include "affine.cuh"
template <typename FF, class SCALAR_FF, const FF& B_VALUE>
template <typename FF, class SCALAR_FF, const FF& B_VALUE, const FF& GENERATOR_X, const FF& GENERATOR_Y>
class Projective
{
friend Affine<FF>;
@@ -22,7 +22,7 @@ public:
static HOST_DEVICE_INLINE Projective from_affine(const Affine<FF>& point) { return {point.x, point.y, FF::one()}; }
static HOST_DEVICE_INLINE Projective generator() { return {FF::generator_x(), FF::generator_y(), FF::one()}; }
static HOST_DEVICE_INLINE Projective generator() { return {GENERATOR_X, GENERATOR_Y, FF::one()}; }
static HOST_DEVICE_INLINE Projective neg(const Projective& point) { return {point.x, FF::neg(point.y), point.z}; }

View File

@@ -30,22 +30,22 @@ protected:
projective_t* points2{};
g2_projective_t* g2_points1{};
g2_projective_t* g2_points2{};
scalar_field_t* scalars1{};
scalar_field_t* scalars2{};
scalar_t* scalars1{};
scalar_t* scalars2{};
projective_t* zero_points{};
g2_projective_t* g2_zero_points{};
scalar_field_t* zero_scalars{};
scalar_field_t* one_scalars{};
scalar_t* zero_scalars{};
scalar_t* one_scalars{};
affine_t* aff_points{};
g2_affine_t* g2_aff_points{};
projective_t* res_points1{};
projective_t* res_points2{};
g2_projective_t* g2_res_points1{};
g2_projective_t* g2_res_points2{};
scalar_field_t* res_scalars1{};
scalar_field_t* res_scalars2{};
scalar_field_t::Wide* res_scalars_wide{};
scalar_field_t::Wide* res_scalars_wide_full{};
scalar_t* res_scalars1{};
scalar_t* res_scalars2{};
scalar_t::Wide* res_scalars_wide{};
scalar_t::Wide* res_scalars_wide_full{};
PrimitivesTest()
{
@@ -54,22 +54,22 @@ protected:
assert(!cudaMallocManaged(&points2, n * sizeof(projective_t)));
assert(!cudaMallocManaged(&g2_points1, n * sizeof(g2_projective_t)));
assert(!cudaMallocManaged(&g2_points2, n * sizeof(g2_projective_t)));
assert(!cudaMallocManaged(&scalars1, n * sizeof(scalar_field_t)));
assert(!cudaMallocManaged(&scalars2, n * sizeof(scalar_field_t)));
assert(!cudaMallocManaged(&scalars1, n * sizeof(scalar_t)));
assert(!cudaMallocManaged(&scalars2, n * sizeof(scalar_t)));
assert(!cudaMallocManaged(&zero_points, n * sizeof(projective_t)));
assert(!cudaMallocManaged(&g2_zero_points, n * sizeof(g2_projective_t)));
assert(!cudaMallocManaged(&zero_scalars, n * sizeof(scalar_field_t)));
assert(!cudaMallocManaged(&one_scalars, n * sizeof(scalar_field_t)));
assert(!cudaMallocManaged(&zero_scalars, n * sizeof(scalar_t)));
assert(!cudaMallocManaged(&one_scalars, n * sizeof(scalar_t)));
assert(!cudaMallocManaged(&aff_points, n * sizeof(affine_t)));
assert(!cudaMallocManaged(&g2_aff_points, n * sizeof(g2_affine_t)));
assert(!cudaMallocManaged(&res_points1, n * sizeof(projective_t)));
assert(!cudaMallocManaged(&res_points2, n * sizeof(projective_t)));
assert(!cudaMallocManaged(&g2_res_points1, n * sizeof(g2_projective_t)));
assert(!cudaMallocManaged(&g2_res_points2, n * sizeof(g2_projective_t)));
assert(!cudaMallocManaged(&res_scalars1, n * sizeof(scalar_field_t)));
assert(!cudaMallocManaged(&res_scalars2, n * sizeof(scalar_field_t)));
assert(!cudaMallocManaged(&res_scalars_wide, n * sizeof(scalar_field_t::Wide)));
assert(!cudaMallocManaged(&res_scalars_wide_full, n * sizeof(scalar_field_t::Wide)));
assert(!cudaMallocManaged(&res_scalars1, n * sizeof(scalar_t)));
assert(!cudaMallocManaged(&res_scalars2, n * sizeof(scalar_t)));
assert(!cudaMallocManaged(&res_scalars_wide, n * sizeof(scalar_t::Wide)));
assert(!cudaMallocManaged(&res_scalars_wide_full, n * sizeof(scalar_t::Wide)));
}
~PrimitivesTest() override
@@ -105,22 +105,22 @@ protected:
ASSERT_EQ(device_populate_random<projective_t>(points2, n), cudaSuccess);
ASSERT_EQ(device_populate_random<g2_projective_t>(g2_points1, n), cudaSuccess);
ASSERT_EQ(device_populate_random<g2_projective_t>(g2_points2, n), cudaSuccess);
ASSERT_EQ(device_populate_random<scalar_field_t>(scalars1, n), cudaSuccess);
ASSERT_EQ(device_populate_random<scalar_field_t>(scalars2, n), cudaSuccess);
ASSERT_EQ(device_populate_random<scalar_t>(scalars1, n), cudaSuccess);
ASSERT_EQ(device_populate_random<scalar_t>(scalars2, n), cudaSuccess);
ASSERT_EQ(device_set<projective_t>(zero_points, projective_t::zero(), n), cudaSuccess);
ASSERT_EQ(device_set<g2_projective_t>(g2_zero_points, g2_projective_t::zero(), n), cudaSuccess);
ASSERT_EQ(device_set<scalar_field_t>(zero_scalars, scalar_field_t::zero(), n), cudaSuccess);
ASSERT_EQ(device_set<scalar_field_t>(one_scalars, scalar_field_t::one(), n), cudaSuccess);
ASSERT_EQ(device_set<scalar_t>(zero_scalars, scalar_t::zero(), n), cudaSuccess);
ASSERT_EQ(device_set<scalar_t>(one_scalars, scalar_t::one(), n), cudaSuccess);
ASSERT_EQ(cudaMemset(aff_points, 0, n * sizeof(affine_t)), cudaSuccess);
ASSERT_EQ(cudaMemset(g2_aff_points, 0, n * sizeof(g2_affine_t)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_points1, 0, n * sizeof(projective_t)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_points2, 0, n * sizeof(projective_t)), cudaSuccess);
ASSERT_EQ(cudaMemset(g2_res_points1, 0, n * sizeof(g2_projective_t)), cudaSuccess);
ASSERT_EQ(cudaMemset(g2_res_points2, 0, n * sizeof(g2_projective_t)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars1, 0, n * sizeof(scalar_field_t)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars2, 0, n * sizeof(scalar_field_t)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars_wide, 0, n * sizeof(scalar_field_t::Wide)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars_wide_full, 0, n * sizeof(scalar_field_t::Wide)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars1, 0, n * sizeof(scalar_t)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars2, 0, n * sizeof(scalar_t)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars_wide, 0, n * sizeof(scalar_t::Wide)), cudaSuccess);
ASSERT_EQ(cudaMemset(res_scalars_wide_full, 0, n * sizeof(scalar_t::Wide)), cudaSuccess);
}
};
@@ -324,19 +324,19 @@ TEST_F(PrimitivesTest, MP_LSB_MULT)
// LSB multiply, check correctness of first TLC + 1 digits result.
ASSERT_EQ(mp_lsb_mult(scalars1, scalars2, res_scalars_wide), cudaSuccess);
std::cout << "first GPU lsb mult output = 0x";
for (int i = 0; i < 2 * scalar_field_t::TLC; i++) {
for (int i = 0; i < 2 * scalar_t::TLC; i++) {
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i];
}
std::cout << std::endl;
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
std::cout << "first GPU full mult output = 0x";
for (int i = 0; i < 2 * scalar_field_t::TLC; i++) {
for (int i = 0; i < 2 * scalar_t::TLC; i++) {
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i];
}
std::cout << std::endl;
for (int j = 0; j < n; j++) {
for (int i = 0; i < scalar_field_t::TLC + 1; i++) {
for (int i = 0; i < scalar_t::TLC + 1; i++) {
ASSERT_EQ(res_scalars_wide_full[j].limbs_storage.limbs[i], res_scalars_wide[j].limbs_storage.limbs[i]);
}
}
@@ -347,20 +347,20 @@ TEST_F(PrimitivesTest, MP_MSB_MULT)
// MSB multiply, take n msb bits of multiplication, assert that the error is up to 1.
ASSERT_EQ(mp_msb_mult(scalars1, scalars2, res_scalars_wide), cudaSuccess);
std::cout << "first GPU msb mult output = 0x";
for (int i = 2 * scalar_field_t::TLC - 1; i >= 0; i--) {
for (int i = 2 * scalar_t::TLC - 1; i >= 0; i--) {
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i] << " ";
}
std::cout << std::endl;
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
std::cout << "first GPU full mult output = 0x";
for (int i = 2 * scalar_field_t::TLC - 1; i >= 0; i--) {
for (int i = 2 * scalar_t::TLC - 1; i >= 0; i--) {
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i] << " ";
}
std::cout << std::endl;
for (int i = 0; i < 2 * scalar_field_t::TLC - 1; i++) {
for (int i = 0; i < 2 * scalar_t::TLC - 1; i++) {
if (res_scalars_wide_full[0].limbs_storage.limbs[i] == res_scalars_wide[0].limbs_storage.limbs[i])
std::cout << "matched word idx = " << i << std::endl;
}
@@ -371,25 +371,25 @@ TEST_F(PrimitivesTest, INGO_MP_MULT)
// INGO full multiply: every limb except the last one (index 2 * TLC - 1) must match the mp_mult result.
ASSERT_EQ(ingo_mp_mult(scalars1, scalars2, res_scalars_wide), cudaSuccess);
std::cout << "INGO = 0x";
for (int i = 0; i < 2 * scalar_field_t::TLC; i++) {
for (int i = 0; i < 2 * scalar_t::TLC; i++) {
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i] << " ";
}
std::cout << std::endl;
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
std::cout << "ZKSYNC = 0x";
for (int i = 0; i < 2 * scalar_field_t::TLC; i++) {
for (int i = 0; i < 2 * scalar_t::TLC; i++) {
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i] << " ";
}
std::cout << std::endl;
for (int i = 0; i < 2 * scalar_field_t::TLC - 1; i++) {
for (int i = 0; i < 2 * scalar_t::TLC - 1; i++) {
if (res_scalars_wide_full[0].limbs_storage.limbs[i] == res_scalars_wide[0].limbs_storage.limbs[i])
std::cout << "matched word idx = " << i << std::endl;
}
for (int j = 0; j < n; j++) {
for (int i = 0; i < 2 * scalar_field_t::TLC - 1; i++) {
for (int i = 0; i < 2 * scalar_t::TLC - 1; i++) {
ASSERT_EQ(res_scalars_wide_full[j].limbs_storage.limbs[i], res_scalars_wide[j].limbs_storage.limbs[i]);
}
}

View File

@@ -4,13 +4,13 @@
#define G2_DEFINED
// TODO: change the curve depending on env variable
#include "../curves/bn254/curve_config.cuh"
#include "../curves/bw6_761/curve_config.cuh"
#include "extension_field.cuh"
#include "projective.cuh"
#endif
using namespace BN254;
using namespace BW6_761;
template <class T1, class T2>
__global__ void add_elements_kernel(const T1* x, const T2* y, T1* result, const unsigned count)
@@ -76,28 +76,28 @@ int vec_mul(const F* x, const G* y, G* result, const unsigned count)
return error ? error : cudaDeviceSynchronize();
}
__global__ void inv_field_elements_kernel(const scalar_field_t* x, scalar_field_t* result, const unsigned count)
__global__ void inv_field_elements_kernel(const scalar_t* x, scalar_t* result, const unsigned count)
{
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
if (gid >= count) return;
result[gid] = scalar_field_t::inverse(x[gid]);
result[gid] = scalar_t::inverse(x[gid]);
}
int field_vec_inv(const scalar_field_t* x, scalar_field_t* result, const unsigned count)
int field_vec_inv(const scalar_t* x, scalar_t* result, const unsigned count)
{
inv_field_elements_kernel<<<(count - 1) / 32 + 1, 32>>>(x, result, count);
int error = cudaGetLastError();
return error ? error : cudaDeviceSynchronize();
}
__global__ void sqr_field_elements_kernel(const scalar_field_t* x, scalar_field_t* result, const unsigned count)
__global__ void sqr_field_elements_kernel(const scalar_t* x, scalar_t* result, const unsigned count)
{
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
if (gid >= count) return;
result[gid] = scalar_field_t::sqr(x[gid]);
result[gid] = scalar_t::sqr(x[gid]);
}
int field_vec_sqr(const scalar_field_t* x, scalar_field_t* result, const unsigned count)
int field_vec_sqr(const scalar_t* x, scalar_t* result, const unsigned count)
{
sqr_field_elements_kernel<<<(count - 1) / 32 + 1, 32>>>(x, result, count);
int error = cudaGetLastError();
@@ -120,78 +120,78 @@ int point_vec_to_affine(const P* x, A* result, const unsigned count)
return error ? error : cudaDeviceSynchronize();
}
__global__ void mp_mult_kernel(const scalar_field_t* x, const scalar_field_t* y, scalar_field_t::Wide* result)
__global__ void mp_mult_kernel(const scalar_t* x, const scalar_t* y, scalar_t::Wide* result)
{
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
scalar_field_t::multiply_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
scalar_t::multiply_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
}
int mp_mult(const scalar_field_t* x, scalar_field_t* y, scalar_field_t::Wide* result)
int mp_mult(const scalar_t* x, scalar_t* y, scalar_t::Wide* result)
{
mp_mult_kernel<<<1, 32>>>(x, y, result);
int error = cudaGetLastError();
return error ? error : cudaDeviceSynchronize();
}
__global__ void mp_lsb_mult_kernel(const scalar_field_t* x, const scalar_field_t* y, scalar_field_t::Wide* result)
__global__ void mp_lsb_mult_kernel(const scalar_t* x, const scalar_t* y, scalar_t::Wide* result)
{
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
scalar_field_t::multiply_lsb_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
scalar_t::multiply_lsb_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
}
int mp_lsb_mult(const scalar_field_t* x, scalar_field_t* y, scalar_field_t::Wide* result)
int mp_lsb_mult(const scalar_t* x, scalar_t* y, scalar_t::Wide* result)
{
mp_lsb_mult_kernel<<<1, 32>>>(x, y, result);
int error = cudaGetLastError();
return error ? error : cudaDeviceSynchronize();
}
__global__ void mp_msb_mult_kernel(const scalar_field_t* x, const scalar_field_t* y, scalar_field_t::Wide* result)
__global__ void mp_msb_mult_kernel(const scalar_t* x, const scalar_t* y, scalar_t::Wide* result)
{
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
scalar_field_t::multiply_msb_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
scalar_t::multiply_msb_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
}
int mp_msb_mult(const scalar_field_t* x, scalar_field_t* y, scalar_field_t::Wide* result)
int mp_msb_mult(const scalar_t* x, scalar_t* y, scalar_t::Wide* result)
{
mp_msb_mult_kernel<<<1, 1>>>(x, y, result);
int error = cudaGetLastError();
return error ? error : cudaDeviceSynchronize();
}
__global__ void ingo_mp_mult_kernel(const scalar_field_t* x, const scalar_field_t* y, scalar_field_t::Wide* result)
__global__ void ingo_mp_mult_kernel(const scalar_t* x, const scalar_t* y, scalar_t::Wide* result)
{
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
scalar_field_t::ingo_multiply_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
scalar_t::ingo_multiply_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
}
int ingo_mp_mult(const scalar_field_t* x, scalar_field_t* y, scalar_field_t::Wide* result)
int ingo_mp_mult(const scalar_t* x, scalar_t* y, scalar_t::Wide* result)
{
ingo_mp_mult_kernel<<<1, 32>>>(x, y, result);
int error = cudaGetLastError();
return error ? error : cudaDeviceSynchronize();
}
__global__ void ingo_mp_msb_mult_kernel(const scalar_field_t* x, const scalar_field_t* y, scalar_field_t::Wide* result)
__global__ void ingo_mp_msb_mult_kernel(const scalar_t* x, const scalar_t* y, scalar_t::Wide* result)
{
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
scalar_field_t::ingo_msb_multiply_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
scalar_t::ingo_msb_multiply_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
}
int ingo_mp_msb_mult(const scalar_field_t* x, scalar_field_t* y, scalar_field_t::Wide* result, const unsigned n)
int ingo_mp_msb_mult(const scalar_t* x, scalar_t* y, scalar_t::Wide* result, const unsigned n)
{
ingo_mp_msb_mult_kernel<<<1, n>>>(x, y, result);
int error = cudaGetLastError();
return error ? error : cudaDeviceSynchronize();
}
__global__ void ingo_mp_mod_mult_kernel(const scalar_field_t* x, const scalar_field_t* y, scalar_field_t* result)
__global__ void ingo_mp_mod_mult_kernel(const scalar_t* x, const scalar_t* y, scalar_t* result)
{
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
result[gid] = x[gid] * y[gid];
}
int ingo_mp_mod_mult(const scalar_field_t* x, scalar_field_t* y, scalar_field_t* result, const unsigned n)
int ingo_mp_mod_mult(const scalar_t* x, scalar_t* y, scalar_t* result, const unsigned n)
{
ingo_mp_mod_mult_kernel<<<1, n>>>(x, y, result);
int error = cudaGetLastError();

View File

@@ -61,6 +61,7 @@
#include "../curves/bls12_377/curve_config.cuh"
#include "../curves/bls12_381/curve_config.cuh"
#include "../curves/bn254/curve_config.cuh"
#include "../curves/bw6_761/curve_config.cuh"
/** @brief Wrapper class for templatized dynamic shared memory arrays.
*
@@ -269,6 +270,25 @@ struct SharedMemory<BN254::projective_t> {
return s_projective_t_bn254;
}
};
// SharedMemory specialization for BW6_761::scalar_t: hands out a typed pointer to the
// dynamically sized shared-memory array.
template <>
struct SharedMemory<BW6_761::scalar_t> {
// Device-only accessor; returns the start of the shared array viewed as BW6_761::scalar_t.
__device__ BW6_761::scalar_t* getPointer()
{
// Unsized `extern __shared__` declaration: the array's size is not fixed here, it is the
// dynamic shared-memory size supplied with the kernel launch.
extern __shared__ BW6_761::scalar_t s_scalar_t_bw6_761[];
return s_scalar_t_bw6_761;
}
};
// SharedMemory specialization for BW6_761::projective_t: hands out a typed pointer to the
// dynamically sized shared-memory array.
template <>
struct SharedMemory<BW6_761::projective_t> {
// Device-only accessor; returns the start of the shared array viewed as BW6_761::projective_t.
__device__ BW6_761::projective_t* getPointer()
{
// Unsized `extern __shared__` declaration: the array's size is not fixed here, it is the
// dynamic shared-memory size supplied with the kernel launch.
extern __shared__ BW6_761::projective_t s_projective_t_bw6_761[];
return s_projective_t_bw6_761;
}
};
#endif //_SHAREDMEM_H_
// Leave this at the end of the file

View File

@@ -1580,9 +1580,7 @@ pub(crate) mod tests_bls12_377 {
}
}
// https://github.com/ingonyama-zk/icicle/issues/218
#[test]
#[ignore]
fn test_point_batch_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let batch_size = 2;