Compare commits

...

35 Commits

Author SHA1 Message Date
guy-ingo
4f2e308707 Merge remote-tracking branch 'origin/dev' into halo2 2023-08-28 14:18:09 +03:00
DmytroTym
78e20f9add Minimal correct MSM (#162) 2023-08-28 10:14:54 +03:00
ImmanuelSegol
7a2fa20da7 Remove decimation from API (#165)
Resolves #154
2023-08-27 14:08:56 +03:00
ImmanuelSegol
27627ed2c1 refactor (#158) 2023-08-24 12:02:43 +03:00
guy-ingo
7bbcd03721 update ntt 2023-08-22 15:09:38 +03:00
Jeremy Felder
b6c87c3fd8 Fix formatting for all files (#153) 2023-08-20 11:35:28 +03:00
guy-ingo
03dd2ea520 merging 2023-08-17 16:59:03 +03:00
guy-ingo
f1984cfee2 twiddle factors, ntt function, conversions 2023-08-17 16:56:11 +03:00
Leon Hibnik
e04bd928e6 Merge pull request #145 from ingonyama-zk/fix/goicicle-setup-script
setup.sh update
2023-08-17 12:08:24 +03:00
Leon Hibnik
cb6ed6af59 Merge branch 'dev' into fix/goicicle-setup-script 2023-08-17 12:07:07 +03:00
Jeremy Felder
9ea3350589 Add language formatters (#132) 2023-08-17 09:41:58 +03:00
Leon Hibnik
f38a9a322c Merge branch 'dev' into fix/goicicle-setup-script 2023-08-16 16:43:32 +03:00
ImmanuelSegol
ad1e482252 missing functions (#152) 2023-08-16 16:38:20 +03:00
Jeremy Felder
273bd536db Merge branch 'dev' into fix/goicicle-setup-script 2023-08-16 16:14:36 +03:00
Jeremy Felder
1463edc413 CI: Run linux on self-hosted, Make windows download smaller and remove caching (#150) (#151) 2023-08-16 16:14:22 +03:00
Jeremy Felder
db93204dc7 CI: Run linux on self-hosted, Make windows download smaller and remove caching (#150) 2023-08-16 15:16:32 +03:00
ImmanuelSegol
e1b692b8ed Merge branch 'dev' into fix/goicicle-setup-script 2023-08-16 12:59:44 +03:00
Leon Hibnik
e6416f4110 Merge pull request #146 from ingonyama-zk/fix/zeroedgecase
bucket_method_msm - address 0 edge case
2023-08-16 12:00:46 +03:00
ImmanuelSegol
96facd58d5 refactor: dont throw error when all scalars are 0 2023-08-15 19:53:45 +03:00
Leon Hibnik
11fe11b071 Merge branch 'dev' into fix/goicicle-setup-script 2023-08-15 14:16:54 +03:00
DmytroTym
19d0730aad Correct MSM for weird scalar distributions (#143) 2023-08-15 13:14:46 +03:00
LeonHibnik
36133ba26c setup.sh update 2023-08-15 12:14:06 +03:00
ImmanuelSegol
a1d9fa6648 fix - add missing go wrappers for all curves + add missing constants for curves (#130) 2023-08-14 13:15:56 +03:00
Vitalii Hnatyk
2f21ec4aa7 large_msm compilation hotfix (#131)
hotfix for missing parameter in large_msm
2023-07-27 10:05:45 +02:00
Jeremy Felder
5b504c44b7 Fix badges 2023-07-20 08:57:50 +03:00
Jeremy Felder
d13143506e writing .so file requires sudo 2023-07-19 21:44:12 +03:00
ImmanuelSegol
94c73e637c Fix/cudacodegoicile (#128)
* refactor

* refactor

* Refactor

* Refactor

* refactor: add sh script

* refactor

* refactor

* refactor: fix path
2023-07-19 21:30:20 +03:00
Jeremy Felder
b71b041561 Integrate msm performance improvements (#129) 2023-07-19 16:32:59 +03:00
ImmanuelSegol
a9d6ac0e27 move header file import (#127) 2023-07-19 08:33:05 +03:00
ImmanuelSegol
8a11a2f60e some minor changes (#125)
Co-authored-by: Jeremy Felder <jeremy.felder1@gmail.com>
2023-07-18 20:17:16 +03:00
ImmanuelSegol
ab69139ade Goicicle (#77) 2023-07-16 14:31:41 +03:00
Vitalii Hnatyk
7a8191bcb4 NTT improvements (shared mem + inplace) (#116)
Resolves #112
2023-07-16 13:56:20 +03:00
Jeremy Felder
2a3f5a258a Merge pull request #106 from gkigiermo/fix/cuda-test-suite
Resolves #103
2023-06-15 11:48:36 +03:00
Guillermo Oyarzun
9023daeb4f Add c++17 requirement to cmake 2023-06-15 10:06:02 +02:00
Guillermo Oyarzun
4d83ba101c Fix curve config location and link to some namespace struct members 2023-06-13 23:52:48 +02:00
155 changed files with 27633 additions and 17414 deletions

38
.clang-format Normal file
View File

@@ -0,0 +1,38 @@
Language: Cpp
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveMacros: true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: true
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: false
BreakBeforeBraces: Custom
BraceWrapping:
AfterClass: true
AfterFunction: true
BreakBeforeBinaryOperators: false
BreakBeforeTernaryOperators: true
ColumnLimit: 120
ContinuationIndentWidth: 2
Cpp11BracedListStyle: true
DisableFormat: false
IndentFunctionDeclarationAfterType: false
IndentWidth: 2
KeepEmptyLinesAtTheStartOfBlocks: false
MaxEmptyLinesToKeep: 1
NamespaceIndentation: All
PointerAlignment: Left
SpaceBeforeAssignmentOperators: true
SpaceBeforeParens: ControlStatements
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 1
SpacesInAngles: false
SpacesInContainerLiterals: false
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
Standard: c++17
UseTab: Never

40
.github/workflows/build-dev.yml vendored Normal file
View File

@@ -0,0 +1,40 @@
name: Build Dev
on:
pull_request:
branches:
- dev
paths:
- icicle/**
- src/**
- Cargo.toml
- build.rs
env:
CARGO_TERM_COLOR: always
ARCH_TYPE: sm_70
jobs:
build-linux:
runs-on: [self-hosted, Linux, X64, icicle]
steps:
- name: Checkout Repo
uses: actions/checkout@v3
- name: Build
run: cargo build --release --verbose
build-windows:
runs-on: windows-2022
steps:
- name: Checkout Repo
uses: actions/checkout@v3
- name: Download and Install Cuda
uses: Jimver/cuda-toolkit@v0.2.11
with:
cuda: '12.0.0'
method: 'network'
# https://docs.nvidia.com/cuda/archive/12.0.0/cuda-installation-guide-microsoft-windows/index.html
sub-packages: '["cudart", "nvcc", "thrust"]'
- name: Build
run: cargo build --release --verbose

View File

@@ -7,16 +7,6 @@ on:
- opened
branches:
- main
- dev
paths:
- icicle/**
- src/**
- Cargo.toml
- build.rs
push:
branches-ignore:
- main
- dev
paths:
- icicle/**
- src/**
@@ -29,32 +19,25 @@ env:
jobs:
build-linux:
runs-on: ubuntu-latest
runs-on: [self-hosted, Linux, X64, icicle]
steps:
# Checkout code
- uses: actions/checkout@v3
# Download (or from cache) and install CUDA Toolkit 12.1.0
- uses: Jimver/cuda-toolkit@v0.2.9
id: cuda-toolkit
with:
cuda: '12.1.0'
use-github-cache: true
# Build from cargo - Rust utils are preinstalled on latest images
# https://github.com/actions/runner-images/blob/main/images/linux/Ubuntu2204-Readme.md#rust-tools
- name: Checkout Repo
uses: actions/checkout@v3
- name: Build
run: cargo build --release --verbose
build-windows:
runs-on: windows-latest
runs-on: windows-2022
steps:
- uses: actions/checkout@v3
- uses: Jimver/cuda-toolkit@v0.2.9
id: cuda-toolkit
- name: Checkout Repo
uses: actions/checkout@v3
- name: Download and Install Cuda
uses: Jimver/cuda-toolkit@v0.2.11
with:
cuda: '12.1.0'
use-github-cache: true
cuda: '12.0.0'
method: 'network'
# https://docs.nvidia.com/cuda/archive/12.0.0/cuda-installation-guide-microsoft-windows/index.html
sub-packages: '["cudart", "nvcc", "thrust"]'
- name: Build
run: cargo build --release --verbose

3
.gitignore vendored
View File

@@ -5,6 +5,9 @@
*.cubin
*.bin
*.fatbin
*.so
*.nsys-rep
*.ncu-rep
**/target
**/.vscode
**/.*lock*csv#

10
.rustfmt.toml Normal file
View File

@@ -0,0 +1,10 @@
# https://github.com/rust-lang/rustfmt/blob/master/Configurations.md
# Stable Configs
chain_width = 0
max_width = 120
merge_derives = true
use_field_init_shorthand = true
use_try_shorthand = true
# Unstable Configs

View File

@@ -6,9 +6,11 @@
<div align="center">
![Build status](https://github.com/ingonyama-zk/icicle/actions/workflows/buil.yml/badge.svg)
![Build status](https://github.com/ingonyama-zk/icicle/actions/workflows/build.yml/badge.svg)
![Discord server](https://img.shields.io/discord/1063033227788423299?label=Discord&logo=Discord&logoColor=%23&style=plastic)
![Follow us on twitter](https://img.shields.io/twitter/follow/Ingo_zk?style=social)
</div>
## Background
@@ -161,6 +163,16 @@ Testing the new curve could be done by running the tests in ``tests_curve_name``
Join our [Discord Server][DISCORD] and find us on the icicle channel. We will be happy to work together to support your use case and talk features, bugs and design.
### Development Contributions
If you are changing code, please make sure to change your [git hooks path][HOOKS_DOCS] to the repo's [hooks directory][HOOKS_PATH] by running the following command:
```sh
git config core.hooksPath ./scripts/hooks
```
This will ensure our custom hooks are run and will make it easier to follow our coding guidelines.
### Hall of Fame
- [Robik](https://github.com/robik75), for his on-going support and mentorship
@@ -183,5 +195,7 @@ See [LICENSE-MIT][LMIT] for details.
[LMIT]: ./LICENSE
[DISCORD]: https://discord.gg/Y4SkbDf2Ff
[googletest]: https://github.com/google/googletest/
[HOOKS_DOCS]: https://git-scm.com/docs/githooks
[HOOKS_PATH]: ./scripts/hooks/
<!-- End Links -->

View File

@@ -34,16 +34,20 @@ fn bench_msm(c: &mut Criterion) {
#[cfg(feature = "g2")]
let mut d_g2_points = DeviceBuffer::from_slice(&g2_batch_points[..]).unwrap();
group.sample_size(30).bench_function(
&format!("MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|b| b.iter(|| commit_batch_bls12_381(&mut d_points, &mut d_scalars, batch_size)),
);
group
.sample_size(30)
.bench_function(
&format!("MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|b| b.iter(|| commit_batch_bls12_381(&mut d_points, &mut d_scalars, batch_size)),
);
#[cfg(feature = "g2")]
group.sample_size(10).bench_function(
&format!("G2 MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|b| b.iter(|| commit_batch_g2(&mut d_g2_points, &mut d_scalars, batch_size))
);
group
.sample_size(10)
.bench_function(
&format!("G2 MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|b| b.iter(|| commit_batch_g2(&mut d_g2_points, &mut d_scalars, batch_size)),
);
}
}
}

View File

@@ -2,33 +2,84 @@ extern crate criterion;
use criterion::{criterion_group, criterion_main, Criterion};
use icicle_utils::test_bls12_381::{interpolate_scalars_batch_bls12_381, interpolate_points_batch_bls12_381, set_up_scalars_bls12_381, set_up_points_bls12_381};
use icicle_utils::test_bls12_381::*;
const LOG_NTT_SIZES: [usize; 1] = [15];
const BATCH_SIZES: [usize; 2] = [8, 16];
const LOG_NTT_SIZES: [usize; 3] = [20, 9, 10];
const BATCH_SIZES: [usize; 3] = [1, 512, 1024];
fn bench_ntt(c: &mut Criterion) {
let mut group = c.benchmark_group("NTT");
for log_ntt_size in LOG_NTT_SIZES {
for batch_size in BATCH_SIZES {
let ntt_size = 1 << log_ntt_size;
if ntt_size * batch_size > 1 << 25 {
continue;
}
let scalar_samples = 20;
let (_, mut d_evals, mut d_domain) = set_up_scalars_bls12_381(ntt_size * batch_size, log_ntt_size, true);
let (_, mut d_points_evals, _) = set_up_points_bls12_381(ntt_size * batch_size, log_ntt_size, true);
group.sample_size(100).bench_function(
&format!("Scalar NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| interpolate_scalars_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size))
);
group
.sample_size(scalar_samples)
.bench_function(
&format!("Scalar NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| evaluate_scalars_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size)),
);
group.sample_size(10).bench_function(
&format!("EC NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| interpolate_points_batch_bls12_381(&mut d_points_evals, &mut d_domain, batch_size))
);
group
.sample_size(scalar_samples)
.bench_function(
&format!("Scalar iNTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| interpolate_scalars_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size)),
);
group
.sample_size(scalar_samples)
.bench_function(
&format!("Scalar inplace NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| ntt_inplace_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size, false, 0)),
);
group
.sample_size(scalar_samples)
.bench_function(
&format!("Scalar inplace iNTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| ntt_inplace_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size, true, 0)),
);
drop(d_evals);
drop(d_domain);
if ntt_size * batch_size > 1 << 18 {
continue;
}
let point_samples = 10;
let (_, mut d_points_evals, mut d_domain) =
set_up_points_bls12_381(ntt_size * batch_size, log_ntt_size, true);
group
.sample_size(point_samples)
.bench_function(
&format!("EC NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| interpolate_points_batch_bls12_381(&mut d_points_evals, &mut d_domain, batch_size)),
);
group
.sample_size(point_samples)
.bench_function(
&format!("EC iNTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| evaluate_points_batch_bls12_381(&mut d_points_evals, &mut d_domain, batch_size)),
);
drop(d_points_evals);
drop(d_domain);
}
}
}
criterion_group!(ntt_benches, bench_ntt);
criterion_main!(ntt_benches);

View File

@@ -26,8 +26,6 @@ fn main() {
nvcc.debug(false);
nvcc.flag(&arch);
nvcc.flag(&stream);
nvcc.files([
"./icicle/curves/index.cu",
]);
nvcc.files(["./icicle/curves/index.cu"]);
nvcc.compile("ingo_icicle"); //TODO: extension??
}

View File

@@ -3,11 +3,11 @@
"modulus_p" : 21888242871839275222246405745257275088548364400416034343698204186575808495617,
"bit_count_p" : 254,
"limb_p" : 8,
"ntt_size" : 28,
"ntt_size" : 20,
"modulus_q" : 21888242871839275222246405745257275088696311157297823662689037894645226208583,
"bit_count_q" : 254,
"limb_q" : 8,
"root_of_unity" : 19103219067921713944291392827692070036145651957329286315305642004821462161904,
"root_of_unity" : 19032961837237948602743626455740240236231119053033140765040043513661803148152,
"weierstrass_b" : 3,
"weierstrass_b_g2_re" : 19485874751759354771024239261021720505790618469301721065564631296452457478373,
"weierstrass_b_g2_im" : 266929791119991161246907387137283842545076965332900288569378510910307636690,

View File

@@ -204,14 +204,14 @@ newpath = f'./icicle/curves/{curve_name_lower}'
if not os.path.exists(newpath):
os.makedirs(newpath)
with open("./icicle/curves/curve_template/params.cuh", "r") as params_file:
with open("./icicle/curves/curve_template/params.cuh.tmpl", "r") as params_file:
params_file_template = Template(params_file.read())
params = get_params(config)
params_content = params_file_template.safe_substitute(params)
with open(f'./icicle/curves/{curve_name_lower}/params.cuh', 'w') as f:
f.write(params_content)
with open("./icicle/curves/curve_template/lde.cu", "r") as lde_file:
with open("./icicle/curves/curve_template/lde.cu.tmpl", "r") as lde_file:
template_content = Template(lde_file.read())
lde_content = template_content.safe_substitute(
CURVE_NAME_U=curve_name_upper,
@@ -220,7 +220,7 @@ with open("./icicle/curves/curve_template/lde.cu", "r") as lde_file:
with open(f'./icicle/curves/{curve_name_lower}/lde.cu', 'w') as f:
f.write(lde_content)
with open("./icicle/curves/curve_template/msm.cu", "r") as msm_file:
with open("./icicle/curves/curve_template/msm.cu.tmpl", "r") as msm_file:
template_content = Template(msm_file.read())
msm_content = template_content.safe_substitute(
CURVE_NAME_U=curve_name_upper,
@@ -229,7 +229,7 @@ with open("./icicle/curves/curve_template/msm.cu", "r") as msm_file:
with open(f'./icicle/curves/{curve_name_lower}/msm.cu', 'w') as f:
f.write(msm_content)
with open("./icicle/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_file:
with open("./icicle/curves/curve_template/ve_mod_mult.cu.tmpl", "r") as ve_mod_mult_file:
template_content = Template(ve_mod_mult_file.read())
ve_mod_mult_content = template_content.safe_substitute(
CURVE_NAME_U=curve_name_upper,
@@ -239,7 +239,7 @@ with open("./icicle/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_f
f.write(ve_mod_mult_content)
with open(f'./icicle/curves/curve_template/curve_config.cuh', 'r') as cc:
with open(f'./icicle/curves/curve_template/curve_config.cuh.tmpl', 'r') as cc:
template_content = Template(cc.read())
cc_content = template_content.safe_substitute(
CURVE_NAME_U=curve_name_upper,
@@ -248,7 +248,7 @@ with open(f'./icicle/curves/curve_template/curve_config.cuh', 'r') as cc:
f.write(cc_content)
with open(f'./icicle/curves/curve_template/projective.cu', 'r') as proj:
with open(f'./icicle/curves/curve_template/projective.cu.tmpl', 'r') as proj:
template_content = Template(proj.read())
proj_content = template_content.safe_substitute(
CURVE_NAME_U=curve_name_upper,
@@ -258,7 +258,7 @@ with open(f'./icicle/curves/curve_template/projective.cu', 'r') as proj:
f.write(proj_content)
with open(f'./icicle/curves/curve_template/supported_operations.cu', 'r') as supp_ops:
with open(f'./icicle/curves/curve_template/supported_operations.cu.tmpl', 'r') as supp_ops:
template_content = Template(supp_ops.read())
supp_ops_content = template_content.safe_substitute()
with open(f'./icicle/curves/{curve_name_lower}/supported_operations.cu', 'w') as f:
@@ -313,4 +313,4 @@ with open('./src/curves/mod.rs', 'r+') as f:
with open('./src/lib.rs', 'r+') as f:
lib_text = f.read()
if lib_text.find(curve_name_lower) == -1:
f.write('\npub mod ' + curve_name_lower + ';')
f.write('\npub mod ' + curve_name_lower + ';')

156
examples/ntt/main.rs Normal file
View File

@@ -0,0 +1,156 @@
use std::time::Instant;
use icicle_utils::{curves::bls12_381::ScalarField_BLS12_381, test_bls12_381::*};
use rustacuda::prelude::DeviceBuffer;
const LOG_NTT_SIZES: [usize; 3] = [20, 10, 9];
const BATCH_SIZES: [usize; 3] = [1, 1 << 9, 1 << 10];
const MAX_POINTS_LOG2: usize = 18;
const MAX_SCALARS_LOG2: usize = 26;
fn bench_lde() {
for log_ntt_size in LOG_NTT_SIZES {
for batch_size in BATCH_SIZES {
let ntt_size = 1 << log_ntt_size;
fn ntt_scalars_batch_bls12_381(
d_inout: &mut DeviceBuffer<ScalarField_BLS12_381>,
d_twiddles: &mut DeviceBuffer<ScalarField_BLS12_381>,
batch_size: usize,
) -> i32 {
ntt_inplace_batch_bls12_381(d_inout, d_twiddles, batch_size, false, 0);
0
}
fn intt_scalars_batch_bls12_381(
d_inout: &mut DeviceBuffer<ScalarField_BLS12_381>,
d_twiddles: &mut DeviceBuffer<ScalarField_BLS12_381>,
batch_size: usize,
) -> i32 {
ntt_inplace_batch_bls12_381(d_inout, d_twiddles, batch_size, true, 0);
0
}
// copy
bench_ntt_template(
MAX_SCALARS_LOG2,
ntt_size,
batch_size,
log_ntt_size,
set_up_scalars_bls12_381,
evaluate_scalars_batch_bls12_381,
"NTT",
false,
100,
);
bench_ntt_template(
MAX_SCALARS_LOG2,
ntt_size,
batch_size,
log_ntt_size,
set_up_scalars_bls12_381,
interpolate_scalars_batch_bls12_381,
"iNTT",
true,
100,
);
bench_ntt_template(
MAX_POINTS_LOG2,
ntt_size,
batch_size,
log_ntt_size,
set_up_points_bls12_381,
evaluate_points_batch_bls12_381,
"EC NTT",
false,
20,
);
bench_ntt_template(
MAX_POINTS_LOG2,
ntt_size,
batch_size,
log_ntt_size,
set_up_points_bls12_381,
interpolate_points_batch_bls12_381,
"EC iNTT",
true,
20,
);
// inplace
bench_ntt_template(
MAX_SCALARS_LOG2,
ntt_size,
batch_size,
log_ntt_size,
set_up_scalars_bls12_381,
ntt_scalars_batch_bls12_381,
"NTT inplace",
false,
100,
);
bench_ntt_template(
MAX_SCALARS_LOG2,
ntt_size,
batch_size,
log_ntt_size,
set_up_scalars_bls12_381,
intt_scalars_batch_bls12_381,
"iNTT inplace",
true,
100,
);
}
}
}
fn bench_ntt_template<E, S, R>(
log_max_size: usize,
ntt_size: usize,
batch_size: usize,
log_ntt_size: usize,
set_data: fn(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<E>, DeviceBuffer<E>, DeviceBuffer<S>),
bench_fn: fn(d_evaluations: &mut DeviceBuffer<E>, d_domain: &mut DeviceBuffer<S>, batch_size: usize) -> R,
id: &str,
inverse: bool,
samples: usize,
) -> Option<(Vec<E>, R)> {
let count = ntt_size * batch_size;
let bench_id = format!("{} of size 2^{} in batch {}", id, log_ntt_size, batch_size);
if count > 1 << log_max_size {
println!("Bench size exceeded: {}", bench_id);
return None;
}
println!("{}", bench_id);
let (input, mut d_evals, mut d_domain) = set_data(ntt_size * batch_size, log_ntt_size, inverse);
let first = bench_fn(&mut d_evals, &mut d_domain, batch_size);
let start = Instant::now();
for _ in 0..samples {
bench_fn(&mut d_evals, &mut d_domain, batch_size);
}
let elapsed = start.elapsed();
println!(
"{} {:0?} us x {} = {:?}",
bench_id,
elapsed.as_micros() as f32 / (samples as f32),
samples,
elapsed
);
Some((input, first))
}
fn main() {
bench_lde();
}

23
go.mod Normal file
View File

@@ -0,0 +1,23 @@
module github.com/ingonyama-zk/icicle
go 1.20
require github.com/consensys/gnark-crypto v0.11.0
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/jfeliu007/goplantuml v1.6.2 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/spf13/afero v1.8.2 // indirect
golang.org/x/text v0.3.7 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
require (
github.com/bits-and-blooms/bitset v1.5.0 // indirect
github.com/consensys/bavard v0.1.13
github.com/mmcloughlin/addchain v0.4.0 // indirect
github.com/stretchr/testify v1.8.3
golang.org/x/sys v0.2.0 // indirect
rsc.io/tmplfunc v0.0.3 // indirect
)

459
go.sum Normal file
View File

@@ -0,0 +1,459 @@
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
cloud.google.com/go v0.44.1/go.mod h1:iSa0KzasP4Uvy3f1mN/7PiObzGgflwredwwASm/v6AU=
cloud.google.com/go v0.44.2/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY=
cloud.google.com/go v0.44.3/go.mod h1:60680Gw3Yr4ikxnPRS/oxxkBccT6SA1yMk63TGekxKY=
cloud.google.com/go v0.45.1/go.mod h1:RpBamKRgapWJb87xiFSdk4g1CME7QZg3uwTez+TSTjc=
cloud.google.com/go v0.46.3/go.mod h1:a6bKKbmY7er1mI7TEI4lsAkts/mkhTSZK8w33B4RAg0=
cloud.google.com/go v0.50.0/go.mod h1:r9sluTvynVuxRIOHXQEHMFffphuXHOMZMycpNR5e6To=
cloud.google.com/go v0.52.0/go.mod h1:pXajvRH/6o3+F9jDHZWQ5PbGhn+o8w9qiu/CffaVdO4=
cloud.google.com/go v0.53.0/go.mod h1:fp/UouUEsRkN6ryDKNW/Upv/JBKnv6WDthjR6+vze6M=
cloud.google.com/go v0.54.0/go.mod h1:1rq2OEkV3YMf6n/9ZvGWI3GWw0VoqH/1x2nd8Is/bPc=
cloud.google.com/go v0.56.0/go.mod h1:jr7tqZxxKOVYizybht9+26Z/gUq7tiRzu+ACVAMbKVk=
cloud.google.com/go v0.57.0/go.mod h1:oXiQ6Rzq3RAkkY7N6t3TcE6jE+CIBBbA36lwQ1JyzZs=
cloud.google.com/go v0.62.0/go.mod h1:jmCYTdRCQuc1PHIIJ/maLInMho30T/Y0M4hTdTShOYc=
cloud.google.com/go v0.65.0/go.mod h1:O5N8zS7uWy9vkA9vayVHs65eM1ubvY4h553ofrNHObY=
cloud.google.com/go v0.72.0/go.mod h1:M+5Vjvlc2wnp6tjzE102Dw08nGShTscUx2nZMufOKPI=
cloud.google.com/go v0.74.0/go.mod h1:VV1xSbzvo+9QJOxLDaJfTjx5e+MePCpCWwvftOeQmWk=
cloud.google.com/go v0.75.0/go.mod h1:VGuuCn7PG0dwsd5XPVm2Mm3wlh3EL55/79EKB6hlPTY=
cloud.google.com/go/bigquery v1.0.1/go.mod h1:i/xbL2UlR5RvWAURpBYZTtm/cXjCha9lbfbpx4poX+o=
cloud.google.com/go/bigquery v1.3.0/go.mod h1:PjpwJnslEMmckchkHFfq+HTD2DmtT67aNFKH1/VBDHE=
cloud.google.com/go/bigquery v1.4.0/go.mod h1:S8dzgnTigyfTmLBfrtrhyYhwRxG72rYxvftPBK2Dvzc=
cloud.google.com/go/bigquery v1.5.0/go.mod h1:snEHRnqQbz117VIFhE8bmtwIDY80NLUZUMb4Nv6dBIg=
cloud.google.com/go/bigquery v1.7.0/go.mod h1://okPTzCYNXSlb24MZs83e2Do+h+VXtc4gLoIoXIAPc=
cloud.google.com/go/bigquery v1.8.0/go.mod h1:J5hqkt3O0uAFnINi6JXValWIb1v0goeZM77hZzJN/fQ=
cloud.google.com/go/datastore v1.0.0/go.mod h1:LXYbyblFSglQ5pkeyhO+Qmw7ukd3C+pD7TKLgZqpHYE=
cloud.google.com/go/datastore v1.1.0/go.mod h1:umbIZjpQpHh4hmRpGhH4tLFup+FVzqBi1b3c64qFpCk=
cloud.google.com/go/pubsub v1.0.1/go.mod h1:R0Gpsv3s54REJCy4fxDixWD93lHJMoZTyQ2kNxGRt3I=
cloud.google.com/go/pubsub v1.1.0/go.mod h1:EwwdRX2sKPjnvnqCa270oGRyludottCI76h+R3AArQw=
cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIAii9o8iA=
cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU=
cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=
cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
cloud.google.com/go/storage v1.8.0/go.mod h1:Wv1Oy7z6Yz3DshWRJFhqM/UCfaWIRTdp0RXyy7KQOVs=
cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9ullr3+Kg0=
cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/bits-and-blooms/bitset v1.5.0 h1:NpE8frKRLGHIcEzkR+gZhiioW1+WbYV6fKwD6ZIpQT8=
github.com/bits-and-blooms/bitset v1.5.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU=
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
github.com/cncf/udpa/go v0.0.0-20200629203442-efcf912fb354/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
github.com/consensys/bavard v0.1.13 h1:oLhMLOFGTLdlda/kma4VOJazblc7IM5y5QPd2A/YjhQ=
github.com/consensys/bavard v0.1.13/go.mod h1:9ItSMtA/dXMAiL7BG6bqW2m3NdSEObYWoH223nGHukI=
github.com/consensys/gnark-crypto v0.11.0 h1:QqzHQlwEqlQr5jfWblGDkwlKHpT+4QodYqqExkAtyks=
github.com/consensys/gnark-crypto v0.11.0/go.mod h1:Iq/P3HHl0ElSjsg2E1gsMwhAyxnxoKK5nVyZKd+/KhU=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/go-control-plane v0.9.7/go.mod h1:cwu0lG7PUMfa9snN8LXBig5ynNVH9qI8YYLbd1fK2po=
github.com/envoyproxy/go-control-plane v0.9.9-0.20201210154907-fd9021fe5dad/go.mod h1:cXg6YxExXjJnVBQHBLXeUAgxn2UodCpnH306RInaBQk=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/groupcache v0.0.0-20190702054246-869f871628b6/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20191227052852-215e87163ea7/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
github.com/golang/mock v1.4.0/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.4.1/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.4.3/go.mod h1:UOMv5ysSaYNkG+OFQykRIcU/QvvxJf3p21QfJ2Bt3cw=
github.com/golang/mock v1.4.4/go.mod h1:l3mdAwkq5BuhzHwde/uurv3sEJeZMXNpwsxVWU71h+4=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.3/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.3.5/go.mod h1:6O5/vntMXwX2lRkT1hjjk0nAC1IDOTvTlVgjlRvqsdk=
github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8=
github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA=
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8=
github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI=
github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.4.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.1/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
github.com/google/martian v2.1.0+incompatible/go.mod h1:9I4somxYTbIHy5NJKHRl3wXiIaQGbYVAs8BPL6v8lEs=
github.com/google/martian/v3 v3.0.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
github.com/google/martian/v3 v3.1.0/go.mod h1:y5Zk1BBys9G+gd6Jrk0W3cC1+ELVxBWuIGO+w/tUAp0=
github.com/google/pprof v0.0.0-20181206194817-3ea8567a2e57/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20190515194954-54271f7e092f/go.mod h1:zfwlbNMJ+OItoe0UupaVj+oy1omPYYDuagoSzA8v9mc=
github.com/google/pprof v0.0.0-20191218002539-d4f498aebedc/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200212024743-f11f1df84d12/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200229191704-1ebb73c60ed3/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200430221834-fc25d7d30c6d/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20200708004538-1a94d8640e99/go.mod h1:ZgVRPoUq/hfqzAqh7sHMqb3I9Rq5C59dIz2SbBwJ4eM=
github.com/google/pprof v0.0.0-20201023163331-3e6fc7fc9c4c/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20201203190320-1bf35d6f28c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/pprof v0.0.0-20201218002935-b9804c9f04c2/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE=
github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI=
github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3yTrtFlrHVk=
github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/googleapis/gax-go/v2 v2.0.4/go.mod h1:0Wqv26UfaUD9n4G6kQubkQ+KchISgw+vpHVxEJEs9eg=
github.com/googleapis/gax-go/v2 v2.0.5/go.mod h1:DWXyrwAJ9X0FpwwEdw+IPEYBICEFu5mhpdKc/us6bOk=
github.com/googleapis/google-cloud-go-testing v0.0.0-20200911160855-bcd43fbb19e8/go.mod h1:dvDLG8qkwmyD9a/MJJN3XJcT3xFxOKAvTZGvuZmac9g=
github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/hashicorp/golang-lru v0.5.1/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8=
github.com/ianlancetaylor/demangle v0.0.0-20181102032728-5e5cf60278f6/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc=
github.com/jfeliu007/goplantuml v1.6.2 h1:WggxwLIBLF8P9OU/G/H2+IeHpEx5I7QK780jb78twEM=
github.com/jfeliu007/goplantuml v1.6.2/go.mod h1:GnvyYGyIXD68akNFe2FBlNBypwfbpeNmVUQ4ZxJw8iI=
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c=
github.com/mmcloughlin/addchain v0.4.0 h1:SobOdjm2xLj1KkXN5/n0xTIWyZA2+s99UCY1iPfkHRY=
github.com/mmcloughlin/addchain v0.4.0/go.mod h1:A86O+tHqZLMNO4w6ZZ4FlVQEadcoqkyU72HC5wJ4RlU=
github.com/mmcloughlin/profile v0.1.1/go.mod h1:IhHD7q1ooxgwTgjxQYkACGA77oFTDdFVejUS1/tS/qU=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/sftp v1.13.1/go.mod h1:3HaPG6Dq1ILlpPZRO0HVMrsydcdLt6HRDccSgb87qRg=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/spf13/afero v1.8.2 h1:xehSyVa0YnHWsJ49JFljMpg1HX19V6NDZ1fkm1Xznbo=
github.com/spf13/afero v1.8.2/go.mod h1:CtAatgMJh6bJEIs48Ay/FOnkljP3WeGUG0MC1RfAqwo=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
go.opencensus.io v0.22.5/go.mod h1:5pWMHQbX5EPX2/62yrJeAkowc+lfs/XD7Uxpq3pI6kk=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
golang.org/x/exp v0.0.0-20190829153037-c13cbed26979/go.mod h1:86+5VVa7VpoJ4kLfm080zCjGlMRFzhUhsZKEZO7MGek=
golang.org/x/exp v0.0.0-20191030013958-a1ab85dbe136/go.mod h1:JXzH8nQsPlswgeRAPE3MuO9GYsAcnJvJ4vnMwN/5qkY=
golang.org/x/exp v0.0.0-20191129062945-2f5052295587/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20191227195350-da58074b4299/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200119233911-0405dc783f0a/go.mod h1:2RIsYlXP63K8oxa1u096TMicItID8zy7Y6sNkU49FU4=
golang.org/x/exp v0.0.0-20200207192155-f17229e696bd/go.mod h1:J/WKrq2StrnmMY6+EHIKF9dgMWnmCNThgcyBT1FY9mM=
golang.org/x/exp v0.0.0-20200224162631-6cc2880d07d6/go.mod h1:3jZMyOhIsHpP37uCMkUooju7aAi5cS1Q23tOzKc+0MU=
golang.org/x/image v0.0.0-20190227222117-0694c2d4d067/go.mod h1:kZ7UVZpmo3dzQBMxlp+ypCbDeSB+sBbTgSJuh5dn5js=
golang.org/x/image v0.0.0-20190802002840-cff245a6509b/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU=
golang.org/x/lint v0.0.0-20190301231843-5614ed5bae6f/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE=
golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190409202823-959b441ac422/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190909230951-414d861bb4ac/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc=
golang.org/x/lint v0.0.0-20191125180803-fdd1cda4f05f/go.mod h1:5qLYkcX4OjUUV8bRuDixDT3tpyyb+LUpUlRWLxfhWrs=
golang.org/x/lint v0.0.0-20200130185559-910be7a94367/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/lint v0.0.0-20201208152925-83fdc39ff7b5/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mobile v0.0.0-20190312151609-d3739f865fa6/go.mod h1:z+o9i4GpDbdi3rU15maQ/Ox0txvL9dWGYEHz965HBQE=
golang.org/x/mobile v0.0.0-20190719004257-d2bd2a29d028/go.mod h1:E/iHnbuqvinMTCcRqshq8CkpyQDoeVncDDYHnLhea+o=
golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc=
golang.org/x/mod v0.1.0/go.mod h1:0QHyrYULN0/3qlju5TqG8bIK38QM8yzMo5ekMj3DlcY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.1.1-0.20191107180719-034126e5016b/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190108225652-1e06a53dbb7e/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190501004415-9ce7a6920f09/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190503192946-f4e77d36d62c/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190628185345-da137c7871d7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190724013045-ca1201d0de80/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20191209160850-c0dbc17a3553/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200114155413-6afb5195e5aa/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200222125558-5a598a2470a0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200506145744-7e3656a0809f/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/oauth2 v0.0.0-20201109201403-9fd604954f58/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/oauth2 v0.0.0-20201208152858-08078c50e5b5/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/oauth2 v0.0.0-20210218202405-ba52d332ba99/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190227155943-e225da77a7e6/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20200625203802-6e8e738ad208/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190312061237-fead79001313/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190502145724-3ef323f4f1fd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191001151750-bb3f8db39f24/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191204072324-ce4227a45e2e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191228213918-04cbcbbfeed8/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200212091648-12a6c2dcc1e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200302150141-5c8b2ff67527/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200331124033-c3d80250170d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200501052902-10377860bb8e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200511232937-7e40ca221e25/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200515095857-1151b9dac4a9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.2.0 h1:ljd4t30dBnAvMZaQCevtY0xLLD0A+bRZXbgLMLU1F/A=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY=
golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190312151545-0bb0c0a6e846/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190312170243-e65039ee4138/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs=
golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190506145303-2d16b83fe98c/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q=
golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190628153133-6cdbf07be9d0/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc=
golang.org/x/tools v0.0.0-20190816200558-6889da9d5479/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20190911174233-4f2ddba30aff/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191012152004-8de300cfc20a/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191113191852-77e3bb0ad9e7/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191115202509-3a792d9c32b2/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191130070609-6e064ea0cf2d/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
golang.org/x/tools v0.0.0-20191216173652-a0e659d51361/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20191227053925-7b8e75db28f4/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200117161641-43d50277825c/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200122220014-bf1340f18c4a/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200204074204-1cc6d1ef6c74/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200207183749-b753a1ba74fa/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200212150539-ea181f53ac56/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200224181240-023911ca70b2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200227222343-706bc42d1f0d/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.0.0-20200304193943-95d2e580d8eb/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
golang.org/x/tools v0.0.0-20200312045724-11d5b4c81c7d/go.mod h1:o4KQGtdN14AW+yjsvvwRTJJuXz8XRtIHtEnmAXLyFUw=
golang.org/x/tools v0.0.0-20200331025713-a30bf2db82d4/go.mod h1:Sl4aGygMT6LrqrWclx+PTx3U+LnKx/seiNR+3G19Ar8=
golang.org/x/tools v0.0.0-20200501065659-ab2804fb9c9d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200512131952-2bc93b1c0c88/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200515010526-7d3b6ebf133d/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200618134242-20370b0cb4b2/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
golang.org/x/tools v0.0.0-20200729194436-6467de6f59a7/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200804011535-6c149bb5ef0d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200825202427-b303f430e36d/go.mod h1:njjCfa9FT2d7l9Bc6FUM5FLjQPp3cFF28FI3qnDFljA=
golang.org/x/tools v0.0.0-20200904185747-39188db58858/go.mod h1:Cj7w3i3Rnn0Xh82ur9kSqwfTHTeVxaDqrfMjpcNT6bE=
golang.org/x/tools v0.0.0-20201110124207-079ba7bd75cd/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.0.0-20201201161351-ac6f37ff4c2a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.0.0-20201208233053-a543418bbed2/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.0.0-20210105154028-b0ab187a4818/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/api v0.4.0/go.mod h1:8k5glujaEP+g9n7WNsDg8QP6cUVNI86fCNMcbazEtwE=
google.golang.org/api v0.7.0/go.mod h1:WtwebWUNSVBH/HAw79HIFXZNqEvBhG+Ra+ax0hx3E3M=
google.golang.org/api v0.8.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
google.golang.org/api v0.9.0/go.mod h1:o4eAsZoiT+ibD93RtjEohWalFOjRDx6CVaqeizhEnKg=
google.golang.org/api v0.13.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
google.golang.org/api v0.14.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
google.golang.org/api v0.15.0/go.mod h1:iLdEw5Ide6rF15KTC1Kkl0iskquN2gFfn9o9XIsbkAI=
google.golang.org/api v0.17.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.18.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.19.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.20.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.22.0/go.mod h1:BwFmGc8tA3vsd7r/7kR8DY7iEEGSU04BFxCo5jP/sfE=
google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM=
google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc=
google.golang.org/api v0.35.0/go.mod h1:/XrVsuzM0rZmrsbjJutiuftIzeuTQcEeaYcSk/mQ1dg=
google.golang.org/api v0.36.0/go.mod h1:+z5ficQTmoYpPn8LCUNVpK5I7hwkpjbcgqA7I34qYtE=
google.golang.org/api v0.40.0/go.mod h1:fYKFpnQN0DsDSKRVRcQSDQNtqWPfM9i+zNPxepjRCQ8=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.6.1/go.mod h1:i06prIuMbXzDqacNJfV5OdTW448YApPu5ww/cMBSeb0=
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190418145605-e7d98fc518a7/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190502173448-54afdca5d873/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190801165951-fa694d86fc64/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc=
google.golang.org/genproto v0.0.0-20190911173649-1774047e7e51/go.mod h1:IbNlFCBrqXvoKpeg0TB2l7cyZUmoaFKYIwrEpbDKLA8=
google.golang.org/genproto v0.0.0-20191108220845-16a3f7862a1a/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20191115194625-c23dd37a84c9/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20191216164720-4f79533eabd1/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20191230161307-f3c370f40bfb/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200115191322-ca5a22157cba/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200122232147-0452cf42e150/go.mod h1:n3cpQtvxv34hfy77yVDNjmbRyujviMdxYliBSkLhpCc=
google.golang.org/genproto v0.0.0-20200204135345-fa8e72b47b90/go.mod h1:GmwEX6Z4W5gMy59cAlVYjN9JhxgbQH6Gn+gFDQe2lzA=
google.golang.org/genproto v0.0.0-20200212174721-66ed5ce911ce/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200224152610-e50cd9704f63/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200228133532-8c2c7df3a383/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200305110556-506484158171/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200312145019-da6875a35672/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200331122359-1ee6d9798940/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200430143042-b979b6f78d84/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c=
google.golang.org/genproto v0.0.0-20200515170657-fc4c6c6a6587/go.mod h1:YsZOwe1myG/8QRHRsmBRE1LrgQY60beZKjly0O1fX9U=
google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo=
google.golang.org/genproto v0.0.0-20200618031413-b414f8b61790/go.mod h1:jDfRM7FcilCzHH/e9qn6dsT145K34l5v+OpcnNgKAAA=
google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20200904004341-0bd0a958aa1d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20201109203340-2640f1f9cdfb/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20201201144952-b05cb90ed32e/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20201210142538-e3217bee35cc/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20201214200347-8c77b98c765d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20210108203827-ffc7fda8c3d7/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/genproto v0.0.0-20210226172003-ab064af71705/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
google.golang.org/grpc v1.20.1/go.mod h1:10oTOabMzJvdu6/UiuZezV6QK5dSlG84ov/aaiqXj38=
google.golang.org/grpc v1.21.1/go.mod h1:oYelfM1adQP15Ek0mdvEgi9Df8B9CZIaU1084ijfRaM=
google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg=
google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY=
google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.27.1/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60=
google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
google.golang.org/grpc v1.30.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
google.golang.org/grpc v1.31.0/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
google.golang.org/grpc v1.31.1/go.mod h1:N36X2cJ7JwdamYAgDz+s+rVMFjt3numwzf/HckM8pak=
google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc=
google.golang.org/grpc v1.34.0/go.mod h1:WotjhfgOW/POjDeRt8vscBtXq+2VjORFy659qA51WJ8=
google.golang.org/grpc v1.35.0/go.mod h1:qjiiYl8FncCW8feJPdyg3v6XW24KsRHe+dy9BAGRRjU=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=
google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0=
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4=
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=
rsc.io/tmplfunc v0.0.3 h1:53XFQh69AfOa8Tw0Jm7t+GV7KZhOi6jzsCzTtKbMvzU=
rsc.io/tmplfunc v0.0.3/go.mod h1:AG3sTPzElb1Io3Yg4voV9AGZJuleGAwaVRxL9M49PhA=

29
goicicle/Makefile Normal file
View File

@@ -0,0 +1,29 @@
CUDA_ROOT_DIR = /usr/local/cuda
NVCC = $(CUDA_ROOT_DIR)/bin/nvcc
CFLAGS = -Xcompiler -fPIC -std=c++17
LDFLAGS = -shared
FEATURES = -DG2_DEFINED
TARGET_BN254 = libbn254.so
TARGET_BLS12_381 = libbls12_381.so
TARGET_BLS12_377 = libbls12_377.so
VPATH = ../icicle/curves/bn254:../icicle/curves/bls12_377:../icicle/curves/bls12_381
SRCS_BN254 = lde.cu msm.cu projective.cu ve_mod_mult.cu
SRCS_BLS12_381 = lde.cu msm.cu projective.cu ve_mod_mult.cu poseidon.cu
SRCS_BLS12_377 = lde.cu msm.cu projective.cu ve_mod_mult.cu
all: $(TARGET_BN254) $(TARGET_BLS12_381) $(TARGET_BLS12_377)
$(TARGET_BN254):
$(NVCC) $(FEATURES) $(CFLAGS) $(LDFLAGS) $(addprefix ../icicle/curves/bn254/, $(SRCS_BN254)) -o $@
$(TARGET_BLS12_381):
$(NVCC) $(FEATURES) $(CFLAGS) $(LDFLAGS) $(addprefix ../icicle/curves/bls12_381/, $(SRCS_BLS12_381)) -o $@
$(TARGET_BLS12_377):
$(NVCC) $(FEATURES) $(CFLAGS) $(LDFLAGS) $(addprefix ../icicle/curves/bls12_377/, $(SRCS_BLS12_377)) -o $@
clean:
rm -f $(TARGET_BN254) $(TARGET_BLS12_381) $(TARGET_BLS12_377)

49
goicicle/README.md Normal file
View File

@@ -0,0 +1,49 @@
# ICICLE CUDA to Golang Binding Guide
This guide provides instructions on how to compile CUDA code using the provided Makefile, and then how to use the resulting shared libraries to bind Golang to ICICLE's CUDA code.
## Prerequisites
To compile the CUDA files, you will need:
- CUDA toolkit installed. The Makefile assumes CUDA is installed in `/usr/local/cuda`. If CUDA is installed in a different location, please adjust the `CUDA_ROOT_DIR` variable accordingly.
- A compatible GPU and corresponding driver installed on your machine.
## Structure of the Makefile
The Makefile is designed to compile CUDA files for three curves: BN254, BLS12_381, and BLS12_377. The source files are located in the `icicle/curves/` directory.
## Compiling CUDA Code
1. Navigate to the directory containing the Makefile in your terminal.
2. To compile all curve libraries, use the `make all` command. This will create three shared libraries: `libbn254.so`, `libbls12_381.so`, and `libbls12_377.so`.
3. If you want to compile a specific curve, you can do so by specifying the target. For example, to compile only the BN254 curve, use `make libbn254.so`. Replace `libbn254.so` with `libbls12_381.so` or `libbls12_377.so` to compile those curves instead.
The resulting `.so` files are the compiled shared libraries for each curve.
## Golang Binding
The shared libraries produced from the CUDA code compilation are used to bind Golang to ICICLE's CUDA code.
1. These shared libraries (`libbn254.so`, `libbls12_381.so`, `libbls12_377.so`) can be imported in your Go project to leverage the GPU accelerated functionalities provided by ICICLE.
2. In your Go project, you can use `cgo` to link these shared libraries. Here's a basic example on how you can use `cgo` to link these libraries:
```go
/*
#cgo LDFLAGS: -L/path/to/shared/libs -lbn254 -lbls12_381 -lbls12_377
#include "icicle.h" // make sure you use the correct header file(s)
*/
import "C"
func main() {
// Now you can call the C functions from the ICICLE libraries.
// Note that C function calls are prefixed with 'C.' in Go code.
}
```
Replace `/path/to/shared/libs` with the actual path where the shared libraries are located on your system.
## Cleaning up
If you want to remove the compiled files, you can use the `make clean` command. This will remove the `libbn254.so`, `libbls12_381.so`, and `libbls12_377.so` files.

View File

@@ -0,0 +1,324 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12377
import (
"unsafe"
"encoding/binary"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbls12_377
// #include "projective.h"
// #include "ve_mod_mult.h"
import "C"
const SCALAR_SIZE = 8
const BASE_SIZE = 12
type G1ScalarField struct {
S [SCALAR_SIZE]uint32
}
type G1BaseField struct {
S [BASE_SIZE]uint32
}
/*
* BaseField Constrctors
*/
func (f *G1BaseField) SetZero() *G1BaseField {
var S [BASE_SIZE]uint32
f.S = S
return f
}
func (f *G1BaseField) SetOne() *G1BaseField {
var S [BASE_SIZE]uint32
S[0] = 1
f.S = S
return f
}
func (p *G1ProjectivePoint) FromAffine(affine *G1PointAffine) *G1ProjectivePoint {
out := (*C.BLS12_377_projective_t)(unsafe.Pointer(p))
in := (*C.BLS12_377_affine_t)(unsafe.Pointer(affine))
C.projective_from_affine_bls12_377(out, in)
return p
}
func (f *G1BaseField) FromLimbs(limbs [BASE_SIZE]uint32) *G1BaseField {
copy(f.S[:], limbs[:])
return f
}
/*
* BaseField methods
*/
func (f *G1BaseField) Limbs() [BASE_SIZE]uint32 {
return f.S
}
func (f *G1BaseField) ToBytesLe() []byte {
bytes := make([]byte, len(f.S)*4)
for i, v := range f.S {
binary.LittleEndian.PutUint32(bytes[i*4:], v)
}
return bytes
}
/*
* ScalarField methods
*/
func (p *G1ScalarField) Random() *G1ScalarField {
outC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(p))
C.random_scalar_bls12_377(outC)
return p
}
func (f *G1ScalarField) SetZero() *G1ScalarField {
var S [SCALAR_SIZE]uint32
f.S = S
return f
}
func (f *G1ScalarField) SetOne() *G1ScalarField {
var S [SCALAR_SIZE]uint32
S[0] = 1
f.S = S
return f
}
func (a *G1ScalarField) Eq(b *G1ScalarField) bool {
for i, v := range a.S {
if b.S[i] != v {
return false
}
}
return true
}
/*
* ScalarField methods
*/
func (f *G1ScalarField) Limbs() [SCALAR_SIZE]uint32 {
return f.S
}
func (f *G1ScalarField) ToBytesLe() []byte {
bytes := make([]byte, len(f.S)*4)
for i, v := range f.S {
binary.LittleEndian.PutUint32(bytes[i*4:], v)
}
return bytes
}
/*
* PointBLS12_377
*/
type G1ProjectivePoint struct {
X, Y, Z G1BaseField
}
func (f *G1ProjectivePoint) SetZero() *G1ProjectivePoint {
var yOne G1BaseField
yOne.SetOne()
var xZero G1BaseField
xZero.SetZero()
var zZero G1BaseField
zZero.SetZero()
f.X = xZero
f.Y = yOne
f.Z = zZero
return f
}
func (p *G1ProjectivePoint) Eq(pCompare *G1ProjectivePoint) bool {
// Cast *PointBLS12_377 to *C.BLS12_377_projective_t
// The unsafe.Pointer cast is necessary because Go doesn't allow direct casts
// between different pointer types.
// It'S your responsibility to ensure that the types are compatible.
pC := (*C.BLS12_377_projective_t)(unsafe.Pointer(p))
pCompareC := (*C.BLS12_377_projective_t)(unsafe.Pointer(pCompare))
// Call the C function
// The C function doesn't keep any references to the data,
// so it'S fine if the Go garbage collector moves or deletes the data later.
return bool(C.eq_bls12_377(pC, pCompareC))
}
func (p *G1ProjectivePoint) IsOnCurve() bool {
point := (*C.BLS12_377_projective_t)(unsafe.Pointer(p))
res := C.projective_is_on_curve_bls12_377(point)
return bool(res)
}
func (p *G1ProjectivePoint) Random() *G1ProjectivePoint {
outC := (*C.BLS12_377_projective_t)(unsafe.Pointer(p))
C.random_projective_bls12_377(outC)
return p
}
func (p *G1ProjectivePoint) StripZ() *G1PointAffine {
return &G1PointAffine{
X: p.X,
Y: p.Y,
}
}
func (p *G1ProjectivePoint) FromLimbs(x, y, z *[]uint32) *G1ProjectivePoint {
var _x G1BaseField
var _y G1BaseField
var _z G1BaseField
_x.FromLimbs(GetFixedLimbs(x))
_y.FromLimbs(GetFixedLimbs(y))
_z.FromLimbs(GetFixedLimbs(z))
p.X = _x
p.Y = _y
p.Z = _z
return p
}
/*
* PointAffineNoInfinityBLS12_377
*/
type G1PointAffine struct {
X, Y G1BaseField
}
func (p *G1PointAffine) FromProjective(projective *G1ProjectivePoint) *G1PointAffine {
in := (*C.BLS12_377_projective_t)(unsafe.Pointer(projective))
out := (*C.BLS12_377_affine_t)(unsafe.Pointer(p))
C.projective_to_affine_bls12_377(out, in)
return p
}
func (p *G1PointAffine) ToProjective() *G1ProjectivePoint {
var Z G1BaseField
Z.SetOne()
return &G1ProjectivePoint{
X: p.X,
Y: p.Y,
Z: Z,
}
}
func (p *G1PointAffine) FromLimbs(X, Y *[]uint32) *G1PointAffine {
var _x G1BaseField
var _y G1BaseField
_x.FromLimbs(GetFixedLimbs(X))
_y.FromLimbs(GetFixedLimbs(Y))
return p
}
/*
* Multiplication
*/
func MultiplyVec(a []G1ProjectivePoint, b []G1ScalarField, deviceID int) {
if len(a) != len(b) {
panic("a and b have different lengths")
}
pointsC := (*C.BLS12_377_projective_t)(unsafe.Pointer(&a[0]))
scalarsC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(&b[0]))
deviceIdC := C.size_t(deviceID)
nElementsC := C.size_t(len(a))
C.vec_mod_mult_point_bls12_377(pointsC, scalarsC, nElementsC, deviceIdC)
}
func MultiplyScalar(a []G1ScalarField, b []G1ScalarField, deviceID int) {
if len(a) != len(b) {
panic("a and b have different lengths")
}
aC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(&a[0]))
bC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(&b[0]))
deviceIdC := C.size_t(deviceID)
nElementsC := C.size_t(len(a))
C.vec_mod_mult_scalar_bls12_377(aC, bC, nElementsC, deviceIdC)
}
// Multiply a matrix by a scalar:
//
// `a` - flattenned matrix;
// `b` - vector to multiply `a` by;
func MultiplyMatrix(a []G1ScalarField, b []G1ScalarField, deviceID int) {
c := make([]G1ScalarField, len(b))
for i := range c {
var p G1ScalarField
p.SetZero()
c[i] = p
}
aC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(&a[0]))
bC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(&b[0]))
cC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(&c[0]))
deviceIdC := C.size_t(deviceID)
nElementsC := C.size_t(len(a))
C.matrix_vec_mod_mult_bls12_377(aC, bC, cC, nElementsC, deviceIdC)
}
/*
* Utils
*/
func GetFixedLimbs(slice *[]uint32) [BASE_SIZE]uint32 {
if len(*slice) <= BASE_SIZE {
limbs := [BASE_SIZE]uint32{}
copy(limbs[:len(*slice)], *slice)
return limbs
}
panic("slice has too many elements")
}

View File

@@ -0,0 +1,198 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12377
import (
"encoding/binary"
"testing"
"github.com/stretchr/testify/assert"
)
func TestNewFieldBLS12_377One(t *testing.T) {
var oneField G1BaseField
oneField.SetOne()
rawOneField := [8]uint32([8]uint32{0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
assert.Equal(t, oneField.S, rawOneField)
}
func TestNewFieldBLS12_377Zero(t *testing.T) {
var zeroField G1BaseField
zeroField.SetZero()
rawZeroField := [8]uint32([8]uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
assert.Equal(t, zeroField.S, rawZeroField)
}
func TestFieldBLS12_377ToBytesLe(t *testing.T) {
var p G1ProjectivePoint
p.Random()
expected := make([]byte, len(p.X.S)*4) // each uint32 takes 4 bytes
for i, v := range p.X.S {
binary.LittleEndian.PutUint32(expected[i*4:], v)
}
assert.Equal(t, p.X.ToBytesLe(), expected)
assert.Equal(t, len(p.X.ToBytesLe()), 32)
}
func TestNewPointBLS12_377Zero(t *testing.T) {
var pointZero G1ProjectivePoint
pointZero.SetZero()
var baseOne G1BaseField
baseOne.SetOne()
var zeroSanity G1BaseField
zeroSanity.SetZero()
assert.Equal(t, pointZero.X, zeroSanity)
assert.Equal(t, pointZero.Y, baseOne)
assert.Equal(t, pointZero.Z, zeroSanity)
}
func TestFromProjectiveToAffine(t *testing.T) {
var projective G1ProjectivePoint
var affine G1PointAffine
projective.Random()
affine.FromProjective(&projective)
var projective2 G1ProjectivePoint
projective2.FromAffine(&affine)
assert.True(t, projective.IsOnCurve())
assert.True(t, projective2.IsOnCurve())
assert.True(t, projective.Eq(&projective2))
}
func TestBLS12_377Eq(t *testing.T) {
var p1 G1ProjectivePoint
p1.Random()
var p2 G1ProjectivePoint
p2.Random()
assert.Equal(t, p1.Eq(&p1), true)
assert.Equal(t, p1.Eq(&p2), false)
}
func TestBLS12_377StripZ(t *testing.T) {
var p1 G1ProjectivePoint
p1.Random()
p2ZLess := p1.StripZ()
assert.IsType(t, G1PointAffine{}, *p2ZLess)
assert.Equal(t, p1.X, p2ZLess.X)
assert.Equal(t, p1.Y, p2ZLess.Y)
}
func TestPointBLS12_377fromLimbs(t *testing.T) {
var p G1ProjectivePoint
p.Random()
x := p.X.Limbs()
y := p.Y.Limbs()
z := p.Z.Limbs()
xSlice := x[:]
ySlice := y[:]
zSlice := z[:]
var pFromLimbs G1ProjectivePoint
pFromLimbs.FromLimbs(&xSlice, &ySlice, &zSlice)
assert.Equal(t, pFromLimbs, p)
}
func TestNewPointAffineNoInfinityBLS12_377Zero(t *testing.T) {
var zeroP G1PointAffine
var zeroSanity G1BaseField
zeroSanity.SetZero()
assert.Equal(t, zeroP.X, zeroSanity)
assert.Equal(t, zeroP.Y, zeroSanity)
}
func TestPointAffineNoInfinityBLS12_377FromLimbs(t *testing.T) {
// Initialize your test values
x := [8]uint32{1, 2, 3, 4, 5, 6, 7, 8}
y := [8]uint32{9, 10, 11, 12, 13, 14, 15, 16}
xSlice := x[:]
ySlice := y[:]
// Execute your function
var result G1PointAffine
result.FromLimbs(&xSlice, &ySlice)
var xBase G1BaseField
var yBase G1BaseField
xBase.FromLimbs(x)
yBase.FromLimbs(y)
// Define your expected result
expected := &G1PointAffine{
X: xBase,
Y: yBase,
}
// Test if result is as expected
assert.Equal(t, result, expected)
}
func TestGetFixedLimbs(t *testing.T) {
t.Run("case of valid input of length less than 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7}
expected := [8]uint32{1, 2, 3, 4, 5, 6, 7, 0}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of valid input of length 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7, 8}
expected := [8]uint32{1, 2, 3, 4, 5, 6, 7, 8}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of empty input", func(t *testing.T) {
slice := []uint32{}
expected := [8]uint32{0, 0, 0, 0, 0, 0, 0, 0}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of input length greater than 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7, 8, 9}
defer func() {
if r := recover(); r == nil {
t.Errorf("the code did not panic")
}
}()
GetFixedLimbs(&slice)
})
}

View File

@@ -0,0 +1,112 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12377
import (
"encoding/binary"
"unsafe"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbls12_377
// #include "projective.h"
// #include "ve_mod_mult.h"
import "C"
// G2 extension field
type G2Element [6]uint64
type ExtentionField struct {
A0, A1 G2Element
}
type G2PointAffine struct {
X, Y ExtentionField
}
type G2Point struct {
X, Y, Z ExtentionField
}
func (p *G2Point) Random() *G2Point {
outC := (*C.BLS12_377_g2_projective_t)(unsafe.Pointer(p))
C.random_g2_projective_bls12_377(outC)
return p
}
func (p *G2Point) FromAffine(affine *G2PointAffine) *G2Point {
out := (*C.BLS12_377_g2_projective_t)(unsafe.Pointer(p))
in := (*C.BLS12_377_g2_affine_t)(unsafe.Pointer(affine))
C.g2_projective_from_affine_bls12_377(out, in)
return p
}
func (p *G2Point) Eq(pCompare *G2Point) bool {
// Cast *PointBLS12_377 to *C.BLS12_377_projective_t
// The unsafe.Pointer cast is necessary because Go doesn't allow direct casts
// between different pointer types.
// It's your responsibility to ensure that the types are compatible.
pC := (*C.BLS12_377_g2_projective_t)(unsafe.Pointer(p))
pCompareC := (*C.BLS12_377_g2_projective_t)(unsafe.Pointer(pCompare))
// Call the C function
// The C function doesn't keep any references to the data,
// so it's fine if the Go garbage collector moves or deletes the data later.
return bool(C.eq_g2_bls12_377(pC, pCompareC))
}
func (f *G2Element) ToBytesLe() []byte {
var bytes []byte
for _, val := range f {
buf := make([]byte, 8) // 8 bytes because uint64 is 64-bit
binary.LittleEndian.PutUint64(buf, val)
bytes = append(bytes, buf...)
}
return bytes
}
func (p *G2PointAffine) ToProjective() G2Point {
return G2Point{
X: p.X,
Y: p.Y,
Z: ExtentionField{
A0: G2Element{1, 0, 0, 0},
A1: G2Element{0, 0, 0, 0},
},
}
}
func (p *G2PointAffine) FromProjective(projective *G2Point) *G2PointAffine {
out := (*C.BLS12_377_g2_affine_t)(unsafe.Pointer(p))
in := (*C.BLS12_377_g2_projective_t)(unsafe.Pointer(projective))
C.g2_projective_to_affine_bls12_377(out, in)
return p
}
func (p *G2Point) IsOnCurve() bool {
// Directly copy memory from the C struct to the Go struct
point := (*C.BLS12_377_g2_projective_t)(unsafe.Pointer(p))
res := C.g2_projective_is_on_curve_bls12_377(point)
return bool(res)
}

View File

@@ -0,0 +1,75 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12377
import (
"github.com/stretchr/testify/assert"
"testing"
)
func TestG2Eqg2(t *testing.T) {
var point G2Point
point.Random()
assert.True(t, point.Eq(&point))
}
func TestG2FromProjectiveToAffine(t *testing.T) {
var projective G2Point
var affine G2PointAffine
projective.Random()
affine.FromProjective(&projective)
var projective2 G2Point
projective2.FromAffine(&affine)
assert.True(t, projective.IsOnCurve())
assert.True(t, projective2.IsOnCurve())
assert.True(t, projective.Eq(&projective2))
}
func TestG2Eqg2NotEqual(t *testing.T) {
var point G2Point
point.Random()
var point2 G2Point
point2.Random()
assert.False(t, point.Eq(&point2))
}
func TestG2ToBytes(t *testing.T) {
element := G2Element{0x6546098ea84b6298, 0x4a384533d1f68aca, 0xaa0666972d771336, 0x1569e4a34321993}
bytes := element.ToBytesLe()
assert.Equal(t, bytes, []byte{0x98, 0x62, 0x4b, 0xa8, 0x8e, 0x9, 0x46, 0x65, 0xca, 0x8a, 0xf6, 0xd1, 0x33, 0x45, 0x38, 0x4a, 0x36, 0x13, 0x77, 0x2d, 0x97, 0x66, 0x6, 0xaa, 0x93, 0x19, 0x32, 0x34, 0x4a, 0x9e, 0x56, 0x1})
}
func TestG2ShouldConvertToProjective(t *testing.T) {
var pointProjective G2Point
var pointAffine G2PointAffine
pointProjective.Random()
pointAffine.FromProjective(&pointProjective)
proj := pointAffine.ToProjective()
assert.True(t, proj.IsOnCurve())
assert.True(t, pointProjective.Eq(&proj))
}

View File

@@ -0,0 +1,98 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdbool.h>
// msm.h
#ifndef _BLS12_377_MSM_H
#define _BLS12_377_MSM_H
#ifdef __cplusplus
extern "C" {
#endif
// Incomplete declaration of BLS12_377 projective and affine structs
typedef struct BLS12_377_projective_t BLS12_377_projective_t;
typedef struct BLS12_377_g2_projective_t BLS12_377_g2_projective_t;
typedef struct BLS12_377_affine_t BLS12_377_affine_t;
typedef struct BLS12_377_g2_affine_t BLS12_377_g2_affine_t;
typedef struct BLS12_377_scalar_t BLS12_377_scalar_t;
typedef cudaStream_t CudaStream_t;
int msm_cuda_bls12_377(
BLS12_377_projective_t* out, BLS12_377_affine_t* points, BLS12_377_scalar_t* scalars, size_t count, size_t device_id);
int msm_batch_cuda_bls12_377(
BLS12_377_projective_t* out,
BLS12_377_affine_t* points,
BLS12_377_scalar_t* scalars,
size_t batch_size,
size_t msm_size,
size_t device_id);
int commit_cuda_bls12_377(
BLS12_377_projective_t* d_out,
BLS12_377_scalar_t* d_scalars,
BLS12_377_affine_t* d_points,
size_t count,
unsigned large_bucket_factor,
size_t device_id);
int commit_batch_cuda_bls12_377(
BLS12_377_projective_t* d_out,
BLS12_377_scalar_t* d_scalars,
BLS12_377_affine_t* d_points,
size_t count,
size_t batch_size,
size_t device_id);
int msm_g2_cuda_bls12_377(
BLS12_377_g2_projective_t* out,
BLS12_377_g2_affine_t* points,
BLS12_377_scalar_t* scalars,
size_t count,
size_t device_id);
int msm_batch_g2_cuda_bls12_377(
BLS12_377_g2_projective_t* out,
BLS12_377_g2_affine_t* points,
BLS12_377_scalar_t* scalars,
size_t batch_size,
size_t msm_size,
size_t device_id);
int commit_g2_cuda_bls12_377(
BLS12_377_g2_projective_t* d_out,
BLS12_377_scalar_t* d_scalars,
BLS12_377_g2_affine_t* d_points,
size_t count,
unsigned large_bucket_factor,
size_t device_id);
int commit_batch_g2_cuda_bls12_377(
BLS12_377_g2_projective_t* d_out,
BLS12_377_scalar_t* d_scalars,
BLS12_377_g2_affine_t* d_points,
size_t count,
size_t batch_size,
size_t device_id,
cudaStream_t stream);
#ifdef __cplusplus
}
#endif
#endif /* _BLS12_377_MSM_H */

View File

@@ -0,0 +1,195 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <stdbool.h>
// ntt.h
#ifndef _BLS12_377_NTT_H
#define _BLS12_377_NTT_H
#ifdef __cplusplus
extern "C" {
#endif
// Incomplete declaration of BLS12_377 projective and affine structs
typedef struct BLS12_377_projective_t BLS12_377_projective_t;
typedef struct BLS12_377_affine_t BLS12_377_affine_t;
typedef struct BLS12_377_scalar_t BLS12_377_scalar_t;
typedef struct BLS12_377_g2_projective_t BLS12_377_g2_projective_t;
typedef struct BLS12_377_g2_affine_t BLS12_377_g2_affine_t;
int ntt_cuda_bls12_377(BLS12_377_scalar_t* arr, uint32_t n, bool inverse, size_t device_id);
int ntt_batch_cuda_bls12_377(
BLS12_377_scalar_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
int ecntt_cuda_bls12_377(BLS12_377_projective_t* arr, uint32_t n, bool inverse, size_t device_id);
int ecntt_batch_cuda_bls12_377(
BLS12_377_projective_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
BLS12_377_scalar_t*
build_domain_cuda_bls12_377(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
int interpolate_scalars_cuda_bls12_377(
BLS12_377_scalar_t* d_out,
BLS12_377_scalar_t* d_evaluations,
BLS12_377_scalar_t* d_domain,
unsigned n,
unsigned device_id,
size_t stream);
int interpolate_scalars_batch_cuda_bls12_377(
BLS12_377_scalar_t* d_out,
BLS12_377_scalar_t* d_evaluations,
BLS12_377_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int interpolate_points_cuda_bls12_377(
BLS12_377_projective_t* d_out,
BLS12_377_projective_t* d_evaluations,
BLS12_377_scalar_t* d_domain,
unsigned n,
size_t device_id,
size_t stream);
int interpolate_points_batch_cuda_bls12_377(
BLS12_377_projective_t* d_out,
BLS12_377_projective_t* d_evaluations,
BLS12_377_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int interpolate_scalars_on_coset_cuda_bls12_377(
BLS12_377_scalar_t* d_out,
BLS12_377_scalar_t* d_evaluations,
BLS12_377_scalar_t* d_domain,
unsigned n,
BLS12_377_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int interpolate_scalars_batch_on_coset_cuda_bls12_377(
BLS12_377_scalar_t* d_out,
BLS12_377_scalar_t* d_evaluations,
BLS12_377_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
BLS12_377_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_scalars_cuda_bls12_377(
BLS12_377_scalar_t* d_out,
BLS12_377_scalar_t* d_coefficients,
BLS12_377_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned device_id,
size_t stream);
int evaluate_scalars_batch_cuda_bls12_377(
BLS12_377_scalar_t* d_out,
BLS12_377_scalar_t* d_coefficients,
BLS12_377_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int evaluate_points_cuda_bls12_377(
BLS12_377_projective_t* d_out,
BLS12_377_projective_t* d_coefficients,
BLS12_377_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
size_t device_id,
size_t stream);
int evaluate_points_batch_cuda_bls12_377(
BLS12_377_projective_t* d_out,
BLS12_377_projective_t* d_coefficients,
BLS12_377_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int evaluate_scalars_on_coset_cuda_bls12_377(
BLS12_377_scalar_t* d_out,
BLS12_377_scalar_t* d_coefficients,
BLS12_377_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
BLS12_377_scalar_t* coset_powers,
unsigned device_id,
size_t stream);
int evaluate_scalars_on_coset_batch_cuda_bls12_377(
BLS12_377_scalar_t* d_out,
BLS12_377_scalar_t* d_coefficients,
BLS12_377_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
BLS12_377_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_points_on_coset_cuda_bls12_377(
BLS12_377_projective_t* d_out,
BLS12_377_projective_t* d_coefficients,
BLS12_377_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
BLS12_377_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_points_on_coset_batch_cuda_bls12_377(
BLS12_377_projective_t* d_out,
BLS12_377_projective_t* d_coefficients,
BLS12_377_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
BLS12_377_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int reverse_order_scalars_cuda_bls12_377(BLS12_377_scalar_t* arr, int n, size_t device_id, size_t stream);
int reverse_order_scalars_batch_cuda_bls12_377(
BLS12_377_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
int reverse_order_points_cuda_bls12_377(BLS12_377_projective_t* arr, int n, size_t device_id, size_t stream);
int reverse_order_points_batch_cuda_bls12_377(
BLS12_377_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
int add_scalars_cuda_bls12_377(
BLS12_377_scalar_t* d_out, BLS12_377_scalar_t* d_in1, BLS12_377_scalar_t* d_in2, unsigned n, size_t stream);
int sub_scalars_cuda_bls12_377(
BLS12_377_scalar_t* d_out, BLS12_377_scalar_t* d_in1, BLS12_377_scalar_t* d_in2, unsigned n, size_t stream);
int to_montgomery_scalars_cuda_bls12_377(BLS12_377_scalar_t* d_inout, unsigned n, size_t stream);
int from_montgomery_scalars_cuda_bls12_377(BLS12_377_scalar_t* d_inout, unsigned n, size_t stream);
// points g1
int to_montgomery_proj_points_cuda_bls12_377(BLS12_377_projective_t* d_inout, unsigned n, size_t stream);
int from_montgomery_proj_points_cuda_bls12_377(BLS12_377_projective_t* d_inout, unsigned n, size_t stream);
int to_montgomery_aff_points_cuda_bls12_377(BLS12_377_affine_t* d_inout, unsigned n, size_t stream);
int from_montgomery_aff_points_cuda_bls12_377(BLS12_377_affine_t* d_inout, unsigned n, size_t stream);
// points g2
int to_montgomery_proj_points_g2_cuda_bls12_377(BLS12_377_g2_projective_t* d_inout, unsigned n, size_t stream);
int from_montgomery_proj_points_g2_cuda_bls12_377(BLS12_377_g2_projective_t* d_inout, unsigned n, size_t stream);
int to_montgomery_aff_points_g2_cuda_bls12_377(BLS12_377_g2_affine_t* d_inout, unsigned n, size_t stream);
int from_montgomery_aff_points_g2_cuda_bls12_377(BLS12_377_g2_affine_t* d_inout, unsigned n, size_t stream);
#ifdef __cplusplus
}
#endif
#endif /* _BLS12_377_NTT_H */

View File

@@ -0,0 +1,50 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <stdbool.h>
// projective.h
#ifdef __cplusplus
extern "C" {
#endif
typedef struct BLS12_377_projective_t BLS12_377_projective_t;
typedef struct BLS12_377_g2_projective_t BLS12_377_g2_projective_t;
typedef struct BLS12_377_affine_t BLS12_377_affine_t;
typedef struct BLS12_377_g2_affine_t BLS12_377_g2_affine_t;
typedef struct BLS12_377_scalar_t BLS12_377_scalar_t;
bool projective_is_on_curve_bls12_377(BLS12_377_projective_t* point1);
int random_scalar_bls12_377(BLS12_377_scalar_t* out);
int random_projective_bls12_377(BLS12_377_projective_t* out);
BLS12_377_projective_t* projective_zero_bls12_377();
int projective_to_affine_bls12_377(BLS12_377_affine_t* out, BLS12_377_projective_t* point1);
int projective_from_affine_bls12_377(BLS12_377_projective_t* out, BLS12_377_affine_t* point1);
int random_g2_projective_bls12_377(BLS12_377_g2_projective_t* out);
int g2_projective_to_affine_bls12_377(BLS12_377_g2_affine_t* out, BLS12_377_g2_projective_t* point1);
int g2_projective_from_affine_bls12_377(BLS12_377_g2_projective_t* out, BLS12_377_g2_affine_t* point1);
bool g2_projective_is_on_curve_bls12_377(BLS12_377_g2_projective_t* point1);
bool eq_bls12_377(BLS12_377_projective_t* point1, BLS12_377_projective_t* point2);
bool eq_g2_bls12_377(BLS12_377_g2_projective_t* point1, BLS12_377_g2_projective_t* point2);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,49 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <stdbool.h>
// ve_mod_mult.h
#ifndef _BLS12_377_VEC_MULT_H
#define _BLS12_377_VEC_MULT_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct BLS12_377_projective_t BLS12_377_projective_t;
typedef struct BLS12_377_scalar_t BLS12_377_scalar_t;
int32_t vec_mod_mult_point_bls12_377(
BLS12_377_projective_t* inout, BLS12_377_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
int32_t vec_mod_mult_scalar_bls12_377(
BLS12_377_scalar_t* inout, BLS12_377_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
int32_t vec_mod_mult_device_scalar_bls12_377(
BLS12_377_scalar_t* inout, BLS12_377_scalar_t* scalar_vec, size_t n_elements, size_t device_id);
int32_t matrix_vec_mod_mult_bls12_377(
BLS12_377_scalar_t* matrix_flattened,
BLS12_377_scalar_t* input,
BLS12_377_scalar_t* output,
size_t n_elments,
size_t device_id);
#ifdef __cplusplus
}
#endif
#endif /* _BLS12_377_VEC_MULT_H */

View File

@@ -0,0 +1,208 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12377
import (
"errors"
"fmt"
"unsafe"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbls12_377
// #include "msm.h"
import "C"
func Msm(out *G1ProjectivePoint, points []G1PointAffine, scalars []G1ScalarField, device_id int) (*G1ProjectivePoint, error) {
if len(points) != len(scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
pointsC := (*C.BLS12_377_affine_t)(unsafe.Pointer(&points[0]))
scalarsC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(&scalars[0]))
outC := (*C.BLS12_377_projective_t)(unsafe.Pointer(out))
ret := C.msm_cuda_bls12_377(outC, pointsC, scalarsC, C.size_t(len(points)), C.size_t(device_id))
if ret != 0 {
return nil, fmt.Errorf("msm_cuda_bls12_377 returned error code: %d", ret)
}
return out, nil
}
func MsmG2(out *G2Point, points []G2PointAffine, scalars []G1ScalarField, device_id int) (*G2Point, error) {
if len(points) != len(scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
pointsC := (*C.BLS12_377_g2_affine_t)(unsafe.Pointer(&points[0]))
scalarsC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(&scalars[0]))
outC := (*C.BLS12_377_g2_projective_t)(unsafe.Pointer(out))
ret := C.msm_g2_cuda_bls12_377(outC, pointsC, scalarsC, C.size_t(len(points)), C.size_t(device_id))
if ret != 0 {
return nil, fmt.Errorf("msm_g2_cuda_bls12_377 returned error code: %d", ret)
}
return out, nil
}
func MsmBatch(points *[]G1PointAffine, scalars *[]G1ScalarField, batchSize, deviceId int) ([]*G1ProjectivePoint, error) {
// Check for nil pointers
if points == nil || scalars == nil {
return nil, errors.New("points or scalars is nil")
}
if len(*points) != len(*scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
// Check for empty slices
if len(*points) == 0 || len(*scalars) == 0 {
return nil, errors.New("points or scalars is empty")
}
// Check for zero batchSize
if batchSize <= 0 {
return nil, errors.New("error on: batchSize must be greater than zero")
}
out := make([]*G1ProjectivePoint, batchSize)
for i := 0; i < len(out); i++ {
var p G1ProjectivePoint
p.SetZero()
out[i] = &p
}
outC := (*C.BLS12_377_projective_t)(unsafe.Pointer(&out[0]))
pointsC := (*C.BLS12_377_affine_t)(unsafe.Pointer(&(*points)[0]))
scalarsC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
msmSizeC := C.size_t(len(*points) / batchSize)
deviceIdC := C.size_t(deviceId)
batchSizeC := C.size_t(batchSize)
ret := C.msm_batch_cuda_bls12_377(outC, pointsC, scalarsC, batchSizeC, msmSizeC, deviceIdC)
if ret != 0 {
return nil, fmt.Errorf("msm_batch_cuda_bls12_377 returned error code: %d", ret)
}
return out, nil
}
func MsmG2Batch(points *[]G2PointAffine, scalars *[]G1ScalarField, batchSize, deviceId int) ([]*G2Point, error) {
// Check for nil pointers
if points == nil || scalars == nil {
return nil, errors.New("points or scalars is nil")
}
if len(*points) != len(*scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
// Check for empty slices
if len(*points) == 0 || len(*scalars) == 0 {
return nil, errors.New("points or scalars is empty")
}
// Check for zero batchSize
if batchSize <= 0 {
return nil, errors.New("error on: batchSize must be greater than zero")
}
out := make([]*G2Point, batchSize)
outC := (*C.BLS12_377_g2_projective_t)(unsafe.Pointer(&out[0]))
pointsC := (*C.BLS12_377_g2_affine_t)(unsafe.Pointer(&(*points)[0]))
scalarsC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
msmSizeC := C.size_t(len(*points) / batchSize)
deviceIdC := C.size_t(deviceId)
batchSizeC := C.size_t(batchSize)
ret := C.msm_batch_g2_cuda_bls12_377(outC, pointsC, scalarsC, batchSizeC, msmSizeC, deviceIdC)
if ret != 0 {
return nil, fmt.Errorf("msm_batch_cuda_bls12_377 returned error code: %d", ret)
}
return out, nil
}
func Commit(d_out, d_scalars, d_points unsafe.Pointer, count, bucketFactor int) int {
d_outC := (*C.BLS12_377_projective_t)(d_out)
scalarsC := (*C.BLS12_377_scalar_t)(d_scalars)
pointsC := (*C.BLS12_377_affine_t)(d_points)
countC := (C.size_t)(count)
largeBucketFactorC := C.uint(bucketFactor)
ret := C.commit_cuda_bls12_377(d_outC, scalarsC, pointsC, countC, largeBucketFactorC, 0)
if ret != 0 {
return -1
}
return 0
}
func CommitG2(d_out, d_scalars, d_points unsafe.Pointer, count, bucketFactor int) int {
d_outC := (*C.BLS12_377_g2_projective_t)(d_out)
scalarsC := (*C.BLS12_377_scalar_t)(d_scalars)
pointsC := (*C.BLS12_377_g2_affine_t)(d_points)
countC := (C.size_t)(count)
largeBucketFactorC := C.uint(bucketFactor)
ret := C.commit_g2_cuda_bls12_377(d_outC, scalarsC, pointsC, countC, largeBucketFactorC, 0)
if ret != 0 {
return -1
}
return 0
}
func CommitBatch(d_out, d_scalars, d_points unsafe.Pointer, count, batch_size int) int {
d_outC := (*C.BLS12_377_projective_t)(d_out)
scalarsC := (*C.BLS12_377_scalar_t)(d_scalars)
pointsC := (*C.BLS12_377_affine_t)(d_points)
countC := (C.size_t)(count)
batch_sizeC := (C.size_t)(batch_size)
ret := C.commit_batch_cuda_bls12_377(d_outC, scalarsC, pointsC, countC, batch_sizeC, 0)
if ret != 0 {
return -1
}
return 0
}
func CommitG2Batch(d_out, d_scalars, d_points unsafe.Pointer, count, batch_size int) int {
d_outC := (*C.BLS12_377_g2_projective_t)(d_out)
scalarsC := (*C.BLS12_377_scalar_t)(d_scalars)
pointsC := (*C.BLS12_377_g2_affine_t)(d_points)
countC := (C.size_t)(count)
batch_sizeC := (C.size_t)(batch_size)
ret := C.msm_batch_g2_cuda_bls12_377(d_outC, pointsC, scalarsC, countC, batch_sizeC, 0)
if ret != 0 {
return -1
}
return 0
}

View File

@@ -0,0 +1,355 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12377
import (
"fmt"
"math"
"testing"
"time"
"unsafe"
"github.com/ingonyama-zk/icicle/goicicle"
"github.com/stretchr/testify/assert"
)
func GeneratePoints(count int) []G1PointAffine {
// Declare a slice of integers
var points []G1PointAffine
// populate the slice
for i := 0; i < 10; i++ {
var pointProjective G1ProjectivePoint
pointProjective.Random()
var pointAffine G1PointAffine
pointAffine.FromProjective(&pointProjective)
points = append(points, pointAffine)
}
log2_10 := math.Log2(10)
log2Count := math.Log2(float64(count))
log2Size := int(math.Ceil(log2Count - log2_10))
for i := 0; i < log2Size; i++ {
points = append(points, points...)
}
return points[:count]
}
func GeneratePointsProj(count int) []G1ProjectivePoint {
// Declare a slice of integers
var points []G1ProjectivePoint
// Use a loop to populate the slice
for i := 0; i < count; i++ {
var p G1ProjectivePoint
p.Random()
points = append(points, p)
}
return points
}
func GenerateScalars(count int, skewed bool) []G1ScalarField {
// Declare a slice of integers
var scalars []G1ScalarField
var rand G1ScalarField
var zero G1ScalarField
var one G1ScalarField
var randLarge G1ScalarField
zero.SetZero()
one.SetOne()
randLarge.Random()
if skewed && count > 1_200_000 {
for i := 0; i < count-1_200_000; i++ {
rand.Random()
scalars = append(scalars, rand)
}
for i := 0; i < 600_000; i++ {
scalars = append(scalars, randLarge)
}
for i := 0; i < 400_000; i++ {
scalars = append(scalars, zero)
}
for i := 0; i < 200_000; i++ {
scalars = append(scalars, one)
}
} else {
for i := 0; i < count; i++ {
rand.Random()
scalars = append(scalars, rand)
}
}
return scalars[:count]
}
func TestMSM(t *testing.T) {
for _, v := range []int{24} {
count := 1 << v
points := GeneratePoints(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, true)
fmt.Print("Finished generating scalars\n")
out := new(G1ProjectivePoint)
startTime := time.Now()
_, e := Msm(out, points, scalars, 0) // non mont
fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())
assert.Equal(t, e, nil, "error should be nil")
assert.True(t, out.IsOnCurve())
}
}
func TestCommitMSM(t *testing.T) {
for _, v := range []int{24} {
count := 1<<v - 1
points := GeneratePoints(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, true)
fmt.Print("Finished generating scalars\n")
out_d, _ := goicicle.CudaMalloc(96)
pointsBytes := count * 64
points_d, _ := goicicle.CudaMalloc(pointsBytes)
goicicle.CudaMemCpyHtoD[G1PointAffine](points_d, points, pointsBytes)
scalarBytes := count * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
startTime := time.Now()
e := Commit(out_d, scalars_d, points_d, count, 10)
fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())
outHost := make([]G1ProjectivePoint, 1)
goicicle.CudaMemCpyDtoH[G1ProjectivePoint](outHost, out_d, 96)
assert.Equal(t, e, 0, "error should be 0")
assert.True(t, outHost[0].IsOnCurve())
}
}
func BenchmarkCommit(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GeneratePoints(msmSize)
scalars := GenerateScalars(msmSize, false)
out_d, _ := goicicle.CudaMalloc(96)
pointsBytes := msmSize * 64
points_d, _ := goicicle.CudaMalloc(pointsBytes)
goicicle.CudaMemCpyHtoD[G1PointAffine](points_d, points, pointsBytes)
scalarBytes := msmSize * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
b.Run(fmt.Sprintf("MSM %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
e := Commit(out_d, scalars_d, points_d, msmSize, 10)
if e != 0 {
panic("Error occured")
}
}
})
}
}
func TestBenchMSM(t *testing.T) {
for _, batchPow2 := range []int{2, 4} {
for _, pow2 := range []int{4, 6} {
msmSize := 1 << pow2
batchSize := 1 << batchPow2
count := msmSize * batchSize
points := GeneratePoints(count)
scalars := GenerateScalars(count, false)
a, e := MsmBatch(&points, &scalars, batchSize, 0)
if e != nil {
t.Errorf("MsmBatchBLS12_377 returned an error: %v", e)
}
if len(a) != batchSize {
t.Errorf("Expected length %d, but got %d", batchSize, len(a))
}
}
}
}
func BenchmarkMSM(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GeneratePoints(msmSize)
scalars := GenerateScalars(msmSize, false)
b.Run(fmt.Sprintf("MSM %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
out := new(G1ProjectivePoint)
_, e := Msm(out, points, scalars, 0)
if e != nil {
panic("Error occured")
}
}
})
}
}
// G2
func GenerateG2Points(count int) []G2PointAffine {
// Declare a slice of integers
var points []G2PointAffine
// populate the slice
for i := 0; i < 10; i++ {
var p G2Point
p.Random()
var affine G2PointAffine
affine.FromProjective(&p)
points = append(points, affine)
}
log2_10 := math.Log2(10)
log2Count := math.Log2(float64(count))
log2Size := int(math.Ceil(log2Count - log2_10))
for i := 0; i < log2Size; i++ {
points = append(points, points...)
}
return points[:count]
}
func TestMsmG2BLS12_377(t *testing.T) {
for _, v := range []int{24} {
count := 1 << v
points := GenerateG2Points(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, false)
fmt.Print("Finished generating scalars\n")
out := new(G2Point)
_, e := MsmG2(out, points, scalars, 0)
assert.Equal(t, e, nil, "error should be nil")
assert.True(t, out.IsOnCurve())
}
}
func BenchmarkMsmG2BLS12_377(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GenerateG2Points(msmSize)
scalars := GenerateScalars(msmSize, false)
b.Run(fmt.Sprintf("MSM G2 %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
out := new(G2Point)
_, e := MsmG2(out, points, scalars, 0)
if e != nil {
panic("Error occured")
}
}
})
}
}
func TestCommitG2MSM(t *testing.T) {
for _, v := range []int{24} {
count := 1 << v
points := GenerateG2Points(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, true)
fmt.Print("Finished generating scalars\n")
var sizeCheckG2PointAffine G2PointAffine
inputPointsBytes := count * int(unsafe.Sizeof(sizeCheckG2PointAffine))
var sizeCheckG2Point G2Point
out_d, _ := goicicle.CudaMalloc(int(unsafe.Sizeof(sizeCheckG2Point)))
points_d, _ := goicicle.CudaMalloc(inputPointsBytes)
goicicle.CudaMemCpyHtoD[G2PointAffine](points_d, points, inputPointsBytes)
scalarBytes := count * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
startTime := time.Now()
e := CommitG2(out_d, scalars_d, points_d, count, 10)
fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())
outHost := make([]G2Point, 1)
goicicle.CudaMemCpyDtoH[G2Point](outHost, out_d, int(unsafe.Sizeof(sizeCheckG2Point)))
assert.Equal(t, e, 0, "error should be 0")
assert.Equal(t, len(outHost), 1)
result := outHost[0]
assert.True(t, result.IsOnCurve())
}
}
func TestBatchG2MSM(t *testing.T) {
for _, batchPow2 := range []int{2, 4} {
for _, pow2 := range []int{4, 6} {
msmSize := 1 << pow2
batchSize := 1 << batchPow2
count := msmSize * batchSize
points := GenerateG2Points(count)
scalars := GenerateScalars(count, false)
pointsResults, e := MsmG2Batch(&points, &scalars, batchSize, 0)
if e != nil {
t.Errorf("MsmBatchBLS12_377 returned an error: %v", e)
}
if len(pointsResults) != batchSize {
t.Errorf("Expected length %d, but got %d", batchSize, len(pointsResults))
}
for _, s := range pointsResults {
assert.True(t, s.IsOnCurve())
}
}
}
}

View File

@@ -0,0 +1,221 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12377
import (
"errors"
"fmt"
"unsafe"
"github.com/ingonyama-zk/icicle/goicicle"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbls12_377
// #include "ntt.h"
import "C"
const (
NONE = 0
DIF = 1
DIT = 2
)
func Ntt(scalars *[]G1ScalarField, isInverse bool, deviceId int) uint64 {
scalarsC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
ret := C.ntt_cuda_bls12_377(scalarsC, C.uint32_t(len(*scalars)), C.bool(isInverse), C.size_t(deviceId))
return uint64(ret)
}
func NttBatch(scalars *[]G1ScalarField, isInverse bool, batchSize, deviceId int) uint64 {
scalarsC := (*C.BLS12_377_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
isInverseC := C.bool(isInverse)
batchSizeC := C.uint32_t(batchSize)
deviceIdC := C.size_t(deviceId)
ret := C.ntt_batch_cuda_bls12_377(scalarsC, C.uint32_t(len(*scalars)), batchSizeC, isInverseC, deviceIdC)
return uint64(ret)
}
func EcNtt(values *[]G1ProjectivePoint, isInverse bool, deviceId int) uint64 {
valuesC := (*C.BLS12_377_projective_t)(unsafe.Pointer(&(*values)[0]))
deviceIdC := C.size_t(deviceId)
isInverseC := C.bool(isInverse)
n := C.uint32_t(len(*values))
ret := C.ecntt_cuda_bls12_377(valuesC, n, isInverseC, deviceIdC)
return uint64(ret)
}
func EcNttBatch(values *[]G1ProjectivePoint, isInverse bool, batchSize, deviceId int) uint64 {
valuesC := (*C.BLS12_377_projective_t)(unsafe.Pointer(&(*values)[0]))
deviceIdC := C.size_t(deviceId)
isInverseC := C.bool(isInverse)
n := C.uint32_t(len(*values))
batchSizeC := C.uint32_t(batchSize)
ret := C.ecntt_batch_cuda_bls12_377(valuesC, n, batchSizeC, isInverseC, deviceIdC)
return uint64(ret)
}
func GenerateTwiddles(d_size int, log_d_size int, inverse bool) (up unsafe.Pointer, err error) {
domain_size := C.uint32_t(d_size)
logn := C.uint32_t(log_d_size)
is_inverse := C.bool(inverse)
dp := C.build_domain_cuda_bls12_377(domain_size, logn, is_inverse, 0, 0)
if dp == nil {
err = errors.New("nullptr returned from generating twiddles")
return unsafe.Pointer(nil), err
}
return unsafe.Pointer(dp), nil
}
// Reverses d_scalars in-place
func ReverseScalars(d_scalars unsafe.Pointer, len int) (int, error) {
scalarsC := (*C.BLS12_377_scalar_t)(d_scalars)
lenC := C.int(len)
if success := C.reverse_order_scalars_cuda_bls12_377(scalarsC, lenC, 0, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func Interpolate(scalars, twiddles, cosetPowers unsafe.Pointer, size int, isCoset bool) unsafe.Pointer {
size_d := size * 32
dp, err := goicicle.CudaMalloc(size_d)
if err != nil {
return nil
}
d_out := (*C.BLS12_377_scalar_t)(dp)
scalarsC := (*C.BLS12_377_scalar_t)(scalars)
twiddlesC := (*C.BLS12_377_scalar_t)(twiddles)
cosetPowersC := (*C.BLS12_377_scalar_t)(cosetPowers)
sizeC := C.uint(size)
var ret C.int
if isCoset {
ret = C.interpolate_scalars_on_coset_cuda_bls12_377(d_out, scalarsC, twiddlesC, sizeC, cosetPowersC, 0, 0)
} else {
ret = C.interpolate_scalars_cuda_bls12_377(d_out, scalarsC, twiddlesC, sizeC, 0, 0)
}
if ret != 0 {
fmt.Print("error interpolating")
}
return unsafe.Pointer(d_out)
}
func Evaluate(scalars_out, scalars, twiddles, coset_powers unsafe.Pointer, scalars_size, twiddles_size int, isCoset bool) int {
scalars_outC := (*C.BLS12_377_scalar_t)(scalars_out)
scalarsC := (*C.BLS12_377_scalar_t)(scalars)
twiddlesC := (*C.BLS12_377_scalar_t)(twiddles)
coset_powersC := (*C.BLS12_377_scalar_t)(coset_powers)
sizeC := C.uint(scalars_size)
twiddlesC_size := C.uint(twiddles_size)
var ret C.int
if isCoset {
ret = C.evaluate_scalars_on_coset_cuda_bls12_377(scalars_outC, scalarsC, twiddlesC, twiddlesC_size, sizeC, coset_powersC, 0, 0)
} else {
ret = C.evaluate_scalars_cuda_bls12_377(scalars_outC, scalarsC, twiddlesC, twiddlesC_size, sizeC, 0, 0)
}
if ret != 0 {
fmt.Print("error interpolating")
return -1
}
return 0
}
func VecScalarAdd(in1_d, in2_d unsafe.Pointer, size int) int {
in1_dC := (*C.BLS12_377_scalar_t)(in1_d)
in2_dC := (*C.BLS12_377_scalar_t)(in2_d)
sizeC := C.uint(size)
ret := C.add_scalars_cuda_bls12_377(in1_dC, in1_dC, in2_dC, sizeC, 0)
if ret != 0 {
fmt.Print("error adding scalar vectors")
return -1
}
return 0
}
func VecScalarSub(in1_d, in2_d unsafe.Pointer, size int) int {
in1_dC := (*C.BLS12_377_scalar_t)(in1_d)
in2_dC := (*C.BLS12_377_scalar_t)(in2_d)
sizeC := C.uint(size)
ret := C.sub_scalars_cuda_bls12_377(in1_dC, in1_dC, in2_dC, sizeC, 0)
if ret != 0 {
fmt.Print("error subtracting scalar vectors")
return -1
}
return 0
}
func ToMontgomery(d_scalars unsafe.Pointer, len int) (int, error) {
scalarsC := (*C.BLS12_377_scalar_t)(d_scalars)
lenC := C.uint(len)
if success := C.to_montgomery_scalars_cuda_bls12_377(scalarsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func FromMontgomery(d_scalars unsafe.Pointer, len int) (int, error) {
scalarsC := (*C.BLS12_377_scalar_t)(d_scalars)
lenC := C.uint(len)
if success := C.from_montgomery_scalars_cuda_bls12_377(scalarsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func AffinePointFromMontgomery(d_points unsafe.Pointer, len int) (int, error) {
pointsC := (*C.BLS12_377_affine_t)(d_points)
lenC := C.uint(len)
if success := C.from_montgomery_aff_points_cuda_bls12_377(pointsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func G2AffinePointFromMontgomery(d_points unsafe.Pointer, len int) (int, error) {
pointsC := (*C.BLS12_377_g2_affine_t)(d_points)
lenC := C.uint(len)
if success := C.from_montgomery_aff_points_g2_cuda_bls12_377(pointsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}

View File

@@ -0,0 +1,148 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12377
import (
"fmt"
"github.com/stretchr/testify/assert"
"reflect"
"testing"
)
func TestNttBLS12_377BBB(t *testing.T) {
count := 1 << 20
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
NttBatch(&nttResult, false, count, 0)
assert.NotEqual(t, nttResult, scalars)
assert.Equal(t, nttResult, nttResult)
}
func TestNttBLS12_377CompareToGnarkDIF(t *testing.T) {
count := 1 << 2
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
Ntt(&nttResult, false, DIF, 0)
assert.NotEqual(t, nttResult, scalars)
assert.Equal(t, nttResult, nttResult)
}
func TestINttBLS12_377CompareToGnarkDIT(t *testing.T) {
count := 1 << 3
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
Ntt(&nttResult, true, DIT, 0)
assert.NotEqual(t, nttResult, scalars)
assert.Equal(t, nttResult, nttResult)
}
func TestNttBLS12_377(t *testing.T) {
count := 1 << 3
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
Ntt(&nttResult, false, NONE, 0)
assert.NotEqual(t, nttResult, scalars)
inttResult := make([]G1ScalarField, len(nttResult))
copy(inttResult, nttResult)
assert.Equal(t, inttResult, nttResult)
Ntt(&inttResult, true, NONE, 0)
assert.Equal(t, inttResult, scalars)
}
func TestNttBatchBLS12_377(t *testing.T) {
count := 1 << 5
batches := 4
scalars := GenerateScalars(count*batches, false)
var scalarVecOfVec [][]G1ScalarField = make([][]G1ScalarField, 0)
for i := 0; i < batches; i++ {
start := i * count
end := (i + 1) * count
batch := make([]G1ScalarField, len(scalars[start:end]))
copy(batch, scalars[start:end])
scalarVecOfVec = append(scalarVecOfVec, batch)
}
nttBatchResult := make([]G1ScalarField, len(scalars))
copy(nttBatchResult, scalars)
NttBatch(&nttBatchResult, false, count, 0)
var nttResultVecOfVec [][]G1ScalarField
for i := 0; i < batches; i++ {
// Clone the slice
clone := make([]G1ScalarField, len(scalarVecOfVec[i]))
copy(clone, scalarVecOfVec[i])
// Add it to the result vector of vectors
nttResultVecOfVec = append(nttResultVecOfVec, clone)
// Call the ntt_bls12_377 function
Ntt(&nttResultVecOfVec[i], false, NONE, 0)
}
assert.NotEqual(t, nttBatchResult, scalars)
// Check that the ntt of each vec of scalars is equal to the intt of the specific batch
for i := 0; i < batches; i++ {
if !reflect.DeepEqual(nttResultVecOfVec[i], nttBatchResult[i*count:((i+1)*count)]) {
t.Errorf("ntt of vec of scalars not equal to intt of specific batch")
}
}
}
func BenchmarkNTT(b *testing.B) {
LOG_NTT_SIZES := []int{12, 15, 20, 21, 22, 23, 24, 25, 26}
for _, logNTTSize := range LOG_NTT_SIZES {
nttSize := 1 << logNTTSize
b.Run(fmt.Sprintf("NTT %d", logNTTSize), func(b *testing.B) {
scalars := GenerateScalars(nttSize, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
for n := 0; n < b.N; n++ {
Ntt(&nttResult, false, NONE, 0)
}
})
}
}

View File

@@ -0,0 +1,38 @@
package bls12377
import "encoding/binary"
// Function to convert [8]uint32 to [4]uint64
func ConvertUint32ArrToUint64Arr(arr32 [8]uint32) [4]uint64 {
var arr64 [4]uint64
for i := 0; i < len(arr32); i += 2 {
arr64[i/2] = (uint64(arr32[i]) << 32) | uint64(arr32[i+1])
}
return arr64
}
func ConvertUint64ArrToUint32Arr4(arr64 [4]uint64) [8]uint32 {
var arr32 [8]uint32
for i, v := range arr64 {
b := make([]byte, 8)
binary.LittleEndian.PutUint64(b, v)
arr32[i*2] = binary.LittleEndian.Uint32(b[0:4])
arr32[i*2+1] = binary.LittleEndian.Uint32(b[4:8])
}
return arr32
}
func ConvertUint64ArrToUint32Arr6(arr64 [6]uint64) [12]uint32 {
var arr32 [12]uint32
for i, v := range arr64 {
b := make([]byte, 8)
binary.LittleEndian.PutUint64(b, v)
arr32[i*2] = binary.LittleEndian.Uint32(b[0:4])
arr32[i*2+1] = binary.LittleEndian.Uint32(b[4:8])
}
return arr32
}

View File

@@ -0,0 +1,41 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12377
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbls12_377
// #include "ve_mod_mult.h"
import "C"
import (
"fmt"
"unsafe"
)
func VecScalarMulMod(scalarVec1, scalarVec2 unsafe.Pointer, size int) int {
scalarVec1C := (*C.BLS12_377_scalar_t)(scalarVec1)
scalarVec2C := (*C.BLS12_377_scalar_t)(scalarVec2)
sizeC := C.size_t(size)
ret := C.vec_mod_mult_device_scalar_bls12_377(scalarVec1C, scalarVec2C, sizeC, 0)
if ret != 0 {
fmt.Print("error multiplying scalar vectors")
return -1
}
return 0
}

View File

@@ -0,0 +1,324 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12381
import (
"unsafe"
"encoding/binary"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbls12_381
// #include "projective.h"
// #include "ve_mod_mult.h"
import "C"
const SCALAR_SIZE = 8
const BASE_SIZE = 12
type G1ScalarField struct {
S [SCALAR_SIZE]uint32
}
type G1BaseField struct {
S [BASE_SIZE]uint32
}
/*
* BaseField Constrctors
*/
func (f *G1BaseField) SetZero() *G1BaseField {
var S [BASE_SIZE]uint32
f.S = S
return f
}
func (f *G1BaseField) SetOne() *G1BaseField {
var S [BASE_SIZE]uint32
S[0] = 1
f.S = S
return f
}
func (p *G1ProjectivePoint) FromAffine(affine *G1PointAffine) *G1ProjectivePoint {
out := (*C.BLS12_381_projective_t)(unsafe.Pointer(p))
in := (*C.BLS12_381_affine_t)(unsafe.Pointer(affine))
C.projective_from_affine_bls12_381(out, in)
return p
}
func (f *G1BaseField) FromLimbs(limbs [BASE_SIZE]uint32) *G1BaseField {
copy(f.S[:], limbs[:])
return f
}
/*
* BaseField methods
*/
func (f *G1BaseField) Limbs() [BASE_SIZE]uint32 {
return f.S
}
func (f *G1BaseField) ToBytesLe() []byte {
bytes := make([]byte, len(f.S)*4)
for i, v := range f.S {
binary.LittleEndian.PutUint32(bytes[i*4:], v)
}
return bytes
}
/*
* ScalarField methods
*/
func (p *G1ScalarField) Random() *G1ScalarField {
outC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(p))
C.random_scalar_bls12_381(outC)
return p
}
func (f *G1ScalarField) SetZero() *G1ScalarField {
var S [SCALAR_SIZE]uint32
f.S = S
return f
}
func (f *G1ScalarField) SetOne() *G1ScalarField {
var S [SCALAR_SIZE]uint32
S[0] = 1
f.S = S
return f
}
func (a *G1ScalarField) Eq(b *G1ScalarField) bool {
for i, v := range a.S {
if b.S[i] != v {
return false
}
}
return true
}
/*
* ScalarField methods
*/
func (f *G1ScalarField) Limbs() [SCALAR_SIZE]uint32 {
return f.S
}
func (f *G1ScalarField) ToBytesLe() []byte {
bytes := make([]byte, len(f.S)*4)
for i, v := range f.S {
binary.LittleEndian.PutUint32(bytes[i*4:], v)
}
return bytes
}
/*
* PointBLS12_381
*/
type G1ProjectivePoint struct {
X, Y, Z G1BaseField
}
func (f *G1ProjectivePoint) SetZero() *G1ProjectivePoint {
var yOne G1BaseField
yOne.SetOne()
var xZero G1BaseField
xZero.SetZero()
var zZero G1BaseField
zZero.SetZero()
f.X = xZero
f.Y = yOne
f.Z = zZero
return f
}
func (p *G1ProjectivePoint) Eq(pCompare *G1ProjectivePoint) bool {
// Cast *PointBLS12_381 to *C.BLS12_381_projective_t
// The unsafe.Pointer cast is necessary because Go doesn't allow direct casts
// between different pointer types.
// It'S your responsibility to ensure that the types are compatible.
pC := (*C.BLS12_381_projective_t)(unsafe.Pointer(p))
pCompareC := (*C.BLS12_381_projective_t)(unsafe.Pointer(pCompare))
// Call the C function
// The C function doesn't keep any references to the data,
// so it'S fine if the Go garbage collector moves or deletes the data later.
return bool(C.eq_bls12_381(pC, pCompareC))
}
func (p *G1ProjectivePoint) IsOnCurve() bool {
point := (*C.BLS12_381_projective_t)(unsafe.Pointer(p))
res := C.projective_is_on_curve_bls12_381(point)
return bool(res)
}
func (p *G1ProjectivePoint) Random() *G1ProjectivePoint {
outC := (*C.BLS12_381_projective_t)(unsafe.Pointer(p))
C.random_projective_bls12_381(outC)
return p
}
func (p *G1ProjectivePoint) StripZ() *G1PointAffine {
return &G1PointAffine{
X: p.X,
Y: p.Y,
}
}
func (p *G1ProjectivePoint) FromLimbs(x, y, z *[]uint32) *G1ProjectivePoint {
var _x G1BaseField
var _y G1BaseField
var _z G1BaseField
_x.FromLimbs(GetFixedLimbs(x))
_y.FromLimbs(GetFixedLimbs(y))
_z.FromLimbs(GetFixedLimbs(z))
p.X = _x
p.Y = _y
p.Z = _z
return p
}
/*
* PointAffineNoInfinityBLS12_381
*/
type G1PointAffine struct {
X, Y G1BaseField
}
func (p *G1PointAffine) FromProjective(projective *G1ProjectivePoint) *G1PointAffine {
in := (*C.BLS12_381_projective_t)(unsafe.Pointer(projective))
out := (*C.BLS12_381_affine_t)(unsafe.Pointer(p))
C.projective_to_affine_bls12_381(out, in)
return p
}
func (p *G1PointAffine) ToProjective() *G1ProjectivePoint {
var Z G1BaseField
Z.SetOne()
return &G1ProjectivePoint{
X: p.X,
Y: p.Y,
Z: Z,
}
}
func (p *G1PointAffine) FromLimbs(X, Y *[]uint32) *G1PointAffine {
var _x G1BaseField
var _y G1BaseField
_x.FromLimbs(GetFixedLimbs(X))
_y.FromLimbs(GetFixedLimbs(Y))
return p
}
/*
* Multiplication
*/
func MultiplyVec(a []G1ProjectivePoint, b []G1ScalarField, deviceID int) {
if len(a) != len(b) {
panic("a and b have different lengths")
}
pointsC := (*C.BLS12_381_projective_t)(unsafe.Pointer(&a[0]))
scalarsC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(&b[0]))
deviceIdC := C.size_t(deviceID)
nElementsC := C.size_t(len(a))
C.vec_mod_mult_point_bls12_381(pointsC, scalarsC, nElementsC, deviceIdC)
}
func MultiplyScalar(a []G1ScalarField, b []G1ScalarField, deviceID int) {
if len(a) != len(b) {
panic("a and b have different lengths")
}
aC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(&a[0]))
bC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(&b[0]))
deviceIdC := C.size_t(deviceID)
nElementsC := C.size_t(len(a))
C.vec_mod_mult_scalar_bls12_381(aC, bC, nElementsC, deviceIdC)
}
// Multiply a matrix by a scalar:
//
// `a` - flattenned matrix;
// `b` - vector to multiply `a` by;
func MultiplyMatrix(a []G1ScalarField, b []G1ScalarField, deviceID int) {
c := make([]G1ScalarField, len(b))
for i := range c {
var p G1ScalarField
p.SetZero()
c[i] = p
}
aC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(&a[0]))
bC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(&b[0]))
cC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(&c[0]))
deviceIdC := C.size_t(deviceID)
nElementsC := C.size_t(len(a))
C.matrix_vec_mod_mult_bls12_381(aC, bC, cC, nElementsC, deviceIdC)
}
/*
* Utils
*/
func GetFixedLimbs(slice *[]uint32) [BASE_SIZE]uint32 {
if len(*slice) <= BASE_SIZE {
limbs := [BASE_SIZE]uint32{}
copy(limbs[:len(*slice)], *slice)
return limbs
}
panic("slice has too many elements")
}

View File

@@ -0,0 +1,198 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12381
import (
"encoding/binary"
"testing"
"github.com/stretchr/testify/assert"
)
func TestNewFieldBLS12_381One(t *testing.T) {
var oneField G1BaseField
oneField.SetOne()
rawOneField := [8]uint32([8]uint32{0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
assert.Equal(t, oneField.S, rawOneField)
}
func TestNewFieldBLS12_381Zero(t *testing.T) {
var zeroField G1BaseField
zeroField.SetZero()
rawZeroField := [8]uint32([8]uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
assert.Equal(t, zeroField.S, rawZeroField)
}
func TestFieldBLS12_381ToBytesLe(t *testing.T) {
var p G1ProjectivePoint
p.Random()
expected := make([]byte, len(p.X.S)*4) // each uint32 takes 4 bytes
for i, v := range p.X.S {
binary.LittleEndian.PutUint32(expected[i*4:], v)
}
assert.Equal(t, p.X.ToBytesLe(), expected)
assert.Equal(t, len(p.X.ToBytesLe()), 32)
}
func TestNewPointBLS12_381Zero(t *testing.T) {
var pointZero G1ProjectivePoint
pointZero.SetZero()
var baseOne G1BaseField
baseOne.SetOne()
var zeroSanity G1BaseField
zeroSanity.SetZero()
assert.Equal(t, pointZero.X, zeroSanity)
assert.Equal(t, pointZero.Y, baseOne)
assert.Equal(t, pointZero.Z, zeroSanity)
}
func TestFromProjectiveToAffine(t *testing.T) {
var projective G1ProjectivePoint
var affine G1PointAffine
projective.Random()
affine.FromProjective(&projective)
var projective2 G1ProjectivePoint
projective2.FromAffine(&affine)
assert.True(t, projective.IsOnCurve())
assert.True(t, projective2.IsOnCurve())
assert.True(t, projective.Eq(&projective2))
}
func TestBLS12_381Eq(t *testing.T) {
var p1 G1ProjectivePoint
p1.Random()
var p2 G1ProjectivePoint
p2.Random()
assert.Equal(t, p1.Eq(&p1), true)
assert.Equal(t, p1.Eq(&p2), false)
}
func TestBLS12_381StripZ(t *testing.T) {
var p1 G1ProjectivePoint
p1.Random()
p2ZLess := p1.StripZ()
assert.IsType(t, G1PointAffine{}, *p2ZLess)
assert.Equal(t, p1.X, p2ZLess.X)
assert.Equal(t, p1.Y, p2ZLess.Y)
}
func TestPointBLS12_381fromLimbs(t *testing.T) {
var p G1ProjectivePoint
p.Random()
x := p.X.Limbs()
y := p.Y.Limbs()
z := p.Z.Limbs()
xSlice := x[:]
ySlice := y[:]
zSlice := z[:]
var pFromLimbs G1ProjectivePoint
pFromLimbs.FromLimbs(&xSlice, &ySlice, &zSlice)
assert.Equal(t, pFromLimbs, p)
}
func TestNewPointAffineNoInfinityBLS12_381Zero(t *testing.T) {
var zeroP G1PointAffine
var zeroSanity G1BaseField
zeroSanity.SetZero()
assert.Equal(t, zeroP.X, zeroSanity)
assert.Equal(t, zeroP.Y, zeroSanity)
}
func TestPointAffineNoInfinityBLS12_381FromLimbs(t *testing.T) {
// Initialize your test values
x := [12]uint32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
y := [12]uint32{9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20}
xSlice := x[:]
ySlice := y[:]
// Execute your function
var result G1PointAffine
result.FromLimbs(&xSlice, &ySlice)
var xBase G1BaseField
var yBase G1BaseField
xBase.FromLimbs(x)
yBase.FromLimbs(y)
// Define your expected result
expected := &G1PointAffine{
X: xBase,
Y: yBase,
}
// Test if result is as expected
assert.Equal(t, result, expected)
}
func TestGetFixedLimbs(t *testing.T) {
t.Run("case of valid input of length less than 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7}
expected := [8]uint32{1, 2, 3, 4, 5, 6, 7, 0}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of valid input of length 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7, 8}
expected := [8]uint32{1, 2, 3, 4, 5, 6, 7, 8}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of empty input", func(t *testing.T) {
slice := []uint32{}
expected := [8]uint32{0, 0, 0, 0, 0, 0, 0, 0}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of input length greater than 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7, 8, 9}
defer func() {
if r := recover(); r == nil {
t.Errorf("the code did not panic")
}
}()
GetFixedLimbs(&slice)
})
}

View File

@@ -0,0 +1,112 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12381
import (
"encoding/binary"
"unsafe"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbls12_381
// #include "projective.h"
// #include "ve_mod_mult.h"
import "C"
// G2 extension field
type G2Element [6]uint64
type ExtentionField struct {
A0, A1 G2Element
}
type G2PointAffine struct {
X, Y ExtentionField
}
type G2Point struct {
X, Y, Z ExtentionField
}
func (p *G2Point) Random() *G2Point {
outC := (*C.BLS12_381_g2_projective_t)(unsafe.Pointer(p))
C.random_g2_projective_bls12_381(outC)
return p
}
func (p *G2Point) FromAffine(affine *G2PointAffine) *G2Point {
out := (*C.BLS12_381_g2_projective_t)(unsafe.Pointer(p))
in := (*C.BLS12_381_g2_affine_t)(unsafe.Pointer(affine))
C.g2_projective_from_affine_bls12_381(out, in)
return p
}
func (p *G2Point) Eq(pCompare *G2Point) bool {
// Cast *PointBLS12_381 to *C.BLS12_381_projective_t
// The unsafe.Pointer cast is necessary because Go doesn't allow direct casts
// between different pointer types.
// It's your responsibility to ensure that the types are compatible.
pC := (*C.BLS12_381_g2_projective_t)(unsafe.Pointer(p))
pCompareC := (*C.BLS12_381_g2_projective_t)(unsafe.Pointer(pCompare))
// Call the C function
// The C function doesn't keep any references to the data,
// so it's fine if the Go garbage collector moves or deletes the data later.
return bool(C.eq_g2_bls12_381(pC, pCompareC))
}
func (f *G2Element) ToBytesLe() []byte {
var bytes []byte
for _, val := range f {
buf := make([]byte, 8) // 8 bytes because uint64 is 64-bit
binary.LittleEndian.PutUint64(buf, val)
bytes = append(bytes, buf...)
}
return bytes
}
func (p *G2PointAffine) ToProjective() G2Point {
return G2Point{
X: p.X,
Y: p.Y,
Z: ExtentionField{
A0: G2Element{1, 0, 0, 0},
A1: G2Element{0, 0, 0, 0},
},
}
}
func (p *G2PointAffine) FromProjective(projective *G2Point) *G2PointAffine {
out := (*C.BLS12_381_g2_affine_t)(unsafe.Pointer(p))
in := (*C.BLS12_381_g2_projective_t)(unsafe.Pointer(projective))
C.g2_projective_to_affine_bls12_381(out, in)
return p
}
func (p *G2Point) IsOnCurve() bool {
// Directly copy memory from the C struct to the Go struct
point := (*C.BLS12_381_g2_projective_t)(unsafe.Pointer(p))
res := C.g2_projective_is_on_curve_bls12_381(point)
return bool(res)
}

View File

@@ -0,0 +1,75 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12381
import (
"github.com/stretchr/testify/assert"
"testing"
)
func TestG2Eqg2(t *testing.T) {
var point G2Point
point.Random()
assert.True(t, point.Eq(&point))
}
func TestG2FromProjectiveToAffine(t *testing.T) {
var projective G2Point
var affine G2PointAffine
projective.Random()
affine.FromProjective(&projective)
var projective2 G2Point
projective2.FromAffine(&affine)
assert.True(t, projective.IsOnCurve())
assert.True(t, projective2.IsOnCurve())
assert.True(t, projective.Eq(&projective2))
}
func TestG2Eqg2NotEqual(t *testing.T) {
var point G2Point
point.Random()
var point2 G2Point
point2.Random()
assert.False(t, point.Eq(&point2))
}
func TestG2ToBytes(t *testing.T) {
element := G2Element{0x6546098ea84b6298, 0x4a384533d1f68aca, 0xaa0666972d771336, 0x1569e4a34321993}
bytes := element.ToBytesLe()
assert.Equal(t, bytes, []byte{0x98, 0x62, 0x4b, 0xa8, 0x8e, 0x9, 0x46, 0x65, 0xca, 0x8a, 0xf6, 0xd1, 0x33, 0x45, 0x38, 0x4a, 0x36, 0x13, 0x77, 0x2d, 0x97, 0x66, 0x6, 0xaa, 0x93, 0x19, 0x32, 0x34, 0x4a, 0x9e, 0x56, 0x1})
}
func TestG2ShouldConvertToProjective(t *testing.T) {
var pointProjective G2Point
var pointAffine G2PointAffine
pointProjective.Random()
pointAffine.FromProjective(&pointProjective)
proj := pointAffine.ToProjective()
assert.True(t, proj.IsOnCurve())
assert.True(t, pointProjective.Eq(&proj))
}

View File

@@ -0,0 +1,98 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdbool.h>
// msm.h
#ifndef _BLS12_381_MSM_H
#define _BLS12_381_MSM_H
#ifdef __cplusplus
extern "C" {
#endif
// Incomplete declaration of BLS12_381 projective and affine structs
typedef struct BLS12_381_projective_t BLS12_381_projective_t;
typedef struct BLS12_381_g2_projective_t BLS12_381_g2_projective_t;
typedef struct BLS12_381_affine_t BLS12_381_affine_t;
typedef struct BLS12_381_g2_affine_t BLS12_381_g2_affine_t;
typedef struct BLS12_381_scalar_t BLS12_381_scalar_t;
typedef cudaStream_t CudaStream_t;
int msm_cuda_bls12_381(
BLS12_381_projective_t* out, BLS12_381_affine_t* points, BLS12_381_scalar_t* scalars, size_t count, size_t device_id);
int msm_batch_cuda_bls12_381(
BLS12_381_projective_t* out,
BLS12_381_affine_t* points,
BLS12_381_scalar_t* scalars,
size_t batch_size,
size_t msm_size,
size_t device_id);
int commit_cuda_bls12_381(
BLS12_381_projective_t* d_out,
BLS12_381_scalar_t* d_scalars,
BLS12_381_affine_t* d_points,
size_t count,
unsigned large_bucket_factor,
size_t device_id);
int commit_batch_cuda_bls12_381(
BLS12_381_projective_t* d_out,
BLS12_381_scalar_t* d_scalars,
BLS12_381_affine_t* d_points,
size_t count,
size_t batch_size,
size_t device_id);
int msm_g2_cuda_bls12_381(
BLS12_381_g2_projective_t* out,
BLS12_381_g2_affine_t* points,
BLS12_381_scalar_t* scalars,
size_t count,
size_t device_id);
int msm_batch_g2_cuda_bls12_381(
BLS12_381_g2_projective_t* out,
BLS12_381_g2_affine_t* points,
BLS12_381_scalar_t* scalars,
size_t batch_size,
size_t msm_size,
size_t device_id);
int commit_g2_cuda_bls12_381(
BLS12_381_g2_projective_t* d_out,
BLS12_381_scalar_t* d_scalars,
BLS12_381_g2_affine_t* d_points,
size_t count,
unsigned large_bucket_factor,
size_t device_id);
int commit_batch_g2_cuda_bls12_381(
BLS12_381_g2_projective_t* d_out,
BLS12_381_scalar_t* d_scalars,
BLS12_381_g2_affine_t* d_points,
size_t count,
size_t batch_size,
size_t device_id,
cudaStream_t stream);
#ifdef __cplusplus
}
#endif
#endif /* _BLS12_381_MSM_H */

View File

@@ -0,0 +1,195 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <stdbool.h>
// ntt.h
#ifndef _BLS12_381_NTT_H
#define _BLS12_381_NTT_H
#ifdef __cplusplus
extern "C" {
#endif
// Incomplete declaration of BLS12_381 projective and affine structs
typedef struct BLS12_381_projective_t BLS12_381_projective_t;
typedef struct BLS12_381_affine_t BLS12_381_affine_t;
typedef struct BLS12_381_scalar_t BLS12_381_scalar_t;
typedef struct BLS12_381_g2_projective_t BLS12_381_g2_projective_t;
typedef struct BLS12_381_g2_affine_t BLS12_381_g2_affine_t;
int ntt_cuda_bls12_381(BLS12_381_scalar_t* arr, uint32_t n, bool inverse, size_t device_id);
int ntt_batch_cuda_bls12_381(
BLS12_381_scalar_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
int ecntt_cuda_bls12_381(BLS12_381_projective_t* arr, uint32_t n, bool inverse, size_t device_id);
int ecntt_batch_cuda_bls12_381(
BLS12_381_projective_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
BLS12_381_scalar_t*
build_domain_cuda_bls12_381(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
int interpolate_scalars_cuda_bls12_381(
BLS12_381_scalar_t* d_out,
BLS12_381_scalar_t* d_evaluations,
BLS12_381_scalar_t* d_domain,
unsigned n,
unsigned device_id,
size_t stream);
int interpolate_scalars_batch_cuda_bls12_381(
BLS12_381_scalar_t* d_out,
BLS12_381_scalar_t* d_evaluations,
BLS12_381_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int interpolate_points_cuda_bls12_381(
BLS12_381_projective_t* d_out,
BLS12_381_projective_t* d_evaluations,
BLS12_381_scalar_t* d_domain,
unsigned n,
size_t device_id,
size_t stream);
int interpolate_points_batch_cuda_bls12_381(
BLS12_381_projective_t* d_out,
BLS12_381_projective_t* d_evaluations,
BLS12_381_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int interpolate_scalars_on_coset_cuda_bls12_381(
BLS12_381_scalar_t* d_out,
BLS12_381_scalar_t* d_evaluations,
BLS12_381_scalar_t* d_domain,
unsigned n,
BLS12_381_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int interpolate_scalars_batch_on_coset_cuda_bls12_381(
BLS12_381_scalar_t* d_out,
BLS12_381_scalar_t* d_evaluations,
BLS12_381_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
BLS12_381_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_scalars_cuda_bls12_381(
BLS12_381_scalar_t* d_out,
BLS12_381_scalar_t* d_coefficients,
BLS12_381_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned device_id,
size_t stream);
int evaluate_scalars_batch_cuda_bls12_381(
BLS12_381_scalar_t* d_out,
BLS12_381_scalar_t* d_coefficients,
BLS12_381_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int evaluate_points_cuda_bls12_381(
BLS12_381_projective_t* d_out,
BLS12_381_projective_t* d_coefficients,
BLS12_381_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
size_t device_id,
size_t stream);
int evaluate_points_batch_cuda_bls12_381(
BLS12_381_projective_t* d_out,
BLS12_381_projective_t* d_coefficients,
BLS12_381_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int evaluate_scalars_on_coset_cuda_bls12_381(
BLS12_381_scalar_t* d_out,
BLS12_381_scalar_t* d_coefficients,
BLS12_381_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
BLS12_381_scalar_t* coset_powers,
unsigned device_id,
size_t stream);
int evaluate_scalars_on_coset_batch_cuda_bls12_381(
BLS12_381_scalar_t* d_out,
BLS12_381_scalar_t* d_coefficients,
BLS12_381_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
BLS12_381_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_points_on_coset_cuda_bls12_381(
BLS12_381_projective_t* d_out,
BLS12_381_projective_t* d_coefficients,
BLS12_381_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
BLS12_381_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_points_on_coset_batch_cuda_bls12_381(
BLS12_381_projective_t* d_out,
BLS12_381_projective_t* d_coefficients,
BLS12_381_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
BLS12_381_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int reverse_order_scalars_cuda_bls12_381(BLS12_381_scalar_t* arr, int n, size_t device_id, size_t stream);
int reverse_order_scalars_batch_cuda_bls12_381(
BLS12_381_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
int reverse_order_points_cuda_bls12_381(BLS12_381_projective_t* arr, int n, size_t device_id, size_t stream);
int reverse_order_points_batch_cuda_bls12_381(
BLS12_381_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
int add_scalars_cuda_bls12_381(
BLS12_381_scalar_t* d_out, BLS12_381_scalar_t* d_in1, BLS12_381_scalar_t* d_in2, unsigned n, size_t stream);
int sub_scalars_cuda_bls12_381(
BLS12_381_scalar_t* d_out, BLS12_381_scalar_t* d_in1, BLS12_381_scalar_t* d_in2, unsigned n, size_t stream);
int to_montgomery_scalars_cuda_bls12_381(BLS12_381_scalar_t* d_inout, unsigned n, size_t stream);
int from_montgomery_scalars_cuda_bls12_381(BLS12_381_scalar_t* d_inout, unsigned n, size_t stream);
// points g1
int to_montgomery_proj_points_cuda_bls12_381(BLS12_381_projective_t* d_inout, unsigned n, size_t stream);
int from_montgomery_proj_points_cuda_bls12_381(BLS12_381_projective_t* d_inout, unsigned n, size_t stream);
int to_montgomery_aff_points_cuda_bls12_381(BLS12_381_affine_t* d_inout, unsigned n, size_t stream);
int from_montgomery_aff_points_cuda_bls12_381(BLS12_381_affine_t* d_inout, unsigned n, size_t stream);
// points g2
int to_montgomery_proj_points_g2_cuda_bls12_381(BLS12_381_g2_projective_t* d_inout, unsigned n, size_t stream);
int from_montgomery_proj_points_g2_cuda_bls12_381(BLS12_381_g2_projective_t* d_inout, unsigned n, size_t stream);
int to_montgomery_aff_points_g2_cuda_bls12_381(BLS12_381_g2_affine_t* d_inout, unsigned n, size_t stream);
int from_montgomery_aff_points_g2_cuda_bls12_381(BLS12_381_g2_affine_t* d_inout, unsigned n, size_t stream);
#ifdef __cplusplus
}
#endif
#endif /* _BLS12_381_NTT_H */

View File

@@ -0,0 +1,50 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <stdbool.h>
// projective.h
#ifdef __cplusplus
extern "C" {
#endif
typedef struct BLS12_381_projective_t BLS12_381_projective_t;
typedef struct BLS12_381_g2_projective_t BLS12_381_g2_projective_t;
typedef struct BLS12_381_affine_t BLS12_381_affine_t;
typedef struct BLS12_381_g2_affine_t BLS12_381_g2_affine_t;
typedef struct BLS12_381_scalar_t BLS12_381_scalar_t;
bool projective_is_on_curve_bls12_381(BLS12_381_projective_t* point1);
int random_scalar_bls12_381(BLS12_381_scalar_t* out);
int random_projective_bls12_381(BLS12_381_projective_t* out);
BLS12_381_projective_t* projective_zero_bls12_381();
int projective_to_affine_bls12_381(BLS12_381_affine_t* out, BLS12_381_projective_t* point1);
int projective_from_affine_bls12_381(BLS12_381_projective_t* out, BLS12_381_affine_t* point1);
int random_g2_projective_bls12_381(BLS12_381_g2_projective_t* out);
int g2_projective_to_affine_bls12_381(BLS12_381_g2_affine_t* out, BLS12_381_g2_projective_t* point1);
int g2_projective_from_affine_bls12_381(BLS12_381_g2_projective_t* out, BLS12_381_g2_affine_t* point1);
bool g2_projective_is_on_curve_bls12_381(BLS12_381_g2_projective_t* point1);
bool eq_bls12_381(BLS12_381_projective_t* point1, BLS12_381_projective_t* point2);
bool eq_g2_bls12_381(BLS12_381_g2_projective_t* point1, BLS12_381_g2_projective_t* point2);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,49 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <stdbool.h>
// ve_mod_mult.h
#ifndef _BLS12_381_VEC_MULT_H
#define _BLS12_381_VEC_MULT_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct BLS12_381_projective_t BLS12_381_projective_t;
typedef struct BLS12_381_scalar_t BLS12_381_scalar_t;
int32_t vec_mod_mult_point_bls12_381(
BLS12_381_projective_t* inout, BLS12_381_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
int32_t vec_mod_mult_scalar_bls12_381(
BLS12_381_scalar_t* inout, BLS12_381_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
int32_t vec_mod_mult_device_scalar_bls12_381(
BLS12_381_scalar_t* inout, BLS12_381_scalar_t* scalar_vec, size_t n_elements, size_t device_id);
int32_t matrix_vec_mod_mult_bls12_381(
BLS12_381_scalar_t* matrix_flattened,
BLS12_381_scalar_t* input,
BLS12_381_scalar_t* output,
size_t n_elments,
size_t device_id);
#ifdef __cplusplus
}
#endif
#endif /* _BLS12_381_VEC_MULT_H */

View File

@@ -0,0 +1,208 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12381
import (
"errors"
"fmt"
"unsafe"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbls12_381
// #include "msm.h"
import "C"
func Msm(out *G1ProjectivePoint, points []G1PointAffine, scalars []G1ScalarField, device_id int) (*G1ProjectivePoint, error) {
if len(points) != len(scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
pointsC := (*C.BLS12_381_affine_t)(unsafe.Pointer(&points[0]))
scalarsC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(&scalars[0]))
outC := (*C.BLS12_381_projective_t)(unsafe.Pointer(out))
ret := C.msm_cuda_bls12_381(outC, pointsC, scalarsC, C.size_t(len(points)), C.size_t(device_id))
if ret != 0 {
return nil, fmt.Errorf("msm_cuda_bls12_381 returned error code: %d", ret)
}
return out, nil
}
func MsmG2(out *G2Point, points []G2PointAffine, scalars []G1ScalarField, device_id int) (*G2Point, error) {
if len(points) != len(scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
pointsC := (*C.BLS12_381_g2_affine_t)(unsafe.Pointer(&points[0]))
scalarsC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(&scalars[0]))
outC := (*C.BLS12_381_g2_projective_t)(unsafe.Pointer(out))
ret := C.msm_g2_cuda_bls12_381(outC, pointsC, scalarsC, C.size_t(len(points)), C.size_t(device_id))
if ret != 0 {
return nil, fmt.Errorf("msm_g2_cuda_bls12_381 returned error code: %d", ret)
}
return out, nil
}
func MsmBatch(points *[]G1PointAffine, scalars *[]G1ScalarField, batchSize, deviceId int) ([]*G1ProjectivePoint, error) {
// Check for nil pointers
if points == nil || scalars == nil {
return nil, errors.New("points or scalars is nil")
}
if len(*points) != len(*scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
// Check for empty slices
if len(*points) == 0 || len(*scalars) == 0 {
return nil, errors.New("points or scalars is empty")
}
// Check for zero batchSize
if batchSize <= 0 {
return nil, errors.New("error on: batchSize must be greater than zero")
}
out := make([]*G1ProjectivePoint, batchSize)
for i := 0; i < len(out); i++ {
var p G1ProjectivePoint
p.SetZero()
out[i] = &p
}
outC := (*C.BLS12_381_projective_t)(unsafe.Pointer(&out[0]))
pointsC := (*C.BLS12_381_affine_t)(unsafe.Pointer(&(*points)[0]))
scalarsC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
msmSizeC := C.size_t(len(*points) / batchSize)
deviceIdC := C.size_t(deviceId)
batchSizeC := C.size_t(batchSize)
ret := C.msm_batch_cuda_bls12_381(outC, pointsC, scalarsC, batchSizeC, msmSizeC, deviceIdC)
if ret != 0 {
return nil, fmt.Errorf("msm_batch_cuda_bls12_381 returned error code: %d", ret)
}
return out, nil
}
func MsmG2Batch(points *[]G2PointAffine, scalars *[]G1ScalarField, batchSize, deviceId int) ([]*G2Point, error) {
// Check for nil pointers
if points == nil || scalars == nil {
return nil, errors.New("points or scalars is nil")
}
if len(*points) != len(*scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
// Check for empty slices
if len(*points) == 0 || len(*scalars) == 0 {
return nil, errors.New("points or scalars is empty")
}
// Check for zero batchSize
if batchSize <= 0 {
return nil, errors.New("error on: batchSize must be greater than zero")
}
out := make([]*G2Point, batchSize)
outC := (*C.BLS12_381_g2_projective_t)(unsafe.Pointer(&out[0]))
pointsC := (*C.BLS12_381_g2_affine_t)(unsafe.Pointer(&(*points)[0]))
scalarsC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
msmSizeC := C.size_t(len(*points) / batchSize)
deviceIdC := C.size_t(deviceId)
batchSizeC := C.size_t(batchSize)
ret := C.msm_batch_g2_cuda_bls12_381(outC, pointsC, scalarsC, batchSizeC, msmSizeC, deviceIdC)
if ret != 0 {
return nil, fmt.Errorf("msm_batch_cuda_bls12_381 returned error code: %d", ret)
}
return out, nil
}
func Commit(d_out, d_scalars, d_points unsafe.Pointer, count, bucketFactor int) int {
d_outC := (*C.BLS12_381_projective_t)(d_out)
scalarsC := (*C.BLS12_381_scalar_t)(d_scalars)
pointsC := (*C.BLS12_381_affine_t)(d_points)
countC := (C.size_t)(count)
largeBucketFactorC := C.uint(bucketFactor)
ret := C.commit_cuda_bls12_381(d_outC, scalarsC, pointsC, countC, largeBucketFactorC, 0)
if ret != 0 {
return -1
}
return 0
}
func CommitG2(d_out, d_scalars, d_points unsafe.Pointer, count, bucketFactor int) int {
d_outC := (*C.BLS12_381_g2_projective_t)(d_out)
scalarsC := (*C.BLS12_381_scalar_t)(d_scalars)
pointsC := (*C.BLS12_381_g2_affine_t)(d_points)
countC := (C.size_t)(count)
largeBucketFactorC := C.uint(bucketFactor)
ret := C.commit_g2_cuda_bls12_381(d_outC, scalarsC, pointsC, countC, largeBucketFactorC, 0)
if ret != 0 {
return -1
}
return 0
}
func CommitBatch(d_out, d_scalars, d_points unsafe.Pointer, count, batch_size int) int {
d_outC := (*C.BLS12_381_projective_t)(d_out)
scalarsC := (*C.BLS12_381_scalar_t)(d_scalars)
pointsC := (*C.BLS12_381_affine_t)(d_points)
countC := (C.size_t)(count)
batch_sizeC := (C.size_t)(batch_size)
ret := C.commit_batch_cuda_bls12_381(d_outC, scalarsC, pointsC, countC, batch_sizeC, 0)
if ret != 0 {
return -1
}
return 0
}
func CommitG2Batch(d_out, d_scalars, d_points unsafe.Pointer, count, batch_size int) int {
d_outC := (*C.BLS12_381_g2_projective_t)(d_out)
scalarsC := (*C.BLS12_381_scalar_t)(d_scalars)
pointsC := (*C.BLS12_381_g2_affine_t)(d_points)
countC := (C.size_t)(count)
batch_sizeC := (C.size_t)(batch_size)
ret := C.msm_batch_g2_cuda_bls12_381(d_outC, pointsC, scalarsC, countC, batch_sizeC, 0)
if ret != 0 {
return -1
}
return 0
}

View File

@@ -0,0 +1,355 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12381
import (
"fmt"
"math"
"testing"
"time"
"unsafe"
"github.com/ingonyama-zk/icicle/goicicle"
"github.com/stretchr/testify/assert"
)
func GeneratePoints(count int) []G1PointAffine {
// Declare a slice of integers
var points []G1PointAffine
// populate the slice
for i := 0; i < 10; i++ {
var pointProjective G1ProjectivePoint
pointProjective.Random()
var pointAffine G1PointAffine
pointAffine.FromProjective(&pointProjective)
points = append(points, pointAffine)
}
log2_10 := math.Log2(10)
log2Count := math.Log2(float64(count))
log2Size := int(math.Ceil(log2Count - log2_10))
for i := 0; i < log2Size; i++ {
points = append(points, points...)
}
return points[:count]
}
func GeneratePointsProj(count int) []G1ProjectivePoint {
// Declare a slice of integers
var points []G1ProjectivePoint
// Use a loop to populate the slice
for i := 0; i < count; i++ {
var p G1ProjectivePoint
p.Random()
points = append(points, p)
}
return points
}
func GenerateScalars(count int, skewed bool) []G1ScalarField {
// Declare a slice of integers
var scalars []G1ScalarField
var rand G1ScalarField
var zero G1ScalarField
var one G1ScalarField
var randLarge G1ScalarField
zero.SetZero()
one.SetOne()
randLarge.Random()
if skewed && count > 1_200_000 {
for i := 0; i < count-1_200_000; i++ {
rand.Random()
scalars = append(scalars, rand)
}
for i := 0; i < 600_000; i++ {
scalars = append(scalars, randLarge)
}
for i := 0; i < 400_000; i++ {
scalars = append(scalars, zero)
}
for i := 0; i < 200_000; i++ {
scalars = append(scalars, one)
}
} else {
for i := 0; i < count; i++ {
rand.Random()
scalars = append(scalars, rand)
}
}
return scalars[:count]
}
func TestMSM(t *testing.T) {
for _, v := range []int{24} {
count := 1 << v
points := GeneratePoints(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, true)
fmt.Print("Finished generating scalars\n")
out := new(G1ProjectivePoint)
startTime := time.Now()
_, e := Msm(out, points, scalars, 0) // non mont
fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())
assert.Equal(t, e, nil, "error should be nil")
assert.True(t, out.IsOnCurve())
}
}
func TestCommitMSM(t *testing.T) {
for _, v := range []int{24} {
count := 1<<v - 1
points := GeneratePoints(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, true)
fmt.Print("Finished generating scalars\n")
out_d, _ := goicicle.CudaMalloc(96)
pointsBytes := count * 64
points_d, _ := goicicle.CudaMalloc(pointsBytes)
goicicle.CudaMemCpyHtoD[G1PointAffine](points_d, points, pointsBytes)
scalarBytes := count * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
startTime := time.Now()
e := Commit(out_d, scalars_d, points_d, count, 10)
fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())
outHost := make([]G1ProjectivePoint, 1)
goicicle.CudaMemCpyDtoH[G1ProjectivePoint](outHost, out_d, 96)
assert.Equal(t, e, 0, "error should be 0")
assert.True(t, outHost[0].IsOnCurve())
}
}
func BenchmarkCommit(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GeneratePoints(msmSize)
scalars := GenerateScalars(msmSize, false)
out_d, _ := goicicle.CudaMalloc(96)
pointsBytes := msmSize * 64
points_d, _ := goicicle.CudaMalloc(pointsBytes)
goicicle.CudaMemCpyHtoD[G1PointAffine](points_d, points, pointsBytes)
scalarBytes := msmSize * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
b.Run(fmt.Sprintf("MSM %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
e := Commit(out_d, scalars_d, points_d, msmSize, 10)
if e != 0 {
panic("Error occured")
}
}
})
}
}
func TestBenchMSM(t *testing.T) {
for _, batchPow2 := range []int{2, 4} {
for _, pow2 := range []int{4, 6} {
msmSize := 1 << pow2
batchSize := 1 << batchPow2
count := msmSize * batchSize
points := GeneratePoints(count)
scalars := GenerateScalars(count, false)
a, e := MsmBatch(&points, &scalars, batchSize, 0)
if e != nil {
t.Errorf("MsmBatchBLS12_381 returned an error: %v", e)
}
if len(a) != batchSize {
t.Errorf("Expected length %d, but got %d", batchSize, len(a))
}
}
}
}
func BenchmarkMSM(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GeneratePoints(msmSize)
scalars := GenerateScalars(msmSize, false)
b.Run(fmt.Sprintf("MSM %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
out := new(G1ProjectivePoint)
_, e := Msm(out, points, scalars, 0)
if e != nil {
panic("Error occured")
}
}
})
}
}
// G2
func GenerateG2Points(count int) []G2PointAffine {
// Declare a slice of integers
var points []G2PointAffine
// populate the slice
for i := 0; i < 10; i++ {
var p G2Point
p.Random()
var affine G2PointAffine
affine.FromProjective(&p)
points = append(points, affine)
}
log2_10 := math.Log2(10)
log2Count := math.Log2(float64(count))
log2Size := int(math.Ceil(log2Count - log2_10))
for i := 0; i < log2Size; i++ {
points = append(points, points...)
}
return points[:count]
}
func TestMsmG2BLS12381(t *testing.T) {
for _, v := range []int{24} {
count := 1 << v
points := GenerateG2Points(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, false)
fmt.Print("Finished generating scalars\n")
out := new(G2Point)
_, e := MsmG2(out, points, scalars, 0)
assert.Equal(t, e, nil, "error should be nil")
assert.True(t, out.IsOnCurve())
}
}
func BenchmarkMsmG2BLS12381(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GenerateG2Points(msmSize)
scalars := GenerateScalars(msmSize, false)
b.Run(fmt.Sprintf("MSM G2 %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
out := new(G2Point)
_, e := MsmG2(out, points, scalars, 0)
if e != nil {
panic("Error occured")
}
}
})
}
}
func TestCommitG2MSM(t *testing.T) {
for _, v := range []int{24} {
count := 1 << v
points := GenerateG2Points(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, true)
fmt.Print("Finished generating scalars\n")
var sizeCheckG2PointAffine G2PointAffine
inputPointsBytes := count * int(unsafe.Sizeof(sizeCheckG2PointAffine))
var sizeCheckG2Point G2Point
out_d, _ := goicicle.CudaMalloc(int(unsafe.Sizeof(sizeCheckG2Point)))
points_d, _ := goicicle.CudaMalloc(inputPointsBytes)
goicicle.CudaMemCpyHtoD[G2PointAffine](points_d, points, inputPointsBytes)
scalarBytes := count * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
startTime := time.Now()
e := CommitG2(out_d, scalars_d, points_d, count, 10)
fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())
outHost := make([]G2Point, 1)
goicicle.CudaMemCpyDtoH[G2Point](outHost, out_d, int(unsafe.Sizeof(sizeCheckG2Point)))
assert.Equal(t, e, 0, "error should be 0")
assert.Equal(t, len(outHost), 1)
result := outHost[0]
assert.True(t, result.IsOnCurve())
}
}
func TestBatchG2MSM(t *testing.T) {
for _, batchPow2 := range []int{2, 4} {
for _, pow2 := range []int{4, 6} {
msmSize := 1 << pow2
batchSize := 1 << batchPow2
count := msmSize * batchSize
points := GenerateG2Points(count)
scalars := GenerateScalars(count, false)
pointsResults, e := MsmG2Batch(&points, &scalars, batchSize, 0)
if e != nil {
t.Errorf("MsmBatchBLS12_381 returned an error: %v", e)
}
if len(pointsResults) != batchSize {
t.Errorf("Expected length %d, but got %d", batchSize, len(pointsResults))
}
for _, s := range pointsResults {
assert.True(t, s.IsOnCurve())
}
}
}
}

View File

@@ -0,0 +1,221 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12381
import (
"errors"
"fmt"
"unsafe"
"github.com/ingonyama-zk/icicle/goicicle"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbls12_381
// #include "ntt.h"
import "C"
const (
NONE = 0
DIF = 1
DIT = 2
)
func Ntt(scalars *[]G1ScalarField, isInverse bool, deviceId int) uint64 {
scalarsC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
ret := C.ntt_cuda_bls12_381(scalarsC, C.uint32_t(len(*scalars)), C.bool(isInverse), C.size_t(deviceId))
return uint64(ret)
}
func NttBatch(scalars *[]G1ScalarField, isInverse bool, batchSize, deviceId int) uint64 {
scalarsC := (*C.BLS12_381_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
isInverseC := C.bool(isInverse)
batchSizeC := C.uint32_t(batchSize)
deviceIdC := C.size_t(deviceId)
ret := C.ntt_batch_cuda_bls12_381(scalarsC, C.uint32_t(len(*scalars)), batchSizeC, isInverseC, deviceIdC)
return uint64(ret)
}
func EcNtt(values *[]G1ProjectivePoint, isInverse bool, deviceId int) uint64 {
valuesC := (*C.BLS12_381_projective_t)(unsafe.Pointer(&(*values)[0]))
deviceIdC := C.size_t(deviceId)
isInverseC := C.bool(isInverse)
n := C.uint32_t(len(*values))
ret := C.ecntt_cuda_bls12_381(valuesC, n, isInverseC, deviceIdC)
return uint64(ret)
}
func EcNttBatch(values *[]G1ProjectivePoint, isInverse bool, batchSize, deviceId int) uint64 {
valuesC := (*C.BLS12_381_projective_t)(unsafe.Pointer(&(*values)[0]))
deviceIdC := C.size_t(deviceId)
isInverseC := C.bool(isInverse)
n := C.uint32_t(len(*values))
batchSizeC := C.uint32_t(batchSize)
ret := C.ecntt_batch_cuda_bls12_381(valuesC, n, batchSizeC, isInverseC, deviceIdC)
return uint64(ret)
}
func GenerateTwiddles(d_size int, log_d_size int, inverse bool) (up unsafe.Pointer, err error) {
domain_size := C.uint32_t(d_size)
logn := C.uint32_t(log_d_size)
is_inverse := C.bool(inverse)
dp := C.build_domain_cuda_bls12_381(domain_size, logn, is_inverse, 0, 0)
if dp == nil {
err = errors.New("nullptr returned from generating twiddles")
return unsafe.Pointer(nil), err
}
return unsafe.Pointer(dp), nil
}
// Reverses d_scalars in-place
func ReverseScalars(d_scalars unsafe.Pointer, len int) (int, error) {
scalarsC := (*C.BLS12_381_scalar_t)(d_scalars)
lenC := C.int(len)
if success := C.reverse_order_scalars_cuda_bls12_381(scalarsC, lenC, 0, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func Interpolate(scalars, twiddles, cosetPowers unsafe.Pointer, size int, isCoset bool) unsafe.Pointer {
size_d := size * 32
dp, err := goicicle.CudaMalloc(size_d)
if err != nil {
return nil
}
d_out := (*C.BLS12_381_scalar_t)(dp)
scalarsC := (*C.BLS12_381_scalar_t)(scalars)
twiddlesC := (*C.BLS12_381_scalar_t)(twiddles)
cosetPowersC := (*C.BLS12_381_scalar_t)(cosetPowers)
sizeC := C.uint(size)
var ret C.int
if isCoset {
ret = C.interpolate_scalars_on_coset_cuda_bls12_381(d_out, scalarsC, twiddlesC, sizeC, cosetPowersC, 0, 0)
} else {
ret = C.interpolate_scalars_cuda_bls12_381(d_out, scalarsC, twiddlesC, sizeC, 0, 0)
}
if ret != 0 {
fmt.Print("error interpolating")
}
return unsafe.Pointer(d_out)
}
func Evaluate(scalars_out, scalars, twiddles, coset_powers unsafe.Pointer, scalars_size, twiddles_size int, isCoset bool) int {
scalars_outC := (*C.BLS12_381_scalar_t)(scalars_out)
scalarsC := (*C.BLS12_381_scalar_t)(scalars)
twiddlesC := (*C.BLS12_381_scalar_t)(twiddles)
coset_powersC := (*C.BLS12_381_scalar_t)(coset_powers)
sizeC := C.uint(scalars_size)
twiddlesC_size := C.uint(twiddles_size)
var ret C.int
if isCoset {
ret = C.evaluate_scalars_on_coset_cuda_bls12_381(scalars_outC, scalarsC, twiddlesC, twiddlesC_size, sizeC, coset_powersC, 0, 0)
} else {
ret = C.evaluate_scalars_cuda_bls12_381(scalars_outC, scalarsC, twiddlesC, twiddlesC_size, sizeC, 0, 0)
}
if ret != 0 {
fmt.Print("error interpolating")
return -1
}
return 0
}
func VecScalarAdd(in1_d, in2_d unsafe.Pointer, size int) int {
in1_dC := (*C.BLS12_381_scalar_t)(in1_d)
in2_dC := (*C.BLS12_381_scalar_t)(in2_d)
sizeC := C.uint(size)
ret := C.add_scalars_cuda_bls12_381(in1_dC, in1_dC, in2_dC, sizeC, 0)
if ret != 0 {
fmt.Print("error adding scalar vectors")
return -1
}
return 0
}
func VecScalarSub(in1_d, in2_d unsafe.Pointer, size int) int {
in1_dC := (*C.BLS12_381_scalar_t)(in1_d)
in2_dC := (*C.BLS12_381_scalar_t)(in2_d)
sizeC := C.uint(size)
ret := C.sub_scalars_cuda_bls12_381(in1_dC, in1_dC, in2_dC, sizeC, 0)
if ret != 0 {
fmt.Print("error subtracting scalar vectors")
return -1
}
return 0
}
func ToMontgomery(d_scalars unsafe.Pointer, len int) (int, error) {
scalarsC := (*C.BLS12_381_scalar_t)(d_scalars)
lenC := C.uint(len)
if success := C.to_montgomery_scalars_cuda_bls12_381(scalarsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func FromMontgomery(d_scalars unsafe.Pointer, len int) (int, error) {
scalarsC := (*C.BLS12_381_scalar_t)(d_scalars)
lenC := C.uint(len)
if success := C.from_montgomery_scalars_cuda_bls12_381(scalarsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func AffinePointFromMontgomery(d_points unsafe.Pointer, len int) (int, error) {
pointsC := (*C.BLS12_381_affine_t)(d_points)
lenC := C.uint(len)
if success := C.from_montgomery_aff_points_cuda_bls12_381(pointsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func G2AffinePointFromMontgomery(d_points unsafe.Pointer, len int) (int, error) {
pointsC := (*C.BLS12_381_g2_affine_t)(d_points)
lenC := C.uint(len)
if success := C.from_montgomery_aff_points_g2_cuda_bls12_381(pointsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}

View File

@@ -0,0 +1,148 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12381
import (
"fmt"
"github.com/stretchr/testify/assert"
"reflect"
"testing"
)
func TestNttBLS12381BBB(t *testing.T) {
count := 1 << 20
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
NttBatch(&nttResult, false, count, 0)
assert.NotEqual(t, nttResult, scalars)
assert.Equal(t, nttResult, nttResult)
}
func TestNttBLS12381CompareToGnarkDIF(t *testing.T) {
count := 1 << 2
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
Ntt(&nttResult, false, DIF, 0)
assert.NotEqual(t, nttResult, scalars)
assert.Equal(t, nttResult, nttResult)
}
func TestINttBLS12381CompareToGnarkDIT(t *testing.T) {
count := 1 << 3
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
Ntt(&nttResult, true, DIT, 0)
assert.NotEqual(t, nttResult, scalars)
assert.Equal(t, nttResult, nttResult)
}
func TestNttBLS12381(t *testing.T) {
count := 1 << 3
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
Ntt(&nttResult, false, NONE, 0)
assert.NotEqual(t, nttResult, scalars)
inttResult := make([]G1ScalarField, len(nttResult))
copy(inttResult, nttResult)
assert.Equal(t, inttResult, nttResult)
Ntt(&inttResult, true, NONE, 0)
assert.Equal(t, inttResult, scalars)
}
func TestNttBatchBLS12381(t *testing.T) {
count := 1 << 5
batches := 4
scalars := GenerateScalars(count*batches, false)
var scalarVecOfVec [][]G1ScalarField = make([][]G1ScalarField, 0)
for i := 0; i < batches; i++ {
start := i * count
end := (i + 1) * count
batch := make([]G1ScalarField, len(scalars[start:end]))
copy(batch, scalars[start:end])
scalarVecOfVec = append(scalarVecOfVec, batch)
}
nttBatchResult := make([]G1ScalarField, len(scalars))
copy(nttBatchResult, scalars)
NttBatch(&nttBatchResult, false, count, 0)
var nttResultVecOfVec [][]G1ScalarField
for i := 0; i < batches; i++ {
// Clone the slice
clone := make([]G1ScalarField, len(scalarVecOfVec[i]))
copy(clone, scalarVecOfVec[i])
// Add it to the result vector of vectors
nttResultVecOfVec = append(nttResultVecOfVec, clone)
// Call the ntt_bls12_381 function
Ntt(&nttResultVecOfVec[i], false, NONE, 0)
}
assert.NotEqual(t, nttBatchResult, scalars)
// Check that the ntt of each vec of scalars is equal to the intt of the specific batch
for i := 0; i < batches; i++ {
if !reflect.DeepEqual(nttResultVecOfVec[i], nttBatchResult[i*count:((i+1)*count)]) {
t.Errorf("ntt of vec of scalars not equal to intt of specific batch")
}
}
}
func BenchmarkNTT(b *testing.B) {
LOG_NTT_SIZES := []int{12, 15, 20, 21, 22, 23, 24, 25, 26}
for _, logNTTSize := range LOG_NTT_SIZES {
nttSize := 1 << logNTTSize
b.Run(fmt.Sprintf("NTT %d", logNTTSize), func(b *testing.B) {
scalars := GenerateScalars(nttSize, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
for n := 0; n < b.N; n++ {
Ntt(&nttResult, false, NONE, 0)
}
})
}
}

View File

@@ -0,0 +1,38 @@
package bls12381
import "encoding/binary"
// Function to convert [8]uint32 to [4]uint64
func ConvertUint32ArrToUint64Arr(arr32 [8]uint32) [4]uint64 {
var arr64 [4]uint64
for i := 0; i < len(arr32); i += 2 {
arr64[i/2] = (uint64(arr32[i]) << 32) | uint64(arr32[i+1])
}
return arr64
}
func ConvertUint64ArrToUint32Arr4(arr64 [4]uint64) [8]uint32 {
var arr32 [8]uint32
for i, v := range arr64 {
b := make([]byte, 8)
binary.LittleEndian.PutUint64(b, v)
arr32[i*2] = binary.LittleEndian.Uint32(b[0:4])
arr32[i*2+1] = binary.LittleEndian.Uint32(b[4:8])
}
return arr32
}
func ConvertUint64ArrToUint32Arr6(arr64 [6]uint64) [12]uint32 {
var arr32 [12]uint32
for i, v := range arr64 {
b := make([]byte, 8)
binary.LittleEndian.PutUint64(b, v)
arr32[i*2] = binary.LittleEndian.Uint32(b[0:4])
arr32[i*2+1] = binary.LittleEndian.Uint32(b[4:8])
}
return arr32
}

View File

@@ -0,0 +1,81 @@
package bls12381
import (
"testing"
)
func TestConvertUint32ArrToUint64Arr(t *testing.T) {
testCases := []struct {
name string
input [8]uint32
want [4]uint64
}{
{
name: "Test with incremental array",
input: [8]uint32{1, 2, 3, 4, 5, 6, 7, 8},
want: [4]uint64{4294967298, 12884901892, 21474836486, 30064771080},
},
{
name: "Test with all zeros",
input: [8]uint32{0, 0, 0, 0, 0, 0, 0, 0},
want: [4]uint64{0, 0, 0, 0},
},
{
name: "Test with maximum uint32 values",
input: [8]uint32{4294967295, 4294967295, 4294967295, 4294967295, 4294967295, 4294967295, 4294967295, 4294967295},
want: [4]uint64{18446744073709551615, 18446744073709551615, 18446744073709551615, 18446744073709551615},
},
{
name: "Test with alternating min and max uint32 values",
input: [8]uint32{0, 4294967295, 0, 4294967295, 0, 4294967295, 0, 4294967295},
want: [4]uint64{4294967295, 4294967295, 4294967295, 4294967295},
},
{
name: "Test with alternating max and min uint32 values",
input: [8]uint32{4294967295, 0, 4294967295, 0, 4294967295, 0, 4294967295, 0},
want: [4]uint64{18446744069414584320, 18446744069414584320, 18446744069414584320, 18446744069414584320},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
got := ConvertUint32ArrToUint64Arr(tc.input)
if got != tc.want {
t.Errorf("got %v, want %v", got, tc.want)
}
})
}
}
func TestConvertUint64ArrToUint32Arr(t *testing.T) {
testCases := []struct {
name string
input [6]uint64
expected [12]uint32
}{
{
name: "test one",
input: [6]uint64{1, 2, 3, 4, 5, 6},
expected: [12]uint32{1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0},
},
{
name: "test two",
input: [6]uint64{100, 200, 300, 400, 500, 600},
expected: [12]uint32{100, 0, 200, 0, 300, 0, 400, 0, 500, 0, 600, 0},
},
{
name: "test three",
input: [6]uint64{1000, 2000, 3000, 4000, 5000, 6000},
expected: [12]uint32{1000, 0, 2000, 0, 3000, 0, 4000, 0, 5000, 0, 6000, 0},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
got := ConvertUint64ArrToUint32Arr6(tc.input)
if got != tc.expected {
t.Errorf("got %v, want %v", got, tc.expected)
}
})
}
}

View File

@@ -0,0 +1,41 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bls12381
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbls12_381
// #include "ve_mod_mult.h"
import "C"
import (
"fmt"
"unsafe"
)
func VecScalarMulMod(scalarVec1, scalarVec2 unsafe.Pointer, size int) int {
scalarVec1C := (*C.BLS12_381_scalar_t)(scalarVec1)
scalarVec2C := (*C.BLS12_381_scalar_t)(scalarVec2)
sizeC := C.size_t(size)
ret := C.vec_mod_mult_device_scalar_bls12_381(scalarVec1C, scalarVec2C, sizeC, 0)
if ret != 0 {
fmt.Print("error multiplying scalar vectors")
return -1
}
return 0
}

324
goicicle/curves/bn254/g1.go Normal file
View File

@@ -0,0 +1,324 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bn254
import (
"unsafe"
"encoding/binary"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbn254
// #include "projective.h"
// #include "ve_mod_mult.h"
import "C"
const SCALAR_SIZE = 8
const BASE_SIZE = 8
type G1ScalarField struct {
S [SCALAR_SIZE]uint32
}
type G1BaseField struct {
S [BASE_SIZE]uint32
}
/*
* BaseField Constrctors
*/
func (f *G1BaseField) SetZero() *G1BaseField {
var S [BASE_SIZE]uint32
f.S = S
return f
}
func (f *G1BaseField) SetOne() *G1BaseField {
var S [BASE_SIZE]uint32
S[0] = 1
f.S = S
return f
}
func (p *G1ProjectivePoint) FromAffine(affine *G1PointAffine) *G1ProjectivePoint {
out := (*C.BN254_projective_t)(unsafe.Pointer(p))
in := (*C.BN254_affine_t)(unsafe.Pointer(affine))
C.projective_from_affine_bn254(out, in)
return p
}
func (f *G1BaseField) FromLimbs(limbs [BASE_SIZE]uint32) *G1BaseField {
copy(f.S[:], limbs[:])
return f
}
/*
* BaseField methods
*/
func (f *G1BaseField) Limbs() [BASE_SIZE]uint32 {
return f.S
}
func (f *G1BaseField) ToBytesLe() []byte {
bytes := make([]byte, len(f.S)*4)
for i, v := range f.S {
binary.LittleEndian.PutUint32(bytes[i*4:], v)
}
return bytes
}
/*
* ScalarField methods
*/
func (p *G1ScalarField) Random() *G1ScalarField {
outC := (*C.BN254_scalar_t)(unsafe.Pointer(p))
C.random_scalar_bn254(outC)
return p
}
func (f *G1ScalarField) SetZero() *G1ScalarField {
var S [SCALAR_SIZE]uint32
f.S = S
return f
}
func (f *G1ScalarField) SetOne() *G1ScalarField {
var S [SCALAR_SIZE]uint32
S[0] = 1
f.S = S
return f
}
func (a *G1ScalarField) Eq(b *G1ScalarField) bool {
for i, v := range a.S {
if b.S[i] != v {
return false
}
}
return true
}
/*
* ScalarField methods
*/
func (f *G1ScalarField) Limbs() [SCALAR_SIZE]uint32 {
return f.S
}
func (f *G1ScalarField) ToBytesLe() []byte {
bytes := make([]byte, len(f.S)*4)
for i, v := range f.S {
binary.LittleEndian.PutUint32(bytes[i*4:], v)
}
return bytes
}
/*
* PointBN254
*/
type G1ProjectivePoint struct {
X, Y, Z G1BaseField
}
func (f *G1ProjectivePoint) SetZero() *G1ProjectivePoint {
var yOne G1BaseField
yOne.SetOne()
var xZero G1BaseField
xZero.SetZero()
var zZero G1BaseField
zZero.SetZero()
f.X = xZero
f.Y = yOne
f.Z = zZero
return f
}
func (p *G1ProjectivePoint) Eq(pCompare *G1ProjectivePoint) bool {
// Cast *PointBN254 to *C.BN254_projective_t
// The unsafe.Pointer cast is necessary because Go doesn't allow direct casts
// between different pointer types.
// It'S your responsibility to ensure that the types are compatible.
pC := (*C.BN254_projective_t)(unsafe.Pointer(p))
pCompareC := (*C.BN254_projective_t)(unsafe.Pointer(pCompare))
// Call the C function
// The C function doesn't keep any references to the data,
// so it'S fine if the Go garbage collector moves or deletes the data later.
return bool(C.eq_bn254(pC, pCompareC))
}
func (p *G1ProjectivePoint) IsOnCurve() bool {
point := (*C.BN254_projective_t)(unsafe.Pointer(p))
res := C.projective_is_on_curve_bn254(point)
return bool(res)
}
func (p *G1ProjectivePoint) Random() *G1ProjectivePoint {
outC := (*C.BN254_projective_t)(unsafe.Pointer(p))
C.random_projective_bn254(outC)
return p
}
func (p *G1ProjectivePoint) StripZ() *G1PointAffine {
return &G1PointAffine{
X: p.X,
Y: p.Y,
}
}
func (p *G1ProjectivePoint) FromLimbs(x, y, z *[]uint32) *G1ProjectivePoint {
var _x G1BaseField
var _y G1BaseField
var _z G1BaseField
_x.FromLimbs(GetFixedLimbs(x))
_y.FromLimbs(GetFixedLimbs(y))
_z.FromLimbs(GetFixedLimbs(z))
p.X = _x
p.Y = _y
p.Z = _z
return p
}
/*
* PointAffineNoInfinityBN254
*/
type G1PointAffine struct {
X, Y G1BaseField
}
func (p *G1PointAffine) FromProjective(projective *G1ProjectivePoint) *G1PointAffine {
in := (*C.BN254_projective_t)(unsafe.Pointer(projective))
out := (*C.BN254_affine_t)(unsafe.Pointer(p))
C.projective_to_affine_bn254(out,in)
return p
}
func (p *G1PointAffine) ToProjective() *G1ProjectivePoint {
var Z G1BaseField
Z.SetOne()
return &G1ProjectivePoint{
X: p.X,
Y: p.Y,
Z: Z,
}
}
func (p *G1PointAffine) FromLimbs(X, Y *[]uint32) *G1PointAffine {
var _x G1BaseField
var _y G1BaseField
_x.FromLimbs(GetFixedLimbs(X))
_y.FromLimbs(GetFixedLimbs(Y))
return p
}
/*
* Multiplication
*/
func MultiplyVec(a []G1ProjectivePoint, b []G1ScalarField, deviceID int) {
if len(a) != len(b) {
panic("a and b have different lengths")
}
pointsC := (*C.BN254_projective_t)(unsafe.Pointer(&a[0]))
scalarsC := (*C.BN254_scalar_t)(unsafe.Pointer(&b[0]))
deviceIdC := C.size_t(deviceID)
nElementsC := C.size_t(len(a))
C.vec_mod_mult_point_bn254(pointsC, scalarsC, nElementsC, deviceIdC)
}
func MultiplyScalar(a []G1ScalarField, b []G1ScalarField, deviceID int) {
if len(a) != len(b) {
panic("a and b have different lengths")
}
aC := (*C.BN254_scalar_t)(unsafe.Pointer(&a[0]))
bC := (*C.BN254_scalar_t)(unsafe.Pointer(&b[0]))
deviceIdC := C.size_t(deviceID)
nElementsC := C.size_t(len(a))
C.vec_mod_mult_scalar_bn254(aC, bC, nElementsC, deviceIdC)
}
// Multiply a matrix by a scalar:
//
// `a` - flattenned matrix;
// `b` - vector to multiply `a` by;
func MultiplyMatrix(a []G1ScalarField, b []G1ScalarField, deviceID int) {
c := make([]G1ScalarField, len(b))
for i := range c {
var p G1ScalarField
p.SetZero()
c[i] = p
}
aC := (*C.BN254_scalar_t)(unsafe.Pointer(&a[0]))
bC := (*C.BN254_scalar_t)(unsafe.Pointer(&b[0]))
cC := (*C.BN254_scalar_t)(unsafe.Pointer(&c[0]))
deviceIdC := C.size_t(deviceID)
nElementsC := C.size_t(len(a))
C.matrix_vec_mod_mult_bn254(aC, bC, cC, nElementsC, deviceIdC)
}
/*
* Utils
*/
func GetFixedLimbs(slice *[]uint32) [BASE_SIZE]uint32 {
if len(*slice) <= BASE_SIZE {
limbs := [BASE_SIZE]uint32{}
copy(limbs[:len(*slice)], *slice)
return limbs
}
panic("slice has too many elements")
}

View File

@@ -0,0 +1,198 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bn254
import (
"encoding/binary"
"testing"
"github.com/stretchr/testify/assert"
)
func TestNewFieldBN254One(t *testing.T) {
var oneField G1BaseField
oneField.SetOne()
rawOneField := [8]uint32([8]uint32{0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
assert.Equal(t, oneField.S, rawOneField)
}
func TestNewFieldBN254Zero(t *testing.T) {
var zeroField G1BaseField
zeroField.SetZero()
rawZeroField := [8]uint32([8]uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
assert.Equal(t, zeroField.S, rawZeroField)
}
func TestFieldBN254ToBytesLe(t *testing.T) {
var p G1ProjectivePoint
p.Random()
expected := make([]byte, len(p.X.S)*4) // each uint32 takes 4 bytes
for i, v := range p.X.S {
binary.LittleEndian.PutUint32(expected[i*4:], v)
}
assert.Equal(t, p.X.ToBytesLe(), expected)
assert.Equal(t, len(p.X.ToBytesLe()), 32)
}
func TestNewPointBN254Zero(t *testing.T) {
var pointZero G1ProjectivePoint
pointZero.SetZero()
var baseOne G1BaseField
baseOne.SetOne()
var zeroSanity G1BaseField
zeroSanity.SetZero()
assert.Equal(t, pointZero.X, zeroSanity)
assert.Equal(t, pointZero.Y, baseOne)
assert.Equal(t, pointZero.Z, zeroSanity)
}
func TestFromProjectiveToAffine(t *testing.T) {
var projective G1ProjectivePoint
var affine G1PointAffine
projective.Random()
affine.FromProjective(&projective)
var projective2 G1ProjectivePoint
projective2.FromAffine(&affine)
assert.True(t, projective.IsOnCurve())
assert.True(t, projective2.IsOnCurve())
assert.True(t, projective.Eq(&projective2))
}
func TestBN254Eq(t *testing.T) {
var p1 G1ProjectivePoint
p1.Random()
var p2 G1ProjectivePoint
p2.Random()
assert.Equal(t, p1.Eq(&p1), true)
assert.Equal(t, p1.Eq(&p2), false)
}
func TestBN254StripZ(t *testing.T) {
var p1 G1ProjectivePoint
p1.Random()
p2ZLess := p1.StripZ()
assert.IsType(t, G1PointAffine{}, *p2ZLess)
assert.Equal(t, p1.X, p2ZLess.X)
assert.Equal(t, p1.Y, p2ZLess.Y)
}
func TestPointBN254fromLimbs(t *testing.T) {
var p G1ProjectivePoint
p.Random()
x := p.X.Limbs()
y := p.Y.Limbs()
z := p.Z.Limbs()
xSlice := x[:]
ySlice := y[:]
zSlice := z[:]
var pFromLimbs G1ProjectivePoint
pFromLimbs.FromLimbs(&xSlice, &ySlice, &zSlice)
assert.Equal(t, pFromLimbs, p)
}
func TestNewPointAffineNoInfinityBN254Zero(t *testing.T) {
var zeroP G1PointAffine
var zeroSanity G1BaseField
zeroSanity.SetZero()
assert.Equal(t, zeroP.X, zeroSanity)
assert.Equal(t, zeroP.Y, zeroSanity)
}
func TestPointAffineNoInfinityBN254FromLimbs(t *testing.T) {
// Initialize your test values
x := [8]uint32{1, 2, 3, 4, 5, 6, 7, 8}
y := [8]uint32{9, 10, 11, 12, 13, 14, 15, 16}
xSlice := x[:]
ySlice := y[:]
// Execute your function
var result G1PointAffine
result.FromLimbs(&xSlice, &ySlice)
var xBase G1BaseField
var yBase G1BaseField
xBase.FromLimbs(x)
yBase.FromLimbs(y)
// Define your expected result
expected := &G1PointAffine{
X: xBase,
Y: yBase,
}
// Test if result is as expected
assert.Equal(t, result, expected)
}
func TestGetFixedLimbs(t *testing.T) {
t.Run("case of valid input of length less than 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7}
expected := [8]uint32{1, 2, 3, 4, 5, 6, 7, 0}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of valid input of length 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7, 8}
expected := [8]uint32{1, 2, 3, 4, 5, 6, 7, 8}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of empty input", func(t *testing.T) {
slice := []uint32{}
expected := [8]uint32{0, 0, 0, 0, 0, 0, 0, 0}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of input length greater than 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7, 8, 9}
defer func() {
if r := recover(); r == nil {
t.Errorf("the code did not panic")
}
}()
GetFixedLimbs(&slice)
})
}

113
goicicle/curves/bn254/g2.go Normal file
View File

@@ -0,0 +1,113 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bn254
import (
"encoding/binary"
"unsafe"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbn254
// #include "projective.h"
// #include "ve_mod_mult.h"
import "C"
// G2 extension field
type G2Element [4]uint64
type ExtentionField struct {
A0, A1 G2Element
}
type G2PointAffine struct {
X, Y ExtentionField
}
type G2Point struct {
X, Y, Z ExtentionField
}
func (p *G2Point) Random() *G2Point {
outC := (*C.BN254_g2_projective_t)(unsafe.Pointer(p))
C.random_g2_projective_bn254(outC)
return p
}
func (p *G2Point) FromAffine(affine *G2PointAffine) *G2Point {
out := (*C.BN254_g2_projective_t)(unsafe.Pointer(p))
in := (*C.BN254_g2_affine_t)(unsafe.Pointer(affine))
C.g2_projective_from_affine_bn254(out, in)
return p
}
func (p *G2Point) Eq(pCompare *G2Point) bool {
// Cast *PointBN254 to *C.BN254_projective_t
// The unsafe.Pointer cast is necessary because Go doesn't allow direct casts
// between different pointer types.
// It's your responsibility to ensure that the types are compatible.
pC := (*C.BN254_g2_projective_t)(unsafe.Pointer(p))
pCompareC := (*C.BN254_g2_projective_t)(unsafe.Pointer(pCompare))
// Call the C function
// The C function doesn't keep any references to the data,
// so it's fine if the Go garbage collector moves or deletes the data later.
return bool(C.eq_g2_bn254(pC, pCompareC))
}
func (f *G2Element) ToBytesLe() []byte {
var bytes []byte
for _, val := range f {
buf := make([]byte, 8) // 8 bytes because uint64 is 64-bit
binary.LittleEndian.PutUint64(buf, val)
bytes = append(bytes, buf...)
}
return bytes
}
func (p *G2PointAffine) ToProjective() G2Point {
return G2Point{
X: p.X,
Y: p.Y,
Z: ExtentionField{
A0: G2Element{1, 0, 0, 0},
A1: G2Element{0, 0, 0, 0},
},
}
}
func (p *G2PointAffine) FromProjective(projective *G2Point) *G2PointAffine {
out := (*C.BN254_g2_affine_t)(unsafe.Pointer(p))
in := (*C.BN254_g2_projective_t)(unsafe.Pointer(projective))
C.g2_projective_to_affine_bn254(out, in)
return p
}
func (p *G2Point) IsOnCurve() bool {
// Directly copy memory from the C struct to the Go struct
point := (*C.BN254_g2_projective_t)(unsafe.Pointer(p))
res := C.g2_projective_is_on_curve_bn254(point)
return bool(res)
}

View File

@@ -0,0 +1,76 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bn254
import (
"github.com/stretchr/testify/assert"
"testing"
)
func TestG2Eqg2(t *testing.T) {
var point G2Point
point.Random()
assert.True(t, point.Eq(&point))
}
func TestG2FromProjectiveToAffine(t *testing.T) {
var projective G2Point
var affine G2PointAffine
projective.Random()
affine.FromProjective(&projective)
var projective2 G2Point
projective2.FromAffine(&affine)
assert.True(t, projective.IsOnCurve())
assert.True(t, projective2.IsOnCurve())
assert.True(t, projective.Eq(&projective2))
}
func TestG2Eqg2NotEqual(t *testing.T) {
var point G2Point
point.Random()
var point2 G2Point
point2.Random()
assert.False(t, point.Eq(&point2))
}
func TestG2ToBytes(t *testing.T) {
element := G2Element{0x6546098ea84b6298, 0x4a384533d1f68aca, 0xaa0666972d771336, 0x1569e4a34321993}
bytes := element.ToBytesLe()
assert.Equal(t, bytes, []byte{0x98, 0x62, 0x4b, 0xa8, 0x8e, 0x9, 0x46, 0x65, 0xca, 0x8a, 0xf6, 0xd1, 0x33, 0x45, 0x38, 0x4a, 0x36, 0x13, 0x77, 0x2d, 0x97, 0x66, 0x6, 0xaa, 0x93, 0x19, 0x32, 0x34, 0x4a, 0x9e, 0x56, 0x1})
}
func TestG2ShouldConvertToProjective(t *testing.T) {
var pointProjective G2Point
var pointAffine G2PointAffine
pointProjective.Random()
pointAffine.FromProjective(&pointProjective)
proj := pointAffine.ToProjective()
assert.True(t, proj.IsOnCurve())
assert.True(t, pointProjective.Eq(&proj))
}

View File

@@ -0,0 +1,94 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdbool.h>
// msm.h
#ifndef _BN254_MSM_H
#define _BN254_MSM_H
#ifdef __cplusplus
extern "C" {
#endif
// Incomplete declaration of BN254 projective and affine structs
typedef struct BN254_projective_t BN254_projective_t;
typedef struct BN254_g2_projective_t BN254_g2_projective_t;
typedef struct BN254_affine_t BN254_affine_t;
typedef struct BN254_g2_affine_t BN254_g2_affine_t;
typedef struct BN254_scalar_t BN254_scalar_t;
typedef cudaStream_t CudaStream_t;
int msm_cuda_bn254(
BN254_projective_t* out, BN254_affine_t* points, BN254_scalar_t* scalars, size_t count, size_t device_id);
int msm_batch_cuda_bn254(
BN254_projective_t* out,
BN254_affine_t* points,
BN254_scalar_t* scalars,
size_t batch_size,
size_t msm_size,
size_t device_id);
int commit_cuda_bn254(
BN254_projective_t* d_out,
BN254_scalar_t* d_scalars,
BN254_affine_t* d_points,
size_t count,
unsigned large_bucket_factor,
size_t device_id);
int commit_batch_cuda_bn254(
BN254_projective_t* d_out,
BN254_scalar_t* d_scalars,
BN254_affine_t* d_points,
size_t count,
size_t batch_size,
size_t device_id);
int msm_g2_cuda_bn254(
BN254_g2_projective_t* out, BN254_g2_affine_t* points, BN254_scalar_t* scalars, size_t count, size_t device_id);
int msm_batch_g2_cuda_bn254(
BN254_g2_projective_t* out,
BN254_g2_affine_t* points,
BN254_scalar_t* scalars,
size_t batch_size,
size_t msm_size,
size_t device_id);
int commit_g2_cuda_bn254(
BN254_g2_projective_t* d_out,
BN254_scalar_t* d_scalars,
BN254_g2_affine_t* d_points,
size_t count,
unsigned large_bucket_factor,
size_t device_id);
int commit_batch_g2_cuda_bn254(
BN254_g2_projective_t* d_out,
BN254_scalar_t* d_scalars,
BN254_g2_affine_t* d_points,
size_t count,
size_t batch_size,
size_t device_id,
cudaStream_t stream);
#ifdef __cplusplus
}
#endif
#endif /* _BN254_MSM_H */

View File

@@ -0,0 +1,193 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <stdbool.h>
// ntt.h
#ifndef _BN254_NTT_H
#define _BN254_NTT_H
#ifdef __cplusplus
extern "C" {
#endif
// Incomplete declaration of BN254 projective and affine structs
typedef struct BN254_projective_t BN254_projective_t;
typedef struct BN254_affine_t BN254_affine_t;
typedef struct BN254_scalar_t BN254_scalar_t;
typedef struct BN254_g2_projective_t BN254_g2_projective_t;
typedef struct BN254_g2_affine_t BN254_g2_affine_t;
int ntt_cuda_bn254(BN254_scalar_t* arr, uint32_t n, bool inverse, size_t device_id);
int ntt_batch_cuda_bn254(BN254_scalar_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
int ecntt_cuda_bn254(BN254_projective_t* arr, uint32_t n, bool inverse, size_t device_id);
int ecntt_batch_cuda_bn254(
BN254_projective_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
BN254_scalar_t*
build_domain_cuda_bn254(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
int interpolate_scalars_cuda_bn254(
BN254_scalar_t* d_out,
BN254_scalar_t* d_evaluations,
BN254_scalar_t* d_domain,
unsigned n,
unsigned device_id,
size_t stream);
int interpolate_scalars_batch_cuda_bn254(
BN254_scalar_t* d_out,
BN254_scalar_t* d_evaluations,
BN254_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int interpolate_points_cuda_bn254(
BN254_projective_t* d_out,
BN254_projective_t* d_evaluations,
BN254_scalar_t* d_domain,
unsigned n,
size_t device_id,
size_t stream);
int interpolate_points_batch_cuda_bn254(
BN254_projective_t* d_out,
BN254_projective_t* d_evaluations,
BN254_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int interpolate_scalars_on_coset_cuda_bn254(
BN254_scalar_t* d_out,
BN254_scalar_t* d_evaluations,
BN254_scalar_t* d_domain,
unsigned n,
BN254_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int interpolate_scalars_batch_on_coset_cuda_bn254(
BN254_scalar_t* d_out,
BN254_scalar_t* d_evaluations,
BN254_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
BN254_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_scalars_cuda_bn254(
BN254_scalar_t* d_out,
BN254_scalar_t* d_coefficients,
BN254_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned device_id,
size_t stream);
int evaluate_scalars_batch_cuda_bn254(
BN254_scalar_t* d_out,
BN254_scalar_t* d_coefficients,
BN254_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int evaluate_points_cuda_bn254(
BN254_projective_t* d_out,
BN254_projective_t* d_coefficients,
BN254_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
size_t device_id,
size_t stream);
int evaluate_points_batch_cuda_bn254(
BN254_projective_t* d_out,
BN254_projective_t* d_coefficients,
BN254_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int evaluate_scalars_on_coset_cuda_bn254(
BN254_scalar_t* d_out,
BN254_scalar_t* d_coefficients,
BN254_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
BN254_scalar_t* coset_powers,
unsigned device_id,
size_t stream);
int evaluate_scalars_on_coset_batch_cuda_bn254(
BN254_scalar_t* d_out,
BN254_scalar_t* d_coefficients,
BN254_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
BN254_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_points_on_coset_cuda_bn254(
BN254_projective_t* d_out,
BN254_projective_t* d_coefficients,
BN254_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
BN254_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_points_on_coset_batch_cuda_bn254(
BN254_projective_t* d_out,
BN254_projective_t* d_coefficients,
BN254_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
BN254_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int reverse_order_scalars_cuda_bn254(BN254_scalar_t* arr, int n, size_t device_id, size_t stream);
int reverse_order_scalars_batch_cuda_bn254(BN254_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
int reverse_order_points_cuda_bn254(BN254_projective_t* arr, int n, size_t device_id, size_t stream);
int reverse_order_points_batch_cuda_bn254(
BN254_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
int add_scalars_cuda_bn254(
BN254_scalar_t* d_out, BN254_scalar_t* d_in1, BN254_scalar_t* d_in2, unsigned n, size_t stream);
int sub_scalars_cuda_bn254(
BN254_scalar_t* d_out, BN254_scalar_t* d_in1, BN254_scalar_t* d_in2, unsigned n, size_t stream);
int to_montgomery_scalars_cuda_bn254(BN254_scalar_t* d_inout, unsigned n, size_t stream);
int from_montgomery_scalars_cuda_bn254(BN254_scalar_t* d_inout, unsigned n, size_t stream);
// points g1
int to_montgomery_proj_points_cuda_bn254(BN254_projective_t* d_inout, unsigned n, size_t stream);
int from_montgomery_proj_points_cuda_bn254(BN254_projective_t* d_inout, unsigned n, size_t stream);
int to_montgomery_aff_points_cuda_bn254(BN254_affine_t* d_inout, unsigned n, size_t stream);
int from_montgomery_aff_points_cuda_bn254(BN254_affine_t* d_inout, unsigned n, size_t stream);
// points g2
int to_montgomery_proj_points_g2_cuda_bn254(BN254_g2_projective_t* d_inout, unsigned n, size_t stream);
int from_montgomery_proj_points_g2_cuda_bn254(BN254_g2_projective_t* d_inout, unsigned n, size_t stream);
int to_montgomery_aff_points_g2_cuda_bn254(BN254_g2_affine_t* d_inout, unsigned n, size_t stream);
int from_montgomery_aff_points_g2_cuda_bn254(BN254_g2_affine_t* d_inout, unsigned n, size_t stream);
#ifdef __cplusplus
}
#endif
#endif /* _BN254_NTT_H */

View File

@@ -0,0 +1,50 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <stdbool.h>
// projective.h
#ifdef __cplusplus
extern "C" {
#endif
typedef struct BN254_projective_t BN254_projective_t;
typedef struct BN254_g2_projective_t BN254_g2_projective_t;
typedef struct BN254_affine_t BN254_affine_t;
typedef struct BN254_g2_affine_t BN254_g2_affine_t;
typedef struct BN254_scalar_t BN254_scalar_t;
bool projective_is_on_curve_bn254(BN254_projective_t* point1);
int random_scalar_bn254(BN254_scalar_t* out);
int random_projective_bn254(BN254_projective_t* out);
BN254_projective_t* projective_zero_bn254();
int projective_to_affine_bn254(BN254_affine_t* out, BN254_projective_t* point1);
int projective_from_affine_bn254(BN254_projective_t* out, BN254_affine_t* point1);
int random_g2_projective_bn254(BN254_g2_projective_t* out);
int g2_projective_to_affine_bn254(BN254_g2_affine_t* out, BN254_g2_projective_t* point1);
int g2_projective_from_affine_bn254(BN254_g2_projective_t* out, BN254_g2_affine_t* point1);
bool g2_projective_is_on_curve_bn254(BN254_g2_projective_t* point1);
bool eq_bn254(BN254_projective_t* point1, BN254_projective_t* point2);
bool eq_g2_bn254(BN254_g2_projective_t* point1, BN254_g2_projective_t* point2);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,45 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
#include <cuda.h>
#include <stdbool.h>
// ve_mod_mult.h
#ifndef _BN254_VEC_MULT_H
#define _BN254_VEC_MULT_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct BN254_projective_t BN254_projective_t;
typedef struct BN254_scalar_t BN254_scalar_t;
int32_t
vec_mod_mult_point_bn254(BN254_projective_t* inout, BN254_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
int32_t
vec_mod_mult_scalar_bn254(BN254_scalar_t* inout, BN254_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
int32_t vec_mod_mult_device_scalar_bn254(
BN254_scalar_t* inout, BN254_scalar_t* scalar_vec, size_t n_elements, size_t device_id);
int32_t matrix_vec_mod_mult_bn254(
BN254_scalar_t* matrix_flattened, BN254_scalar_t* input, BN254_scalar_t* output, size_t n_elments, size_t device_id);
#ifdef __cplusplus
}
#endif
#endif /* _BN254_VEC_MULT_H */

View File

@@ -0,0 +1,208 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bn254
import (
"errors"
"fmt"
"unsafe"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbn254
// #include "msm.h"
import "C"
func Msm(out *G1ProjectivePoint, points []G1PointAffine, scalars []G1ScalarField, device_id int) (*G1ProjectivePoint, error) {
if len(points) != len(scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
pointsC := (*C.BN254_affine_t)(unsafe.Pointer(&points[0]))
scalarsC := (*C.BN254_scalar_t)(unsafe.Pointer(&scalars[0]))
outC := (*C.BN254_projective_t)(unsafe.Pointer(out))
ret := C.msm_cuda_bn254(outC, pointsC, scalarsC, C.size_t(len(points)), C.size_t(device_id))
if ret != 0 {
return nil, fmt.Errorf("msm_cuda_bn254 returned error code: %d", ret)
}
return out, nil
}
func MsmG2(out *G2Point, points []G2PointAffine, scalars []G1ScalarField, device_id int) (*G2Point, error) {
if len(points) != len(scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
pointsC := (*C.BN254_g2_affine_t)(unsafe.Pointer(&points[0]))
scalarsC := (*C.BN254_scalar_t)(unsafe.Pointer(&scalars[0]))
outC := (*C.BN254_g2_projective_t)(unsafe.Pointer(out))
ret := C.msm_g2_cuda_bn254(outC, pointsC, scalarsC, C.size_t(len(points)), C.size_t(device_id))
if ret != 0 {
return nil, fmt.Errorf("msm_g2_cuda_bn254 returned error code: %d", ret)
}
return out, nil
}
func MsmBatch(points *[]G1PointAffine, scalars *[]G1ScalarField, batchSize, deviceId int) ([]*G1ProjectivePoint, error) {
// Check for nil pointers
if points == nil || scalars == nil {
return nil, errors.New("points or scalars is nil")
}
if len(*points) != len(*scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
// Check for empty slices
if len(*points) == 0 || len(*scalars) == 0 {
return nil, errors.New("points or scalars is empty")
}
// Check for zero batchSize
if batchSize <= 0 {
return nil, errors.New("error on: batchSize must be greater than zero")
}
out := make([]*G1ProjectivePoint, batchSize)
for i := 0; i < len(out); i++ {
var p G1ProjectivePoint
p.SetZero()
out[i] = &p
}
outC := (*C.BN254_projective_t)(unsafe.Pointer(&out[0]))
pointsC := (*C.BN254_affine_t)(unsafe.Pointer(&(*points)[0]))
scalarsC := (*C.BN254_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
msmSizeC := C.size_t(len(*points) / batchSize)
deviceIdC := C.size_t(deviceId)
batchSizeC := C.size_t(batchSize)
ret := C.msm_batch_cuda_bn254(outC, pointsC, scalarsC, batchSizeC, msmSizeC, deviceIdC)
if ret != 0 {
return nil, fmt.Errorf("msm_batch_cuda_bn254 returned error code: %d", ret)
}
return out, nil
}
func MsmG2Batch(points *[]G2PointAffine, scalars *[]G1ScalarField, batchSize, deviceId int) ([]*G2Point, error) {
// Check for nil pointers
if points == nil || scalars == nil {
return nil, errors.New("points or scalars is nil")
}
if len(*points) != len(*scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
// Check for empty slices
if len(*points) == 0 || len(*scalars) == 0 {
return nil, errors.New("points or scalars is empty")
}
// Check for zero batchSize
if batchSize <= 0 {
return nil, errors.New("error on: batchSize must be greater than zero")
}
out := make([]*G2Point, batchSize)
outC := (*C.BN254_g2_projective_t)(unsafe.Pointer(&out[0]))
pointsC := (*C.BN254_g2_affine_t)(unsafe.Pointer(&(*points)[0]))
scalarsC := (*C.BN254_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
msmSizeC := C.size_t(len(*points) / batchSize)
deviceIdC := C.size_t(deviceId)
batchSizeC := C.size_t(batchSize)
ret := C.msm_batch_g2_cuda_bn254(outC, pointsC, scalarsC, batchSizeC, msmSizeC, deviceIdC)
if ret != 0 {
return nil, fmt.Errorf("msm_batch_cuda_bn254 returned error code: %d", ret)
}
return out, nil
}
func Commit(d_out, d_scalars, d_points unsafe.Pointer, count, bucketFactor int) int {
d_outC := (*C.BN254_projective_t)(d_out)
scalarsC := (*C.BN254_scalar_t)(d_scalars)
pointsC := (*C.BN254_affine_t)(d_points)
countC := (C.size_t)(count)
largeBucketFactorC := C.uint(bucketFactor)
ret := C.commit_cuda_bn254(d_outC, scalarsC, pointsC, countC, largeBucketFactorC, 0)
if ret != 0 {
return -1
}
return 0
}
func CommitG2(d_out, d_scalars, d_points unsafe.Pointer, count, bucketFactor int) int {
d_outC := (*C.BN254_g2_projective_t)(d_out)
scalarsC := (*C.BN254_scalar_t)(d_scalars)
pointsC := (*C.BN254_g2_affine_t)(d_points)
countC := (C.size_t)(count)
largeBucketFactorC := C.uint(bucketFactor)
ret := C.commit_g2_cuda_bn254(d_outC, scalarsC, pointsC, countC, largeBucketFactorC, 0)
if ret != 0 {
return -1
}
return 0
}
func CommitBatch(d_out, d_scalars, d_points unsafe.Pointer, count, batch_size int) int {
d_outC := (*C.BN254_projective_t)(d_out)
scalarsC := (*C.BN254_scalar_t)(d_scalars)
pointsC := (*C.BN254_affine_t)(d_points)
countC := (C.size_t)(count)
batch_sizeC := (C.size_t)(batch_size)
ret := C.commit_batch_cuda_bn254(d_outC, scalarsC, pointsC, countC, batch_sizeC, 0)
if ret != 0 {
return -1
}
return 0
}
func CommitG2Batch(d_out, d_scalars, d_points unsafe.Pointer, count, batch_size int) int {
d_outC := (*C.BN254_g2_projective_t)(d_out)
scalarsC := (*C.BN254_scalar_t)(d_scalars)
pointsC := (*C.BN254_g2_affine_t)(d_points)
countC := (C.size_t)(count)
batch_sizeC := (C.size_t)(batch_size)
ret := C.msm_batch_g2_cuda_bn254(d_outC, pointsC, scalarsC, countC, batch_sizeC, 0)
if ret != 0 {
return -1
}
return 0
}

View File

@@ -0,0 +1,355 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bn254
import (
"fmt"
"math"
"testing"
"time"
"unsafe"
"github.com/ingonyama-zk/icicle/goicicle"
"github.com/stretchr/testify/assert"
)
func GeneratePoints(count int) []G1PointAffine {
// Declare a slice of integers
var points []G1PointAffine
// populate the slice
for i := 0; i < 10; i++ {
var pointProjective G1ProjectivePoint
pointProjective.Random()
var pointAffine G1PointAffine
pointAffine.FromProjective(&pointProjective)
points = append(points, pointAffine)
}
log2_10 := math.Log2(10)
log2Count := math.Log2(float64(count))
log2Size := int(math.Ceil(log2Count - log2_10))
for i := 0; i < log2Size; i++ {
points = append(points, points...)
}
return points[:count]
}
func GeneratePointsProj(count int) []G1ProjectivePoint {
// Declare a slice of integers
var points []G1ProjectivePoint
// Use a loop to populate the slice
for i := 0; i < count; i++ {
var p G1ProjectivePoint
p.Random()
points = append(points, p)
}
return points
}
func GenerateScalars(count int, skewed bool) []G1ScalarField {
// Declare a slice of integers
var scalars []G1ScalarField
var rand G1ScalarField
var zero G1ScalarField
var one G1ScalarField
var randLarge G1ScalarField
zero.SetZero()
one.SetOne()
randLarge.Random()
if skewed && count > 1_200_000 {
for i := 0; i < count-1_200_000; i++ {
rand.Random()
scalars = append(scalars, rand)
}
for i := 0; i < 600_000; i++ {
scalars = append(scalars, randLarge)
}
for i := 0; i < 400_000; i++ {
scalars = append(scalars, zero)
}
for i := 0; i < 200_000; i++ {
scalars = append(scalars, one)
}
} else {
for i := 0; i < count; i++ {
rand.Random()
scalars = append(scalars, rand)
}
}
return scalars[:count]
}
func TestMSM(t *testing.T) {
for _, v := range []int{24} {
count := 1 << v
points := GeneratePoints(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, true)
fmt.Print("Finished generating scalars\n")
out := new(G1ProjectivePoint)
startTime := time.Now()
_, e := Msm(out, points, scalars, 0) // non mont
fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())
assert.Equal(t, e, nil, "error should be nil")
assert.True(t, out.IsOnCurve())
}
}
func TestCommitMSM(t *testing.T) {
for _, v := range []int{24} {
count := 1<<v - 1
points := GeneratePoints(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, true)
fmt.Print("Finished generating scalars\n")
out_d, _ := goicicle.CudaMalloc(96)
pointsBytes := count * 64
points_d, _ := goicicle.CudaMalloc(pointsBytes)
goicicle.CudaMemCpyHtoD[G1PointAffine](points_d, points, pointsBytes)
scalarBytes := count * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
startTime := time.Now()
e := Commit(out_d, scalars_d, points_d, count, 10)
fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())
outHost := make([]G1ProjectivePoint, 1)
goicicle.CudaMemCpyDtoH[G1ProjectivePoint](outHost, out_d, 96)
assert.Equal(t, e, 0, "error should be 0")
assert.True(t, outHost[0].IsOnCurve())
}
}
func BenchmarkCommit(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GeneratePoints(msmSize)
scalars := GenerateScalars(msmSize, false)
out_d, _ := goicicle.CudaMalloc(96)
pointsBytes := msmSize * 64
points_d, _ := goicicle.CudaMalloc(pointsBytes)
goicicle.CudaMemCpyHtoD[G1PointAffine](points_d, points, pointsBytes)
scalarBytes := msmSize * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
b.Run(fmt.Sprintf("MSM %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
e := Commit(out_d, scalars_d, points_d, msmSize, 10)
if e != 0 {
panic("Error occured")
}
}
})
}
}
func TestBenchMSM(t *testing.T) {
for _, batchPow2 := range []int{2, 4} {
for _, pow2 := range []int{4, 6} {
msmSize := 1 << pow2
batchSize := 1 << batchPow2
count := msmSize * batchSize
points := GeneratePoints(count)
scalars := GenerateScalars(count, false)
a, e := MsmBatch(&points, &scalars, batchSize, 0)
if e != nil {
t.Errorf("MsmBatchBN254 returned an error: %v", e)
}
if len(a) != batchSize {
t.Errorf("Expected length %d, but got %d", batchSize, len(a))
}
}
}
}
func BenchmarkMSM(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GeneratePoints(msmSize)
scalars := GenerateScalars(msmSize, false)
b.Run(fmt.Sprintf("MSM %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
out := new(G1ProjectivePoint)
_, e := Msm(out, points, scalars, 0)
if e != nil {
panic("Error occured")
}
}
})
}
}
// G2
func GenerateG2Points(count int) []G2PointAffine {
// Declare a slice of integers
var points []G2PointAffine
// populate the slice
for i := 0; i < 10; i++ {
var p G2Point
p.Random()
var affine G2PointAffine
affine.FromProjective(&p)
points = append(points, affine)
}
log2_10 := math.Log2(10)
log2Count := math.Log2(float64(count))
log2Size := int(math.Ceil(log2Count - log2_10))
for i := 0; i < log2Size; i++ {
points = append(points, points...)
}
return points[:count]
}
func TestMsmG2BN254(t *testing.T) {
for _, v := range []int{24} {
count := 1 << v
points := GenerateG2Points(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, false)
fmt.Print("Finished generating scalars\n")
out := new(G2Point)
_, e := MsmG2(out, points, scalars, 0)
assert.Equal(t, e, nil, "error should be nil")
assert.True(t, out.IsOnCurve())
}
}
func BenchmarkMsmG2BN254(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GenerateG2Points(msmSize)
scalars := GenerateScalars(msmSize, false)
b.Run(fmt.Sprintf("MSM G2 %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
out := new(G2Point)
_, e := MsmG2(out, points, scalars, 0)
if e != nil {
panic("Error occured")
}
}
})
}
}
func TestCommitG2MSM(t *testing.T) {
for _, v := range []int{24} {
count := 1 << v
points := GenerateG2Points(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, true)
fmt.Print("Finished generating scalars\n")
var sizeCheckG2PointAffine G2PointAffine
inputPointsBytes := count * int(unsafe.Sizeof(sizeCheckG2PointAffine))
var sizeCheckG2Point G2Point
out_d, _ := goicicle.CudaMalloc(int(unsafe.Sizeof(sizeCheckG2Point)))
points_d, _ := goicicle.CudaMalloc(inputPointsBytes)
goicicle.CudaMemCpyHtoD[G2PointAffine](points_d, points, inputPointsBytes)
scalarBytes := count * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
startTime := time.Now()
e := CommitG2(out_d, scalars_d, points_d, count, 10)
fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())
outHost := make([]G2Point, 1)
goicicle.CudaMemCpyDtoH[G2Point](outHost, out_d, int(unsafe.Sizeof(sizeCheckG2Point)))
assert.Equal(t, e, 0, "error should be 0")
assert.Equal(t, len(outHost), 1)
result := outHost[0]
assert.True(t, result.IsOnCurve())
}
}
func TestBatchG2MSM(t *testing.T) {
for _, batchPow2 := range []int{2, 4} {
for _, pow2 := range []int{4, 6} {
msmSize := 1 << pow2
batchSize := 1 << batchPow2
count := msmSize * batchSize
points := GenerateG2Points(count)
scalars := GenerateScalars(count, false)
pointsResults, e := MsmG2Batch(&points, &scalars, batchSize, 0)
if e != nil {
t.Errorf("MsmBatchBN254 returned an error: %v", e)
}
if len(pointsResults) != batchSize {
t.Errorf("Expected length %d, but got %d", batchSize, len(pointsResults))
}
for _, s := range pointsResults {
assert.True(t, s.IsOnCurve())
}
}
}
}

View File

@@ -0,0 +1,221 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bn254
import (
"errors"
"fmt"
"unsafe"
"github.com/ingonyama-zk/icicle/goicicle"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbn254
// #include "ntt.h"
import "C"
const (
NONE = 0
DIF = 1
DIT = 2
)
func Ntt(scalars *[]G1ScalarField, isInverse bool, deviceId int) uint64 {
scalarsC := (*C.BN254_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
ret := C.ntt_cuda_bn254(scalarsC, C.uint32_t(len(*scalars)), C.bool(isInverse), C.size_t(deviceId))
return uint64(ret)
}
func NttBatch(scalars *[]G1ScalarField, isInverse bool, batchSize, deviceId int) uint64 {
scalarsC := (*C.BN254_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
isInverseC := C.bool(isInverse)
batchSizeC := C.uint32_t(batchSize)
deviceIdC := C.size_t(deviceId)
ret := C.ntt_batch_cuda_bn254(scalarsC, C.uint32_t(len(*scalars)), batchSizeC, isInverseC, deviceIdC)
return uint64(ret)
}
func EcNtt(values *[]G1ProjectivePoint, isInverse bool, deviceId int) uint64 {
valuesC := (*C.BN254_projective_t)(unsafe.Pointer(&(*values)[0]))
deviceIdC := C.size_t(deviceId)
isInverseC := C.bool(isInverse)
n := C.uint32_t(len(*values))
ret := C.ecntt_cuda_bn254(valuesC, n, isInverseC, deviceIdC)
return uint64(ret)
}
func EcNttBatch(values *[]G1ProjectivePoint, isInverse bool, batchSize, deviceId int) uint64 {
valuesC := (*C.BN254_projective_t)(unsafe.Pointer(&(*values)[0]))
deviceIdC := C.size_t(deviceId)
isInverseC := C.bool(isInverse)
n := C.uint32_t(len(*values))
batchSizeC := C.uint32_t(batchSize)
ret := C.ecntt_batch_cuda_bn254(valuesC, n, batchSizeC, isInverseC, deviceIdC)
return uint64(ret)
}
func GenerateTwiddles(d_size int, log_d_size int, inverse bool) (up unsafe.Pointer, err error) {
domain_size := C.uint32_t(d_size)
logn := C.uint32_t(log_d_size)
is_inverse := C.bool(inverse)
dp := C.build_domain_cuda_bn254(domain_size, logn, is_inverse, 0, 0)
if dp == nil {
err = errors.New("nullptr returned from generating twiddles")
return unsafe.Pointer(nil), err
}
return unsafe.Pointer(dp), nil
}
// Reverses d_scalars in-place
func ReverseScalars(d_scalars unsafe.Pointer, len int) (int, error) {
scalarsC := (*C.BN254_scalar_t)(d_scalars)
lenC := C.int(len)
if success := C.reverse_order_scalars_cuda_bn254(scalarsC, lenC, 0, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func Interpolate(scalars, twiddles, cosetPowers unsafe.Pointer, size int, isCoset bool) unsafe.Pointer {
size_d := size * 32
dp, err := goicicle.CudaMalloc(size_d)
if err != nil {
return nil
}
d_out := (*C.BN254_scalar_t)(dp)
scalarsC := (*C.BN254_scalar_t)(scalars)
twiddlesC := (*C.BN254_scalar_t)(twiddles)
cosetPowersC := (*C.BN254_scalar_t)(cosetPowers)
sizeC := C.uint(size)
var ret C.int
if isCoset {
ret = C.interpolate_scalars_on_coset_cuda_bn254(d_out, scalarsC, twiddlesC, sizeC, cosetPowersC, 0, 0)
} else {
ret = C.interpolate_scalars_cuda_bn254(d_out, scalarsC, twiddlesC, sizeC, 0, 0)
}
if ret != 0 {
fmt.Print("error interpolating")
}
return unsafe.Pointer(d_out)
}
func Evaluate(scalars_out, scalars, twiddles, coset_powers unsafe.Pointer, scalars_size, twiddles_size int, isCoset bool) int {
scalars_outC := (*C.BN254_scalar_t)(scalars_out)
scalarsC := (*C.BN254_scalar_t)(scalars)
twiddlesC := (*C.BN254_scalar_t)(twiddles)
coset_powersC := (*C.BN254_scalar_t)(coset_powers)
sizeC := C.uint(scalars_size)
twiddlesC_size := C.uint(twiddles_size)
var ret C.int
if isCoset {
ret = C.evaluate_scalars_on_coset_cuda_bn254(scalars_outC, scalarsC, twiddlesC, twiddlesC_size, sizeC, coset_powersC, 0, 0)
} else {
ret = C.evaluate_scalars_cuda_bn254(scalars_outC, scalarsC, twiddlesC, twiddlesC_size, sizeC, 0, 0)
}
if ret != 0 {
fmt.Print("error interpolating")
return -1
}
return 0
}
func VecScalarAdd(in1_d, in2_d unsafe.Pointer, size int) int {
in1_dC := (*C.BN254_scalar_t)(in1_d)
in2_dC := (*C.BN254_scalar_t)(in2_d)
sizeC := C.uint(size)
ret := C.add_scalars_cuda_bn254(in1_dC, in1_dC, in2_dC, sizeC, 0)
if ret != 0 {
fmt.Print("error adding scalar vectors")
return -1
}
return 0
}
func VecScalarSub(in1_d, in2_d unsafe.Pointer, size int) int {
in1_dC := (*C.BN254_scalar_t)(in1_d)
in2_dC := (*C.BN254_scalar_t)(in2_d)
sizeC := C.uint(size)
ret := C.sub_scalars_cuda_bn254(in1_dC, in1_dC, in2_dC, sizeC, 0)
if ret != 0 {
fmt.Print("error subtracting scalar vectors")
return -1
}
return 0
}
func ToMontgomery(d_scalars unsafe.Pointer, len int) (int, error) {
scalarsC := (*C.BN254_scalar_t)(d_scalars)
lenC := C.uint(len)
if success := C.to_montgomery_scalars_cuda_bn254(scalarsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func FromMontgomery(d_scalars unsafe.Pointer, len int) (int, error) {
scalarsC := (*C.BN254_scalar_t)(d_scalars)
lenC := C.uint(len)
if success := C.from_montgomery_scalars_cuda_bn254(scalarsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func AffinePointFromMontgomery(d_points unsafe.Pointer, len int) (int, error) {
pointsC := (*C.BN254_affine_t)(d_points)
lenC := C.uint(len)
if success := C.from_montgomery_aff_points_cuda_bn254(pointsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func G2AffinePointFromMontgomery(d_points unsafe.Pointer, len int) (int, error) {
pointsC := (*C.BN254_g2_affine_t)(d_points)
lenC := C.uint(len)
if success := C.from_montgomery_aff_points_g2_cuda_bn254(pointsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}

View File

@@ -0,0 +1,148 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bn254
import (
"fmt"
"github.com/stretchr/testify/assert"
"reflect"
"testing"
)
func TestNttBN254Batch(t *testing.T) {
count := 1 << 20
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
NttBatch(&nttResult, false, count, 0)
assert.NotEqual(t, nttResult, scalars)
assert.Equal(t, nttResult, nttResult)
}
func TestNttBN254CompareToGnarkDIF(t *testing.T) {
count := 1 << 2
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
Ntt(&nttResult, false, DIF, 0)
assert.NotEqual(t, nttResult, scalars)
assert.Equal(t, nttResult, nttResult)
}
func TestINttBN254CompareToGnarkDIT(t *testing.T) {
count := 1 << 3
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
Ntt(&nttResult, true, DIT, 0)
assert.NotEqual(t, nttResult, scalars)
assert.Equal(t, nttResult, nttResult)
}
func TestNttBN254(t *testing.T) {
count := 1 << 3
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
Ntt(&nttResult, false, NONE, 0)
assert.NotEqual(t, nttResult, scalars)
inttResult := make([]G1ScalarField, len(nttResult))
copy(inttResult, nttResult)
assert.Equal(t, inttResult, nttResult)
Ntt(&inttResult, true, NONE, 0)
assert.Equal(t, inttResult, scalars)
}
func TestNttBatchBN254(t *testing.T) {
count := 1 << 5
batches := 4
scalars := GenerateScalars(count*batches, false)
var scalarVecOfVec [][]G1ScalarField = make([][]G1ScalarField, 0)
for i := 0; i < batches; i++ {
start := i * count
end := (i + 1) * count
batch := make([]G1ScalarField, len(scalars[start:end]))
copy(batch, scalars[start:end])
scalarVecOfVec = append(scalarVecOfVec, batch)
}
nttBatchResult := make([]G1ScalarField, len(scalars))
copy(nttBatchResult, scalars)
NttBatch(&nttBatchResult, false, count, 0)
var nttResultVecOfVec [][]G1ScalarField
for i := 0; i < batches; i++ {
// Clone the slice
clone := make([]G1ScalarField, len(scalarVecOfVec[i]))
copy(clone, scalarVecOfVec[i])
// Add it to the result vector of vectors
nttResultVecOfVec = append(nttResultVecOfVec, clone)
// Call the ntt_bn254 function
Ntt(&nttResultVecOfVec[i], false, NONE, 0)
}
assert.NotEqual(t, nttBatchResult, scalars)
// Check that the ntt of each vec of scalars is equal to the intt of the specific batch
for i := 0; i < batches; i++ {
if !reflect.DeepEqual(nttResultVecOfVec[i], nttBatchResult[i*count:((i+1)*count)]) {
t.Errorf("ntt of vec of scalars not equal to intt of specific batch")
}
}
}
func BenchmarkNTT(b *testing.B) {
LOG_NTT_SIZES := []int{12, 15, 20, 21, 22, 23, 24, 25, 26}
for _, logNTTSize := range LOG_NTT_SIZES {
nttSize := 1 << logNTTSize
b.Run(fmt.Sprintf("NTT %d", logNTTSize), func(b *testing.B) {
scalars := GenerateScalars(nttSize, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
for n := 0; n < b.N; n++ {
Ntt(&nttResult, false, NONE, 0)
}
})
}
}

View File

@@ -0,0 +1,48 @@
package bn254
import (
"encoding/binary"
"fmt"
"log"
"regexp"
"runtime"
"time"
)
// Function to convert [8]uint32 to [4]uint64
func ConvertUint32ArrToUint64Arr(arr32 [8]uint32) [4]uint64 {
var arr64 [4]uint64
for i := 0; i < len(arr32); i += 2 {
arr64[i/2] = (uint64(arr32[i]) << 32) | uint64(arr32[i+1])
}
return arr64
}
func ConvertUint64ArrToUint32Arr(arr64 [4]uint64) [8]uint32 {
var arr32 [8]uint32
for i, v := range arr64 {
b := make([]byte, 8)
binary.LittleEndian.PutUint64(b, v)
arr32[i*2] = binary.LittleEndian.Uint32(b[0:4])
arr32[i*2+1] = binary.LittleEndian.Uint32(b[4:8])
}
return arr32
}
func TimeTrack(start time.Time) {
elapsed := time.Since(start)
// Skip this function, and fetch the PC and file for its parent.
pc, _, _, _ := runtime.Caller(1)
// Retrieve a function object this functions parent.
funcObj := runtime.FuncForPC(pc)
// Regex to extract just the function name (and not the module path).
runtimeFunc := regexp.MustCompile(`^.*\.(.*)$`)
name := runtimeFunc.ReplaceAllString(funcObj.Name(), "$1")
log.Println(fmt.Sprintf("%s took %s", name, elapsed))
}

View File

@@ -0,0 +1,81 @@
package bn254
import (
"testing"
)
func TestConvertUint32ArrToUint64Arr(t *testing.T) {
testCases := []struct {
name string
input [8]uint32
want [4]uint64
}{
{
name: "Test with incremental array",
input: [8]uint32{1, 2, 3, 4, 5, 6, 7, 8},
want: [4]uint64{4294967298, 12884901892, 21474836486, 30064771080},
},
{
name: "Test with all zeros",
input: [8]uint32{0, 0, 0, 0, 0, 0, 0, 0},
want: [4]uint64{0, 0, 0, 0},
},
{
name: "Test with maximum uint32 values",
input: [8]uint32{4294967295, 4294967295, 4294967295, 4294967295, 4294967295, 4294967295, 4294967295, 4294967295},
want: [4]uint64{18446744073709551615, 18446744073709551615, 18446744073709551615, 18446744073709551615},
},
{
name: "Test with alternating min and max uint32 values",
input: [8]uint32{0, 4294967295, 0, 4294967295, 0, 4294967295, 0, 4294967295},
want: [4]uint64{4294967295, 4294967295, 4294967295, 4294967295},
},
{
name: "Test with alternating max and min uint32 values",
input: [8]uint32{4294967295, 0, 4294967295, 0, 4294967295, 0, 4294967295, 0},
want: [4]uint64{18446744069414584320, 18446744069414584320, 18446744069414584320, 18446744069414584320},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
got := ConvertUint32ArrToUint64Arr(tc.input)
if got != tc.want {
t.Errorf("got %v, want %v", got, tc.want)
}
})
}
}
func TestConvertUint64ArrToUint32Arr(t *testing.T) {
testCases := []struct {
name string
input [4]uint64
expected [8]uint32
}{
{
name: "test one",
input: [4]uint64{1, 2, 3, 4},
expected: [8]uint32{1, 0, 2, 0, 3, 0, 4, 0},
},
{
name: "test two",
input: [4]uint64{100, 200, 300, 400},
expected: [8]uint32{100, 0, 200, 0, 300, 0, 400, 0},
},
{
name: "test three",
input: [4]uint64{1000, 2000, 3000, 4000},
expected: [8]uint32{1000, 0, 2000, 0, 3000, 0, 4000, 0},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
got := ConvertUint64ArrToUint32Arr(tc.input)
if got != tc.expected {
t.Errorf("got %v, want %v", got, tc.expected)
}
})
}
}

View File

@@ -0,0 +1,41 @@
// Copyright 2023 Ingonyama
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by Ingonyama DO NOT EDIT
package bn254
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ -lbn254
// #include "ve_mod_mult.h"
import "C"
import (
"fmt"
"unsafe"
)
func VecScalarMulMod(scalarVec1, scalarVec2 unsafe.Pointer, size int) int {
scalarVec1C := (*C.BN254_scalar_t)(scalarVec1)
scalarVec2C := (*C.BN254_scalar_t)(scalarVec2)
sizeC := C.size_t(size)
ret := C.vec_mod_mult_device_scalar_bn254(scalarVec1C, scalarVec2C, sizeC, 0)
if ret != 0 {
fmt.Print("error multiplying scalar vectors")
return -1
}
return 0
}

49
goicicle/goicicle.go Normal file
View File

@@ -0,0 +1,49 @@
package goicicle
// This file implements CUDA driver context management
// #cgo CFLAGS: -I /usr/loca/cuda/include
// #cgo LDFLAGS: -L/usr/local/cuda/lib64 -lcudart
/*
#include <cuda.h>
#include <cuda_runtime.h>
*/
import "C"
import (
"errors"
"unsafe"
)
func CudaMalloc(size int) (dp unsafe.Pointer, err error) {
var p C.void
dp = unsafe.Pointer(&p)
if err := C.cudaMalloc(&dp, C.size_t(size)); err != 0 {
return nil, errors.New("could not create memory space")
}
return dp, nil
}
func CudaFree(dp unsafe.Pointer) int {
if err := C.cudaFree(dp); err != 0 {
return -1
}
return 0
}
func CudaMemCpyHtoD[T any](dst_d unsafe.Pointer, src []T, size int) int {
src_c := unsafe.Pointer(&src[0])
if err := C.cudaMemcpy(dst_d, src_c, C.size_t(size), 1); err != 0 {
return -1
}
return 0
}
func CudaMemCpyDtoH[T any](dst []T, src_d unsafe.Pointer, size int) int {
dst_c := unsafe.Pointer(&dst[0])
if err := C.cudaMemcpy(dst_c, src_d, C.size_t(size), 2); err != 0 {
return -1
}
return 0
}

41
goicicle/setup.sh Executable file
View File

@@ -0,0 +1,41 @@
#!/bin/bash
SUDO=''
if [ "$EUID" != 0 ]; then
echo "Icicle setup script should be run with root privileges, please run this as root"
SUDO='sudo'
fi
TARGET_BN254="libbn254.so"
TARGET_BLS12_381="libbls12_381.so"
TARGET_BLS12_377="libbls12_377.so"
MAKE_FAIL=0
$SUDO make $1 || MAKE_FAIL=1
if [ $MAKE_FAIL != 0 ]; then
echo "make failed, install dependencies and re-run setup script with root privileges"
exit
fi
TARGET_BN254_PATH=$(dirname "$(find `pwd` -name $TARGET_BN254 -print -quit)")/
TARGET_BLS12_381_PATH=$(dirname "$(find `pwd` -name $TARGET_BLS12_381 -print -quit)")/
TARGET_BLS12_377_PATH=$(dirname "$(find `pwd` -name $TARGET_BLS12_377 -print -quit)")/
if [[ "$TARGET_BLS12_377_PATH" != "" ]]; then
echo "BLS12_377 found @ $TARGET_BLS12_377_PATH"
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$TARGET_BLS12_377_PATH
fi
if [[ "$TARGET_BN254_PATH" != "" ]]; then
echo "BN254 found @ $TARGET_BN254_PATH"
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$TARGET_BN254_PATH
fi
if [[ "$TARGET_BLS12_381_PATH" != "" ]]; then
echo "BLS12_381_PATH found @ $TARGET_BLS12_381_PATH"
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:$TARGET_BLS12_381_PATH
fi

View File

@@ -0,0 +1,42 @@
package config
// {{.SharedLib}}
type Curve struct {
PackageName string
CurveNameUpperCase string
CurveNameLowerCase string
SharedLib string
ScalarSize int
BaseSize int
G2ElementSize int
}
var BN_254 = Curve{
PackageName: "bn254",
CurveNameUpperCase: "BN254",
CurveNameLowerCase: "bn254",
SharedLib: "-lbn254",
ScalarSize: 8,
BaseSize: 8,
G2ElementSize: 4,
}
var BLS_12_377 = Curve{
PackageName: "bls12377",
CurveNameUpperCase: "BLS12_377",
CurveNameLowerCase: "bls12_377",
SharedLib: "-lbls12_377",
ScalarSize: 8,
BaseSize: 12,
G2ElementSize: 6,
}
var BLS_12_381 = Curve{
PackageName: "bls12381",
CurveNameUpperCase: "BLS12_381",
CurveNameLowerCase: "bls12_381",
SharedLib: "-lbls12_381",
ScalarSize: 8,
BaseSize: 12,
G2ElementSize: 6,
}

View File

@@ -0,0 +1,306 @@
import (
"unsafe"
"encoding/binary"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ {{.SharedLib}}
// #include "projective.h"
// #include "ve_mod_mult.h"
import "C"
const SCALAR_SIZE = {{.ScalarSize}}
const BASE_SIZE = {{.BaseSize}}
type G1ScalarField struct {
S [SCALAR_SIZE]uint32
}
type G1BaseField struct {
S [BASE_SIZE]uint32
}
/*
* BaseField Constrctors
*/
func (f *G1BaseField) SetZero() *G1BaseField {
var S [BASE_SIZE]uint32
f.S = S
return f
}
func (f *G1BaseField) SetOne() *G1BaseField {
var S [BASE_SIZE]uint32
S[0] = 1
f.S = S
return f
}
func (p *G1ProjectivePoint) FromAffine(affine *G1PointAffine) *G1ProjectivePoint {
out := (*C.{{.CurveNameUpperCase}}_projective_t)(unsafe.Pointer(p))
in := (*C.{{.CurveNameUpperCase}}_affine_t)(unsafe.Pointer(affine))
C.projective_from_affine_{{.CurveNameLowerCase}}(out, in)
return p
}
func (f *G1BaseField) FromLimbs(limbs [BASE_SIZE]uint32) *G1BaseField {
copy(f.S[:], limbs[:])
return f
}
/*
* BaseField methods
*/
func (f *G1BaseField) Limbs() [BASE_SIZE]uint32 {
return f.S
}
func (f *G1BaseField) ToBytesLe() []byte {
bytes := make([]byte, len(f.S)*4)
for i, v := range f.S {
binary.LittleEndian.PutUint32(bytes[i*4:], v)
}
return bytes
}
/*
* ScalarField methods
*/
func (p *G1ScalarField) Random() *G1ScalarField {
outC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(p))
C.random_scalar_{{.CurveNameLowerCase}}(outC)
return p
}
func (f *G1ScalarField) SetZero() *G1ScalarField {
var S [SCALAR_SIZE]uint32
f.S = S
return f
}
func (f *G1ScalarField) SetOne() *G1ScalarField {
var S [SCALAR_SIZE]uint32
S[0] = 1
f.S = S
return f
}
func (a *G1ScalarField) Eq(b *G1ScalarField) bool {
for i, v := range a.S {
if b.S[i] != v {
return false
}
}
return true
}
/*
* ScalarField methods
*/
func (f *G1ScalarField) Limbs() [SCALAR_SIZE]uint32 {
return f.S
}
func (f *G1ScalarField) ToBytesLe() []byte {
bytes := make([]byte, len(f.S)*4)
for i, v := range f.S {
binary.LittleEndian.PutUint32(bytes[i*4:], v)
}
return bytes
}
/*
* Point{{.CurveNameUpperCase}}
*/
type G1ProjectivePoint struct {
X, Y, Z G1BaseField
}
func (f *G1ProjectivePoint) SetZero() *G1ProjectivePoint {
var yOne G1BaseField
yOne.SetOne()
var xZero G1BaseField
xZero.SetZero()
var zZero G1BaseField
zZero.SetZero()
f.X = xZero
f.Y = yOne
f.Z = zZero
return f
}
func (p *G1ProjectivePoint) Eq(pCompare *G1ProjectivePoint) bool {
// Cast *Point{{.CurveNameUpperCase}} to *C.{{.CurveNameUpperCase}}_projective_t
// The unsafe.Pointer cast is necessary because Go doesn't allow direct casts
// between different pointer types.
// It'S your responsibility to ensure that the types are compatible.
pC := (*C.{{.CurveNameUpperCase}}_projective_t)(unsafe.Pointer(p))
pCompareC := (*C.{{.CurveNameUpperCase}}_projective_t)(unsafe.Pointer(pCompare))
// Call the C function
// The C function doesn't keep any references to the data,
// so it'S fine if the Go garbage collector moves or deletes the data later.
return bool(C.eq_{{.CurveNameLowerCase}}(pC, pCompareC))
}
func (p *G1ProjectivePoint) IsOnCurve() bool {
point := (*C.{{.CurveNameUpperCase}}_projective_t)(unsafe.Pointer(p))
res := C.projective_is_on_curve_{{.CurveNameLowerCase}}(point)
return bool(res)
}
func (p *G1ProjectivePoint) Random() *G1ProjectivePoint {
outC := (*C.{{.CurveNameUpperCase}}_projective_t)(unsafe.Pointer(p))
C.random_projective_{{.CurveNameLowerCase}}(outC)
return p
}
func (p *G1ProjectivePoint) StripZ() *G1PointAffine {
return &G1PointAffine{
X: p.X,
Y: p.Y,
}
}
func (p *G1ProjectivePoint) FromLimbs(x, y, z *[]uint32) *G1ProjectivePoint {
var _x G1BaseField
var _y G1BaseField
var _z G1BaseField
_x.FromLimbs(GetFixedLimbs(x))
_y.FromLimbs(GetFixedLimbs(y))
_z.FromLimbs(GetFixedLimbs(z))
p.X = _x
p.Y = _y
p.Z = _z
return p
}
/*
* PointAffineNoInfinity{{.CurveNameUpperCase}}
*/
type G1PointAffine struct {
X, Y G1BaseField
}
func (p *G1PointAffine) FromProjective(projective *G1ProjectivePoint) *G1PointAffine {
in := (*C.{{.CurveNameUpperCase}}_projective_t)(unsafe.Pointer(projective))
out := (*C.{{.CurveNameUpperCase}}_affine_t)(unsafe.Pointer(p))
C.projective_to_affine_{{.CurveNameLowerCase}}(out,in)
return p
}
func (p *G1PointAffine) ToProjective() *G1ProjectivePoint {
var Z G1BaseField
Z.SetOne()
return &G1ProjectivePoint{
X: p.X,
Y: p.Y,
Z: Z,
}
}
func (p *G1PointAffine) FromLimbs(X, Y *[]uint32) *G1PointAffine {
var _x G1BaseField
var _y G1BaseField
_x.FromLimbs(GetFixedLimbs(X))
_y.FromLimbs(GetFixedLimbs(Y))
return p
}
/*
* Multiplication
*/
func MultiplyVec(a []G1ProjectivePoint, b []G1ScalarField, deviceID int) {
if len(a) != len(b) {
panic("a and b have different lengths")
}
pointsC := (*C.{{.CurveNameUpperCase}}_projective_t)(unsafe.Pointer(&a[0]))
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(&b[0]))
deviceIdC := C.size_t(deviceID)
nElementsC := C.size_t(len(a))
C.vec_mod_mult_point_{{.CurveNameLowerCase}}(pointsC, scalarsC, nElementsC, deviceIdC)
}
func MultiplyScalar(a []G1ScalarField, b []G1ScalarField, deviceID int) {
if len(a) != len(b) {
panic("a and b have different lengths")
}
aC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(&a[0]))
bC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(&b[0]))
deviceIdC := C.size_t(deviceID)
nElementsC := C.size_t(len(a))
C.vec_mod_mult_scalar_{{.CurveNameLowerCase}}(aC, bC, nElementsC, deviceIdC)
}
// Multiply a matrix by a scalar:
//
// `a` - flattenned matrix;
// `b` - vector to multiply `a` by;
func MultiplyMatrix(a []G1ScalarField, b []G1ScalarField, deviceID int) {
c := make([]G1ScalarField, len(b))
for i := range c {
var p G1ScalarField
p.SetZero()
c[i] = p
}
aC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(&a[0]))
bC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(&b[0]))
cC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(&c[0]))
deviceIdC := C.size_t(deviceID)
nElementsC := C.size_t(len(a))
C.matrix_vec_mod_mult_{{.CurveNameLowerCase}}(aC, bC, cC, nElementsC, deviceIdC)
}
/*
* Utils
*/
func GetFixedLimbs(slice *[]uint32) [BASE_SIZE]uint32 {
if len(*slice) <= BASE_SIZE {
limbs := [BASE_SIZE]uint32{}
copy(limbs[:len(*slice)], *slice)
return limbs
}
panic("slice has too many elements")
}

View File

@@ -0,0 +1,180 @@
import (
"encoding/binary"
"testing"
"github.com/stretchr/testify/assert"
)
func TestNewField{{.CurveNameUpperCase}}One(t *testing.T) {
var oneField G1BaseField
oneField.SetOne()
rawOneField := [8]uint32([8]uint32{0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
assert.Equal(t, oneField.S, rawOneField)
}
func TestNewField{{.CurveNameUpperCase}}Zero(t *testing.T) {
var zeroField G1BaseField
zeroField.SetZero()
rawZeroField := [8]uint32([8]uint32{0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0})
assert.Equal(t, zeroField.S, rawZeroField)
}
func TestField{{.CurveNameUpperCase}}ToBytesLe(t *testing.T) {
var p G1ProjectivePoint
p.Random()
expected := make([]byte, len(p.X.S)*4) // each uint32 takes 4 bytes
for i, v := range p.X.S {
binary.LittleEndian.PutUint32(expected[i*4:], v)
}
assert.Equal(t, p.X.ToBytesLe(), expected)
assert.Equal(t, len(p.X.ToBytesLe()), 32)
}
func TestNewPoint{{.CurveNameUpperCase}}Zero(t *testing.T) {
var pointZero G1ProjectivePoint
pointZero.SetZero()
var baseOne G1BaseField
baseOne.SetOne()
var zeroSanity G1BaseField
zeroSanity.SetZero()
assert.Equal(t, pointZero.X, zeroSanity)
assert.Equal(t, pointZero.Y, baseOne)
assert.Equal(t, pointZero.Z, zeroSanity)
}
func TestFromProjectiveToAffine(t *testing.T) {
var projective G1ProjectivePoint
var affine G1PointAffine
projective.Random()
affine.FromProjective(&projective)
var projective2 G1ProjectivePoint
projective2.FromAffine(&affine)
assert.True(t, projective.IsOnCurve())
assert.True(t, projective2.IsOnCurve())
assert.True(t, projective.Eq(&projective2))
}
func Test{{.CurveNameUpperCase}}Eq(t *testing.T) {
var p1 G1ProjectivePoint
p1.Random()
var p2 G1ProjectivePoint
p2.Random()
assert.Equal(t, p1.Eq(&p1), true)
assert.Equal(t, p1.Eq(&p2), false)
}
func Test{{.CurveNameUpperCase}}StripZ(t *testing.T) {
var p1 G1ProjectivePoint
p1.Random()
p2ZLess := p1.StripZ()
assert.IsType(t, G1PointAffine{}, *p2ZLess)
assert.Equal(t, p1.X, p2ZLess.X)
assert.Equal(t, p1.Y, p2ZLess.Y)
}
func TestPoint{{.CurveNameUpperCase}}fromLimbs(t *testing.T) {
var p G1ProjectivePoint
p.Random()
x := p.X.Limbs()
y := p.Y.Limbs()
z := p.Z.Limbs()
xSlice := x[:]
ySlice := y[:]
zSlice := z[:]
var pFromLimbs G1ProjectivePoint
pFromLimbs.FromLimbs(&xSlice, &ySlice, &zSlice)
assert.Equal(t, pFromLimbs, p)
}
func TestNewPointAffineNoInfinity{{.CurveNameUpperCase}}Zero(t *testing.T) {
var zeroP G1PointAffine
var zeroSanity G1BaseField
zeroSanity.SetZero()
assert.Equal(t, zeroP.X, zeroSanity)
assert.Equal(t, zeroP.Y, zeroSanity)
}
func TestPointAffineNoInfinity{{.CurveNameUpperCase}}FromLimbs(t *testing.T) {
// Initialize your test values
x := [8]uint32{1, 2, 3, 4, 5, 6, 7, 8}
y := [8]uint32{9, 10, 11, 12, 13, 14, 15, 16}
xSlice := x[:]
ySlice := y[:]
// Execute your function
var result G1PointAffine
result.FromLimbs(&xSlice, &ySlice)
var xBase G1BaseField
var yBase G1BaseField
xBase.FromLimbs(x)
yBase.FromLimbs(y)
// Define your expected result
expected := &G1PointAffine{
X: xBase,
Y: yBase,
}
// Test if result is as expected
assert.Equal(t, result, expected)
}
func TestGetFixedLimbs(t *testing.T) {
t.Run("case of valid input of length less than 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7}
expected := [8]uint32{1, 2, 3, 4, 5, 6, 7, 0}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of valid input of length 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7, 8}
expected := [8]uint32{1, 2, 3, 4, 5, 6, 7, 8}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of empty input", func(t *testing.T) {
slice := []uint32{}
expected := [8]uint32{0, 0, 0, 0, 0, 0, 0, 0}
result := GetFixedLimbs(&slice)
assert.Equal(t, result, expected)
})
t.Run("case of input length greater than 8", func(t *testing.T) {
slice := []uint32{1, 2, 3, 4, 5, 6, 7, 8, 9}
defer func() {
if r := recover(); r == nil {
t.Errorf("the code did not panic")
}
}()
GetFixedLimbs(&slice)
})
}

View File

@@ -0,0 +1,95 @@
import (
"encoding/binary"
"unsafe"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ {{.SharedLib}}
// #include "projective.h"
// #include "ve_mod_mult.h"
import "C"
// G2 extension field
type G2Element [{{.G2ElementSize}}]uint64
type ExtentionField struct {
A0, A1 G2Element
}
type G2PointAffine struct {
X, Y ExtentionField
}
type G2Point struct {
X, Y, Z ExtentionField
}
func (p *G2Point) Random() *G2Point {
outC := (*C.{{.CurveNameUpperCase}}_g2_projective_t)(unsafe.Pointer(p))
C.random_g2_projective_{{.CurveNameLowerCase}}(outC)
return p
}
func (p *G2Point) FromAffine(affine *G2PointAffine) *G2Point {
out := (*C.{{.CurveNameUpperCase}}_g2_projective_t)(unsafe.Pointer(p))
in := (*C.{{.CurveNameUpperCase}}_g2_affine_t)(unsafe.Pointer(affine))
C.g2_projective_from_affine_{{.CurveNameLowerCase}}(out, in)
return p
}
func (p *G2Point) Eq(pCompare *G2Point) bool {
// Cast *Point{{.CurveNameUpperCase}} to *C.{{.CurveNameUpperCase}}_projective_t
// The unsafe.Pointer cast is necessary because Go doesn't allow direct casts
// between different pointer types.
// It's your responsibility to ensure that the types are compatible.
pC := (*C.{{.CurveNameUpperCase}}_g2_projective_t)(unsafe.Pointer(p))
pCompareC := (*C.{{.CurveNameUpperCase}}_g2_projective_t)(unsafe.Pointer(pCompare))
// Call the C function
// The C function doesn't keep any references to the data,
// so it's fine if the Go garbage collector moves or deletes the data later.
return bool(C.eq_g2_{{.CurveNameLowerCase}}(pC, pCompareC))
}
func (f *G2Element) ToBytesLe() []byte {
var bytes []byte
for _, val := range f {
buf := make([]byte, 8) // 8 bytes because uint64 is 64-bit
binary.LittleEndian.PutUint64(buf, val)
bytes = append(bytes, buf...)
}
return bytes
}
func (p *G2PointAffine) ToProjective() G2Point {
return G2Point{
X: p.X,
Y: p.Y,
Z: ExtentionField{
A0: G2Element{1, 0, 0, 0},
A1: G2Element{0, 0, 0, 0},
},
}
}
func (p *G2PointAffine) FromProjective(projective *G2Point) *G2PointAffine {
out := (*C.{{.CurveNameUpperCase}}_g2_affine_t)(unsafe.Pointer(p))
in := (*C.{{.CurveNameUpperCase}}_g2_projective_t)(unsafe.Pointer(projective))
C.g2_projective_to_affine_{{.CurveNameLowerCase}}(out, in)
return p
}
func (p *G2Point) IsOnCurve() bool {
// Directly copy memory from the C struct to the Go struct
point := (*C.{{.CurveNameUpperCase}}_g2_projective_t)(unsafe.Pointer(p))
res := C.g2_projective_is_on_curve_{{.CurveNameLowerCase}}(point)
return bool(res)
}

View File

@@ -0,0 +1,58 @@
import (
"github.com/stretchr/testify/assert"
"testing"
)
func TestG2Eqg2(t *testing.T) {
var point G2Point
point.Random()
assert.True(t, point.Eq(&point))
}
func TestG2FromProjectiveToAffine(t *testing.T) {
var projective G2Point
var affine G2PointAffine
projective.Random()
affine.FromProjective(&projective)
var projective2 G2Point
projective2.FromAffine(&affine)
assert.True(t, projective.IsOnCurve())
assert.True(t, projective2.IsOnCurve())
assert.True(t, projective.Eq(&projective2))
}
func TestG2Eqg2NotEqual(t *testing.T) {
var point G2Point
point.Random()
var point2 G2Point
point2.Random()
assert.False(t, point.Eq(&point2))
}
func TestG2ToBytes(t *testing.T) {
element := G2Element{0x6546098ea84b6298, 0x4a384533d1f68aca, 0xaa0666972d771336, 0x1569e4a34321993}
bytes := element.ToBytesLe()
assert.Equal(t, bytes, []byte{0x98, 0x62, 0x4b, 0xa8, 0x8e, 0x9, 0x46, 0x65, 0xca, 0x8a, 0xf6, 0xd1, 0x33, 0x45, 0x38, 0x4a, 0x36, 0x13, 0x77, 0x2d, 0x97, 0x66, 0x6, 0xaa, 0x93, 0x19, 0x32, 0x34, 0x4a, 0x9e, 0x56, 0x1})
}
func TestG2ShouldConvertToProjective(t *testing.T) {
var pointProjective G2Point
var pointAffine G2PointAffine
pointProjective.Random()
pointAffine.FromProjective(&pointProjective)
proj := pointAffine.ToProjective()
assert.True(t, proj.IsOnCurve())
assert.True(t, pointProjective.Eq(&proj))
}

View File

@@ -0,0 +1,84 @@
#include <cuda.h>
#include <cuda_runtime.h>
#include <stdbool.h>
// msm.h
#ifndef _{{.CurveNameUpperCase}}_MSM_H
#define _{{.CurveNameUpperCase}}_MSM_H
#ifdef __cplusplus
extern "C" {
#endif
// Incomplete declaration of {{.CurveNameUpperCase}} projective and affine structs
typedef struct {{.CurveNameUpperCase}}_projective_t {{.CurveNameUpperCase}}_projective_t;
typedef struct {{.CurveNameUpperCase}}_g2_projective_t {{.CurveNameUpperCase}}_g2_projective_t;
typedef struct {{.CurveNameUpperCase}}_affine_t {{.CurveNameUpperCase}}_affine_t;
typedef struct {{.CurveNameUpperCase}}_g2_affine_t {{.CurveNameUpperCase}}_g2_affine_t;
typedef struct {{.CurveNameUpperCase}}_scalar_t {{.CurveNameUpperCase}}_scalar_t;
typedef cudaStream_t CudaStream_t;
int msm_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* out, {{.CurveNameUpperCase}}_affine_t* points, {{.CurveNameUpperCase}}_scalar_t* scalars, size_t count, size_t device_id);
int msm_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* out,
{{.CurveNameUpperCase}}_affine_t* points,
{{.CurveNameUpperCase}}_scalar_t* scalars,
size_t batch_size,
size_t msm_size,
size_t device_id);
int commit_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_scalars,
{{.CurveNameUpperCase}}_affine_t* d_points,
size_t count,
unsigned large_bucket_factor,
size_t device_id);
int commit_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_scalars,
{{.CurveNameUpperCase}}_affine_t* d_points,
size_t count,
size_t batch_size,
size_t device_id);
int msm_g2_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_g2_projective_t* out,
{{.CurveNameUpperCase}}_g2_affine_t* points,
{{.CurveNameUpperCase}}_scalar_t* scalars,
size_t count,
size_t device_id);
int msm_batch_g2_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_g2_projective_t* out,
{{.CurveNameUpperCase}}_g2_affine_t* points,
{{.CurveNameUpperCase}}_scalar_t* scalars,
size_t batch_size,
size_t msm_size,
size_t device_id);
int commit_g2_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_g2_projective_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_scalars,
{{.CurveNameUpperCase}}_g2_affine_t* d_points,
size_t count,
unsigned large_bucket_factor,
size_t device_id);
int commit_batch_g2_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_g2_projective_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_scalars,
{{.CurveNameUpperCase}}_g2_affine_t* d_points,
size_t count,
size_t batch_size,
size_t device_id,
cudaStream_t stream);
#ifdef __cplusplus
}
#endif
#endif /* _{{.CurveNameUpperCase}}_MSM_H */

View File

@@ -0,0 +1,181 @@
#include <cuda.h>
#include <stdbool.h>
// ntt.h
#ifndef _{{.CurveNameUpperCase}}_NTT_H
#define _{{.CurveNameUpperCase}}_NTT_H
#ifdef __cplusplus
extern "C" {
#endif
// Incomplete declaration of {{.CurveNameUpperCase}} projective and affine structs
typedef struct {{.CurveNameUpperCase}}_projective_t {{.CurveNameUpperCase}}_projective_t;
typedef struct {{.CurveNameUpperCase}}_affine_t {{.CurveNameUpperCase}}_affine_t;
typedef struct {{.CurveNameUpperCase}}_scalar_t {{.CurveNameUpperCase}}_scalar_t;
typedef struct {{.CurveNameUpperCase}}_g2_projective_t {{.CurveNameUpperCase}}_g2_projective_t;
typedef struct {{.CurveNameUpperCase}}_g2_affine_t {{.CurveNameUpperCase}}_g2_affine_t;
int ntt_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* arr, uint32_t n, bool inverse, size_t device_id);
int ntt_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
int ecntt_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* arr, uint32_t n, bool inverse, size_t device_id);
int ecntt_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
{{.CurveNameUpperCase}}_scalar_t*
build_domain_cuda_{{.CurveNameLowerCase}}(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
int interpolate_scalars_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_evaluations,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned n,
unsigned device_id,
size_t stream);
int interpolate_scalars_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_evaluations,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int interpolate_points_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_projective_t* d_evaluations,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned n,
size_t device_id,
size_t stream);
int interpolate_points_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_projective_t* d_evaluations,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int interpolate_scalars_on_coset_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_evaluations,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned n,
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int interpolate_scalars_batch_on_coset_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_evaluations,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_scalars_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned device_id,
size_t stream);
int evaluate_scalars_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int evaluate_points_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_projective_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
size_t device_id,
size_t stream);
int evaluate_points_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_projective_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int evaluate_scalars_on_coset_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
unsigned device_id,
size_t stream);
int evaluate_scalars_on_coset_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_points_on_coset_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_projective_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_points_on_coset_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_projective_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int reverse_order_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* arr, int n, size_t device_id, size_t stream);
int reverse_order_scalars_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
int reverse_order_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* arr, int n, size_t device_id, size_t stream);
int reverse_order_points_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
int add_scalars_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_in1, {{.CurveNameUpperCase}}_scalar_t* d_in2, unsigned n, size_t stream);
int sub_scalars_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_in1, {{.CurveNameUpperCase}}_scalar_t* d_in2, unsigned n, size_t stream);
int to_montgomery_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_inout, unsigned n, size_t stream);
int from_montgomery_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_inout, unsigned n, size_t stream);
// points g1
int to_montgomery_proj_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* d_inout, unsigned n, size_t stream);
int from_montgomery_proj_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* d_inout, unsigned n, size_t stream);
int to_montgomery_aff_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_affine_t* d_inout, unsigned n, size_t stream);
int from_montgomery_aff_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_affine_t* d_inout, unsigned n, size_t stream);
// points g2
int to_montgomery_proj_points_g2_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* d_inout, unsigned n, size_t stream);
int from_montgomery_proj_points_g2_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* d_inout, unsigned n, size_t stream);
int to_montgomery_aff_points_g2_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_affine_t* d_inout, unsigned n, size_t stream);
int from_montgomery_aff_points_g2_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_affine_t* d_inout, unsigned n, size_t stream);
#ifdef __cplusplus
}
#endif
#endif /* _{{.CurveNameUpperCase}}_NTT_H */

View File

@@ -0,0 +1,33 @@
#include <cuda.h>
#include <stdbool.h>
// projective.h
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {{.CurveNameUpperCase}}_projective_t {{.CurveNameUpperCase}}_projective_t;
typedef struct {{.CurveNameUpperCase}}_g2_projective_t {{.CurveNameUpperCase}}_g2_projective_t;
typedef struct {{.CurveNameUpperCase}}_affine_t {{.CurveNameUpperCase}}_affine_t;
typedef struct {{.CurveNameUpperCase}}_g2_affine_t {{.CurveNameUpperCase}}_g2_affine_t;
typedef struct {{.CurveNameUpperCase}}_scalar_t {{.CurveNameUpperCase}}_scalar_t;
bool projective_is_on_curve_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* point1);
int random_scalar_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* out);
int random_projective_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* out);
{{.CurveNameUpperCase}}_projective_t* projective_zero_{{.CurveNameLowerCase}}();
int projective_to_affine_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_affine_t* out, {{.CurveNameUpperCase}}_projective_t* point1);
int projective_from_affine_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* out, {{.CurveNameUpperCase}}_affine_t* point1);
int random_g2_projective_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* out);
int g2_projective_to_affine_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_affine_t* out, {{.CurveNameUpperCase}}_g2_projective_t* point1);
int g2_projective_from_affine_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* out, {{.CurveNameUpperCase}}_g2_affine_t* point1);
bool g2_projective_is_on_curve_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* point1);
bool eq_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* point1, {{.CurveNameUpperCase}}_projective_t* point2);
bool eq_g2_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* point1, {{.CurveNameUpperCase}}_g2_projective_t* point2);
#ifdef __cplusplus
}
#endif

View File

@@ -0,0 +1,32 @@
#include <stdbool.h>
#include <cuda.h>
// ve_mod_mult.h
#ifndef _{{.CurveNameUpperCase}}_VEC_MULT_H
#define _{{.CurveNameUpperCase}}_VEC_MULT_H
#ifdef __cplusplus
extern "C" {
#endif
typedef struct {{.CurveNameUpperCase}}_projective_t {{.CurveNameUpperCase}}_projective_t;
typedef struct {{.CurveNameUpperCase}}_scalar_t {{.CurveNameUpperCase}}_scalar_t;
int32_t vec_mod_mult_point_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* inout, {{.CurveNameUpperCase}}_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
int32_t vec_mod_mult_scalar_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* inout, {{.CurveNameUpperCase}}_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
int32_t vec_mod_mult_device_scalar_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* inout, {{.CurveNameUpperCase}}_scalar_t* scalar_vec, size_t n_elements, size_t device_id);
int32_t matrix_vec_mod_mult_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* matrix_flattened,
{{.CurveNameUpperCase}}_scalar_t* input,
{{.CurveNameUpperCase}}_scalar_t* output,
size_t n_elments,
size_t device_id);
#ifdef __cplusplus
}
#endif
#endif /* _{{.CurveNameUpperCase}}_VEC_MULT_H */

246
goicicle/templates/main.go Normal file
View File

@@ -0,0 +1,246 @@
package main
import (
"fmt"
"os"
"path/filepath"
"github.com/consensys/bavard"
config "github.com/ingonyama-zk/icicle/goicicle/templates/curves"
)
const (
copyrightHolder = "Ingonyama"
generatedBy = "Ingonyama"
copyrightYear = 2023
baseDir = "../curves/"
)
var bgen = bavard.NewBatchGenerator(copyrightHolder, copyrightYear, generatedBy)
func genMainFiles() {
bn254_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "g1.go"), Templates: []string{"g1.go.tmpl"}},
}
bls12377_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "g1.go"), Templates: []string{"g1.go.tmpl"}},
}
bls12381_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "g1.go"), Templates: []string{"g1.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./curves/", bls12377_entries...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./curves/", bn254_entries...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./curves/", bls12381_entries...))
bn254_g2_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "g2.go"), Templates: []string{"g2.go.tmpl"}},
}
bls12377_g2_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "g2.go"), Templates: []string{"g2.go.tmpl"}},
}
bls12381_g2_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "g2.go"), Templates: []string{"g2.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./curves/", bls12377_g2_entries...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./curves/", bn254_g2_entries...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./curves/", bls12381_g2_entries...))
bn254_msm_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "msm.go"), Templates: []string{"msm.go.tmpl"}},
}
bls12377_msm_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "msm.go"), Templates: []string{"msm.go.tmpl"}},
}
bls12381_msm_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "msm.go"), Templates: []string{"msm.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./msm/", bls12377_msm_entries...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./msm/", bn254_msm_entries...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./msm/", bls12381_msm_entries...))
bn254_ntt_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "ntt.go"), Templates: []string{"ntt.go.tmpl"}},
}
bls12377_ntt_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "ntt.go"), Templates: []string{"ntt.go.tmpl"}},
}
bls12381_ntt_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "ntt.go"), Templates: []string{"ntt.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./ntt/", bls12377_ntt_entries...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./ntt/", bn254_ntt_entries...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./ntt/", bls12381_ntt_entries...))
bn254_vec_mod_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "vec_mod.go"), Templates: []string{"vec_mod.go.tmpl"}},
}
bls12377_vec_mod_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "vec_mod.go"), Templates: []string{"vec_mod.go.tmpl"}},
}
bls12381_vec_mod_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "vec_mod.go"), Templates: []string{"vec_mod.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./ops/", bls12377_vec_mod_entries...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./ops/", bn254_vec_mod_entries...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./ops/", bls12381_vec_mod_entries...))
h_msm_bn254 := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "include", "msm.h"), Templates: []string{"msm.h.tmpl"}},
}
h_msm_bls12377 := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "include", "msm.h"), Templates: []string{"msm.h.tmpl"}},
}
h_msm_bls12381 := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "include", "msm.h"), Templates: []string{"msm.h.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./hfiles/", h_msm_bls12377...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./hfiles/", h_msm_bn254...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./hfiles/", h_msm_bls12381...))
h_ntt_bn254 := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "include", "ntt.h"), Templates: []string{"ntt.h.tmpl"}},
}
h_ntt_bls12377 := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "include", "ntt.h"), Templates: []string{"ntt.h.tmpl"}},
}
h_ntt_bls12381 := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "include", "ntt.h"), Templates: []string{"ntt.h.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./hfiles/", h_ntt_bls12377...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./hfiles/", h_ntt_bn254...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./hfiles/", h_ntt_bls12381...))
ve_mod_mult_h_bn254 := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "include", "ve_mod_mult.h"), Templates: []string{"ve_mod_mult.h.tmpl"}},
}
ve_mod_mult_h_bls12377 := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "include", "ve_mod_mult.h"), Templates: []string{"ve_mod_mult.h.tmpl"}},
}
ve_mod_mult_ht_bls12381 := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "include", "ve_mod_mult.h"), Templates: []string{"ve_mod_mult.h.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./hfiles/", ve_mod_mult_h_bls12377...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./hfiles/", ve_mod_mult_h_bn254...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./hfiles/", ve_mod_mult_ht_bls12381...))
projective_bn254 := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "include", "projective.h"), Templates: []string{"projective.h.tmpl"}},
}
projective_bls12377 := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "include", "projective.h"), Templates: []string{"projective.h.tmpl"}},
}
projective_bls12381 := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "include", "projective.h"), Templates: []string{"projective.h.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./hfiles/", projective_bls12377...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./hfiles/", projective_bn254...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./hfiles/", projective_bls12381...))
}
func genTestFiles() {
// G1 TESTS
bn254_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "g1_test.go"), Templates: []string{"g1_test.go.tmpl"}},
}
bls12377_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "g1_test.go"), Templates: []string{"g1_test.go.tmpl"}},
}
bls12381_entries := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "g1_test.go"), Templates: []string{"g1_test.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./curves/", bls12377_entries...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./curves/", bn254_entries...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./curves/", bls12381_entries...))
// G2 TESTS
bn254_entries_g2_test := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "g2_test.go"), Templates: []string{"g2_test.go.tmpl"}},
}
bls12377_entries_g2_test := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "g2_test.go"), Templates: []string{"g2_test.go.tmpl"}},
}
bls12381_entries_g2_test := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "g2_test.go"), Templates: []string{"g2_test.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./curves/", bls12377_entries_g2_test...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./curves/", bn254_entries_g2_test...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./curves/", bls12381_entries_g2_test...))
// MSM TEST
bn254_entries_msm_test := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "msm_test.go"), Templates: []string{"msm_test.go.tmpl"}},
}
bls12377_entries_msm_test := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "msm_test.go"), Templates: []string{"msm_test.go.tmpl"}},
}
bls12381_entries_msm_test := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "msm_test.go"), Templates: []string{"msm_test.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./msm/", bls12377_entries_msm_test...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./msm/", bn254_entries_msm_test...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./msm/", bls12381_entries_msm_test...))
// FFT TEST
bn254_entries_fft_test := []bavard.Entry{
{File: filepath.Join(baseDir, "bn254", "ntt_test.go"), Templates: []string{"ntt_test.go.tmpl"}},
}
bls12377_entries_fft_test := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12377", "ntt_test.go"), Templates: []string{"ntt_test.go.tmpl"}},
}
bls12381_entries_fft_test := []bavard.Entry{
{File: filepath.Join(baseDir, "bls12381", "ntt_test.go"), Templates: []string{"ntt_test.go.tmpl"}},
}
assertNoError(bgen.Generate(config.BLS_12_377, config.BLS_12_377.PackageName, "./ntt/", bls12377_entries_fft_test...))
assertNoError(bgen.Generate(config.BN_254, config.BN_254.PackageName, "./ntt/", bn254_entries_fft_test...))
assertNoError(bgen.Generate(config.BLS_12_381, config.BLS_12_381.PackageName, "./ntt/", bls12381_entries_fft_test...))
}
func main() {
genMainFiles()
genTestFiles()
}
func assertNoError(err error) {
if err != nil {
fmt.Printf("\n%s\n", err.Error())
os.Exit(-1)
}
}

View File

@@ -0,0 +1,190 @@
import (
"errors"
"fmt"
"unsafe"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ {{.SharedLib}}
// #include "msm.h"
import "C"
func Msm(out *G1ProjectivePoint, points []G1PointAffine, scalars []G1ScalarField, device_id int) (*G1ProjectivePoint, error) {
if len(points) != len(scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
pointsC := (*C.{{.CurveNameUpperCase}}_affine_t)(unsafe.Pointer(&points[0]))
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(&scalars[0]))
outC := (*C.{{.CurveNameUpperCase}}_projective_t)(unsafe.Pointer(out))
ret := C.msm_cuda_{{.CurveNameLowerCase}}(outC, pointsC, scalarsC, C.size_t(len(points)), C.size_t(device_id))
if ret != 0 {
return nil, fmt.Errorf("msm_cuda_{{.CurveNameLowerCase}} returned error code: %d", ret)
}
return out, nil
}
func MsmG2(out *G2Point, points []G2PointAffine, scalars []G1ScalarField, device_id int) (*G2Point, error) {
if len(points) != len(scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
pointsC := (*C.{{.CurveNameUpperCase}}_g2_affine_t)(unsafe.Pointer(&points[0]))
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(&scalars[0]))
outC := (*C.{{.CurveNameUpperCase}}_g2_projective_t)(unsafe.Pointer(out))
ret := C.msm_g2_cuda_{{.CurveNameLowerCase}}(outC, pointsC, scalarsC, C.size_t(len(points)), C.size_t(device_id))
if ret != 0 {
return nil, fmt.Errorf("msm_g2_cuda_{{.CurveNameLowerCase}} returned error code: %d", ret)
}
return out, nil
}
func MsmBatch(points *[]G1PointAffine, scalars *[]G1ScalarField, batchSize, deviceId int) ([]*G1ProjectivePoint, error) {
// Check for nil pointers
if points == nil || scalars == nil {
return nil, errors.New("points or scalars is nil")
}
if len(*points) != len(*scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
// Check for empty slices
if len(*points) == 0 || len(*scalars) == 0 {
return nil, errors.New("points or scalars is empty")
}
// Check for zero batchSize
if batchSize <= 0 {
return nil, errors.New("error on: batchSize must be greater than zero")
}
out := make([]*G1ProjectivePoint, batchSize)
for i := 0; i < len(out); i++ {
var p G1ProjectivePoint
p.SetZero()
out[i] = &p
}
outC := (*C.{{.CurveNameUpperCase}}_projective_t)(unsafe.Pointer(&out[0]))
pointsC := (*C.{{.CurveNameUpperCase}}_affine_t)(unsafe.Pointer(&(*points)[0]))
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
msmSizeC := C.size_t(len(*points) / batchSize)
deviceIdC := C.size_t(deviceId)
batchSizeC := C.size_t(batchSize)
ret := C.msm_batch_cuda_{{.CurveNameLowerCase}}(outC, pointsC, scalarsC, batchSizeC, msmSizeC, deviceIdC)
if ret != 0 {
return nil, fmt.Errorf("msm_batch_cuda_{{.CurveNameLowerCase}} returned error code: %d", ret)
}
return out, nil
}
func MsmG2Batch(points *[]G2PointAffine, scalars *[]G1ScalarField, batchSize, deviceId int) ([]*G2Point, error) {
// Check for nil pointers
if points == nil || scalars == nil {
return nil, errors.New("points or scalars is nil")
}
if len(*points) != len(*scalars) {
return nil, errors.New("error on: len(points) != len(scalars)")
}
// Check for empty slices
if len(*points) == 0 || len(*scalars) == 0 {
return nil, errors.New("points or scalars is empty")
}
// Check for zero batchSize
if batchSize <= 0 {
return nil, errors.New("error on: batchSize must be greater than zero")
}
out := make([]*G2Point, batchSize)
outC := (*C.{{.CurveNameUpperCase}}_g2_projective_t)(unsafe.Pointer(&out[0]))
pointsC := (*C.{{.CurveNameUpperCase}}_g2_affine_t)(unsafe.Pointer(&(*points)[0]))
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
msmSizeC := C.size_t(len(*points) / batchSize)
deviceIdC := C.size_t(deviceId)
batchSizeC := C.size_t(batchSize)
ret := C.msm_batch_g2_cuda_{{.CurveNameLowerCase}}(outC, pointsC, scalarsC, batchSizeC, msmSizeC, deviceIdC)
if ret != 0 {
return nil, fmt.Errorf("msm_batch_cuda_{{.CurveNameLowerCase}} returned error code: %d", ret)
}
return out, nil
}
func Commit(d_out, d_scalars, d_points unsafe.Pointer, count, bucketFactor int) int {
d_outC := (*C.{{.CurveNameUpperCase}}_projective_t)(d_out)
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(d_scalars)
pointsC := (*C.{{.CurveNameUpperCase}}_affine_t)(d_points)
countC := (C.size_t)(count)
largeBucketFactorC := C.uint(bucketFactor)
ret := C.commit_cuda_{{.CurveNameLowerCase}}(d_outC, scalarsC, pointsC, countC, largeBucketFactorC, 0)
if ret != 0 {
return -1
}
return 0
}
func CommitG2(d_out, d_scalars, d_points unsafe.Pointer, count, bucketFactor int) int {
d_outC := (*C.{{.CurveNameUpperCase}}_g2_projective_t)(d_out)
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(d_scalars)
pointsC := (*C.{{.CurveNameUpperCase}}_g2_affine_t)(d_points)
countC := (C.size_t)(count)
largeBucketFactorC := C.uint(bucketFactor)
ret := C.commit_g2_cuda_{{.CurveNameLowerCase}}(d_outC, scalarsC, pointsC, countC, largeBucketFactorC, 0)
if ret != 0 {
return -1
}
return 0
}
func CommitBatch(d_out, d_scalars, d_points unsafe.Pointer, count, batch_size int) int {
d_outC := (*C.{{.CurveNameUpperCase}}_projective_t)(d_out)
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(d_scalars)
pointsC := (*C.{{.CurveNameUpperCase}}_affine_t)(d_points)
countC := (C.size_t)(count)
batch_sizeC := (C.size_t)(batch_size)
ret := C.commit_batch_cuda_{{.CurveNameLowerCase}}(d_outC, scalarsC, pointsC, countC, batch_sizeC, 0)
if ret != 0 {
return -1
}
return 0
}
func CommitG2Batch(d_out, d_scalars, d_points unsafe.Pointer, count, batch_size int) int {
d_outC := (*C.{{.CurveNameUpperCase}}_g2_projective_t)(d_out)
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(d_scalars)
pointsC := (*C.{{.CurveNameUpperCase}}_g2_affine_t)(d_points)
countC := (C.size_t)(count)
batch_sizeC := (C.size_t)(batch_size)
ret := C.msm_batch_g2_cuda_{{.CurveNameLowerCase}}(d_outC, pointsC, scalarsC, countC, batch_sizeC, 0)
if ret != 0 {
return -1
}
return 0
}

View File

@@ -0,0 +1,337 @@
import (
"fmt"
"math"
"testing"
"time"
"unsafe"
"github.com/ingonyama-zk/icicle/goicicle"
"github.com/stretchr/testify/assert"
)
func GeneratePoints(count int) []G1PointAffine {
// Declare a slice of integers
var points []G1PointAffine
// populate the slice
for i := 0; i < 10; i++ {
var pointProjective G1ProjectivePoint
pointProjective.Random()
var pointAffine G1PointAffine
pointAffine.FromProjective(&pointProjective)
points = append(points, pointAffine)
}
log2_10 := math.Log2(10)
log2Count := math.Log2(float64(count))
log2Size := int(math.Ceil(log2Count - log2_10))
for i := 0; i < log2Size; i++ {
points = append(points, points...)
}
return points[:count]
}
func GeneratePointsProj(count int) []G1ProjectivePoint {
// Declare a slice of integers
var points []G1ProjectivePoint
// Use a loop to populate the slice
for i := 0; i < count; i++ {
var p G1ProjectivePoint
p.Random()
points = append(points, p)
}
return points
}
func GenerateScalars(count int, skewed bool) []G1ScalarField {
// Declare a slice of integers
var scalars []G1ScalarField
var rand G1ScalarField
var zero G1ScalarField
var one G1ScalarField
var randLarge G1ScalarField
zero.SetZero()
one.SetOne()
randLarge.Random()
if skewed && count > 1_200_000 {
for i := 0; i < count-1_200_000; i++ {
rand.Random()
scalars = append(scalars, rand)
}
for i := 0; i < 600_000; i++ {
scalars = append(scalars, randLarge)
}
for i := 0; i < 400_000; i++ {
scalars = append(scalars, zero)
}
for i := 0; i < 200_000; i++ {
scalars = append(scalars, one)
}
} else {
for i := 0; i < count; i++ {
rand.Random()
scalars = append(scalars, rand)
}
}
return scalars[:count]
}
func TestMSM(t *testing.T) {
for _, v := range []int{24} {
count := 1 << v
points := GeneratePoints(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, true)
fmt.Print("Finished generating scalars\n")
out := new(G1ProjectivePoint)
startTime := time.Now()
_, e := Msm(out, points, scalars, 0) // non mont
fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())
assert.Equal(t, e, nil, "error should be nil")
assert.True(t, out.IsOnCurve())
}
}
func TestCommitMSM(t *testing.T) {
for _, v := range []int{24} {
count := 1<<v - 1
points := GeneratePoints(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, true)
fmt.Print("Finished generating scalars\n")
out_d, _ := goicicle.CudaMalloc(96)
pointsBytes := count * 64
points_d, _ := goicicle.CudaMalloc(pointsBytes)
goicicle.CudaMemCpyHtoD[G1PointAffine](points_d, points, pointsBytes)
scalarBytes := count * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
startTime := time.Now()
e := Commit(out_d, scalars_d, points_d, count, 10)
fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())
outHost := make([]G1ProjectivePoint, 1)
goicicle.CudaMemCpyDtoH[G1ProjectivePoint](outHost, out_d, 96)
assert.Equal(t, e, 0, "error should be 0")
assert.True(t, outHost[0].IsOnCurve())
}
}
func BenchmarkCommit(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GeneratePoints(msmSize)
scalars := GenerateScalars(msmSize, false)
out_d, _ := goicicle.CudaMalloc(96)
pointsBytes := msmSize * 64
points_d, _ := goicicle.CudaMalloc(pointsBytes)
goicicle.CudaMemCpyHtoD[G1PointAffine](points_d, points, pointsBytes)
scalarBytes := msmSize * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
b.Run(fmt.Sprintf("MSM %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
e := Commit(out_d, scalars_d, points_d, msmSize, 10)
if e != 0 {
panic("Error occured")
}
}
})
}
}
func TestBenchMSM(t *testing.T) {
for _, batchPow2 := range []int{2, 4} {
for _, pow2 := range []int{4, 6} {
msmSize := 1 << pow2
batchSize := 1 << batchPow2
count := msmSize * batchSize
points := GeneratePoints(count)
scalars := GenerateScalars(count, false)
a, e := MsmBatch(&points, &scalars, batchSize, 0)
if e != nil {
t.Errorf("MsmBatch{{.CurveNameUpperCase}} returned an error: %v", e)
}
if len(a) != batchSize {
t.Errorf("Expected length %d, but got %d", batchSize, len(a))
}
}
}
}
func BenchmarkMSM(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GeneratePoints(msmSize)
scalars := GenerateScalars(msmSize, false)
b.Run(fmt.Sprintf("MSM %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
out := new(G1ProjectivePoint)
_, e := Msm(out, points, scalars, 0)
if e != nil {
panic("Error occured")
}
}
})
}
}
// G2
func GenerateG2Points(count int) []G2PointAffine {
// Declare a slice of integers
var points []G2PointAffine
// populate the slice
for i := 0; i < 10; i++ {
var p G2Point
p.Random()
var affine G2PointAffine
affine.FromProjective(&p)
points = append(points, affine)
}
log2_10 := math.Log2(10)
log2Count := math.Log2(float64(count))
log2Size := int(math.Ceil(log2Count - log2_10))
for i := 0; i < log2Size; i++ {
points = append(points, points...)
}
return points[:count]
}
func TestMsmG2{{.CurveNameUpperCase}}(t *testing.T) {
for _, v := range []int{24} {
count := 1 << v
points := GenerateG2Points(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, false)
fmt.Print("Finished generating scalars\n")
out := new(G2Point)
_, e := MsmG2(out, points, scalars, 0)
assert.Equal(t, e, nil, "error should be nil")
assert.True(t, out.IsOnCurve())
}
}
func BenchmarkMsmG2{{.CurveNameUpperCase}}(b *testing.B) {
LOG_MSM_SIZES := []int{20, 21, 22, 23, 24, 25, 26}
for _, logMsmSize := range LOG_MSM_SIZES {
msmSize := 1 << logMsmSize
points := GenerateG2Points(msmSize)
scalars := GenerateScalars(msmSize, false)
b.Run(fmt.Sprintf("MSM G2 %d", logMsmSize), func(b *testing.B) {
for n := 0; n < b.N; n++ {
out := new(G2Point)
_, e := MsmG2(out, points, scalars, 0)
if e != nil {
panic("Error occured")
}
}
})
}
}
func TestCommitG2MSM(t *testing.T) {
for _, v := range []int{24} {
count := 1 << v
points := GenerateG2Points(count)
fmt.Print("Finished generating points\n")
scalars := GenerateScalars(count, true)
fmt.Print("Finished generating scalars\n")
var sizeCheckG2PointAffine G2PointAffine
inputPointsBytes := count * int(unsafe.Sizeof(sizeCheckG2PointAffine))
var sizeCheckG2Point G2Point
out_d, _ := goicicle.CudaMalloc(int(unsafe.Sizeof(sizeCheckG2Point)))
points_d, _ := goicicle.CudaMalloc(inputPointsBytes)
goicicle.CudaMemCpyHtoD[G2PointAffine](points_d, points, inputPointsBytes)
scalarBytes := count * 32
scalars_d, _ := goicicle.CudaMalloc(scalarBytes)
goicicle.CudaMemCpyHtoD[G1ScalarField](scalars_d, scalars, scalarBytes)
startTime := time.Now()
e := CommitG2(out_d, scalars_d, points_d, count, 10)
fmt.Printf("icicle MSM took: %d ms\n", time.Since(startTime).Milliseconds())
outHost := make([]G2Point, 1)
goicicle.CudaMemCpyDtoH[G2Point](outHost, out_d, int(unsafe.Sizeof(sizeCheckG2Point)))
assert.Equal(t, e, 0, "error should be 0")
assert.Equal(t, len(outHost), 1)
result := outHost[0]
assert.True(t, result.IsOnCurve())
}
}
func TestBatchG2MSM(t *testing.T) {
for _, batchPow2 := range []int{2, 4} {
for _, pow2 := range []int{4, 6} {
msmSize := 1 << pow2
batchSize := 1 << batchPow2
count := msmSize * batchSize
points := GenerateG2Points(count)
scalars := GenerateScalars(count, false)
pointsResults, e := MsmG2Batch(&points, &scalars, batchSize, 0)
if e != nil {
t.Errorf("MsmBatch{{.CurveNameUpperCase}} returned an error: %v", e)
}
if len(pointsResults) != batchSize {
t.Errorf("Expected length %d, but got %d", batchSize, len(pointsResults))
}
for _, s := range pointsResults {
assert.True(t, s.IsOnCurve())
}
}
}
}

View File

@@ -0,0 +1,203 @@
import (
"errors"
"fmt"
"unsafe"
"github.com/ingonyama-zk/icicle/goicicle"
)
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ {{.SharedLib}}
// #include "ntt.h"
import "C"
const (
NONE = 0
DIF = 1
DIT = 2
)
func Ntt(scalars *[]G1ScalarField, isInverse bool, deviceId int) uint64 {
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
ret := C.ntt_cuda_{{.CurveNameLowerCase}}(scalarsC, C.uint32_t(len(*scalars)), C.bool(isInverse), C.size_t(deviceId))
return uint64(ret)
}
func NttBatch(scalars *[]G1ScalarField, isInverse bool, batchSize, deviceId int) uint64 {
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(unsafe.Pointer(&(*scalars)[0]))
isInverseC := C.bool(isInverse)
batchSizeC := C.uint32_t(batchSize)
deviceIdC := C.size_t(deviceId)
ret := C.ntt_batch_cuda_{{.CurveNameLowerCase}}(scalarsC, C.uint32_t(len(*scalars)), batchSizeC, isInverseC, deviceIdC)
return uint64(ret)
}
func EcNtt(values *[]G1ProjectivePoint, isInverse bool, deviceId int) uint64 {
valuesC := (*C.{{.CurveNameUpperCase}}_projective_t)(unsafe.Pointer(&(*values)[0]))
deviceIdC := C.size_t(deviceId)
isInverseC := C.bool(isInverse)
n := C.uint32_t(len(*values))
ret := C.ecntt_cuda_{{.CurveNameLowerCase}}(valuesC, n, isInverseC, deviceIdC)
return uint64(ret)
}
func EcNttBatch(values *[]G1ProjectivePoint, isInverse bool, batchSize, deviceId int) uint64 {
valuesC := (*C.{{.CurveNameUpperCase}}_projective_t)(unsafe.Pointer(&(*values)[0]))
deviceIdC := C.size_t(deviceId)
isInverseC := C.bool(isInverse)
n := C.uint32_t(len(*values))
batchSizeC := C.uint32_t(batchSize)
ret := C.ecntt_batch_cuda_{{.CurveNameLowerCase}}(valuesC, n, batchSizeC, isInverseC, deviceIdC)
return uint64(ret)
}
func GenerateTwiddles(d_size int, log_d_size int, inverse bool) (up unsafe.Pointer, err error) {
domain_size := C.uint32_t(d_size)
logn := C.uint32_t(log_d_size)
is_inverse := C.bool(inverse)
dp := C.build_domain_cuda_{{.CurveNameLowerCase}}(domain_size, logn, is_inverse, 0, 0)
if dp == nil {
err = errors.New("nullptr returned from generating twiddles")
return unsafe.Pointer(nil), err
}
return unsafe.Pointer(dp), nil
}
// Reverses d_scalars in-place
func ReverseScalars(d_scalars unsafe.Pointer, len int) (int, error) {
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(d_scalars)
lenC := C.int(len)
if success := C.reverse_order_scalars_cuda_{{.CurveNameLowerCase}}(scalarsC, lenC, 0, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func Interpolate(scalars, twiddles, cosetPowers unsafe.Pointer, size int, isCoset bool) unsafe.Pointer {
size_d := size * 32
dp, err := goicicle.CudaMalloc(size_d)
if err != nil {
return nil
}
d_out := (*C.{{.CurveNameUpperCase}}_scalar_t)(dp)
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(scalars)
twiddlesC := (*C.{{.CurveNameUpperCase}}_scalar_t)(twiddles)
cosetPowersC := (*C.{{.CurveNameUpperCase}}_scalar_t)(cosetPowers)
sizeC := C.uint(size)
var ret C.int
if isCoset {
ret = C.interpolate_scalars_on_coset_cuda_{{.CurveNameLowerCase}}(d_out, scalarsC, twiddlesC, sizeC, cosetPowersC, 0, 0)
} else {
ret = C.interpolate_scalars_cuda_{{.CurveNameLowerCase}}(d_out, scalarsC, twiddlesC, sizeC, 0, 0)
}
if ret != 0 {
fmt.Print("error interpolating")
}
return unsafe.Pointer(d_out)
}
func Evaluate(scalars_out, scalars, twiddles, coset_powers unsafe.Pointer, scalars_size, twiddles_size int, isCoset bool) int {
scalars_outC := (*C.{{.CurveNameUpperCase}}_scalar_t)(scalars_out)
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(scalars)
twiddlesC := (*C.{{.CurveNameUpperCase}}_scalar_t)(twiddles)
coset_powersC := (*C.{{.CurveNameUpperCase}}_scalar_t)(coset_powers)
sizeC := C.uint(scalars_size)
twiddlesC_size := C.uint(twiddles_size)
var ret C.int
if isCoset {
ret = C.evaluate_scalars_on_coset_cuda_{{.CurveNameLowerCase}}(scalars_outC, scalarsC, twiddlesC, twiddlesC_size, sizeC, coset_powersC, 0, 0)
} else {
ret = C.evaluate_scalars_cuda_{{.CurveNameLowerCase}}(scalars_outC, scalarsC, twiddlesC, twiddlesC_size, sizeC, 0, 0)
}
if ret != 0 {
fmt.Print("error interpolating")
return -1
}
return 0
}
func VecScalarAdd(in1_d, in2_d unsafe.Pointer, size int) int {
in1_dC := (*C.{{.CurveNameUpperCase}}_scalar_t)(in1_d)
in2_dC := (*C.{{.CurveNameUpperCase}}_scalar_t)(in2_d)
sizeC := C.uint(size)
ret := C.add_scalars_cuda_{{.CurveNameLowerCase}}(in1_dC, in1_dC, in2_dC, sizeC, 0)
if ret != 0 {
fmt.Print("error adding scalar vectors")
return -1
}
return 0
}
func VecScalarSub(in1_d, in2_d unsafe.Pointer, size int) int {
in1_dC := (*C.{{.CurveNameUpperCase}}_scalar_t)(in1_d)
in2_dC := (*C.{{.CurveNameUpperCase}}_scalar_t)(in2_d)
sizeC := C.uint(size)
ret := C.sub_scalars_cuda_{{.CurveNameLowerCase}}(in1_dC, in1_dC, in2_dC, sizeC, 0)
if ret != 0 {
fmt.Print("error subtracting scalar vectors")
return -1
}
return 0
}
func ToMontgomery(d_scalars unsafe.Pointer, len int) (int, error) {
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(d_scalars)
lenC := C.uint(len)
if success := C.to_montgomery_scalars_cuda_{{.CurveNameLowerCase}}(scalarsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func FromMontgomery(d_scalars unsafe.Pointer, len int) (int, error) {
scalarsC := (*C.{{.CurveNameUpperCase}}_scalar_t)(d_scalars)
lenC := C.uint(len)
if success := C.from_montgomery_scalars_cuda_{{.CurveNameLowerCase}}(scalarsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func AffinePointFromMontgomery(d_points unsafe.Pointer, len int) (int, error) {
pointsC := (*C.{{.CurveNameUpperCase}}_affine_t)(d_points)
lenC := C.uint(len)
if success := C.from_montgomery_aff_points_cuda_{{.CurveNameLowerCase}}(pointsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}
func G2AffinePointFromMontgomery(d_points unsafe.Pointer, len int) (int, error) {
pointsC := (*C.{{.CurveNameUpperCase}}_g2_affine_t)(d_points)
lenC := C.uint(len)
if success := C.from_montgomery_aff_points_g2_cuda_{{.CurveNameLowerCase}}(pointsC, lenC, 0); success != 0 {
return -1, errors.New("reversing failed")
}
return 0, nil
}

View File

@@ -0,0 +1,130 @@
import (
"fmt"
"github.com/stretchr/testify/assert"
"reflect"
"testing"
)
func TestNtt{{.CurveNameUpperCase}}Batch(t *testing.T) {
count := 1 << 20
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
NttBatch(&nttResult, false, count, 0)
assert.NotEqual(t, nttResult, scalars)
assert.Equal(t, nttResult, nttResult)
}
func TestNtt{{.CurveNameUpperCase}}CompareToGnarkDIF(t *testing.T) {
count := 1 << 2
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
Ntt(&nttResult, false, DIF, 0)
assert.NotEqual(t, nttResult, scalars)
assert.Equal(t, nttResult, nttResult)
}
func TestINtt{{.CurveNameUpperCase}}CompareToGnarkDIT(t *testing.T) {
count := 1 << 3
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
Ntt(&nttResult, true, DIT, 0)
assert.NotEqual(t, nttResult, scalars)
assert.Equal(t, nttResult, nttResult)
}
func TestNtt{{.CurveNameUpperCase}}(t *testing.T) {
count := 1 << 3
scalars := GenerateScalars(count, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
assert.Equal(t, nttResult, scalars)
Ntt(&nttResult, false, NONE, 0)
assert.NotEqual(t, nttResult, scalars)
inttResult := make([]G1ScalarField, len(nttResult))
copy(inttResult, nttResult)
assert.Equal(t, inttResult, nttResult)
Ntt(&inttResult, true, NONE, 0)
assert.Equal(t, inttResult, scalars)
}
func TestNttBatch{{.CurveNameUpperCase}}(t *testing.T) {
count := 1 << 5
batches := 4
scalars := GenerateScalars(count*batches, false)
var scalarVecOfVec [][]G1ScalarField = make([][]G1ScalarField, 0)
for i := 0; i < batches; i++ {
start := i * count
end := (i + 1) * count
batch := make([]G1ScalarField, len(scalars[start:end]))
copy(batch, scalars[start:end])
scalarVecOfVec = append(scalarVecOfVec, batch)
}
nttBatchResult := make([]G1ScalarField, len(scalars))
copy(nttBatchResult, scalars)
NttBatch(&nttBatchResult, false, count, 0)
var nttResultVecOfVec [][]G1ScalarField
for i := 0; i < batches; i++ {
// Clone the slice
clone := make([]G1ScalarField, len(scalarVecOfVec[i]))
copy(clone, scalarVecOfVec[i])
// Add it to the result vector of vectors
nttResultVecOfVec = append(nttResultVecOfVec, clone)
// Call the ntt_{{.CurveNameLowerCase}} function
Ntt(&nttResultVecOfVec[i], false, NONE, 0)
}
assert.NotEqual(t, nttBatchResult, scalars)
// Check that the ntt of each vec of scalars is equal to the intt of the specific batch
for i := 0; i < batches; i++ {
if !reflect.DeepEqual(nttResultVecOfVec[i], nttBatchResult[i*count:((i+1)*count)]) {
t.Errorf("ntt of vec of scalars not equal to intt of specific batch")
}
}
}
func BenchmarkNTT(b *testing.B) {
LOG_NTT_SIZES := []int{12, 15, 20, 21, 22, 23, 24, 25, 26}
for _, logNTTSize := range LOG_NTT_SIZES {
nttSize := 1 << logNTTSize
b.Run(fmt.Sprintf("NTT %d", logNTTSize), func(b *testing.B) {
scalars := GenerateScalars(nttSize, false)
nttResult := make([]G1ScalarField, len(scalars)) // Make a new slice with the same length
copy(nttResult, scalars)
for n := 0; n < b.N; n++ {
Ntt(&nttResult, false, NONE, 0)
}
})
}
}

View File

@@ -0,0 +1,23 @@
// #cgo CFLAGS: -I./include/
// #cgo LDFLAGS: -L${SRCDIR}/../../ {{.SharedLib}}
// #include "ve_mod_mult.h"
import "C"
import (
"fmt"
"unsafe"
)
func VecScalarMulMod(scalarVec1, scalarVec2 unsafe.Pointer, size int) int {
scalarVec1C := (*C.{{.CurveNameUpperCase}}_scalar_t)(scalarVec1)
scalarVec2C := (*C.{{.CurveNameUpperCase}}_scalar_t)(scalarVec2)
sizeC := C.size_t(size)
ret := C.vec_mod_mult_device_scalar_{{.CurveNameLowerCase}}(scalarVec1C, scalarVec2C, sizeC, 0)
if ret != 0 {
fmt.Print("error multiplying scalar vectors")
return -1
}
return 0
}

View File

@@ -1,8 +1,8 @@
cmake_minimum_required(VERSION 3.16)
cmake_minimum_required(VERSION 3.18)
# GoogleTest requires at least C++14
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CUDA_STANDARD 14)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
# add the target cuda architectures
@@ -22,6 +22,10 @@ FetchContent_Declare(
URL https://github.com/google/googletest/archive/refs/tags/v1.13.0.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
# boosting lib
include_directories("/home/miner/include/boost_1_80_0")
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)

File diff suppressed because it is too large Load Diff

View File

@@ -3,19 +3,46 @@
#pragma once
template <typename S, typename P, typename A>
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, cudaStream_t stream);
void bucket_method_msm(
unsigned bitsize,
unsigned c,
S* scalars,
A* points,
unsigned size,
P* final_result,
bool on_device,
bool big_triangle,
cudaStream_t stream);
template <typename S, typename P, typename A>
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device, cudaStream_t stream);
void batched_bucket_method_msm(
unsigned bitsize,
unsigned c,
S* scalars,
A* points,
unsigned batch_size,
unsigned msm_size,
P* final_results,
bool on_device,
cudaStream_t stream);
template <typename S, typename P, typename A>
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream);
void batched_large_msm(
S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream);
template <typename S, typename P, typename A>
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, cudaStream_t stream);
void large_msm(
S* scalars,
A* points,
unsigned size,
P* result,
bool on_device,
bool big_triangle,
unsigned large_bucket_factor,
cudaStream_t stream);
template <typename S, typename P, typename A>
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, cudaStream_t stream);
void short_msm(S* h_scalars, A* h_points, unsigned size, P* h_final_result, cudaStream_t stream);
template <typename A, typename S, typename P>
void reference_msm(S* scalars, A* a_points, unsigned size);

View File

@@ -1,113 +1,115 @@
#include <iostream>
#include <chrono>
#include <vector>
#include "msm.cu"
#include "../../utils/cuda_utils.cuh"
#include "../../primitives/projective.cuh"
#include "../../primitives/field.cuh"
#include "../../curves/bls12_381/curve_config.cuh"
#include "../../primitives/projective.cuh"
#include "../../utils/cuda_utils.cuh"
#include "msm.cu"
#include <chrono>
#include <iostream>
#include <vector>
// #include "../../curves/bls12_377/curve_config.cuh"
#include "../../curves/bn254/curve_config.cuh"
using namespace BLS12_381;
// using namespace BLS12_377;
using namespace BN254;
class Dummy_Scalar {
public:
static constexpr unsigned NBITS = 32;
class Dummy_Scalar
{
public:
static constexpr unsigned NBITS = 32;
unsigned x;
unsigned x;
unsigned p = 10;
// unsigned p = 1<<30;
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar) {
os << scalar.x;
return os;
}
static HOST_DEVICE_INLINE Dummy_Scalar zero() { return {0}; }
HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width) {
return (x>>(digit_num*digit_width))&((1<<digit_width)-1);
}
static HOST_DEVICE_INLINE Dummy_Scalar one() { return {1}; }
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2) {
return {p1.x+p2.x};
}
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar)
{
os << scalar.x;
return os;
}
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) {
return (p1.x == p2.x);
}
HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width)
{
return (x >> (digit_num * digit_width)) & ((1 << digit_width) - 1);
}
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) {
return (p1.x == p2);
}
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2)
{
return {(p1.x + p2.x) % p1.p};
}
// static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar &scalar) {
// return {Dummy_Scalar::neg(point.x)};
// }
static HOST_INLINE Dummy_Scalar rand_host() {
return {(unsigned)rand()};
}
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) { return (p1.x == p2.x); }
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) { return (p1.x == p2); }
static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar& scalar) { return {scalar.p - scalar.x}; }
static HOST_INLINE Dummy_Scalar rand_host()
{
return {(unsigned)rand() % 10};
// return {(unsigned)rand()};
}
};
class Dummy_Projective {
class Dummy_Projective
{
public:
Dummy_Scalar x;
public:
Dummy_Scalar x;
static HOST_DEVICE_INLINE Dummy_Projective zero() { return {0}; }
static HOST_DEVICE_INLINE Dummy_Projective zero() {
return {0};
static HOST_DEVICE_INLINE Dummy_Projective one() { return {1}; }
static HOST_DEVICE_INLINE Dummy_Projective to_affine(const Dummy_Projective& point) { return {point.x}; }
static HOST_DEVICE_INLINE Dummy_Projective from_affine(const Dummy_Projective& point) { return {point.x}; }
static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective& point) { return {Dummy_Scalar::neg(point.x)}; }
friend HOST_DEVICE_INLINE Dummy_Projective operator+(Dummy_Projective p1, const Dummy_Projective& p2)
{
return {p1.x + p2.x};
}
// friend HOST_DEVICE_INLINE Dummy_Projective operator-(Dummy_Projective p1, const Dummy_Projective& p2) {
// return p1 + neg(p2);
// }
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Projective& point)
{
os << point.x;
return os;
}
friend HOST_DEVICE_INLINE Dummy_Projective operator*(Dummy_Scalar scalar, const Dummy_Projective& point)
{
Dummy_Projective res = zero();
#ifdef CUDA_ARCH
#pragma unroll
#endif
for (int i = 0; i < Dummy_Scalar::NBITS; i++) {
if (i > 0) { res = res + res; }
if (scalar.get_scalar_digit(Dummy_Scalar::NBITS - i - 1, 1)) { res = res + point; }
}
return res;
}
static HOST_DEVICE_INLINE Dummy_Projective to_affine(const Dummy_Projective &point) {
return {point.x};
}
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Projective& p1, const Dummy_Projective& p2)
{
return (p1.x == p2.x);
}
static HOST_DEVICE_INLINE Dummy_Projective from_affine(const Dummy_Projective &point) {
return {point.x};
}
static HOST_DEVICE_INLINE bool is_zero(const Dummy_Projective& point) { return point.x == 0; }
// static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
// return {Dummy_Scalar::neg(point.x)};
// }
friend HOST_DEVICE_INLINE Dummy_Projective operator+(Dummy_Projective p1, const Dummy_Projective& p2) {
return {p1.x+p2.x};
}
// friend HOST_DEVICE_INLINE Dummy_Projective operator-(Dummy_Projective p1, const Dummy_Projective& p2) {
// return p1 + neg(p2);
// }
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Projective& point) {
os << point.x;
return os;
}
friend HOST_DEVICE_INLINE Dummy_Projective operator*(Dummy_Scalar scalar, const Dummy_Projective& point) {
Dummy_Projective res = zero();
#ifdef CUDA_ARCH
#pragma unroll
#endif
for (int i = 0; i < Dummy_Scalar::NBITS; i++) {
if (i > 0) {
res = res + res;
}
if (scalar.get_scalar_digit(Dummy_Scalar::NBITS - i - 1, 1)) {
res = res + point;
}
}
return res;
}
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Projective& p1, const Dummy_Projective& p2) {
return (p1.x == p2.x);
}
static HOST_DEVICE_INLINE bool is_zero(const Dummy_Projective &point) {
return point.x == 0;
}
static HOST_INLINE Dummy_Projective rand_host() {
return {(unsigned)rand()};
}
static HOST_INLINE Dummy_Projective rand_host()
{
return {(unsigned)rand() % 10};
// return {(unsigned)rand()};
}
};
//switch between dummy and real:
// switch between dummy and real:
typedef scalar_t test_scalar;
typedef projective_t test_projective;
@@ -119,63 +121,100 @@ typedef affine_t test_affine;
int main()
{
unsigned batch_size = 4;
unsigned msm_size = 1<<15;
unsigned N = batch_size*msm_size;
unsigned batch_size = 1;
// unsigned msm_size = 1<<21;
unsigned msm_size = 12180757;
unsigned N = batch_size * msm_size;
test_scalar *scalars = new test_scalar[N];
test_affine *points = new test_affine[N];
for (unsigned i=0;i<N;i++){
scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
points[i] = (i%msm_size < 10)? test_projective::to_affine(test_projective::rand_host()): points[i-10];
// scalars[i] = test_scalar::rand_host();
test_scalar* scalars = new test_scalar[N];
test_affine* points = new test_affine[N];
for (unsigned i = 0; i < N; i++) {
// scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
points[i] = (i % msm_size < 10) ? test_projective::to_affine(test_projective::rand_host()) : points[i - 10];
scalars[i] = test_scalar::rand_host();
// scalars[i] = i < N/2? test_scalar::rand_host() : test_scalar::one();
// points[i] = test_projective::to_affine(test_projective::rand_host());
}
std::cout<<"finished generating"<<std::endl;
std::cout << "finished generating" << std::endl;
// projective_t *short_res = (projective_t*)malloc(sizeof(projective_t));
// test_projective *large_res = (test_projective*)malloc(sizeof(test_projective));
test_projective large_res[batch_size];
test_projective batched_large_res[batch_size];
test_projective large_res[batch_size * 2];
// test_projective batched_large_res[batch_size];
// fake_point *large_res = (fake_point*)malloc(sizeof(fake_point));
// fake_point batched_large_res[256];
// short_msm<scalar_t, projective_t, affine_t>(scalars, points, N, short_res);
for (unsigned i=0;i<batch_size;i++){
large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
// std::cout<<"final result large"<<std::endl;
// std::cout<<test_projective::to_affine(*large_res)<<std::endl;
}
// for (unsigned i=0;i<batch_size;i++){
// large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i,
// false); std::cout<<"final result large"<<std::endl; std::cout<<test_projective::to_affine(*large_res)<<std::endl;
// }
test_scalar* scalars_d;
test_affine* points_d;
test_projective* large_res_d;
cudaMalloc(&scalars_d, sizeof(test_scalar) * msm_size);
cudaMalloc(&points_d, sizeof(test_affine) * msm_size);
cudaMalloc(&large_res_d, sizeof(test_projective));
cudaMemcpy(scalars_d, scalars, sizeof(test_scalar) * msm_size, cudaMemcpyHostToDevice);
cudaMemcpy(points_d, points, sizeof(test_affine) * msm_size, cudaMemcpyHostToDevice);
std::cout << "finished copying" << std::endl;
// batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size,
// batched_large_res, false);
cudaStream_t stream1;
cudaStream_t stream2;
cudaStreamCreate(&stream1);
cudaStreamCreate(&stream2);
auto begin1 = std::chrono::high_resolution_clock::now();
large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false, true, stream1);
auto end1 = std::chrono::high_resolution_clock::now();
auto elapsed1 = std::chrono::duration_cast<std::chrono::nanoseconds>(end1 - begin1);
printf("Big Triangle : %.3f seconds.\n", elapsed1.count() * 1e-9);
// std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
auto begin = std::chrono::high_resolution_clock::now();
batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
// large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false);
large_msm<test_scalar, test_projective, test_affine>(
scalars_d, points_d, msm_size, large_res_d, true, false, stream2);
// test_reduce_triangle(scalars);
// test_reduce_rectangle(scalars);
// test_reduce_single(scalars);
// test_reduce_var(scalars);
auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
printf("On Device No Big Triangle: %.3f seconds.\n", elapsed.count() * 1e-9);
cudaStreamSynchronize(stream1);
cudaStreamSynchronize(stream2);
cudaStreamDestroy(stream1);
cudaStreamDestroy(stream2);
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
std::cout << test_projective::to_affine(large_res[0]) << std::endl;
cudaMemcpy(&large_res[1], large_res_d, sizeof(test_projective), cudaMemcpyDeviceToHost);
std::cout << test_projective::to_affine(large_res[1]) << std::endl;
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
// std::cout<<"final results batched large"<<std::endl;
// bool success = true;
// for (unsigned i = 0; i < batch_size; i++)
// {
// std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
// if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
// std::cout<<"good"<<std::endl;
// }
// else{
// std::cout<<"miss"<<std::endl;
// std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
// success = false;
// }
// }
// if (success){
// std::cout<<"success!"<<std::endl;
// }
std::cout<<"final results batched large"<<std::endl;
bool success = true;
for (unsigned i = 0; i < batch_size; i++)
{
std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
std::cout<<"good"<<std::endl;
}
else{
std::cout<<"miss"<<std::endl;
std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
success = false;
}
}
if (success){
std::cout<<"success!"<<std::endl;
}
// std::cout<<batched_large_res[0]<<std::endl;
// std::cout<<batched_large_res[1]<<std::endl;
// std::cout<<projective_t::to_affine(batched_large_res[0])<<std::endl;

View File

@@ -1,81 +1,125 @@
#ifndef LDE
#define LDE
#include <cuda.h>
#include "ntt.cuh"
#include "lde.cuh"
#include "../vector_manipulation/ve_mod_mult.cuh"
#include "lde.cuh"
#include "ntt.cuh"
#include <cuda.h>
template <typename E, bool SUB>
__global__ void add_sub_array(E* res, E* in1, E* in2, uint32_t n)
{
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid < n) { res[tid] = SUB ? in1[tid] - in2[tid] : in1[tid] + in2[tid]; }
}
template <typename E>
int sub_polys(E* d_out, E* d_in1, E* d_in2, unsigned n, cudaStream_t stream)
{
uint32_t NUM_THREADS = MAX_THREADS_BATCH;
uint32_t NUM_BLOCKS = (n + NUM_THREADS - 1) / NUM_THREADS;
add_sub_array<E, true><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, d_in1, d_in2, n);
return 0;
}
template <typename E>
int add_polys(E* d_out, E* d_in1, E* d_in2, unsigned n, cudaStream_t stream)
{
uint32_t NUM_THREADS = MAX_THREADS_BATCH;
uint32_t NUM_BLOCKS = (n + NUM_THREADS - 1) / NUM_THREADS;
add_sub_array<E, false><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, d_in1, d_in2, n);
return 0;
}
/**
* Interpolate a batch of polynomials from their evaluations on the same subgroup.
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs.
* @param d_out The variable to write coefficients of the resulting polynomials into (the coefficients are in bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
* @param d_out The variable to write coefficients of the resulting polynomials into (the coefficients are in
* bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
* @param d_evaluations Input array of evaluations of all polynomials of type E (elements).
* @param d_domain Domain on which the polynomials are evaluated. Must be a subgroup.
* @param n Length of `d_domain` array, also equal to the number of evaluations of each polynomial.
* @param batch_size The size of the batch; the length of `d_evaluations` is `n` * `batch_size`.
*/
template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluations, S * d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
uint32_t logn = uint32_t(log(n) / log(2));
template <typename E, typename S>
int interpolate_batch(
E* d_out,
E* d_evaluations,
S* d_domain,
unsigned n,
unsigned batch_size,
bool coset,
S* coset_powers,
cudaStream_t stream)
{
cudaMemcpyAsync(d_out, d_evaluations, sizeof(E) * n * batch_size, cudaMemcpyDeviceToDevice, stream);
int NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
int NUM_BLOCKS = batch_size * max(int((n / 2) / NUM_THREADS), 1);
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
{
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, n, d_domain, n, NUM_BLOCKS, s, false);
}
NUM_BLOCKS = (n * batch_size + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>> (d_out, n * batch_size, S::inv_log_size(logn));
cudaStreamSynchronize(stream);
ntt_inplace_batch_template(d_out, d_domain, n, batch_size, true, coset, coset_powers, stream, true);
return 0;
}
/**
* Interpolate a polynomial from its evaluations on a subgroup.
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs.
* @param d_out The variable to write coefficients of the resulting polynomial into (the coefficients are in bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
* @param d_out The variable to write coefficients of the resulting polynomial into (the coefficients are in
* bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
* @param d_evaluations Input array of evaluations that have type E (elements).
* @param d_domain Domain on which the polynomial is evaluated. Must be a subgroup.
* @param n Length of `d_evaluations` and the size `d_domain` arrays (they should have equal length).
*/
template <typename E, typename S> int interpolate(E * d_out, E * d_evaluations, S * d_domain, unsigned n, cudaStream_t stream) {
return interpolate_batch <E, S> (d_out, d_evaluations, d_domain, n, 1, stream);
template <typename E, typename S>
int interpolate(E* d_out, E* d_evaluations, S* d_domain, unsigned n, bool coset, S* coset_powers, cudaStream_t stream)
{
return interpolate_batch<E, S>(d_out, d_evaluations, d_domain, n, 1, coset, coset_powers, stream);
}
template < typename E > __global__ void fill_array(E * arr, E val, uint32_t n) {
template <typename E>
__global__ void fill_array(E* arr, E val, uint32_t n)
{
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid < n) {
arr[tid] = val;
}
if (tid < n) { arr[tid] = val; }
}
/**
* Evaluate a batch of polynomials on the same coset.
* @param d_out The evaluations of the polynomials on coset `u` * `d_domain`.
* @param d_coefficients Input array of coefficients of all polynomials of type E (elements) to be evaluated in-place on a coset.
* @param d_coefficients Input array of coefficients of all polynomials of type E (elements) to be evaluated in-place on
* a coset.
* @param d_domain Domain on which the polynomials are evaluated (see `coset` flag). Must be a subgroup.
* @param domain_size Length of `d_domain` array, on which the polynomial is computed.
* @param n The number of coefficients, which might be different from `domain_size`.
* @param batch_size The size of the batch; the length of `d_coefficients` is `n` * `batch_size`.
* @param coset The flag that indicates whether to evaluate on a coset. If false, evaluate on a subgroup `d_domain`.
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of
* the coset.
*/
template <typename E, typename S>
int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, unsigned batch_size, bool coset, S * coset_powers, cudaStream_t stream) {
int evaluate_batch(
E* d_out,
E* d_coefficients,
S* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
bool coset,
S* coset_powers,
cudaStream_t stream)
{
uint32_t logn = uint32_t(log(domain_size) / log(2));
if (domain_size > n) {
// allocate and initialize an array of stream handles to parallelize data copying across batches
cudaStream_t *memcpy_streams = (cudaStream_t *) malloc(batch_size * sizeof(cudaStream_t));
for (unsigned i = 0; i < batch_size; i++)
{
cudaStream_t* memcpy_streams = (cudaStream_t*)malloc(batch_size * sizeof(cudaStream_t));
for (unsigned i = 0; i < batch_size; i++) {
cudaStreamCreate(&(memcpy_streams[i]));
cudaMemcpyAsync(&d_out[i * domain_size], &d_coefficients[i * n], n * sizeof(E), cudaMemcpyDeviceToDevice, memcpy_streams[i]);
cudaMemcpyAsync(
&d_out[i * domain_size], &d_coefficients[i * n], n * sizeof(E), cudaMemcpyDeviceToDevice, memcpy_streams[i]);
uint32_t NUM_THREADS = MAX_THREADS_BATCH;
uint32_t NUM_BLOCKS = (domain_size - n + NUM_THREADS - 1) / NUM_THREADS;
fill_array <E> <<<NUM_BLOCKS, NUM_THREADS, 0, memcpy_streams[i]>>> (&d_out[i * domain_size + n], E::zero(), domain_size - n);
fill_array<E>
<<<NUM_BLOCKS, NUM_THREADS, 0, memcpy_streams[i]>>>(&d_out[i * domain_size + n], E::zero(), domain_size - n);
cudaStreamSynchronize(memcpy_streams[i]);
cudaStreamDestroy(memcpy_streams[i]);
@@ -83,102 +127,153 @@ int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_
} else
cudaMemcpyAsync(d_out, d_coefficients, sizeof(E) * domain_size * batch_size, cudaMemcpyDeviceToDevice, stream);
if (coset)
batch_vector_mult(coset_powers, d_out, domain_size, batch_size, stream);
if (coset) batch_vector_mult(coset_powers, d_out, domain_size, batch_size, stream);
int NUM_THREADS = min(domain_size / 2, MAX_THREADS_BATCH);
int chunks = max(int((domain_size / 2) / NUM_THREADS), 1);
int NUM_BLOCKS = batch_size * chunks;
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
{
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, domain_size, d_domain, domain_size, batch_size * chunks, logn - s - 1, true);
}
cudaStreamSynchronize(stream);
S* _null = nullptr;
ntt_inplace_batch_template(d_out, d_domain, domain_size, batch_size, false, false, _null, stream, true);
return 0;
}
/**
* Evaluate a polynomial on a coset.
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs, so the order of outputs is bit-reversed.
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs, so the order of outputs
* is bit-reversed.
* @param d_out The evaluations of the polynomial on coset `u` * `d_domain`.
* @param d_coefficients Input array of coefficients of a polynomial of type E (elements).
* @param d_domain Domain on which the polynomial is evaluated (see `coset` flag). Must be a subgroup.
* @param domain_size Length of `d_domain` array, on which the polynomial is computed.
* @param n The number of coefficients, which might be different from `domain_size`.
* @param coset The flag that indicates whether to evaluate on a coset. If false, evaluate on a subgroup `d_domain`.
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of
* the coset.
*/
template <typename E, typename S>
int evaluate(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, bool coset, S * coset_powers, cudaStream_t stream) {
return evaluate_batch <E, S> (d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers, stream);
template <typename E, typename S>
int evaluate(
E* d_out,
E* d_coefficients,
S* d_domain,
unsigned domain_size,
unsigned n,
bool coset,
S* coset_powers,
cudaStream_t stream)
{
return evaluate_batch<E, S>(d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers, stream);
}
template <typename S>
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream) {
return interpolate(d_out, d_evaluations, d_domain, n, stream);
template <typename S>
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream)
{
S* _null = nullptr;
return interpolate(d_out, d_evaluations, d_domain, n, false, _null, stream);
}
template <typename S>
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
template <typename S>
int interpolate_scalars_batch(
S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream)
{
S* _null = nullptr;
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
}
template <typename E, typename S>
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream) {
return interpolate(d_out, d_evaluations, d_domain, n, stream);
template <typename E, typename S>
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream)
{
S* _null = nullptr;
return interpolate(d_out, d_evaluations, d_domain, n, false, _null, stream);
}
template <typename E, typename S>
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
template <typename E, typename S>
int interpolate_points_batch(
E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream)
{
S* _null = nullptr;
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
}
template <typename S>
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream) {
template <typename S>
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream)
{
S* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
template <typename S>
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream) {
template <typename S>
int evaluate_scalars_batch(
S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream)
{
S* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
template <typename E, typename S>
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream) {
template <typename E, typename S>
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream)
{
S* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
template <typename E, typename S>
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream) {
template <typename E, typename S>
int evaluate_points_batch(
E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream)
{
S* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
template <typename S>
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream) {
template <typename S>
int interpolate_scalars_on_coset(
S* d_out, S* d_evaluations, S* d_domain, unsigned n, S* coset_powers, cudaStream_t stream)
{
return interpolate(d_out, d_evaluations, d_domain, n, true, coset_powers, stream);
}
template <typename S>
int interpolate_scalars_on_coset_batch(
S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream)
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, true, coset_powers, stream);
}
template <typename S>
int evaluate_scalars_on_coset(
S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream)
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
template <typename E, typename S>
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream) {
template <typename E, typename S>
int evaluate_scalars_on_coset_batch(
S* d_out,
S* d_coefficients,
S* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
S* coset_powers,
cudaStream_t stream)
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
template <typename E, typename S>
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream) {
template <typename E, typename S>
int evaluate_points_on_coset(
E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream)
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
template <typename E, typename S>
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream) {
template <typename E, typename S>
int evaluate_points_on_coset_batch(
E* d_out,
E* d_coefficients,
S* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
S* coset_powers,
cudaStream_t stream)
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
#endif

View File

@@ -2,45 +2,62 @@
#define LDE_H
#pragma once
template <typename S>
template <typename S>
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream);
template <typename S>
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
template <typename S>
int interpolate_scalars_batch(
S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
template <typename E, typename S>
template <typename E, typename S>
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream);
template <typename E, typename S>
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
template <typename E, typename S>
int interpolate_points_batch(
E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
template <typename S>
template <typename S>
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream);
template <typename S>
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
template <typename S>
int evaluate_scalars_batch(
S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
template <typename E, typename S>
template <typename E, typename S>
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream);
template <typename E, typename S>
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
template <typename E, typename S>
int evaluate_points_batch(
E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
template <typename S>
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
template <typename S>
int evaluate_scalars_on_coset(
S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
template <typename S>
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream);
template <typename S>
int evaluate_scalars_on_coset_batch(
S* d_out,
S* d_coefficients,
S* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
S* coset_powers,
cudaStream_t stream);
template <typename E, typename S>
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
template <typename E, typename S>
int evaluate_points_on_coset(
E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
template <typename E, typename S>
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream);
template <typename E, typename S>
int evaluate_points_on_coset_batch(
E* d_out,
E* d_coefficients,
S* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
S* coset_powers,
cudaStream_t stream);
#endif

View File

@@ -2,8 +2,13 @@
#define NTT
#pragma once
#include "../../utils/sharedmem.cuh"
#include "../vector_manipulation/ve_mod_mult.cuh"
const uint32_t MAX_NUM_THREADS = 1024;
const uint32_t MAX_THREADS_BATCH = 256;
const uint32_t MAX_THREADS_BATCH = 512; //TODO: allows 100% occupancy for scalar NTT for sm_86..sm_89
const uint32_t MAX_SHARED_MEM_ELEMENT_SIZE = 32; //TODO: occupancy calculator, hardcoded for sm_86..sm_89
const uint32_t MAX_SHARED_MEM = MAX_SHARED_MEM_ELEMENT_SIZE * 1024;
/**
* Computes the twiddle factors.
@@ -37,40 +42,6 @@ const uint32_t MAX_THREADS_BATCH = 256;
return d_twiddles;
}
/**
* Returns the bit reversed order of a number.
* for example: on inputs num = 6 (110 in binary) and logn = 3
* the function should return 3 (011 in binary.)
* @param num some number with bit representation of size logn.
* @param logn length of bit representation of `num`.
* @return bit reveresed order or `num`.
*/
__device__ __host__ uint32_t reverseBits(uint32_t num, uint32_t logn) {
unsigned int reverse_num = 0;
for (uint32_t i = 0; i < logn; i++) {
if ((num & (1 << i))) reverse_num |= 1 << ((logn - 1) - i);
}
return reverse_num;
}
/**
* Returns the bit reversal ordering of the input array.
* for example: on input ([a[0],a[1],a[2],a[3]], 4, 2) it returns
* [a[0],a[3],a[2],a[1]] (elements in indices 3,1 swhich places).
* @param arr array of some object of type T of size which is a power of 2.
* @param n length of `arr`.
* @param logn log(n).
* @return A new array which is the bit reversed version of input array.
*/
template < typename T > T * template_reverse_order(T * arr, uint32_t n, uint32_t logn) {
T * arrReversed = new T[n];
for (uint32_t i = 0; i < n; i++) {
uint32_t reversed = reverseBits(i, logn);
arrReversed[i] = arr[reversed];
}
return arrReversed;
}
template < typename T > __global__ void reverse_order_kernel(T* arr, T* arr_reversed, uint32_t n, uint32_t logn, uint32_t batch_size) {
int threadId = (blockIdx.x * blockDim.x) + threadIdx.x;
if (threadId < n * batch_size) {
@@ -112,156 +83,146 @@ template < typename T > void reverse_order(T* arr, uint32_t n, uint32_t logn, cu
reverse_order_batch(arr, n, logn, 1, stream);
}
/**
* Cooley-Tukey butterfly kernel.
* @param arr array of objects of type E (elements).
* @param twiddles array of twiddle factors of type S (scalars).
* @param n size of arr.
* @param n_twiddles size of omegas.
* @param m "pair distance" - indicate distance of butterflies inputs.
* @param i Cooley-Tukey FFT stage number.
* @param max_thread_num maximal number of threads in stage.
*/
template < typename E, typename S > __global__ void template_butterfly_kernel(E * arr, S * twiddles, uint32_t n, uint32_t n_twiddles, uint32_t m, uint32_t i, uint32_t max_thread_num) {
int j = (blockIdx.x * blockDim.x) + threadIdx.x;
if (j < max_thread_num) {
uint32_t g = j * (n / m);
uint32_t k = i + j + (m >> 1);
E u = arr[i + j];
E v = twiddles[g * n_twiddles / n] * arr[k];
arr[i + j] = u + v;
arr[k] = u - v;
}
}
enum Decimation {
NONE = 0,
DIF = 1,
DIT = 2,
};
/**
* Multiply the elements of an input array by a scalar in-place.
* @param arr input array.
* @param n size of arr.
* @param n_inv scalar of type S (scalar).
*/
template < typename E, typename S > __global__ void template_normalize_kernel(E * arr, uint32_t n, S scalar) {
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid < n) {
arr[tid] = scalar * arr[tid];
}
}
/**
* Cooley-Tukey NTT.
* NOTE! this function assumes that d_arr and d_twiddles are located in the device memory.
* @param d_arr input array of type E (elements) allocated on the device memory.
* @param n length of d_arr.
* @param logn log(n).
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
* @param n_twiddles length of d_twiddles.
*/
template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr, uint32_t n, uint32_t logn, S * d_twiddles, uint32_t n_twiddles, cudaStream_t stream) {
uint32_t m = 2;
// TODO: optimize with separate streams for each iteration
for (uint32_t s = 0; s < logn; s++) {
for (uint32_t i = 0; i < n; i += m) {
uint32_t shifted_m = m >> 1;
uint32_t number_of_threads = MAX_NUM_THREADS ^ ((shifted_m ^ MAX_NUM_THREADS) & -(shifted_m < MAX_NUM_THREADS));
uint32_t number_of_blocks = shifted_m / MAX_NUM_THREADS + 1;
template_butterfly_kernel < E, S > <<< number_of_threads, number_of_blocks, 0, stream >>> (d_arr, d_twiddles, n, n_twiddles, m, i, m >> 1);
}
m <<= 1;
}
}
/**
* Cooley-Tukey NTT.
* Cooley-Tuckey NTT.
* NOTE! this function assumes that d_twiddles are located in the device memory.
* @param arr input array of type E (elements).
* @param arr input array of type E (elements).
* @param n length of d_arr.
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
* @param n_twiddles length of d_twiddles.
* @param inverse indicate if the result array should be normalized by n^(-1).
* @param twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
* @param n_twiddles length of twiddles.
* @param max_task max count of parallel tasks.
* @param s log2(n) loop index.
*/
template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_twiddles, uint32_t n_twiddles, bool inverse, cudaStream_t stream) {
uint32_t logn = uint32_t(log(n) / log(2));
size_t size_E = n * sizeof(E);
E * arrReversed = template_reverse_order < E > (arr, n, logn);
E * d_arrReversed;
cudaMallocAsync( & d_arrReversed, size_E, stream);
cudaMemcpyAsync(d_arrReversed, arrReversed, size_E, cudaMemcpyHostToDevice, stream);
template_ntt_on_device_memory < E, S > (d_arrReversed, n, logn, d_twiddles, n_twiddles, stream);
if (inverse) {
int NUM_THREADS = MAX_NUM_THREADS;
int NUM_BLOCKS = (n + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS, 0, stream >>> (d_arrReversed, n, S::inv_log_size(logn));
}
cudaMemcpyAsync(arrReversed, d_arrReversed, size_E, cudaMemcpyDeviceToHost, stream);
cudaFreeAsync(d_arrReversed, stream);
cudaStreamSynchronize(stream);
return arrReversed;
}
template <typename E, typename S>
__global__ void ntt_template_kernel_shared_rev(E *__restrict__ arr_g, uint32_t n, const S *__restrict__ r_twiddles, uint32_t n_twiddles, uint32_t max_task, uint32_t ss, uint32_t logn)
{
SharedMemory<E> smem;
E *arr = smem.getPointer();
/**
* Cooley-Tukey (scalar) NTT.
* @param arr input array of type E (element).
* @param n length of d_arr.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
template<typename E,typename S> uint32_t ntt_end2end_template(E * arr, uint32_t n, bool inverse, cudaStream_t stream) {
uint32_t logn = uint32_t(log(n) / log(2));
uint32_t n_twiddles = n;
S * twiddles = new S[n_twiddles];
S * d_twiddles;
if (inverse){
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn), stream);
} else{
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn), stream);
}
E * result = ntt_template < E, S > (arr, n, d_twiddles, n_twiddles, inverse, stream);
for(int i = 0; i < n; i++){
arr[i] = result[i];
}
cudaFreeAsync(d_twiddles, stream);
cudaStreamSynchronize(stream);
return 0; // TODO add
}
uint32_t task = blockIdx.x;
uint32_t loop_limit = blockDim.x;
uint32_t chunks = n / (loop_limit * 2);
uint32_t offset = (task / chunks) * n;
if (task < max_task)
{
// flattened loop allows parallel processing
uint32_t l = threadIdx.x;
if (l < loop_limit)
{
#pragma unroll
for (; ss < logn; ss++)
{
int s = logn - ss - 1;
bool is_beginning = ss == 0;
bool is_end = ss == (logn - 1);
/**
* Returens the bit reversal ordering of the input array according to the batches *in place*.
* The assumption is that arr is divided into N tasks of size n.
* Tasks indicates the index of the task (out of N).
* @param arr input array of type T.
* @param n length of arr.
* @param logn log(n).
* @param task log(n).
*/
template < typename T > __device__ __host__ void reverseOrder_batch(T * arr, uint32_t n, uint32_t logn, uint32_t task) {
for (uint32_t i = 0; i < n; i++) {
uint32_t reversed = reverseBits(i, logn);
if (reversed > i) {
T tmp = arr[task * n + i];
arr[task * n + i] = arr[task * n + reversed];
arr[task * n + reversed] = tmp;
uint32_t ntw_i = task % chunks;
uint32_t n_twiddles_div = n_twiddles >> (s + 1);
uint32_t shift_s = 1 << s;
uint32_t shift2_s = 1 << (s + 1);
l = ntw_i * loop_limit + l; // to l from chunks to full
uint32_t j = l & (shift_s - 1); // Equivalent to: l % (1 << s)
uint32_t i = ((l >> s) * shift2_s) & (n - 1); // (..) % n (assuming n is power of 2)
uint32_t oij = i + j;
uint32_t k = oij + shift_s;
S tw = r_twiddles[j * n_twiddles_div];
E u = is_beginning ? arr_g[offset + oij] : arr[oij];
E v = is_beginning ? arr_g[offset + k] : arr[k];
if (is_end)
{
arr_g[offset + oij] = u + v;
arr_g[offset + k] = tw * (u - v);
}
else
{
arr[oij] = u + v;
arr[k] = tw *(u - v);
}
__syncthreads();
}
}
}
}
/**
* Cooley-Tukey butterfly kernel.
* @param arr array of objects of type E (elements).
* @param twiddles array of twiddle factors of type S (scalars).
* @param n size of arr.
* @param n_twiddles size of omegas.
* @param m "pair distance" - indicate distance of butterflies inputs.
* @param i Cooley-TUckey FFT stage number.
* @param offset offset corr. to the specific taks (in batch).
* Cooley-Tuckey NTT.
* NOTE! this function assumes that d_twiddles are located in the device memory.
* @param arr input array of type E (elements).
* @param n length of d_arr.
* @param twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
* @param n_twiddles length of twiddles.
* @param max_task max count of parallel tasks.
* @param s log2(n) loop index.
*/
template < typename E, typename S > __device__ __host__ void butterfly(E * arrReversed, S * omegas, uint32_t n, uint32_t n_omegas, uint32_t m, uint32_t i, uint32_t j, uint32_t offset) {
uint32_t g = j * (n / m);
uint32_t k = i + j + (m >> 1);
E u = arrReversed[offset + i + j];
E v = omegas[g * n_omegas / n] * arrReversed[offset + k];
arrReversed[offset + i + j] = u + v;
arrReversed[offset + k] = u - v;
template <typename E, typename S>
__global__ void ntt_template_kernel_shared(E *__restrict__ arr_g, uint32_t n, const S *__restrict__ r_twiddles, uint32_t n_twiddles, uint32_t max_task, uint32_t s, uint32_t logn)
{
SharedMemory<E> smem;
E *arr = smem.getPointer();
uint32_t task = blockIdx.x;
uint32_t loop_limit = blockDim.x;
uint32_t chunks = n / (loop_limit * 2);
uint32_t offset = (task / chunks) * n;
if (task < max_task)
{
// flattened loop allows parallel processing
uint32_t l = threadIdx.x;
if (l < loop_limit)
{
#pragma unroll
for (; s < logn; s++) // TODO: this loop also can be unrolled
{
uint32_t ntw_i = task % chunks;
uint32_t n_twiddles_div = n_twiddles >> (s + 1);
uint32_t shift_s = 1 << s;
uint32_t shift2_s = 1 << (s + 1);
l = ntw_i * loop_limit + l; // to l from chunks to full
uint32_t j = l & (shift_s - 1); // Equivalent to: l % (1 << s)
uint32_t i = ((l >> s) * shift2_s) & (n - 1); // (..) % n (assuming n is power of 2)
uint32_t oij = i + j;
uint32_t k = oij + shift_s;
S tw = r_twiddles[j * n_twiddles_div];
E u = s == 0 ? arr_g[offset + oij] : arr[oij];
E v = s == 0 ? arr_g[offset + k] : arr[k];
v = tw * v;
if (s == (logn - 1))
{
arr_g[offset + oij] = u + v;
arr_g[offset + k] = u - v;
}
else
{
arr[oij] = u + v;
arr[k] = u - v;
}
__syncthreads();
}
}
}
}
/**
@@ -296,39 +257,80 @@ __global__ void ntt_template_kernel(E *arr, uint32_t n, S *twiddles, uint32_t n_
l = ntw_i * blockDim.x + l; //to l from chunks to full
uint32_t j = l & (shift_s - 1); // Equivalent to: l % (1 << s)
uint32_t i = ((l / shift_s) * shift2_s) % n;
uint32_t j = l & (shift_s - 1); // Equivalent to: l % (1 << s)
uint32_t i = ((l >> s) * shift2_s) & (n - 1); // (..) % n (assuming n is power of 2)
uint32_t k = i + j + shift_s;
S tw = twiddles[j * n_twiddles_div];
uint32_t offset = (task / chunks) * n;
E u = arr[offset + i + j];
E v = rev ? arr[offset + k] : twiddles[j * n_twiddles_div] * arr[offset + k];
E v = arr[offset + k];
if (!rev) v = tw * v;
arr[offset + i + j] = u + v;
arr[offset + k] = u - v;
if (rev)
arr[offset + k] = twiddles[j * n_twiddles_div] * arr[offset + k];
v = u - v;
arr[offset + k] = rev ? tw * v : v;
}
}
}
/**
* Cooley-Tukey NTT.
* NOTE! this function assumes that d_twiddles are located in the device memory.
* @param arr input array of type E (elements).
* @param n length of arr.
* @param logn log2(n).
* @param max_task max count of parallel tasks.
* NTT/INTT inplace batch
* Note: this function does not preform any bit-reverse permutations on its inputs or outputs.
* @param d_inout Array for inplace processing
* @param d_twiddles
* @param n Length of `d_twiddles` array
* @param batch_size The size of the batch; the length of `d_inout` is `n` * `batch_size`.
* @param inverse true for iNTT
* @param is_coset true for multiplication by coset
* @param coset should be array of lenght n - or in case of lesser than n, right-padded with zeroes
* @param stream CUDA stream
* @param is_sync_needed do perform sync of the supplied CUDA stream at the end of processing
*/
template <typename E, typename S>
__global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, uint32_t max_task)
template <typename E, typename S> void ntt_inplace_batch_template(
E * d_inout, S * d_twiddles, unsigned n, unsigned batch_size, bool inverse,
bool is_coset, S * coset, cudaStream_t stream, bool is_sync_needed)
{
int task = (blockIdx.x * blockDim.x) + threadIdx.x;
const int logn = int(log(n) / log(2));
bool is_shared_mem_enabled = sizeof(E) <= MAX_SHARED_MEM_ELEMENT_SIZE;
const int log2_shmem_elems = is_shared_mem_enabled ? int(log(int(MAX_SHARED_MEM / sizeof(E))) / log(2)) : logn;
int num_threads = min(min(n / 2, MAX_THREADS_BATCH), 1 << (log2_shmem_elems - 1));
const int chunks = max(int((n / 2) / num_threads), 1);
const int total_tasks = batch_size * chunks;
int num_blocks = total_tasks;
const int shared_mem = 2 * num_threads * sizeof(E); // TODO: calculator, as shared mem size may be more efficient less then max to allow more concurrent blocks on SM
const int logn_shmem = is_shared_mem_enabled ? int(log(2 * num_threads) / log(2)) : 0; //TODO: shared memory support only for types <= 32 bytes
if (task < max_task)
{
reverseOrder_batch<E>(arr, n, logn, task);
}
// if (inverse)
// {
// if (is_shared_mem_enabled) ntt_template_kernel_shared<<<num_blocks, num_threads, shared_mem, stream>>>(d_inout, 1 << logn_shmem, d_twiddles, n, total_tasks, 0, logn_shmem);
// for (int s = logn_shmem; s < logn; s++) // TODO: this loop also can be unrolled
// {
// ntt_template_kernel <E, S> <<<num_blocks, num_threads, 0, stream>>>(d_inout, n, d_twiddles, n, total_tasks, s, false);
// }
// if (is_coset) batch_vector_mult(coset, d_inout, n, batch_size, stream);
// num_threads = min(n / 2, MAX_NUM_THREADS);
// num_blocks = (n * batch_size + num_threads - 1) / num_threads;
// // template_normalize_kernel <E, S> <<<num_blocks, num_threads, 0, stream>>> (d_inout, n * batch_size, S::inv_log_size(logn));
// }
// else
// {
if (is_coset) batch_vector_mult(coset, d_inout, n, batch_size, stream);
for (int s = logn - 1; s >= logn_shmem; s--) // TODO: this loop also can be unrolled
{
ntt_template_kernel<<<num_blocks, num_threads, 0, stream>>>(d_inout, n, d_twiddles, n, total_tasks, s, true);
}
if (is_shared_mem_enabled) ntt_template_kernel_shared_rev<<<num_blocks, num_threads, shared_mem, stream>>>(d_inout, 1 << logn_shmem, d_twiddles, n, total_tasks, 0, logn_shmem);
// }
if (!is_sync_needed) return;
cudaStreamSynchronize(stream);
}
/**
@@ -343,7 +345,7 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
template <typename E, typename S> uint32_t ntt_end2end_batch_template(E * arr, uint32_t arr_size, uint32_t n, bool inverse, cudaStream_t stream) {
int batches = int(arr_size / n);
uint32_t logn = uint32_t(log(n) / log(2));
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as BLS12_381::scalar_t::omega() is of that order.
uint32_t n_twiddles = n/2; // n_twiddles is set to 4096 as BLS12_381::scalar_t::omega() is of that order.
size_t size_E = arr_size * sizeof(E);
S * d_twiddles;
if (inverse){
@@ -356,25 +358,10 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
cudaMemcpyAsync(d_arr, arr, size_E, cudaMemcpyHostToDevice, stream);
int NUM_THREADS = MAX_THREADS_BATCH;
int NUM_BLOCKS = (batches + NUM_THREADS - 1) / NUM_THREADS;
ntt_template_kernel_rev_ord<E, S><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_arr, n, logn, batches);
NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
int chunks = max(int((n / 2) / NUM_THREADS), 1);
int total_tasks = batches * chunks;
NUM_BLOCKS = total_tasks;
//TODO: this loop also can be unrolled
for (uint32_t s = 0; s < logn; s++)
{
ntt_template_kernel<E, S><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
cudaStreamSynchronize(stream);
}
if (inverse == true)
{
NUM_THREADS = MAX_NUM_THREADS;
NUM_BLOCKS = (arr_size + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS, 0, stream>>> (d_arr, arr_size, S::inv_log_size(logn));
}
S* _null = nullptr;
ntt_inplace_batch_template(d_arr, d_twiddles, n, batches, inverse, false, _null, stream, false);
reverse_order_batch(d_arr, n, logn, batches, stream);
cudaMemcpyAsync(arr, d_arr, size_E, cudaMemcpyDeviceToHost, stream);
cudaFreeAsync(d_arr, stream);
cudaFreeAsync(d_twiddles, stream);
@@ -382,4 +369,14 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
return 0;
}
/**
* Cooley-Tukey (scalar) NTT.
* @param arr input array of type E (element).
* @param n length of d_arr.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
template<typename E,typename S> uint32_t ntt_end2end_template(E * arr, uint32_t n, bool inverse, cudaStream_t stream) {
return ntt_end2end_batch_template <E, S> (arr, n, n, inverse, stream);
}
#endif

View File

@@ -1,27 +1,27 @@
#pragma once
#include <map>
#include <stdexcept>
#include <cassert>
#include "constants/constants_11.h"
#include "constants/constants_2.h"
#include "constants/constants_4.h"
#include "constants/constants_8.h"
#include "constants/constants_11.h"
#include <cassert>
#include <map>
#include <stdexcept>
uint32_t partial_rounds_number_from_arity(const uint32_t arity) {
switch (arity) {
case 2:
return 55;
case 4:
return 56;
case 8:
return 57;
case 11:
return 57;
default:
throw std::invalid_argument( "unsupported arity" );
}
uint32_t partial_rounds_number_from_arity(const uint32_t arity)
{
switch (arity) {
case 2:
return 55;
case 4:
return 56;
case 8:
return 57;
case 11:
return 57;
default:
throw std::invalid_argument("unsupported arity");
}
};
// TO-DO: change to mapping
@@ -29,23 +29,24 @@ const uint32_t FULL_ROUNDS_DEFAULT = 4;
// TO-DO: for now, the constants are only generated in bls12_381
template <typename S>
S * load_constants(const uint32_t arity) {
unsigned char * constants;
switch (arity) {
case 2:
constants = constants_2;
break;
case 4:
constants = constants_4;
break;
case 8:
constants = constants_8;
break;
case 11:
constants = constants_11;
break;
default:
throw std::invalid_argument( "unsupported arity" );
}
return reinterpret_cast< S * >(constants);
S* load_constants(const uint32_t arity)
{
unsigned char* constants;
switch (arity) {
case 2:
constants = constants_2;
break;
case 4:
constants = constants_4;
break;
case 8:
constants = constants_8;
break;
case 11:
constants = constants_11;
break;
default:
throw std::invalid_argument("unsupported arity");
}
return reinterpret_cast<S*>(constants);
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,273 +1,266 @@
#include "poseidon.cuh"
template <typename S>
__global__ void prepare_poseidon_states(S * states, size_t number_of_states, S domain_tag, const PoseidonConfiguration<S> config) {
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
int state_number = idx / config.t;
if (state_number >= number_of_states) {
return;
}
int element_number = idx % config.t;
__global__ void
prepare_poseidon_states(S* states, size_t number_of_states, S domain_tag, const PoseidonConfiguration<S> config)
{
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
int state_number = idx / config.t;
if (state_number >= number_of_states) { return; }
int element_number = idx % config.t;
S prepared_element;
S prepared_element;
// Domain separation
if (element_number == 0) {
prepared_element = domain_tag;
} else {
prepared_element = states[state_number * config.t + element_number - 1];
}
// Domain separation
if (element_number == 0) {
prepared_element = domain_tag;
} else {
prepared_element = states[state_number * config.t + element_number - 1];
}
// Add pre-round constant
prepared_element = prepared_element + config.round_constants[element_number];
// Add pre-round constant
prepared_element = prepared_element + config.round_constants[element_number];
// Store element in state
states[idx] = prepared_element;
// Store element in state
states[idx] = prepared_element;
}
template <typename S>
__device__ __forceinline__ S sbox_alpha_five(S element) {
S result = S::sqr(element);
result = S::sqr(result);
return result * element;
__device__ __forceinline__ S sbox_alpha_five(S element)
{
S result = S::sqr(element);
result = S::sqr(result);
return result * element;
}
template <typename S>
__device__ S vecs_mul_matrix(S element, S * matrix, int element_number, int vec_number, int size, S * shared_states) {
shared_states[threadIdx.x] = element;
__syncthreads();
__device__ S vecs_mul_matrix(S element, S* matrix, int element_number, int vec_number, int size, S* shared_states)
{
shared_states[threadIdx.x] = element;
__syncthreads();
element = S::zero();
for (int i = 0; i < size; i++) {
element = element + (shared_states[vec_number * size + i] * matrix[i * size + element_number]);
}
__syncthreads();
return element;
element = S::zero();
for (int i = 0; i < size; i++) {
element = element + (shared_states[vec_number * size + i] * matrix[i * size + element_number]);
}
__syncthreads();
return element;
}
template <typename S>
__device__ S full_round(S element,
size_t rc_offset,
int local_state_number,
int element_number,
bool multiply_by_mds,
bool add_round_constant,
S * shared_states,
const PoseidonConfiguration<S> config) {
element = sbox_alpha_five(element);
if (add_round_constant) {
element = element + config.round_constants[rc_offset + element_number];
}
__device__ S full_round(
S element,
size_t rc_offset,
int local_state_number,
int element_number,
bool multiply_by_mds,
bool add_round_constant,
S* shared_states,
const PoseidonConfiguration<S> config)
{
element = sbox_alpha_five(element);
if (add_round_constant) { element = element + config.round_constants[rc_offset + element_number]; }
// Multiply all the states by mds matrix
S * matrix = multiply_by_mds ? config.mds_matrix : config.non_sparse_matrix;
return vecs_mul_matrix(element, matrix, element_number, local_state_number, config.t, shared_states);
// Multiply all the states by mds matrix
S* matrix = multiply_by_mds ? config.mds_matrix : config.non_sparse_matrix;
return vecs_mul_matrix(element, matrix, element_number, local_state_number, config.t, shared_states);
}
// Execute full rounds
template <typename S>
__global__ void full_rounds(S * states, size_t number_of_states, size_t rc_offset, bool first_half, const PoseidonConfiguration<S> config) {
extern __shared__ S shared_states[];
__global__ void full_rounds(
S* states, size_t number_of_states, size_t rc_offset, bool first_half, const PoseidonConfiguration<S> config)
{
extern __shared__ S shared_states[];
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
int state_number = idx / config.t;
if (state_number >= number_of_states) {
return;
}
int local_state_number = threadIdx.x / config.t;
int element_number = idx % config.t;
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
int state_number = idx / config.t;
if (state_number >= number_of_states) { return; }
int local_state_number = threadIdx.x / config.t;
int element_number = idx % config.t;
for (int i = 0; i < config.full_rounds_half - 1; i++) {
states[idx] = full_round(states[idx],
rc_offset,
local_state_number,
element_number,
true,
true,
shared_states,
config);
rc_offset += config.t;
}
for (int i = 0; i < config.full_rounds_half - 1; i++) {
states[idx] =
full_round(states[idx], rc_offset, local_state_number, element_number, true, true, shared_states, config);
rc_offset += config.t;
}
states[idx] = full_round(states[idx],
rc_offset,
local_state_number,
element_number,
!first_half,
first_half,
shared_states,
config);
states[idx] = full_round(
states[idx], rc_offset, local_state_number, element_number, !first_half, first_half, shared_states, config);
}
template <typename S>
__device__ S partial_round(S * state,
size_t rc_offset,
int round_number,
const PoseidonConfiguration<S> config) {
S element = state[0];
element = sbox_alpha_five(element);
element = element + config.round_constants[rc_offset];
__device__ S partial_round(S* state, size_t rc_offset, int round_number, const PoseidonConfiguration<S> config)
{
S element = state[0];
element = sbox_alpha_five(element);
element = element + config.round_constants[rc_offset];
S * sparse_matrix = &config.sparse_matrices[(config.t * 2 - 1) * round_number];
S* sparse_matrix = &config.sparse_matrices[(config.t * 2 - 1) * round_number];
state[0] = element * sparse_matrix[0];
for (int i = 1; i < config.t; i++) {
state[0] = state[0] + (state[i] * sparse_matrix[i]);
}
state[0] = element * sparse_matrix[0];
for (int i = 1; i < config.t; i++) {
state[0] = state[0] + (state[i] * sparse_matrix[i]);
}
for (int i = 1; i < config.t; i++) {
state[i] = state[i] + (element * sparse_matrix[config.t + i - 1]);
}
for (int i = 1; i < config.t; i++) {
state[i] = state[i] + (element * sparse_matrix[config.t + i - 1]);
}
}
// Execute partial rounds
template <typename S>
__global__ void partial_rounds(S * states, size_t number_of_states, size_t rc_offset, const PoseidonConfiguration<S> config) {
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
if (idx >= number_of_states) {
return;
}
__global__ void
partial_rounds(S* states, size_t number_of_states, size_t rc_offset, const PoseidonConfiguration<S> config)
{
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
if (idx >= number_of_states) { return; }
S * state = &states[idx * config.t];
S* state = &states[idx * config.t];
for (int i = 0; i < config.partial_rounds; i++) {
partial_round(state, rc_offset, i, config);
rc_offset++;
}
for (int i = 0; i < config.partial_rounds; i++) {
partial_round(state, rc_offset, i, config);
rc_offset++;
}
}
// These function is just doing copy from the states to the output
template <typename S>
__global__ void get_hash_results(S * states, size_t number_of_states, S * out, int t) {
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
if (idx >= number_of_states) {
return;
}
__global__ void get_hash_results(S* states, size_t number_of_states, S* out, int t)
{
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
if (idx >= number_of_states) { return; }
out[idx] = states[idx * t + 1];
out[idx] = states[idx * t + 1];
}
template <typename S>
__host__ void Poseidon<S>::hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type, cudaStream_t stream) {
S * states;
__host__ void Poseidon<S>::hash_blocks(const S* inp, size_t blocks, S* out, HashType hash_type, cudaStream_t stream)
{
S* states;
// allocate memory for {blocks} states of {t} scalars each
if (cudaMallocAsync(&states, blocks * this->t * sizeof(S), stream) != cudaSuccess) {
throw std::runtime_error("Failed memory allocation on the device");
}
// allocate memory for {blocks} states of {t} scalars each
if (cudaMallocAsync(&states, blocks * this->t * sizeof(S), stream) != cudaSuccess) {
throw std::runtime_error("Failed memory allocation on the device");
}
// This is where the input matrix of size Arity x NumberOfBlocks is
// padded and coppied to device in a T x NumberOfBlocks matrix
cudaMemcpy2DAsync(states, this->t * sizeof(S), // Device pointer and device pitch
inp, (this->t - 1) * sizeof(S), // Host pointer and pitch
(this->t - 1) * sizeof(S), blocks, // Size of the source matrix (Arity x NumberOfBlocks)
cudaMemcpyHostToDevice, stream);
// This is where the input matrix of size Arity x NumberOfBlocks is
// padded and coppied to device in a T x NumberOfBlocks matrix
cudaMemcpy2DAsync(
states, this->t * sizeof(S), // Device pointer and device pitch
inp, (this->t - 1) * sizeof(S), // Host pointer and pitch
(this->t - 1) * sizeof(S), blocks, // Size of the source matrix (Arity x NumberOfBlocks)
cudaMemcpyHostToDevice, stream);
size_t rc_offset = 0;
size_t rc_offset = 0;
// The logic behind this is that 1 thread only works on 1 element
// We have {t} elements in each state, and {blocks} states total
int number_of_threads = (256 / this->t) * this->t;
int hashes_per_block = number_of_threads / this->t;
int total_number_of_threads = blocks * this->t;
int number_of_blocks = total_number_of_threads / number_of_threads +
static_cast<bool>(total_number_of_threads % number_of_threads);
// The logic behind this is that 1 thread only works on 1 element
// We have {t} elements in each state, and {blocks} states total
int number_of_threads = (256 / this->t) * this->t;
int hashes_per_block = number_of_threads / this->t;
int total_number_of_threads = blocks * this->t;
int number_of_blocks =
total_number_of_threads / number_of_threads + static_cast<bool>(total_number_of_threads % number_of_threads);
// The partial rounds operates on the whole state, so we define
// the parallelism params for processing a single hash preimage per thread
int singlehash_block_size = 128;
int number_of_singlehash_blocks = blocks / singlehash_block_size + static_cast<bool>(blocks % singlehash_block_size);
// The partial rounds operates on the whole state, so we define
// the parallelism params for processing a single hash preimage per thread
int singlehash_block_size = 128;
int number_of_singlehash_blocks = blocks / singlehash_block_size + static_cast<bool>(blocks % singlehash_block_size);
// Pick the domain_tag accordinaly
S domain_tag;
switch (hash_type) {
case HashType::ConstInputLen:
domain_tag = this->const_input_no_pad_domain_tag;
break;
// Pick the domain_tag accordinaly
S domain_tag;
switch (hash_type) {
case HashType::ConstInputLen:
domain_tag = this->const_input_no_pad_domain_tag;
break;
case HashType::MerkleTree:
domain_tag = this->tree_domain_tag;
}
case HashType::MerkleTree:
domain_tag = this->tree_domain_tag;
}
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
auto start_time = std::chrono::high_resolution_clock::now();
#endif
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
auto start_time = std::chrono::high_resolution_clock::now();
#endif
// Domain separation and adding pre-round constants
prepare_poseidon_states <<< number_of_blocks, number_of_threads, 0, stream >>> (states, blocks, domain_tag, this->config);
rc_offset += this->t;
// Domain separation and adding pre-round constants
prepare_poseidon_states<<<number_of_blocks, number_of_threads, 0, stream>>>(states, blocks, domain_tag, this->config);
rc_offset += this->t;
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Domain separation: " << rc_offset << std::endl;
//print_buffer_from_cuda<S>(states, blocks * this->t);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Domain separation: " << rc_offset << std::endl;
// print_buffer_from_cuda<S>(states, blocks * this->t);
auto end_time = std::chrono::high_resolution_clock::now();
auto elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
auto end_time = std::chrono::high_resolution_clock::now();
auto elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
// execute half full rounds
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t, stream >>> (states, blocks, rc_offset, true, this->config);
rc_offset += this->t * this->config.full_rounds_half;
// execute half full rounds
full_rounds<<<number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block* this->t, stream>>>(
states, blocks, rc_offset, true, this->config);
rc_offset += this->t * this->config.full_rounds_half;
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Full rounds 1. RCOFFSET: " << rc_offset << std::endl;
// print_buffer_from_cuda<S>(states, blocks * this->t);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Full rounds 1. RCOFFSET: " << rc_offset << std::endl;
// print_buffer_from_cuda<S>(states, blocks * this->t);
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
// execute partial rounds
partial_rounds <<< number_of_singlehash_blocks, singlehash_block_size, 0, stream >>> (states, blocks, rc_offset, this->config);
rc_offset += this->config.partial_rounds;
// execute partial rounds
partial_rounds<<<number_of_singlehash_blocks, singlehash_block_size, 0, stream>>>(
states, blocks, rc_offset, this->config);
rc_offset += this->config.partial_rounds;
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Partial rounds. RCOFFSET: " << rc_offset << std::endl;
//print_buffer_from_cuda<S>(states, blocks * this->t);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Partial rounds. RCOFFSET: " << rc_offset << std::endl;
// print_buffer_from_cuda<S>(states, blocks * this->t);
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
// execute half full rounds
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t, stream >>> (states, blocks, rc_offset, false, this->config);
// execute half full rounds
full_rounds<<<number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block* this->t, stream>>>(
states, blocks, rc_offset, false, this->config);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Full rounds 2. RCOFFSET: " << rc_offset << std::endl;
//print_buffer_from_cuda<S>(states, blocks * this->t);
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Full rounds 2. RCOFFSET: " << rc_offset << std::endl;
// print_buffer_from_cuda<S>(states, blocks * this->t);
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
// get output
S * out_device;
cudaMalloc(&out_device, blocks * sizeof(S));
get_hash_results <<< number_of_singlehash_blocks, singlehash_block_size, 0, stream >>> (states, blocks, out_device, this->config.t);
// get output
S* out_device;
cudaMalloc(&out_device, blocks * sizeof(S));
get_hash_results<<<number_of_singlehash_blocks, singlehash_block_size, 0, stream>>>(
states, blocks, out_device, this->config.t);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Get hash results" << std::endl;
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
#endif
cudaMemcpyAsync(out, out_device, blocks * sizeof(S), cudaMemcpyDeviceToHost, stream);
cudaFreeAsync(out_device, stream);
cudaFreeAsync(states, stream);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaStreamSynchronize(stream);
std::cout << "Get hash results" << std::endl;
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
#endif
cudaMemcpyAsync(out, out_device, blocks * sizeof(S), cudaMemcpyDeviceToHost, stream);
cudaFreeAsync(out_device, stream);
cudaFreeAsync(states, stream);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaDeviceReset();
#endif
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaDeviceReset();
#endif
}

View File

@@ -2,19 +2,20 @@
#include "constants.cuh"
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
template <typename S>
__host__ void print_buffer_from_cuda(S * device_ptr, size_t size, size_t t) {
S * buffer = static_cast< S * >(malloc(size * sizeof(S)));
__host__ void print_buffer_from_cuda(S* device_ptr, size_t size, size_t t)
{
S* buffer = static_cast<S*>(malloc(size * sizeof(S)));
cudaMemcpy(buffer, device_ptr, size * sizeof(S), cudaMemcpyDeviceToHost);
std::cout << "Start print" << std::endl;
for(int i = 0; i < size / t; i++) {
for (int i = 0; i < size / t; i++) {
std::cout << "State #" << i << std::endl;
for (int j = 0; j < t; j++) {
std::cout << buffer[i * t + j] << std::endl;
@@ -28,136 +29,129 @@ __host__ void print_buffer_from_cuda(S * device_ptr, size_t size, size_t t) {
#ifdef DEBUG
template <typename S>
__device__ void print_scalar(S element, int data) {
printf("D# %d, T# %d: 0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
data,
threadIdx.x,
element.limbs_storage.limbs[0],
element.limbs_storage.limbs[1],
element.limbs_storage.limbs[2],
element.limbs_storage.limbs[3],
element.limbs_storage.limbs[4],
element.limbs_storage.limbs[5],
element.limbs_storage.limbs[6],
element.limbs_storage.limbs[7]
);
__device__ void print_scalar(S element, int data)
{
printf(
"D# %d, T# %d: 0x%08x%08x%08x%08x%08x%08x%08x%08x\n", data, threadIdx.x, element.limbs_storage.limbs[0],
element.limbs_storage.limbs[1], element.limbs_storage.limbs[2], element.limbs_storage.limbs[3],
element.limbs_storage.limbs[4], element.limbs_storage.limbs[5], element.limbs_storage.limbs[6],
element.limbs_storage.limbs[7]);
}
#endif
template <typename S>
struct PoseidonConfiguration {
uint32_t partial_rounds, full_rounds_half, t;
S * round_constants, * mds_matrix, * non_sparse_matrix, *sparse_matrices;
uint32_t partial_rounds, full_rounds_half, t;
S *round_constants, *mds_matrix, *non_sparse_matrix, *sparse_matrices;
};
template <typename S>
class Poseidon {
public:
uint32_t t;
PoseidonConfiguration<S> config;
class Poseidon
{
public:
uint32_t t;
PoseidonConfiguration<S> config;
enum HashType {
ConstInputLen,
MerkleTree,
};
enum HashType {
ConstInputLen,
MerkleTree,
};
Poseidon(const uint32_t arity, cudaStream_t stream) {
t = arity + 1;
this->config.t = t;
this->stream = stream;
Poseidon(const uint32_t arity, cudaStream_t stream)
{
t = arity + 1;
this->config.t = t;
this->stream = stream;
// Pre-calculate domain tags
// Domain tags will vary for different applications of Poseidon
uint32_t tree_domain_tag_value = 1;
tree_domain_tag_value = (tree_domain_tag_value << arity) - tree_domain_tag_value;
tree_domain_tag = S::from(tree_domain_tag_value);
// Pre-calculate domain tags
// Domain tags will vary for different applications of Poseidon
uint32_t tree_domain_tag_value = 1;
tree_domain_tag_value = (tree_domain_tag_value << arity) - tree_domain_tag_value;
tree_domain_tag = S::from(tree_domain_tag_value);
const_input_no_pad_domain_tag = S::one();
const_input_no_pad_domain_tag = S::one();
// TO-DO: implement binary shifts for scalar type
// const_input_no_pad_domain_tag = S::one() << 64;
// const_input_no_pad_domain_tag *= S::from(arity);
// TO-DO: implement binary shifts for scalar type
// const_input_no_pad_domain_tag = S::one() << 64;
// const_input_no_pad_domain_tag *= S::from(arity);
this->config.full_rounds_half = FULL_ROUNDS_DEFAULT;
this->config.partial_rounds = partial_rounds_number_from_arity(arity);
this->config.full_rounds_half = FULL_ROUNDS_DEFAULT;
this->config.partial_rounds = partial_rounds_number_from_arity(arity);
uint32_t round_constants_len = t * this->config.full_rounds_half * 2 + this->config.partial_rounds;
uint32_t mds_matrix_len = t * t;
uint32_t sparse_matrices_len = (t * 2 - 1) * this->config.partial_rounds;
uint32_t round_constants_len = t * this->config.full_rounds_half * 2 + this->config.partial_rounds;
uint32_t mds_matrix_len = t * t;
uint32_t sparse_matrices_len = (t * 2 - 1) * this->config.partial_rounds;
// All the constants are stored in a single file
S * constants = load_constants<S>(arity);
// All the constants are stored in a single file
S* constants = load_constants<S>(arity);
S * mds_offset = constants + round_constants_len;
S * non_sparse_offset = mds_offset + mds_matrix_len;
S * sparse_matrices_offset = non_sparse_offset + mds_matrix_len;
S* mds_offset = constants + round_constants_len;
S* non_sparse_offset = mds_offset + mds_matrix_len;
S* sparse_matrices_offset = non_sparse_offset + mds_matrix_len;
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
std::cout << "P: " << this->config.partial_rounds << " F: " << this->config.full_rounds_half << std::endl;
#endif
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
std::cout << "P: " << this->config.partial_rounds << " F: " << this->config.full_rounds_half << std::endl;
#endif
// Create streams for copying constants
cudaStream_t stream_copy_round_constants, stream_copy_mds_matrix, stream_copy_non_sparse, stream_copy_sparse_matrices;
cudaStreamCreate(&stream_copy_round_constants);
cudaStreamCreate(&stream_copy_mds_matrix);
cudaStreamCreate(&stream_copy_non_sparse);
cudaStreamCreate(&stream_copy_sparse_matrices);
// Create events for copying constants
cudaEvent_t event_copied_round_constants, event_copy_mds_matrix, event_copy_non_sparse, event_copy_sparse_matrices;
cudaEventCreateWithFlags(&event_copied_round_constants, cudaEventDisableTiming);
cudaEventCreateWithFlags(&event_copy_mds_matrix, cudaEventDisableTiming);
cudaEventCreateWithFlags(&event_copy_non_sparse, cudaEventDisableTiming);
cudaEventCreateWithFlags(&event_copy_sparse_matrices, cudaEventDisableTiming);
// Create streams for copying constants
cudaStream_t stream_copy_round_constants, stream_copy_mds_matrix, stream_copy_non_sparse,
stream_copy_sparse_matrices;
cudaStreamCreate(&stream_copy_round_constants);
cudaStreamCreate(&stream_copy_mds_matrix);
cudaStreamCreate(&stream_copy_non_sparse);
cudaStreamCreate(&stream_copy_sparse_matrices);
// Malloc memory for copying constants
cudaMallocAsync(&this->config.round_constants, sizeof(S) * round_constants_len, stream_copy_round_constants);
cudaMallocAsync(&this->config.mds_matrix, sizeof(S) * mds_matrix_len, stream_copy_mds_matrix);
cudaMallocAsync(&this->config.non_sparse_matrix, sizeof(S) * mds_matrix_len, stream_copy_non_sparse);
cudaMallocAsync(&this->config.sparse_matrices, sizeof(S) * sparse_matrices_len, stream_copy_sparse_matrices);
// Create events for copying constants
cudaEvent_t event_copied_round_constants, event_copy_mds_matrix, event_copy_non_sparse, event_copy_sparse_matrices;
cudaEventCreateWithFlags(&event_copied_round_constants, cudaEventDisableTiming);
cudaEventCreateWithFlags(&event_copy_mds_matrix, cudaEventDisableTiming);
cudaEventCreateWithFlags(&event_copy_non_sparse, cudaEventDisableTiming);
cudaEventCreateWithFlags(&event_copy_sparse_matrices, cudaEventDisableTiming);
// Copy constants
cudaMemcpyAsync(this->config.round_constants, constants,
sizeof(S) * round_constants_len,
cudaMemcpyHostToDevice, stream_copy_round_constants
);
cudaMemcpyAsync(this->config.mds_matrix, mds_offset,
sizeof(S) * mds_matrix_len,
cudaMemcpyHostToDevice, stream_copy_mds_matrix
);
cudaMemcpyAsync(this->config.non_sparse_matrix, non_sparse_offset,
sizeof(S) * mds_matrix_len,
cudaMemcpyHostToDevice, stream_copy_non_sparse
);
cudaMemcpyAsync(this->config.sparse_matrices, sparse_matrices_offset,
sizeof(S) * sparse_matrices_len,
cudaMemcpyHostToDevice, stream_copy_sparse_matrices
);
// Malloc memory for copying constants
cudaMallocAsync(&this->config.round_constants, sizeof(S) * round_constants_len, stream_copy_round_constants);
cudaMallocAsync(&this->config.mds_matrix, sizeof(S) * mds_matrix_len, stream_copy_mds_matrix);
cudaMallocAsync(&this->config.non_sparse_matrix, sizeof(S) * mds_matrix_len, stream_copy_non_sparse);
cudaMallocAsync(&this->config.sparse_matrices, sizeof(S) * sparse_matrices_len, stream_copy_sparse_matrices);
// Record finished copying event for streams
cudaEventRecord(event_copied_round_constants, stream_copy_round_constants);
cudaEventRecord(event_copy_mds_matrix, stream_copy_mds_matrix);
cudaEventRecord(event_copy_non_sparse, stream_copy_non_sparse);
cudaEventRecord(event_copy_sparse_matrices, stream_copy_sparse_matrices);
// Copy constants
cudaMemcpyAsync(
this->config.round_constants, constants, sizeof(S) * round_constants_len, cudaMemcpyHostToDevice,
stream_copy_round_constants);
cudaMemcpyAsync(
this->config.mds_matrix, mds_offset, sizeof(S) * mds_matrix_len, cudaMemcpyHostToDevice, stream_copy_mds_matrix);
cudaMemcpyAsync(
this->config.non_sparse_matrix, non_sparse_offset, sizeof(S) * mds_matrix_len, cudaMemcpyHostToDevice,
stream_copy_non_sparse);
cudaMemcpyAsync(
this->config.sparse_matrices, sparse_matrices_offset, sizeof(S) * sparse_matrices_len, cudaMemcpyHostToDevice,
stream_copy_sparse_matrices);
// Main stream waits for copying to finish
cudaStreamWaitEvent(stream, event_copied_round_constants);
cudaStreamWaitEvent(stream, event_copy_mds_matrix);
cudaStreamWaitEvent(stream, event_copy_non_sparse);
cudaStreamWaitEvent(stream, event_copy_sparse_matrices);
}
// Record finished copying event for streams
cudaEventRecord(event_copied_round_constants, stream_copy_round_constants);
cudaEventRecord(event_copy_mds_matrix, stream_copy_mds_matrix);
cudaEventRecord(event_copy_non_sparse, stream_copy_non_sparse);
cudaEventRecord(event_copy_sparse_matrices, stream_copy_sparse_matrices);
~Poseidon() {
cudaFreeAsync(this->config.round_constants, this->stream);
cudaFreeAsync(this->config.mds_matrix, this->stream);
cudaFreeAsync(this->config.non_sparse_matrix, this->stream);
cudaFreeAsync(this->config.sparse_matrices, this->stream);
}
// Main stream waits for copying to finish
cudaStreamWaitEvent(stream, event_copied_round_constants);
cudaStreamWaitEvent(stream, event_copy_mds_matrix);
cudaStreamWaitEvent(stream, event_copy_non_sparse);
cudaStreamWaitEvent(stream, event_copy_sparse_matrices);
}
// Hash multiple preimages in parallel
void hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type, cudaStream_t stream);
~Poseidon()
{
cudaFreeAsync(this->config.round_constants, this->stream);
cudaFreeAsync(this->config.mds_matrix, this->stream);
cudaFreeAsync(this->config.non_sparse_matrix, this->stream);
cudaFreeAsync(this->config.sparse_matrices, this->stream);
}
private:
S tree_domain_tag, const_input_no_pad_domain_tag;
cudaStream_t stream;
// Hash multiple preimages in parallel
void hash_blocks(const S* inp, size_t blocks, S* out, HashType hash_type, cudaStream_t stream);
private:
S tree_domain_tag, const_input_no_pad_domain_tag;
cudaStream_t stream;
};

File diff suppressed because it is too large Load Diff

View File

@@ -1,115 +1,135 @@
#ifndef VEC_MULT
#define VEC_MULT
#pragma once
#include <stdexcept>
#include <cuda.h>
#include <stdexcept>
#define MAX_THREADS_PER_BLOCK 256
/**
* Multiply the elements of an input array by a scalar in-place.
* @param arr input array.
* @param n size of arr.
* @param n_inv scalar of type S (scalar).
*/
template <typename E, typename S>
__global__ void template_normalize_kernel(E* arr, uint32_t n, S scalar)
{
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid < n) { arr[tid] = scalar * arr[tid]; }
}
// TODO: headers for prototypes and .c .cpp .cu files for implementations
template <typename E, typename S>
__global__ void vectorModMult(S *scalar_vec, E *element_vec, E *result, size_t n_elments)
__global__ void vectorModMult(S* scalar_vec, E* element_vec, E* result, size_t n_elments)
{
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < n_elments)
{
result[tid] = scalar_vec[tid] * element_vec[tid];
}
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < n_elments) { result[tid] = scalar_vec[tid] * element_vec[tid]; }
}
template <typename E, typename S>
int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments, cudaStream_t stream) // TODO: in place so no need for third result vector
int vector_mod_mult(S* vec_a, E* vec_b, E* result, size_t n_elments, cudaStream_t stream) // TODO: in place so no need
// for third result vector
{
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)n_elments / MAX_THREADS_PER_BLOCK);
int threads_per_block = MAX_THREADS_PER_BLOCK;
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)n_elments / MAX_THREADS_PER_BLOCK);
int threads_per_block = MAX_THREADS_PER_BLOCK;
// Allocate memory on the device for the input vectors, the output vector, and the modulus
S *d_vec_a;
E *d_vec_b, *d_result;
cudaMallocAsync(&d_vec_a, n_elments * sizeof(S), stream);
cudaMallocAsync(&d_vec_b, n_elments * sizeof(E), stream);
cudaMallocAsync(&d_result, n_elments * sizeof(E), stream);
// Allocate memory on the device for the input vectors, the output vector, and the modulus
S* d_vec_a;
E *d_vec_b, *d_result;
cudaMallocAsync(&d_vec_a, n_elments * sizeof(S), stream);
cudaMallocAsync(&d_vec_b, n_elments * sizeof(E), stream);
cudaMallocAsync(&d_result, n_elments * sizeof(E), stream);
// Copy the input vectors and the modulus from the host to the device
cudaMemcpyAsync(d_vec_a, vec_a, n_elments * sizeof(S), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_vec_b, vec_b, n_elments * sizeof(E), cudaMemcpyHostToDevice, stream);
// Copy the input vectors and the modulus from the host to the device
cudaMemcpyAsync(d_vec_a, vec_a, n_elments * sizeof(S), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_vec_b, vec_b, n_elments * sizeof(E), cudaMemcpyHostToDevice, stream);
// Call the kernel to perform element-wise modular multiplication
vectorModMult<<<num_blocks, threads_per_block, 0, stream>>>(d_vec_a, d_vec_b, d_result, n_elments);
// Call the kernel to perform element-wise modular multiplication
vectorModMult<<<num_blocks, threads_per_block, 0, stream>>>(d_vec_a, d_vec_b, d_result, n_elments);
cudaMemcpyAsync(result, d_result, n_elments * sizeof(E), cudaMemcpyDeviceToHost, stream);
cudaMemcpyAsync(result, d_result, n_elments * sizeof(E), cudaMemcpyDeviceToHost, stream);
cudaFreeAsync(d_vec_a, stream);
cudaFreeAsync(d_vec_b, stream);
cudaFreeAsync(d_result, stream);
cudaFreeAsync(d_vec_a, stream);
cudaFreeAsync(d_vec_b, stream);
cudaFreeAsync(d_result, stream);
cudaStreamSynchronize(stream);
return 0;
cudaStreamSynchronize(stream);
return 0;
}
template <typename E, typename S>
__global__ void batchVectorMult(S *scalar_vec, E *element_vec, unsigned n_scalars, unsigned batch_size)
int vector_mod_mult_device(
S* d_vec_a, E* d_vec_b, E* d_result, size_t n_elments) // TODO: in place so no need for third result vector
{
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < n_scalars * batch_size)
{
int scalar_id = tid % n_scalars;
element_vec[tid] = scalar_vec[scalar_id] * element_vec[tid];
}
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)n_elments / MAX_THREADS_PER_BLOCK);
int threads_per_block = MAX_THREADS_PER_BLOCK;
// Call the kernel to perform element-wise modular multiplication
vectorModMult<<<num_blocks, threads_per_block>>>(d_vec_a, d_vec_b, d_result, n_elments);
return 0;
}
template <typename E, typename S>
int batch_vector_mult(S *scalar_vec, E *element_vec, unsigned n_scalars, unsigned batch_size, cudaStream_t stream)
__global__ void batchVectorMult(S* scalar_vec, E* element_vec, unsigned n_scalars, unsigned batch_size)
{
// Set the grid and block dimensions
int NUM_THREADS = MAX_THREADS_PER_BLOCK;
int NUM_BLOCKS = (n_scalars * batch_size + NUM_THREADS - 1) / NUM_THREADS;
batchVectorMult<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(scalar_vec, element_vec, n_scalars, batch_size);
return 0;
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < n_scalars * batch_size) {
int scalar_id = tid % n_scalars;
element_vec[tid] = scalar_vec[scalar_id] * element_vec[tid];
}
}
template <typename E, typename S>
int batch_vector_mult(S* scalar_vec, E* element_vec, unsigned n_scalars, unsigned batch_size, cudaStream_t stream)
{
// Set the grid and block dimensions
int NUM_THREADS = MAX_THREADS_PER_BLOCK;
int NUM_BLOCKS = (n_scalars * batch_size + NUM_THREADS - 1) / NUM_THREADS;
batchVectorMult<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(scalar_vec, element_vec, n_scalars, batch_size);
return 0;
}
template <typename E>
__global__ void matrixVectorMult(E *matrix_elements, E *vector_elements, E *result, size_t dim)
__global__ void matrixVectorMult(E* matrix_elements, E* vector_elements, E* result, size_t dim)
{
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < dim)
{
result[tid] = E::zero();
for (int i = 0; i < dim; i++)
result[tid] = result[tid] + matrix_elements[tid * dim + i] * vector_elements[i];
}
int tid = blockDim.x * blockIdx.x + threadIdx.x;
if (tid < dim) {
result[tid] = E::zero();
for (int i = 0; i < dim; i++)
result[tid] = result[tid] + matrix_elements[tid * dim + i] * vector_elements[i];
}
}
template <typename E>
int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t dim, cudaStream_t stream)
int matrix_mod_mult(E* matrix_elements, E* vector_elements, E* result, size_t dim, cudaStream_t stream)
{
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)dim / MAX_THREADS_PER_BLOCK);
int threads_per_block = MAX_THREADS_PER_BLOCK;
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)dim / MAX_THREADS_PER_BLOCK);
int threads_per_block = MAX_THREADS_PER_BLOCK;
// Allocate memory on the device for the input vectors, the output vector, and the modulus
E *d_matrix, *d_vector, *d_result;
cudaMallocAsync(&d_matrix, (dim * dim) * sizeof(E), stream);
cudaMallocAsync(&d_vector, dim * sizeof(E), stream);
cudaMallocAsync(&d_result, dim * sizeof(E), stream);
// Allocate memory on the device for the input vectors, the output vector, and the modulus
E *d_matrix, *d_vector, *d_result;
cudaMallocAsync(&d_matrix, (dim * dim) * sizeof(E), stream);
cudaMallocAsync(&d_vector, dim * sizeof(E), stream);
cudaMallocAsync(&d_result, dim * sizeof(E), stream);
// Copy the input vectors and the modulus from the host to the device
cudaMemcpyAsync(d_matrix, matrix_elements, (dim * dim) * sizeof(E), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_vector, vector_elements, dim * sizeof(E), cudaMemcpyHostToDevice, stream);
// Copy the input vectors and the modulus from the host to the device
cudaMemcpyAsync(d_matrix, matrix_elements, (dim * dim) * sizeof(E), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_vector, vector_elements, dim * sizeof(E), cudaMemcpyHostToDevice, stream);
// Call the kernel to perform element-wise modular multiplication
matrixVectorMult<<<num_blocks, threads_per_block, 0, stream>>>(d_matrix, d_vector, d_result, dim);
// Call the kernel to perform element-wise modular multiplication
matrixVectorMult<<<num_blocks, threads_per_block, 0, stream>>>(d_matrix, d_vector, d_result, dim);
cudaMemcpyAsync(result, d_result, dim * sizeof(E), cudaMemcpyDeviceToHost, stream);
cudaMemcpyAsync(result, d_result, dim * sizeof(E), cudaMemcpyDeviceToHost, stream);
cudaFreeAsync(d_matrix, stream);
cudaFreeAsync(d_vector, stream);
cudaFreeAsync(d_result, stream);
cudaFreeAsync(d_matrix, stream);
cudaFreeAsync(d_vector, stream);
cudaFreeAsync(d_result, stream);
cudaStreamSynchronize(stream);
return 0;
cudaStreamSynchronize(stream);
return 0;
}
#endif

View File

@@ -2,21 +2,24 @@
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#if defined(G2_DEFINED)
#include "../../primitives/extension_field.cuh"
#endif
#include "params.cuh"
namespace BLS12_377 {
typedef Field<PARAMS_BLS12_377::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_BLS12_377::fq_config> point_field_t;
static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_377::weierstrass_b };
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_BLS12_377::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_re },
point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_im }};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
}
typedef Field<PARAMS_BLS12_377::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_BLS12_377::fq_config> point_field_t;
static constexpr point_field_t b = point_field_t{PARAMS_BLS12_377::weierstrass_b};
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_BLS12_377::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{
point_field_t{PARAMS_BLS12_377::weierstrass_b_g2_re}, point_field_t{PARAMS_BLS12_377::weierstrass_b_g2_im}};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
} // namespace BLS12_377

View File

@@ -1,327 +1,610 @@
#ifndef _BLS12_377_LDE
#define _BLS12_377_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "../../utils/mont.cuh"
#include "curve_config.cuh"
#include <cuda.h>
extern "C" BLS12_377::scalar_t* build_domain_cuda_bls12_377(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" BLS12_377::scalar_t* build_domain_cuda_bls12_377(
uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
if (inverse) {
return fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega_inv(logn), stream);
} else {
return fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega(logn), stream);
}
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return nullptr;
try {
cudaStreamCreate(&stream);
if (inverse) {
return fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega_inv(logn), stream);
} else {
return fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega(logn), stream);
}
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return nullptr;
}
}
extern "C" int ntt_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int ntt_cuda_bls12_377(
BLS12_377::scalar_t* arr,
uint32_t n,
bool inverse,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return ntt_end2end_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
cudaStreamCreate(&stream);
return ntt_end2end_template<BLS12_377::scalar_t, BLS12_377::scalar_t>(
arr, n, inverse, stream); // TODO: pass device_id
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int ecntt_cuda_bls12_377(
BLS12_377::projective_t* arr,
uint32_t n,
bool inverse,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return ntt_end2end_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
cudaStreamCreate(&stream);
return ntt_end2end_template<BLS12_377::projective_t, BLS12_377::scalar_t>(
arr, n, inverse, stream); // TODO: pass device_id
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ntt_batch_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int ntt_batch_cuda_bls12_377(
BLS12_377::scalar_t* arr,
uint32_t arr_size,
uint32_t batch_size,
bool inverse,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<BLS12_377::scalar_t, BLS12_377::scalar_t>(
arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_batch_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int ecntt_batch_cuda_bls12_377(
BLS12_377::projective_t* arr,
uint32_t arr_size,
uint32_t batch_size,
bool inverse,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<BLS12_377::projective_t, BLS12_377::scalar_t>(
arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
extern "C" int interpolate_scalars_cuda_bls12_377(
BLS12_377::scalar_t* d_out,
BLS12_377::scalar_t* d_evaluations,
BLS12_377::scalar_t* d_domain,
unsigned n,
unsigned device_id = 0,
cudaStream_t stream = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
BLS12_377::scalar_t* _null = nullptr;
return interpolate(d_out, d_evaluations, d_domain, n, false, _null, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_evaluations, BLS12_377::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int interpolate_scalars_batch_cuda_bls12_377(
BLS12_377::scalar_t* d_out,
BLS12_377::scalar_t* d_evaluations,
BLS12_377::scalar_t* d_domain,
unsigned n,
unsigned batch_size,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
BLS12_377::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int interpolate_scalars_on_coset_cuda_bls12_377(
BLS12_377::scalar_t* d_out,
BLS12_377::scalar_t* d_evaluations,
BLS12_377::scalar_t* d_domain,
unsigned n,
BLS12_377::scalar_t* coset_powers,
unsigned device_id = 0,
cudaStream_t stream = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
return interpolate(d_out, d_evaluations, d_domain, n, true, coset_powers, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_evaluations, BLS12_377::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int interpolate_scalars_batch_on_coset_cuda_bls12_377(
BLS12_377::scalar_t* d_out,
BLS12_377::scalar_t* d_evaluations,
BLS12_377::scalar_t* d_domain,
unsigned n,
unsigned batch_size,
BLS12_377::scalar_t* coset_powers,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, true, coset_powers, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
extern "C" int interpolate_points_cuda_bls12_377(
BLS12_377::projective_t* d_out,
BLS12_377::projective_t* d_evaluations,
BLS12_377::scalar_t* d_domain,
unsigned n,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
BLS12_377::scalar_t* _null = nullptr;
return interpolate(d_out, d_evaluations, d_domain, n, false, _null, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int interpolate_points_batch_cuda_bls12_377(
BLS12_377::projective_t* d_out,
BLS12_377::projective_t* d_evaluations,
BLS12_377::scalar_t* d_domain,
unsigned n,
unsigned batch_size,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
BLS12_377::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int evaluate_scalars_cuda_bls12_377(
BLS12_377::scalar_t* d_out,
BLS12_377::scalar_t* d_coefficients,
BLS12_377::scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned device_id = 0,
cudaStream_t stream = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
BLS12_377::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int evaluate_scalars_batch_cuda_bls12_377(
BLS12_377::scalar_t* d_out,
BLS12_377::scalar_t* d_coefficients,
BLS12_377::scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
BLS12_377::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_377::scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
extern "C" int evaluate_points_cuda_bls12_377(
BLS12_377::projective_t* d_out,
BLS12_377::projective_t* d_coefficients,
BLS12_377::scalar_t* d_domain,
unsigned domain_size,
unsigned n,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
BLS12_377::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int evaluate_points_batch_cuda_bls12_377(
BLS12_377::projective_t* d_out,
BLS12_377::projective_t* d_coefficients,
BLS12_377::scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
BLS12_377::scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_377::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int evaluate_scalars_on_coset_cuda_bls12_377(
BLS12_377::scalar_t* d_out,
BLS12_377::scalar_t* d_coefficients,
BLS12_377::scalar_t* d_domain,
unsigned domain_size,
unsigned n,
BLS12_377::scalar_t* coset_powers,
unsigned device_id = 0,
cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_377(
BLS12_377::scalar_t* d_out,
BLS12_377::scalar_t* d_coefficients,
BLS12_377::scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
BLS12_377::scalar_t* coset_powers,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int evaluate_points_on_coset_cuda_bls12_377(
BLS12_377::projective_t* d_out,
BLS12_377::projective_t* d_coefficients,
BLS12_377::scalar_t* d_domain,
unsigned domain_size,
unsigned n,
BLS12_377::scalar_t* coset_powers,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_377(
BLS12_377::projective_t* d_out,
BLS12_377::projective_t* d_coefficients,
BLS12_377::scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
BLS12_377::scalar_t* coset_powers,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_cuda_bls12_377(BLS12_377::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int ntt_inplace_batch_cuda_bls12_377(
BLS12_377::scalar_t* d_inout,
BLS12_377::scalar_t* d_twiddles,
unsigned n,
unsigned batch_size,
bool inverse,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
cudaStreamCreate(&stream);
BLS12_377::scalar_t* _null = nullptr;
ntt_inplace_batch_template(d_inout, d_twiddles, n, batch_size, inverse, false, _null, stream, true);
return CUDA_SUCCESS; // TODO: we should implement this https://leimao.github.io/blog/Proper-CUDA-Error-Checking/
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_batch_cuda_bls12_377(BLS12_377::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int ntt_inplace_coset_batch_cuda_bls12_377(
BLS12_377::scalar_t* d_inout,
BLS12_377::scalar_t* d_twiddles,
unsigned n,
unsigned batch_size,
bool inverse,
bool is_coset,
BLS12_377::scalar_t* coset,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
cudaStreamCreate(&stream);
ntt_inplace_batch_template(d_inout, d_twiddles, n, batch_size, inverse, is_coset, coset, stream, true);
return CUDA_SUCCESS; // TODO: we should implement this https://leimao.github.io/blog/Proper-CUDA-Error-Checking/
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
#endif
extern "C" int sub_scalars_cuda_bls12_377(
BLS12_377::scalar_t* d_out,
BLS12_377::scalar_t* d_in1,
BLS12_377::scalar_t* d_in2,
unsigned n,
cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
return sub_polys(d_out, d_in1, d_in2, n, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int add_scalars_cuda_bls12_377(
BLS12_377::scalar_t* d_out,
BLS12_377::scalar_t* d_in1,
BLS12_377::scalar_t* d_in2,
unsigned n,
cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
return add_polys(d_out, d_in1, d_in2, n, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int to_montgomery_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
return to_montgomery(d_inout, n, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int from_montgomery_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
return from_montgomery(d_inout, n, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int
to_montgomery_proj_points_cuda_bls12_377(BLS12_377::projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
return to_montgomery((BLS12_377::point_field_t*)d_inout, 3 * n, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int
from_montgomery_proj_points_cuda_bls12_377(BLS12_377::projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
return from_montgomery((BLS12_377::point_field_t*)d_inout, 3 * n, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int
to_montgomery_aff_points_cuda_bls12_377(BLS12_377::affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
return to_montgomery((BLS12_377::point_field_t*)d_inout, 2 * n, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int
from_montgomery_aff_points_cuda_bls12_377(BLS12_377::affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
return from_montgomery((BLS12_377::point_field_t*)d_inout, 2 * n, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
#if defined(G2_DEFINED)
extern "C" int
to_montgomery_proj_points_g2_cuda_bls12_377(BLS12_377::g2_projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
return to_montgomery((BLS12_377::point_field_t*)d_inout, 6 * n, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int
from_montgomery_proj_points_g2_cuda_bls12_377(BLS12_377::g2_projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
return from_montgomery((BLS12_377::point_field_t*)d_inout, 6 * n, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int
to_montgomery_aff_points_g2_cuda_bls12_377(BLS12_377::g2_affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
return to_montgomery((BLS12_377::point_field_t*)d_inout, 4 * n, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int
from_montgomery_aff_points_g2_cuda_bls12_377(BLS12_377::g2_affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
return from_montgomery((BLS12_377::point_field_t*)d_inout, 4 * n, stream);
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
#endif
extern "C" int
reverse_order_scalars_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try {
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
cudaStreamSynchronize(stream);
return 0;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_batch_cuda_bls12_377(
BLS12_377::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try {
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int
reverse_order_points_cuda_bls12_377(BLS12_377::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try {
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_batch_cuda_bls12_377(
BLS12_377::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try {
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
#endif

View File

@@ -1,40 +1,47 @@
#ifndef _BLS12_377_MSM
#define _BLS12_377_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
#include <cuda.h>
#include <stdexcept>
extern "C"
int msm_cuda_bls12_377(BLS12_377::projective_t *out, BLS12_377::affine_t points[],
BLS12_377::scalar_t scalars[], size_t count, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int msm_cuda_bls12_377(
BLS12_377::projective_t* out,
BLS12_377::affine_t points[],
BLS12_377::scalar_t scalars[],
size_t count,
unsigned large_bucket_factor,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
try {
cudaStreamCreate(&stream);
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(
scalars, points, count, out, false, false, large_bucket_factor, stream);
cudaStreamSynchronize(stream);
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int msm_batch_cuda_bls12_377(BLS12_377::projective_t* out, BLS12_377::affine_t points[],
BLS12_377::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int msm_batch_cuda_bls12_377(
BLS12_377::projective_t* out,
BLS12_377::affine_t points[],
BLS12_377::scalar_t scalars[],
size_t batch_size,
size_t msm_size,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try
{
try {
cudaStreamCreate(&stream);
batched_large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
batched_large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(
scalars, points, batch_size, msm_size, out, false, stream);
cudaStreamSynchronize(stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
@@ -42,52 +49,168 @@ extern "C" int msm_batch_cuda_bls12_377(BLS12_377::projective_t* out, BLS12_377:
/**
* Commit to a polynomial using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
* points.
* @param d_out Ouptut point to write the result to.
* @param d_scalars Scalars for the MSM. Must be on device.
* @param d_points Points for the MSM. Must be on device.
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(d_scalars, d_points, count, d_out, true, stream);
cudaStreamSynchronize(stream);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a batch of polynomials using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Ouptut point to write the results to.
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
cudaStreamSynchronize(stream);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int commit_cuda_bls12_377(
BLS12_377::projective_t* d_out,
BLS12_377::scalar_t* d_scalars,
BLS12_377::affine_t* d_points,
size_t count,
unsigned large_bucket_factor,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
cudaStreamSynchronize(stream);
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a batch of polynomials using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
* points.
* @param d_out Ouptut point to write the results to.
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
* @param batch_size Size of the batch.
*/
extern "C" int commit_batch_cuda_bls12_377(
BLS12_377::projective_t* d_out,
BLS12_377::scalar_t* d_scalars,
BLS12_377::affine_t* d_points,
size_t count,
size_t batch_size,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
cudaStreamSynchronize(stream);
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
#if defined(G2_DEFINED)
extern "C" int msm_g2_cuda_bls12_377(
BLS12_377::g2_projective_t* out,
BLS12_377::g2_affine_t points[],
BLS12_377::scalar_t scalars[],
size_t count,
unsigned large_bucket_factor,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
large_msm<BLS12_377::scalar_t, BLS12_377::g2_projective_t, BLS12_377::g2_affine_t>(
scalars, points, count, out, false, false, large_bucket_factor, stream);
cudaStreamSynchronize(stream);
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int msm_batch_g2_cuda_bls12_377(
BLS12_377::g2_projective_t* out,
BLS12_377::g2_affine_t points[],
BLS12_377::scalar_t scalars[],
size_t batch_size,
size_t msm_size,
size_t device_id = 0,
cudaStream_t stream = 0)
{
try {
cudaStreamCreate(&stream);
batched_large_msm<BLS12_377::scalar_t, BLS12_377::g2_projective_t, BLS12_377::g2_affine_t>(
scalars, points, batch_size, msm_size, out, false, stream);
cudaStreamSynchronize(stream);
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a polynomial using the MSM in G2 group.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
* points.
* @param d_out Ouptut G2 point to write the result to.
* @param d_scalars Scalars for the MSM. Must be on device.
* @param d_points G2 affine points for the MSM. Must be on device.
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C" int commit_g2_cuda_bls12_377(
BLS12_377::g2_projective_t* d_out,
BLS12_377::scalar_t* d_scalars,
BLS12_377::g2_affine_t* d_points,
size_t count,
unsigned large_bucket_factor,
size_t device_id = 0,
cudaStream_t stream = 0)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try {
cudaStreamCreate(&stream);
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
cudaStreamSynchronize(stream);
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a batch of polynomials using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
* points.
* @param d_out Ouptut G2 point to write the results to.
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
* @param d_points G2 affine points for the MSMs. Must be on device. It is assumed that this set of bases is used for
* each MSM.
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
* @param batch_size Size of the batch.
*/
extern "C" int commit_batch_g2_cuda_bls12_377(
BLS12_377::g2_projective_t* d_out,
BLS12_377::scalar_t* d_scalars,
BLS12_377::g2_affine_t* d_points,
size_t count,
size_t batch_size,
size_t device_id = 0,
cudaStream_t stream = 0)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try {
cudaStreamCreate(&stream);
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
cudaStreamSynchronize(stream);
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
#endif
#endif

View File

@@ -1,180 +1,329 @@
#pragma once
#include "../../utils/storage.cuh"
namespace PARAMS_BLS12_377{
struct fp_config{
namespace PARAMS_BLS12_377 {
struct fp_config {
static constexpr unsigned limbs_count = 8;
static constexpr unsigned omegas_count = 32;
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd, 0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb, 0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_squared = {0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3, 0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7, 0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f, 0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd,
0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb,
0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
static constexpr storage<2 * limbs_count> modulus_wide = {
0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2 * limbs_count> modulus_squared = {
0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3,
0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7,
0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f,
0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
static constexpr unsigned modulus_bit_count = 253;
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b, 0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b,
0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> montgomery_r = {0xfffffff3, 0x7d1c7fff, 0x6ffffff2, 0x7257f50f,
0x512c0fee, 0x16d81575, 0x2bbb9a9d, 0x0d4bda32};
static constexpr storage<limbs_count> montgomery_r_inv = {0x1beeec02, 0x4122dd1a, 0x74fee875, 0xbd1eae95,
0x27b28e2f, 0x838557e2, 0x2290c02c, 0x07b30191};
static constexpr storage<limbs_count> omega1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> omega4= {0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765, 0x970dec00, 0x23ed1347, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega5= {0x0405f600, 0xfa8e7081, 0xf8a89660, 0x38b1c291, 0x6bda5fce, 0xefab9005, 0x92a3c754, 0x0b6b0756};
static constexpr storage<limbs_count> omega6= {0xaf0a50c8, 0xc5b2c78e, 0x4636deb3, 0x72e32a34, 0xb6f97778, 0x3d775d15, 0x2b16be6e, 0x0c4c070d};
static constexpr storage<limbs_count> omega7= {0x7a1ade2c, 0x3f5a4e73, 0x0120d1db, 0x71e5bca1, 0x3b2866fd, 0xbcb44162, 0x89c38db1, 0x06ed1a90};
static constexpr storage<limbs_count> omega8= {0xbd2cd25e, 0x61c5510e, 0x2b0d531c, 0xe2d70111, 0x94c3bd4b, 0x738f9894, 0x53182695, 0x0b1e0f1d};
static constexpr storage<limbs_count> omega9= {0x8cb9508c, 0xcfb2f75e, 0xf491e401, 0x4c14f244, 0x23c16afb, 0xc8f5265f, 0x70f3ff2a, 0x0cda7e27};
static constexpr storage<limbs_count> omega10= {0x0bdc32ee, 0xca77feb9, 0xd957f5a9, 0xf36ddfd4, 0x61ba14c4, 0x491c58f5, 0x93e8f339, 0x0618d3c9};
static constexpr storage<limbs_count> omega11= {0x2d89d82f, 0x68c3242e, 0x832a3729, 0xf9559645, 0xbceb62cc, 0x5c803c5e, 0x99ffa2f8, 0x1177cf5d};
static constexpr storage<limbs_count> omega12= {0x6932851a, 0xb6ed40f2, 0x1e0da12e, 0x79cbe7fb, 0x2a7d8f87, 0x8d408575, 0x7505d049, 0x11867341};
static constexpr storage<limbs_count> omega13= {0x07146cbf, 0x8cf7d87a, 0x109c4d23, 0x14ac37dc, 0x883e9660, 0x082d15f0, 0xad9ea9b8, 0x003719b1};
static constexpr storage<limbs_count> omega14= {0xfd0aee77, 0x2260e0dd, 0x1e33b6db, 0xc0cbbc3f, 0xfe7e1b36, 0xc8bf6747, 0x4cb802c1, 0x129e4fd5};
static constexpr storage<limbs_count> omega15= {0x8ac75741, 0x22f6fca2, 0xdd37b519, 0x8101b557, 0x1036226a, 0xf493bb8a, 0xfce05c2c, 0x06dbad6c};
static constexpr storage<limbs_count> omega16= {0x56733f8b, 0x7d246c24, 0xff70b46a, 0xbc3c4112, 0x6f13530b, 0x2c159b40, 0xc55d287b, 0x0c13137a};
static constexpr storage<limbs_count> omega17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> omega18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> omega19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
static constexpr storage<limbs_count> omega20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
static constexpr storage<limbs_count> omega21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
static constexpr storage<limbs_count> omega22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
static constexpr storage<limbs_count> omega23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
static constexpr storage<limbs_count> omega24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
static constexpr storage<limbs_count> omega25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
static constexpr storage<limbs_count> omega26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
static constexpr storage<limbs_count> omega27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
static constexpr storage<limbs_count> omega28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
static constexpr storage<limbs_count> omega29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
static constexpr storage<limbs_count> omega1 = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega2 = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega3 = {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe,
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> omega4 = {0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765,
0x970dec00, 0x23ed1347, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega5 = {0x0405f600, 0xfa8e7081, 0xf8a89660, 0x38b1c291,
0x6bda5fce, 0xefab9005, 0x92a3c754, 0x0b6b0756};
static constexpr storage<limbs_count> omega6 = {0xaf0a50c8, 0xc5b2c78e, 0x4636deb3, 0x72e32a34,
0xb6f97778, 0x3d775d15, 0x2b16be6e, 0x0c4c070d};
static constexpr storage<limbs_count> omega7 = {0x7a1ade2c, 0x3f5a4e73, 0x0120d1db, 0x71e5bca1,
0x3b2866fd, 0xbcb44162, 0x89c38db1, 0x06ed1a90};
static constexpr storage<limbs_count> omega8 = {0xbd2cd25e, 0x61c5510e, 0x2b0d531c, 0xe2d70111,
0x94c3bd4b, 0x738f9894, 0x53182695, 0x0b1e0f1d};
static constexpr storage<limbs_count> omega9 = {0x8cb9508c, 0xcfb2f75e, 0xf491e401, 0x4c14f244,
0x23c16afb, 0xc8f5265f, 0x70f3ff2a, 0x0cda7e27};
static constexpr storage<limbs_count> omega10 = {0x0bdc32ee, 0xca77feb9, 0xd957f5a9, 0xf36ddfd4,
0x61ba14c4, 0x491c58f5, 0x93e8f339, 0x0618d3c9};
static constexpr storage<limbs_count> omega11 = {0x2d89d82f, 0x68c3242e, 0x832a3729, 0xf9559645,
0xbceb62cc, 0x5c803c5e, 0x99ffa2f8, 0x1177cf5d};
static constexpr storage<limbs_count> omega12 = {0x6932851a, 0xb6ed40f2, 0x1e0da12e, 0x79cbe7fb,
0x2a7d8f87, 0x8d408575, 0x7505d049, 0x11867341};
static constexpr storage<limbs_count> omega13 = {0x07146cbf, 0x8cf7d87a, 0x109c4d23, 0x14ac37dc,
0x883e9660, 0x082d15f0, 0xad9ea9b8, 0x003719b1};
static constexpr storage<limbs_count> omega14 = {0xfd0aee77, 0x2260e0dd, 0x1e33b6db, 0xc0cbbc3f,
0xfe7e1b36, 0xc8bf6747, 0x4cb802c1, 0x129e4fd5};
static constexpr storage<limbs_count> omega15 = {0x8ac75741, 0x22f6fca2, 0xdd37b519, 0x8101b557,
0x1036226a, 0xf493bb8a, 0xfce05c2c, 0x06dbad6c};
static constexpr storage<limbs_count> omega16 = {0x56733f8b, 0x7d246c24, 0xff70b46a, 0xbc3c4112,
0x6f13530b, 0x2c159b40, 0xc55d287b, 0x0c13137a};
static constexpr storage<limbs_count> omega17 = {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4,
0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
static constexpr storage<limbs_count> omega18 = {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe,
0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
static constexpr storage<limbs_count> omega19 = {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee,
0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
static constexpr storage<limbs_count> omega20 = {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed,
0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
static constexpr storage<limbs_count> omega21 = {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba,
0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
static constexpr storage<limbs_count> omega22 = {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2,
0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
static constexpr storage<limbs_count> omega23 = {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da,
0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
static constexpr storage<limbs_count> omega24 = {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3,
0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
static constexpr storage<limbs_count> omega25 = {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804,
0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
static constexpr storage<limbs_count> omega26 = {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374,
0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
static constexpr storage<limbs_count> omega27 = {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267,
0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
static constexpr storage<limbs_count> omega28 = {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae,
0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
static constexpr storage<limbs_count> omega29 = {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d,
0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
static constexpr storage<limbs_count> omega30 = {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e,
0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
static constexpr storage<limbs_count> omega31 = {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c,
0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
static constexpr storage<limbs_count> omega32 = {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc,
0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
static constexpr storage_array<omegas_count, limbs_count> omega = {
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8,
omega9, omega10, omega11, omega12, omega13, omega14, omega15, omega16,
omega17, omega18, omega19, omega20, omega21, omega22, omega23, omega24,
omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8, omega9, omega10, omega11,
omega12, omega13, omega14, omega15, omega16, omega17, omega18, omega19, omega20, omega21, omega22,
omega23, omega24, omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
};
static constexpr storage<limbs_count> omega_inv1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> omega_inv4= {0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99, 0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> omega_inv5= {0x29f04fbb, 0x401766f3, 0x0a4b98b2, 0x7e4e5f63, 0x9fbc28da, 0x35887f12, 0xdabe3b97, 0x045cb225};
static constexpr storage<limbs_count> omega_inv6= {0xac4ce534, 0xf3883827, 0x7c4940f0, 0x9f9a114f, 0x32cc3182, 0xe48527ee, 0x2877f4c2, 0x02d4450c};
static constexpr storage<limbs_count> omega_inv7= {0x4afbf0bb, 0xd2533833, 0x1d646d56, 0x20987ba6, 0xb8ae7d61, 0xf2c34c11, 0xb53ae995, 0x09962e74};
static constexpr storage<limbs_count> omega_inv8= {0x34f5271a, 0xd6aeb755, 0x493bb125, 0xc0e24cfd, 0x35cf1879, 0xc9d2a1ad, 0x19000e58, 0x0f3570fa};
static constexpr storage<limbs_count> omega_inv9= {0xbec3ee61, 0x2601423e, 0xb5252af1, 0x94f5ab4b, 0x205d09ca, 0xa1184628, 0x82a1fba2, 0x0e305e1e};
static constexpr storage<limbs_count> omega_inv10= {0x7e3320f2, 0x3cbad3a7, 0x4269c624, 0x7866653a, 0xa2fc13a2, 0xaf6d742d, 0xfe24db2a, 0x03ed8246};
static constexpr storage<limbs_count> omega_inv11= {0x30cff7d3, 0xcb6ab09e, 0xd88db7e6, 0x29949e69, 0x24db3cd4, 0xb9117dc6, 0xca8d11b5, 0x01b2aadd};
static constexpr storage<limbs_count> omega_inv12= {0x433b851c, 0x1c8fbc5d, 0x545e622f, 0x0ccc3b8c, 0x5c624e0f, 0x0fba9df2, 0x0496ddf9, 0x02d54c5d};
static constexpr storage<limbs_count> omega_inv13= {0x0a176838, 0x2ddbbfdd, 0xc4c77f0f, 0xb7a1e4f3, 0x41cad032, 0x645b4383, 0xbfb123c4, 0x0f3fe2e3};
static constexpr storage<limbs_count> omega_inv14= {0x9ff30538, 0x1d6d50fe, 0x8576b6fa, 0xca07f2d2, 0x720da6d2, 0x587839fa, 0xe9ebd753, 0x0038d5aa};
static constexpr storage<limbs_count> omega_inv15= {0x8e30fb24, 0xaeac713d, 0x21906459, 0xd004e9e3, 0xa60b0a33, 0x2fc54303, 0x14e545a6, 0x039063f8};
static constexpr storage<limbs_count> omega_inv16= {0x74d36c47, 0x112559bd, 0x4154b77a, 0x87db7016, 0x3843df80, 0x9e779ae5, 0x297077d0, 0x024424f2};
static constexpr storage<limbs_count> omega_inv17= {0x65953c15, 0xd649ae5e, 0x56accc60, 0x879fe571, 0xa3ba1e39, 0xba914f52, 0xd6ea78a2, 0x01b74920};
static constexpr storage<limbs_count> omega_inv18= {0x3d8a82b4, 0x319dea45, 0x8fc703de, 0x49468894, 0xc6b00817, 0x703f710f, 0xe862bc53, 0x007762fd};
static constexpr storage<limbs_count> omega_inv19= {0x5bae083f, 0x4f433336, 0x27612fe3, 0x485e079c, 0x7f8f0a07, 0xf83b6572, 0xca91a4d4, 0x06bdcaaf};
static constexpr storage<limbs_count> omega_inv20= {0xb2fb63eb, 0x4a0bf5e7, 0x996004d9, 0x6f64f8ec, 0x67519c5e, 0x0fecd781, 0x1cab2760, 0x04475eb3};
static constexpr storage<limbs_count> omega_inv21= {0xcd83d14f, 0xadbd6ce4, 0x750b194a, 0xc664d3bc, 0x89c9f437, 0x3034dfed, 0xcc2e643b, 0x03d502b8};
static constexpr storage<limbs_count> omega_inv22= {0x2272320b, 0xf89478a9, 0xd2e658b7, 0x3adac024, 0x94b25831, 0xf38d840f, 0x37dc6c4c, 0x04540b1f};
static constexpr storage<limbs_count> omega_inv23= {0xa6d411fe, 0x19d969b1, 0xf544a648, 0x973f00f7, 0xc9ed9f93, 0xb18f166c, 0xe7f21124, 0x02fba68e};
static constexpr storage<limbs_count> omega_inv24= {0x94921227, 0x78b96b20, 0x23b35b65, 0x07cd90db, 0xc843f1c3, 0x111f4fd9, 0xff729f23, 0x0ec4b820};
static constexpr storage<limbs_count> omega_inv25= {0x4879d823, 0x53eb200b, 0x93095f4a, 0x1971fac3, 0x86989a58, 0x8467ffe6, 0x306ed29d, 0x0af20231};
static constexpr storage<limbs_count> omega_inv26= {0xd4793454, 0x71c907bd, 0x7700defb, 0xc11aa47e, 0xbac11769, 0xf03e0873, 0x97419136, 0x0353190d};
static constexpr storage<limbs_count> omega_inv27= {0xa81a701c, 0x61a3deb6, 0x91bbbecf, 0xd8a4eda1, 0x6feb65df, 0x3f5339b1, 0x8b5421f2, 0x108adc5b};
static constexpr storage<limbs_count> omega_inv28= {0xe7bf5a41, 0x7d6c573a, 0xfa83b1f7, 0x8038b697, 0xa6718ce9, 0x2a988bee, 0x1239b708, 0x0846f362};
static constexpr storage<limbs_count> omega_inv29= {0xe3373548, 0x89a068a4, 0x78a6c4e5, 0xf31284cf, 0x6e9396d6, 0x9eed5c8d, 0x7e4342f9, 0x01643c65};
static constexpr storage<limbs_count> omega_inv30= {0x123a81f6, 0xc03a3272, 0x115b15e8, 0x377e6d2f, 0x2d6d7206, 0xed5575e4, 0x714004f2, 0x0b1e37e4};
static constexpr storage<limbs_count> omega_inv31= {0xdde8ffc5, 0x62a29589, 0x618c5d62, 0xfb6716e8, 0x88d61f25, 0x787e561c, 0xd2b21c7e, 0x0e351761};
static constexpr storage<limbs_count> omega_inv32= {0x7aca7fbe, 0xc9fea0e9, 0xb41a8854, 0x965ff314, 0x810eea7e, 0x743415d4, 0x8275bbd1, 0x0431c01b};
static constexpr storage<limbs_count> omega_inv1 = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv2 = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> omega_inv3 = {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe,
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> omega_inv4 = {0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99,
0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e};
static constexpr storage<limbs_count> omega_inv5 = {0x29f04fbb, 0x401766f3, 0x0a4b98b2, 0x7e4e5f63,
0x9fbc28da, 0x35887f12, 0xdabe3b97, 0x045cb225};
static constexpr storage<limbs_count> omega_inv6 = {0xac4ce534, 0xf3883827, 0x7c4940f0, 0x9f9a114f,
0x32cc3182, 0xe48527ee, 0x2877f4c2, 0x02d4450c};
static constexpr storage<limbs_count> omega_inv7 = {0x4afbf0bb, 0xd2533833, 0x1d646d56, 0x20987ba6,
0xb8ae7d61, 0xf2c34c11, 0xb53ae995, 0x09962e74};
static constexpr storage<limbs_count> omega_inv8 = {0x34f5271a, 0xd6aeb755, 0x493bb125, 0xc0e24cfd,
0x35cf1879, 0xc9d2a1ad, 0x19000e58, 0x0f3570fa};
static constexpr storage<limbs_count> omega_inv9 = {0xbec3ee61, 0x2601423e, 0xb5252af1, 0x94f5ab4b,
0x205d09ca, 0xa1184628, 0x82a1fba2, 0x0e305e1e};
static constexpr storage<limbs_count> omega_inv10 = {0x7e3320f2, 0x3cbad3a7, 0x4269c624, 0x7866653a,
0xa2fc13a2, 0xaf6d742d, 0xfe24db2a, 0x03ed8246};
static constexpr storage<limbs_count> omega_inv11 = {0x30cff7d3, 0xcb6ab09e, 0xd88db7e6, 0x29949e69,
0x24db3cd4, 0xb9117dc6, 0xca8d11b5, 0x01b2aadd};
static constexpr storage<limbs_count> omega_inv12 = {0x433b851c, 0x1c8fbc5d, 0x545e622f, 0x0ccc3b8c,
0x5c624e0f, 0x0fba9df2, 0x0496ddf9, 0x02d54c5d};
static constexpr storage<limbs_count> omega_inv13 = {0x0a176838, 0x2ddbbfdd, 0xc4c77f0f, 0xb7a1e4f3,
0x41cad032, 0x645b4383, 0xbfb123c4, 0x0f3fe2e3};
static constexpr storage<limbs_count> omega_inv14 = {0x9ff30538, 0x1d6d50fe, 0x8576b6fa, 0xca07f2d2,
0x720da6d2, 0x587839fa, 0xe9ebd753, 0x0038d5aa};
static constexpr storage<limbs_count> omega_inv15 = {0x8e30fb24, 0xaeac713d, 0x21906459, 0xd004e9e3,
0xa60b0a33, 0x2fc54303, 0x14e545a6, 0x039063f8};
static constexpr storage<limbs_count> omega_inv16 = {0x74d36c47, 0x112559bd, 0x4154b77a, 0x87db7016,
0x3843df80, 0x9e779ae5, 0x297077d0, 0x024424f2};
static constexpr storage<limbs_count> omega_inv17 = {0x65953c15, 0xd649ae5e, 0x56accc60, 0x879fe571,
0xa3ba1e39, 0xba914f52, 0xd6ea78a2, 0x01b74920};
static constexpr storage<limbs_count> omega_inv18 = {0x3d8a82b4, 0x319dea45, 0x8fc703de, 0x49468894,
0xc6b00817, 0x703f710f, 0xe862bc53, 0x007762fd};
static constexpr storage<limbs_count> omega_inv19 = {0x5bae083f, 0x4f433336, 0x27612fe3, 0x485e079c,
0x7f8f0a07, 0xf83b6572, 0xca91a4d4, 0x06bdcaaf};
static constexpr storage<limbs_count> omega_inv20 = {0xb2fb63eb, 0x4a0bf5e7, 0x996004d9, 0x6f64f8ec,
0x67519c5e, 0x0fecd781, 0x1cab2760, 0x04475eb3};
static constexpr storage<limbs_count> omega_inv21 = {0xcd83d14f, 0xadbd6ce4, 0x750b194a, 0xc664d3bc,
0x89c9f437, 0x3034dfed, 0xcc2e643b, 0x03d502b8};
static constexpr storage<limbs_count> omega_inv22 = {0x2272320b, 0xf89478a9, 0xd2e658b7, 0x3adac024,
0x94b25831, 0xf38d840f, 0x37dc6c4c, 0x04540b1f};
static constexpr storage<limbs_count> omega_inv23 = {0xa6d411fe, 0x19d969b1, 0xf544a648, 0x973f00f7,
0xc9ed9f93, 0xb18f166c, 0xe7f21124, 0x02fba68e};
static constexpr storage<limbs_count> omega_inv24 = {0x94921227, 0x78b96b20, 0x23b35b65, 0x07cd90db,
0xc843f1c3, 0x111f4fd9, 0xff729f23, 0x0ec4b820};
static constexpr storage<limbs_count> omega_inv25 = {0x4879d823, 0x53eb200b, 0x93095f4a, 0x1971fac3,
0x86989a58, 0x8467ffe6, 0x306ed29d, 0x0af20231};
static constexpr storage<limbs_count> omega_inv26 = {0xd4793454, 0x71c907bd, 0x7700defb, 0xc11aa47e,
0xbac11769, 0xf03e0873, 0x97419136, 0x0353190d};
static constexpr storage<limbs_count> omega_inv27 = {0xa81a701c, 0x61a3deb6, 0x91bbbecf, 0xd8a4eda1,
0x6feb65df, 0x3f5339b1, 0x8b5421f2, 0x108adc5b};
static constexpr storage<limbs_count> omega_inv28 = {0xe7bf5a41, 0x7d6c573a, 0xfa83b1f7, 0x8038b697,
0xa6718ce9, 0x2a988bee, 0x1239b708, 0x0846f362};
static constexpr storage<limbs_count> omega_inv29 = {0xe3373548, 0x89a068a4, 0x78a6c4e5, 0xf31284cf,
0x6e9396d6, 0x9eed5c8d, 0x7e4342f9, 0x01643c65};
static constexpr storage<limbs_count> omega_inv30 = {0x123a81f6, 0xc03a3272, 0x115b15e8, 0x377e6d2f,
0x2d6d7206, 0xed5575e4, 0x714004f2, 0x0b1e37e4};
static constexpr storage<limbs_count> omega_inv31 = {0xdde8ffc5, 0x62a29589, 0x618c5d62, 0xfb6716e8,
0x88d61f25, 0x787e561c, 0xd2b21c7e, 0x0e351761};
static constexpr storage<limbs_count> omega_inv32 = {0x7aca7fbe, 0xc9fea0e9, 0xb41a8854, 0x965ff314,
0x810eea7e, 0x743415d4, 0x8275bbd1, 0x0431c01b};
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
};
static constexpr storage<limbs_count> inv1= {0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af};
static constexpr storage<limbs_count> inv2= {0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06};
static constexpr storage<limbs_count> inv3= {0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2};
static constexpr storage<limbs_count> inv4= {0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e, 0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08};
static constexpr storage<limbs_count> inv5= {0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346, 0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33};
static constexpr storage<limbs_count> inv6= {0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22, 0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9};
static constexpr storage<limbs_count> inv7= {0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210, 0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93};
static constexpr storage<limbs_count> inv8= {0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87, 0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9};
static constexpr storage<limbs_count> inv9= {0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3, 0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab};
static constexpr storage<limbs_count> inv10= {0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61, 0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85};
static constexpr storage<limbs_count> inv11= {0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af, 0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1};
static constexpr storage<limbs_count> inv12= {0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57, 0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8};
static constexpr storage<limbs_count> inv13= {0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab, 0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003};
static constexpr storage<limbs_count> inv14= {0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054, 0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1};
static constexpr storage<limbs_count> inv15= {0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9, 0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007};
static constexpr storage<limbs_count> inv16= {0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54, 0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3};
static constexpr storage<limbs_count> inv17= {0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29, 0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08};
static constexpr storage<limbs_count> inv18= {0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094, 0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3};
static constexpr storage<limbs_count> inv19= {0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9, 0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309};
static constexpr storage<limbs_count> inv20= {0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164, 0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433};
static constexpr storage<limbs_count> inv21= {0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31, 0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9};
static constexpr storage<limbs_count> inv22= {0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198, 0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513};
static constexpr storage<limbs_count> inv23= {0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b, 0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539};
static constexpr storage<limbs_count> inv24= {0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5, 0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b};
static constexpr storage<limbs_count> inv25= {0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651, 0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555};
static constexpr storage<limbs_count> inv26= {0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8, 0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559};
static constexpr storage<limbs_count> inv27= {0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3, 0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c};
static constexpr storage<limbs_count> inv28= {0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9, 0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d};
static constexpr storage<limbs_count> inv29= {0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4, 0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e};
static constexpr storage<limbs_count> inv30= {0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e};
static constexpr storage<limbs_count> inv31= {0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e};
static constexpr storage<limbs_count> inv32= {0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e};
static constexpr storage<limbs_count> inv1 = {0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f,
0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af};
static constexpr storage<limbs_count> inv2 = {0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f,
0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06};
static constexpr storage<limbs_count> inv3 = {0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e,
0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2};
static constexpr storage<limbs_count> inv4 = {0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e,
0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08};
static constexpr storage<limbs_count> inv5 = {0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346,
0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33};
static constexpr storage<limbs_count> inv6 = {0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22,
0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9};
static constexpr storage<limbs_count> inv7 = {0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210,
0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93};
static constexpr storage<limbs_count> inv8 = {0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87,
0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9};
static constexpr storage<limbs_count> inv9 = {0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3,
0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab};
static constexpr storage<limbs_count> inv10 = {0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61,
0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85};
static constexpr storage<limbs_count> inv11 = {0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af,
0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1};
static constexpr storage<limbs_count> inv12 = {0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57,
0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8};
static constexpr storage<limbs_count> inv13 = {0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab,
0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003};
static constexpr storage<limbs_count> inv14 = {0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054,
0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1};
static constexpr storage<limbs_count> inv15 = {0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9,
0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007};
static constexpr storage<limbs_count> inv16 = {0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54,
0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3};
static constexpr storage<limbs_count> inv17 = {0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29,
0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08};
static constexpr storage<limbs_count> inv18 = {0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094,
0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3};
static constexpr storage<limbs_count> inv19 = {0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9,
0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309};
static constexpr storage<limbs_count> inv20 = {0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164,
0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433};
static constexpr storage<limbs_count> inv21 = {0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31,
0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9};
static constexpr storage<limbs_count> inv22 = {0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198,
0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513};
static constexpr storage<limbs_count> inv23 = {0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b,
0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539};
static constexpr storage<limbs_count> inv24 = {0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5,
0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b};
static constexpr storage<limbs_count> inv25 = {0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651,
0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555};
static constexpr storage<limbs_count> inv26 = {0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8,
0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559};
static constexpr storage<limbs_count> inv27 = {0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3,
0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c};
static constexpr storage<limbs_count> inv28 = {0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9,
0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d};
static constexpr storage<limbs_count> inv29 = {0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4,
0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e};
static constexpr storage<limbs_count> inv30 = {0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9,
0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e};
static constexpr storage<limbs_count> inv31 = {0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc,
0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e};
static constexpr storage<limbs_count> inv32 = {0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd,
0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e};
static constexpr storage_array<omegas_count, limbs_count> inv = {
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8,
inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24,
inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
};
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8, inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24, inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
};
};
struct fq_config{
struct fq_config {
static constexpr unsigned limbs_count = 12;
static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88, 0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6, 0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510, 0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc, 0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2*limbs_count> modulus_squared = {0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2, 0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af, 0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4, 0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f, 0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48, 0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be, 0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44,
0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88,
0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6,
0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510,
0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc,
0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
static constexpr storage<2 * limbs_count> modulus_wide = {
0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<2 * limbs_count> modulus_squared = {
0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2,
0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af,
0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4,
0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f,
0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48,
0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be,
0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
static constexpr unsigned modulus_bit_count = 377;
static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488, 0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488,
0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<limbs_count> montgomery_r = {0xffffff, 0xf73fffff, 0xffffff7a, 0xf4a2bbcf,
0xf6b7ffe8, 0x0c9dd045, 0x0aec70e1, 0xdd260cff,
0x5eb6c4e5, 0xc4fa3f93, 0x3aef1539, 0x51c5b9e8};
static constexpr storage<limbs_count> montgomery_r_inv = {0x934f3a1, 0xb0909a28, 0xc1cfac62, 0x3264aa55,
0x2a491ae8, 0xaccd49ca, 0xe80e9a61, 0x28b2dce9,
0x26f7c08a, 0x4d313ea1, 0x36254563, 0x161de1ee};
// i^2, the square of the imaginary unit for the extension field
static constexpr uint32_t i_squared = 1;
static constexpr uint32_t i_squared = 5;
// true if i^2 is negative
static constexpr bool i_squared_is_negative = true;
// G1 and G2 generators
static constexpr storage<limbs_count> g1_gen_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512, 0xbd37cb5c, 0x188282c8,
0xaa9d41bb, 0x85951e2c, 0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
static constexpr storage<limbs_count> g1_gen_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36, 0x4fb82305, 0x6d182ad4,
0xca3e52d9, 0xbd7fb348, 0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
static constexpr storage<limbs_count> g2_gen_x_re = {0x7c005196, 0x74e3e48f, 0xbb535402, 0x71889f52, 0x57db6b9b, 0x7ea501f5,
0x203e5031, 0xc565f071, 0xa3841d01, 0xc89630a2, 0x71c785fe, 0x018480be};
static constexpr storage<limbs_count> g2_gen_x_im = {0x6ea16afe, 0xb26bfefa, 0xbff76fe6, 0x5cf89984, 0x0799c9de, 0xe7223ece,
0x6651cecb, 0x532777ee, 0xb1b140d5, 0x70dc5a51, 0xe7004031, 0x00ea6040};
static constexpr storage<limbs_count> g2_gen_y_re = {0x09fd4ddf, 0xf0940944, 0x6d8c7c2e, 0xf2cf8888, 0xf832d204, 0xe458c282,
0x74b49a58, 0xde03ed72, 0xcbb2efb4, 0xd960736b, 0x5d446f7b, 0x00690d66};
static constexpr storage<limbs_count> g2_gen_y_im = {0x85eb8f93, 0xd9a1cdd1, 0x5e52270b, 0x4279b83f, 0xcee304c2, 0x2463b01a,
0x3d591bf1, 0x61ef11ac, 0x151a70aa, 0x9e549da3, 0xd2835518, 0x00f8169f};
// G1 and G2 generators
static constexpr storage<limbs_count> g1_gen_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512,
0xbd37cb5c, 0x188282c8, 0xaa9d41bb, 0x85951e2c,
0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
static constexpr storage<limbs_count> g1_gen_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36,
0x4fb82305, 0x6d182ad4, 0xca3e52d9, 0xbd7fb348,
0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
static constexpr storage<limbs_count> g2_gen_x_re = {0x7c005196, 0x74e3e48f, 0xbb535402, 0x71889f52,
0x57db6b9b, 0x7ea501f5, 0x203e5031, 0xc565f071,
0xa3841d01, 0xc89630a2, 0x71c785fe, 0x018480be};
static constexpr storage<limbs_count> g2_gen_x_im = {0x6ea16afe, 0xb26bfefa, 0xbff76fe6, 0x5cf89984,
0x0799c9de, 0xe7223ece, 0x6651cecb, 0x532777ee,
0xb1b140d5, 0x70dc5a51, 0xe7004031, 0x00ea6040};
static constexpr storage<limbs_count> g2_gen_y_re = {0x09fd4ddf, 0xf0940944, 0x6d8c7c2e, 0xf2cf8888,
0xf832d204, 0xe458c282, 0x74b49a58, 0xde03ed72,
0xcbb2efb4, 0xd960736b, 0x5d446f7b, 0x00690d66};
static constexpr storage<limbs_count> g2_gen_y_im = {0x85eb8f93, 0xd9a1cdd1, 0x5e52270b, 0x4279b83f,
0xcee304c2, 0x2463b01a, 0x3d591bf1, 0x61ef11ac,
0x151a70aa, 0x9e549da3, 0xd2835518, 0x00f8169f};
};
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x9999999a, 0x1c9ed999, 0x1ccccccd, 0x0dd39e5c, 0x3c6bf800, 0x129207b6,
0xcd5fd889, 0xdc7b4f91, 0x7460c589, 0x43bd0373, 0xdb0fd6f3, 0x010222f6};
}
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
0x9999999a, 0x1c9ed999, 0x1ccccccd, 0x0dd39e5c, 0x3c6bf800, 0x129207b6,
0xcd5fd889, 0xdc7b4f91, 0x7460c589, 0x43bd0373, 0xdb0fd6f3, 0x010222f6};
} // namespace PARAMS_BLS12_377

View File

@@ -1,22 +1,111 @@
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include <cuda.h>
#include "curve_config.cuh"
extern "C" int random_projective_bls12_377(BLS12_377::projective_t* out) {
try {
out[0] = BLS12_377::projective_t::rand_host();
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
#include "../../primitives/projective.cuh"
extern "C" BLS12_377::projective_t projective_zero_bls12_377() { return BLS12_377::projective_t::zero(); }
extern "C" bool eq_bls12_377(BLS12_377::projective_t *point1, BLS12_377::projective_t *point2)
extern "C" bool projective_is_on_curve_bls12_377(BLS12_377::projective_t* point1)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_377::point_field_t::zero()) && (point1->y == BLS12_377::point_field_t::zero()) && (point1->z == BLS12_377::point_field_t::zero())) &&
!((point2->x == BLS12_377::point_field_t::zero()) && (point2->y == BLS12_377::point_field_t::zero()) && (point2->z == BLS12_377::point_field_t::zero()));
return BLS12_377::projective_t::is_on_curve(*point1);
}
extern "C" int projective_to_affine_bls12_377(BLS12_377::affine_t* out, BLS12_377::projective_t* point1)
{
try {
out[0] = BLS12_377::projective_t::to_affine(*point1);
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int projective_from_affine_bls12_377(BLS12_377::projective_t* out, BLS12_377::affine_t* point1)
{
try {
out[0] = BLS12_377::projective_t::from_affine(*point1);
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int random_scalar_bls12_377(BLS12_377::scalar_field_t* out) {
try {
out[0] = BLS12_377::scalar_field_t::rand_host();
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" bool eq_bls12_377(BLS12_377::projective_t* point1, BLS12_377::projective_t* point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_377::point_field_t::zero()) && (point1->y == BLS12_377::point_field_t::zero()) &&
(point1->z == BLS12_377::point_field_t::zero())) &&
!((point2->x == BLS12_377::point_field_t::zero()) && (point2->y == BLS12_377::point_field_t::zero()) &&
(point2->z == BLS12_377::point_field_t::zero()));
}
#if defined(G2_DEFINED)
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t *point1, BLS12_377::g2_projective_t *point2)
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t* point1, BLS12_377::g2_projective_t* point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_377::g2_point_field_t::zero()) && (point1->y == BLS12_377::g2_point_field_t::zero()) && (point1->z == BLS12_377::g2_point_field_t::zero())) &&
!((point2->x == BLS12_377::g2_point_field_t::zero()) && (point2->y == BLS12_377::g2_point_field_t::zero()) && (point2->z == BLS12_377::g2_point_field_t::zero()));
return (*point1 == *point2) &&
!((point1->x == BLS12_377::g2_point_field_t::zero()) && (point1->y == BLS12_377::g2_point_field_t::zero()) &&
(point1->z == BLS12_377::g2_point_field_t::zero())) &&
!((point2->x == BLS12_377::g2_point_field_t::zero()) && (point2->y == BLS12_377::g2_point_field_t::zero()) &&
(point2->z == BLS12_377::g2_point_field_t::zero()));
}
extern "C" int random_g2_projective_bls12_377(BLS12_377::g2_projective_t* out)
{
try {
out[0] = BLS12_377::g2_projective_t::rand_host();
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int g2_projective_to_affine_bls12_377(BLS12_377::g2_affine_t* out, BLS12_377::g2_projective_t* point1)
{
try {
out[0] = BLS12_377::g2_projective_t::to_affine(*point1);
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" int g2_projective_from_affine_bls12_377(BLS12_377::g2_projective_t* out, BLS12_377::g2_affine_t* point1)
{
try {
out[0] = BLS12_377::g2_projective_t::from_affine(*point1);
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what());
return -1;
}
}
extern "C" bool g2_projective_is_on_curve_bls12_377(BLS12_377::g2_projective_t* point1)
{
return BLS12_377::g2_projective_t::is_on_curve(*point1);
}
#endif

View File

@@ -1,5 +1,4 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"
#include "projective.cu"
#include "ve_mod_mult.cu"

View File

@@ -1,69 +1,80 @@
#ifndef _BLS12_377_VEC_MULT
#define _BLS12_377_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "../../utils/storage.cuh"
#include "curve_config.cuh"
#include <iostream>
#include <stdio.h>
extern "C" int32_t vec_mod_mult_point_bls12_377(BLS12_377::projective_t *inout,
BLS12_377::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id,
cudaStream_t stream = 0)
extern "C" int32_t vec_mod_mult_point_bls12_377(
BLS12_377::projective_t* inout,
BLS12_377::scalar_t* scalar_vec,
size_t n_elments,
size_t device_id,
cudaStream_t stream = 0)
{
try
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try {
// TODO: device_id
vector_mod_mult<BLS12_377::projective_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t vec_mod_mult_scalar_bls12_377(BLS12_377::scalar_t *inout,
BLS12_377::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id,
cudaStream_t stream = 0)
extern "C" int32_t vec_mod_mult_scalar_bls12_377(
BLS12_377::scalar_t* inout,
BLS12_377::scalar_t* scalar_vec,
size_t n_elments,
size_t device_id,
cudaStream_t stream = 0)
{
try
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try {
// TODO: device_id
vector_mod_mult<BLS12_377::scalar_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t matrix_vec_mod_mult_bls12_377(BLS12_377::scalar_t *matrix_flattened,
BLS12_377::scalar_t *input,
BLS12_377::scalar_t *output,
size_t n_elments,
size_t device_id,
cudaStream_t stream = 0)
extern "C" int32_t vec_mod_mult_device_scalar_bls12_377(
BLS12_377::scalar_t* inout, BLS12_377::scalar_t* scalar_vec, size_t n_elements, size_t device_id)
{
try
{
// TODO: device_id
matrix_mod_mult<BLS12_377::scalar_t>(matrix_flattened, input, output, n_elments, stream);
try {
vector_mod_mult_device<BLS12_377::scalar_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elements);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
#endif
extern "C" int32_t matrix_vec_mod_mult_bls12_377(
BLS12_377::scalar_t* matrix_flattened,
BLS12_377::scalar_t* input,
BLS12_377::scalar_t* output,
size_t n_elments,
size_t device_id,
cudaStream_t stream = 0)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try {
// TODO: device_id
matrix_mod_mult<BLS12_377::scalar_t>(matrix_flattened, input, output, n_elments, stream);
return CUDA_SUCCESS;
} catch (const std::runtime_error& ex) {
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
#endif

Some files were not shown because too many files have changed in this diff Show More