mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-08 23:17:54 -05:00
Fix formatting for all files (#153)
This commit is contained in:
@@ -4,7 +4,7 @@ AlignConsecutiveMacros: true
|
||||
AlignTrailingComments: true
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortBlocksOnASingleLine: true
|
||||
AllowShortCaseLabelsOnASingleLine: true
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: All
|
||||
AllowShortIfStatementsOnASingleLine: true
|
||||
AlwaysBreakTemplateDeclarations: true
|
||||
@@ -23,9 +23,9 @@ DisableFormat: false
|
||||
IndentFunctionDeclarationAfterType: false
|
||||
IndentWidth: 2
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
MaxEmptyLinesToKeep: 2
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: All
|
||||
PointerAlignment: Right
|
||||
PointerAlignment: Left
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceInEmptyParentheses: false
|
||||
|
||||
10
.rustfmt.toml
Normal file
10
.rustfmt.toml
Normal file
@@ -0,0 +1,10 @@
|
||||
# https://github.com/rust-lang/rustfmt/blob/master/Configurations.md
|
||||
|
||||
# Stable Configs
|
||||
chain_width = 0
|
||||
max_width = 120
|
||||
merge_derives = true
|
||||
use_field_init_shorthand = true
|
||||
use_try_shorthand = true
|
||||
|
||||
# Unstable Configs
|
||||
@@ -34,16 +34,20 @@ fn bench_msm(c: &mut Criterion) {
|
||||
#[cfg(feature = "g2")]
|
||||
let mut d_g2_points = DeviceBuffer::from_slice(&g2_batch_points[..]).unwrap();
|
||||
|
||||
group.sample_size(30).bench_function(
|
||||
&format!("MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|
||||
|b| b.iter(|| commit_batch_bls12_381(&mut d_points, &mut d_scalars, batch_size)),
|
||||
);
|
||||
group
|
||||
.sample_size(30)
|
||||
.bench_function(
|
||||
&format!("MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|
||||
|b| b.iter(|| commit_batch_bls12_381(&mut d_points, &mut d_scalars, batch_size)),
|
||||
);
|
||||
|
||||
#[cfg(feature = "g2")]
|
||||
group.sample_size(10).bench_function(
|
||||
&format!("G2 MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|
||||
|b| b.iter(|| commit_batch_g2(&mut d_g2_points, &mut d_scalars, batch_size))
|
||||
);
|
||||
group
|
||||
.sample_size(10)
|
||||
.bench_function(
|
||||
&format!("G2 MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|
||||
|b| b.iter(|| commit_batch_g2(&mut d_g2_points, &mut d_scalars, batch_size)),
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -21,46 +21,59 @@ fn bench_ntt(c: &mut Criterion) {
|
||||
|
||||
let (_, mut d_evals, mut d_domain) = set_up_scalars_bls12_381(ntt_size * batch_size, log_ntt_size, true);
|
||||
|
||||
group.sample_size(scalar_samples).bench_function(
|
||||
&format!("Scalar NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| evaluate_scalars_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size))
|
||||
);
|
||||
|
||||
group.sample_size(scalar_samples).bench_function(
|
||||
&format!("Scalar iNTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| interpolate_scalars_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size))
|
||||
);
|
||||
group
|
||||
.sample_size(scalar_samples)
|
||||
.bench_function(
|
||||
&format!("Scalar NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| evaluate_scalars_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size)),
|
||||
);
|
||||
|
||||
group.sample_size(scalar_samples).bench_function(
|
||||
&format!("Scalar inplace NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| ntt_inplace_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size, false, 0))
|
||||
);
|
||||
|
||||
group.sample_size(scalar_samples).bench_function(
|
||||
&format!("Scalar inplace iNTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| ntt_inplace_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size, true, 0))
|
||||
);
|
||||
group
|
||||
.sample_size(scalar_samples)
|
||||
.bench_function(
|
||||
&format!("Scalar iNTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| interpolate_scalars_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size)),
|
||||
);
|
||||
|
||||
group
|
||||
.sample_size(scalar_samples)
|
||||
.bench_function(
|
||||
&format!("Scalar inplace NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| ntt_inplace_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size, false, 0)),
|
||||
);
|
||||
|
||||
group
|
||||
.sample_size(scalar_samples)
|
||||
.bench_function(
|
||||
&format!("Scalar inplace iNTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| ntt_inplace_batch_bls12_381(&mut d_evals, &mut d_domain, batch_size, true, 0)),
|
||||
);
|
||||
|
||||
drop(d_evals);
|
||||
drop(d_domain);
|
||||
|
||||
if ntt_size * batch_size > 1 << 18{
|
||||
if ntt_size * batch_size > 1 << 18 {
|
||||
continue;
|
||||
}
|
||||
|
||||
let point_samples = 10;
|
||||
|
||||
let (_, mut d_points_evals, mut d_domain) = set_up_points_bls12_381(ntt_size * batch_size, log_ntt_size, true);
|
||||
|
||||
group.sample_size(point_samples).bench_function(
|
||||
&format!("EC NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| interpolate_points_batch_bls12_381(&mut d_points_evals, &mut d_domain, batch_size))
|
||||
);
|
||||
let (_, mut d_points_evals, mut d_domain) =
|
||||
set_up_points_bls12_381(ntt_size * batch_size, log_ntt_size, true);
|
||||
|
||||
group.sample_size(point_samples).bench_function(
|
||||
&format!("EC iNTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| evaluate_points_batch_bls12_381(&mut d_points_evals, &mut d_domain, batch_size))
|
||||
);
|
||||
group
|
||||
.sample_size(point_samples)
|
||||
.bench_function(
|
||||
&format!("EC NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| interpolate_points_batch_bls12_381(&mut d_points_evals, &mut d_domain, batch_size)),
|
||||
);
|
||||
|
||||
group
|
||||
.sample_size(point_samples)
|
||||
.bench_function(
|
||||
&format!("EC iNTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|
||||
|b| b.iter(|| evaluate_points_batch_bls12_381(&mut d_points_evals, &mut d_domain, batch_size)),
|
||||
);
|
||||
|
||||
drop(d_points_evals);
|
||||
drop(d_domain);
|
||||
@@ -70,4 +83,3 @@ fn bench_ntt(c: &mut Criterion) {
|
||||
|
||||
criterion_group!(ntt_benches, bench_ntt);
|
||||
criterion_main!(ntt_benches);
|
||||
|
||||
|
||||
4
build.rs
4
build.rs
@@ -26,8 +26,6 @@ fn main() {
|
||||
nvcc.debug(false);
|
||||
nvcc.flag(&arch);
|
||||
nvcc.flag(&stream);
|
||||
nvcc.files([
|
||||
"./icicle/curves/index.cu",
|
||||
]);
|
||||
nvcc.files(["./icicle/curves/index.cu"]);
|
||||
nvcc.compile("ingo_icicle"); //TODO: extension??
|
||||
}
|
||||
|
||||
@@ -204,14 +204,14 @@ newpath = f'./icicle/curves/{curve_name_lower}'
|
||||
if not os.path.exists(newpath):
|
||||
os.makedirs(newpath)
|
||||
|
||||
with open("./icicle/curves/curve_template/params.cuh", "r") as params_file:
|
||||
with open("./icicle/curves/curve_template/params.cuh.tmpl", "r") as params_file:
|
||||
params_file_template = Template(params_file.read())
|
||||
params = get_params(config)
|
||||
params_content = params_file_template.safe_substitute(params)
|
||||
with open(f'./icicle/curves/{curve_name_lower}/params.cuh', 'w') as f:
|
||||
f.write(params_content)
|
||||
|
||||
with open("./icicle/curves/curve_template/lde.cu", "r") as lde_file:
|
||||
with open("./icicle/curves/curve_template/lde.cu.tmpl", "r") as lde_file:
|
||||
template_content = Template(lde_file.read())
|
||||
lde_content = template_content.safe_substitute(
|
||||
CURVE_NAME_U=curve_name_upper,
|
||||
@@ -220,7 +220,7 @@ with open("./icicle/curves/curve_template/lde.cu", "r") as lde_file:
|
||||
with open(f'./icicle/curves/{curve_name_lower}/lde.cu', 'w') as f:
|
||||
f.write(lde_content)
|
||||
|
||||
with open("./icicle/curves/curve_template/msm.cu", "r") as msm_file:
|
||||
with open("./icicle/curves/curve_template/msm.cu.tmpl", "r") as msm_file:
|
||||
template_content = Template(msm_file.read())
|
||||
msm_content = template_content.safe_substitute(
|
||||
CURVE_NAME_U=curve_name_upper,
|
||||
@@ -229,7 +229,7 @@ with open("./icicle/curves/curve_template/msm.cu", "r") as msm_file:
|
||||
with open(f'./icicle/curves/{curve_name_lower}/msm.cu', 'w') as f:
|
||||
f.write(msm_content)
|
||||
|
||||
with open("./icicle/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_file:
|
||||
with open("./icicle/curves/curve_template/ve_mod_mult.cu.tmpl", "r") as ve_mod_mult_file:
|
||||
template_content = Template(ve_mod_mult_file.read())
|
||||
ve_mod_mult_content = template_content.safe_substitute(
|
||||
CURVE_NAME_U=curve_name_upper,
|
||||
@@ -239,7 +239,7 @@ with open("./icicle/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_f
|
||||
f.write(ve_mod_mult_content)
|
||||
|
||||
|
||||
with open(f'./icicle/curves/curve_template/curve_config.cuh', 'r') as cc:
|
||||
with open(f'./icicle/curves/curve_template/curve_config.cuh.tmpl', 'r') as cc:
|
||||
template_content = Template(cc.read())
|
||||
cc_content = template_content.safe_substitute(
|
||||
CURVE_NAME_U=curve_name_upper,
|
||||
@@ -248,7 +248,7 @@ with open(f'./icicle/curves/curve_template/curve_config.cuh', 'r') as cc:
|
||||
f.write(cc_content)
|
||||
|
||||
|
||||
with open(f'./icicle/curves/curve_template/projective.cu', 'r') as proj:
|
||||
with open(f'./icicle/curves/curve_template/projective.cu.tmpl', 'r') as proj:
|
||||
template_content = Template(proj.read())
|
||||
proj_content = template_content.safe_substitute(
|
||||
CURVE_NAME_U=curve_name_upper,
|
||||
@@ -258,7 +258,7 @@ with open(f'./icicle/curves/curve_template/projective.cu', 'r') as proj:
|
||||
f.write(proj_content)
|
||||
|
||||
|
||||
with open(f'./icicle/curves/curve_template/supported_operations.cu', 'r') as supp_ops:
|
||||
with open(f'./icicle/curves/curve_template/supported_operations.cu.tmpl', 'r') as supp_ops:
|
||||
template_content = Template(supp_ops.read())
|
||||
supp_ops_content = template_content.safe_substitute()
|
||||
with open(f'./icicle/curves/{curve_name_lower}/supported_operations.cu', 'w') as f:
|
||||
|
||||
@@ -1,9 +1,6 @@
|
||||
use std::time::Instant;
|
||||
|
||||
use icicle_utils::{
|
||||
curves::bls12_381::{Point_BLS12_381, ScalarField_BLS12_381},
|
||||
test_bls12_381::*,
|
||||
};
|
||||
use icicle_utils::{curves::bls12_381::ScalarField_BLS12_381, test_bls12_381::*};
|
||||
use rustacuda::prelude::DeviceBuffer;
|
||||
|
||||
const LOG_NTT_SIZES: [usize; 3] = [20, 10, 9];
|
||||
@@ -22,13 +19,7 @@ fn bench_lde() {
|
||||
d_twiddles: &mut DeviceBuffer<ScalarField_BLS12_381>,
|
||||
batch_size: usize,
|
||||
) -> i32 {
|
||||
ntt_inplace_batch_bls12_381(
|
||||
d_inout,
|
||||
d_twiddles,
|
||||
batch_size,
|
||||
false,
|
||||
0,
|
||||
);
|
||||
ntt_inplace_batch_bls12_381(d_inout, d_twiddles, batch_size, false, 0);
|
||||
0
|
||||
}
|
||||
|
||||
@@ -37,13 +28,7 @@ fn bench_lde() {
|
||||
d_twiddles: &mut DeviceBuffer<ScalarField_BLS12_381>,
|
||||
batch_size: usize,
|
||||
) -> i32 {
|
||||
ntt_inplace_batch_bls12_381(
|
||||
d_inout,
|
||||
d_twiddles,
|
||||
batch_size,
|
||||
true,
|
||||
0,
|
||||
);
|
||||
ntt_inplace_batch_bls12_381(d_inout, d_twiddles, batch_size, true, 0);
|
||||
0
|
||||
}
|
||||
|
||||
@@ -129,16 +114,8 @@ fn bench_ntt_template<E, S, R>(
|
||||
ntt_size: usize,
|
||||
batch_size: usize,
|
||||
log_ntt_size: usize,
|
||||
set_data: fn(
|
||||
test_size: usize,
|
||||
log_domain_size: usize,
|
||||
inverse: bool,
|
||||
) -> (Vec<E>, DeviceBuffer<E>, DeviceBuffer<S>),
|
||||
bench_fn: fn(
|
||||
d_evaluations: &mut DeviceBuffer<E>,
|
||||
d_domain: &mut DeviceBuffer<S>,
|
||||
batch_size: usize,
|
||||
) -> R,
|
||||
set_data: fn(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<E>, DeviceBuffer<E>, DeviceBuffer<S>),
|
||||
bench_fn: fn(d_evaluations: &mut DeviceBuffer<E>, d_domain: &mut DeviceBuffer<S>, batch_size: usize) -> R,
|
||||
id: &str,
|
||||
inverse: bool,
|
||||
samples: usize,
|
||||
@@ -159,7 +136,7 @@ fn bench_ntt_template<E, S, R>(
|
||||
let first = bench_fn(&mut d_evals, &mut d_domain, batch_size);
|
||||
|
||||
let start = Instant::now();
|
||||
for i in 0..samples {
|
||||
for _ in 0..samples {
|
||||
bench_fn(&mut d_evals, &mut d_domain, batch_size);
|
||||
}
|
||||
let elapsed = start.elapsed();
|
||||
|
||||
@@ -1,23 +1,23 @@
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by Ingonyama DO NOT EDIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <stdbool.h>
|
||||
// msm.h
|
||||
|
||||
#ifndef _BLS12_377_MSM_H
|
||||
@@ -35,24 +35,61 @@ typedef struct BLS12_377_g2_affine_t BLS12_377_g2_affine_t;
|
||||
typedef struct BLS12_377_scalar_t BLS12_377_scalar_t;
|
||||
typedef cudaStream_t CudaStream_t;
|
||||
|
||||
int msm_cuda_bls12_377(BLS12_377_projective_t* out, BLS12_377_affine_t* points,
|
||||
BLS12_377_scalar_t* scalars, size_t count, size_t device_id);
|
||||
int msm_cuda_bls12_377(
|
||||
BLS12_377_projective_t* out, BLS12_377_affine_t* points, BLS12_377_scalar_t* scalars, size_t count, size_t device_id);
|
||||
|
||||
int msm_batch_cuda_bls12_377(BLS12_377_projective_t* out, BLS12_377_affine_t* points,
|
||||
BLS12_377_scalar_t* scalars, size_t batch_size,
|
||||
size_t msm_size, size_t device_id);
|
||||
int msm_batch_cuda_bls12_377(
|
||||
BLS12_377_projective_t* out,
|
||||
BLS12_377_affine_t* points,
|
||||
BLS12_377_scalar_t* scalars,
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id);
|
||||
|
||||
int commit_cuda_bls12_377(BLS12_377_projective_t* d_out, BLS12_377_scalar_t* d_scalars,
|
||||
BLS12_377_affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id);
|
||||
int commit_cuda_bls12_377(
|
||||
BLS12_377_projective_t* d_out,
|
||||
BLS12_377_scalar_t* d_scalars,
|
||||
BLS12_377_affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id);
|
||||
|
||||
int commit_batch_cuda_bls12_377(BLS12_377_projective_t* d_out, BLS12_377_scalar_t* d_scalars,
|
||||
BLS12_377_affine_t* d_points, size_t count,
|
||||
size_t batch_size, size_t device_id);
|
||||
int commit_batch_cuda_bls12_377(
|
||||
BLS12_377_projective_t* d_out,
|
||||
BLS12_377_scalar_t* d_scalars,
|
||||
BLS12_377_affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id);
|
||||
|
||||
int msm_g2_cuda_bls12_377(BLS12_377_g2_projective_t *out, BLS12_377_g2_affine_t* points, BLS12_377_scalar_t* scalars, size_t count, size_t device_id);
|
||||
int msm_batch_g2_cuda_bls12_377(BLS12_377_g2_projective_t* out, BLS12_377_g2_affine_t* points, BLS12_377_scalar_t* scalars, size_t batch_size, size_t msm_size, size_t device_id);
|
||||
int commit_g2_cuda_bls12_377(BLS12_377_g2_projective_t* d_out, BLS12_377_scalar_t* d_scalars, BLS12_377_g2_affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id);
|
||||
int commit_batch_g2_cuda_bls12_377(BLS12_377_g2_projective_t* d_out, BLS12_377_scalar_t* d_scalars, BLS12_377_g2_affine_t* d_points, size_t count, size_t batch_size, size_t device_id, cudaStream_t stream);
|
||||
int msm_g2_cuda_bls12_377(
|
||||
BLS12_377_g2_projective_t* out,
|
||||
BLS12_377_g2_affine_t* points,
|
||||
BLS12_377_scalar_t* scalars,
|
||||
size_t count,
|
||||
size_t device_id);
|
||||
int msm_batch_g2_cuda_bls12_377(
|
||||
BLS12_377_g2_projective_t* out,
|
||||
BLS12_377_g2_affine_t* points,
|
||||
BLS12_377_scalar_t* scalars,
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id);
|
||||
int commit_g2_cuda_bls12_377(
|
||||
BLS12_377_g2_projective_t* d_out,
|
||||
BLS12_377_scalar_t* d_scalars,
|
||||
BLS12_377_g2_affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id);
|
||||
int commit_batch_g2_cuda_bls12_377(
|
||||
BLS12_377_g2_projective_t* d_out,
|
||||
BLS12_377_scalar_t* d_scalars,
|
||||
BLS12_377_g2_affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id,
|
||||
cudaStream_t stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by Ingonyama DO NOT EDIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <stdbool.h>
|
||||
// ntt.h
|
||||
|
||||
#ifndef _BLS12_377_NTT_H
|
||||
@@ -34,34 +34,145 @@ typedef struct BLS12_377_scalar_t BLS12_377_scalar_t;
|
||||
typedef struct BLS12_377_g2_projective_t BLS12_377_g2_projective_t;
|
||||
typedef struct BLS12_377_g2_affine_t BLS12_377_g2_affine_t;
|
||||
|
||||
int ntt_cuda_bls12_377(BLS12_377_scalar_t *arr, uint32_t n, bool inverse, size_t decimation, size_t device_id);
|
||||
int ntt_batch_cuda_bls12_377(BLS12_377_scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
int ntt_cuda_bls12_377(BLS12_377_scalar_t* arr, uint32_t n, bool inverse, size_t decimation, size_t device_id);
|
||||
int ntt_batch_cuda_bls12_377(
|
||||
BLS12_377_scalar_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
|
||||
int ecntt_cuda_bls12_377(BLS12_377_projective_t *arr, uint32_t n, bool inverse, size_t device_id);
|
||||
int ecntt_batch_cuda_bls12_377(BLS12_377_projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
int ecntt_cuda_bls12_377(BLS12_377_projective_t* arr, uint32_t n, bool inverse, size_t device_id);
|
||||
int ecntt_batch_cuda_bls12_377(
|
||||
BLS12_377_projective_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
|
||||
|
||||
BLS12_377_scalar_t* build_domain_cuda_bls12_377(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_cuda_bls12_377(BLS12_377_scalar_t* d_out, BLS12_377_scalar_t *d_evaluations, BLS12_377_scalar_t *d_domain, unsigned n, unsigned device_id, size_t stream);
|
||||
int interpolate_scalars_batch_cuda_bls12_377(BLS12_377_scalar_t* d_out, BLS12_377_scalar_t* d_evaluations, BLS12_377_scalar_t* d_domain, unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int interpolate_points_cuda_bls12_377(BLS12_377_projective_t* d_out, BLS12_377_projective_t *d_evaluations, BLS12_377_scalar_t *d_domain, unsigned n, size_t device_id, size_t stream);
|
||||
int interpolate_points_batch_cuda_bls12_377(BLS12_377_projective_t* d_out, BLS12_377_projective_t* d_evaluations, BLS12_377_scalar_t* d_domain,unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_on_coset_cuda_bls12_377(BLS12_377_scalar_t* d_out, BLS12_377_scalar_t* d_evaluations, BLS12_377_scalar_t* d_domain, unsigned n, BLS12_377_scalar_t* coset_powers, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_batch_on_coset_cuda_bls12_377(BLS12_377_scalar_t* d_out, BLS12_377_scalar_t* d_evaluations, BLS12_377_scalar_t* d_domain, unsigned n, unsigned batch_size, BLS12_377_scalar_t* coset_powers, size_t device_id, size_t stream);
|
||||
int evaluate_scalars_cuda_bls12_377(BLS12_377_scalar_t* d_out, BLS12_377_scalar_t *d_coefficients, BLS12_377_scalar_t *d_domain, unsigned domain_size, unsigned n, unsigned device_id, size_t stream);
|
||||
int evaluate_scalars_batch_cuda_bls12_377(BLS12_377_scalar_t* d_out, BLS12_377_scalar_t* d_coefficients, BLS12_377_scalar_t* d_domain, unsigned domain_size,unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int evaluate_points_cuda_bls12_377(BLS12_377_projective_t* d_out, BLS12_377_projective_t *d_coefficients, BLS12_377_scalar_t *d_domain, unsigned domain_size, unsigned n, size_t device_id, size_t stream);
|
||||
int evaluate_points_batch_cuda_bls12_377(BLS12_377_projective_t* d_out, BLS12_377_projective_t* d_coefficients, BLS12_377_scalar_t* d_domain, unsigned domain_size,unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int evaluate_scalars_on_coset_cuda_bls12_377(BLS12_377_scalar_t* d_out, BLS12_377_scalar_t *d_coefficients, BLS12_377_scalar_t *d_domain, unsigned domain_size,unsigned n, BLS12_377_scalar_t *coset_powers, unsigned device_id, size_t stream);
|
||||
int evaluate_scalars_on_coset_batch_cuda_bls12_377(BLS12_377_scalar_t* d_out, BLS12_377_scalar_t* d_coefficients, BLS12_377_scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, BLS12_377_scalar_t *coset_powers, size_t device_id, size_t stream);
|
||||
int evaluate_points_on_coset_cuda_bls12_377(BLS12_377_projective_t* d_out, BLS12_377_projective_t *d_coefficients, BLS12_377_scalar_t *d_domain, unsigned domain_size,unsigned n, BLS12_377_scalar_t *coset_powers, size_t device_id, size_t stream);
|
||||
int evaluate_points_on_coset_batch_cuda_bls12_377(BLS12_377_projective_t* d_out, BLS12_377_projective_t* d_coefficients, BLS12_377_scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, BLS12_377_scalar_t *coset_powers, size_t device_id, size_t stream);
|
||||
BLS12_377_scalar_t*
|
||||
build_domain_cuda_bls12_377(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_cuda_bls12_377(
|
||||
BLS12_377_scalar_t* d_out,
|
||||
BLS12_377_scalar_t* d_evaluations,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned device_id,
|
||||
size_t stream);
|
||||
int interpolate_scalars_batch_cuda_bls12_377(
|
||||
BLS12_377_scalar_t* d_out,
|
||||
BLS12_377_scalar_t* d_evaluations,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_points_cuda_bls12_377(
|
||||
BLS12_377_projective_t* d_out,
|
||||
BLS12_377_projective_t* d_evaluations,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_points_batch_cuda_bls12_377(
|
||||
BLS12_377_projective_t* d_out,
|
||||
BLS12_377_projective_t* d_evaluations,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_scalars_on_coset_cuda_bls12_377(
|
||||
BLS12_377_scalar_t* d_out,
|
||||
BLS12_377_scalar_t* d_evaluations,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
BLS12_377_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_scalars_batch_on_coset_cuda_bls12_377(
|
||||
BLS12_377_scalar_t* d_out,
|
||||
BLS12_377_scalar_t* d_evaluations,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
BLS12_377_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_cuda_bls12_377(
|
||||
BLS12_377_scalar_t* d_out,
|
||||
BLS12_377_scalar_t* d_coefficients,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_batch_cuda_bls12_377(
|
||||
BLS12_377_scalar_t* d_out,
|
||||
BLS12_377_scalar_t* d_coefficients,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_cuda_bls12_377(
|
||||
BLS12_377_projective_t* d_out,
|
||||
BLS12_377_projective_t* d_coefficients,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_batch_cuda_bls12_377(
|
||||
BLS12_377_projective_t* d_out,
|
||||
BLS12_377_projective_t* d_coefficients,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_on_coset_cuda_bls12_377(
|
||||
BLS12_377_scalar_t* d_out,
|
||||
BLS12_377_scalar_t* d_coefficients,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
BLS12_377_scalar_t* coset_powers,
|
||||
unsigned device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_on_coset_batch_cuda_bls12_377(
|
||||
BLS12_377_scalar_t* d_out,
|
||||
BLS12_377_scalar_t* d_coefficients,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
BLS12_377_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_on_coset_cuda_bls12_377(
|
||||
BLS12_377_projective_t* d_out,
|
||||
BLS12_377_projective_t* d_coefficients,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
BLS12_377_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_on_coset_batch_cuda_bls12_377(
|
||||
BLS12_377_projective_t* d_out,
|
||||
BLS12_377_projective_t* d_coefficients,
|
||||
BLS12_377_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
BLS12_377_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int reverse_order_scalars_cuda_bls12_377(BLS12_377_scalar_t* arr, int n, size_t device_id, size_t stream);
|
||||
int reverse_order_scalars_batch_cuda_bls12_377(BLS12_377_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int reverse_order_scalars_batch_cuda_bls12_377(
|
||||
BLS12_377_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int reverse_order_points_cuda_bls12_377(BLS12_377_projective_t* arr, int n, size_t device_id, size_t stream);
|
||||
int reverse_order_points_batch_cuda_bls12_377(BLS12_377_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int add_scalars_cuda_bls12_377(BLS12_377_scalar_t* d_out, BLS12_377_scalar_t* d_in1, BLS12_377_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int sub_scalars_cuda_bls12_377(BLS12_377_scalar_t* d_out, BLS12_377_scalar_t* d_in1, BLS12_377_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int reverse_order_points_batch_cuda_bls12_377(
|
||||
BLS12_377_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int add_scalars_cuda_bls12_377(
|
||||
BLS12_377_scalar_t* d_out, BLS12_377_scalar_t* d_in1, BLS12_377_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int sub_scalars_cuda_bls12_377(
|
||||
BLS12_377_scalar_t* d_out, BLS12_377_scalar_t* d_in1, BLS12_377_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int to_montgomery_scalars_cuda_bls12_377(BLS12_377_scalar_t* d_inout, unsigned n, size_t stream);
|
||||
int from_montgomery_scalars_cuda_bls12_377(BLS12_377_scalar_t* d_inout, unsigned n, size_t stream);
|
||||
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by Ingonyama DO NOT EDIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <stdbool.h>
|
||||
// projective.h
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -24,25 +24,25 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct BLS12_377_projective_t BLS12_377_projective_t;
|
||||
typedef struct BLS12_377_g2_projective_t BLS12_377_g2_projective_t;
|
||||
typedef struct BLS12_377_g2_projective_t BLS12_377_g2_projective_t;
|
||||
typedef struct BLS12_377_affine_t BLS12_377_affine_t;
|
||||
typedef struct BLS12_377_scalar_t BLS12_377_scalar_t;
|
||||
|
||||
bool projective_is_on_curve_bls12_377(BLS12_377_projective_t *point1);
|
||||
bool projective_is_on_curve_bls12_377(BLS12_377_projective_t* point1);
|
||||
|
||||
BLS12_377_scalar_t* random_scalar_bls12_377();
|
||||
BLS12_377_projective_t* random_projective_bls12_377();
|
||||
BLS12_377_projective_t* projective_zero_bls12_377();
|
||||
BLS12_377_affine_t* projective_to_affine_bls12_377(BLS12_377_projective_t *point1);
|
||||
BLS12_377_projective_t* projective_from_affine_bls12_377(BLS12_377_affine_t *point1);
|
||||
BLS12_377_affine_t* projective_to_affine_bls12_377(BLS12_377_projective_t* point1);
|
||||
BLS12_377_projective_t* projective_from_affine_bls12_377(BLS12_377_affine_t* point1);
|
||||
|
||||
BLS12_377_g2_projective_t* random_g2_projective_bls12_377();
|
||||
BLS12_377_affine_t* g2_projective_to_affine_bls12_377(BLS12_377_g2_projective_t *point1);
|
||||
BLS12_377_g2_projective_t* g2_projective_from_affine_bls12_377(BLS12_377_affine_t *point1);
|
||||
bool g2_projective_is_on_curve_bls12_377(BLS12_377_g2_projective_t *point1);
|
||||
BLS12_377_affine_t* g2_projective_to_affine_bls12_377(BLS12_377_g2_projective_t* point1);
|
||||
BLS12_377_g2_projective_t* g2_projective_from_affine_bls12_377(BLS12_377_affine_t* point1);
|
||||
bool g2_projective_is_on_curve_bls12_377(BLS12_377_g2_projective_t* point1);
|
||||
|
||||
bool eq_bls12_377(BLS12_377_projective_t *point1, BLS12_377_projective_t *point2);
|
||||
bool eq_g2_bls12_377(BLS12_377_g2_projective_t *point1, BLS12_377_g2_projective_t *point2);
|
||||
bool eq_bls12_377(BLS12_377_projective_t* point1, BLS12_377_projective_t* point2);
|
||||
bool eq_g2_bls12_377(BLS12_377_g2_projective_t* point1, BLS12_377_g2_projective_t* point2);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by Ingonyama DO NOT EDIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <stdbool.h>
|
||||
// ve_mod_mult.h
|
||||
|
||||
#ifndef _BLS12_377_VEC_MULT_H
|
||||
@@ -29,11 +29,18 @@ extern "C" {
|
||||
typedef struct BLS12_377_projective_t BLS12_377_projective_t;
|
||||
typedef struct BLS12_377_scalar_t BLS12_377_scalar_t;
|
||||
|
||||
int32_t vec_mod_mult_point_bls12_377(BLS12_377_projective_t *inout, BLS12_377_scalar_t *scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_scalar_bls12_377(BLS12_377_scalar_t *inout, BLS12_377_scalar_t *scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_device_scalar_bls12_377(BLS12_377_scalar_t *inout, BLS12_377_scalar_t *scalar_vec, size_t n_elements, size_t device_id);
|
||||
int32_t matrix_vec_mod_mult_bls12_377(BLS12_377_scalar_t *matrix_flattened, BLS12_377_scalar_t *input, BLS12_377_scalar_t *output, size_t n_elments, size_t device_id);
|
||||
|
||||
int32_t vec_mod_mult_point_bls12_377(
|
||||
BLS12_377_projective_t* inout, BLS12_377_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_scalar_bls12_377(
|
||||
BLS12_377_scalar_t* inout, BLS12_377_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_device_scalar_bls12_377(
|
||||
BLS12_377_scalar_t* inout, BLS12_377_scalar_t* scalar_vec, size_t n_elements, size_t device_id);
|
||||
int32_t matrix_vec_mod_mult_bls12_377(
|
||||
BLS12_377_scalar_t* matrix_flattened,
|
||||
BLS12_377_scalar_t* input,
|
||||
BLS12_377_scalar_t* output,
|
||||
size_t n_elments,
|
||||
size_t device_id);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -1,23 +1,23 @@
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by Ingonyama DO NOT EDIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <stdbool.h>
|
||||
// msm.h
|
||||
|
||||
#ifndef _BLS12_381_MSM_H
|
||||
@@ -35,24 +35,61 @@ typedef struct BLS12_381_g2_affine_t BLS12_381_g2_affine_t;
|
||||
typedef struct BLS12_381_scalar_t BLS12_381_scalar_t;
|
||||
typedef cudaStream_t CudaStream_t;
|
||||
|
||||
int msm_cuda_bls12_381(BLS12_381_projective_t* out, BLS12_381_affine_t* points,
|
||||
BLS12_381_scalar_t* scalars, size_t count, size_t device_id);
|
||||
int msm_cuda_bls12_381(
|
||||
BLS12_381_projective_t* out, BLS12_381_affine_t* points, BLS12_381_scalar_t* scalars, size_t count, size_t device_id);
|
||||
|
||||
int msm_batch_cuda_bls12_381(BLS12_381_projective_t* out, BLS12_381_affine_t* points,
|
||||
BLS12_381_scalar_t* scalars, size_t batch_size,
|
||||
size_t msm_size, size_t device_id);
|
||||
int msm_batch_cuda_bls12_381(
|
||||
BLS12_381_projective_t* out,
|
||||
BLS12_381_affine_t* points,
|
||||
BLS12_381_scalar_t* scalars,
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id);
|
||||
|
||||
int commit_cuda_bls12_381(BLS12_381_projective_t* d_out, BLS12_381_scalar_t* d_scalars,
|
||||
BLS12_381_affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id);
|
||||
int commit_cuda_bls12_381(
|
||||
BLS12_381_projective_t* d_out,
|
||||
BLS12_381_scalar_t* d_scalars,
|
||||
BLS12_381_affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id);
|
||||
|
||||
int commit_batch_cuda_bls12_381(BLS12_381_projective_t* d_out, BLS12_381_scalar_t* d_scalars,
|
||||
BLS12_381_affine_t* d_points, size_t count,
|
||||
size_t batch_size, size_t device_id);
|
||||
int commit_batch_cuda_bls12_381(
|
||||
BLS12_381_projective_t* d_out,
|
||||
BLS12_381_scalar_t* d_scalars,
|
||||
BLS12_381_affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id);
|
||||
|
||||
int msm_g2_cuda_bls12_381(BLS12_381_g2_projective_t *out, BLS12_381_g2_affine_t* points, BLS12_381_scalar_t* scalars, size_t count, size_t device_id);
|
||||
int msm_batch_g2_cuda_bls12_381(BLS12_381_g2_projective_t* out, BLS12_381_g2_affine_t* points, BLS12_381_scalar_t* scalars, size_t batch_size, size_t msm_size, size_t device_id);
|
||||
int commit_g2_cuda_bls12_381(BLS12_381_g2_projective_t* d_out, BLS12_381_scalar_t* d_scalars, BLS12_381_g2_affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id);
|
||||
int commit_batch_g2_cuda_bls12_381(BLS12_381_g2_projective_t* d_out, BLS12_381_scalar_t* d_scalars, BLS12_381_g2_affine_t* d_points, size_t count, size_t batch_size, size_t device_id, cudaStream_t stream);
|
||||
int msm_g2_cuda_bls12_381(
|
||||
BLS12_381_g2_projective_t* out,
|
||||
BLS12_381_g2_affine_t* points,
|
||||
BLS12_381_scalar_t* scalars,
|
||||
size_t count,
|
||||
size_t device_id);
|
||||
int msm_batch_g2_cuda_bls12_381(
|
||||
BLS12_381_g2_projective_t* out,
|
||||
BLS12_381_g2_affine_t* points,
|
||||
BLS12_381_scalar_t* scalars,
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id);
|
||||
int commit_g2_cuda_bls12_381(
|
||||
BLS12_381_g2_projective_t* d_out,
|
||||
BLS12_381_scalar_t* d_scalars,
|
||||
BLS12_381_g2_affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id);
|
||||
int commit_batch_g2_cuda_bls12_381(
|
||||
BLS12_381_g2_projective_t* d_out,
|
||||
BLS12_381_scalar_t* d_scalars,
|
||||
BLS12_381_g2_affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id,
|
||||
cudaStream_t stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by Ingonyama DO NOT EDIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <stdbool.h>
|
||||
// ntt.h
|
||||
|
||||
#ifndef _BLS12_381_NTT_H
|
||||
@@ -34,34 +34,145 @@ typedef struct BLS12_381_scalar_t BLS12_381_scalar_t;
|
||||
typedef struct BLS12_381_g2_projective_t BLS12_381_g2_projective_t;
|
||||
typedef struct BLS12_381_g2_affine_t BLS12_381_g2_affine_t;
|
||||
|
||||
int ntt_cuda_bls12_381(BLS12_381_scalar_t *arr, uint32_t n, bool inverse, size_t decimation, size_t device_id);
|
||||
int ntt_batch_cuda_bls12_381(BLS12_381_scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
int ntt_cuda_bls12_381(BLS12_381_scalar_t* arr, uint32_t n, bool inverse, size_t decimation, size_t device_id);
|
||||
int ntt_batch_cuda_bls12_381(
|
||||
BLS12_381_scalar_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
|
||||
int ecntt_cuda_bls12_381(BLS12_381_projective_t *arr, uint32_t n, bool inverse, size_t device_id);
|
||||
int ecntt_batch_cuda_bls12_381(BLS12_381_projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
int ecntt_cuda_bls12_381(BLS12_381_projective_t* arr, uint32_t n, bool inverse, size_t device_id);
|
||||
int ecntt_batch_cuda_bls12_381(
|
||||
BLS12_381_projective_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
|
||||
|
||||
BLS12_381_scalar_t* build_domain_cuda_bls12_381(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_cuda_bls12_381(BLS12_381_scalar_t* d_out, BLS12_381_scalar_t *d_evaluations, BLS12_381_scalar_t *d_domain, unsigned n, unsigned device_id, size_t stream);
|
||||
int interpolate_scalars_batch_cuda_bls12_381(BLS12_381_scalar_t* d_out, BLS12_381_scalar_t* d_evaluations, BLS12_381_scalar_t* d_domain, unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int interpolate_points_cuda_bls12_381(BLS12_381_projective_t* d_out, BLS12_381_projective_t *d_evaluations, BLS12_381_scalar_t *d_domain, unsigned n, size_t device_id, size_t stream);
|
||||
int interpolate_points_batch_cuda_bls12_381(BLS12_381_projective_t* d_out, BLS12_381_projective_t* d_evaluations, BLS12_381_scalar_t* d_domain,unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_on_coset_cuda_bls12_381(BLS12_381_scalar_t* d_out, BLS12_381_scalar_t* d_evaluations, BLS12_381_scalar_t* d_domain, unsigned n, BLS12_381_scalar_t* coset_powers, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_batch_on_coset_cuda_bls12_381(BLS12_381_scalar_t* d_out, BLS12_381_scalar_t* d_evaluations, BLS12_381_scalar_t* d_domain, unsigned n, unsigned batch_size, BLS12_381_scalar_t* coset_powers, size_t device_id, size_t stream);
|
||||
int evaluate_scalars_cuda_bls12_381(BLS12_381_scalar_t* d_out, BLS12_381_scalar_t *d_coefficients, BLS12_381_scalar_t *d_domain, unsigned domain_size, unsigned n, unsigned device_id, size_t stream);
|
||||
int evaluate_scalars_batch_cuda_bls12_381(BLS12_381_scalar_t* d_out, BLS12_381_scalar_t* d_coefficients, BLS12_381_scalar_t* d_domain, unsigned domain_size,unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int evaluate_points_cuda_bls12_381(BLS12_381_projective_t* d_out, BLS12_381_projective_t *d_coefficients, BLS12_381_scalar_t *d_domain, unsigned domain_size, unsigned n, size_t device_id, size_t stream);
|
||||
int evaluate_points_batch_cuda_bls12_381(BLS12_381_projective_t* d_out, BLS12_381_projective_t* d_coefficients, BLS12_381_scalar_t* d_domain, unsigned domain_size,unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int evaluate_scalars_on_coset_cuda_bls12_381(BLS12_381_scalar_t* d_out, BLS12_381_scalar_t *d_coefficients, BLS12_381_scalar_t *d_domain, unsigned domain_size,unsigned n, BLS12_381_scalar_t *coset_powers, unsigned device_id, size_t stream);
|
||||
int evaluate_scalars_on_coset_batch_cuda_bls12_381(BLS12_381_scalar_t* d_out, BLS12_381_scalar_t* d_coefficients, BLS12_381_scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, BLS12_381_scalar_t *coset_powers, size_t device_id, size_t stream);
|
||||
int evaluate_points_on_coset_cuda_bls12_381(BLS12_381_projective_t* d_out, BLS12_381_projective_t *d_coefficients, BLS12_381_scalar_t *d_domain, unsigned domain_size,unsigned n, BLS12_381_scalar_t *coset_powers, size_t device_id, size_t stream);
|
||||
int evaluate_points_on_coset_batch_cuda_bls12_381(BLS12_381_projective_t* d_out, BLS12_381_projective_t* d_coefficients, BLS12_381_scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, BLS12_381_scalar_t *coset_powers, size_t device_id, size_t stream);
|
||||
BLS12_381_scalar_t*
|
||||
build_domain_cuda_bls12_381(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_cuda_bls12_381(
|
||||
BLS12_381_scalar_t* d_out,
|
||||
BLS12_381_scalar_t* d_evaluations,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned device_id,
|
||||
size_t stream);
|
||||
int interpolate_scalars_batch_cuda_bls12_381(
|
||||
BLS12_381_scalar_t* d_out,
|
||||
BLS12_381_scalar_t* d_evaluations,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_points_cuda_bls12_381(
|
||||
BLS12_381_projective_t* d_out,
|
||||
BLS12_381_projective_t* d_evaluations,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_points_batch_cuda_bls12_381(
|
||||
BLS12_381_projective_t* d_out,
|
||||
BLS12_381_projective_t* d_evaluations,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_scalars_on_coset_cuda_bls12_381(
|
||||
BLS12_381_scalar_t* d_out,
|
||||
BLS12_381_scalar_t* d_evaluations,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
BLS12_381_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_scalars_batch_on_coset_cuda_bls12_381(
|
||||
BLS12_381_scalar_t* d_out,
|
||||
BLS12_381_scalar_t* d_evaluations,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
BLS12_381_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_cuda_bls12_381(
|
||||
BLS12_381_scalar_t* d_out,
|
||||
BLS12_381_scalar_t* d_coefficients,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_batch_cuda_bls12_381(
|
||||
BLS12_381_scalar_t* d_out,
|
||||
BLS12_381_scalar_t* d_coefficients,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_cuda_bls12_381(
|
||||
BLS12_381_projective_t* d_out,
|
||||
BLS12_381_projective_t* d_coefficients,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_batch_cuda_bls12_381(
|
||||
BLS12_381_projective_t* d_out,
|
||||
BLS12_381_projective_t* d_coefficients,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_on_coset_cuda_bls12_381(
|
||||
BLS12_381_scalar_t* d_out,
|
||||
BLS12_381_scalar_t* d_coefficients,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
BLS12_381_scalar_t* coset_powers,
|
||||
unsigned device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_on_coset_batch_cuda_bls12_381(
|
||||
BLS12_381_scalar_t* d_out,
|
||||
BLS12_381_scalar_t* d_coefficients,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
BLS12_381_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_on_coset_cuda_bls12_381(
|
||||
BLS12_381_projective_t* d_out,
|
||||
BLS12_381_projective_t* d_coefficients,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
BLS12_381_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_on_coset_batch_cuda_bls12_381(
|
||||
BLS12_381_projective_t* d_out,
|
||||
BLS12_381_projective_t* d_coefficients,
|
||||
BLS12_381_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
BLS12_381_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int reverse_order_scalars_cuda_bls12_381(BLS12_381_scalar_t* arr, int n, size_t device_id, size_t stream);
|
||||
int reverse_order_scalars_batch_cuda_bls12_381(BLS12_381_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int reverse_order_scalars_batch_cuda_bls12_381(
|
||||
BLS12_381_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int reverse_order_points_cuda_bls12_381(BLS12_381_projective_t* arr, int n, size_t device_id, size_t stream);
|
||||
int reverse_order_points_batch_cuda_bls12_381(BLS12_381_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int add_scalars_cuda_bls12_381(BLS12_381_scalar_t* d_out, BLS12_381_scalar_t* d_in1, BLS12_381_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int sub_scalars_cuda_bls12_381(BLS12_381_scalar_t* d_out, BLS12_381_scalar_t* d_in1, BLS12_381_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int reverse_order_points_batch_cuda_bls12_381(
|
||||
BLS12_381_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int add_scalars_cuda_bls12_381(
|
||||
BLS12_381_scalar_t* d_out, BLS12_381_scalar_t* d_in1, BLS12_381_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int sub_scalars_cuda_bls12_381(
|
||||
BLS12_381_scalar_t* d_out, BLS12_381_scalar_t* d_in1, BLS12_381_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int to_montgomery_scalars_cuda_bls12_381(BLS12_381_scalar_t* d_inout, unsigned n, size_t stream);
|
||||
int from_montgomery_scalars_cuda_bls12_381(BLS12_381_scalar_t* d_inout, unsigned n, size_t stream);
|
||||
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by Ingonyama DO NOT EDIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <stdbool.h>
|
||||
// projective.h
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -24,25 +24,25 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct BLS12_381_projective_t BLS12_381_projective_t;
|
||||
typedef struct BLS12_381_g2_projective_t BLS12_381_g2_projective_t;
|
||||
typedef struct BLS12_381_g2_projective_t BLS12_381_g2_projective_t;
|
||||
typedef struct BLS12_381_affine_t BLS12_381_affine_t;
|
||||
typedef struct BLS12_381_scalar_t BLS12_381_scalar_t;
|
||||
|
||||
bool projective_is_on_curve_bls12_381(BLS12_381_projective_t *point1);
|
||||
bool projective_is_on_curve_bls12_381(BLS12_381_projective_t* point1);
|
||||
|
||||
BLS12_381_scalar_t* random_scalar_bls12_381();
|
||||
BLS12_381_projective_t* random_projective_bls12_381();
|
||||
BLS12_381_projective_t* projective_zero_bls12_381();
|
||||
BLS12_381_affine_t* projective_to_affine_bls12_381(BLS12_381_projective_t *point1);
|
||||
BLS12_381_projective_t* projective_from_affine_bls12_381(BLS12_381_affine_t *point1);
|
||||
BLS12_381_affine_t* projective_to_affine_bls12_381(BLS12_381_projective_t* point1);
|
||||
BLS12_381_projective_t* projective_from_affine_bls12_381(BLS12_381_affine_t* point1);
|
||||
|
||||
BLS12_381_g2_projective_t* random_g2_projective_bls12_381();
|
||||
BLS12_381_affine_t* g2_projective_to_affine_bls12_381(BLS12_381_g2_projective_t *point1);
|
||||
BLS12_381_g2_projective_t* g2_projective_from_affine_bls12_381(BLS12_381_affine_t *point1);
|
||||
bool g2_projective_is_on_curve_bls12_381(BLS12_381_g2_projective_t *point1);
|
||||
BLS12_381_affine_t* g2_projective_to_affine_bls12_381(BLS12_381_g2_projective_t* point1);
|
||||
BLS12_381_g2_projective_t* g2_projective_from_affine_bls12_381(BLS12_381_affine_t* point1);
|
||||
bool g2_projective_is_on_curve_bls12_381(BLS12_381_g2_projective_t* point1);
|
||||
|
||||
bool eq_bls12_381(BLS12_381_projective_t *point1, BLS12_381_projective_t *point2);
|
||||
bool eq_g2_bls12_381(BLS12_381_g2_projective_t *point1, BLS12_381_g2_projective_t *point2);
|
||||
bool eq_bls12_381(BLS12_381_projective_t* point1, BLS12_381_projective_t* point2);
|
||||
bool eq_g2_bls12_381(BLS12_381_g2_projective_t* point1, BLS12_381_g2_projective_t* point2);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by Ingonyama DO NOT EDIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <stdbool.h>
|
||||
// ve_mod_mult.h
|
||||
|
||||
#ifndef _BLS12_381_VEC_MULT_H
|
||||
@@ -29,11 +29,18 @@ extern "C" {
|
||||
typedef struct BLS12_381_projective_t BLS12_381_projective_t;
|
||||
typedef struct BLS12_381_scalar_t BLS12_381_scalar_t;
|
||||
|
||||
int32_t vec_mod_mult_point_bls12_381(BLS12_381_projective_t *inout, BLS12_381_scalar_t *scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_scalar_bls12_381(BLS12_381_scalar_t *inout, BLS12_381_scalar_t *scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_device_scalar_bls12_381(BLS12_381_scalar_t *inout, BLS12_381_scalar_t *scalar_vec, size_t n_elements, size_t device_id);
|
||||
int32_t matrix_vec_mod_mult_bls12_381(BLS12_381_scalar_t *matrix_flattened, BLS12_381_scalar_t *input, BLS12_381_scalar_t *output, size_t n_elments, size_t device_id);
|
||||
|
||||
int32_t vec_mod_mult_point_bls12_381(
|
||||
BLS12_381_projective_t* inout, BLS12_381_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_scalar_bls12_381(
|
||||
BLS12_381_scalar_t* inout, BLS12_381_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_device_scalar_bls12_381(
|
||||
BLS12_381_scalar_t* inout, BLS12_381_scalar_t* scalar_vec, size_t n_elements, size_t device_id);
|
||||
int32_t matrix_vec_mod_mult_bls12_381(
|
||||
BLS12_381_scalar_t* matrix_flattened,
|
||||
BLS12_381_scalar_t* input,
|
||||
BLS12_381_scalar_t* output,
|
||||
size_t n_elments,
|
||||
size_t device_id);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -1,23 +1,23 @@
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by Ingonyama DO NOT EDIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <stdbool.h>
|
||||
// msm.h
|
||||
|
||||
#ifndef _BN254_MSM_H
|
||||
@@ -35,24 +35,57 @@ typedef struct BN254_g2_affine_t BN254_g2_affine_t;
|
||||
typedef struct BN254_scalar_t BN254_scalar_t;
|
||||
typedef cudaStream_t CudaStream_t;
|
||||
|
||||
int msm_cuda_bn254(BN254_projective_t* out, BN254_affine_t* points,
|
||||
BN254_scalar_t* scalars, size_t count, size_t device_id);
|
||||
int msm_cuda_bn254(
|
||||
BN254_projective_t* out, BN254_affine_t* points, BN254_scalar_t* scalars, size_t count, size_t device_id);
|
||||
|
||||
int msm_batch_cuda_bn254(BN254_projective_t* out, BN254_affine_t* points,
|
||||
BN254_scalar_t* scalars, size_t batch_size,
|
||||
size_t msm_size, size_t device_id);
|
||||
int msm_batch_cuda_bn254(
|
||||
BN254_projective_t* out,
|
||||
BN254_affine_t* points,
|
||||
BN254_scalar_t* scalars,
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id);
|
||||
|
||||
int commit_cuda_bn254(BN254_projective_t* d_out, BN254_scalar_t* d_scalars,
|
||||
BN254_affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id);
|
||||
int commit_cuda_bn254(
|
||||
BN254_projective_t* d_out,
|
||||
BN254_scalar_t* d_scalars,
|
||||
BN254_affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id);
|
||||
|
||||
int commit_batch_cuda_bn254(BN254_projective_t* d_out, BN254_scalar_t* d_scalars,
|
||||
BN254_affine_t* d_points, size_t count,
|
||||
size_t batch_size, size_t device_id);
|
||||
int commit_batch_cuda_bn254(
|
||||
BN254_projective_t* d_out,
|
||||
BN254_scalar_t* d_scalars,
|
||||
BN254_affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id);
|
||||
|
||||
int msm_g2_cuda_bn254(BN254_g2_projective_t *out, BN254_g2_affine_t* points, BN254_scalar_t* scalars, size_t count, size_t device_id);
|
||||
int msm_batch_g2_cuda_bn254(BN254_g2_projective_t* out, BN254_g2_affine_t* points, BN254_scalar_t* scalars, size_t batch_size, size_t msm_size, size_t device_id);
|
||||
int commit_g2_cuda_bn254(BN254_g2_projective_t* d_out, BN254_scalar_t* d_scalars, BN254_g2_affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id);
|
||||
int commit_batch_g2_cuda_bn254(BN254_g2_projective_t* d_out, BN254_scalar_t* d_scalars, BN254_g2_affine_t* d_points, size_t count, size_t batch_size, size_t device_id, cudaStream_t stream);
|
||||
int msm_g2_cuda_bn254(
|
||||
BN254_g2_projective_t* out, BN254_g2_affine_t* points, BN254_scalar_t* scalars, size_t count, size_t device_id);
|
||||
int msm_batch_g2_cuda_bn254(
|
||||
BN254_g2_projective_t* out,
|
||||
BN254_g2_affine_t* points,
|
||||
BN254_scalar_t* scalars,
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id);
|
||||
int commit_g2_cuda_bn254(
|
||||
BN254_g2_projective_t* d_out,
|
||||
BN254_scalar_t* d_scalars,
|
||||
BN254_g2_affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id);
|
||||
int commit_batch_g2_cuda_bn254(
|
||||
BN254_g2_projective_t* d_out,
|
||||
BN254_scalar_t* d_scalars,
|
||||
BN254_g2_affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id,
|
||||
cudaStream_t stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by Ingonyama DO NOT EDIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <stdbool.h>
|
||||
// ntt.h
|
||||
|
||||
#ifndef _BN254_NTT_H
|
||||
@@ -34,34 +34,143 @@ typedef struct BN254_scalar_t BN254_scalar_t;
|
||||
typedef struct BN254_g2_projective_t BN254_g2_projective_t;
|
||||
typedef struct BN254_g2_affine_t BN254_g2_affine_t;
|
||||
|
||||
int ntt_cuda_bn254(BN254_scalar_t *arr, uint32_t n, bool inverse, size_t decimation, size_t device_id);
|
||||
int ntt_batch_cuda_bn254(BN254_scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
int ntt_cuda_bn254(BN254_scalar_t* arr, uint32_t n, bool inverse, size_t decimation, size_t device_id);
|
||||
int ntt_batch_cuda_bn254(BN254_scalar_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
|
||||
int ecntt_cuda_bn254(BN254_projective_t *arr, uint32_t n, bool inverse, size_t device_id);
|
||||
int ecntt_batch_cuda_bn254(BN254_projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
int ecntt_cuda_bn254(BN254_projective_t* arr, uint32_t n, bool inverse, size_t device_id);
|
||||
int ecntt_batch_cuda_bn254(
|
||||
BN254_projective_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
|
||||
|
||||
BN254_scalar_t* build_domain_cuda_bn254(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_cuda_bn254(BN254_scalar_t* d_out, BN254_scalar_t *d_evaluations, BN254_scalar_t *d_domain, unsigned n, unsigned device_id, size_t stream);
|
||||
int interpolate_scalars_batch_cuda_bn254(BN254_scalar_t* d_out, BN254_scalar_t* d_evaluations, BN254_scalar_t* d_domain, unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int interpolate_points_cuda_bn254(BN254_projective_t* d_out, BN254_projective_t *d_evaluations, BN254_scalar_t *d_domain, unsigned n, size_t device_id, size_t stream);
|
||||
int interpolate_points_batch_cuda_bn254(BN254_projective_t* d_out, BN254_projective_t* d_evaluations, BN254_scalar_t* d_domain,unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_on_coset_cuda_bn254(BN254_scalar_t* d_out, BN254_scalar_t* d_evaluations, BN254_scalar_t* d_domain, unsigned n, BN254_scalar_t* coset_powers, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_batch_on_coset_cuda_bn254(BN254_scalar_t* d_out, BN254_scalar_t* d_evaluations, BN254_scalar_t* d_domain, unsigned n, unsigned batch_size, BN254_scalar_t* coset_powers, size_t device_id, size_t stream);
|
||||
int evaluate_scalars_cuda_bn254(BN254_scalar_t* d_out, BN254_scalar_t *d_coefficients, BN254_scalar_t *d_domain, unsigned domain_size, unsigned n, unsigned device_id, size_t stream);
|
||||
int evaluate_scalars_batch_cuda_bn254(BN254_scalar_t* d_out, BN254_scalar_t* d_coefficients, BN254_scalar_t* d_domain, unsigned domain_size,unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int evaluate_points_cuda_bn254(BN254_projective_t* d_out, BN254_projective_t *d_coefficients, BN254_scalar_t *d_domain, unsigned domain_size, unsigned n, size_t device_id, size_t stream);
|
||||
int evaluate_points_batch_cuda_bn254(BN254_projective_t* d_out, BN254_projective_t* d_coefficients, BN254_scalar_t* d_domain, unsigned domain_size,unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int evaluate_scalars_on_coset_cuda_bn254(BN254_scalar_t* d_out, BN254_scalar_t *d_coefficients, BN254_scalar_t *d_domain, unsigned domain_size,unsigned n, BN254_scalar_t *coset_powers, unsigned device_id, size_t stream);
|
||||
int evaluate_scalars_on_coset_batch_cuda_bn254(BN254_scalar_t* d_out, BN254_scalar_t* d_coefficients, BN254_scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, BN254_scalar_t *coset_powers, size_t device_id, size_t stream);
|
||||
int evaluate_points_on_coset_cuda_bn254(BN254_projective_t* d_out, BN254_projective_t *d_coefficients, BN254_scalar_t *d_domain, unsigned domain_size,unsigned n, BN254_scalar_t *coset_powers, size_t device_id, size_t stream);
|
||||
int evaluate_points_on_coset_batch_cuda_bn254(BN254_projective_t* d_out, BN254_projective_t* d_coefficients, BN254_scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, BN254_scalar_t *coset_powers, size_t device_id, size_t stream);
|
||||
BN254_scalar_t*
|
||||
build_domain_cuda_bn254(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_cuda_bn254(
|
||||
BN254_scalar_t* d_out,
|
||||
BN254_scalar_t* d_evaluations,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned device_id,
|
||||
size_t stream);
|
||||
int interpolate_scalars_batch_cuda_bn254(
|
||||
BN254_scalar_t* d_out,
|
||||
BN254_scalar_t* d_evaluations,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_points_cuda_bn254(
|
||||
BN254_projective_t* d_out,
|
||||
BN254_projective_t* d_evaluations,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_points_batch_cuda_bn254(
|
||||
BN254_projective_t* d_out,
|
||||
BN254_projective_t* d_evaluations,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_scalars_on_coset_cuda_bn254(
|
||||
BN254_scalar_t* d_out,
|
||||
BN254_scalar_t* d_evaluations,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
BN254_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_scalars_batch_on_coset_cuda_bn254(
|
||||
BN254_scalar_t* d_out,
|
||||
BN254_scalar_t* d_evaluations,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
BN254_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_cuda_bn254(
|
||||
BN254_scalar_t* d_out,
|
||||
BN254_scalar_t* d_coefficients,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_batch_cuda_bn254(
|
||||
BN254_scalar_t* d_out,
|
||||
BN254_scalar_t* d_coefficients,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_cuda_bn254(
|
||||
BN254_projective_t* d_out,
|
||||
BN254_projective_t* d_coefficients,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_batch_cuda_bn254(
|
||||
BN254_projective_t* d_out,
|
||||
BN254_projective_t* d_coefficients,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_on_coset_cuda_bn254(
|
||||
BN254_scalar_t* d_out,
|
||||
BN254_scalar_t* d_coefficients,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
BN254_scalar_t* coset_powers,
|
||||
unsigned device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_on_coset_batch_cuda_bn254(
|
||||
BN254_scalar_t* d_out,
|
||||
BN254_scalar_t* d_coefficients,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
BN254_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_on_coset_cuda_bn254(
|
||||
BN254_projective_t* d_out,
|
||||
BN254_projective_t* d_coefficients,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
BN254_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_on_coset_batch_cuda_bn254(
|
||||
BN254_projective_t* d_out,
|
||||
BN254_projective_t* d_coefficients,
|
||||
BN254_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
BN254_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int reverse_order_scalars_cuda_bn254(BN254_scalar_t* arr, int n, size_t device_id, size_t stream);
|
||||
int reverse_order_scalars_batch_cuda_bn254(BN254_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int reverse_order_points_cuda_bn254(BN254_projective_t* arr, int n, size_t device_id, size_t stream);
|
||||
int reverse_order_points_batch_cuda_bn254(BN254_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int add_scalars_cuda_bn254(BN254_scalar_t* d_out, BN254_scalar_t* d_in1, BN254_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int sub_scalars_cuda_bn254(BN254_scalar_t* d_out, BN254_scalar_t* d_in1, BN254_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int reverse_order_points_batch_cuda_bn254(
|
||||
BN254_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int add_scalars_cuda_bn254(
|
||||
BN254_scalar_t* d_out, BN254_scalar_t* d_in1, BN254_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int sub_scalars_cuda_bn254(
|
||||
BN254_scalar_t* d_out, BN254_scalar_t* d_in1, BN254_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int to_montgomery_scalars_cuda_bn254(BN254_scalar_t* d_inout, unsigned n, size_t stream);
|
||||
int from_montgomery_scalars_cuda_bn254(BN254_scalar_t* d_inout, unsigned n, size_t stream);
|
||||
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by Ingonyama DO NOT EDIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <stdbool.h>
|
||||
// projective.h
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -24,25 +24,25 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct BN254_projective_t BN254_projective_t;
|
||||
typedef struct BN254_g2_projective_t BN254_g2_projective_t;
|
||||
typedef struct BN254_g2_projective_t BN254_g2_projective_t;
|
||||
typedef struct BN254_affine_t BN254_affine_t;
|
||||
typedef struct BN254_scalar_t BN254_scalar_t;
|
||||
|
||||
bool projective_is_on_curve_bn254(BN254_projective_t *point1);
|
||||
bool projective_is_on_curve_bn254(BN254_projective_t* point1);
|
||||
|
||||
BN254_scalar_t* random_scalar_bn254();
|
||||
BN254_projective_t* random_projective_bn254();
|
||||
BN254_projective_t* projective_zero_bn254();
|
||||
BN254_affine_t* projective_to_affine_bn254(BN254_projective_t *point1);
|
||||
BN254_projective_t* projective_from_affine_bn254(BN254_affine_t *point1);
|
||||
BN254_affine_t* projective_to_affine_bn254(BN254_projective_t* point1);
|
||||
BN254_projective_t* projective_from_affine_bn254(BN254_affine_t* point1);
|
||||
|
||||
BN254_g2_projective_t* random_g2_projective_bn254();
|
||||
BN254_affine_t* g2_projective_to_affine_bn254(BN254_g2_projective_t *point1);
|
||||
BN254_g2_projective_t* g2_projective_from_affine_bn254(BN254_affine_t *point1);
|
||||
bool g2_projective_is_on_curve_bn254(BN254_g2_projective_t *point1);
|
||||
BN254_affine_t* g2_projective_to_affine_bn254(BN254_g2_projective_t* point1);
|
||||
BN254_g2_projective_t* g2_projective_from_affine_bn254(BN254_affine_t* point1);
|
||||
bool g2_projective_is_on_curve_bn254(BN254_g2_projective_t* point1);
|
||||
|
||||
bool eq_bn254(BN254_projective_t *point1, BN254_projective_t *point2);
|
||||
bool eq_g2_bn254(BN254_g2_projective_t *point1, BN254_g2_projective_t *point2);
|
||||
bool eq_bn254(BN254_projective_t* point1, BN254_projective_t* point2);
|
||||
bool eq_g2_bn254(BN254_g2_projective_t* point1, BN254_g2_projective_t* point2);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Copyright 2023 Ingonyama
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by Ingonyama DO NOT EDIT
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <stdbool.h>
|
||||
// ve_mod_mult.h
|
||||
|
||||
#ifndef _BN254_VEC_MULT_H
|
||||
@@ -29,11 +29,14 @@ extern "C" {
|
||||
typedef struct BN254_projective_t BN254_projective_t;
|
||||
typedef struct BN254_scalar_t BN254_scalar_t;
|
||||
|
||||
int32_t vec_mod_mult_point_bn254(BN254_projective_t *inout, BN254_scalar_t *scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_scalar_bn254(BN254_scalar_t *inout, BN254_scalar_t *scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_device_scalar_bn254(BN254_scalar_t *inout, BN254_scalar_t *scalar_vec, size_t n_elements, size_t device_id);
|
||||
int32_t matrix_vec_mod_mult_bn254(BN254_scalar_t *matrix_flattened, BN254_scalar_t *input, BN254_scalar_t *output, size_t n_elments, size_t device_id);
|
||||
|
||||
int32_t
|
||||
vec_mod_mult_point_bn254(BN254_projective_t* inout, BN254_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t
|
||||
vec_mod_mult_scalar_bn254(BN254_scalar_t* inout, BN254_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_device_scalar_bn254(
|
||||
BN254_scalar_t* inout, BN254_scalar_t* scalar_vec, size_t n_elements, size_t device_id);
|
||||
int32_t matrix_vec_mod_mult_bn254(
|
||||
BN254_scalar_t* matrix_flattened, BN254_scalar_t* input, BN254_scalar_t* output, size_t n_elments, size_t device_id);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
#include <stdbool.h>
|
||||
// msm.h
|
||||
|
||||
#ifndef _{{.CurveNameUpperCase}}_MSM_H
|
||||
@@ -18,24 +18,64 @@ typedef struct {{.CurveNameUpperCase}}_g2_affine_t {{.CurveNameUpperCase}}_g2_af
|
||||
typedef struct {{.CurveNameUpperCase}}_scalar_t {{.CurveNameUpperCase}}_scalar_t;
|
||||
typedef cudaStream_t CudaStream_t;
|
||||
|
||||
int msm_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* out, {{.CurveNameUpperCase}}_affine_t* points,
|
||||
{{.CurveNameUpperCase}}_scalar_t* scalars, size_t count, size_t device_id);
|
||||
int msm_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* out, {{.CurveNameUpperCase}}_affine_t* points, {{.CurveNameUpperCase}}_scalar_t* scalars, size_t count, size_t device_id);
|
||||
|
||||
int msm_batch_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* out, {{.CurveNameUpperCase}}_affine_t* points,
|
||||
{{.CurveNameUpperCase}}_scalar_t* scalars, size_t batch_size,
|
||||
size_t msm_size, size_t device_id);
|
||||
int msm_batch_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* out,
|
||||
{{.CurveNameUpperCase}}_affine_t* points,
|
||||
{{.CurveNameUpperCase}}_scalar_t* scalars,
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id);
|
||||
|
||||
int commit_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_scalars,
|
||||
{{.CurveNameUpperCase}}_affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id);
|
||||
int commit_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* d_out,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_scalars,
|
||||
{{.CurveNameUpperCase}}_affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id);
|
||||
|
||||
int commit_batch_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_scalars,
|
||||
{{.CurveNameUpperCase}}_affine_t* d_points, size_t count,
|
||||
size_t batch_size, size_t device_id);
|
||||
int commit_batch_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* d_out,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_scalars,
|
||||
{{.CurveNameUpperCase}}_affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id);
|
||||
|
||||
int msm_g2_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t *out, {{.CurveNameUpperCase}}_g2_affine_t* points, {{.CurveNameUpperCase}}_scalar_t* scalars, size_t count, size_t device_id);
|
||||
int msm_batch_g2_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* out, {{.CurveNameUpperCase}}_g2_affine_t* points, {{.CurveNameUpperCase}}_scalar_t* scalars, size_t batch_size, size_t msm_size, size_t device_id);
|
||||
int commit_g2_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_scalars, {{.CurveNameUpperCase}}_g2_affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id);
|
||||
int commit_batch_g2_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_scalars, {{.CurveNameUpperCase}}_g2_affine_t* d_points, size_t count, size_t batch_size, size_t device_id, cudaStream_t stream);
|
||||
int msm_g2_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_g2_projective_t* out,
|
||||
{{.CurveNameUpperCase}}_g2_affine_t* points,
|
||||
{{.CurveNameUpperCase}}_scalar_t* scalars,
|
||||
size_t count,
|
||||
size_t device_id);
|
||||
|
||||
int msm_batch_g2_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_g2_projective_t* out,
|
||||
{{.CurveNameUpperCase}}_g2_affine_t* points,
|
||||
{{.CurveNameUpperCase}}_scalar_t* scalars,
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id);
|
||||
|
||||
int commit_g2_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_g2_projective_t* d_out,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_scalars,
|
||||
{{.CurveNameUpperCase}}_g2_affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id);
|
||||
|
||||
int commit_batch_g2_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_g2_projective_t* d_out,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_scalars,
|
||||
{{.CurveNameUpperCase}}_g2_affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id,
|
||||
cudaStream_t stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <stdbool.h>
|
||||
// ntt.h
|
||||
|
||||
#ifndef _{{.CurveNameUpperCase}}_NTT_H
|
||||
@@ -17,34 +17,148 @@ typedef struct {{.CurveNameUpperCase}}_scalar_t {{.CurveNameUpperCase}}_scalar_t
|
||||
typedef struct {{.CurveNameUpperCase}}_g2_projective_t {{.CurveNameUpperCase}}_g2_projective_t;
|
||||
typedef struct {{.CurveNameUpperCase}}_g2_affine_t {{.CurveNameUpperCase}}_g2_affine_t;
|
||||
|
||||
int ntt_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t *arr, uint32_t n, bool inverse, size_t decimation, size_t device_id);
|
||||
int ntt_batch_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
int ntt_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* arr, uint32_t n, bool inverse, size_t decimation, size_t device_id);
|
||||
int ntt_batch_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
|
||||
int ecntt_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t *arr, uint32_t n, bool inverse, size_t device_id);
|
||||
int ecntt_batch_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
int ecntt_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* arr, uint32_t n, bool inverse, size_t device_id);
|
||||
int ecntt_batch_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
|
||||
|
||||
{{.CurveNameUpperCase}}_scalar_t*
|
||||
build_domain_cuda_{{.CurveNameLowerCase}}(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
|
||||
|
||||
int interpolate_scalars_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_out,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_evaluations,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned device_id,
|
||||
size_t stream);
|
||||
int interpolate_scalars_batch_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_out,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_evaluations,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_points_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* d_out,
|
||||
{{.CurveNameUpperCase}}_projective_t* d_evaluations,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_points_batch_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* d_out,
|
||||
{{.CurveNameUpperCase}}_projective_t* d_evaluations,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_scalars_on_coset_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_out,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_evaluations,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int interpolate_scalars_batch_on_coset_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_out,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_evaluations,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
|
||||
int evaluate_scalars_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_out,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_coefficients,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_batch_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_out,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_coefficients,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* d_out,
|
||||
{{.CurveNameUpperCase}}_projective_t* d_coefficients,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_batch_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* d_out,
|
||||
{{.CurveNameUpperCase}}_projective_t* d_coefficients,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_on_coset_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_out,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_coefficients,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
|
||||
unsigned device_id,
|
||||
size_t stream);
|
||||
int evaluate_scalars_on_coset_batch_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_out,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_coefficients,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_on_coset_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* d_out,
|
||||
{{.CurveNameUpperCase}}_projective_t* d_coefficients,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
int evaluate_points_on_coset_batch_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* d_out,
|
||||
{{.CurveNameUpperCase}}_projective_t* d_coefficients,
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
|
||||
size_t device_id,
|
||||
size_t stream);
|
||||
|
||||
{{.CurveNameUpperCase}}_scalar_t* build_domain_cuda_{{.CurveNameLowerCase}}(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t *d_evaluations, {{.CurveNameUpperCase}}_scalar_t *d_domain, unsigned n, unsigned device_id, size_t stream);
|
||||
int interpolate_scalars_batch_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_evaluations, {{.CurveNameUpperCase}}_scalar_t* d_domain, unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int interpolate_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* d_out, {{.CurveNameUpperCase}}_projective_t *d_evaluations, {{.CurveNameUpperCase}}_scalar_t *d_domain, unsigned n, size_t device_id, size_t stream);
|
||||
int interpolate_points_batch_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* d_out, {{.CurveNameUpperCase}}_projective_t* d_evaluations, {{.CurveNameUpperCase}}_scalar_t* d_domain,unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_on_coset_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_evaluations, {{.CurveNameUpperCase}}_scalar_t* d_domain, unsigned n, {{.CurveNameUpperCase}}_scalar_t* coset_powers, size_t device_id, size_t stream);
|
||||
int interpolate_scalars_batch_on_coset_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_evaluations, {{.CurveNameUpperCase}}_scalar_t* d_domain, unsigned n, unsigned batch_size, {{.CurveNameUpperCase}}_scalar_t* coset_powers, size_t device_id, size_t stream);
|
||||
int evaluate_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t *d_coefficients, {{.CurveNameUpperCase}}_scalar_t *d_domain, unsigned domain_size, unsigned n, unsigned device_id, size_t stream);
|
||||
int evaluate_scalars_batch_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_coefficients, {{.CurveNameUpperCase}}_scalar_t* d_domain, unsigned domain_size,unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int evaluate_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* d_out, {{.CurveNameUpperCase}}_projective_t *d_coefficients, {{.CurveNameUpperCase}}_scalar_t *d_domain, unsigned domain_size, unsigned n, size_t device_id, size_t stream);
|
||||
int evaluate_points_batch_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* d_out, {{.CurveNameUpperCase}}_projective_t* d_coefficients, {{.CurveNameUpperCase}}_scalar_t* d_domain, unsigned domain_size,unsigned n, unsigned batch_size, size_t device_id, size_t stream);
|
||||
int evaluate_scalars_on_coset_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t *d_coefficients, {{.CurveNameUpperCase}}_scalar_t *d_domain, unsigned domain_size,unsigned n, {{.CurveNameUpperCase}}_scalar_t *coset_powers, unsigned device_id, size_t stream);
|
||||
int evaluate_scalars_on_coset_batch_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_coefficients, {{.CurveNameUpperCase}}_scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, {{.CurveNameUpperCase}}_scalar_t *coset_powers, size_t device_id, size_t stream);
|
||||
int evaluate_points_on_coset_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* d_out, {{.CurveNameUpperCase}}_projective_t *d_coefficients, {{.CurveNameUpperCase}}_scalar_t *d_domain, unsigned domain_size,unsigned n, {{.CurveNameUpperCase}}_scalar_t *coset_powers, size_t device_id, size_t stream);
|
||||
int evaluate_points_on_coset_batch_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* d_out, {{.CurveNameUpperCase}}_projective_t* d_coefficients, {{.CurveNameUpperCase}}_scalar_t* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, {{.CurveNameUpperCase}}_scalar_t *coset_powers, size_t device_id, size_t stream);
|
||||
int reverse_order_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* arr, int n, size_t device_id, size_t stream);
|
||||
int reverse_order_scalars_batch_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int reverse_order_scalars_batch_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int reverse_order_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* arr, int n, size_t device_id, size_t stream);
|
||||
int reverse_order_points_batch_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int add_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_in1, {{.CurveNameUpperCase}}_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int sub_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_in1, {{.CurveNameUpperCase}}_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int reverse_order_points_batch_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
|
||||
int add_scalars_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_in1, {{.CurveNameUpperCase}}_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int sub_scalars_cuda_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_in1, {{.CurveNameUpperCase}}_scalar_t* d_in2, unsigned n, size_t stream);
|
||||
int to_montgomery_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_inout, unsigned n, size_t stream);
|
||||
int from_montgomery_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_inout, unsigned n, size_t stream);
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#include <stdbool.h>
|
||||
#include <cuda.h>
|
||||
#include <stdbool.h>
|
||||
// projective.h
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -7,25 +7,25 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {{.CurveNameUpperCase}}_projective_t {{.CurveNameUpperCase}}_projective_t;
|
||||
typedef struct {{.CurveNameUpperCase}}_g2_projective_t {{.CurveNameUpperCase}}_g2_projective_t;
|
||||
typedef struct {{.CurveNameUpperCase}}_g2_projective_t {{.CurveNameUpperCase}}_g2_projective_t;
|
||||
typedef struct {{.CurveNameUpperCase}}_affine_t {{.CurveNameUpperCase}}_affine_t;
|
||||
typedef struct {{.CurveNameUpperCase}}_scalar_t {{.CurveNameUpperCase}}_scalar_t;
|
||||
|
||||
bool projective_is_on_curve_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t *point1);
|
||||
bool projective_is_on_curve_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* point1);
|
||||
|
||||
{{.CurveNameUpperCase}}_scalar_t* random_scalar_{{.CurveNameLowerCase}}();
|
||||
{{.CurveNameUpperCase}}_projective_t* random_projective_{{.CurveNameLowerCase}}();
|
||||
{{.CurveNameUpperCase}}_projective_t* projective_zero_{{.CurveNameLowerCase}}();
|
||||
{{.CurveNameUpperCase}}_affine_t* projective_to_affine_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t *point1);
|
||||
{{.CurveNameUpperCase}}_projective_t* projective_from_affine_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_affine_t *point1);
|
||||
{{.CurveNameUpperCase}}_affine_t* projective_to_affine_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* point1);
|
||||
{{.CurveNameUpperCase}}_projective_t* projective_from_affine_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_affine_t* point1);
|
||||
|
||||
{{.CurveNameUpperCase}}_g2_projective_t* random_g2_projective_{{.CurveNameLowerCase}}();
|
||||
{{.CurveNameUpperCase}}_affine_t* g2_projective_to_affine_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t *point1);
|
||||
{{.CurveNameUpperCase}}_g2_projective_t* g2_projective_from_affine_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_affine_t *point1);
|
||||
bool g2_projective_is_on_curve_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t *point1);
|
||||
{{.CurveNameUpperCase}}_affine_t* g2_projective_to_affine_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* point1);
|
||||
{{.CurveNameUpperCase}}_g2_projective_t* g2_projective_from_affine_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_affine_t* point1);
|
||||
bool g2_projective_is_on_curve_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* point1);
|
||||
|
||||
bool eq_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t *point1, {{.CurveNameUpperCase}}_projective_t *point2);
|
||||
bool eq_g2_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t *point1, {{.CurveNameUpperCase}}_g2_projective_t *point2);
|
||||
bool eq_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* point1, {{.CurveNameUpperCase}}_projective_t* point2);
|
||||
bool eq_g2_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* point1, {{.CurveNameUpperCase}}_g2_projective_t* point2);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -12,11 +12,18 @@ extern "C" {
|
||||
typedef struct {{.CurveNameUpperCase}}_projective_t {{.CurveNameUpperCase}}_projective_t;
|
||||
typedef struct {{.CurveNameUpperCase}}_scalar_t {{.CurveNameUpperCase}}_scalar_t;
|
||||
|
||||
int32_t vec_mod_mult_point_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t *inout, {{.CurveNameUpperCase}}_scalar_t *scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_scalar_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t *inout, {{.CurveNameUpperCase}}_scalar_t *scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_device_scalar_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t *inout, {{.CurveNameUpperCase}}_scalar_t *scalar_vec, size_t n_elements, size_t device_id);
|
||||
int32_t matrix_vec_mod_mult_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t *matrix_flattened, {{.CurveNameUpperCase}}_scalar_t *input, {{.CurveNameUpperCase}}_scalar_t *output, size_t n_elments, size_t device_id);
|
||||
|
||||
int32_t vec_mod_mult_point_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_projective_t* inout, {{.CurveNameUpperCase}}_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_scalar_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* inout, {{.CurveNameUpperCase}}_scalar_t* scalar_vec, size_t n_elments, size_t device_id);
|
||||
int32_t vec_mod_mult_device_scalar_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* inout, {{.CurveNameUpperCase}}_scalar_t* scalar_vec, size_t n_elements, size_t device_id);
|
||||
int32_t matrix_vec_mod_mult_{{.CurveNameLowerCase}}(
|
||||
{{.CurveNameUpperCase}}_scalar_t* matrix_flattened,
|
||||
{{.CurveNameUpperCase}}_scalar_t* input,
|
||||
{{.CurveNameUpperCase}}_scalar_t* output,
|
||||
size_t n_elments,
|
||||
size_t device_id);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -3,19 +3,46 @@
|
||||
#pragma once
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, bool big_triangle, cudaStream_t stream);
|
||||
void bucket_method_msm(
|
||||
unsigned bitsize,
|
||||
unsigned c,
|
||||
S* scalars,
|
||||
A* points,
|
||||
unsigned size,
|
||||
P* final_result,
|
||||
bool on_device,
|
||||
bool big_triangle,
|
||||
cudaStream_t stream);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device, cudaStream_t stream);
|
||||
void batched_bucket_method_msm(
|
||||
unsigned bitsize,
|
||||
unsigned c,
|
||||
S* scalars,
|
||||
A* points,
|
||||
unsigned batch_size,
|
||||
unsigned msm_size,
|
||||
P* final_results,
|
||||
bool on_device,
|
||||
cudaStream_t stream);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream);
|
||||
void batched_large_msm(
|
||||
S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, bool big_triangle, unsigned large_bucket_factor, cudaStream_t stream);
|
||||
void large_msm(
|
||||
S* scalars,
|
||||
A* points,
|
||||
unsigned size,
|
||||
P* result,
|
||||
bool on_device,
|
||||
bool big_triangle,
|
||||
unsigned large_bucket_factor,
|
||||
cudaStream_t stream);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, cudaStream_t stream);
|
||||
void short_msm(S* h_scalars, A* h_points, unsigned size, P* h_final_result, cudaStream_t stream);
|
||||
|
||||
template <typename A, typename S, typename P>
|
||||
void reference_msm(S* scalars, A* a_points, unsigned size);
|
||||
|
||||
@@ -1,131 +1,115 @@
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
#include <vector>
|
||||
#include "msm.cu"
|
||||
#include "../../utils/cuda_utils.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../utils/cuda_utils.cuh"
|
||||
#include "msm.cu"
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
// #include "../../curves/bls12_377/curve_config.cuh"
|
||||
#include "../../curves/bn254/curve_config.cuh"
|
||||
|
||||
// using namespace BLS12_377;
|
||||
using namespace BN254;
|
||||
|
||||
class Dummy_Scalar {
|
||||
public:
|
||||
static constexpr unsigned NBITS = 32;
|
||||
class Dummy_Scalar
|
||||
{
|
||||
public:
|
||||
static constexpr unsigned NBITS = 32;
|
||||
|
||||
unsigned x;
|
||||
unsigned p = 10;
|
||||
// unsigned p = 1<<30;
|
||||
unsigned x;
|
||||
unsigned p = 10;
|
||||
// unsigned p = 1<<30;
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar zero() {
|
||||
return {0};
|
||||
}
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar zero() { return {0}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar one() {
|
||||
return {1};
|
||||
}
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar one() { return {1}; }
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar) {
|
||||
os << scalar.x;
|
||||
return os;
|
||||
}
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar)
|
||||
{
|
||||
os << scalar.x;
|
||||
return os;
|
||||
}
|
||||
|
||||
HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width) {
|
||||
return (x>>(digit_num*digit_width))&((1<<digit_width)-1);
|
||||
}
|
||||
HOST_DEVICE_INLINE unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width)
|
||||
{
|
||||
return (x >> (digit_num * digit_width)) & ((1 << digit_width) - 1);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2) {
|
||||
return {(p1.x+p2.x)%p1.p};
|
||||
}
|
||||
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2)
|
||||
{
|
||||
return {(p1.x + p2.x) % p1.p};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) {
|
||||
return (p1.x == p2.x);
|
||||
}
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) { return (p1.x == p2.x); }
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) {
|
||||
return (p1.x == p2);
|
||||
}
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const unsigned p2) { return (p1.x == p2); }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar &scalar) {
|
||||
return {scalar.p-scalar.x};
|
||||
}
|
||||
static HOST_INLINE Dummy_Scalar rand_host() {
|
||||
return {(unsigned)rand()%10};
|
||||
// return {(unsigned)rand()};
|
||||
}
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar& scalar) { return {scalar.p - scalar.x}; }
|
||||
static HOST_INLINE Dummy_Scalar rand_host()
|
||||
{
|
||||
return {(unsigned)rand() % 10};
|
||||
// return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
|
||||
class Dummy_Projective {
|
||||
class Dummy_Projective
|
||||
{
|
||||
public:
|
||||
Dummy_Scalar x;
|
||||
|
||||
public:
|
||||
Dummy_Scalar x;
|
||||
static HOST_DEVICE_INLINE Dummy_Projective zero() { return {0}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective zero() {
|
||||
return {0};
|
||||
static HOST_DEVICE_INLINE Dummy_Projective one() { return {1}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective to_affine(const Dummy_Projective& point) { return {point.x}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective from_affine(const Dummy_Projective& point) { return {point.x}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective& point) { return {Dummy_Scalar::neg(point.x)}; }
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Projective operator+(Dummy_Projective p1, const Dummy_Projective& p2)
|
||||
{
|
||||
return {p1.x + p2.x};
|
||||
}
|
||||
|
||||
// friend HOST_DEVICE_INLINE Dummy_Projective operator-(Dummy_Projective p1, const Dummy_Projective& p2) {
|
||||
// return p1 + neg(p2);
|
||||
// }
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Projective& point)
|
||||
{
|
||||
os << point.x;
|
||||
return os;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Projective operator*(Dummy_Scalar scalar, const Dummy_Projective& point)
|
||||
{
|
||||
Dummy_Projective res = zero();
|
||||
#ifdef CUDA_ARCH
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < Dummy_Scalar::NBITS; i++) {
|
||||
if (i > 0) { res = res + res; }
|
||||
if (scalar.get_scalar_digit(Dummy_Scalar::NBITS - i - 1, 1)) { res = res + point; }
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective one() {
|
||||
return {1};
|
||||
}
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Projective& p1, const Dummy_Projective& p2)
|
||||
{
|
||||
return (p1.x == p2.x);
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective to_affine(const Dummy_Projective &point) {
|
||||
return {point.x};
|
||||
}
|
||||
static HOST_DEVICE_INLINE bool is_zero(const Dummy_Projective& point) { return point.x == 0; }
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective from_affine(const Dummy_Projective &point) {
|
||||
return {point.x};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
|
||||
return {Dummy_Scalar::neg(point.x)};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Projective operator+(Dummy_Projective p1, const Dummy_Projective& p2) {
|
||||
return {p1.x+p2.x};
|
||||
}
|
||||
|
||||
// friend HOST_DEVICE_INLINE Dummy_Projective operator-(Dummy_Projective p1, const Dummy_Projective& p2) {
|
||||
// return p1 + neg(p2);
|
||||
// }
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Projective& point) {
|
||||
os << point.x;
|
||||
return os;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Projective operator*(Dummy_Scalar scalar, const Dummy_Projective& point) {
|
||||
Dummy_Projective res = zero();
|
||||
#ifdef CUDA_ARCH
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < Dummy_Scalar::NBITS; i++) {
|
||||
if (i > 0) {
|
||||
res = res + res;
|
||||
}
|
||||
if (scalar.get_scalar_digit(Dummy_Scalar::NBITS - i - 1, 1)) {
|
||||
res = res + point;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Projective& p1, const Dummy_Projective& p2) {
|
||||
return (p1.x == p2.x);
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool is_zero(const Dummy_Projective &point) {
|
||||
return point.x == 0;
|
||||
}
|
||||
|
||||
static HOST_INLINE Dummy_Projective rand_host() {
|
||||
return {(unsigned)rand()%10};
|
||||
// return {(unsigned)rand()};
|
||||
}
|
||||
static HOST_INLINE Dummy_Projective rand_host()
|
||||
{
|
||||
return {(unsigned)rand() % 10};
|
||||
// return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
|
||||
//switch between dummy and real:
|
||||
// switch between dummy and real:
|
||||
|
||||
typedef scalar_t test_scalar;
|
||||
typedef projective_t test_projective;
|
||||
@@ -138,62 +122,62 @@ typedef affine_t test_affine;
|
||||
int main()
|
||||
{
|
||||
unsigned batch_size = 1;
|
||||
// unsigned msm_size = 1<<21;
|
||||
// unsigned msm_size = 1<<21;
|
||||
unsigned msm_size = 12180757;
|
||||
unsigned N = batch_size*msm_size;
|
||||
unsigned N = batch_size * msm_size;
|
||||
|
||||
test_scalar *scalars = new test_scalar[N];
|
||||
test_affine *points = new test_affine[N];
|
||||
|
||||
for (unsigned i=0;i<N;i++){
|
||||
test_scalar* scalars = new test_scalar[N];
|
||||
test_affine* points = new test_affine[N];
|
||||
|
||||
for (unsigned i = 0; i < N; i++) {
|
||||
// scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
|
||||
points[i] = (i%msm_size < 10)? test_projective::to_affine(test_projective::rand_host()): points[i-10];
|
||||
points[i] = (i % msm_size < 10) ? test_projective::to_affine(test_projective::rand_host()) : points[i - 10];
|
||||
scalars[i] = test_scalar::rand_host();
|
||||
// scalars[i] = i < N/2? test_scalar::rand_host() : test_scalar::one();
|
||||
// points[i] = test_projective::to_affine(test_projective::rand_host());
|
||||
}
|
||||
std::cout<<"finished generating"<<std::endl;
|
||||
std::cout << "finished generating" << std::endl;
|
||||
|
||||
// projective_t *short_res = (projective_t*)malloc(sizeof(projective_t));
|
||||
// test_projective *large_res = (test_projective*)malloc(sizeof(test_projective));
|
||||
test_projective large_res[batch_size*2];
|
||||
test_projective large_res[batch_size * 2];
|
||||
// test_projective batched_large_res[batch_size];
|
||||
// fake_point *large_res = (fake_point*)malloc(sizeof(fake_point));
|
||||
// fake_point batched_large_res[256];
|
||||
|
||||
|
||||
// short_msm<scalar_t, projective_t, affine_t>(scalars, points, N, short_res);
|
||||
// for (unsigned i=0;i<batch_size;i++){
|
||||
// large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
|
||||
// std::cout<<"final result large"<<std::endl;
|
||||
// std::cout<<test_projective::to_affine(*large_res)<<std::endl;
|
||||
// large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i,
|
||||
// false); std::cout<<"final result large"<<std::endl; std::cout<<test_projective::to_affine(*large_res)<<std::endl;
|
||||
// }
|
||||
|
||||
test_scalar *scalars_d;
|
||||
test_affine *points_d;
|
||||
test_projective *large_res_d;
|
||||
test_scalar* scalars_d;
|
||||
test_affine* points_d;
|
||||
test_projective* large_res_d;
|
||||
|
||||
cudaMalloc(&scalars_d, sizeof(test_scalar) * msm_size);
|
||||
cudaMalloc(&points_d, sizeof(test_affine) * msm_size);
|
||||
cudaMalloc(&large_res_d, sizeof(test_projective));
|
||||
cudaMemcpy(scalars_d, scalars, sizeof(test_scalar) * msm_size, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(points_d, points, sizeof(test_affine) * msm_size, cudaMemcpyHostToDevice);
|
||||
|
||||
std::cout<<"finished copying"<<std::endl;
|
||||
|
||||
// batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
|
||||
std::cout << "finished copying" << std::endl;
|
||||
|
||||
// batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size,
|
||||
// batched_large_res, false);
|
||||
cudaStream_t stream1;
|
||||
cudaStream_t stream2;
|
||||
cudaStreamCreate(&stream1);
|
||||
cudaStreamCreate(&stream2);
|
||||
auto begin1 = std::chrono::high_resolution_clock::now();
|
||||
large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false, true,stream1);
|
||||
large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false, true, stream1);
|
||||
auto end1 = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed1 = std::chrono::duration_cast<std::chrono::nanoseconds>(end1 - begin1);
|
||||
printf("Big Triangle : %.3f seconds.\n", elapsed1.count() * 1e-9);
|
||||
// std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
large_msm<test_scalar, test_projective, test_affine>(scalars_d, points_d, msm_size, large_res_d, true, false,stream2);
|
||||
large_msm<test_scalar, test_projective, test_affine>(
|
||||
scalars_d, points_d, msm_size, large_res_d, true, false, stream2);
|
||||
// test_reduce_triangle(scalars);
|
||||
// test_reduce_rectangle(scalars);
|
||||
// test_reduce_single(scalars);
|
||||
@@ -201,17 +185,17 @@ int main()
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
|
||||
printf("On Device No Big Triangle: %.3f seconds.\n", elapsed.count() * 1e-9);
|
||||
cudaStreamSynchronize(stream1);
|
||||
cudaStreamSynchronize(stream2);
|
||||
cudaStreamDestroy(stream1);
|
||||
cudaStreamDestroy(stream2);
|
||||
cudaStreamSynchronize(stream1);
|
||||
cudaStreamSynchronize(stream2);
|
||||
cudaStreamDestroy(stream1);
|
||||
cudaStreamDestroy(stream2);
|
||||
|
||||
std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
|
||||
std::cout << test_projective::to_affine(large_res[0]) << std::endl;
|
||||
|
||||
cudaMemcpy(&large_res[1], large_res_d, sizeof(test_projective), cudaMemcpyDeviceToHost);
|
||||
std::cout<<test_projective::to_affine(large_res[1])<<std::endl;
|
||||
std::cout << test_projective::to_affine(large_res[1]) << std::endl;
|
||||
|
||||
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
|
||||
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
|
||||
|
||||
// std::cout<<"final results batched large"<<std::endl;
|
||||
// bool success = true;
|
||||
@@ -230,7 +214,7 @@ int main()
|
||||
// if (success){
|
||||
// std::cout<<"success!"<<std::endl;
|
||||
// }
|
||||
|
||||
|
||||
// std::cout<<batched_large_res[0]<<std::endl;
|
||||
// std::cout<<batched_large_res[1]<<std::endl;
|
||||
// std::cout<<projective_t::to_affine(batched_large_res[0])<<std::endl;
|
||||
|
||||
@@ -1,47 +1,60 @@
|
||||
#ifndef LDE
|
||||
#define LDE
|
||||
#include <cuda.h>
|
||||
#include "ntt.cuh"
|
||||
#include "lde.cuh"
|
||||
#include "../vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "lde.cuh"
|
||||
#include "ntt.cuh"
|
||||
#include <cuda.h>
|
||||
|
||||
template <typename E, bool SUB>
|
||||
__global__ void add_sub_array(E* res, E* in1, E* in2, uint32_t n)
|
||||
{
|
||||
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid < n) { res[tid] = SUB ? in1[tid] - in2[tid] : in1[tid] + in2[tid]; }
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
int sub_polys(E* d_out, E* d_in1, E* d_in2, unsigned n, cudaStream_t stream)
|
||||
{
|
||||
uint32_t NUM_THREADS = MAX_THREADS_BATCH;
|
||||
uint32_t NUM_BLOCKS = (n + NUM_THREADS - 1) / NUM_THREADS;
|
||||
|
||||
add_sub_array<E, true><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, d_in1, d_in2, n);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
int add_polys(E* d_out, E* d_in1, E* d_in2, unsigned n, cudaStream_t stream)
|
||||
{
|
||||
uint32_t NUM_THREADS = MAX_THREADS_BATCH;
|
||||
uint32_t NUM_BLOCKS = (n + NUM_THREADS - 1) / NUM_THREADS;
|
||||
|
||||
add_sub_array<E, false><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, d_in1, d_in2, n);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template < typename E, bool SUB > __global__ void add_sub_array(E* res, E* in1, E* in2, uint32_t n) {
|
||||
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid < n) {
|
||||
res[tid] = SUB ? in1[tid] - in2[tid] : in1[tid] + in2[tid];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
int sub_polys(E* d_out, E* d_in1, E* d_in2, unsigned n, cudaStream_t stream) {
|
||||
uint32_t NUM_THREADS = MAX_THREADS_BATCH;
|
||||
uint32_t NUM_BLOCKS = (n + NUM_THREADS - 1) / NUM_THREADS;
|
||||
|
||||
add_sub_array <E, true> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, d_in1, d_in2, n);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
int add_polys(E* d_out, E* d_in1, E* d_in2, unsigned n, cudaStream_t stream) {
|
||||
uint32_t NUM_THREADS = MAX_THREADS_BATCH;
|
||||
uint32_t NUM_BLOCKS = (n + NUM_THREADS - 1) / NUM_THREADS;
|
||||
|
||||
add_sub_array <E, false> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, d_in1, d_in2, n);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Interpolate a batch of polynomials from their evaluations on the same subgroup.
|
||||
* Note: this function does not perform any bit-reverse permutations on its inputs or outputs.
|
||||
* @param d_out The variable to write coefficients of the resulting polynomials into (the coefficients are in bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
|
||||
* @param d_out The variable to write coefficients of the resulting polynomials into (the coefficients are in
|
||||
* bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
|
||||
* @param d_evaluations Input array of evaluations of all polynomials of type E (elements).
|
||||
* @param d_domain Domain on which the polynomials are evaluated. Must be a subgroup.
|
||||
* @param n Length of `d_domain` array, also equal to the number of evaluations of each polynomial.
|
||||
* @param batch_size The size of the batch; the length of `d_evaluations` is `n` * `batch_size`.
|
||||
*/
|
||||
template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluations, S * d_domain, unsigned n, unsigned batch_size, bool coset, S * coset_powers, cudaStream_t stream) {
|
||||
template <typename E, typename S>
|
||||
int interpolate_batch(
|
||||
E* d_out,
|
||||
E* d_evaluations,
|
||||
S* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
bool coset,
|
||||
S* coset_powers,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
cudaMemcpyAsync(d_out, d_evaluations, sizeof(E) * n * batch_size, cudaMemcpyDeviceToDevice, stream);
|
||||
ntt_inplace_batch_template(d_out, d_domain, n, batch_size, true, coset, coset_powers, stream, true);
|
||||
return 0;
|
||||
@@ -50,47 +63,63 @@ template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluat
|
||||
/**
|
||||
* Interpolate a polynomial from its evaluations on a subgroup.
|
||||
* Note: this function does not perform any bit-reverse permutations on its inputs or outputs.
|
||||
* @param d_out The variable to write coefficients of the resulting polynomial into (the coefficients are in bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
|
||||
* @param d_out The variable to write coefficients of the resulting polynomial into (the coefficients are in
|
||||
* bit-reversed order if the evaluations weren't bit-reversed and vice-versa).
|
||||
* @param d_evaluations Input array of evaluations that have type E (elements).
|
||||
* @param d_domain Domain on which the polynomial is evaluated. Must be a subgroup.
|
||||
* @param n Length of `d_evaluations` and the size `d_domain` arrays (they should have equal length).
|
||||
*/
|
||||
template <typename E, typename S> int interpolate(E * d_out, E * d_evaluations, S * d_domain, unsigned n, bool coset, S * coset_powers, cudaStream_t stream) {
|
||||
return interpolate_batch <E, S> (d_out, d_evaluations, d_domain, n, 1, coset, coset_powers, stream);
|
||||
template <typename E, typename S>
|
||||
int interpolate(E* d_out, E* d_evaluations, S* d_domain, unsigned n, bool coset, S* coset_powers, cudaStream_t stream)
|
||||
{
|
||||
return interpolate_batch<E, S>(d_out, d_evaluations, d_domain, n, 1, coset, coset_powers, stream);
|
||||
}
|
||||
|
||||
template < typename E > __global__ void fill_array(E * arr, E val, uint32_t n) {
|
||||
template <typename E>
|
||||
__global__ void fill_array(E* arr, E val, uint32_t n)
|
||||
{
|
||||
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid < n) {
|
||||
arr[tid] = val;
|
||||
}
|
||||
if (tid < n) { arr[tid] = val; }
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluate a batch of polynomials on the same coset.
|
||||
* @param d_out The evaluations of the polynomials on coset `u` * `d_domain`.
|
||||
* @param d_coefficients Input array of coefficients of all polynomials of type E (elements) to be evaluated in-place on a coset.
|
||||
* @param d_coefficients Input array of coefficients of all polynomials of type E (elements) to be evaluated in-place on
|
||||
* a coset.
|
||||
* @param d_domain Domain on which the polynomials are evaluated (see `coset` flag). Must be a subgroup.
|
||||
* @param domain_size Length of `d_domain` array, on which the polynomial is computed.
|
||||
* @param n The number of coefficients, which might be different from `domain_size`.
|
||||
* @param batch_size The size of the batch; the length of `d_coefficients` is `n` * `batch_size`.
|
||||
* @param coset The flag that indicates whether to evaluate on a coset. If false, evaluate on a subgroup `d_domain`.
|
||||
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
|
||||
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of
|
||||
* the coset.
|
||||
*/
|
||||
template <typename E, typename S>
|
||||
int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, unsigned batch_size, bool coset, S * coset_powers, cudaStream_t stream) {
|
||||
int evaluate_batch(
|
||||
E* d_out,
|
||||
E* d_coefficients,
|
||||
S* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
bool coset,
|
||||
S* coset_powers,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
uint32_t logn = uint32_t(log(domain_size) / log(2));
|
||||
if (domain_size > n) {
|
||||
// allocate and initialize an array of stream handles to parallelize data copying across batches
|
||||
cudaStream_t *memcpy_streams = (cudaStream_t *) malloc(batch_size * sizeof(cudaStream_t));
|
||||
for (unsigned i = 0; i < batch_size; i++)
|
||||
{
|
||||
cudaStream_t* memcpy_streams = (cudaStream_t*)malloc(batch_size * sizeof(cudaStream_t));
|
||||
for (unsigned i = 0; i < batch_size; i++) {
|
||||
cudaStreamCreate(&(memcpy_streams[i]));
|
||||
|
||||
cudaMemcpyAsync(&d_out[i * domain_size], &d_coefficients[i * n], n * sizeof(E), cudaMemcpyDeviceToDevice, memcpy_streams[i]);
|
||||
cudaMemcpyAsync(
|
||||
&d_out[i * domain_size], &d_coefficients[i * n], n * sizeof(E), cudaMemcpyDeviceToDevice, memcpy_streams[i]);
|
||||
uint32_t NUM_THREADS = MAX_THREADS_BATCH;
|
||||
uint32_t NUM_BLOCKS = (domain_size - n + NUM_THREADS - 1) / NUM_THREADS;
|
||||
fill_array <E> <<<NUM_BLOCKS, NUM_THREADS, 0, memcpy_streams[i]>>> (&d_out[i * domain_size + n], E::zero(), domain_size - n);
|
||||
fill_array<E>
|
||||
<<<NUM_BLOCKS, NUM_THREADS, 0, memcpy_streams[i]>>>(&d_out[i * domain_size + n], E::zero(), domain_size - n);
|
||||
|
||||
cudaStreamSynchronize(memcpy_streams[i]);
|
||||
cudaStreamDestroy(memcpy_streams[i]);
|
||||
@@ -98,9 +127,8 @@ int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_
|
||||
} else
|
||||
cudaMemcpyAsync(d_out, d_coefficients, sizeof(E) * domain_size * batch_size, cudaMemcpyDeviceToDevice, stream);
|
||||
|
||||
if (coset)
|
||||
batch_vector_mult(coset_powers, d_out, domain_size, batch_size, stream);
|
||||
|
||||
if (coset) batch_vector_mult(coset_powers, d_out, domain_size, batch_size, stream);
|
||||
|
||||
S* _null = nullptr;
|
||||
ntt_inplace_batch_template(d_out, d_domain, domain_size, batch_size, false, false, _null, stream, true);
|
||||
return 0;
|
||||
@@ -108,102 +136,144 @@ int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_
|
||||
|
||||
/**
|
||||
* Evaluate a polynomial on a coset.
|
||||
* Note: this function does not perform any bit-reverse permutations on its inputs or outputs, so the order of outputs is bit-reversed.
|
||||
* Note: this function does not perform any bit-reverse permutations on its inputs or outputs, so the order of outputs
|
||||
* is bit-reversed.
|
||||
* @param d_out The evaluations of the polynomial on coset `u` * `d_domain`.
|
||||
* @param d_coefficients Input array of coefficients of a polynomial of type E (elements).
|
||||
* @param d_domain Domain on which the polynomial is evaluated (see `coset` flag). Must be a subgroup.
|
||||
* @param domain_size Length of `d_domain` array, on which the polynomial is computed.
|
||||
* @param n The number of coefficients, which might be different from `domain_size`.
|
||||
* @param coset The flag that indicates whether to evaluate on a coset. If false, evaluate on a subgroup `d_domain`.
|
||||
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
|
||||
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of
|
||||
* the coset.
|
||||
*/
|
||||
template <typename E, typename S>
|
||||
int evaluate(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, bool coset, S * coset_powers, cudaStream_t stream) {
|
||||
return evaluate_batch <E, S> (d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers, stream);
|
||||
template <typename E, typename S>
|
||||
int evaluate(
|
||||
E* d_out,
|
||||
E* d_coefficients,
|
||||
S* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
bool coset,
|
||||
S* coset_powers,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
return evaluate_batch<E, S>(d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers, stream);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream) {
|
||||
template <typename S>
|
||||
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream)
|
||||
{
|
||||
S* _null = nullptr;
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, false, _null, stream);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
|
||||
template <typename S>
|
||||
int interpolate_scalars_batch(
|
||||
S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream)
|
||||
{
|
||||
S* _null = nullptr;
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream) {
|
||||
template <typename E, typename S>
|
||||
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream)
|
||||
{
|
||||
S* _null = nullptr;
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, false, _null, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
|
||||
template <typename E, typename S>
|
||||
int interpolate_points_batch(
|
||||
E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream)
|
||||
{
|
||||
S* _null = nullptr;
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream) {
|
||||
template <typename S>
|
||||
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream)
|
||||
{
|
||||
S* _null = nullptr;
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream) {
|
||||
template <typename S>
|
||||
int evaluate_scalars_batch(
|
||||
S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream)
|
||||
{
|
||||
S* _null = nullptr;
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream) {
|
||||
template <typename E, typename S>
|
||||
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream)
|
||||
{
|
||||
S* _null = nullptr;
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream) {
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_batch(
|
||||
E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream)
|
||||
{
|
||||
S* _null = nullptr;
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
int interpolate_scalars_on_coset(S* d_out, S* d_evaluations, S* d_domain,
|
||||
unsigned n, S* coset_powers, cudaStream_t stream) {
|
||||
template <typename S>
|
||||
int interpolate_scalars_on_coset(
|
||||
S* d_out, S* d_evaluations, S* d_domain, unsigned n, S* coset_powers, cudaStream_t stream)
|
||||
{
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, true, coset_powers, stream);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
int interpolate_scalars_on_coset_batch(S* d_out, S* d_evaluations, S* d_domain,
|
||||
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream) {
|
||||
template <typename S>
|
||||
int interpolate_scalars_on_coset_batch(
|
||||
S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream)
|
||||
{
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, true, coset_powers, stream);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream) {
|
||||
template <typename S>
|
||||
int evaluate_scalars_on_coset(
|
||||
S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream)
|
||||
{
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream) {
|
||||
template <typename E, typename S>
|
||||
int evaluate_scalars_on_coset_batch(
|
||||
S* d_out,
|
||||
S* d_coefficients,
|
||||
S* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
S* coset_powers,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream) {
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset(
|
||||
E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream)
|
||||
{
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream) {
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset_batch(
|
||||
E* d_out,
|
||||
E* d_coefficients,
|
||||
S* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
S* coset_powers,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
|
||||
}
|
||||
#endif
|
||||
@@ -2,45 +2,62 @@
|
||||
#define LDE_H
|
||||
#pragma once
|
||||
|
||||
template <typename S>
|
||||
template <typename S>
|
||||
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream);
|
||||
|
||||
template <typename S>
|
||||
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
|
||||
template <typename S>
|
||||
int interpolate_scalars_batch(
|
||||
S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
|
||||
|
||||
template <typename E, typename S>
|
||||
template <typename E, typename S>
|
||||
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream);
|
||||
|
||||
template <typename E, typename S>
|
||||
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
|
||||
template <typename E, typename S>
|
||||
int interpolate_points_batch(
|
||||
E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
|
||||
|
||||
template <typename S>
|
||||
template <typename S>
|
||||
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream);
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
|
||||
template <typename S>
|
||||
int evaluate_scalars_batch(
|
||||
S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
|
||||
|
||||
template <typename E, typename S>
|
||||
template <typename E, typename S>
|
||||
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream);
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_batch(
|
||||
E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
|
||||
template <typename S>
|
||||
int evaluate_scalars_on_coset(
|
||||
S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
|
||||
|
||||
template <typename S>
|
||||
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream);
|
||||
template <typename S>
|
||||
int evaluate_scalars_on_coset_batch(
|
||||
S* d_out,
|
||||
S* d_coefficients,
|
||||
S* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
S* coset_powers,
|
||||
cudaStream_t stream);
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
|
||||
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset(
|
||||
E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
|
||||
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream);
|
||||
template <typename E, typename S>
|
||||
int evaluate_points_on_coset_batch(
|
||||
E* d_out,
|
||||
E* d_coefficients,
|
||||
S* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
S* coset_powers,
|
||||
cudaStream_t stream);
|
||||
|
||||
#endif
|
||||
@@ -6,18 +6,20 @@
|
||||
#include "../vector_manipulation/ve_mod_mult.cuh"
|
||||
|
||||
const uint32_t MAX_NUM_THREADS = 1024;
|
||||
const uint32_t MAX_THREADS_BATCH = 512; //TODO: allows 100% occupancy for scalar NTT for sm_86..sm_89
|
||||
const uint32_t MAX_SHARED_MEM_ELEMENT_SIZE = 32; //TODO: occupancy calculator, hardcoded for sm_86..sm_89
|
||||
const uint32_t MAX_SHARED_MEM = MAX_SHARED_MEM_ELEMENT_SIZE * 1024;
|
||||
const uint32_t MAX_THREADS_BATCH = 512; // TODO: allows 100% occupancy for scalar NTT for sm_86..sm_89
|
||||
const uint32_t MAX_SHARED_MEM_ELEMENT_SIZE = 32; // TODO: occupancy calculator, hardcoded for sm_86..sm_89
|
||||
const uint32_t MAX_SHARED_MEM = MAX_SHARED_MEM_ELEMENT_SIZE * 1024;
|
||||
|
||||
/**
|
||||
* Computes the twiddle factors.
|
||||
* Computes the twiddle factors.
|
||||
* Outputs: d_twiddles[i] = omega^i.
|
||||
* @param d_twiddles input empty array.
|
||||
* @param n_twiddles number of twiddle factors.
|
||||
* @param omega multiplying factor.
|
||||
* @param d_twiddles input empty array.
|
||||
* @param n_twiddles number of twiddle factors.
|
||||
* @param omega multiplying factor.
|
||||
*/
|
||||
template < typename S > __global__ void twiddle_factors_kernel(S * d_twiddles, uint32_t n_twiddles, S omega) {
|
||||
template <typename S>
|
||||
__global__ void twiddle_factors_kernel(S* d_twiddles, uint32_t n_twiddles, S omega)
|
||||
{
|
||||
for (uint32_t i = 0; i < n_twiddles; i++) {
|
||||
d_twiddles[i] = S::zero();
|
||||
}
|
||||
@@ -28,21 +30,25 @@ const uint32_t MAX_SHARED_MEM = MAX_SHARED_MEM_ELEMENT_SIZE * 1024;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills twiddles array with twiddle factors.
|
||||
* @param twiddles input empty array.
|
||||
* @param n_twiddles number of twiddle factors.
|
||||
* @param omega multiplying factor.
|
||||
* Fills twiddles array with twiddle factors.
|
||||
* @param twiddles input empty array.
|
||||
* @param n_twiddles number of twiddle factors.
|
||||
* @param omega multiplying factor.
|
||||
*/
|
||||
template < typename S > S * fill_twiddle_factors_array(uint32_t n_twiddles, S omega, cudaStream_t stream) {
|
||||
template <typename S>
|
||||
S* fill_twiddle_factors_array(uint32_t n_twiddles, S omega, cudaStream_t stream)
|
||||
{
|
||||
size_t size_twiddles = n_twiddles * sizeof(S);
|
||||
S * d_twiddles;
|
||||
cudaMallocAsync(& d_twiddles, size_twiddles, stream);
|
||||
twiddle_factors_kernel<S> <<< 1, 1, 0, stream>>> (d_twiddles, n_twiddles, omega);
|
||||
S* d_twiddles;
|
||||
cudaMallocAsync(&d_twiddles, size_twiddles, stream);
|
||||
twiddle_factors_kernel<S><<<1, 1, 0, stream>>>(d_twiddles, n_twiddles, omega);
|
||||
cudaStreamSynchronize(stream);
|
||||
return d_twiddles;
|
||||
}
|
||||
|
||||
template < typename T > __global__ void reverse_order_kernel(T* arr, T* arr_reversed, uint32_t n, uint32_t logn, uint32_t batch_size) {
|
||||
template <typename T>
|
||||
__global__ void reverse_order_kernel(T* arr, T* arr_reversed, uint32_t n, uint32_t logn, uint32_t batch_size)
|
||||
{
|
||||
int threadId = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (threadId < n * batch_size) {
|
||||
int idx = threadId % n;
|
||||
@@ -61,12 +67,14 @@ template < typename T > __global__ void reverse_order_kernel(T* arr, T* arr_reve
|
||||
* @param logn log(n).
|
||||
* @param batch_size the size of the batch.
|
||||
*/
|
||||
template < typename T > void reverse_order_batch(T* arr, uint32_t n, uint32_t logn, uint32_t batch_size, cudaStream_t stream) {
|
||||
template <typename T>
|
||||
void reverse_order_batch(T* arr, uint32_t n, uint32_t logn, uint32_t batch_size, cudaStream_t stream)
|
||||
{
|
||||
T* arr_reversed;
|
||||
cudaMallocAsync(&arr_reversed, n * batch_size * sizeof(T), stream);
|
||||
int number_of_threads = MAX_THREADS_BATCH;
|
||||
int number_of_blocks = (n * batch_size + number_of_threads - 1) / number_of_threads;
|
||||
reverse_order_kernel <<<number_of_blocks, number_of_threads, 0, stream>>> (arr, arr_reversed, n, logn, batch_size);
|
||||
reverse_order_kernel<<<number_of_blocks, number_of_threads, 0, stream>>>(arr, arr_reversed, n, logn, batch_size);
|
||||
cudaMemcpyAsync(arr, arr_reversed, n * batch_size * sizeof(T), cudaMemcpyDeviceToDevice, stream);
|
||||
cudaFreeAsync(arr_reversed, stream);
|
||||
}
|
||||
@@ -79,11 +87,12 @@ template < typename T > void reverse_order_batch(T* arr, uint32_t n, uint32_t lo
|
||||
* @param n length of `arr`.
|
||||
* @param logn log(n).
|
||||
*/
|
||||
template < typename T > void reverse_order(T* arr, uint32_t n, uint32_t logn, cudaStream_t stream) {
|
||||
template <typename T>
|
||||
void reverse_order(T* arr, uint32_t n, uint32_t logn, cudaStream_t stream)
|
||||
{
|
||||
reverse_order_batch(arr, n, logn, 1, stream);
|
||||
}
|
||||
|
||||
|
||||
enum Decimation {
|
||||
NONE = 0,
|
||||
DIF = 1,
|
||||
@@ -101,25 +110,29 @@ enum Decimation {
|
||||
* @param s log2(n) loop index.
|
||||
*/
|
||||
template <typename E, typename S>
|
||||
__global__ void ntt_template_kernel_shared_rev(E *__restrict__ arr_g, uint32_t n, const S *__restrict__ r_twiddles, uint32_t n_twiddles, uint32_t max_task, uint32_t ss, uint32_t logn)
|
||||
__global__ void ntt_template_kernel_shared_rev(
|
||||
E* __restrict__ arr_g,
|
||||
uint32_t n,
|
||||
const S* __restrict__ r_twiddles,
|
||||
uint32_t n_twiddles,
|
||||
uint32_t max_task,
|
||||
uint32_t ss,
|
||||
uint32_t logn)
|
||||
{
|
||||
SharedMemory<E> smem;
|
||||
E *arr = smem.getPointer();
|
||||
E* arr = smem.getPointer();
|
||||
|
||||
uint32_t task = blockIdx.x;
|
||||
uint32_t loop_limit = blockDim.x;
|
||||
uint32_t chunks = n / (loop_limit * 2);
|
||||
uint32_t offset = (task / chunks) * n;
|
||||
if (task < max_task)
|
||||
{
|
||||
if (task < max_task) {
|
||||
// flattened loop allows parallel processing
|
||||
uint32_t l = threadIdx.x;
|
||||
|
||||
if (l < loop_limit)
|
||||
{
|
||||
if (l < loop_limit) {
|
||||
#pragma unroll
|
||||
for (; ss < logn; ss++)
|
||||
{
|
||||
for (; ss < logn; ss++) {
|
||||
int s = logn - ss - 1;
|
||||
bool is_beginning = ss == 0;
|
||||
bool is_end = ss == (logn - 1);
|
||||
@@ -142,15 +155,12 @@ __global__ void ntt_template_kernel_shared_rev(E *__restrict__ arr_g, uint32_t n
|
||||
|
||||
E u = is_beginning ? arr_g[offset + oij] : arr[oij];
|
||||
E v = is_beginning ? arr_g[offset + k] : arr[k];
|
||||
if (is_end)
|
||||
{
|
||||
if (is_end) {
|
||||
arr_g[offset + oij] = u + v;
|
||||
arr_g[offset + k] = tw * (u - v);
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
arr[oij] = u + v;
|
||||
arr[k] = tw *(u - v);
|
||||
arr[k] = tw * (u - v);
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
@@ -170,22 +180,27 @@ __global__ void ntt_template_kernel_shared_rev(E *__restrict__ arr_g, uint32_t n
|
||||
* @param s log2(n) loop index.
|
||||
*/
|
||||
template <typename E, typename S>
|
||||
__global__ void ntt_template_kernel_shared(E *__restrict__ arr_g, uint32_t n, const S *__restrict__ r_twiddles, uint32_t n_twiddles, uint32_t max_task, uint32_t s, uint32_t logn)
|
||||
__global__ void ntt_template_kernel_shared(
|
||||
E* __restrict__ arr_g,
|
||||
uint32_t n,
|
||||
const S* __restrict__ r_twiddles,
|
||||
uint32_t n_twiddles,
|
||||
uint32_t max_task,
|
||||
uint32_t s,
|
||||
uint32_t logn)
|
||||
{
|
||||
SharedMemory<E> smem;
|
||||
E *arr = smem.getPointer();
|
||||
E* arr = smem.getPointer();
|
||||
|
||||
uint32_t task = blockIdx.x;
|
||||
uint32_t loop_limit = blockDim.x;
|
||||
uint32_t chunks = n / (loop_limit * 2);
|
||||
uint32_t offset = (task / chunks) * n;
|
||||
if (task < max_task)
|
||||
{
|
||||
if (task < max_task) {
|
||||
// flattened loop allows parallel processing
|
||||
uint32_t l = threadIdx.x;
|
||||
|
||||
if (l < loop_limit)
|
||||
{
|
||||
if (l < loop_limit) {
|
||||
#pragma unroll
|
||||
for (; s < logn; s++) // TODO: this loop also can be unrolled
|
||||
{
|
||||
@@ -204,17 +219,13 @@ __global__ void ntt_template_kernel_shared(E *__restrict__ arr_g, uint32_t n, co
|
||||
uint32_t k = oij + shift_s;
|
||||
S tw = r_twiddles[j * n_twiddles_div];
|
||||
|
||||
|
||||
E u = s == 0 ? arr_g[offset + oij] : arr[oij];
|
||||
E v = s == 0 ? arr_g[offset + k] : arr[k];
|
||||
v = tw * v;
|
||||
if (s == (logn - 1))
|
||||
{
|
||||
if (s == (logn - 1)) {
|
||||
arr_g[offset + oij] = u + v;
|
||||
arr_g[offset + k] = u - v;
|
||||
}
|
||||
else
|
||||
{
|
||||
} else {
|
||||
arr[oij] = u + v;
|
||||
arr[k] = u - v;
|
||||
}
|
||||
@@ -226,9 +237,9 @@ __global__ void ntt_template_kernel_shared(E *__restrict__ arr_g, uint32_t n, co
|
||||
}
|
||||
|
||||
/**
|
||||
* Cooley-Tukey NTT.
|
||||
* Cooley-Tukey NTT.
|
||||
* NOTE! this function assumes that d_twiddles are located in the device memory.
|
||||
* @param arr input array of type E (elements).
|
||||
* @param arr input array of type E (elements).
|
||||
* @param n length of d_arr.
|
||||
* @param twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
|
||||
* @param n_twiddles length of twiddles.
|
||||
@@ -236,26 +247,25 @@ __global__ void ntt_template_kernel_shared(E *__restrict__ arr_g, uint32_t n, co
|
||||
* @param s log2(n) loop index.
|
||||
*/
|
||||
template <typename E, typename S>
|
||||
__global__ void ntt_template_kernel(E *arr, uint32_t n, S *twiddles, uint32_t n_twiddles, uint32_t max_task, uint32_t s, bool rev)
|
||||
__global__ void
|
||||
ntt_template_kernel(E* arr, uint32_t n, S* twiddles, uint32_t n_twiddles, uint32_t max_task, uint32_t s, bool rev)
|
||||
{
|
||||
int task = blockIdx.x;
|
||||
int chunks = n / (blockDim.x * 2);
|
||||
|
||||
if (task < max_task)
|
||||
{
|
||||
if (task < max_task) {
|
||||
// flattened loop allows parallel processing
|
||||
uint32_t l = threadIdx.x;
|
||||
uint32_t loop_limit = blockDim.x;
|
||||
|
||||
if (l < loop_limit)
|
||||
{
|
||||
if (l < loop_limit) {
|
||||
uint32_t ntw_i = task % chunks;
|
||||
|
||||
uint32_t shift_s = 1 << s;
|
||||
uint32_t shift2_s = 1 << (s + 1);
|
||||
uint32_t n_twiddles_div = n_twiddles >> (s + 1);
|
||||
|
||||
l = ntw_i * blockDim.x + l; //to l from chunks to full
|
||||
l = ntw_i * blockDim.x + l; // to l from chunks to full
|
||||
|
||||
uint32_t j = l & (shift_s - 1); // Equivalent to: l % (1 << s)
|
||||
uint32_t i = ((l >> s) * shift2_s) & (n - 1); // (..) % n (assuming n is power of 2)
|
||||
@@ -278,18 +288,26 @@ __global__ void ntt_template_kernel(E *arr, uint32_t n, S *twiddles, uint32_t n_
|
||||
* NTT/INTT inplace batch
|
||||
* Note: this function does not perform any bit-reverse permutations on its inputs or outputs.
|
||||
* @param d_inout Array for inplace processing
|
||||
* @param d_twiddles
|
||||
* @param d_twiddles
|
||||
* @param n Length of `d_twiddles` array
|
||||
* @param batch_size The size of the batch; the length of `d_inout` is `n` * `batch_size`.
|
||||
* @param inverse true for iNTT
|
||||
* @param is_coset true for multiplication by coset
|
||||
* @param coset should be an array of length n; if it has fewer than n elements, it must be right-padded with zeroes
|
||||
* @param stream CUDA stream
|
||||
* @param stream CUDA stream
|
||||
* @param is_sync_needed do perform sync of the supplied CUDA stream at the end of processing
|
||||
*/
|
||||
template <typename E, typename S> void ntt_inplace_batch_template(
|
||||
E * d_inout, S * d_twiddles, unsigned n, unsigned batch_size, bool inverse,
|
||||
bool is_coset, S * coset, cudaStream_t stream, bool is_sync_needed)
|
||||
template <typename E, typename S>
|
||||
void ntt_inplace_batch_template(
|
||||
E* d_inout,
|
||||
S* d_twiddles,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
bool inverse,
|
||||
bool is_coset,
|
||||
S* coset,
|
||||
cudaStream_t stream,
|
||||
bool is_sync_needed)
|
||||
{
|
||||
const int logn = int(log(n) / log(2));
|
||||
bool is_shared_mem_enabled = sizeof(E) <= MAX_SHARED_MEM_ELEMENT_SIZE;
|
||||
@@ -298,36 +316,41 @@ template <typename E, typename S> void ntt_inplace_batch_template(
|
||||
const int chunks = max(int((n / 2) / num_threads), 1);
|
||||
const int total_tasks = batch_size * chunks;
|
||||
int num_blocks = total_tasks;
|
||||
const int shared_mem = 2 * num_threads * sizeof(E); // TODO: calculator, as shared mem size may be more efficient less then max to allow more concurrent blocks on SM
|
||||
const int logn_shmem = is_shared_mem_enabled ? int(log(2 * num_threads) / log(2)) : 0; //TODO: shared memory support only for types <= 32 bytes
|
||||
const int shared_mem = 2 * num_threads * sizeof(E); // TODO: calculator, as shared mem size may be more efficient less
|
||||
// then max to allow more concurrent blocks on SM
|
||||
const int logn_shmem = is_shared_mem_enabled ? int(log(2 * num_threads) / log(2))
|
||||
: 0; // TODO: shared memory support only for types <= 32 bytes
|
||||
|
||||
if (inverse)
|
||||
{
|
||||
if (is_shared_mem_enabled) ntt_template_kernel_shared<<<num_blocks, num_threads, shared_mem, stream>>>(d_inout, 1 << logn_shmem, d_twiddles, n, total_tasks, 0, logn_shmem);
|
||||
if (inverse) {
|
||||
if (is_shared_mem_enabled)
|
||||
ntt_template_kernel_shared<<<num_blocks, num_threads, shared_mem, stream>>>(
|
||||
d_inout, 1 << logn_shmem, d_twiddles, n, total_tasks, 0, logn_shmem);
|
||||
|
||||
for (int s = logn_shmem; s < logn; s++) // TODO: this loop also can be unrolled
|
||||
{
|
||||
ntt_template_kernel <E, S> <<<num_blocks, num_threads, 0, stream>>>(d_inout, n, d_twiddles, n, total_tasks, s, false);
|
||||
{
|
||||
ntt_template_kernel<E, S>
|
||||
<<<num_blocks, num_threads, 0, stream>>>(d_inout, n, d_twiddles, n, total_tasks, s, false);
|
||||
}
|
||||
|
||||
if (is_coset) batch_vector_mult(coset, d_inout, n, batch_size, stream);
|
||||
|
||||
num_threads = min(n / 2, MAX_NUM_THREADS);
|
||||
num_blocks = (n * batch_size + num_threads - 1) / num_threads;
|
||||
template_normalize_kernel <E, S> <<<num_blocks, num_threads, 0, stream>>> (d_inout, n * batch_size, S::inv_log_size(logn));
|
||||
}
|
||||
else
|
||||
{
|
||||
template_normalize_kernel<E, S>
|
||||
<<<num_blocks, num_threads, 0, stream>>>(d_inout, n * batch_size, S::inv_log_size(logn));
|
||||
} else {
|
||||
if (is_coset) batch_vector_mult(coset, d_inout, n, batch_size, stream);
|
||||
|
||||
for (int s = logn - 1; s >= logn_shmem; s--) // TODO: this loop also can be unrolled
|
||||
{
|
||||
ntt_template_kernel<<<num_blocks, num_threads, 0, stream>>>(d_inout, n, d_twiddles, n, total_tasks, s, true);
|
||||
}
|
||||
|
||||
if (is_shared_mem_enabled) ntt_template_kernel_shared_rev<<<num_blocks, num_threads, shared_mem, stream>>>(d_inout, 1 << logn_shmem, d_twiddles, n, total_tasks, 0, logn_shmem);
|
||||
|
||||
if (is_shared_mem_enabled)
|
||||
ntt_template_kernel_shared_rev<<<num_blocks, num_threads, shared_mem, stream>>>(
|
||||
d_inout, 1 << logn_shmem, d_twiddles, n, total_tasks, 0, logn_shmem);
|
||||
}
|
||||
|
||||
|
||||
if (!is_sync_needed) return;
|
||||
|
||||
cudaStreamSynchronize(stream);
|
||||
@@ -335,30 +358,32 @@ template <typename E, typename S> void ntt_inplace_batch_template(
|
||||
|
||||
/**
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* This is a bached version - meaning it assumes than the input array
|
||||
* This is a bached version - meaning it assumes than the input array
|
||||
* consists of N arrays of size n. The function performs n-size NTT on each small array.
|
||||
* @param arr input array of type BLS12_381::scalar_t.
|
||||
* @param arr_size number of total elements = n * N.
|
||||
* @param arr input array of type BLS12_381::scalar_t.
|
||||
* @param arr_size number of total elements = n * N.
|
||||
* @param n size of batch.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
template <typename E, typename S> uint32_t ntt_end2end_batch_template(E * arr, uint32_t arr_size, uint32_t n, bool inverse, cudaStream_t stream) {
|
||||
template <typename E, typename S>
|
||||
uint32_t ntt_end2end_batch_template(E* arr, uint32_t arr_size, uint32_t n, bool inverse, cudaStream_t stream)
|
||||
{
|
||||
int batches = int(arr_size / n);
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as BLS12_381::scalar_t::omega() is of that order.
|
||||
uint32_t n_twiddles = n; // n_twiddles is set to 4096 as BLS12_381::scalar_t::omega() is of that order.
|
||||
size_t size_E = arr_size * sizeof(E);
|
||||
S * d_twiddles;
|
||||
if (inverse){
|
||||
S* d_twiddles;
|
||||
if (inverse) {
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn), stream);
|
||||
} else{
|
||||
} else {
|
||||
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn), stream);
|
||||
}
|
||||
E * d_arr;
|
||||
cudaMallocAsync( & d_arr, size_E, stream);
|
||||
E* d_arr;
|
||||
cudaMallocAsync(&d_arr, size_E, stream);
|
||||
cudaMemcpyAsync(d_arr, arr, size_E, cudaMemcpyHostToDevice, stream);
|
||||
int NUM_THREADS = MAX_THREADS_BATCH;
|
||||
int NUM_BLOCKS = (batches + NUM_THREADS - 1) / NUM_THREADS;
|
||||
|
||||
|
||||
S* _null = nullptr;
|
||||
ntt_inplace_batch_template(d_arr, d_twiddles, n, batches, inverse, false, _null, stream, false);
|
||||
|
||||
@@ -366,17 +391,19 @@ template <typename E, typename S> void ntt_inplace_batch_template(
|
||||
cudaFreeAsync(d_arr, stream);
|
||||
cudaFreeAsync(d_twiddles, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* @param arr input array of type E (element).
|
||||
* Cooley-Tukey (scalar) NTT.
|
||||
* @param arr input array of type E (element).
|
||||
* @param n length of d_arr.
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
* @param inverse indicate if the result array should be normalized by n^(-1).
|
||||
*/
|
||||
template<typename E,typename S> uint32_t ntt_end2end_template(E * arr, uint32_t n, bool inverse, cudaStream_t stream) {
|
||||
return ntt_end2end_batch_template <E, S> (arr, n, n, inverse, stream);
|
||||
template <typename E, typename S>
|
||||
uint32_t ntt_end2end_template(E* arr, uint32_t n, bool inverse, cudaStream_t stream)
|
||||
{
|
||||
return ntt_end2end_batch_template<E, S>(arr, n, n, inverse, stream);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,27 +1,27 @@
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <cassert>
|
||||
|
||||
#include "constants/constants_11.h"
|
||||
#include "constants/constants_2.h"
|
||||
#include "constants/constants_4.h"
|
||||
#include "constants/constants_8.h"
|
||||
#include "constants/constants_11.h"
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
|
||||
uint32_t partial_rounds_number_from_arity(const uint32_t arity) {
|
||||
switch (arity) {
|
||||
case 2:
|
||||
return 55;
|
||||
case 4:
|
||||
return 56;
|
||||
case 8:
|
||||
return 57;
|
||||
case 11:
|
||||
return 57;
|
||||
default:
|
||||
throw std::invalid_argument( "unsupported arity" );
|
||||
}
|
||||
uint32_t partial_rounds_number_from_arity(const uint32_t arity)
|
||||
{
|
||||
switch (arity) {
|
||||
case 2:
|
||||
return 55;
|
||||
case 4:
|
||||
return 56;
|
||||
case 8:
|
||||
return 57;
|
||||
case 11:
|
||||
return 57;
|
||||
default:
|
||||
throw std::invalid_argument("unsupported arity");
|
||||
}
|
||||
};
|
||||
|
||||
// TO-DO: change to mapping
|
||||
@@ -29,23 +29,24 @@ const uint32_t FULL_ROUNDS_DEFAULT = 4;
|
||||
|
||||
// TO-DO: for now, the constants are only generated in bls12_381
|
||||
template <typename S>
|
||||
S * load_constants(const uint32_t arity) {
|
||||
unsigned char * constants;
|
||||
switch (arity) {
|
||||
case 2:
|
||||
constants = constants_2;
|
||||
break;
|
||||
case 4:
|
||||
constants = constants_4;
|
||||
break;
|
||||
case 8:
|
||||
constants = constants_8;
|
||||
break;
|
||||
case 11:
|
||||
constants = constants_11;
|
||||
break;
|
||||
default:
|
||||
throw std::invalid_argument( "unsupported arity" );
|
||||
}
|
||||
return reinterpret_cast< S * >(constants);
|
||||
S* load_constants(const uint32_t arity)
|
||||
{
|
||||
unsigned char* constants;
|
||||
switch (arity) {
|
||||
case 2:
|
||||
constants = constants_2;
|
||||
break;
|
||||
case 4:
|
||||
constants = constants_4;
|
||||
break;
|
||||
case 8:
|
||||
constants = constants_8;
|
||||
break;
|
||||
case 11:
|
||||
constants = constants_11;
|
||||
break;
|
||||
default:
|
||||
throw std::invalid_argument("unsupported arity");
|
||||
}
|
||||
return reinterpret_cast<S*>(constants);
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -1,273 +1,266 @@
|
||||
#include "poseidon.cuh"
|
||||
|
||||
template <typename S>
|
||||
__global__ void prepare_poseidon_states(S * states, size_t number_of_states, S domain_tag, const PoseidonConfiguration<S> config) {
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
int state_number = idx / config.t;
|
||||
if (state_number >= number_of_states) {
|
||||
return;
|
||||
}
|
||||
int element_number = idx % config.t;
|
||||
__global__ void
|
||||
prepare_poseidon_states(S* states, size_t number_of_states, S domain_tag, const PoseidonConfiguration<S> config)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
int state_number = idx / config.t;
|
||||
if (state_number >= number_of_states) { return; }
|
||||
int element_number = idx % config.t;
|
||||
|
||||
S prepared_element;
|
||||
S prepared_element;
|
||||
|
||||
// Domain separation
|
||||
if (element_number == 0) {
|
||||
prepared_element = domain_tag;
|
||||
} else {
|
||||
prepared_element = states[state_number * config.t + element_number - 1];
|
||||
}
|
||||
// Domain separation
|
||||
if (element_number == 0) {
|
||||
prepared_element = domain_tag;
|
||||
} else {
|
||||
prepared_element = states[state_number * config.t + element_number - 1];
|
||||
}
|
||||
|
||||
// Add pre-round constant
|
||||
prepared_element = prepared_element + config.round_constants[element_number];
|
||||
// Add pre-round constant
|
||||
prepared_element = prepared_element + config.round_constants[element_number];
|
||||
|
||||
// Store element in state
|
||||
states[idx] = prepared_element;
|
||||
// Store element in state
|
||||
states[idx] = prepared_element;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__device__ __forceinline__ S sbox_alpha_five(S element) {
|
||||
S result = S::sqr(element);
|
||||
result = S::sqr(result);
|
||||
return result * element;
|
||||
__device__ __forceinline__ S sbox_alpha_five(S element)
|
||||
{
|
||||
S result = S::sqr(element);
|
||||
result = S::sqr(result);
|
||||
return result * element;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__device__ S vecs_mul_matrix(S element, S * matrix, int element_number, int vec_number, int size, S * shared_states) {
|
||||
shared_states[threadIdx.x] = element;
|
||||
__syncthreads();
|
||||
__device__ S vecs_mul_matrix(S element, S* matrix, int element_number, int vec_number, int size, S* shared_states)
|
||||
{
|
||||
shared_states[threadIdx.x] = element;
|
||||
__syncthreads();
|
||||
|
||||
element = S::zero();
|
||||
for (int i = 0; i < size; i++) {
|
||||
element = element + (shared_states[vec_number * size + i] * matrix[i * size + element_number]);
|
||||
}
|
||||
__syncthreads();
|
||||
return element;
|
||||
element = S::zero();
|
||||
for (int i = 0; i < size; i++) {
|
||||
element = element + (shared_states[vec_number * size + i] * matrix[i * size + element_number]);
|
||||
}
|
||||
__syncthreads();
|
||||
return element;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__device__ S full_round(S element,
|
||||
size_t rc_offset,
|
||||
int local_state_number,
|
||||
int element_number,
|
||||
bool multiply_by_mds,
|
||||
bool add_round_constant,
|
||||
S * shared_states,
|
||||
const PoseidonConfiguration<S> config) {
|
||||
element = sbox_alpha_five(element);
|
||||
if (add_round_constant) {
|
||||
element = element + config.round_constants[rc_offset + element_number];
|
||||
}
|
||||
__device__ S full_round(
|
||||
S element,
|
||||
size_t rc_offset,
|
||||
int local_state_number,
|
||||
int element_number,
|
||||
bool multiply_by_mds,
|
||||
bool add_round_constant,
|
||||
S* shared_states,
|
||||
const PoseidonConfiguration<S> config)
|
||||
{
|
||||
element = sbox_alpha_five(element);
|
||||
if (add_round_constant) { element = element + config.round_constants[rc_offset + element_number]; }
|
||||
|
||||
// Multiply all the states by mds matrix
|
||||
S * matrix = multiply_by_mds ? config.mds_matrix : config.non_sparse_matrix;
|
||||
return vecs_mul_matrix(element, matrix, element_number, local_state_number, config.t, shared_states);
|
||||
// Multiply all the states by mds matrix
|
||||
S* matrix = multiply_by_mds ? config.mds_matrix : config.non_sparse_matrix;
|
||||
return vecs_mul_matrix(element, matrix, element_number, local_state_number, config.t, shared_states);
|
||||
}
|
||||
|
||||
// Execute full rounds
|
||||
template <typename S>
|
||||
__global__ void full_rounds(S * states, size_t number_of_states, size_t rc_offset, bool first_half, const PoseidonConfiguration<S> config) {
|
||||
extern __shared__ S shared_states[];
|
||||
__global__ void full_rounds(
|
||||
S* states, size_t number_of_states, size_t rc_offset, bool first_half, const PoseidonConfiguration<S> config)
|
||||
{
|
||||
extern __shared__ S shared_states[];
|
||||
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
int state_number = idx / config.t;
|
||||
if (state_number >= number_of_states) {
|
||||
return;
|
||||
}
|
||||
int local_state_number = threadIdx.x / config.t;
|
||||
int element_number = idx % config.t;
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
int state_number = idx / config.t;
|
||||
if (state_number >= number_of_states) { return; }
|
||||
int local_state_number = threadIdx.x / config.t;
|
||||
int element_number = idx % config.t;
|
||||
|
||||
for (int i = 0; i < config.full_rounds_half - 1; i++) {
|
||||
states[idx] = full_round(states[idx],
|
||||
rc_offset,
|
||||
local_state_number,
|
||||
element_number,
|
||||
true,
|
||||
true,
|
||||
shared_states,
|
||||
config);
|
||||
rc_offset += config.t;
|
||||
}
|
||||
for (int i = 0; i < config.full_rounds_half - 1; i++) {
|
||||
states[idx] =
|
||||
full_round(states[idx], rc_offset, local_state_number, element_number, true, true, shared_states, config);
|
||||
rc_offset += config.t;
|
||||
}
|
||||
|
||||
states[idx] = full_round(states[idx],
|
||||
rc_offset,
|
||||
local_state_number,
|
||||
element_number,
|
||||
!first_half,
|
||||
first_half,
|
||||
shared_states,
|
||||
config);
|
||||
states[idx] = full_round(
|
||||
states[idx], rc_offset, local_state_number, element_number, !first_half, first_half, shared_states, config);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__device__ S partial_round(S * state,
|
||||
size_t rc_offset,
|
||||
int round_number,
|
||||
const PoseidonConfiguration<S> config) {
|
||||
S element = state[0];
|
||||
element = sbox_alpha_five(element);
|
||||
element = element + config.round_constants[rc_offset];
|
||||
__device__ S partial_round(S* state, size_t rc_offset, int round_number, const PoseidonConfiguration<S> config)
|
||||
{
|
||||
S element = state[0];
|
||||
element = sbox_alpha_five(element);
|
||||
element = element + config.round_constants[rc_offset];
|
||||
|
||||
S * sparse_matrix = &config.sparse_matrices[(config.t * 2 - 1) * round_number];
|
||||
S* sparse_matrix = &config.sparse_matrices[(config.t * 2 - 1) * round_number];
|
||||
|
||||
state[0] = element * sparse_matrix[0];
|
||||
for (int i = 1; i < config.t; i++) {
|
||||
state[0] = state[0] + (state[i] * sparse_matrix[i]);
|
||||
}
|
||||
state[0] = element * sparse_matrix[0];
|
||||
for (int i = 1; i < config.t; i++) {
|
||||
state[0] = state[0] + (state[i] * sparse_matrix[i]);
|
||||
}
|
||||
|
||||
for (int i = 1; i < config.t; i++) {
|
||||
state[i] = state[i] + (element * sparse_matrix[config.t + i - 1]);
|
||||
}
|
||||
for (int i = 1; i < config.t; i++) {
|
||||
state[i] = state[i] + (element * sparse_matrix[config.t + i - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
// Execute partial rounds
|
||||
template <typename S>
|
||||
__global__ void partial_rounds(S * states, size_t number_of_states, size_t rc_offset, const PoseidonConfiguration<S> config) {
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) {
|
||||
return;
|
||||
}
|
||||
__global__ void
|
||||
partial_rounds(S* states, size_t number_of_states, size_t rc_offset, const PoseidonConfiguration<S> config)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
S * state = &states[idx * config.t];
|
||||
S* state = &states[idx * config.t];
|
||||
|
||||
for (int i = 0; i < config.partial_rounds; i++) {
|
||||
partial_round(state, rc_offset, i, config);
|
||||
rc_offset++;
|
||||
}
|
||||
for (int i = 0; i < config.partial_rounds; i++) {
|
||||
partial_round(state, rc_offset, i, config);
|
||||
rc_offset++;
|
||||
}
|
||||
}
|
||||
|
||||
// These function is just doing copy from the states to the output
|
||||
template <typename S>
|
||||
__global__ void get_hash_results(S * states, size_t number_of_states, S * out, int t) {
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) {
|
||||
return;
|
||||
}
|
||||
__global__ void get_hash_results(S* states, size_t number_of_states, S* out, int t)
|
||||
{
|
||||
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (idx >= number_of_states) { return; }
|
||||
|
||||
out[idx] = states[idx * t + 1];
|
||||
out[idx] = states[idx * t + 1];
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
__host__ void Poseidon<S>::hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type, cudaStream_t stream) {
|
||||
S * states;
|
||||
__host__ void Poseidon<S>::hash_blocks(const S* inp, size_t blocks, S* out, HashType hash_type, cudaStream_t stream)
|
||||
{
|
||||
S* states;
|
||||
|
||||
// allocate memory for {blocks} states of {t} scalars each
|
||||
if (cudaMallocAsync(&states, blocks * this->t * sizeof(S), stream) != cudaSuccess) {
|
||||
throw std::runtime_error("Failed memory allocation on the device");
|
||||
}
|
||||
// allocate memory for {blocks} states of {t} scalars each
|
||||
if (cudaMallocAsync(&states, blocks * this->t * sizeof(S), stream) != cudaSuccess) {
|
||||
throw std::runtime_error("Failed memory allocation on the device");
|
||||
}
|
||||
|
||||
// This is where the input matrix of size Arity x NumberOfBlocks is
|
||||
// padded and coppied to device in a T x NumberOfBlocks matrix
|
||||
cudaMemcpy2DAsync(states, this->t * sizeof(S), // Device pointer and device pitch
|
||||
inp, (this->t - 1) * sizeof(S), // Host pointer and pitch
|
||||
(this->t - 1) * sizeof(S), blocks, // Size of the source matrix (Arity x NumberOfBlocks)
|
||||
cudaMemcpyHostToDevice, stream);
|
||||
// This is where the input matrix of size Arity x NumberOfBlocks is
|
||||
// padded and coppied to device in a T x NumberOfBlocks matrix
|
||||
cudaMemcpy2DAsync(
|
||||
states, this->t * sizeof(S), // Device pointer and device pitch
|
||||
inp, (this->t - 1) * sizeof(S), // Host pointer and pitch
|
||||
(this->t - 1) * sizeof(S), blocks, // Size of the source matrix (Arity x NumberOfBlocks)
|
||||
cudaMemcpyHostToDevice, stream);
|
||||
|
||||
size_t rc_offset = 0;
|
||||
size_t rc_offset = 0;
|
||||
|
||||
// The logic behind this is that 1 thread only works on 1 element
|
||||
// We have {t} elements in each state, and {blocks} states total
|
||||
int number_of_threads = (256 / this->t) * this->t;
|
||||
int hashes_per_block = number_of_threads / this->t;
|
||||
int total_number_of_threads = blocks * this->t;
|
||||
int number_of_blocks = total_number_of_threads / number_of_threads +
|
||||
static_cast<bool>(total_number_of_threads % number_of_threads);
|
||||
// The logic behind this is that 1 thread only works on 1 element
|
||||
// We have {t} elements in each state, and {blocks} states total
|
||||
int number_of_threads = (256 / this->t) * this->t;
|
||||
int hashes_per_block = number_of_threads / this->t;
|
||||
int total_number_of_threads = blocks * this->t;
|
||||
int number_of_blocks =
|
||||
total_number_of_threads / number_of_threads + static_cast<bool>(total_number_of_threads % number_of_threads);
|
||||
|
||||
// The partial rounds operates on the whole state, so we define
|
||||
// the parallelism params for processing a single hash preimage per thread
|
||||
int singlehash_block_size = 128;
|
||||
int number_of_singlehash_blocks = blocks / singlehash_block_size + static_cast<bool>(blocks % singlehash_block_size);
|
||||
// The partial rounds operates on the whole state, so we define
|
||||
// the parallelism params for processing a single hash preimage per thread
|
||||
int singlehash_block_size = 128;
|
||||
int number_of_singlehash_blocks = blocks / singlehash_block_size + static_cast<bool>(blocks % singlehash_block_size);
|
||||
|
||||
// Pick the domain_tag accordinaly
|
||||
S domain_tag;
|
||||
switch (hash_type) {
|
||||
case HashType::ConstInputLen:
|
||||
domain_tag = this->const_input_no_pad_domain_tag;
|
||||
break;
|
||||
// Pick the domain_tag accordinaly
|
||||
S domain_tag;
|
||||
switch (hash_type) {
|
||||
case HashType::ConstInputLen:
|
||||
domain_tag = this->const_input_no_pad_domain_tag;
|
||||
break;
|
||||
|
||||
case HashType::MerkleTree:
|
||||
domain_tag = this->tree_domain_tag;
|
||||
}
|
||||
case HashType::MerkleTree:
|
||||
domain_tag = this->tree_domain_tag;
|
||||
}
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
|
||||
// Domain separation and adding pre-round constants
|
||||
prepare_poseidon_states <<< number_of_blocks, number_of_threads, 0, stream >>> (states, blocks, domain_tag, this->config);
|
||||
rc_offset += this->t;
|
||||
// Domain separation and adding pre-round constants
|
||||
prepare_poseidon_states<<<number_of_blocks, number_of_threads, 0, stream>>>(states, blocks, domain_tag, this->config);
|
||||
rc_offset += this->t;
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Domain separation: " << rc_offset << std::endl;
|
||||
//print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Domain separation: " << rc_offset << std::endl;
|
||||
// print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
|
||||
// execute half full rounds
|
||||
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t, stream >>> (states, blocks, rc_offset, true, this->config);
|
||||
rc_offset += this->t * this->config.full_rounds_half;
|
||||
// execute half full rounds
|
||||
full_rounds<<<number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block* this->t, stream>>>(
|
||||
states, blocks, rc_offset, true, this->config);
|
||||
rc_offset += this->t * this->config.full_rounds_half;
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Full rounds 1. RCOFFSET: " << rc_offset << std::endl;
|
||||
// print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Full rounds 1. RCOFFSET: " << rc_offset << std::endl;
|
||||
// print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
|
||||
// execute partial rounds
|
||||
partial_rounds <<< number_of_singlehash_blocks, singlehash_block_size, 0, stream >>> (states, blocks, rc_offset, this->config);
|
||||
rc_offset += this->config.partial_rounds;
|
||||
// execute partial rounds
|
||||
partial_rounds<<<number_of_singlehash_blocks, singlehash_block_size, 0, stream>>>(
|
||||
states, blocks, rc_offset, this->config);
|
||||
rc_offset += this->config.partial_rounds;
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Partial rounds. RCOFFSET: " << rc_offset << std::endl;
|
||||
//print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Partial rounds. RCOFFSET: " << rc_offset << std::endl;
|
||||
// print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
|
||||
// execute half full rounds
|
||||
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t, stream >>> (states, blocks, rc_offset, false, this->config);
|
||||
// execute half full rounds
|
||||
full_rounds<<<number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block* this->t, stream>>>(
|
||||
states, blocks, rc_offset, false, this->config);
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Full rounds 2. RCOFFSET: " << rc_offset << std::endl;
|
||||
//print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Full rounds 2. RCOFFSET: " << rc_offset << std::endl;
|
||||
// print_buffer_from_cuda<S>(states, blocks * this->t);
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
start_time = std::chrono::high_resolution_clock::now();
|
||||
#endif
|
||||
|
||||
// get output
|
||||
S * out_device;
|
||||
cudaMalloc(&out_device, blocks * sizeof(S));
|
||||
get_hash_results <<< number_of_singlehash_blocks, singlehash_block_size, 0, stream >>> (states, blocks, out_device, this->config.t);
|
||||
// get output
|
||||
S* out_device;
|
||||
cudaMalloc(&out_device, blocks * sizeof(S));
|
||||
get_hash_results<<<number_of_singlehash_blocks, singlehash_block_size, 0, stream>>>(
|
||||
states, blocks, out_device, this->config.t);
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Get hash results" << std::endl;
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
#endif
|
||||
cudaMemcpyAsync(out, out_device, blocks * sizeof(S), cudaMemcpyDeviceToHost, stream);
|
||||
cudaFreeAsync(out_device, stream);
|
||||
cudaFreeAsync(states, stream);
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaStreamSynchronize(stream);
|
||||
std::cout << "Get hash results" << std::endl;
|
||||
end_time = std::chrono::high_resolution_clock::now();
|
||||
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
|
||||
#endif
|
||||
cudaMemcpyAsync(out, out_device, blocks * sizeof(S), cudaMemcpyDeviceToHost, stream);
|
||||
cudaFreeAsync(out_device, stream);
|
||||
cudaFreeAsync(states, stream);
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceReset();
|
||||
#endif
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
cudaDeviceReset();
|
||||
#endif
|
||||
}
|
||||
@@ -2,19 +2,20 @@
|
||||
#include "constants.cuh"
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <chrono>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
template <typename S>
|
||||
__host__ void print_buffer_from_cuda(S * device_ptr, size_t size, size_t t) {
|
||||
S * buffer = static_cast< S * >(malloc(size * sizeof(S)));
|
||||
__host__ void print_buffer_from_cuda(S* device_ptr, size_t size, size_t t)
|
||||
{
|
||||
S* buffer = static_cast<S*>(malloc(size * sizeof(S)));
|
||||
cudaMemcpy(buffer, device_ptr, size * sizeof(S), cudaMemcpyDeviceToHost);
|
||||
|
||||
std::cout << "Start print" << std::endl;
|
||||
for(int i = 0; i < size / t; i++) {
|
||||
for (int i = 0; i < size / t; i++) {
|
||||
std::cout << "State #" << i << std::endl;
|
||||
for (int j = 0; j < t; j++) {
|
||||
std::cout << buffer[i * t + j] << std::endl;
|
||||
@@ -28,136 +29,129 @@ __host__ void print_buffer_from_cuda(S * device_ptr, size_t size, size_t t) {
|
||||
|
||||
#ifdef DEBUG
|
||||
template <typename S>
|
||||
__device__ void print_scalar(S element, int data) {
|
||||
printf("D# %d, T# %d: 0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
|
||||
data,
|
||||
threadIdx.x,
|
||||
element.limbs_storage.limbs[0],
|
||||
element.limbs_storage.limbs[1],
|
||||
element.limbs_storage.limbs[2],
|
||||
element.limbs_storage.limbs[3],
|
||||
element.limbs_storage.limbs[4],
|
||||
element.limbs_storage.limbs[5],
|
||||
element.limbs_storage.limbs[6],
|
||||
element.limbs_storage.limbs[7]
|
||||
);
|
||||
__device__ void print_scalar(S element, int data)
|
||||
{
|
||||
printf(
|
||||
"D# %d, T# %d: 0x%08x%08x%08x%08x%08x%08x%08x%08x\n", data, threadIdx.x, element.limbs_storage.limbs[0],
|
||||
element.limbs_storage.limbs[1], element.limbs_storage.limbs[2], element.limbs_storage.limbs[3],
|
||||
element.limbs_storage.limbs[4], element.limbs_storage.limbs[5], element.limbs_storage.limbs[6],
|
||||
element.limbs_storage.limbs[7]);
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename S>
|
||||
struct PoseidonConfiguration {
|
||||
uint32_t partial_rounds, full_rounds_half, t;
|
||||
S * round_constants, * mds_matrix, * non_sparse_matrix, *sparse_matrices;
|
||||
uint32_t partial_rounds, full_rounds_half, t;
|
||||
S *round_constants, *mds_matrix, *non_sparse_matrix, *sparse_matrices;
|
||||
};
|
||||
|
||||
template <typename S>
|
||||
class Poseidon {
|
||||
public:
|
||||
uint32_t t;
|
||||
PoseidonConfiguration<S> config;
|
||||
class Poseidon
|
||||
{
|
||||
public:
|
||||
uint32_t t;
|
||||
PoseidonConfiguration<S> config;
|
||||
|
||||
enum HashType {
|
||||
ConstInputLen,
|
||||
MerkleTree,
|
||||
};
|
||||
enum HashType {
|
||||
ConstInputLen,
|
||||
MerkleTree,
|
||||
};
|
||||
|
||||
Poseidon(const uint32_t arity, cudaStream_t stream) {
|
||||
t = arity + 1;
|
||||
this->config.t = t;
|
||||
this->stream = stream;
|
||||
Poseidon(const uint32_t arity, cudaStream_t stream)
|
||||
{
|
||||
t = arity + 1;
|
||||
this->config.t = t;
|
||||
this->stream = stream;
|
||||
|
||||
// Pre-calculate domain tags
|
||||
// Domain tags will vary for different applications of Poseidon
|
||||
uint32_t tree_domain_tag_value = 1;
|
||||
tree_domain_tag_value = (tree_domain_tag_value << arity) - tree_domain_tag_value;
|
||||
tree_domain_tag = S::from(tree_domain_tag_value);
|
||||
// Pre-calculate domain tags
|
||||
// Domain tags will vary for different applications of Poseidon
|
||||
uint32_t tree_domain_tag_value = 1;
|
||||
tree_domain_tag_value = (tree_domain_tag_value << arity) - tree_domain_tag_value;
|
||||
tree_domain_tag = S::from(tree_domain_tag_value);
|
||||
|
||||
const_input_no_pad_domain_tag = S::one();
|
||||
const_input_no_pad_domain_tag = S::one();
|
||||
|
||||
// TO-DO: implement binary shifts for scalar type
|
||||
// const_input_no_pad_domain_tag = S::one() << 64;
|
||||
// const_input_no_pad_domain_tag *= S::from(arity);
|
||||
// TO-DO: implement binary shifts for scalar type
|
||||
// const_input_no_pad_domain_tag = S::one() << 64;
|
||||
// const_input_no_pad_domain_tag *= S::from(arity);
|
||||
|
||||
this->config.full_rounds_half = FULL_ROUNDS_DEFAULT;
|
||||
this->config.partial_rounds = partial_rounds_number_from_arity(arity);
|
||||
this->config.full_rounds_half = FULL_ROUNDS_DEFAULT;
|
||||
this->config.partial_rounds = partial_rounds_number_from_arity(arity);
|
||||
|
||||
uint32_t round_constants_len = t * this->config.full_rounds_half * 2 + this->config.partial_rounds;
|
||||
uint32_t mds_matrix_len = t * t;
|
||||
uint32_t sparse_matrices_len = (t * 2 - 1) * this->config.partial_rounds;
|
||||
uint32_t round_constants_len = t * this->config.full_rounds_half * 2 + this->config.partial_rounds;
|
||||
uint32_t mds_matrix_len = t * t;
|
||||
uint32_t sparse_matrices_len = (t * 2 - 1) * this->config.partial_rounds;
|
||||
|
||||
// All the constants are stored in a single file
|
||||
S * constants = load_constants<S>(arity);
|
||||
// All the constants are stored in a single file
|
||||
S* constants = load_constants<S>(arity);
|
||||
|
||||
S * mds_offset = constants + round_constants_len;
|
||||
S * non_sparse_offset = mds_offset + mds_matrix_len;
|
||||
S * sparse_matrices_offset = non_sparse_offset + mds_matrix_len;
|
||||
S* mds_offset = constants + round_constants_len;
|
||||
S* non_sparse_offset = mds_offset + mds_matrix_len;
|
||||
S* sparse_matrices_offset = non_sparse_offset + mds_matrix_len;
|
||||
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
std::cout << "P: " << this->config.partial_rounds << " F: " << this->config.full_rounds_half << std::endl;
|
||||
#endif
|
||||
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
|
||||
std::cout << "P: " << this->config.partial_rounds << " F: " << this->config.full_rounds_half << std::endl;
|
||||
#endif
|
||||
|
||||
// Create streams for copying constants
|
||||
cudaStream_t stream_copy_round_constants, stream_copy_mds_matrix, stream_copy_non_sparse, stream_copy_sparse_matrices;
|
||||
cudaStreamCreate(&stream_copy_round_constants);
|
||||
cudaStreamCreate(&stream_copy_mds_matrix);
|
||||
cudaStreamCreate(&stream_copy_non_sparse);
|
||||
cudaStreamCreate(&stream_copy_sparse_matrices);
|
||||
|
||||
// Create events for copying constants
|
||||
cudaEvent_t event_copied_round_constants, event_copy_mds_matrix, event_copy_non_sparse, event_copy_sparse_matrices;
|
||||
cudaEventCreateWithFlags(&event_copied_round_constants, cudaEventDisableTiming);
|
||||
cudaEventCreateWithFlags(&event_copy_mds_matrix, cudaEventDisableTiming);
|
||||
cudaEventCreateWithFlags(&event_copy_non_sparse, cudaEventDisableTiming);
|
||||
cudaEventCreateWithFlags(&event_copy_sparse_matrices, cudaEventDisableTiming);
|
||||
// Create streams for copying constants
|
||||
cudaStream_t stream_copy_round_constants, stream_copy_mds_matrix, stream_copy_non_sparse,
|
||||
stream_copy_sparse_matrices;
|
||||
cudaStreamCreate(&stream_copy_round_constants);
|
||||
cudaStreamCreate(&stream_copy_mds_matrix);
|
||||
cudaStreamCreate(&stream_copy_non_sparse);
|
||||
cudaStreamCreate(&stream_copy_sparse_matrices);
|
||||
|
||||
// Malloc memory for copying constants
|
||||
cudaMallocAsync(&this->config.round_constants, sizeof(S) * round_constants_len, stream_copy_round_constants);
|
||||
cudaMallocAsync(&this->config.mds_matrix, sizeof(S) * mds_matrix_len, stream_copy_mds_matrix);
|
||||
cudaMallocAsync(&this->config.non_sparse_matrix, sizeof(S) * mds_matrix_len, stream_copy_non_sparse);
|
||||
cudaMallocAsync(&this->config.sparse_matrices, sizeof(S) * sparse_matrices_len, stream_copy_sparse_matrices);
|
||||
// Create events for copying constants
|
||||
cudaEvent_t event_copied_round_constants, event_copy_mds_matrix, event_copy_non_sparse, event_copy_sparse_matrices;
|
||||
cudaEventCreateWithFlags(&event_copied_round_constants, cudaEventDisableTiming);
|
||||
cudaEventCreateWithFlags(&event_copy_mds_matrix, cudaEventDisableTiming);
|
||||
cudaEventCreateWithFlags(&event_copy_non_sparse, cudaEventDisableTiming);
|
||||
cudaEventCreateWithFlags(&event_copy_sparse_matrices, cudaEventDisableTiming);
|
||||
|
||||
// Copy constants
|
||||
cudaMemcpyAsync(this->config.round_constants, constants,
|
||||
sizeof(S) * round_constants_len,
|
||||
cudaMemcpyHostToDevice, stream_copy_round_constants
|
||||
);
|
||||
cudaMemcpyAsync(this->config.mds_matrix, mds_offset,
|
||||
sizeof(S) * mds_matrix_len,
|
||||
cudaMemcpyHostToDevice, stream_copy_mds_matrix
|
||||
);
|
||||
cudaMemcpyAsync(this->config.non_sparse_matrix, non_sparse_offset,
|
||||
sizeof(S) * mds_matrix_len,
|
||||
cudaMemcpyHostToDevice, stream_copy_non_sparse
|
||||
);
|
||||
cudaMemcpyAsync(this->config.sparse_matrices, sparse_matrices_offset,
|
||||
sizeof(S) * sparse_matrices_len,
|
||||
cudaMemcpyHostToDevice, stream_copy_sparse_matrices
|
||||
);
|
||||
// Malloc memory for copying constants
|
||||
cudaMallocAsync(&this->config.round_constants, sizeof(S) * round_constants_len, stream_copy_round_constants);
|
||||
cudaMallocAsync(&this->config.mds_matrix, sizeof(S) * mds_matrix_len, stream_copy_mds_matrix);
|
||||
cudaMallocAsync(&this->config.non_sparse_matrix, sizeof(S) * mds_matrix_len, stream_copy_non_sparse);
|
||||
cudaMallocAsync(&this->config.sparse_matrices, sizeof(S) * sparse_matrices_len, stream_copy_sparse_matrices);
|
||||
|
||||
// Record finished copying event for streams
|
||||
cudaEventRecord(event_copied_round_constants, stream_copy_round_constants);
|
||||
cudaEventRecord(event_copy_mds_matrix, stream_copy_mds_matrix);
|
||||
cudaEventRecord(event_copy_non_sparse, stream_copy_non_sparse);
|
||||
cudaEventRecord(event_copy_sparse_matrices, stream_copy_sparse_matrices);
|
||||
// Copy constants
|
||||
cudaMemcpyAsync(
|
||||
this->config.round_constants, constants, sizeof(S) * round_constants_len, cudaMemcpyHostToDevice,
|
||||
stream_copy_round_constants);
|
||||
cudaMemcpyAsync(
|
||||
this->config.mds_matrix, mds_offset, sizeof(S) * mds_matrix_len, cudaMemcpyHostToDevice, stream_copy_mds_matrix);
|
||||
cudaMemcpyAsync(
|
||||
this->config.non_sparse_matrix, non_sparse_offset, sizeof(S) * mds_matrix_len, cudaMemcpyHostToDevice,
|
||||
stream_copy_non_sparse);
|
||||
cudaMemcpyAsync(
|
||||
this->config.sparse_matrices, sparse_matrices_offset, sizeof(S) * sparse_matrices_len, cudaMemcpyHostToDevice,
|
||||
stream_copy_sparse_matrices);
|
||||
|
||||
// Main stream waits for copying to finish
|
||||
cudaStreamWaitEvent(stream, event_copied_round_constants);
|
||||
cudaStreamWaitEvent(stream, event_copy_mds_matrix);
|
||||
cudaStreamWaitEvent(stream, event_copy_non_sparse);
|
||||
cudaStreamWaitEvent(stream, event_copy_sparse_matrices);
|
||||
}
|
||||
// Record finished copying event for streams
|
||||
cudaEventRecord(event_copied_round_constants, stream_copy_round_constants);
|
||||
cudaEventRecord(event_copy_mds_matrix, stream_copy_mds_matrix);
|
||||
cudaEventRecord(event_copy_non_sparse, stream_copy_non_sparse);
|
||||
cudaEventRecord(event_copy_sparse_matrices, stream_copy_sparse_matrices);
|
||||
|
||||
~Poseidon() {
|
||||
cudaFreeAsync(this->config.round_constants, this->stream);
|
||||
cudaFreeAsync(this->config.mds_matrix, this->stream);
|
||||
cudaFreeAsync(this->config.non_sparse_matrix, this->stream);
|
||||
cudaFreeAsync(this->config.sparse_matrices, this->stream);
|
||||
}
|
||||
// Main stream waits for copying to finish
|
||||
cudaStreamWaitEvent(stream, event_copied_round_constants);
|
||||
cudaStreamWaitEvent(stream, event_copy_mds_matrix);
|
||||
cudaStreamWaitEvent(stream, event_copy_non_sparse);
|
||||
cudaStreamWaitEvent(stream, event_copy_sparse_matrices);
|
||||
}
|
||||
|
||||
// Hash multiple preimages in parallel
|
||||
void hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type, cudaStream_t stream);
|
||||
~Poseidon()
|
||||
{
|
||||
cudaFreeAsync(this->config.round_constants, this->stream);
|
||||
cudaFreeAsync(this->config.mds_matrix, this->stream);
|
||||
cudaFreeAsync(this->config.non_sparse_matrix, this->stream);
|
||||
cudaFreeAsync(this->config.sparse_matrices, this->stream);
|
||||
}
|
||||
|
||||
private:
|
||||
S tree_domain_tag, const_input_no_pad_domain_tag;
|
||||
cudaStream_t stream;
|
||||
// Hash multiple preimages in parallel
|
||||
void hash_blocks(const S* inp, size_t blocks, S* out, HashType hash_type, cudaStream_t stream);
|
||||
|
||||
private:
|
||||
S tree_domain_tag, const_input_no_pad_domain_tag;
|
||||
cudaStream_t stream;
|
||||
};
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,9 +1,8 @@
|
||||
#ifndef VEC_MULT
|
||||
#define VEC_MULT
|
||||
#pragma once
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#define MAX_THREADS_PER_BLOCK 256
|
||||
|
||||
@@ -13,128 +12,124 @@
|
||||
* @param n size of arr.
|
||||
* @param n_inv scalar of type S (scalar).
|
||||
*/
|
||||
template < typename E, typename S > __global__ void template_normalize_kernel(E * arr, uint32_t n, S scalar) {
|
||||
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid < n) {
|
||||
arr[tid] = scalar * arr[tid];
|
||||
}
|
||||
}
|
||||
template <typename E, typename S>
|
||||
__global__ void template_normalize_kernel(E* arr, uint32_t n, S scalar)
|
||||
{
|
||||
int tid = (blockIdx.x * blockDim.x) + threadIdx.x;
|
||||
if (tid < n) { arr[tid] = scalar * arr[tid]; }
|
||||
}
|
||||
|
||||
// TODO: headers for prototypes and .c .cpp .cu files for implementations
|
||||
template <typename E, typename S>
|
||||
__global__ void vectorModMult(S *scalar_vec, E *element_vec, E *result, size_t n_elments)
|
||||
__global__ void vectorModMult(S* scalar_vec, E* element_vec, E* result, size_t n_elments)
|
||||
{
|
||||
int tid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (tid < n_elments)
|
||||
{
|
||||
result[tid] = scalar_vec[tid] * element_vec[tid];
|
||||
}
|
||||
int tid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (tid < n_elments) { result[tid] = scalar_vec[tid] * element_vec[tid]; }
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments, cudaStream_t stream) // TODO: in place so no need for third result vector
|
||||
int vector_mod_mult(S* vec_a, E* vec_b, E* result, size_t n_elments, cudaStream_t stream) // TODO: in place so no need
|
||||
// for third result vector
|
||||
{
|
||||
// Set the grid and block dimensions
|
||||
int num_blocks = (int)ceil((float)n_elments / MAX_THREADS_PER_BLOCK);
|
||||
int threads_per_block = MAX_THREADS_PER_BLOCK;
|
||||
// Set the grid and block dimensions
|
||||
int num_blocks = (int)ceil((float)n_elments / MAX_THREADS_PER_BLOCK);
|
||||
int threads_per_block = MAX_THREADS_PER_BLOCK;
|
||||
|
||||
// Allocate memory on the device for the input vectors, the output vector, and the modulus
|
||||
S *d_vec_a;
|
||||
E *d_vec_b, *d_result;
|
||||
cudaMallocAsync(&d_vec_a, n_elments * sizeof(S), stream);
|
||||
cudaMallocAsync(&d_vec_b, n_elments * sizeof(E), stream);
|
||||
cudaMallocAsync(&d_result, n_elments * sizeof(E), stream);
|
||||
// Allocate memory on the device for the input vectors, the output vector, and the modulus
|
||||
S* d_vec_a;
|
||||
E *d_vec_b, *d_result;
|
||||
cudaMallocAsync(&d_vec_a, n_elments * sizeof(S), stream);
|
||||
cudaMallocAsync(&d_vec_b, n_elments * sizeof(E), stream);
|
||||
cudaMallocAsync(&d_result, n_elments * sizeof(E), stream);
|
||||
|
||||
// Copy the input vectors and the modulus from the host to the device
|
||||
cudaMemcpyAsync(d_vec_a, vec_a, n_elments * sizeof(S), cudaMemcpyHostToDevice, stream);
|
||||
cudaMemcpyAsync(d_vec_b, vec_b, n_elments * sizeof(E), cudaMemcpyHostToDevice, stream);
|
||||
// Copy the input vectors and the modulus from the host to the device
|
||||
cudaMemcpyAsync(d_vec_a, vec_a, n_elments * sizeof(S), cudaMemcpyHostToDevice, stream);
|
||||
cudaMemcpyAsync(d_vec_b, vec_b, n_elments * sizeof(E), cudaMemcpyHostToDevice, stream);
|
||||
|
||||
// Call the kernel to perform element-wise modular multiplication
|
||||
vectorModMult<<<num_blocks, threads_per_block, 0, stream>>>(d_vec_a, d_vec_b, d_result, n_elments);
|
||||
// Call the kernel to perform element-wise modular multiplication
|
||||
vectorModMult<<<num_blocks, threads_per_block, 0, stream>>>(d_vec_a, d_vec_b, d_result, n_elments);
|
||||
|
||||
cudaMemcpyAsync(result, d_result, n_elments * sizeof(E), cudaMemcpyDeviceToHost, stream);
|
||||
cudaMemcpyAsync(result, d_result, n_elments * sizeof(E), cudaMemcpyDeviceToHost, stream);
|
||||
|
||||
cudaFreeAsync(d_vec_a, stream);
|
||||
cudaFreeAsync(d_vec_b, stream);
|
||||
cudaFreeAsync(d_result, stream);
|
||||
cudaFreeAsync(d_vec_a, stream);
|
||||
cudaFreeAsync(d_vec_b, stream);
|
||||
cudaFreeAsync(d_result, stream);
|
||||
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int vector_mod_mult_device(S *d_vec_a, E *d_vec_b, E *d_result, size_t n_elments) // TODO: in place so no need for third result vector
|
||||
int vector_mod_mult_device(
|
||||
S* d_vec_a, E* d_vec_b, E* d_result, size_t n_elments) // TODO: in place so no need for third result vector
|
||||
{
|
||||
// Set the grid and block dimensions
|
||||
int num_blocks = (int)ceil((float)n_elments / MAX_THREADS_PER_BLOCK);
|
||||
int threads_per_block = MAX_THREADS_PER_BLOCK;
|
||||
// Set the grid and block dimensions
|
||||
int num_blocks = (int)ceil((float)n_elments / MAX_THREADS_PER_BLOCK);
|
||||
int threads_per_block = MAX_THREADS_PER_BLOCK;
|
||||
|
||||
// Call the kernel to perform element-wise modular multiplication
|
||||
vectorModMult<<<num_blocks, threads_per_block>>>(d_vec_a, d_vec_b, d_result, n_elments);
|
||||
return 0;
|
||||
// Call the kernel to perform element-wise modular multiplication
|
||||
vectorModMult<<<num_blocks, threads_per_block>>>(d_vec_a, d_vec_b, d_result, n_elments);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
__global__ void batchVectorMult(S *scalar_vec, E *element_vec, unsigned n_scalars, unsigned batch_size)
|
||||
__global__ void batchVectorMult(S* scalar_vec, E* element_vec, unsigned n_scalars, unsigned batch_size)
|
||||
{
|
||||
int tid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (tid < n_scalars * batch_size)
|
||||
{
|
||||
int scalar_id = tid % n_scalars;
|
||||
element_vec[tid] = scalar_vec[scalar_id] * element_vec[tid];
|
||||
}
|
||||
int tid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (tid < n_scalars * batch_size) {
|
||||
int scalar_id = tid % n_scalars;
|
||||
element_vec[tid] = scalar_vec[scalar_id] * element_vec[tid];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename E, typename S>
|
||||
int batch_vector_mult(S *scalar_vec, E *element_vec, unsigned n_scalars, unsigned batch_size, cudaStream_t stream)
|
||||
int batch_vector_mult(S* scalar_vec, E* element_vec, unsigned n_scalars, unsigned batch_size, cudaStream_t stream)
|
||||
{
|
||||
// Set the grid and block dimensions
|
||||
int NUM_THREADS = MAX_THREADS_PER_BLOCK;
|
||||
int NUM_BLOCKS = (n_scalars * batch_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
batchVectorMult<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(scalar_vec, element_vec, n_scalars, batch_size);
|
||||
return 0;
|
||||
// Set the grid and block dimensions
|
||||
int NUM_THREADS = MAX_THREADS_PER_BLOCK;
|
||||
int NUM_BLOCKS = (n_scalars * batch_size + NUM_THREADS - 1) / NUM_THREADS;
|
||||
batchVectorMult<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(scalar_vec, element_vec, n_scalars, batch_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
__global__ void matrixVectorMult(E *matrix_elements, E *vector_elements, E *result, size_t dim)
|
||||
__global__ void matrixVectorMult(E* matrix_elements, E* vector_elements, E* result, size_t dim)
|
||||
{
|
||||
|
||||
int tid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (tid < dim)
|
||||
{
|
||||
result[tid] = E::zero();
|
||||
for (int i = 0; i < dim; i++)
|
||||
result[tid] = result[tid] + matrix_elements[tid * dim + i] * vector_elements[i];
|
||||
}
|
||||
int tid = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
if (tid < dim) {
|
||||
result[tid] = E::zero();
|
||||
for (int i = 0; i < dim; i++)
|
||||
result[tid] = result[tid] + matrix_elements[tid * dim + i] * vector_elements[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t dim, cudaStream_t stream)
|
||||
int matrix_mod_mult(E* matrix_elements, E* vector_elements, E* result, size_t dim, cudaStream_t stream)
|
||||
{
|
||||
// Set the grid and block dimensions
|
||||
int num_blocks = (int)ceil((float)dim / MAX_THREADS_PER_BLOCK);
|
||||
int threads_per_block = MAX_THREADS_PER_BLOCK;
|
||||
// Set the grid and block dimensions
|
||||
int num_blocks = (int)ceil((float)dim / MAX_THREADS_PER_BLOCK);
|
||||
int threads_per_block = MAX_THREADS_PER_BLOCK;
|
||||
|
||||
// Allocate memory on the device for the input vectors, the output vector, and the modulus
|
||||
E *d_matrix, *d_vector, *d_result;
|
||||
cudaMallocAsync(&d_matrix, (dim * dim) * sizeof(E), stream);
|
||||
cudaMallocAsync(&d_vector, dim * sizeof(E), stream);
|
||||
cudaMallocAsync(&d_result, dim * sizeof(E), stream);
|
||||
// Allocate memory on the device for the input vectors, the output vector, and the modulus
|
||||
E *d_matrix, *d_vector, *d_result;
|
||||
cudaMallocAsync(&d_matrix, (dim * dim) * sizeof(E), stream);
|
||||
cudaMallocAsync(&d_vector, dim * sizeof(E), stream);
|
||||
cudaMallocAsync(&d_result, dim * sizeof(E), stream);
|
||||
|
||||
// Copy the input vectors and the modulus from the host to the device
|
||||
cudaMemcpyAsync(d_matrix, matrix_elements, (dim * dim) * sizeof(E), cudaMemcpyHostToDevice, stream);
|
||||
cudaMemcpyAsync(d_vector, vector_elements, dim * sizeof(E), cudaMemcpyHostToDevice, stream);
|
||||
// Copy the input vectors and the modulus from the host to the device
|
||||
cudaMemcpyAsync(d_matrix, matrix_elements, (dim * dim) * sizeof(E), cudaMemcpyHostToDevice, stream);
|
||||
cudaMemcpyAsync(d_vector, vector_elements, dim * sizeof(E), cudaMemcpyHostToDevice, stream);
|
||||
|
||||
// Call the kernel to perform element-wise modular multiplication
|
||||
matrixVectorMult<<<num_blocks, threads_per_block, 0, stream>>>(d_matrix, d_vector, d_result, dim);
|
||||
// Call the kernel to perform element-wise modular multiplication
|
||||
matrixVectorMult<<<num_blocks, threads_per_block, 0, stream>>>(d_matrix, d_vector, d_result, dim);
|
||||
|
||||
cudaMemcpyAsync(result, d_result, dim * sizeof(E), cudaMemcpyDeviceToHost, stream);
|
||||
cudaMemcpyAsync(result, d_result, dim * sizeof(E), cudaMemcpyDeviceToHost, stream);
|
||||
|
||||
cudaFreeAsync(d_matrix, stream);
|
||||
cudaFreeAsync(d_vector, stream);
|
||||
cudaFreeAsync(d_result, stream);
|
||||
cudaFreeAsync(d_matrix, stream);
|
||||
cudaFreeAsync(d_vector, stream);
|
||||
cudaFreeAsync(d_result, stream);
|
||||
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
@@ -9,17 +9,17 @@
|
||||
#include "params.cuh"
|
||||
|
||||
namespace BLS12_377 {
|
||||
typedef Field<PARAMS_BLS12_377::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_BLS12_377::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_377::weierstrass_b };
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_BLS12_377::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_re },
|
||||
point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_im }};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
}
|
||||
typedef Field<PARAMS_BLS12_377::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_BLS12_377::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{PARAMS_BLS12_377::weierstrass_b};
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_BLS12_377::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{
|
||||
point_field_t{PARAMS_BLS12_377::weierstrass_b_g2_re}, point_field_t{PARAMS_BLS12_377::weierstrass_b_g2_im}};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
} // namespace BLS12_377
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,186 +1,216 @@
|
||||
#ifndef _BLS12_377_MSM
|
||||
#define _BLS12_377_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include <cuda.h>
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
extern "C"
|
||||
int msm_cuda_bls12_377(BLS12_377::projective_t *out, BLS12_377::affine_t points[],
|
||||
BLS12_377::scalar_t scalars[], size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int msm_cuda_bls12_377(
|
||||
BLS12_377::projective_t* out,
|
||||
BLS12_377::affine_t points[],
|
||||
BLS12_377::scalar_t scalars[],
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(
|
||||
scalars, points, count, out, false, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int msm_batch_cuda_bls12_377(BLS12_377::projective_t* out, BLS12_377::affine_t points[],
|
||||
BLS12_377::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int msm_batch_cuda_bls12_377(
|
||||
BLS12_377::projective_t* out,
|
||||
BLS12_377::affine_t points[],
|
||||
BLS12_377::scalar_t scalars[],
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(
|
||||
scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int commit_cuda_bls12_377(
|
||||
BLS12_377::projective_t* d_out,
|
||||
BLS12_377::scalar_t* d_scalars,
|
||||
BLS12_377::affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int commit_batch_cuda_bls12_377(
|
||||
BLS12_377::projective_t* d_out,
|
||||
BLS12_377::scalar_t* d_scalars,
|
||||
BLS12_377::affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
extern "C"
|
||||
int msm_g2_cuda_bls12_377(BLS12_377::g2_projective_t *out, BLS12_377::g2_affine_t points[],
|
||||
BLS12_377::scalar_t scalars[], size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int msm_g2_cuda_bls12_377(
|
||||
BLS12_377::g2_projective_t* out,
|
||||
BLS12_377::g2_affine_t points[],
|
||||
BLS12_377::scalar_t scalars[],
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm<BLS12_377::scalar_t, BLS12_377::g2_projective_t, BLS12_377::g2_affine_t>(scalars, points, count, out, false, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm<BLS12_377::scalar_t, BLS12_377::g2_projective_t, BLS12_377::g2_affine_t>(
|
||||
scalars, points, count, out, false, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int msm_batch_g2_cuda_bls12_377(BLS12_377::g2_projective_t* out, BLS12_377::g2_affine_t points[],
|
||||
BLS12_377::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int msm_batch_g2_cuda_bls12_377(
|
||||
BLS12_377::g2_projective_t* out,
|
||||
BLS12_377::g2_affine_t points[],
|
||||
BLS12_377::scalar_t scalars[],
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<BLS12_377::scalar_t, BLS12_377::g2_projective_t, BLS12_377::g2_affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<BLS12_377::scalar_t, BLS12_377::g2_projective_t, BLS12_377::g2_affine_t>(
|
||||
scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM in G2 group.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut G2 point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points G2 affine points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_g2_cuda_bls12_377(BLS12_377::g2_projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::g2_affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int commit_g2_cuda_bls12_377(
|
||||
BLS12_377::g2_projective_t* d_out,
|
||||
BLS12_377::scalar_t* d_scalars,
|
||||
BLS12_377::g2_affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut G2 point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points G2 affine points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_g2_cuda_bls12_377(BLS12_377::g2_projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::g2_affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Output G2 point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points G2 affine points for the MSMs. Must be on device. It is assumed that this set of bases is used for
|
||||
* each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalars` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C" int commit_batch_g2_cuda_bls12_377(
|
||||
BLS12_377::g2_projective_t* d_out,
|
||||
BLS12_377::scalar_t* d_scalars,
|
||||
BLS12_377::g2_affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -1,184 +1,329 @@
|
||||
#pragma once
|
||||
#include "../../utils/storage.cuh"
|
||||
|
||||
namespace PARAMS_BLS12_377{
|
||||
struct fp_config{
|
||||
namespace PARAMS_BLS12_377 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 32;
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd, 0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb, 0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_squared = {0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3, 0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7, 0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f, 0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
|
||||
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd,
|
||||
0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb,
|
||||
0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3,
|
||||
0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7,
|
||||
0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f,
|
||||
0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
|
||||
static constexpr unsigned modulus_bit_count = 253;
|
||||
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b, 0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xfffffff3, 0x7d1c7fff, 0x6ffffff2, 0x7257f50f, 0x512c0fee, 0x16d81575, 0x2bbb9a9d, 0x0d4bda32};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x1beeec02, 0x4122dd1a, 0x74fee875, 0xbd1eae95, 0x27b28e2f, 0x838557e2, 0x2290c02c, 0x07b30191};
|
||||
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b,
|
||||
0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xfffffff3, 0x7d1c7fff, 0x6ffffff2, 0x7257f50f,
|
||||
0x512c0fee, 0x16d81575, 0x2bbb9a9d, 0x0d4bda32};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x1beeec02, 0x4122dd1a, 0x74fee875, 0xbd1eae95,
|
||||
0x27b28e2f, 0x838557e2, 0x2290c02c, 0x07b30191};
|
||||
|
||||
static constexpr storage<limbs_count> omega1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> omega4= {0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765, 0x970dec00, 0x23ed1347, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega5= {0x0405f600, 0xfa8e7081, 0xf8a89660, 0x38b1c291, 0x6bda5fce, 0xefab9005, 0x92a3c754, 0x0b6b0756};
|
||||
static constexpr storage<limbs_count> omega6= {0xaf0a50c8, 0xc5b2c78e, 0x4636deb3, 0x72e32a34, 0xb6f97778, 0x3d775d15, 0x2b16be6e, 0x0c4c070d};
|
||||
static constexpr storage<limbs_count> omega7= {0x7a1ade2c, 0x3f5a4e73, 0x0120d1db, 0x71e5bca1, 0x3b2866fd, 0xbcb44162, 0x89c38db1, 0x06ed1a90};
|
||||
static constexpr storage<limbs_count> omega8= {0xbd2cd25e, 0x61c5510e, 0x2b0d531c, 0xe2d70111, 0x94c3bd4b, 0x738f9894, 0x53182695, 0x0b1e0f1d};
|
||||
static constexpr storage<limbs_count> omega9= {0x8cb9508c, 0xcfb2f75e, 0xf491e401, 0x4c14f244, 0x23c16afb, 0xc8f5265f, 0x70f3ff2a, 0x0cda7e27};
|
||||
static constexpr storage<limbs_count> omega10= {0x0bdc32ee, 0xca77feb9, 0xd957f5a9, 0xf36ddfd4, 0x61ba14c4, 0x491c58f5, 0x93e8f339, 0x0618d3c9};
|
||||
static constexpr storage<limbs_count> omega11= {0x2d89d82f, 0x68c3242e, 0x832a3729, 0xf9559645, 0xbceb62cc, 0x5c803c5e, 0x99ffa2f8, 0x1177cf5d};
|
||||
static constexpr storage<limbs_count> omega12= {0x6932851a, 0xb6ed40f2, 0x1e0da12e, 0x79cbe7fb, 0x2a7d8f87, 0x8d408575, 0x7505d049, 0x11867341};
|
||||
static constexpr storage<limbs_count> omega13= {0x07146cbf, 0x8cf7d87a, 0x109c4d23, 0x14ac37dc, 0x883e9660, 0x082d15f0, 0xad9ea9b8, 0x003719b1};
|
||||
static constexpr storage<limbs_count> omega14= {0xfd0aee77, 0x2260e0dd, 0x1e33b6db, 0xc0cbbc3f, 0xfe7e1b36, 0xc8bf6747, 0x4cb802c1, 0x129e4fd5};
|
||||
static constexpr storage<limbs_count> omega15= {0x8ac75741, 0x22f6fca2, 0xdd37b519, 0x8101b557, 0x1036226a, 0xf493bb8a, 0xfce05c2c, 0x06dbad6c};
|
||||
static constexpr storage<limbs_count> omega16= {0x56733f8b, 0x7d246c24, 0xff70b46a, 0xbc3c4112, 0x6f13530b, 0x2c159b40, 0xc55d287b, 0x0c13137a};
|
||||
static constexpr storage<limbs_count> omega17= {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4, 0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> omega18= {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe, 0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> omega19= {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee, 0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
static constexpr storage<limbs_count> omega20= {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed, 0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
|
||||
static constexpr storage<limbs_count> omega21= {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba, 0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
|
||||
static constexpr storage<limbs_count> omega22= {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2, 0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
|
||||
static constexpr storage<limbs_count> omega23= {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da, 0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
|
||||
static constexpr storage<limbs_count> omega24= {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3, 0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
|
||||
static constexpr storage<limbs_count> omega25= {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804, 0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
|
||||
static constexpr storage<limbs_count> omega26= {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374, 0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
|
||||
static constexpr storage<limbs_count> omega27= {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267, 0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
|
||||
static constexpr storage<limbs_count> omega28= {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae, 0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
|
||||
static constexpr storage<limbs_count> omega29= {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d, 0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
|
||||
static constexpr storage<limbs_count> omega30= {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e, 0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> omega31= {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c, 0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> omega32= {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc, 0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
static constexpr storage<limbs_count> omega1 = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega2 = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega3 = {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe,
|
||||
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> omega4 = {0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765,
|
||||
0x970dec00, 0x23ed1347, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega5 = {0x0405f600, 0xfa8e7081, 0xf8a89660, 0x38b1c291,
|
||||
0x6bda5fce, 0xefab9005, 0x92a3c754, 0x0b6b0756};
|
||||
static constexpr storage<limbs_count> omega6 = {0xaf0a50c8, 0xc5b2c78e, 0x4636deb3, 0x72e32a34,
|
||||
0xb6f97778, 0x3d775d15, 0x2b16be6e, 0x0c4c070d};
|
||||
static constexpr storage<limbs_count> omega7 = {0x7a1ade2c, 0x3f5a4e73, 0x0120d1db, 0x71e5bca1,
|
||||
0x3b2866fd, 0xbcb44162, 0x89c38db1, 0x06ed1a90};
|
||||
static constexpr storage<limbs_count> omega8 = {0xbd2cd25e, 0x61c5510e, 0x2b0d531c, 0xe2d70111,
|
||||
0x94c3bd4b, 0x738f9894, 0x53182695, 0x0b1e0f1d};
|
||||
static constexpr storage<limbs_count> omega9 = {0x8cb9508c, 0xcfb2f75e, 0xf491e401, 0x4c14f244,
|
||||
0x23c16afb, 0xc8f5265f, 0x70f3ff2a, 0x0cda7e27};
|
||||
static constexpr storage<limbs_count> omega10 = {0x0bdc32ee, 0xca77feb9, 0xd957f5a9, 0xf36ddfd4,
|
||||
0x61ba14c4, 0x491c58f5, 0x93e8f339, 0x0618d3c9};
|
||||
static constexpr storage<limbs_count> omega11 = {0x2d89d82f, 0x68c3242e, 0x832a3729, 0xf9559645,
|
||||
0xbceb62cc, 0x5c803c5e, 0x99ffa2f8, 0x1177cf5d};
|
||||
static constexpr storage<limbs_count> omega12 = {0x6932851a, 0xb6ed40f2, 0x1e0da12e, 0x79cbe7fb,
|
||||
0x2a7d8f87, 0x8d408575, 0x7505d049, 0x11867341};
|
||||
static constexpr storage<limbs_count> omega13 = {0x07146cbf, 0x8cf7d87a, 0x109c4d23, 0x14ac37dc,
|
||||
0x883e9660, 0x082d15f0, 0xad9ea9b8, 0x003719b1};
|
||||
static constexpr storage<limbs_count> omega14 = {0xfd0aee77, 0x2260e0dd, 0x1e33b6db, 0xc0cbbc3f,
|
||||
0xfe7e1b36, 0xc8bf6747, 0x4cb802c1, 0x129e4fd5};
|
||||
static constexpr storage<limbs_count> omega15 = {0x8ac75741, 0x22f6fca2, 0xdd37b519, 0x8101b557,
|
||||
0x1036226a, 0xf493bb8a, 0xfce05c2c, 0x06dbad6c};
|
||||
static constexpr storage<limbs_count> omega16 = {0x56733f8b, 0x7d246c24, 0xff70b46a, 0xbc3c4112,
|
||||
0x6f13530b, 0x2c159b40, 0xc55d287b, 0x0c13137a};
|
||||
static constexpr storage<limbs_count> omega17 = {0xec8af73d, 0x8d24de3c, 0xcf722b45, 0x50f778d4,
|
||||
0x15bc7dd7, 0xf4506bc3, 0xf94a16e1, 0x0e43ba91};
|
||||
static constexpr storage<limbs_count> omega18 = {0xd4405b8f, 0x0baa7b44, 0xee0f1394, 0xf8f3c7fe,
|
||||
0xef0dfe6d, 0x46b153c0, 0x2dde6b95, 0x0ea2bcd9};
|
||||
static constexpr storage<limbs_count> omega19 = {0x3d1fa34e, 0x5f4dc975, 0x15af81db, 0xc28e54ee,
|
||||
0x04947d99, 0x83d9a55f, 0x54a2b488, 0x08ec7ccf};
|
||||
static constexpr storage<limbs_count> omega20 = {0x0cac0ee8, 0x0d8fa7b3, 0x82ef38e4, 0x756284ed,
|
||||
0xac8f90d2, 0x7014b194, 0x634e5d50, 0x092488f8};
|
||||
static constexpr storage<limbs_count> omega21 = {0x6d34ed69, 0xd85399bf, 0x09e49cef, 0x4d9012ba,
|
||||
0xca00ae5d, 0x020142ee, 0x3bdfebfd, 0x12772e57};
|
||||
static constexpr storage<limbs_count> omega22 = {0x2eb41723, 0x676c8fc7, 0x5dd895bd, 0xe20380e2,
|
||||
0x9bf22dde, 0x09dc8be8, 0x42638176, 0x12822f94};
|
||||
static constexpr storage<limbs_count> omega23 = {0x81a6d2de, 0x1f1df770, 0xcf29c812, 0x5d33b2da,
|
||||
0x134f0e7e, 0x1bf162de, 0x1e2877a8, 0x045162c4};
|
||||
static constexpr storage<limbs_count> omega24 = {0xfecda1b6, 0x24f4503b, 0xded67d3c, 0x0e5d7ed3,
|
||||
0x40cf20af, 0x2b7b7e5e, 0x4faad6af, 0x0d472650};
|
||||
static constexpr storage<limbs_count> omega25 = {0x584b9eb1, 0xcc6c474c, 0x15a8d886, 0x47670804,
|
||||
0xbb8654c5, 0x07736d2f, 0xeb207a4b, 0x0d14ce7a};
|
||||
static constexpr storage<limbs_count> omega26 = {0xed25924a, 0xd1c6471c, 0x6bc312c3, 0xd98bb374,
|
||||
0xfeae1a41, 0x50be0848, 0x3265c719, 0x04b07dea};
|
||||
static constexpr storage<limbs_count> omega27 = {0x618241e3, 0xab13f73e, 0x166ca902, 0x571c9267,
|
||||
0x5e828a6d, 0x8586443a, 0x6daba50b, 0x093fdf2f};
|
||||
static constexpr storage<limbs_count> omega28 = {0xee11c34f, 0xe688e66b, 0xeacecf5a, 0xdc232eae,
|
||||
0xb95ae685, 0x4fc35094, 0x7c1d31dc, 0x0273b5bd};
|
||||
static constexpr storage<limbs_count> omega29 = {0x1a9057bd, 0x8a8a5a77, 0x41834fbb, 0xdcbfae1d,
|
||||
0xb34ede6e, 0x534f5b97, 0xb78bbd3e, 0x07313ac5};
|
||||
static constexpr storage<limbs_count> omega30 = {0x2be70731, 0x287abbb1, 0x7c35c5aa, 0x5cbcfd1e,
|
||||
0x1671f4df, 0x7585b3fe, 0xb899c011, 0x08350ecf};
|
||||
static constexpr storage<limbs_count> omega31 = {0x09f7c5e2, 0x3400c14e, 0x0a649ea1, 0xc112e60c,
|
||||
0x067ce95e, 0xf7510758, 0xf9daf17c, 0x040a66a5};
|
||||
static constexpr storage<limbs_count> omega32 = {0x43efecd3, 0x89d65957, 0x3bd6c318, 0x29246adc,
|
||||
0xce01533c, 0xf9fb5ef6, 0x849078c3, 0x020410e4};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8,
|
||||
omega9, omega10, omega11, omega12, omega13, omega14, omega15, omega16,
|
||||
omega17, omega18, omega19, omega20, omega21, omega22, omega23, omega24,
|
||||
omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
|
||||
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8, omega9, omega10, omega11,
|
||||
omega12, omega13, omega14, omega15, omega16, omega17, omega18, omega19, omega20, omega21, omega22,
|
||||
omega23, omega24, omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
|
||||
};
|
||||
|
||||
static constexpr storage<limbs_count> omega_inv1= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> omega_inv4= {0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99, 0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> omega_inv5= {0x29f04fbb, 0x401766f3, 0x0a4b98b2, 0x7e4e5f63, 0x9fbc28da, 0x35887f12, 0xdabe3b97, 0x045cb225};
|
||||
static constexpr storage<limbs_count> omega_inv6= {0xac4ce534, 0xf3883827, 0x7c4940f0, 0x9f9a114f, 0x32cc3182, 0xe48527ee, 0x2877f4c2, 0x02d4450c};
|
||||
static constexpr storage<limbs_count> omega_inv7= {0x4afbf0bb, 0xd2533833, 0x1d646d56, 0x20987ba6, 0xb8ae7d61, 0xf2c34c11, 0xb53ae995, 0x09962e74};
|
||||
static constexpr storage<limbs_count> omega_inv8= {0x34f5271a, 0xd6aeb755, 0x493bb125, 0xc0e24cfd, 0x35cf1879, 0xc9d2a1ad, 0x19000e58, 0x0f3570fa};
|
||||
static constexpr storage<limbs_count> omega_inv9= {0xbec3ee61, 0x2601423e, 0xb5252af1, 0x94f5ab4b, 0x205d09ca, 0xa1184628, 0x82a1fba2, 0x0e305e1e};
|
||||
static constexpr storage<limbs_count> omega_inv10= {0x7e3320f2, 0x3cbad3a7, 0x4269c624, 0x7866653a, 0xa2fc13a2, 0xaf6d742d, 0xfe24db2a, 0x03ed8246};
|
||||
static constexpr storage<limbs_count> omega_inv11= {0x30cff7d3, 0xcb6ab09e, 0xd88db7e6, 0x29949e69, 0x24db3cd4, 0xb9117dc6, 0xca8d11b5, 0x01b2aadd};
|
||||
static constexpr storage<limbs_count> omega_inv12= {0x433b851c, 0x1c8fbc5d, 0x545e622f, 0x0ccc3b8c, 0x5c624e0f, 0x0fba9df2, 0x0496ddf9, 0x02d54c5d};
|
||||
static constexpr storage<limbs_count> omega_inv13= {0x0a176838, 0x2ddbbfdd, 0xc4c77f0f, 0xb7a1e4f3, 0x41cad032, 0x645b4383, 0xbfb123c4, 0x0f3fe2e3};
|
||||
static constexpr storage<limbs_count> omega_inv14= {0x9ff30538, 0x1d6d50fe, 0x8576b6fa, 0xca07f2d2, 0x720da6d2, 0x587839fa, 0xe9ebd753, 0x0038d5aa};
|
||||
static constexpr storage<limbs_count> omega_inv15= {0x8e30fb24, 0xaeac713d, 0x21906459, 0xd004e9e3, 0xa60b0a33, 0x2fc54303, 0x14e545a6, 0x039063f8};
|
||||
static constexpr storage<limbs_count> omega_inv16= {0x74d36c47, 0x112559bd, 0x4154b77a, 0x87db7016, 0x3843df80, 0x9e779ae5, 0x297077d0, 0x024424f2};
|
||||
static constexpr storage<limbs_count> omega_inv17= {0x65953c15, 0xd649ae5e, 0x56accc60, 0x879fe571, 0xa3ba1e39, 0xba914f52, 0xd6ea78a2, 0x01b74920};
|
||||
static constexpr storage<limbs_count> omega_inv18= {0x3d8a82b4, 0x319dea45, 0x8fc703de, 0x49468894, 0xc6b00817, 0x703f710f, 0xe862bc53, 0x007762fd};
|
||||
static constexpr storage<limbs_count> omega_inv19= {0x5bae083f, 0x4f433336, 0x27612fe3, 0x485e079c, 0x7f8f0a07, 0xf83b6572, 0xca91a4d4, 0x06bdcaaf};
|
||||
static constexpr storage<limbs_count> omega_inv20= {0xb2fb63eb, 0x4a0bf5e7, 0x996004d9, 0x6f64f8ec, 0x67519c5e, 0x0fecd781, 0x1cab2760, 0x04475eb3};
|
||||
static constexpr storage<limbs_count> omega_inv21= {0xcd83d14f, 0xadbd6ce4, 0x750b194a, 0xc664d3bc, 0x89c9f437, 0x3034dfed, 0xcc2e643b, 0x03d502b8};
|
||||
static constexpr storage<limbs_count> omega_inv22= {0x2272320b, 0xf89478a9, 0xd2e658b7, 0x3adac024, 0x94b25831, 0xf38d840f, 0x37dc6c4c, 0x04540b1f};
|
||||
static constexpr storage<limbs_count> omega_inv23= {0xa6d411fe, 0x19d969b1, 0xf544a648, 0x973f00f7, 0xc9ed9f93, 0xb18f166c, 0xe7f21124, 0x02fba68e};
|
||||
static constexpr storage<limbs_count> omega_inv24= {0x94921227, 0x78b96b20, 0x23b35b65, 0x07cd90db, 0xc843f1c3, 0x111f4fd9, 0xff729f23, 0x0ec4b820};
|
||||
static constexpr storage<limbs_count> omega_inv25= {0x4879d823, 0x53eb200b, 0x93095f4a, 0x1971fac3, 0x86989a58, 0x8467ffe6, 0x306ed29d, 0x0af20231};
|
||||
static constexpr storage<limbs_count> omega_inv26= {0xd4793454, 0x71c907bd, 0x7700defb, 0xc11aa47e, 0xbac11769, 0xf03e0873, 0x97419136, 0x0353190d};
|
||||
static constexpr storage<limbs_count> omega_inv27= {0xa81a701c, 0x61a3deb6, 0x91bbbecf, 0xd8a4eda1, 0x6feb65df, 0x3f5339b1, 0x8b5421f2, 0x108adc5b};
|
||||
static constexpr storage<limbs_count> omega_inv28= {0xe7bf5a41, 0x7d6c573a, 0xfa83b1f7, 0x8038b697, 0xa6718ce9, 0x2a988bee, 0x1239b708, 0x0846f362};
|
||||
static constexpr storage<limbs_count> omega_inv29= {0xe3373548, 0x89a068a4, 0x78a6c4e5, 0xf31284cf, 0x6e9396d6, 0x9eed5c8d, 0x7e4342f9, 0x01643c65};
|
||||
static constexpr storage<limbs_count> omega_inv30= {0x123a81f6, 0xc03a3272, 0x115b15e8, 0x377e6d2f, 0x2d6d7206, 0xed5575e4, 0x714004f2, 0x0b1e37e4};
|
||||
static constexpr storage<limbs_count> omega_inv31= {0xdde8ffc5, 0x62a29589, 0x618c5d62, 0xfb6716e8, 0x88d61f25, 0x787e561c, 0xd2b21c7e, 0x0e351761};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x7aca7fbe, 0xc9fea0e9, 0xb41a8854, 0x965ff314, 0x810eea7e, 0x743415d4, 0x8275bbd1, 0x0431c01b};
|
||||
|
||||
static constexpr storage<limbs_count> omega_inv1 = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv2 = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega_inv3 = {0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe,
|
||||
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> omega_inv4 = {0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99,
|
||||
0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> omega_inv5 = {0x29f04fbb, 0x401766f3, 0x0a4b98b2, 0x7e4e5f63,
|
||||
0x9fbc28da, 0x35887f12, 0xdabe3b97, 0x045cb225};
|
||||
static constexpr storage<limbs_count> omega_inv6 = {0xac4ce534, 0xf3883827, 0x7c4940f0, 0x9f9a114f,
|
||||
0x32cc3182, 0xe48527ee, 0x2877f4c2, 0x02d4450c};
|
||||
static constexpr storage<limbs_count> omega_inv7 = {0x4afbf0bb, 0xd2533833, 0x1d646d56, 0x20987ba6,
|
||||
0xb8ae7d61, 0xf2c34c11, 0xb53ae995, 0x09962e74};
|
||||
static constexpr storage<limbs_count> omega_inv8 = {0x34f5271a, 0xd6aeb755, 0x493bb125, 0xc0e24cfd,
|
||||
0x35cf1879, 0xc9d2a1ad, 0x19000e58, 0x0f3570fa};
|
||||
static constexpr storage<limbs_count> omega_inv9 = {0xbec3ee61, 0x2601423e, 0xb5252af1, 0x94f5ab4b,
|
||||
0x205d09ca, 0xa1184628, 0x82a1fba2, 0x0e305e1e};
|
||||
static constexpr storage<limbs_count> omega_inv10 = {0x7e3320f2, 0x3cbad3a7, 0x4269c624, 0x7866653a,
|
||||
0xa2fc13a2, 0xaf6d742d, 0xfe24db2a, 0x03ed8246};
|
||||
static constexpr storage<limbs_count> omega_inv11 = {0x30cff7d3, 0xcb6ab09e, 0xd88db7e6, 0x29949e69,
|
||||
0x24db3cd4, 0xb9117dc6, 0xca8d11b5, 0x01b2aadd};
|
||||
static constexpr storage<limbs_count> omega_inv12 = {0x433b851c, 0x1c8fbc5d, 0x545e622f, 0x0ccc3b8c,
|
||||
0x5c624e0f, 0x0fba9df2, 0x0496ddf9, 0x02d54c5d};
|
||||
static constexpr storage<limbs_count> omega_inv13 = {0x0a176838, 0x2ddbbfdd, 0xc4c77f0f, 0xb7a1e4f3,
|
||||
0x41cad032, 0x645b4383, 0xbfb123c4, 0x0f3fe2e3};
|
||||
static constexpr storage<limbs_count> omega_inv14 = {0x9ff30538, 0x1d6d50fe, 0x8576b6fa, 0xca07f2d2,
|
||||
0x720da6d2, 0x587839fa, 0xe9ebd753, 0x0038d5aa};
|
||||
static constexpr storage<limbs_count> omega_inv15 = {0x8e30fb24, 0xaeac713d, 0x21906459, 0xd004e9e3,
|
||||
0xa60b0a33, 0x2fc54303, 0x14e545a6, 0x039063f8};
|
||||
static constexpr storage<limbs_count> omega_inv16 = {0x74d36c47, 0x112559bd, 0x4154b77a, 0x87db7016,
|
||||
0x3843df80, 0x9e779ae5, 0x297077d0, 0x024424f2};
|
||||
static constexpr storage<limbs_count> omega_inv17 = {0x65953c15, 0xd649ae5e, 0x56accc60, 0x879fe571,
|
||||
0xa3ba1e39, 0xba914f52, 0xd6ea78a2, 0x01b74920};
|
||||
static constexpr storage<limbs_count> omega_inv18 = {0x3d8a82b4, 0x319dea45, 0x8fc703de, 0x49468894,
|
||||
0xc6b00817, 0x703f710f, 0xe862bc53, 0x007762fd};
|
||||
static constexpr storage<limbs_count> omega_inv19 = {0x5bae083f, 0x4f433336, 0x27612fe3, 0x485e079c,
|
||||
0x7f8f0a07, 0xf83b6572, 0xca91a4d4, 0x06bdcaaf};
|
||||
static constexpr storage<limbs_count> omega_inv20 = {0xb2fb63eb, 0x4a0bf5e7, 0x996004d9, 0x6f64f8ec,
|
||||
0x67519c5e, 0x0fecd781, 0x1cab2760, 0x04475eb3};
|
||||
static constexpr storage<limbs_count> omega_inv21 = {0xcd83d14f, 0xadbd6ce4, 0x750b194a, 0xc664d3bc,
|
||||
0x89c9f437, 0x3034dfed, 0xcc2e643b, 0x03d502b8};
|
||||
static constexpr storage<limbs_count> omega_inv22 = {0x2272320b, 0xf89478a9, 0xd2e658b7, 0x3adac024,
|
||||
0x94b25831, 0xf38d840f, 0x37dc6c4c, 0x04540b1f};
|
||||
static constexpr storage<limbs_count> omega_inv23 = {0xa6d411fe, 0x19d969b1, 0xf544a648, 0x973f00f7,
|
||||
0xc9ed9f93, 0xb18f166c, 0xe7f21124, 0x02fba68e};
|
||||
static constexpr storage<limbs_count> omega_inv24 = {0x94921227, 0x78b96b20, 0x23b35b65, 0x07cd90db,
|
||||
0xc843f1c3, 0x111f4fd9, 0xff729f23, 0x0ec4b820};
|
||||
static constexpr storage<limbs_count> omega_inv25 = {0x4879d823, 0x53eb200b, 0x93095f4a, 0x1971fac3,
|
||||
0x86989a58, 0x8467ffe6, 0x306ed29d, 0x0af20231};
|
||||
static constexpr storage<limbs_count> omega_inv26 = {0xd4793454, 0x71c907bd, 0x7700defb, 0xc11aa47e,
|
||||
0xbac11769, 0xf03e0873, 0x97419136, 0x0353190d};
|
||||
static constexpr storage<limbs_count> omega_inv27 = {0xa81a701c, 0x61a3deb6, 0x91bbbecf, 0xd8a4eda1,
|
||||
0x6feb65df, 0x3f5339b1, 0x8b5421f2, 0x108adc5b};
|
||||
static constexpr storage<limbs_count> omega_inv28 = {0xe7bf5a41, 0x7d6c573a, 0xfa83b1f7, 0x8038b697,
|
||||
0xa6718ce9, 0x2a988bee, 0x1239b708, 0x0846f362};
|
||||
static constexpr storage<limbs_count> omega_inv29 = {0xe3373548, 0x89a068a4, 0x78a6c4e5, 0xf31284cf,
|
||||
0x6e9396d6, 0x9eed5c8d, 0x7e4342f9, 0x01643c65};
|
||||
static constexpr storage<limbs_count> omega_inv30 = {0x123a81f6, 0xc03a3272, 0x115b15e8, 0x377e6d2f,
|
||||
0x2d6d7206, 0xed5575e4, 0x714004f2, 0x0b1e37e4};
|
||||
static constexpr storage<limbs_count> omega_inv31 = {0xdde8ffc5, 0x62a29589, 0x618c5d62, 0xfb6716e8,
|
||||
0x88d61f25, 0x787e561c, 0xd2b21c7e, 0x0e351761};
|
||||
static constexpr storage<limbs_count> omega_inv32 = {0x7aca7fbe, 0xc9fea0e9, 0xb41a8854, 0x965ff314,
|
||||
0x810eea7e, 0x743415d4, 0x8275bbd1, 0x0431c01b};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
|
||||
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
|
||||
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
|
||||
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
|
||||
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
|
||||
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
|
||||
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
|
||||
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
|
||||
};
|
||||
|
||||
static constexpr storage<limbs_count> inv1= {0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af};
|
||||
static constexpr storage<limbs_count> inv2= {0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06};
|
||||
static constexpr storage<limbs_count> inv3= {0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2};
|
||||
static constexpr storage<limbs_count> inv4= {0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e, 0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08};
|
||||
static constexpr storage<limbs_count> inv5= {0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346, 0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33};
|
||||
static constexpr storage<limbs_count> inv6= {0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22, 0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9};
|
||||
static constexpr storage<limbs_count> inv7= {0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210, 0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93};
|
||||
static constexpr storage<limbs_count> inv8= {0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87, 0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9};
|
||||
static constexpr storage<limbs_count> inv9= {0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3, 0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab};
|
||||
static constexpr storage<limbs_count> inv10= {0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61, 0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85};
|
||||
static constexpr storage<limbs_count> inv11= {0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af, 0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1};
|
||||
static constexpr storage<limbs_count> inv12= {0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57, 0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8};
|
||||
static constexpr storage<limbs_count> inv13= {0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab, 0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003};
|
||||
static constexpr storage<limbs_count> inv14= {0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054, 0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1};
|
||||
static constexpr storage<limbs_count> inv15= {0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9, 0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007};
|
||||
static constexpr storage<limbs_count> inv16= {0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54, 0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3};
|
||||
static constexpr storage<limbs_count> inv17= {0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29, 0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08};
|
||||
static constexpr storage<limbs_count> inv18= {0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094, 0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3};
|
||||
static constexpr storage<limbs_count> inv19= {0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9, 0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309};
|
||||
static constexpr storage<limbs_count> inv20= {0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164, 0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433};
|
||||
static constexpr storage<limbs_count> inv21= {0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31, 0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9};
|
||||
static constexpr storage<limbs_count> inv22= {0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198, 0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513};
|
||||
static constexpr storage<limbs_count> inv23= {0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b, 0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539};
|
||||
static constexpr storage<limbs_count> inv24= {0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5, 0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b};
|
||||
static constexpr storage<limbs_count> inv25= {0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651, 0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555};
|
||||
static constexpr storage<limbs_count> inv26= {0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8, 0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559};
|
||||
static constexpr storage<limbs_count> inv27= {0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3, 0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c};
|
||||
static constexpr storage<limbs_count> inv28= {0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9, 0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d};
|
||||
static constexpr storage<limbs_count> inv29= {0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4, 0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv30= {0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv31= {0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv32= {0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv1 = {0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f,
|
||||
0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af};
|
||||
static constexpr storage<limbs_count> inv2 = {0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f,
|
||||
0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06};
|
||||
static constexpr storage<limbs_count> inv3 = {0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e,
|
||||
0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2};
|
||||
static constexpr storage<limbs_count> inv4 = {0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e,
|
||||
0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08};
|
||||
static constexpr storage<limbs_count> inv5 = {0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346,
|
||||
0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33};
|
||||
static constexpr storage<limbs_count> inv6 = {0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22,
|
||||
0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9};
|
||||
static constexpr storage<limbs_count> inv7 = {0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210,
|
||||
0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93};
|
||||
static constexpr storage<limbs_count> inv8 = {0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87,
|
||||
0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9};
|
||||
static constexpr storage<limbs_count> inv9 = {0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3,
|
||||
0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab};
|
||||
static constexpr storage<limbs_count> inv10 = {0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61,
|
||||
0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85};
|
||||
static constexpr storage<limbs_count> inv11 = {0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af,
|
||||
0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1};
|
||||
static constexpr storage<limbs_count> inv12 = {0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57,
|
||||
0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8};
|
||||
static constexpr storage<limbs_count> inv13 = {0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab,
|
||||
0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003};
|
||||
static constexpr storage<limbs_count> inv14 = {0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054,
|
||||
0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1};
|
||||
static constexpr storage<limbs_count> inv15 = {0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9,
|
||||
0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007};
|
||||
static constexpr storage<limbs_count> inv16 = {0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54,
|
||||
0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3};
|
||||
static constexpr storage<limbs_count> inv17 = {0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29,
|
||||
0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08};
|
||||
static constexpr storage<limbs_count> inv18 = {0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094,
|
||||
0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3};
|
||||
static constexpr storage<limbs_count> inv19 = {0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9,
|
||||
0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309};
|
||||
static constexpr storage<limbs_count> inv20 = {0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164,
|
||||
0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433};
|
||||
static constexpr storage<limbs_count> inv21 = {0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31,
|
||||
0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9};
|
||||
static constexpr storage<limbs_count> inv22 = {0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198,
|
||||
0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513};
|
||||
static constexpr storage<limbs_count> inv23 = {0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b,
|
||||
0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539};
|
||||
static constexpr storage<limbs_count> inv24 = {0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5,
|
||||
0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b};
|
||||
static constexpr storage<limbs_count> inv25 = {0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651,
|
||||
0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555};
|
||||
static constexpr storage<limbs_count> inv26 = {0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8,
|
||||
0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559};
|
||||
static constexpr storage<limbs_count> inv27 = {0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3,
|
||||
0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c};
|
||||
static constexpr storage<limbs_count> inv28 = {0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9,
|
||||
0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d};
|
||||
static constexpr storage<limbs_count> inv29 = {0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4,
|
||||
0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv30 = {0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9,
|
||||
0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv31 = {0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc,
|
||||
0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> inv32 = {0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd,
|
||||
0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8,
|
||||
inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
|
||||
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24,
|
||||
inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
|
||||
};
|
||||
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8, inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
|
||||
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24, inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
|
||||
};
|
||||
};
|
||||
|
||||
struct fq_config{
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88, 0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6, 0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510, 0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc, 0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3, 0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_squared = {0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2, 0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af, 0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4, 0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f, 0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48, 0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be, 0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x8508c000, 0x30000000, 0x170b5d44,
|
||||
0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
|
||||
0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x0a118000, 0x60000001, 0x2e16ba88,
|
||||
0x74129000, 0x3de6c45f, 0x01ea271e, 0x3445b3e6,
|
||||
0xd9429276, 0x8c760b80, 0x2f8a21d5, 0x035c748c};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x14230000, 0xc0000002, 0x5c2d7510,
|
||||
0xe8252000, 0x7bcd88be, 0x03d44e3c, 0x688b67cc,
|
||||
0xb28524ec, 0x18ec1701, 0x5f1443ab, 0x06b8e918};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x8508c000, 0x30000000, 0x170b5d44, 0xba094800, 0x1ef3622f, 0x00f5138f, 0x1a22d9f3,
|
||||
0x6ca1493b, 0xc63b05c0, 0x17c510ea, 0x01ae3a46, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x0a118000, 0xf0000001, 0x7338d254, 0x2e1bd800, 0x4ada268f, 0x35f1c09a, 0x6bcbfbd2,
|
||||
0x58638c9d, 0x318324b9, 0x8bb70ae0, 0x460aaaaa, 0x502a4d6c, 0xc014e712, 0xb90660cd, 0x09d018af,
|
||||
0x3dda4d5c, 0x1f5e7141, 0xa4aee93f, 0x4bb8b87d, 0xb361263c, 0x2256913b, 0xd0bbaffb, 0x0002d307};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x14230000, 0xe0000002, 0xe671a4a9, 0x5c37b000, 0x95b44d1e, 0x6be38134, 0xd797f7a4,
|
||||
0xb0c7193a, 0x63064972, 0x176e15c0, 0x8c155555, 0xa0549ad8, 0x8029ce24, 0x720cc19b, 0x13a0315f,
|
||||
0x7bb49ab8, 0x3ebce282, 0x495dd27e, 0x977170fb, 0x66c24c78, 0x44ad2277, 0xa1775ff6, 0x0005a60f};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x28460000, 0xc0000004, 0xcce34953, 0xb86f6001, 0x2b689a3c, 0xd7c70269, 0xaf2fef48,
|
||||
0x618e3275, 0xc60c92e5, 0x2edc2b80, 0x182aaaaa, 0x40a935b1, 0x00539c49, 0xe4198337, 0x274062be,
|
||||
0xf7693570, 0x7d79c504, 0x92bba4fc, 0x2ee2e1f6, 0xcd8498f1, 0x895a44ee, 0x42eebfec, 0x000b4c1f};
|
||||
static constexpr unsigned modulus_bit_count = 377;
|
||||
static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488, 0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xffffff, 0xf73fffff, 0xffffff7a, 0xf4a2bbcf, 0xf6b7ffe8, 0x0c9dd045, 0x0aec70e1, 0xdd260cff, 0x5eb6c4e5, 0xc4fa3f93, 0x3aef1539, 0x51c5b9e8};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x934f3a1, 0xb0909a28, 0xc1cfac62, 0x3264aa55, 0x2a491ae8, 0xaccd49ca, 0xe80e9a61, 0x28b2dce9, 0x26f7c08a, 0x4d313ea1, 0x36254563, 0x161de1ee};
|
||||
static constexpr storage<limbs_count> m = {0x5e4daffc, 0x1f9fd58c, 0x89c42a59, 0xd0ed6877, 0xd85a6d02, 0x6af2d488,
|
||||
0x6776b1a0, 0x3bbad0de, 0x582ef4f7, 0x976c3ca0, 0x0cc4060e, 0x0261508d};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xffffff, 0xf73fffff, 0xffffff7a, 0xf4a2bbcf,
|
||||
0xf6b7ffe8, 0x0c9dd045, 0x0aec70e1, 0xdd260cff,
|
||||
0x5eb6c4e5, 0xc4fa3f93, 0x3aef1539, 0x51c5b9e8};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x934f3a1, 0xb0909a28, 0xc1cfac62, 0x3264aa55,
|
||||
0x2a491ae8, 0xaccd49ca, 0xe80e9a61, 0x28b2dce9,
|
||||
0x26f7c08a, 0x4d313ea1, 0x36254563, 0x161de1ee};
|
||||
// i^2, the square of the imaginary unit for the extension field
|
||||
static constexpr uint32_t i_squared = 5;
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> g1_gen_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512, 0xbd37cb5c, 0x188282c8,
|
||||
0xaa9d41bb, 0x85951e2c, 0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
|
||||
static constexpr storage<limbs_count> g1_gen_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36, 0x4fb82305, 0x6d182ad4,
|
||||
0xca3e52d9, 0xbd7fb348, 0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
|
||||
static constexpr storage<limbs_count> g2_gen_x_re = {0x7c005196, 0x74e3e48f, 0xbb535402, 0x71889f52, 0x57db6b9b, 0x7ea501f5,
|
||||
0x203e5031, 0xc565f071, 0xa3841d01, 0xc89630a2, 0x71c785fe, 0x018480be};
|
||||
static constexpr storage<limbs_count> g2_gen_x_im = {0x6ea16afe, 0xb26bfefa, 0xbff76fe6, 0x5cf89984, 0x0799c9de, 0xe7223ece,
|
||||
0x6651cecb, 0x532777ee, 0xb1b140d5, 0x70dc5a51, 0xe7004031, 0x00ea6040};
|
||||
static constexpr storage<limbs_count> g2_gen_y_re = {0x09fd4ddf, 0xf0940944, 0x6d8c7c2e, 0xf2cf8888, 0xf832d204, 0xe458c282,
|
||||
0x74b49a58, 0xde03ed72, 0xcbb2efb4, 0xd960736b, 0x5d446f7b, 0x00690d66};
|
||||
static constexpr storage<limbs_count> g2_gen_y_im = {0x85eb8f93, 0xd9a1cdd1, 0x5e52270b, 0x4279b83f, 0xcee304c2, 0x2463b01a,
|
||||
0x3d591bf1, 0x61ef11ac, 0x151a70aa, 0x9e549da3, 0xd2835518, 0x00f8169f};
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> g1_gen_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512,
|
||||
0xbd37cb5c, 0x188282c8, 0xaa9d41bb, 0x85951e2c,
|
||||
0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
|
||||
static constexpr storage<limbs_count> g1_gen_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36,
|
||||
0x4fb82305, 0x6d182ad4, 0xca3e52d9, 0xbd7fb348,
|
||||
0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
|
||||
static constexpr storage<limbs_count> g2_gen_x_re = {0x7c005196, 0x74e3e48f, 0xbb535402, 0x71889f52,
|
||||
0x57db6b9b, 0x7ea501f5, 0x203e5031, 0xc565f071,
|
||||
0xa3841d01, 0xc89630a2, 0x71c785fe, 0x018480be};
|
||||
static constexpr storage<limbs_count> g2_gen_x_im = {0x6ea16afe, 0xb26bfefa, 0xbff76fe6, 0x5cf89984,
|
||||
0x0799c9de, 0xe7223ece, 0x6651cecb, 0x532777ee,
|
||||
0xb1b140d5, 0x70dc5a51, 0xe7004031, 0x00ea6040};
|
||||
static constexpr storage<limbs_count> g2_gen_y_re = {0x09fd4ddf, 0xf0940944, 0x6d8c7c2e, 0xf2cf8888,
|
||||
0xf832d204, 0xe458c282, 0x74b49a58, 0xde03ed72,
|
||||
0xcbb2efb4, 0xd960736b, 0x5d446f7b, 0x00690d66};
|
||||
static constexpr storage<limbs_count> g2_gen_y_im = {0x85eb8f93, 0xd9a1cdd1, 0x5e52270b, 0x4279b83f,
|
||||
0xcee304c2, 0x2463b01a, 0x3d591bf1, 0x61ef11ac,
|
||||
0x151a70aa, 0x9e549da3, 0xd2835518, 0x00f8169f};
|
||||
};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x9999999a, 0x1c9ed999, 0x1ccccccd, 0x0dd39e5c, 0x3c6bf800, 0x129207b6,
|
||||
0xcd5fd889, 0xdc7b4f91, 0x7460c589, 0x43bd0373, 0xdb0fd6f3, 0x010222f6};
|
||||
}
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
|
||||
0x9999999a, 0x1c9ed999, 0x1ccccccd, 0x0dd39e5c, 0x3c6bf800, 0x129207b6,
|
||||
0xcd5fd889, 0xdc7b4f91, 0x7460c589, 0x43bd0373, 0xdb0fd6f3, 0x010222f6};
|
||||
} // namespace PARAMS_BLS12_377
|
||||
|
||||
@@ -1,50 +1,45 @@
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include <cuda.h>
|
||||
|
||||
extern "C" BLS12_377::projective_t random_projective_bls12_377()
|
||||
{
|
||||
return BLS12_377::projective_t::rand_host();
|
||||
}
|
||||
extern "C" BLS12_377::projective_t random_projective_bls12_377() { return BLS12_377::projective_t::rand_host(); }
|
||||
|
||||
extern "C" BLS12_377::projective_t projective_zero_bls12_377()
|
||||
{
|
||||
return BLS12_377::projective_t::zero();
|
||||
}
|
||||
extern "C" BLS12_377::projective_t projective_zero_bls12_377() { return BLS12_377::projective_t::zero(); }
|
||||
|
||||
extern "C" bool projective_is_on_curve_bls12_377(BLS12_377::projective_t *point1)
|
||||
extern "C" bool projective_is_on_curve_bls12_377(BLS12_377::projective_t* point1)
|
||||
{
|
||||
return BLS12_377::projective_t::is_on_curve(*point1);
|
||||
}
|
||||
|
||||
extern "C" BLS12_377::affine_t projective_to_affine_bls12_377(BLS12_377::projective_t *point1)
|
||||
extern "C" BLS12_377::affine_t projective_to_affine_bls12_377(BLS12_377::projective_t* point1)
|
||||
{
|
||||
return BLS12_377::projective_t::to_affine(*point1);
|
||||
}
|
||||
|
||||
extern "C" BLS12_377::projective_t projective_from_affine_bls12_377(BLS12_377::affine_t *point1)
|
||||
extern "C" BLS12_377::projective_t projective_from_affine_bls12_377(BLS12_377::affine_t* point1)
|
||||
{
|
||||
return BLS12_377::projective_t::from_affine(*point1);
|
||||
}
|
||||
|
||||
extern "C" BLS12_377::scalar_field_t random_scalar_bls12_377()
|
||||
{
|
||||
return BLS12_377::scalar_field_t::rand_host();
|
||||
}
|
||||
extern "C" BLS12_377::scalar_field_t random_scalar_bls12_377() { return BLS12_377::scalar_field_t::rand_host(); }
|
||||
|
||||
extern "C" bool eq_bls12_377(BLS12_377::projective_t *point1, BLS12_377::projective_t *point2)
|
||||
extern "C" bool eq_bls12_377(BLS12_377::projective_t* point1, BLS12_377::projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_377::point_field_t::zero()) && (point1->y == BLS12_377::point_field_t::zero()) && (point1->z == BLS12_377::point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_377::point_field_t::zero()) && (point2->y == BLS12_377::point_field_t::zero()) && (point2->z == BLS12_377::point_field_t::zero()));
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_377::point_field_t::zero()) && (point1->y == BLS12_377::point_field_t::zero()) &&
|
||||
(point1->z == BLS12_377::point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_377::point_field_t::zero()) && (point2->y == BLS12_377::point_field_t::zero()) &&
|
||||
(point2->z == BLS12_377::point_field_t::zero()));
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t *point1, BLS12_377::g2_projective_t *point2)
|
||||
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t* point1, BLS12_377::g2_projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_377::g2_point_field_t::zero()) && (point1->y == BLS12_377::g2_point_field_t::zero()) && (point1->z == BLS12_377::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_377::g2_point_field_t::zero()) && (point2->y == BLS12_377::g2_point_field_t::zero()) && (point2->z == BLS12_377::g2_point_field_t::zero()));
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_377::g2_point_field_t::zero()) && (point1->y == BLS12_377::g2_point_field_t::zero()) &&
|
||||
(point1->z == BLS12_377::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_377::g2_point_field_t::zero()) && (point2->y == BLS12_377::g2_point_field_t::zero()) &&
|
||||
(point2->z == BLS12_377::g2_point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" BLS12_377::g2_projective_t random_g2_projective_bls12_377()
|
||||
@@ -52,17 +47,17 @@ extern "C" BLS12_377::g2_projective_t random_g2_projective_bls12_377()
|
||||
return BLS12_377::g2_projective_t::rand_host();
|
||||
}
|
||||
|
||||
extern "C" BLS12_377::g2_affine_t g2_projective_to_affine_bls12_377(BLS12_377::g2_projective_t *point1)
|
||||
extern "C" BLS12_377::g2_affine_t g2_projective_to_affine_bls12_377(BLS12_377::g2_projective_t* point1)
|
||||
{
|
||||
return BLS12_377::g2_projective_t::to_affine(*point1);
|
||||
}
|
||||
|
||||
extern "C" BLS12_377::g2_projective_t g2_projective_from_affine_bls12_377(BLS12_377::g2_affine_t *point1)
|
||||
extern "C" BLS12_377::g2_projective_t g2_projective_from_affine_bls12_377(BLS12_377::g2_affine_t* point1)
|
||||
{
|
||||
return BLS12_377::g2_projective_t::from_affine(*point1);
|
||||
}
|
||||
|
||||
extern "C" bool g2_projective_is_on_curve_bls12_377(BLS12_377::g2_projective_t *point1)
|
||||
extern "C" bool g2_projective_is_on_curve_bls12_377(BLS12_377::g2_projective_t* point1)
|
||||
{
|
||||
return BLS12_377::g2_projective_t::is_on_curve(*point1);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "projective.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
@@ -1,88 +1,78 @@
|
||||
#ifndef _BLS12_377_VEC_MULT
|
||||
#define _BLS12_377_VEC_MULT
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
extern "C" int32_t vec_mod_mult_point_bls12_377(BLS12_377::projective_t *inout,
|
||||
BLS12_377::scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
extern "C" int32_t vec_mod_mult_point_bls12_377(
|
||||
BLS12_377::projective_t* inout,
|
||||
BLS12_377::scalar_t* scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
try {
|
||||
// TODO: device_id
|
||||
vector_mod_mult<BLS12_377::projective_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t vec_mod_mult_scalar_bls12_377(BLS12_377::scalar_t *inout,
|
||||
BLS12_377::scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
extern "C" int32_t vec_mod_mult_scalar_bls12_377(
|
||||
BLS12_377::scalar_t* inout,
|
||||
BLS12_377::scalar_t* scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
try {
|
||||
// TODO: device_id
|
||||
vector_mod_mult<BLS12_377::scalar_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t vec_mod_mult_device_scalar_bls12_377(
|
||||
BLS12_377::scalar_t *inout,
|
||||
BLS12_377::scalar_t *scalar_vec,
|
||||
size_t n_elements,
|
||||
size_t device_id
|
||||
) {
|
||||
BLS12_377::scalar_t* inout, BLS12_377::scalar_t* scalar_vec, size_t n_elements, size_t device_id)
|
||||
{
|
||||
try {
|
||||
vector_mod_mult_device<BLS12_377::scalar_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elements);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error &ex) {
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t matrix_vec_mod_mult_bls12_377(BLS12_377::scalar_t *matrix_flattened,
|
||||
BLS12_377::scalar_t *input,
|
||||
BLS12_377::scalar_t *output,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
extern "C" int32_t matrix_vec_mod_mult_bls12_377(
|
||||
BLS12_377::scalar_t* matrix_flattened,
|
||||
BLS12_377::scalar_t* input,
|
||||
BLS12_377::scalar_t* output,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
try {
|
||||
// TODO: device_id
|
||||
matrix_mod_mult<BLS12_377::scalar_t>(matrix_flattened, input, output, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -9,17 +9,17 @@
|
||||
#include "params.cuh"
|
||||
|
||||
namespace BLS12_381 {
|
||||
typedef Field<PARAMS_BLS12_381::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_BLS12_381::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_381::weierstrass_b };
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_BLS12_381::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BLS12_381::weierstrass_b_g2_re },
|
||||
point_field_t{ PARAMS_BLS12_381::weierstrass_b_g2_im }};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
}
|
||||
typedef Field<PARAMS_BLS12_381::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_BLS12_381::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{PARAMS_BLS12_381::weierstrass_b};
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_BLS12_381::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{
|
||||
point_field_t{PARAMS_BLS12_381::weierstrass_b_g2_re}, point_field_t{PARAMS_BLS12_381::weierstrass_b_g2_im}};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
} // namespace BLS12_381
|
||||
@@ -1,523 +1,560 @@
|
||||
#ifndef _BLS12_381_LDE
|
||||
#define _BLS12_381_LDE
|
||||
#include <cuda.h>
|
||||
#include "../../appUtils/ntt/lde.cu"
|
||||
#include "../../appUtils/ntt/ntt.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../../utils/mont.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include <cuda.h>
|
||||
|
||||
extern "C" BLS12_381::scalar_t* build_domain_cuda_bls12_381(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" BLS12_381::scalar_t* build_domain_cuda_bls12_381(
|
||||
uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
if (inverse) {
|
||||
return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega_inv(logn), stream);
|
||||
} else {
|
||||
return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega(logn), stream);
|
||||
}
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return nullptr;
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
if (inverse) {
|
||||
return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega_inv(logn), stream);
|
||||
} else {
|
||||
return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega(logn), stream);
|
||||
}
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t n, bool inverse, Decimation decimation, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int ntt_cuda_bls12_381(
|
||||
BLS12_381::scalar_t* arr,
|
||||
uint32_t n,
|
||||
bool inverse,
|
||||
Decimation decimation,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_template<BLS12_381::scalar_t, BLS12_381::scalar_t>(
|
||||
arr, n, inverse, stream); // TODO: pass device_id
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ecntt_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t n, bool inverse, Decimation decimation, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int ecntt_cuda_bls12_381(
|
||||
BLS12_381::projective_t* arr,
|
||||
uint32_t n,
|
||||
bool inverse,
|
||||
Decimation decimation,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_template<BLS12_381::projective_t, BLS12_381::scalar_t>(
|
||||
arr, n, inverse, stream); // TODO: pass device_id
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_batch_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int ntt_batch_cuda_bls12_381(
|
||||
BLS12_381::scalar_t* arr,
|
||||
uint32_t arr_size,
|
||||
uint32_t batch_size,
|
||||
bool inverse,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_batch_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_batch_template<BLS12_381::scalar_t, BLS12_381::scalar_t>(
|
||||
arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ecntt_batch_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int ecntt_batch_cuda_bls12_381(
|
||||
BLS12_381::projective_t* arr,
|
||||
uint32_t arr_size,
|
||||
uint32_t batch_size,
|
||||
bool inverse,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_batch_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_batch_template<BLS12_381::projective_t, BLS12_381::scalar_t>(
|
||||
arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int interpolate_scalars_cuda_bls12_381(
|
||||
BLS12_381::scalar_t* d_out,
|
||||
BLS12_381::scalar_t* d_evaluations,
|
||||
BLS12_381::scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, false, _null, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_evaluations, BLS12_381::scalar_t* d_domain, unsigned n,
|
||||
unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int interpolate_scalars_batch_cuda_bls12_381(
|
||||
BLS12_381::scalar_t* d_out,
|
||||
BLS12_381::scalar_t* d_evaluations,
|
||||
BLS12_381::scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int interpolate_points_cuda_bls12_381(
|
||||
BLS12_381::projective_t* d_out,
|
||||
BLS12_381::projective_t* d_evaluations,
|
||||
BLS12_381::scalar_t* d_domain,
|
||||
unsigned n,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, false, _null, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_evaluations, BLS12_381::scalar_t* d_domain,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int interpolate_points_batch_cuda_bls12_381(
|
||||
BLS12_381::projective_t* d_out,
|
||||
BLS12_381::projective_t* d_evaluations,
|
||||
BLS12_381::scalar_t* d_domain,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int evaluate_scalars_cuda_bls12_381(
|
||||
BLS12_381::scalar_t* d_out,
|
||||
BLS12_381::scalar_t* d_coefficients,
|
||||
BLS12_381::scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int evaluate_scalars_batch_cuda_bls12_381(
|
||||
BLS12_381::scalar_t* d_out,
|
||||
BLS12_381::scalar_t* d_coefficients,
|
||||
BLS12_381::scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
auto result_code = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, 0);
|
||||
cudaStreamDestroy(stream);
|
||||
return result_code;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
auto result_code = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, 0);
|
||||
cudaStreamDestroy(stream);
|
||||
return result_code;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain,
|
||||
unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int evaluate_points_cuda_bls12_381(
|
||||
BLS12_381::projective_t* d_out,
|
||||
BLS12_381::projective_t* d_coefficients,
|
||||
BLS12_381::scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int evaluate_points_batch_cuda_bls12_381(
|
||||
BLS12_381::projective_t* d_out,
|
||||
BLS12_381::projective_t* d_coefficients,
|
||||
BLS12_381::scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
auto result_code = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
|
||||
cudaStreamDestroy(stream);
|
||||
return result_code;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
auto result_code =
|
||||
evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
|
||||
cudaStreamDestroy(stream);
|
||||
return result_code;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_on_coset_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, BLS12_381::scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int evaluate_scalars_on_coset_cuda_bls12_381(
|
||||
BLS12_381::scalar_t* d_out,
|
||||
BLS12_381::scalar_t* d_coefficients,
|
||||
BLS12_381::scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
BLS12_381::scalar_t* coset_powers,
|
||||
unsigned device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_381(
|
||||
BLS12_381::scalar_t* d_out,
|
||||
BLS12_381::scalar_t* d_coefficients,
|
||||
BLS12_381::scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
BLS12_381::scalar_t* coset_powers,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_on_coset_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, BLS12_381::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int evaluate_points_on_coset_cuda_bls12_381(
|
||||
BLS12_381::projective_t* d_out,
|
||||
BLS12_381::projective_t* d_coefficients,
|
||||
BLS12_381::scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
BLS12_381::scalar_t* coset_powers,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream); //TODO: don't create if default was passed, destroy what was created, same applies to all calls
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(
|
||||
&stream); // TODO: don't create if default was passed, destroy what was created, same applies to all calls
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_381(
|
||||
BLS12_381::projective_t* d_out,
|
||||
BLS12_381::projective_t* d_coefficients,
|
||||
BLS12_381::scalar_t* d_domain,
|
||||
unsigned domain_size,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
BLS12_381::scalar_t* coset_powers,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_inplace_batch_cuda_bls12_381(BLS12_381::scalar_t* d_inout, BLS12_381::scalar_t* d_twiddles,
|
||||
unsigned n, unsigned batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int ntt_inplace_batch_cuda_bls12_381(
|
||||
BLS12_381::scalar_t* d_inout,
|
||||
BLS12_381::scalar_t* d_twiddles,
|
||||
unsigned n,
|
||||
unsigned batch_size,
|
||||
bool inverse,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
ntt_inplace_batch_template(d_inout, d_twiddles, n, batch_size, inverse, false, _null, stream, true);
|
||||
return CUDA_SUCCESS; //TODO: we should implement this https://leimao.github.io/blog/Proper-CUDA-Error-Checking/
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
BLS12_381::scalar_t* _null = nullptr;
|
||||
ntt_inplace_batch_template(d_inout, d_twiddles, n, batch_size, inverse, false, _null, stream, true);
|
||||
return CUDA_SUCCESS; // TODO: we should implement this https://leimao.github.io/blog/Proper-CUDA-Error-Checking/
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_scalars_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int
|
||||
reverse_order_scalars_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaStreamCreate(&stream);
|
||||
reverse_order(arr, n, logn, stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaStreamCreate(&stream);
|
||||
reverse_order(arr, n, logn, stream);
|
||||
return 0;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int reverse_order_scalars_batch_cuda_bls12_381(
|
||||
BLS12_381::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaStreamCreate(&stream);
|
||||
reverse_order_batch(arr, n, logn, batch_size, stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaStreamCreate(&stream);
|
||||
reverse_order_batch(arr, n, logn, batch_size, stream);
|
||||
return 0;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int reverse_order_points_cuda_bls12_381(BLS12_381::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int
|
||||
reverse_order_points_cuda_bls12_381(BLS12_381::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaStreamCreate(&stream);
|
||||
reverse_order(arr, n, logn, stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaStreamCreate(&stream);
|
||||
reverse_order(arr, n, logn, stream);
|
||||
return 0;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int sub_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_in1, BLS12_381::scalar_t* d_in2, unsigned n, cudaStream_t stream = 0)
|
||||
extern "C" int sub_scalars_cuda_bls12_381(
|
||||
BLS12_381::scalar_t* d_out,
|
||||
BLS12_381::scalar_t* d_in1,
|
||||
BLS12_381::scalar_t* d_in2,
|
||||
unsigned n,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return sub_polys(d_out, d_in1, d_in2, n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return sub_polys(d_out, d_in1, d_in2, n, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int add_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_in1, BLS12_381::scalar_t* d_in2, unsigned n, cudaStream_t stream = 0)
|
||||
extern "C" int add_scalars_cuda_bls12_381(
|
||||
BLS12_381::scalar_t* d_out,
|
||||
BLS12_381::scalar_t* d_in1,
|
||||
BLS12_381::scalar_t* d_in2,
|
||||
unsigned n,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return add_polys(d_out, d_in1, d_in2, n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return add_polys(d_out, d_in1, d_in2, n, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int to_montgomery_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery(d_inout, n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery(d_inout, n, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int from_montgomery_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery(d_inout, n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery(d_inout, n, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int to_montgomery_proj_points_cuda_bls12_381(BLS12_381::projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
extern "C" int
|
||||
to_montgomery_proj_points_cuda_bls12_381(BLS12_381::projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery((BLS12_381::point_field_t*)d_inout, 3 * n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery((BLS12_381::point_field_t*)d_inout, 3 * n, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int from_montgomery_proj_points_cuda_bls12_381(BLS12_381::projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
extern "C" int
|
||||
from_montgomery_proj_points_cuda_bls12_381(BLS12_381::projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery((BLS12_381::point_field_t*)d_inout, 3 * n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery((BLS12_381::point_field_t*)d_inout, 3 * n, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int to_montgomery_aff_points_cuda_bls12_381(BLS12_381::affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
extern "C" int
|
||||
to_montgomery_aff_points_cuda_bls12_381(BLS12_381::affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery((BLS12_381::point_field_t*)d_inout, 2 * n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery((BLS12_381::point_field_t*)d_inout, 2 * n, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int from_montgomery_aff_points_cuda_bls12_381(BLS12_381::affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
extern "C" int
|
||||
from_montgomery_aff_points_cuda_bls12_381(BLS12_381::affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery((BLS12_381::point_field_t*)d_inout, 2 * n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery((BLS12_381::point_field_t*)d_inout, 2 * n, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
extern "C" int to_montgomery_proj_points_g2_cuda_bls12_381(BLS12_381::g2_projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
extern "C" int
|
||||
to_montgomery_proj_points_g2_cuda_bls12_381(BLS12_381::g2_projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery((BLS12_381::point_field_t*)d_inout, 6 * n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery((BLS12_381::point_field_t*)d_inout, 6 * n, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int from_montgomery_proj_points_g2_cuda_bls12_381(BLS12_381::g2_projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
extern "C" int
|
||||
from_montgomery_proj_points_g2_cuda_bls12_381(BLS12_381::g2_projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery((BLS12_381::point_field_t*)d_inout, 6 * n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery((BLS12_381::point_field_t*)d_inout, 6 * n, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int to_montgomery_aff_points_g2_cuda_bls12_381(BLS12_381::g2_affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
extern "C" int
|
||||
to_montgomery_aff_points_g2_cuda_bls12_381(BLS12_381::g2_affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery((BLS12_381::point_field_t*)d_inout, 4 * n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery((BLS12_381::point_field_t*)d_inout, 4 * n, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int from_montgomery_aff_points_g2_cuda_bls12_381(BLS12_381::g2_affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
extern "C" int
|
||||
from_montgomery_aff_points_g2_cuda_bls12_381(BLS12_381::g2_affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery((BLS12_381::point_field_t*)d_inout, 4 * n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery((BLS12_381::point_field_t*)d_inout, 4 * n, stream);
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
extern "C" int reverse_order_points_batch_cuda_bls12_381(BLS12_381::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int reverse_order_points_batch_cuda_bls12_381(
|
||||
BLS12_381::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaStreamCreate(&stream);
|
||||
reverse_order_batch(arr, n, logn, batch_size, stream);
|
||||
return 0;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
uint32_t logn = uint32_t(log(n) / log(2));
|
||||
cudaStreamCreate(&stream);
|
||||
reverse_order_batch(arr, n, logn, batch_size, stream);
|
||||
return 0;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -1,41 +1,47 @@
|
||||
#ifndef _BLS12_381_MSM
|
||||
#define _BLS12_381_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include <cuda.h>
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
extern "C"
|
||||
int msm_cuda_bls12_381(BLS12_381::projective_t *out, BLS12_381::affine_t points[],
|
||||
BLS12_381::scalar_t scalars[], size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0) //TODO: unify parameter types size_t/unsigned etc
|
||||
extern "C" int msm_cuda_bls12_381(
|
||||
BLS12_381::projective_t* out,
|
||||
BLS12_381::affine_t points[],
|
||||
BLS12_381::scalar_t scalars[],
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0) // TODO: unify parameter types size_t/unsigned etc
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, count, out, false, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int msm_batch_cuda_bls12_381(BLS12_381::projective_t* out, BLS12_381::affine_t points[],
|
||||
BLS12_381::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
|
||||
large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(
|
||||
scalars, points, count, out, false, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
}
|
||||
|
||||
extern "C" int msm_batch_cuda_bls12_381(
|
||||
BLS12_381::projective_t* out,
|
||||
BLS12_381::affine_t points[],
|
||||
BLS12_381::scalar_t scalars[],
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(
|
||||
scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
@@ -43,144 +49,168 @@ extern "C" int msm_batch_cuda_bls12_381(BLS12_381::projective_t* out, BLS12_381:
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
extern "C"
|
||||
int msm_g2_cuda_bls12_381(BLS12_381::g2_projective_t *out, BLS12_381::g2_affine_t points[],
|
||||
BLS12_381::scalar_t scalars[], size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int commit_cuda_bls12_381(
|
||||
BLS12_381::projective_t* d_out,
|
||||
BLS12_381::scalar_t* d_scalars,
|
||||
BLS12_381::affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm<BLS12_381::scalar_t, BLS12_381::g2_projective_t, BLS12_381::g2_affine_t>(scalars, points, count, out, false, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int msm_batch_g2_cuda_bls12_381(BLS12_381::g2_projective_t* out, BLS12_381::g2_affine_t points[],
|
||||
BLS12_381::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C" int commit_batch_cuda_bls12_381(
|
||||
BLS12_381::projective_t* d_out,
|
||||
BLS12_381::scalar_t* d_scalars,
|
||||
BLS12_381::affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<BLS12_381::scalar_t, BLS12_381::g2_projective_t, BLS12_381::g2_affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
extern "C" int msm_g2_cuda_bls12_381(
|
||||
BLS12_381::g2_projective_t* out,
|
||||
BLS12_381::g2_affine_t points[],
|
||||
BLS12_381::scalar_t scalars[],
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm<BLS12_381::scalar_t, BLS12_381::g2_projective_t, BLS12_381::g2_affine_t>(
|
||||
scalars, points, count, out, false, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int msm_batch_g2_cuda_bls12_381(
|
||||
BLS12_381::g2_projective_t* out,
|
||||
BLS12_381::g2_affine_t points[],
|
||||
BLS12_381::scalar_t scalars[],
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<BLS12_381::scalar_t, BLS12_381::g2_projective_t, BLS12_381::g2_affine_t>(
|
||||
scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM in G2 group.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut G2 point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points G2 affine points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_g2_cuda_bls12_381(BLS12_381::g2_projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::g2_affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int commit_g2_cuda_bls12_381(
|
||||
BLS12_381::g2_projective_t* d_out,
|
||||
BLS12_381::scalar_t* d_scalars,
|
||||
BLS12_381::g2_affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Ouptut G2 point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points G2 affine points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_g2_cuda_bls12_381(BLS12_381::g2_projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::g2_affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut G2 point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points G2 affine points for the MSMs. Must be on device. It is assumed that this set of bases is used for
|
||||
* each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C" int commit_batch_g2_cuda_bls12_381(
|
||||
BLS12_381::g2_projective_t* d_out,
|
||||
BLS12_381::scalar_t* d_scalars,
|
||||
BLS12_381::g2_affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -1,219 +1,411 @@
|
||||
#pragma once
|
||||
#include "../../utils/storage.cuh"
|
||||
|
||||
namespace PARAMS_BLS12_381{
|
||||
namespace PARAMS_BLS12_381 {
|
||||
struct fp_config {
|
||||
// field structure size = 8 * 32 bit
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 32;
|
||||
// modulus = 52435875175126190479447740508185965837690552500527637822603658699938581184513
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402,
|
||||
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
// modulus*2 = 104871750350252380958895481016371931675381105001055275645207317399877162369026
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805, 0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0xfffffffe, 0xfffcb7fd, 0xa77b4805,
|
||||
0x1343b00a, 0x6673b010, 0x533afa90, 0xe7db4ea6};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
// modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_squared = {0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0xfffffffe, 0xfffcb7fe, 0xa77e9007, 0x1cdbb005, 0x698ae002, 0x5433f7b8, 0x48aa415e,
|
||||
0x4aa9c661, 0xc2611f6f, 0x59934a1d, 0x0e9593f9, 0xef2cc20f, 0x520c13db, 0xf4bc2778, 0x347f60f3};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
// note: doesnt actually fit into 384 bits, and shouldnt be used! is added for compilation
|
||||
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000002, 0xfffffffc, 0xfff96ffd, 0x4efd200f, 0x39b7600b, 0xd315c004, 0xa867ef70, 0x915482bc,
|
||||
0x95538cc2, 0x84c23ede, 0xb326943b, 0x1d2b27f2, 0xde59841e, 0xa41827b7, 0xe9784ef0, 0x68fec1e7};
|
||||
static constexpr unsigned modulus_bit_count = 255;
|
||||
// m = floor(2^(2*modulus_bit_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad, 0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
|
||||
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xfffffffe, 0x00000001, 0x00034802, 0x5884b7fa, 0xecbc4ff5, 0x998c4fef, 0xacc5056f, 0x1824b159};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0xfe75c040, 0x13f75b69, 0x09dc705f, 0xab6fca8f, 0x4f77266a, 0x7204078a, 0x30009d57, 0x1bbe8693};
|
||||
|
||||
// static constexpr storage<limbs_count> omega[32]= { {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000}, {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d}, {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e}, {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb}, {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac}, {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802}, {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59}, {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667}, {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098}, {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b}, {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0}, {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8}, {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8}, {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911}, {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd}, {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333}, {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db}, {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83}, {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 
0x5f686d91, 0x3436287f}, {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5}, {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3}, {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd}, {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc}, {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd}, {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580}, {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d}, {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d}, {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f}, {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b}, {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72}, {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> omega1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega2= {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega3= {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d};
|
||||
static constexpr storage<limbs_count> omega4= {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e};
|
||||
static constexpr storage<limbs_count> omega5= {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb};
|
||||
static constexpr storage<limbs_count> omega6= {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac};
|
||||
static constexpr storage<limbs_count> omega7= {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802};
|
||||
static constexpr storage<limbs_count> omega8= {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59};
|
||||
static constexpr storage<limbs_count> omega9= {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c, 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667};
|
||||
static constexpr storage<limbs_count> omega10= {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098};
|
||||
static constexpr storage<limbs_count> omega11= {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b};
|
||||
static constexpr storage<limbs_count> omega12= {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0};
|
||||
static constexpr storage<limbs_count> omega13= {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8};
|
||||
static constexpr storage<limbs_count> omega14= {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8};
|
||||
static constexpr storage<limbs_count> omega15= {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911};
|
||||
static constexpr storage<limbs_count> omega16= {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd};
|
||||
static constexpr storage<limbs_count> omega17= {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333};
|
||||
static constexpr storage<limbs_count> omega18= {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d, 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db};
|
||||
static constexpr storage<limbs_count> omega19= {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83};
|
||||
static constexpr storage<limbs_count> omega20= {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f};
|
||||
static constexpr storage<limbs_count> omega21= {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5};
|
||||
static constexpr storage<limbs_count> omega22= {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3};
|
||||
static constexpr storage<limbs_count> omega23= {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd};
|
||||
static constexpr storage<limbs_count> omega24= {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc};
|
||||
static constexpr storage<limbs_count> omega25= {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd};
|
||||
static constexpr storage<limbs_count> omega26= {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580};
|
||||
static constexpr storage<limbs_count> omega27= {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f, 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d};
|
||||
static constexpr storage<limbs_count> omega28= {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d};
|
||||
static constexpr storage<limbs_count> omega29= {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f};
|
||||
static constexpr storage<limbs_count> omega30= {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
|
||||
static constexpr storage<limbs_count> omega31= {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
|
||||
static constexpr storage<limbs_count> omega32= {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
|
||||
static constexpr storage<limbs_count> m = {0x830358e4, 0x509cde80, 0x2f92eb5c, 0xd9410fad,
|
||||
0xc1f823b4, 0xe2d772d, 0x7fb78ddf, 0x8d54253b};
|
||||
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xfffffffe, 0x00000001, 0x00034802, 0x5884b7fa,
|
||||
0xecbc4ff5, 0x998c4fef, 0xacc5056f, 0x1824b159};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0xfe75c040, 0x13f75b69, 0x09dc705f, 0xab6fca8f,
|
||||
0x4f77266a, 0x7204078a, 0x30009d57, 0x1bbe8693};
|
||||
|
||||
// static constexpr storage<limbs_count> omega[32]= { {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805,
|
||||
// 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000000, 0x00010000, 0x76030000, 0xec030002, 0x760304d0, 0x8d51ccce,
|
||||
// 0x00000000, 0x00000000}, {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240, 0x98ca5b22, 0xa733b23a, 0x25a31660,
|
||||
// 0x3f96405d}, {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672, 0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e},
|
||||
// {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c, 0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb}, {0xac5db47f,
|
||||
// 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6, 0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac}, {0xab28e208, 0xb750da4c,
|
||||
// 0x3be95635, 0x501dff64, 0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802}, {0x2fe322b8, 0x2cabadec, 0x15412560,
|
||||
// 0x752c84f3, 0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59}, {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c,
|
||||
// 0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667}, {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0, 0x6350721d,
|
||||
// 0x3ed6d55a, 0x58f43cef, 0x2f27b098}, {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14, 0x620890d7, 0xeb674a1a,
|
||||
// 0xca252472, 0x43527a8b}, {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171, 0x93f9e9ac, 0xe155cb48, 0xc8e9101b,
|
||||
// 0x110cebd0}, {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce, 0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8},
|
||||
// {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727, 0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8}, {0xa97eccd4,
|
||||
// 0xe6a354dd, 0x88fbbc57, 0x39929d2e, 0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911}, {0xcfc35f7a, 0x137b458a,
|
||||
// 0x29c01b06, 0x0caba63a, 0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd}, {0x8831e03e, 0x10251f7d, 0x7ff858ec,
|
||||
// 0x77d85a93, 0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333}, {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d,
|
||||
// 0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db}, {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673, 0x22cc3253,
|
||||
// 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83}, {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa, 0x537d6971, 0x556c35f6,
|
||||
// 0x5f686d91, 0x3436287f}, {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f, 0xfb4460f7, 0x36f8f165, 0x7e7046e0,
|
||||
// 0x6eee34d5}, {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42, 0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3},
|
||||
// {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e, 0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd}, {0x1ab70e2c,
|
||||
// 0x5b90153a, 0x75fb0ab8, 0x8deffa31, 0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc}, {0x59a2e8eb, 0x801c894c,
|
||||
// 0xe12fc974, 0xbc535c5c, 0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd}, {0xcca1d8be, 0x810fa372, 0x82e0bfa7,
|
||||
// 0xc67b8c28, 0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580}, {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f,
|
||||
// 0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d}, {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a, 0xd274a80a,
|
||||
// 0x97ae418d, 0x5e3e7682, 0x2967385d}, {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157, 0xa04fccf3, 0xc3974d73,
|
||||
// 0x4a939684, 0x705aba4f}, {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e, 0x811c1dfb, 0x04287254, 0x23b30c29,
|
||||
// 0x086d072b}, {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9, 0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72},
|
||||
// {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2, 0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e}}; Quick fix for
|
||||
// linking issue
|
||||
static constexpr storage<limbs_count> omega1 = {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402,
|
||||
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega2 = {0x00000000, 0x00010000, 0x76030000, 0xec030002,
|
||||
0x760304d0, 0x8d51ccce, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> omega3 = {0x688bc087, 0x8dd702cb, 0x78eaa4fe, 0xa0328240,
|
||||
0x98ca5b22, 0xa733b23a, 0x25a31660, 0x3f96405d};
|
||||
static constexpr storage<limbs_count> omega4 = {0x0411fe73, 0x95df4b36, 0xebc1e1bb, 0x1ef4e672,
|
||||
0x60afca4a, 0x6e92a9c4, 0x753e4fcc, 0x4f2c596e};
|
||||
static constexpr storage<limbs_count> omega5 = {0xba60eaa6, 0x9733f3a6, 0x77487ae7, 0xbd7fdf9c,
|
||||
0xc8b6cc00, 0xd84f8612, 0x6162ffab, 0x476fa2fb};
|
||||
static constexpr storage<limbs_count> omega6 = {0xac5db47f, 0xd2fc5e69, 0x15d0b8e4, 0xa12a70a6,
|
||||
0xbc8de5d9, 0x293b1d67, 0x57f86f5e, 0x0e4840ac};
|
||||
static constexpr storage<limbs_count> omega7 = {0xab28e208, 0xb750da4c, 0x3be95635, 0x501dff64,
|
||||
0xf0b4b276, 0x8cbe2437, 0xa94a946e, 0x07d0c802};
|
||||
static constexpr storage<limbs_count> omega8 = {0x2fe322b8, 0x2cabadec, 0x15412560, 0x752c84f3,
|
||||
0x1a3b0aef, 0x32a732ae, 0xa33dcbf2, 0x2e95da59};
|
||||
static constexpr storage<limbs_count> omega9 = {0xfe0c65f4, 0x33811ea1, 0x687f28a2, 0x15c1ad4c,
|
||||
0x42dee7f4, 0xecfbede3, 0x9a5d88b1, 0x1bb46667};
|
||||
static constexpr storage<limbs_count> omega10 = {0x2d010ff9, 0xd58a5af4, 0x570bf109, 0x79efd6b0,
|
||||
0x6350721d, 0x3ed6d55a, 0x58f43cef, 0x2f27b098};
|
||||
static constexpr storage<limbs_count> omega11 = {0x8c130477, 0x74a1f671, 0xb61e0abe, 0xa534af14,
|
||||
0x620890d7, 0xeb674a1a, 0xca252472, 0x43527a8b};
|
||||
static constexpr storage<limbs_count> omega12 = {0x7ea8ee05, 0x450d9f97, 0x37d56fc0, 0x565af171,
|
||||
0x93f9e9ac, 0xe155cb48, 0xc8e9101b, 0x110cebd0};
|
||||
static constexpr storage<limbs_count> omega13 = {0x59a0be92, 0x23c91599, 0x7a027759, 0x87d188ce,
|
||||
0xcab3c3cc, 0x70491431, 0xb3f7f8da, 0x0ac00eb8};
|
||||
static constexpr storage<limbs_count> omega14 = {0x69583404, 0x13e96ade, 0x5306243d, 0x82c05727,
|
||||
0x29ca9f2a, 0x77e48bf5, 0x1fe19595, 0x50646ac8};
|
||||
static constexpr storage<limbs_count> omega15 = {0xa97eccd4, 0xe6a354dd, 0x88fbbc57, 0x39929d2e,
|
||||
0xd6e7b1c8, 0xa22ba63d, 0xf5f07f43, 0x42c22911};
|
||||
static constexpr storage<limbs_count> omega16 = {0xcfc35f7a, 0x137b458a, 0x29c01b06, 0x0caba63a,
|
||||
0x7a02402c, 0x0409ee98, 0x56aa725b, 0x6709c6cd};
|
||||
static constexpr storage<limbs_count> omega17 = {0x8831e03e, 0x10251f7d, 0x7ff858ec, 0x77d85a93,
|
||||
0x4fb9ac5c, 0xebe905bd, 0xf8727901, 0x05deb333};
|
||||
static constexpr storage<limbs_count> omega18 = {0xb9009408, 0xbf87b689, 0xdd3ccc96, 0x4f730e7d,
|
||||
0x4610300c, 0xfd7f05ba, 0x0b8ac903, 0x5ef5e8db};
|
||||
static constexpr storage<limbs_count> omega19 = {0x17cd0c14, 0x64996884, 0x68812f7f, 0xa6728673,
|
||||
0x22cc3253, 0x2e1d9a19, 0xaa0a1d80, 0x3a689e83};
|
||||
static constexpr storage<limbs_count> omega20 = {0x41144dea, 0x20b53cbe, 0xc2f0fcbd, 0x870c46fa,
|
||||
0x537d6971, 0x556c35f6, 0x5f686d91, 0x3436287f};
|
||||
static constexpr storage<limbs_count> omega21 = {0x436ba2e7, 0x007e082a, 0x9116e877, 0x67c6630f,
|
||||
0xfb4460f7, 0x36f8f165, 0x7e7046e0, 0x6eee34d5};
|
||||
static constexpr storage<limbs_count> omega22 = {0xa53a56d1, 0xc5b670ee, 0x53037d7b, 0x127d1f42,
|
||||
0xa722c2e2, 0x57d4257e, 0x33cbd838, 0x03ae26a3};
|
||||
static constexpr storage<limbs_count> omega23 = {0x76504cf8, 0x1e914848, 0xb63edd02, 0x55bbbf1e,
|
||||
0x4e55aa02, 0xbcdafec8, 0x2dc0beb0, 0x5145c4cd};
|
||||
static constexpr storage<limbs_count> omega24 = {0x1ab70e2c, 0x5b90153a, 0x75fb0ab8, 0x8deffa31,
|
||||
0x46900c95, 0xc553ae23, 0x6bd3118c, 0x1d31dcdc};
|
||||
static constexpr storage<limbs_count> omega25 = {0x59a2e8eb, 0x801c894c, 0xe12fc974, 0xbc535c5c,
|
||||
0x47d39803, 0x95508d27, 0xac5d094f, 0x16d9d3cd};
|
||||
static constexpr storage<limbs_count> omega26 = {0xcca1d8be, 0x810fa372, 0x82e0bfa7, 0xc67b8c28,
|
||||
0xe2d35bc2, 0xdbb4edf0, 0x5087c995, 0x712d1580};
|
||||
static constexpr storage<limbs_count> omega27 = {0xfd88f133, 0xeb162203, 0xf010ea74, 0xac96c38f,
|
||||
0xe64cfc70, 0x4307987f, 0x37b7a114, 0x350fe98d};
|
||||
static constexpr storage<limbs_count> omega28 = {0x42f2a254, 0xaba2f518, 0xa71efc0c, 0x4d7f3c3a,
|
||||
0xd274a80a, 0x97ae418d, 0x5e3e7682, 0x2967385d};
|
||||
static constexpr storage<limbs_count> omega29 = {0x575a0b79, 0x75c55c7b, 0x74a7ded1, 0x3ba4a157,
|
||||
0xa04fccf3, 0xc3974d73, 0x4a939684, 0x705aba4f};
|
||||
static constexpr storage<limbs_count> omega30 = {0x14ebb608, 0x8409a9ea, 0x66bac611, 0xfad0084e,
|
||||
0x811c1dfb, 0x04287254, 0x23b30c29, 0x086d072b};
|
||||
static constexpr storage<limbs_count> omega31 = {0x67e4756a, 0xb427c9b3, 0x02ebc38d, 0xc7537fb9,
|
||||
0xcd6a205f, 0x51de21be, 0x7923597d, 0x6064ab72};
|
||||
static constexpr storage<limbs_count> omega32 = {0x0b912f1f, 0x1b788f50, 0x70b3e094, 0xc4024ff2,
|
||||
0xd168d6c0, 0x0fd56dc8, 0x5b416b6f, 0x0212d79e};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8,
|
||||
omega9, omega10, omega11, omega12, omega13, omega14, omega15, omega16,
|
||||
omega17, omega18, omega19, omega20, omega21, omega22, omega23, omega24,
|
||||
omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
|
||||
omega1, omega2, omega3, omega4, omega5, omega6, omega7, omega8, omega9, omega10, omega11,
|
||||
omega12, omega13, omega14, omega15, omega16, omega17, omega18, omega19, omega20, omega21, omega22,
|
||||
omega23, omega24, omega25, omega26, omega27, omega28, omega29, omega30, omega31, omega32,
|
||||
};
|
||||
|
||||
// static constexpr storage<limbs_count> omega_inv[32]={ {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753}, {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e}, {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4}, {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c}, {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee}, {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d}, {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25}, {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e}, {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508}, {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d}, {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63}, {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7}, {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1}, {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac}, {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003}, {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c}, {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7}, {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950}, {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 
0x6f5d6dfa, 0x545ad9b2}, {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6}, {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5}, {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9}, {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960}, {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6}, {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf}, {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f}, {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533}, {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff}, {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287}, {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6}, {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}};
|
||||
|
||||
// static constexpr storage<limbs_count> omega_inv[32]={ {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402,
|
||||
// 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753}, {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334,
|
||||
// 0xa5e80b39, 0x299d7d47, 0x73eda753}, {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff,
|
||||
// 0x5ce11044, 0x1333b22e}, {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b,
|
||||
// 0x551115b4}, {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c},
|
||||
// {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee}, {0xcf28601b,
|
||||
// 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d}, {0x6a2f777a, 0xe9561c17,
|
||||
// 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25}, {0xf02a116e, 0xfb350dbe, 0xb4543a3e,
|
||||
// 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e}, {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41,
|
||||
// 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508}, {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1,
|
||||
// 0xdeae67bc, 0x65ba213e, 0x394fda0d}, {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340,
|
||||
// 0x6d174692, 0x58c3ba63}, {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f,
|
||||
// 0x044107b7}, {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1},
|
||||
// {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac}, {0x9ed57ae5,
|
||||
// 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003}, {0x645e1cfa, 0x903a0a0c,
|
||||
// 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c}, {0x14b1ba04, 0xb49d6b05, 0xf00b84f2,
|
||||
// 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7}, {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b,
|
||||
// 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950}, {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3,
|
||||
// 0x167fce38, 0x6f5d6dfa, 0x545ad9b2}, {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b,
|
||||
// 0x6fa2672c, 0x156cd7f6}, {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503,
|
||||
// 0x47880cd5}, {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9},
|
||||
// {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960}, {0x20238f62,
|
||||
// 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6}, {0xe8bff41e, 0x65b09c73,
|
||||
// 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf}, {0xd5fdb757, 0x8480c0e7, 0x365bf9fd,
|
||||
// 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f}, {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d,
|
||||
// 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533}, {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9,
|
||||
// 0x2d44da3b, 0xfd09be59, 0x092778ff}, {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724,
|
||||
// 0xf386c0d2, 0x24e5d287}, {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f,
|
||||
// 0x0158abd6}, {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666}};
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> omega_inv1= {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402, 0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega_inv2= {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400, 0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega_inv3= {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036, 0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e};
|
||||
static constexpr storage<limbs_count> omega_inv4= {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896, 0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4};
|
||||
static constexpr storage<limbs_count> omega_inv5= {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f, 0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c};
|
||||
static constexpr storage<limbs_count> omega_inv6= {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501, 0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee};
|
||||
static constexpr storage<limbs_count> omega_inv7= {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582, 0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d};
|
||||
static constexpr storage<limbs_count> omega_inv8= {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03, 0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25};
|
||||
static constexpr storage<limbs_count> omega_inv9= {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf, 0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e};
|
||||
static constexpr storage<limbs_count> omega_inv10= {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41, 0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508};
|
||||
static constexpr storage<limbs_count> omega_inv11= {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32, 0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d};
|
||||
static constexpr storage<limbs_count> omega_inv12= {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e, 0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63};
|
||||
static constexpr storage<limbs_count> omega_inv13= {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6, 0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7};
|
||||
static constexpr storage<limbs_count> omega_inv14= {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4, 0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1};
|
||||
static constexpr storage<limbs_count> omega_inv15= {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d, 0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac};
|
||||
static constexpr storage<limbs_count> omega_inv16= {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a, 0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003};
|
||||
static constexpr storage<limbs_count> omega_inv17= {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb, 0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c};
|
||||
static constexpr storage<limbs_count> omega_inv18= {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4, 0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7};
|
||||
static constexpr storage<limbs_count> omega_inv19= {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b, 0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950};
|
||||
static constexpr storage<limbs_count> omega_inv20= {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a, 0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2};
|
||||
static constexpr storage<limbs_count> omega_inv21= {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e, 0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6};
|
||||
static constexpr storage<limbs_count> omega_inv22= {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab, 0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5};
|
||||
static constexpr storage<limbs_count> omega_inv23= {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673, 0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9};
|
||||
static constexpr storage<limbs_count> omega_inv24= {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a, 0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960};
|
||||
static constexpr storage<limbs_count> omega_inv25= {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097, 0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6};
|
||||
static constexpr storage<limbs_count> omega_inv26= {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8, 0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf};
|
||||
static constexpr storage<limbs_count> omega_inv27= {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0, 0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f};
|
||||
static constexpr storage<limbs_count> omega_inv28= {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d, 0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533};
|
||||
static constexpr storage<limbs_count> omega_inv29= {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0, 0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff};
|
||||
static constexpr storage<limbs_count> omega_inv30= {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9, 0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287};
|
||||
static constexpr storage<limbs_count> omega_inv31= {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5, 0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
|
||||
static constexpr storage<limbs_count> omega_inv32= {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d, 0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};
|
||||
|
||||
static constexpr storage<limbs_count> omega_inv1 = {0x00000000, 0xffffffff, 0xfffe5bfe, 0x53bda402,
|
||||
0x09a1d805, 0x3339d808, 0x299d7d48, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega_inv2 = {0x00000001, 0xfffeffff, 0x89fb5bfe, 0x67baa400,
|
||||
0x939ed334, 0xa5e80b39, 0x299d7d47, 0x73eda753};
|
||||
static constexpr storage<limbs_count> omega_inv3 = {0xae99502e, 0x6037fe81, 0x94b04fd8, 0x8e749036,
|
||||
0xca86bf65, 0xbabc5aff, 0x5ce11044, 0x1333b22e};
|
||||
static constexpr storage<limbs_count> omega_inv4 = {0x7dc08d74, 0x7f847ee4, 0x04eeaf5a, 0xbd433896,
|
||||
0x1832fc60, 0xd66c91d6, 0x607e449b, 0x551115b4};
|
||||
static constexpr storage<limbs_count> omega_inv5 = {0x4e7773cb, 0xee5bcecc, 0xf6dab086, 0x45593d6f,
|
||||
0x4016e2bd, 0xa3a95d2d, 0xaf96816f, 0x047cb16c};
|
||||
static constexpr storage<limbs_count> omega_inv6 = {0x982b68c5, 0xb891fa3f, 0x1d426b52, 0xa41e8501,
|
||||
0x882952d6, 0x566009b5, 0x7b3c79d6, 0x199cdaee};
|
||||
static constexpr storage<limbs_count> omega_inv7 = {0xcf28601b, 0x571ba2fc, 0xac74db12, 0x166fb582,
|
||||
0x3501370b, 0x51420be4, 0x52f970ba, 0x1996fa8d};
|
||||
static constexpr storage<limbs_count> omega_inv8 = {0x6a2f777a, 0xe9561c17, 0x2393991b, 0xc03cae03,
|
||||
0x5a5bfd4f, 0x91b00023, 0x272e58ee, 0x6d64ed25};
|
||||
static constexpr storage<limbs_count> omega_inv9 = {0xf02a116e, 0xfb350dbe, 0xb4543a3e, 0x1c510ebf,
|
||||
0x37ad4eca, 0xf675522e, 0x80f82b2d, 0x1907a56e};
|
||||
static constexpr storage<limbs_count> omega_inv10 = {0x4eb71aa6, 0xb0ad8003, 0xaa67e0be, 0x50a32c41,
|
||||
0x19141f44, 0x105f0672, 0xa3dad316, 0x2bcd9508};
|
||||
static constexpr storage<limbs_count> omega_inv11 = {0x0f6fb2ac, 0x3dc9e560, 0x9aa58ff5, 0x3cc5bb32,
|
||||
0x36f376e1, 0xdeae67bc, 0x65ba213e, 0x394fda0d};
|
||||
static constexpr storage<limbs_count> omega_inv12 = {0x60b82267, 0x09f239f7, 0x8b24f123, 0x14180e0e,
|
||||
0x45625d95, 0xad5a5340, 0x6d174692, 0x58c3ba63};
|
||||
static constexpr storage<limbs_count> omega_inv13 = {0x348b416f, 0x0acf21c2, 0xbc086439, 0x798b6bf6,
|
||||
0xb1ca111d, 0x222d411f, 0x30ba1e0f, 0x044107b7};
|
||||
static constexpr storage<limbs_count> omega_inv14 = {0x014abe84, 0xa3b861b8, 0x427ed008, 0x37c017e4,
|
||||
0xae0ff4f5, 0xae51f613, 0xcb1218d3, 0x1a2d00e1};
|
||||
static constexpr storage<limbs_count> omega_inv15 = {0x4de7eb2b, 0x48aaa3bf, 0x6772057d, 0x4a58d54d,
|
||||
0x7093b551, 0xce25f16c, 0xd206337c, 0x242150ac};
|
||||
static constexpr storage<limbs_count> omega_inv16 = {0x9ed57ae5, 0xdf3ec9ae, 0x7166577f, 0xea7df73a,
|
||||
0x022fbbe4, 0x6ca8d281, 0x151e3f6b, 0x5850c003};
|
||||
static constexpr storage<limbs_count> omega_inv17 = {0x645e1cfa, 0x903a0a0c, 0x34788c37, 0xfbac54cb,
|
||||
0x8cf73d78, 0xdc127d11, 0x975d3c82, 0x6d0b5c7c};
|
||||
static constexpr storage<limbs_count> omega_inv18 = {0x14b1ba04, 0xb49d6b05, 0xf00b84f2, 0x56e466b4,
|
||||
0x0b904f22, 0x30c390cf, 0x3ee254cc, 0x3e11cfb7};
|
||||
static constexpr storage<limbs_count> omega_inv19 = {0xbe8201ab, 0x84dfa547, 0x530715d2, 0x3887ce8b,
|
||||
0x3eed4ed7, 0xa4c719c6, 0x8f8007b4, 0x18c44950};
|
||||
static constexpr storage<limbs_count> omega_inv20 = {0x7d813cd1, 0xdaf0346d, 0xf755beb1, 0xeccf6f9a,
|
||||
0xe08143e3, 0x167fce38, 0x6f5d6dfa, 0x545ad9b2};
|
||||
static constexpr storage<limbs_count> omega_inv21 = {0x577605de, 0x973f5466, 0x974f953c, 0x0ce8986e,
|
||||
0x074382f9, 0x8941cf4b, 0x6fa2672c, 0x156cd7f6};
|
||||
static constexpr storage<limbs_count> omega_inv22 = {0x33b66141, 0x24315404, 0x1992f584, 0x5d1375ab,
|
||||
0x8b20ca1a, 0xf193ffa6, 0x2701a503, 0x47880cd5};
|
||||
static constexpr storage<limbs_count> omega_inv23 = {0xe9f7b9af, 0xf7b6847d, 0x62c83ce2, 0x9a339673,
|
||||
0x6e5e6f79, 0xfabf4537, 0x35af33a3, 0x0975acd9};
|
||||
static constexpr storage<limbs_count> omega_inv24 = {0x0eddd248, 0x4fb4204a, 0xc9e509b3, 0x8c98706a,
|
||||
0x2bb27eb1, 0xd0be8987, 0xc831438b, 0x6ec5f960};
|
||||
static constexpr storage<limbs_count> omega_inv25 = {0x20238f62, 0xa13c95b7, 0x83b476b9, 0x130aa097,
|
||||
0x14860881, 0x758a04e0, 0x97066493, 0x58e2f8d6};
|
||||
static constexpr storage<limbs_count> omega_inv26 = {0xe8bff41e, 0x65b09c73, 0x37f1c6a3, 0x8b3280e8,
|
||||
0x2846fb21, 0xe17b82ce, 0xb1ae27df, 0x476534bf};
|
||||
static constexpr storage<limbs_count> omega_inv27 = {0xd5fdb757, 0x8480c0e7, 0x365bf9fd, 0x3644eea0,
|
||||
0xb776be86, 0x4ca116ca, 0x8b58390c, 0x17b6395f};
|
||||
static constexpr storage<limbs_count> omega_inv28 = {0x252eb0db, 0x2c811e9a, 0x7479e161, 0x1b7d960d,
|
||||
0xb0a89a26, 0xb3afc7c1, 0x32b5e793, 0x6a2f9533};
|
||||
static constexpr storage<limbs_count> omega_inv29 = {0x08b8a7ad, 0xe877b2c4, 0x341652b4, 0x68b0e8f0,
|
||||
0xe8b6a2d9, 0x2d44da3b, 0xfd09be59, 0x092778ff};
|
||||
static constexpr storage<limbs_count> omega_inv30 = {0x7988f244, 0x84a1aa6f, 0x24faf63f, 0xa164b3d9,
|
||||
0xc1bbb915, 0x7aae9724, 0xf386c0d2, 0x24e5d287};
|
||||
static constexpr storage<limbs_count> omega_inv31 = {0x41a1b30c, 0xa70a7efd, 0x39f0e511, 0xc49c55a5,
|
||||
0x033bb323, 0xab307a8f, 0x17acbd7f, 0x0158abd6};
|
||||
static constexpr storage<limbs_count> omega_inv32 = {0x0f642025, 0x2c228b30, 0x01bd882b, 0xb0878e8d,
|
||||
0xd7377fea, 0xd862b255, 0xf0490536, 0x18ac3666};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
|
||||
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
|
||||
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
|
||||
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
|
||||
omega_inv1, omega_inv2, omega_inv3, omega_inv4, omega_inv5, omega_inv6, omega_inv7, omega_inv8,
|
||||
omega_inv9, omega_inv10, omega_inv11, omega_inv12, omega_inv13, omega_inv14, omega_inv15, omega_inv16,
|
||||
omega_inv17, omega_inv18, omega_inv19, omega_inv20, omega_inv21, omega_inv22, omega_inv23, omega_inv24,
|
||||
omega_inv25, omega_inv26, omega_inv27, omega_inv28, omega_inv29, omega_inv30, omega_inv31, omega_inv32,
|
||||
};
|
||||
|
||||
|
||||
// Quick fix for linking issue
|
||||
static constexpr storage<limbs_count> inv1= {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201, 0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
|
||||
static constexpr storage<limbs_count> inv2= {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02, 0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
|
||||
static constexpr storage<limbs_count> inv3= {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82, 0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268};
|
||||
static constexpr storage<limbs_count> inv4= {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2, 0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd};
|
||||
static constexpr storage<limbs_count> inv5= {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2, 0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18};
|
||||
static constexpr storage<limbs_count> inv6= {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72, 0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5};
|
||||
static constexpr storage<limbs_count> inv7= {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba, 0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04};
|
||||
static constexpr storage<limbs_count> inv8= {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e, 0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab};
|
||||
static constexpr storage<limbs_count> inv9= {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530, 0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f};
|
||||
static constexpr storage<limbs_count> inv10= {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499, 0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9};
|
||||
static constexpr storage<limbs_count> inv11= {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e, 0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e};
|
||||
static constexpr storage<limbs_count> inv12= {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828, 0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878};
|
||||
static constexpr storage<limbs_count> inv13= {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615, 0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5};
|
||||
static constexpr storage<limbs_count> inv14= {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c, 0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c};
|
||||
static constexpr storage<limbs_count> inv15= {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87, 0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77};
|
||||
static constexpr storage<limbs_count> inv16= {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045, 0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365};
|
||||
static constexpr storage<limbs_count> inv17= {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24, 0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c};
|
||||
static constexpr storage<limbs_count> inv18= {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13, 0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57};
|
||||
static constexpr storage<limbs_count> inv19= {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b, 0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5};
|
||||
static constexpr storage<limbs_count> inv20= {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7, 0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014};
|
||||
static constexpr storage<limbs_count> inv21= {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965, 0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3};
|
||||
static constexpr storage<limbs_count> inv22= {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4, 0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583};
|
||||
static constexpr storage<limbs_count> inv23= {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b, 0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b};
|
||||
static constexpr storage<limbs_count> inv24= {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf, 0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df};
|
||||
static constexpr storage<limbs_count> inv25= {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159, 0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719};
|
||||
static constexpr storage<limbs_count> inv26= {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae, 0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736};
|
||||
static constexpr storage<limbs_count> inv27= {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358, 0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744};
|
||||
static constexpr storage<limbs_count> inv28= {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad, 0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b};
|
||||
static constexpr storage<limbs_count> inv29= {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8, 0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f};
|
||||
static constexpr storage<limbs_count> inv30= {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed, 0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
|
||||
static constexpr storage<limbs_count> inv31= {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
|
||||
static constexpr storage<limbs_count> inv32= {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
|
||||
static constexpr storage<limbs_count> inv1 = {0x80000001, 0x7fffffff, 0x7fff2dff, 0xa9ded201,
|
||||
0x04d0ec02, 0x199cec04, 0x94cebea4, 0x39f6d3a9};
|
||||
static constexpr storage<limbs_count> inv2 = {0x40000001, 0x3fffffff, 0x3ffec4ff, 0xfece3b02,
|
||||
0x07396203, 0x266b6206, 0x5f361df6, 0x56f23d7e};
|
||||
static constexpr storage<limbs_count> inv3 = {0x20000001, 0x1fffffff, 0x9ffe907f, 0xa945ef82,
|
||||
0x086d9d04, 0x2cd29d07, 0xc469cd9f, 0x656ff268};
|
||||
static constexpr storage<limbs_count> inv4 = {0x10000001, 0x0fffffff, 0xcffe763f, 0xfe81c9c2,
|
||||
0x8907ba84, 0xb0063a87, 0xf703a573, 0x6caeccdd};
|
||||
static constexpr storage<limbs_count> inv5 = {0x08000001, 0x07ffffff, 0xe7fe691f, 0x291fb6e2,
|
||||
0xc954c945, 0xf1a00947, 0x9050915d, 0x704e3a18};
|
||||
static constexpr storage<limbs_count> inv6 = {0x04000001, 0x03ffffff, 0xf3fe628f, 0x3e6ead72,
|
||||
0xe97b50a5, 0x126cf0a7, 0xdcf70753, 0x721df0b5};
|
||||
static constexpr storage<limbs_count> inv7 = {0x02000001, 0x01ffffff, 0xf9fe5f47, 0x491628ba,
|
||||
0xf98e9455, 0xa2d36457, 0x834a424d, 0x7305cc04};
|
||||
static constexpr storage<limbs_count> inv8 = {0x01000001, 0x00ffffff, 0xfcfe5da3, 0x4e69e65e,
|
||||
0x0198362d, 0xeb069e30, 0xd673dfca, 0x7379b9ab};
|
||||
static constexpr storage<limbs_count> inv9 = {0x00800001, 0x007fffff, 0xfe7e5cd1, 0x5113c530,
|
||||
0x059d0719, 0x8f203b1c, 0x8008ae89, 0x73b3b07f};
|
||||
static constexpr storage<limbs_count> inv10 = {0x00400001, 0x003fffff, 0xff3e5c68, 0x5268b499,
|
||||
0x079f6f8f, 0xe12d0992, 0x54d315e8, 0x73d0abe9};
|
||||
static constexpr storage<limbs_count> inv11 = {0x00200001, 0x801fffff, 0x7f9e5c33, 0x53132c4e,
|
||||
0x08a0a3ca, 0x8a3370cd, 0x3f384998, 0x73df299e};
|
||||
static constexpr storage<limbs_count> inv12 = {0x00100001, 0x400fffff, 0xbfce5c19, 0xd3686828,
|
||||
0x89213de7, 0x5eb6a46a, 0xb46ae370, 0x73e66878};
|
||||
static constexpr storage<limbs_count> inv13 = {0x00080001, 0x2007ffff, 0xdfe65c0c, 0x93930615,
|
||||
0x49618af6, 0x48f83e39, 0xef04305c, 0x73ea07e5};
|
||||
static constexpr storage<limbs_count> inv14 = {0x00040001, 0x9003ffff, 0x6ff25c05, 0xf3a8550c,
|
||||
0xa981b17d, 0x3e190b20, 0x8c50d6d2, 0x73ebd79c};
|
||||
static constexpr storage<limbs_count> inv15 = {0x00020001, 0x4801ffff, 0xb7f85c02, 0xa3b2fc87,
|
||||
0x5991c4c1, 0x38a97194, 0xdaf72a0d, 0x73ecbf77};
|
||||
static constexpr storage<limbs_count> inv16 = {0x00010001, 0xa400ffff, 0x5bfb5c00, 0x7bb85045,
|
||||
0x3199ce63, 0xb5f1a4ce, 0x824a53aa, 0x73ed3365};
|
||||
static constexpr storage<limbs_count> inv17 = {0x00008001, 0xd2007fff, 0x2dfcdbff, 0x67bafa24,
|
||||
0x1d9dd334, 0x7495be6b, 0x55f3e879, 0x73ed6d5c};
|
||||
static constexpr storage<limbs_count> inv18 = {0x00004001, 0x69003fff, 0x96fd9bff, 0xddbc4f13,
|
||||
0x939fd59c, 0xd3e7cb39, 0xbfc8b2e0, 0x73ed8a57};
|
||||
static constexpr storage<limbs_count> inv19 = {0x00002001, 0x34801fff, 0x4b7dfbff, 0x18bcf98b,
|
||||
0xcea0d6d1, 0x8390d1a0, 0x74b31814, 0x73ed98d5};
|
||||
static constexpr storage<limbs_count> inv20 = {0x00001001, 0x1a400fff, 0x25be2bff, 0x363d4ec7,
|
||||
0x6c21576b, 0x5b6554d4, 0x4f284aae, 0x73eda014};
|
||||
static constexpr storage<limbs_count> inv21 = {0x00000801, 0x0d2007ff, 0x12de43ff, 0x44fd7965,
|
||||
0x3ae197b8, 0x474f966e, 0xbc62e3fb, 0x73eda3b3};
|
||||
static constexpr storage<limbs_count> inv22 = {0x00000401, 0x069003ff, 0x096e4fff, 0xcc5d8eb4,
|
||||
0x2241b7de, 0xbd44b73b, 0x730030a1, 0x73eda583};
|
||||
static constexpr storage<limbs_count> inv23 = {0x00000201, 0x034801ff, 0x84b655ff, 0x100d995b,
|
||||
0x95f1c7f2, 0xf83f47a1, 0x4e4ed6f4, 0x73eda66b};
|
||||
static constexpr storage<limbs_count> inv24 = {0x00000101, 0x01a400ff, 0x425a58ff, 0xb1e59eaf,
|
||||
0xcfc9cffb, 0x95bc8fd4, 0x3bf62a1e, 0x73eda6df};
|
||||
static constexpr storage<limbs_count> inv25 = {0x00000081, 0x00d2007f, 0x212c5a7f, 0x82d1a159,
|
||||
0x6cb5d400, 0x647b33ee, 0x32c9d3b3, 0x73eda719};
|
||||
static constexpr storage<limbs_count> inv26 = {0x00000041, 0x0069003f, 0x10955b3f, 0xeb47a2ae,
|
||||
0x3b2bd602, 0xcbda85fb, 0x2e33a87d, 0x73eda736};
|
||||
static constexpr storage<limbs_count> inv27 = {0x00000021, 0x0034801f, 0x8849db9f, 0x1f82a358,
|
||||
0xa266d704, 0xff8a2f01, 0xabe892e2, 0x73eda744};
|
||||
static constexpr storage<limbs_count> inv28 = {0x00000011, 0x001a400f, 0xc4241bcf, 0xb9a023ad,
|
||||
0xd6045784, 0x99620384, 0xeac30815, 0x73eda74b};
|
||||
static constexpr storage<limbs_count> inv29 = {0x00000009, 0x000d2007, 0x62113be7, 0x06aee3d8,
|
||||
0x6fd317c5, 0xe64dedc6, 0x8a3042ae, 0x73eda74f};
|
||||
static constexpr storage<limbs_count> inv30 = {0x00000005, 0x00069003, 0xb107cbf3, 0x2d3643ed,
|
||||
0x3cba77e5, 0x8cc3e2e7, 0x59e6dffb, 0x73eda751};
|
||||
static constexpr storage<limbs_count> inv31 = {0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8,
|
||||
0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752};
|
||||
static constexpr storage<limbs_count> inv32 = {0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd,
|
||||
0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8,
|
||||
inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
|
||||
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24,
|
||||
inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
|
||||
};
|
||||
};
|
||||
|
||||
inv1, inv2, inv3, inv4, inv5, inv6, inv7, inv8, inv9, inv10, inv11, inv12, inv13, inv14, inv15, inv16,
|
||||
inv17, inv18, inv19, inv20, inv21, inv22, inv23, inv24, inv25, inv26, inv27, inv28, inv29, inv30, inv31, inv32,
|
||||
};
|
||||
};
|
||||
|
||||
struct fq_config {
|
||||
// field structure size = 12 * 32 bit
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
// modulus = 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84, 0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
// modulus*2 = 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd, 0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709, 0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
// modulus*4 = 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa, 0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13, 0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// modulus =
|
||||
// 4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe,
|
||||
0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
// modulus*2 =
|
||||
// 8004819110443334786835579651471808313113765639878015770664116272248063300981675728885375258258031328075788545119574
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd,
|
||||
0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709,
|
||||
0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
// modulus*4 =
|
||||
// 16009638220886669573671159302943616626227531279756031541328232544496126601963351457770750516516062656151577090239148
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa,
|
||||
0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13,
|
||||
0xd2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
|
||||
// modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_squared = {0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
// 2*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
// 4*modulus^2
|
||||
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
static constexpr unsigned modulus_bit_count = 381;
|
||||
// m = floor(2^(2*modulus_bit_count) / modulus)
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7, 0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x0005555, 0x60100000, 0xeac00004, 0x15400014, 0x94f09dbe, 0x8cf2d5f0, 0xc7aed409, 0xb88b47b0, 0xcb453289, 0x4e45849b, 0x6801965b, 0x5feee15c};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x05c40fe, 0xaa212c9c, 0xccfd7e14, 0x70093ae9, 0xc85a96b4, 0x6d05c02d, 0x025fecd3, 0x1f193851, 0xeb48f4c6, 0x84d32f44, 0xed8ffb1a, 0xbefcc91e};
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7,
|
||||
0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x0005555, 0x60100000, 0xeac00004, 0x15400014,
|
||||
0x94f09dbe, 0x8cf2d5f0, 0xc7aed409, 0xb88b47b0,
|
||||
0xcb453289, 0x4e45849b, 0x6801965b, 0x5feee15c};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x05c40fe, 0xaa212c9c, 0xccfd7e14, 0x70093ae9,
|
||||
0xc85a96b4, 0x6d05c02d, 0x025fecd3, 0x1f193851,
|
||||
0xeb48f4c6, 0x84d32f44, 0xed8ffb1a, 0xbefcc91e};
|
||||
// i^2, the square of the imaginary unit for the extension field
|
||||
static constexpr uint32_t i_squared = 1;
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> g1_gen_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f, 0x171bac58, 0xa14e3a3f,
|
||||
0x9774b905, 0xc3688c4f, 0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
|
||||
static constexpr storage<limbs_count> g1_gen_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744, 0x2c04b3ed, 0x00db18cb,
|
||||
0xd5d00af6, 0xfcf5e095, 0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
|
||||
static constexpr storage<limbs_count> g2_gen_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0x0bac0326, 0x7ae3d177, 0xb4510b64,
|
||||
0xfa403b02, 0xc6e47ad4, 0x2dc51051, 0x26080527, 0xf08f0a91, 0x024aa2b2};
|
||||
static constexpr storage<limbs_count> g2_gen_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112, 0xdc7f5049, 0xb5da61bb,
|
||||
0x9920b61a, 0x596bd0d0, 0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
|
||||
static constexpr storage<limbs_count> g2_gen_y_re = {0x08b82801, 0xe1935486, 0x3baca289, 0x923ac9cc, 0x5160d12c, 0x6d429a69,
|
||||
0x8cbdd3a7, 0xadfd9baa, 0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0x0ce5d527};
|
||||
static constexpr storage<limbs_count> g2_gen_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27, 0x572e99ab, 0x267492ab,
|
||||
0x85a763af, 0xcb3e287e, 0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x0606c4a0};
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> g1_gen_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f,
|
||||
0x171bac58, 0xa14e3a3f, 0x9774b905, 0xc3688c4f,
|
||||
0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
|
||||
static constexpr storage<limbs_count> g1_gen_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744,
|
||||
0x2c04b3ed, 0x00db18cb, 0xd5d00af6, 0xfcf5e095,
|
||||
0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
|
||||
static constexpr storage<limbs_count> g2_gen_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0x0bac0326,
|
||||
0x7ae3d177, 0xb4510b64, 0xfa403b02, 0xc6e47ad4,
|
||||
0x2dc51051, 0x26080527, 0xf08f0a91, 0x024aa2b2};
|
||||
static constexpr storage<limbs_count> g2_gen_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112,
|
||||
0xdc7f5049, 0xb5da61bb, 0x9920b61a, 0x596bd0d0,
|
||||
0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
|
||||
static constexpr storage<limbs_count> g2_gen_y_re = {0x08b82801, 0xe1935486, 0x3baca289, 0x923ac9cc,
|
||||
0x5160d12c, 0x6d429a69, 0x8cbdd3a7, 0xadfd9baa,
|
||||
0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0x0ce5d527};
|
||||
static constexpr storage<limbs_count> g2_gen_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27,
|
||||
0x572e99ab, 0x267492ab, 0x85a763af, 0xcb3e287e,
|
||||
0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x0606c4a0};
|
||||
};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
}
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000004, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
|
||||
0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
|
||||
0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
} // namespace PARAMS_BLS12_381
|
||||
@@ -1,23 +1,25 @@
|
||||
#ifndef _BLS12_381_POSEIDON
|
||||
#define _BLS12_381_POSEIDON
|
||||
#include <cuda.h>
|
||||
#include <stdexcept>
|
||||
#include "../../appUtils/poseidon/poseidon.cu"
|
||||
#include "curve_config.cuh"
|
||||
#include <cuda.h>
|
||||
#include <stdexcept>
|
||||
|
||||
template class Poseidon<BLS12_381::scalar_t>;
|
||||
|
||||
extern "C" int poseidon_multi_cuda_bls12_381(BLS12_381::scalar_t input[], BLS12_381::scalar_t* out,
|
||||
size_t number_of_blocks, int arity, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int poseidon_multi_cuda_bls12_381(
|
||||
BLS12_381::scalar_t input[],
|
||||
BLS12_381::scalar_t* out,
|
||||
size_t number_of_blocks,
|
||||
int arity,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
// TODO: once we get bindings to pass a stream, we should make {stream} a required parameter and use it instead of
|
||||
try {
|
||||
// TODO: once we get bindings to pass a stream, we should make {stream} a required parameter and use it instead of
|
||||
// creating a new stream
|
||||
if (stream == 0) {
|
||||
cudaStreamCreate(&stream);
|
||||
}
|
||||
|
||||
if (stream == 0) { cudaStreamCreate(&stream); }
|
||||
|
||||
cudaEvent_t start_event, end_event;
|
||||
cudaEventCreate(&start_event);
|
||||
cudaEventCreate(&end_event);
|
||||
@@ -27,19 +29,17 @@ extern "C" int poseidon_multi_cuda_bls12_381(BLS12_381::scalar_t input[], BLS12_
|
||||
cudaEventRecord(end_event, stream);
|
||||
cudaEventSynchronize(end_event);
|
||||
|
||||
#ifdef DEBUG
|
||||
#ifdef DEBUG
|
||||
float elapsedTime;
|
||||
cudaEventElapsedTime(&elapsedTime, start_event, end_event);
|
||||
printf("Time elapsed: %f", elapsedTime);
|
||||
#endif
|
||||
#endif
|
||||
|
||||
cudaEventDestroy(start_event);
|
||||
cudaEventDestroy(end_event);
|
||||
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -1,19 +1,23 @@
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include <cuda.h>
|
||||
|
||||
extern "C" bool eq_bls12_381(BLS12_381::projective_t *point1, BLS12_381::projective_t *point2)
|
||||
extern "C" bool eq_bls12_381(BLS12_381::projective_t* point1, BLS12_381::projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_381::point_field_t::zero()) && (point1->y == BLS12_381::point_field_t::zero()) && (point1->z == BLS12_381::point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_381::point_field_t::zero()) && (point2->y == BLS12_381::point_field_t::zero()) && (point2->z == BLS12_381::point_field_t::zero()));
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_381::point_field_t::zero()) && (point1->y == BLS12_381::point_field_t::zero()) &&
|
||||
(point1->z == BLS12_381::point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_381::point_field_t::zero()) && (point2->y == BLS12_381::point_field_t::zero()) &&
|
||||
(point2->z == BLS12_381::point_field_t::zero()));
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t *point1, BLS12_381::g2_projective_t *point2)
|
||||
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t* point1, BLS12_381::g2_projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_381::g2_point_field_t::zero()) && (point1->y == BLS12_381::g2_point_field_t::zero()) && (point1->z == BLS12_381::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_381::g2_point_field_t::zero()) && (point2->y == BLS12_381::g2_point_field_t::zero()) && (point2->z == BLS12_381::g2_point_field_t::zero()));
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_381::g2_point_field_t::zero()) && (point1->y == BLS12_381::g2_point_field_t::zero()) &&
|
||||
(point1->z == BLS12_381::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_381::g2_point_field_t::zero()) && (point2->y == BLS12_381::g2_point_field_t::zero()) &&
|
||||
(point2->z == BLS12_381::g2_point_field_t::zero()));
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
#include "poseidon.cu"
|
||||
#include "projective.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
|
||||
@@ -1,66 +1,60 @@
|
||||
#ifndef _BLS12_381_VEC_MULT
|
||||
#define _BLS12_381_VEC_MULT
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
|
||||
extern "C" int32_t vec_mod_mult_point_bls12_381(BLS12_381::projective_t *inout,
|
||||
BLS12_381::scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
extern "C" int32_t vec_mod_mult_point_bls12_381(
|
||||
BLS12_381::projective_t* inout,
|
||||
BLS12_381::scalar_t* scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
try {
|
||||
// TODO: device_id
|
||||
vector_mod_mult<BLS12_381::projective_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t vec_mod_mult_scalar_bls12_381(BLS12_381::scalar_t *inout,
|
||||
BLS12_381::scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
extern "C" int32_t vec_mod_mult_scalar_bls12_381(
|
||||
BLS12_381::scalar_t* inout,
|
||||
BLS12_381::scalar_t* scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
try {
|
||||
// TODO: device_id
|
||||
vector_mod_mult<BLS12_381::scalar_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t matrix_vec_mod_mult_bls12_381(BLS12_381::scalar_t *matrix_flattened,
|
||||
BLS12_381::scalar_t *input,
|
||||
BLS12_381::scalar_t *output,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
extern "C" int32_t matrix_vec_mod_mult_bls12_381(
|
||||
BLS12_381::scalar_t* matrix_flattened,
|
||||
BLS12_381::scalar_t* input,
|
||||
BLS12_381::scalar_t* output,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
try {
|
||||
// TODO: device_id
|
||||
matrix_mod_mult<BLS12_381::scalar_t>(matrix_flattened, input, output, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -9,17 +9,17 @@
|
||||
#include "params.cuh"
|
||||
|
||||
namespace BN254 {
|
||||
typedef Field<PARAMS_BN254::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_BN254::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{ PARAMS_BN254::weierstrass_b };
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_BN254::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BN254::weierstrass_b_g2_re },
|
||||
point_field_t{ PARAMS_BN254::weierstrass_b_g2_im }};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
}
|
||||
typedef Field<PARAMS_BN254::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_BN254::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{PARAMS_BN254::weierstrass_b};
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_BN254::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{
|
||||
point_field_t{PARAMS_BN254::weierstrass_b_g2_re}, point_field_t{PARAMS_BN254::weierstrass_b_g2_im}};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
} // namespace BN254
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,186 +1,216 @@
|
||||
#ifndef _BN254_MSM
|
||||
#define _BN254_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include <cuda.h>
|
||||
#include <stdexcept>
|
||||
|
||||
|
||||
extern "C"
|
||||
int msm_cuda_bn254(BN254::projective_t *out, BN254::affine_t points[],
|
||||
BN254::scalar_t scalars[], size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int msm_cuda_bn254(
|
||||
BN254::projective_t* out,
|
||||
BN254::affine_t points[],
|
||||
BN254::scalar_t scalars[],
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, count, out, false, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(
|
||||
scalars, points, count, out, false, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int msm_batch_cuda_bn254(BN254::projective_t* out, BN254::affine_t points[],
|
||||
BN254::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int msm_batch_cuda_bn254(
|
||||
BN254::projective_t* out,
|
||||
BN254::affine_t points[],
|
||||
BN254::scalar_t scalars[],
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(
|
||||
scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int commit_cuda_bn254(
|
||||
BN254::projective_t* d_out,
|
||||
BN254::scalar_t* d_scalars,
|
||||
BN254::affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int commit_batch_cuda_bn254(
|
||||
BN254::projective_t* d_out,
|
||||
BN254::scalar_t* d_scalars,
|
||||
BN254::affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
extern "C"
|
||||
int msm_g2_cuda_bn254(BN254::g2_projective_t *out, BN254::g2_affine_t points[],
|
||||
BN254::scalar_t scalars[], size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int msm_g2_cuda_bn254(
|
||||
BN254::g2_projective_t* out,
|
||||
BN254::g2_affine_t points[],
|
||||
BN254::scalar_t scalars[],
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm<BN254::scalar_t, BN254::g2_projective_t, BN254::g2_affine_t>(scalars, points, count, out, false, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm<BN254::scalar_t, BN254::g2_projective_t, BN254::g2_affine_t>(
|
||||
scalars, points, count, out, false, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int msm_batch_g2_cuda_bn254(BN254::g2_projective_t* out, BN254::g2_affine_t points[],
|
||||
BN254::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int msm_batch_g2_cuda_bn254(
|
||||
BN254::g2_projective_t* out,
|
||||
BN254::g2_affine_t points[],
|
||||
BN254::scalar_t scalars[],
|
||||
size_t batch_size,
|
||||
size_t msm_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<BN254::scalar_t, BN254::g2_projective_t, BN254::g2_affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm<BN254::scalar_t, BN254::g2_projective_t, BN254::g2_affine_t>(
|
||||
scalars, points, batch_size, msm_size, out, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM in G2 group.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut G2 point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points G2 affine points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C"
|
||||
int commit_g2_cuda_bn254(BN254::g2_projective_t* d_out, BN254::scalar_t* d_scalars, BN254::g2_affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
extern "C" int commit_g2_cuda_bn254(
|
||||
BN254::g2_projective_t* d_out,
|
||||
BN254::scalar_t* d_scalars,
|
||||
BN254::g2_affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
|
||||
* @param d_out Output G2 point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points G2 affine points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalars` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C"
|
||||
int commit_batch_g2_cuda_bn254(BN254::g2_projective_t* d_out, BN254::scalar_t* d_scalars, BN254::g2_affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Output G2 point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points G2 affine points for the MSMs. Must be on device. It is assumed that this set of bases is used for
|
||||
* each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalars` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C" int commit_batch_g2_cuda_bn254(
|
||||
BN254::g2_projective_t* d_out,
|
||||
BN254::scalar_t* d_scalars,
|
||||
BN254::g2_affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -6,147 +6,183 @@ namespace PARAMS_BN254 {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 28;
|
||||
static constexpr unsigned modulus_bit_count = 254;
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090, 0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121, 0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_squared = {0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975, 0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_2 = {0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb, 0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7, 0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520, 0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x4ffffffb, 0xac96341c, 0x9f60cd29, 0x36fc7695, 0x7879462e, 0x666ea36f, 0x9a07df2f, 0xe0a77c1};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x6db1194e, 0xdc5ba005, 0xe111ec87, 0x90ef5a9, 0xaeb85d5d, 0xc8260de4, 0x82c5551c, 0x15ebf951};
|
||||
static constexpr storage<limbs_count> modulus = {0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xe0000002, 0x87c3eb27, 0xf372e122, 0x5067d090,
|
||||
0x0302b0ba, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xc0000004, 0x0f87d64f, 0xe6e5c245, 0xa0cfa121,
|
||||
0x06056174, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xf0000001, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0xe0000001, 0x08c3eb27, 0xdcb34000, 0xc7f26223, 0x68c9bb7f, 0xffe9a62c, 0xe821ddb0, 0xa6ce1975,
|
||||
0x47b62fe7, 0x2c77527b, 0xd379d3df, 0x85f73bb0, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0xc0000002, 0x1187d64f, 0xb9668000, 0x8fe4c447, 0xd19376ff, 0xffd34c58, 0xd043bb61, 0x4d9c32eb,
|
||||
0x8f6c5fcf, 0x58eea4f6, 0xa6f3a7be, 0x0bee7761, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x80000004, 0x230fac9f, 0x72cd0000, 0x1fc9888f, 0xa326edff, 0xffa698b1, 0xa08776c3, 0x9b3865d7,
|
||||
0x1ed8bf9e, 0xb1dd49ed, 0x4de74f7c, 0x17dceec3, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = { {
|
||||
{0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72},
|
||||
{0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72},
|
||||
{0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1},
|
||||
{0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2},
|
||||
{0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6},
|
||||
{0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d},
|
||||
{0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd},
|
||||
{0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561},
|
||||
{0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e},
|
||||
{0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1},
|
||||
{0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584},
|
||||
{0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596},
|
||||
{0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49},
|
||||
{0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651},
|
||||
{0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f},
|
||||
{0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb},
|
||||
{0xce8f58e5, 0x276e5858, 0x5655210e, 0x0512eca9, 0xe70e61f3, 0xc3708cc6, 0xa7d74902, 0x1bf82deb},
|
||||
{0x7dcdc0e0, 0x84c6bfa5, 0x13f4d1bd, 0xc57088ff, 0xb5b95e4d, 0x5c0176fb, 0x3a8d46c1, 0x19ddbcaf},
|
||||
{0x613f6cbd, 0x5c1d597f, 0x8357473a, 0x30525841, 0x968e4915, 0x51829353, 0x844bca52, 0x2260e724},
|
||||
{0x53337857, 0x53422da9, 0xdbed349f, 0xac616632, 0x06d1e303, 0x27508aba, 0x0a0ed063, 0x26125da1},
|
||||
{0xfcd0b523, 0xb2c87885, 0xca5a5ce3, 0x58f50577, 0x8598fc8c, 0x4222150e, 0xae2bdd1a, 0x1ded8980},
|
||||
{0xa219447e, 0xa76dde56, 0x359eebbb, 0xec1a1f05, 0x8be08215, 0xcda0ceb6, 0xb1f8d9a7, 0x1ad92f46},
|
||||
{0xab80c59d, 0xb54d4506, 0x22dd991f, 0x5680c640, 0xbc23a139, 0x6b7bcf70, 0x5ab4c74d, 0x0210fe63},
|
||||
{0xe32b045b, 0x1c25f1e3, 0x2e832696, 0x145e0db8, 0x71c6441f, 0x852e2a03, 0x845d50d2, 0x0c9fabc7},
|
||||
{0xb878331a, 0xeccd4f3e, 0x8dc6d26e, 0x7b26b748, 0xd9130cd4, 0xa19b0361, 0x326341ef, 0x2a734ebb},
|
||||
{0x2f4e9212, 0x1c79bd57, 0x3d68f9ae, 0x605b52b6, 0xb8d89d4a, 0x0113eff9, 0xf1ff73b2, 0x1067569a},
|
||||
{0x80928c44, 0x034afc45, 0xf6437da2, 0xb4823532, 0x6dc6e364, 0x5f256a9f, 0xb363ebe8, 0x049ae702},
|
||||
{0x725b19f0, 0x9bd61b6e, 0x41112ed4, 0x402d111e, 0x8ef62abc, 0x00e0a7eb, 0xa58a7e85, 0x2a3c09f0}
|
||||
} };
|
||||
static constexpr storage<limbs_count> m = {0xbe1de925, 0x620703a6, 0x09e880ae, 0x71448520,
|
||||
0x68073014, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x4ffffffb, 0xac96341c, 0x9f60cd29, 0x36fc7695,
|
||||
0x7879462e, 0x666ea36f, 0x9a07df2f, 0xe0a77c1};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x6db1194e, 0xdc5ba005, 0xe111ec87, 0x90ef5a9,
|
||||
0xaeb85d5d, 0xc8260de4, 0x82c5551c, 0x15ebf951};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72},
|
||||
{0x8f703636, 0x23120470, 0xfd736bec, 0x5cea24f6, 0x3fd84104, 0x048b6e19, 0xe131a029, 0x30644e72},
|
||||
{0xc1bd5e80, 0x948dad4a, 0xf8170a0a, 0x52627366, 0x96afef36, 0xec9b9e2f, 0xc8c14f22, 0x2b337de1},
|
||||
{0xe306460b, 0xb11509c6, 0x174efb98, 0x996dfbe1, 0x94dd508c, 0x1c6e4f45, 0x16cbbf4e, 0x21082ca2},
|
||||
{0x3bb512d0, 0x3eed4c53, 0x838eeb1d, 0x9c18d51b, 0x47c0b2a9, 0x9678200d, 0x306b93d2, 0x09c532c6},
|
||||
{0x118f023a, 0xdb94fb05, 0x26e324be, 0x46a6cb24, 0x49bdadf2, 0xc24cdb76, 0x5b080fca, 0x1418144d},
|
||||
{0xba9d1811, 0x9d0e470c, 0xb6f24c79, 0x1dcb5564, 0xe85943e0, 0xdf5ce19c, 0xad310991, 0x16e73dfd},
|
||||
{0x74a57a76, 0xc8936191, 0x6750f230, 0x61794254, 0x9f36ffb0, 0xf086204a, 0xa6148404, 0x07b0c561},
|
||||
{0x470157ce, 0x893a7fa1, 0xfc782d75, 0xe8302a41, 0xdd9b0675, 0xffc02c0e, 0xf6e72f5b, 0x0f1ded1e},
|
||||
{0xbc2e5912, 0x11f995e1, 0xa8d2d7ab, 0x39ba79c0, 0xb08771e3, 0xebbebc2b, 0x7017a420, 0x06fd19c1},
|
||||
{0x769a2ee2, 0xd00a58f9, 0x7494f0ca, 0xb8c12c17, 0xa5355d71, 0xb4027fd7, 0x99c5042b, 0x027a3584},
|
||||
{0x0042d43a, 0x1c477572, 0x6f039bb9, 0x76f169c7, 0xfd5a90a9, 0x01ddd073, 0xde2fd10f, 0x0931d596},
|
||||
{0x9bbdd310, 0x4aa49b8d, 0x8e3a2d76, 0xd31bf3e2, 0x78b2667b, 0x001deac8, 0xb869ae62, 0x006fab49},
|
||||
{0x617c6e85, 0xadaa01c2, 0x7420aae6, 0xb4a93ee1, 0x0ddca8a8, 0x1f4e51b8, 0xcdd9e481, 0x2d965651},
|
||||
{0x4e26ecfb, 0xa93458fd, 0x4115a009, 0x022a2a2d, 0x69ec2bd0, 0x017171fa, 0x5941dc91, 0x2d1ba66f},
|
||||
{0xdaac43b7, 0xd1628ba2, 0xe4347e7d, 0x16c8601d, 0xe081dcff, 0x649abebd, 0x5981ed45, 0x00eeb2cb},
|
||||
{0xce8f58e5, 0x276e5858, 0x5655210e, 0x0512eca9, 0xe70e61f3, 0xc3708cc6, 0xa7d74902, 0x1bf82deb},
|
||||
{0x7dcdc0e0, 0x84c6bfa5, 0x13f4d1bd, 0xc57088ff, 0xb5b95e4d, 0x5c0176fb, 0x3a8d46c1, 0x19ddbcaf},
|
||||
{0x613f6cbd, 0x5c1d597f, 0x8357473a, 0x30525841, 0x968e4915, 0x51829353, 0x844bca52, 0x2260e724},
|
||||
{0x53337857, 0x53422da9, 0xdbed349f, 0xac616632, 0x06d1e303, 0x27508aba, 0x0a0ed063, 0x26125da1},
|
||||
{0xfcd0b523, 0xb2c87885, 0xca5a5ce3, 0x58f50577, 0x8598fc8c, 0x4222150e, 0xae2bdd1a, 0x1ded8980},
|
||||
{0xa219447e, 0xa76dde56, 0x359eebbb, 0xec1a1f05, 0x8be08215, 0xcda0ceb6, 0xb1f8d9a7, 0x1ad92f46},
|
||||
{0xab80c59d, 0xb54d4506, 0x22dd991f, 0x5680c640, 0xbc23a139, 0x6b7bcf70, 0x5ab4c74d, 0x0210fe63},
|
||||
{0xe32b045b, 0x1c25f1e3, 0x2e832696, 0x145e0db8, 0x71c6441f, 0x852e2a03, 0x845d50d2, 0x0c9fabc7},
|
||||
{0xb878331a, 0xeccd4f3e, 0x8dc6d26e, 0x7b26b748, 0xd9130cd4, 0xa19b0361, 0x326341ef, 0x2a734ebb},
|
||||
{0x2f4e9212, 0x1c79bd57, 0x3d68f9ae, 0x605b52b6, 0xb8d89d4a, 0x0113eff9, 0xf1ff73b2, 0x1067569a},
|
||||
{0x80928c44, 0x034afc45, 0xf6437da2, 0xb4823532, 0x6dc6e364, 0x5f256a9f, 0xb363ebe8, 0x049ae702},
|
||||
{0x725b19f0, 0x9bd61b6e, 0x41112ed4, 0x402d111e, 0x8ef62abc, 0x00e0a7eb, 0xa58a7e85, 0x2a3c09f0}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = { {
|
||||
{0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72},
|
||||
{0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000},
|
||||
{0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711},
|
||||
{0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf},
|
||||
{0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136},
|
||||
{0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2},
|
||||
{0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053},
|
||||
{0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327},
|
||||
{0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821},
|
||||
{0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7},
|
||||
{0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21},
|
||||
{0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2},
|
||||
{0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58},
|
||||
{0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f},
|
||||
{0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766},
|
||||
{0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7},
|
||||
{0xd96f0d22, 0x20e75251, 0x6bd4e8c9, 0xc01c7f08, 0xf9dd50c4, 0x37d8b00b, 0xc43ca872, 0x244cf010},
|
||||
{0x66c5174c, 0x7a823174, 0x22d5ad70, 0x7dbe118c, 0x111119c5, 0xf8d7c71d, 0x83780e87, 0x036853f0},
|
||||
{0xca535321, 0xd98f9924, 0xe66e6c81, 0x22dbc0ef, 0x664ae1b7, 0xa15cf806, 0xa314fb67, 0x06e402c0},
|
||||
{0xe26c91f3, 0x0852a8fd, 0x3baca626, 0x521f45cb, 0x2c51bfca, 0xab6473bc, 0x2100895f, 0x100c332d},
|
||||
{0xa376d0f0, 0xf5fac783, 0x940797d3, 0x50fd246e, 0x145f5278, 0xab14ecc1, 0x41091b14, 0x19c6dfb8},
|
||||
{0x7faa1396, 0x43dc52e2, 0x4beced23, 0xd437be9d, 0x6d3c38c3, 0xecc11e9c, 0x0c74a876, 0x2eb58439},
|
||||
{0xd69ca83b, 0x811b03e7, 0xa1a6eadf, 0x126a786b, 0x4e2b8e61, 0x1dd75c9f, 0xbda6792b, 0x2165a1a5},
|
||||
{0x110b737b, 0x02e1d4d1, 0xb323a164, 0x7be1488d, 0x9cd06163, 0xa334d317, 0xdb50e9cd, 0x2710c370},
|
||||
{0x9550fe47, 0x45d2f3cb, 0xf6a8efc4, 0x5f43327b, 0xe993ee18, 0x5bcd0d50, 0xb21de952, 0x27f035bd},
|
||||
{0x232e3983, 0x1d63cbae, 0xaa1b58e2, 0xac815161, 0x6aeb019e, 0x531f42a5, 0x03ca2ef5, 0x2dcd51d9},
|
||||
{0x980db869, 0xa8b64ba8, 0xc9718f6c, 0x4c787f72, 0x15d27ced, 0x7746a25a, 0x435a46e9, 0x110bf78f},
|
||||
{0x9d18157e, 0x72394277, 0xfd399d5d, 0xec9d51f8, 0x49d5387f, 0x6117635d, 0x9c229cd5, 0x01b77519}
|
||||
} };
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0xf0000000, 0x43e1f593, 0x79b97091, 0x2833e848, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72},
|
||||
{0x608fc9cb, 0x20cff123, 0x7c4604a5, 0xcb49c351, 0x41a91758, 0xb3c4d79d, 0x00000000, 0x00000000},
|
||||
{0x07b95a9b, 0x8b11d9ab, 0x41671f56, 0x20710ead, 0x30f81dee, 0xfb3acaee, 0x9778465c, 0x130b1711},
|
||||
{0x373428de, 0xb85a71e6, 0xaeb0337e, 0x74954d30, 0x303402b7, 0x2bfc85eb, 0x409556c0, 0x02e40daf},
|
||||
{0xf210979d, 0x8c99980c, 0x34905b4d, 0xef8f3113, 0xdf25d8e7, 0x0aeaf3e7, 0x03bfbd79, 0x27247136},
|
||||
{0x763d698f, 0x78ce6a0b, 0x1d3213ee, 0xd80396ec, 0x67a8a676, 0x035cdc75, 0xb2a13d3a, 0x26177cf2},
|
||||
{0xc64427d7, 0xdddf985f, 0xa49e95bd, 0xaa4f964a, 0x5def8b04, 0x427c045f, 0x7969b732, 0x1641c053},
|
||||
{0x0329f5d6, 0x692c553d, 0x8712848a, 0xa54cf8c6, 0x38e2b5e6, 0x64751ad9, 0x7422fad3, 0x204bd327},
|
||||
{0xaf6b3e4e, 0x52f26c0f, 0xf0bcc0c8, 0x4c277a07, 0xe4fcfcab, 0x546875d5, 0xaa9995b3, 0x09d8f821},
|
||||
{0xb2e5cc71, 0xcaa2e1e9, 0x6e43404e, 0xed42b68e, 0x7a2c7f0a, 0x6ed80915, 0xde3c86d6, 0x1c4042c7},
|
||||
{0x579d71ae, 0x20a3a65d, 0x0adc4420, 0xfd7efed8, 0xfddabf54, 0x3bb6dcd7, 0xbc73d07b, 0x0fa9bb21},
|
||||
{0xc79e0e57, 0xb6f70f8d, 0xa04e05ac, 0x269d3fde, 0x2ba088d9, 0xcf2e371c, 0x11b88d9c, 0x1af864d2},
|
||||
{0xabd95dc9, 0x3b0b205a, 0x978188ca, 0xc8df74fa, 0x6a1cb6c8, 0x08e124db, 0xbfac6104, 0x1670ed58},
|
||||
{0x641c8410, 0xf8eee934, 0x677771c0, 0xf40976b0, 0x558e6e8c, 0x11680d42, 0x06e7e9e9, 0x281c036f},
|
||||
{0xb2dbc0b4, 0xc92a742f, 0x4d384e68, 0xc3f02842, 0x2fa43d0d, 0x22701b6f, 0xe4590b37, 0x05d33766},
|
||||
{0x02d842d4, 0x922d5ac8, 0xc830e4c6, 0x91126414, 0x082f37e0, 0xe92338c0, 0x7fe704e8, 0x0b5d56b7},
|
||||
{0xd96f0d22, 0x20e75251, 0x6bd4e8c9, 0xc01c7f08, 0xf9dd50c4, 0x37d8b00b, 0xc43ca872, 0x244cf010},
|
||||
{0x66c5174c, 0x7a823174, 0x22d5ad70, 0x7dbe118c, 0x111119c5, 0xf8d7c71d, 0x83780e87, 0x036853f0},
|
||||
{0xca535321, 0xd98f9924, 0xe66e6c81, 0x22dbc0ef, 0x664ae1b7, 0xa15cf806, 0xa314fb67, 0x06e402c0},
|
||||
{0xe26c91f3, 0x0852a8fd, 0x3baca626, 0x521f45cb, 0x2c51bfca, 0xab6473bc, 0x2100895f, 0x100c332d},
|
||||
{0xa376d0f0, 0xf5fac783, 0x940797d3, 0x50fd246e, 0x145f5278, 0xab14ecc1, 0x41091b14, 0x19c6dfb8},
|
||||
{0x7faa1396, 0x43dc52e2, 0x4beced23, 0xd437be9d, 0x6d3c38c3, 0xecc11e9c, 0x0c74a876, 0x2eb58439},
|
||||
{0xd69ca83b, 0x811b03e7, 0xa1a6eadf, 0x126a786b, 0x4e2b8e61, 0x1dd75c9f, 0xbda6792b, 0x2165a1a5},
|
||||
{0x110b737b, 0x02e1d4d1, 0xb323a164, 0x7be1488d, 0x9cd06163, 0xa334d317, 0xdb50e9cd, 0x2710c370},
|
||||
{0x9550fe47, 0x45d2f3cb, 0xf6a8efc4, 0x5f43327b, 0xe993ee18, 0x5bcd0d50, 0xb21de952, 0x27f035bd},
|
||||
{0x232e3983, 0x1d63cbae, 0xaa1b58e2, 0xac815161, 0x6aeb019e, 0x531f42a5, 0x03ca2ef5, 0x2dcd51d9},
|
||||
{0x980db869, 0xa8b64ba8, 0xc9718f6c, 0x4c787f72, 0x15d27ced, 0x7746a25a, 0x435a46e9, 0x110bf78f},
|
||||
{0x9d18157e, 0x72394277, 0xfd399d5d, 0xec9d51f8, 0x49d5387f, 0x6117635d, 0x9c229cd5, 0x01b77519}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = { {
|
||||
{0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739},
|
||||
{0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6},
|
||||
{0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4},
|
||||
{0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b},
|
||||
{0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff},
|
||||
{0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39},
|
||||
{0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5},
|
||||
{0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24},
|
||||
{0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b},
|
||||
{0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f},
|
||||
{0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9},
|
||||
{0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d},
|
||||
{0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50},
|
||||
{0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1},
|
||||
{0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa},
|
||||
{0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e},
|
||||
{0xf5360801, 0x8b9953a2, 0x859533b4, 0x7c05542e, 0x5ea6179c, 0xe83b698e, 0xb9f82f90, 0x30643640},
|
||||
{0x729b0401, 0xe7bda49b, 0x7fa75222, 0xd21c9e3b, 0x7013b7fc, 0x5045d7a2, 0xcd94e7dd, 0x30644259},
|
||||
{0xb14d8201, 0x15cfcd17, 0xfcb0615a, 0xfd284341, 0x78ca882c, 0x844b0eac, 0x57634403, 0x30644866},
|
||||
{0xd0a6c101, 0xacd8e155, 0x3b34e8f5, 0x12ae15c5, 0x7d25f045, 0x9e4daa31, 0x9c4a7216, 0x30644b6c},
|
||||
{0xe0536081, 0x785d6b74, 0xda772cc3, 0x1d70ff06, 0xff53a451, 0x2b4ef7f3, 0xbebe0920, 0x30644cef},
|
||||
{0x6829b041, 0x5e1fb084, 0xaa184eaa, 0x22d273a7, 0x406a7e57, 0xf1cf9ed5, 0x4ff7d4a4, 0x30644db1},
|
||||
{0x2c14d821, 0xd100d30c, 0x11e8df9d, 0x25832df8, 0xe0f5eb5a, 0x550ff245, 0x1894ba67, 0x30644e12},
|
||||
{0x0e0a6c11, 0x8a716450, 0x45d12817, 0xa6db8b20, 0x313ba1db, 0x86b01bfe, 0x7ce32d48, 0x30644e42},
|
||||
{0xff053609, 0x6729acf1, 0x5fc54c54, 0x6787b9b4, 0x595e7d1c, 0x1f8030da, 0xaf0a66b9, 0x30644e5a},
|
||||
{0xf7829b05, 0xd585d142, 0x6cbf5e72, 0xc7ddd0fe, 0x6d6feabc, 0x6be83b48, 0xc81e0371, 0x30644e66},
|
||||
{0x73c14d83, 0x0cb3e36b, 0x733c6782, 0xf808dca3, 0x7778a18c, 0x921c407f, 0xd4a7d1cd, 0x30644e6c},
|
||||
{0xb1e0a6c2, 0xa84aec7f, 0xf67aec09, 0x101e6275, 0xfc7cfcf5, 0xa536431a, 0xdaecb8fb, 0x30644e6f}
|
||||
} };
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0xf8000001, 0xa1f0fac9, 0x3cdcb848, 0x9419f424, 0x40c0ac2e, 0xdc2822db, 0x7098d014, 0x18322739},
|
||||
{0xf4000001, 0xf2e9782e, 0x5b4b146c, 0xde26ee36, 0xe1210245, 0x4a3c3448, 0x28e5381f, 0x244b3ad6},
|
||||
{0x72000001, 0x1b65b6e1, 0x6a82427f, 0x832d6b3f, 0xb1512d51, 0x81463cff, 0x850b6c24, 0x2a57c4a4},
|
||||
{0xb1000001, 0x2fa3d63a, 0xf21dd988, 0x55b0a9c3, 0x196942d7, 0x1ccb415b, 0xb31e8627, 0x2d5e098b},
|
||||
{0x50800001, 0xb9c2e5e7, 0x35eba50c, 0x3ef24906, 0xcd754d9a, 0x6a8dc388, 0x4a281328, 0x2ee12bff},
|
||||
{0xa0400001, 0xfed26dbd, 0x57d28ace, 0xb39318a7, 0xa77b52fb, 0x116f049f, 0x15acd9a9, 0x2fa2bd39},
|
||||
{0xc8200001, 0x215a31a8, 0xe8c5fdb0, 0x6de38077, 0x147e55ac, 0x64dfa52b, 0xfb6f3ce9, 0x300385d5},
|
||||
{0x5c100001, 0xb29e139e, 0x313fb720, 0xcb0bb460, 0xcaffd704, 0x8e97f570, 0x6e506e89, 0x3033ea24},
|
||||
{0x26080001, 0xfb400499, 0x557c93d8, 0xf99fce54, 0xa64097b0, 0xa3741d93, 0xa7c10759, 0x304c1c4b},
|
||||
{0x8b040001, 0x1f90fd16, 0x679b0235, 0x10e9db4e, 0x13e0f807, 0xade231a5, 0x447953c1, 0x3058355f},
|
||||
{0x3d820001, 0x31b97955, 0x70aa3963, 0x1c8ee1cb, 0xcab12832, 0xb3193bad, 0x12d579f5, 0x305e41e9},
|
||||
{0x96c10001, 0x3acdb774, 0xf531d4fa, 0xa2616509, 0x26194047, 0xb5b4c0b2, 0xfa038d0f, 0x3061482d},
|
||||
{0x43608001, 0xbf57d684, 0x3775a2c5, 0x654aa6a9, 0x53cd4c52, 0xb7028334, 0x6d9a969c, 0x3062cb50},
|
||||
{0x19b04001, 0x819ce60c, 0xd89789ab, 0xc6bf4778, 0x6aa75257, 0x37a96475, 0xa7661b63, 0x30638ce1},
|
||||
{0x04d82001, 0x62bf6dd0, 0xa9287d1e, 0x777997e0, 0xf614555a, 0x77fcd515, 0x444bddc6, 0x3063edaa},
|
||||
{0xfa6c1001, 0xd350b1b1, 0x9170f6d7, 0xcfd6c014, 0x3bcad6db, 0x18268d66, 0x92bebef8, 0x30641e0e},
|
||||
{0xf5360801, 0x8b9953a2, 0x859533b4, 0x7c05542e, 0x5ea6179c, 0xe83b698e, 0xb9f82f90, 0x30643640},
|
||||
{0x729b0401, 0xe7bda49b, 0x7fa75222, 0xd21c9e3b, 0x7013b7fc, 0x5045d7a2, 0xcd94e7dd, 0x30644259},
|
||||
{0xb14d8201, 0x15cfcd17, 0xfcb0615a, 0xfd284341, 0x78ca882c, 0x844b0eac, 0x57634403, 0x30644866},
|
||||
{0xd0a6c101, 0xacd8e155, 0x3b34e8f5, 0x12ae15c5, 0x7d25f045, 0x9e4daa31, 0x9c4a7216, 0x30644b6c},
|
||||
{0xe0536081, 0x785d6b74, 0xda772cc3, 0x1d70ff06, 0xff53a451, 0x2b4ef7f3, 0xbebe0920, 0x30644cef},
|
||||
{0x6829b041, 0x5e1fb084, 0xaa184eaa, 0x22d273a7, 0x406a7e57, 0xf1cf9ed5, 0x4ff7d4a4, 0x30644db1},
|
||||
{0x2c14d821, 0xd100d30c, 0x11e8df9d, 0x25832df8, 0xe0f5eb5a, 0x550ff245, 0x1894ba67, 0x30644e12},
|
||||
{0x0e0a6c11, 0x8a716450, 0x45d12817, 0xa6db8b20, 0x313ba1db, 0x86b01bfe, 0x7ce32d48, 0x30644e42},
|
||||
{0xff053609, 0x6729acf1, 0x5fc54c54, 0x6787b9b4, 0x595e7d1c, 0x1f8030da, 0xaf0a66b9, 0x30644e5a},
|
||||
{0xf7829b05, 0xd585d142, 0x6cbf5e72, 0xc7ddd0fe, 0x6d6feabc, 0x6be83b48, 0xc81e0371, 0x30644e66},
|
||||
{0x73c14d83, 0x0cb3e36b, 0x733c6782, 0xf808dca3, 0x7778a18c, 0x921c407f, 0xd4a7d1cd, 0x30644e6c},
|
||||
{0xb1e0a6c2, 0xa84aec7f, 0xf67aec09, 0x101e6275, 0xfc7cfcf5, 0xa536431a, 0xdaecb8fb, 0x30644e6f}}};
|
||||
};
|
||||
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned modulus_bit_count = 254;
|
||||
static constexpr storage<limbs_count> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522, 0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45, 0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<2*limbs_count> modulus_wide = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2*limbs_count> modulus_squared = {0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95, 0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_2 = {0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a, 0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2*limbs_count> modulus_squared_4 = {0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55, 0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17, 0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xc58f0d9d, 0xd35d438d, 0xf5c70b3d, 0xa78eb28, 0x7879462c, 0x666ea36f, 0x9a07df2f, 0xe0a77c1};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x14afa37, 0xed84884a, 0x278edf8, 0xeb202285, 0xb74492d9, 0xcf63e9cf, 0x59e5c639, 0x2e671571};
|
||||
static constexpr storage<limbs_count> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522,
|
||||
0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45,
|
||||
0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95,
|
||||
0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a,
|
||||
0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55,
|
||||
0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17,
|
||||
0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xc58f0d9d, 0xd35d438d, 0xf5c70b3d, 0xa78eb28,
|
||||
0x7879462c, 0x666ea36f, 0x9a07df2f, 0xe0a77c1};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x14afa37, 0xed84884a, 0x278edf8, 0xeb202285,
|
||||
0xb74492d9, 0xcf63e9cf, 0x59e5c639, 0x2e671571};
|
||||
|
||||
// i^2, the square of the imaginary unit for the extension field
|
||||
static constexpr uint32_t i_squared = 1;
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> g1_gen_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> g1_gen_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> g2_gen_x_re = {0xd992f6ed, 0x46debd5c, 0xf75edadd, 0x674322d4, 0x5e5c4479, 0x426a0066, 0x121f1e76, 0x1800deef};
|
||||
static constexpr storage<limbs_count> g2_gen_x_im = {0xaef312c2, 0x97e485b7, 0x35a9e712, 0xf1aa4933, 0x31fb5d25, 0x7260bfb7, 0x920d483a, 0x198e9393};
|
||||
static constexpr storage<limbs_count> g2_gen_y_re = {0x66fa7daa, 0x4ce6cc01, 0x0c43d37b, 0xe3d1e769, 0x8dcb408f, 0x4aab7180, 0xdb8c6deb, 0x12c85ea5};
|
||||
static constexpr storage<limbs_count> g2_gen_y_im = {0xd122975b, 0x55acdadc, 0x70b38ef3, 0xbc4b3133, 0x690c3395, 0xec9e99ad, 0x585ff075, 0x090689d0};
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<limbs_count> g1_gen_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> g1_gen_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> g2_gen_x_re = {0xd992f6ed, 0x46debd5c, 0xf75edadd, 0x674322d4,
|
||||
0x5e5c4479, 0x426a0066, 0x121f1e76, 0x1800deef};
|
||||
static constexpr storage<limbs_count> g2_gen_x_im = {0xaef312c2, 0x97e485b7, 0x35a9e712, 0xf1aa4933,
|
||||
0x31fb5d25, 0x7260bfb7, 0x920d483a, 0x198e9393};
|
||||
static constexpr storage<limbs_count> g2_gen_y_re = {0x66fa7daa, 0x4ce6cc01, 0x0c43d37b, 0xe3d1e769,
|
||||
0x8dcb408f, 0x4aab7180, 0xdb8c6deb, 0x12c85ea5};
|
||||
static constexpr storage<limbs_count> g2_gen_y_im = {0xd122975b, 0x55acdadc, 0x70b38ef3, 0xbc4b3133,
|
||||
0x690c3395, 0xec9e99ad, 0x585ff075, 0x090689d0};
|
||||
};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000003, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {0x24a138e5, 0x3267e6dc, 0x59dbefa3, 0xb5b4c5e5, 0x1be06ac3, 0x81be1899, 0xceb8aaae, 0x2b149d40};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {0x85c315d2, 0xe4a2bd06, 0xe52d1852, 0xa74fa084, 0xeed8fdf4, 0xcd2cafad, 0x3af0fed4, 0x009713b0};
|
||||
}
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000003, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
|
||||
0x24a138e5, 0x3267e6dc, 0x59dbefa3, 0xb5b4c5e5, 0x1be06ac3, 0x81be1899, 0xceb8aaae, 0x2b149d40};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
|
||||
0x85c315d2, 0xe4a2bd06, 0xe52d1852, 0xa74fa084, 0xeed8fdf4, 0xcd2cafad, 0x3af0fed4, 0x009713b0};
|
||||
} // namespace PARAMS_BN254
|
||||
|
||||
@@ -1,68 +1,60 @@
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include <cuda.h>
|
||||
|
||||
extern "C" BN254::projective_t random_projective_bn254()
|
||||
{
|
||||
return BN254::projective_t::rand_host();
|
||||
}
|
||||
extern "C" BN254::projective_t random_projective_bn254() { return BN254::projective_t::rand_host(); }
|
||||
|
||||
extern "C" BN254::projective_t projective_zero_bn254()
|
||||
{
|
||||
return BN254::projective_t::zero();
|
||||
}
|
||||
extern "C" BN254::projective_t projective_zero_bn254() { return BN254::projective_t::zero(); }
|
||||
|
||||
extern "C" bool projective_is_on_curve_bn254(BN254::projective_t *point1)
|
||||
extern "C" bool projective_is_on_curve_bn254(BN254::projective_t* point1)
|
||||
{
|
||||
return BN254::projective_t::is_on_curve(*point1);
|
||||
}
|
||||
|
||||
extern "C" BN254::affine_t projective_to_affine_bn254(BN254::projective_t *point1)
|
||||
extern "C" BN254::affine_t projective_to_affine_bn254(BN254::projective_t* point1)
|
||||
{
|
||||
return BN254::projective_t::to_affine(*point1);
|
||||
}
|
||||
|
||||
extern "C" BN254::projective_t projective_from_affine_bn254(BN254::affine_t *point1)
|
||||
extern "C" BN254::projective_t projective_from_affine_bn254(BN254::affine_t* point1)
|
||||
{
|
||||
return BN254::projective_t::from_affine(*point1);
|
||||
}
|
||||
|
||||
extern "C" BN254::scalar_field_t random_scalar_bn254()
|
||||
{
|
||||
return BN254::scalar_field_t::rand_host();
|
||||
}
|
||||
extern "C" BN254::scalar_field_t random_scalar_bn254() { return BN254::scalar_field_t::rand_host(); }
|
||||
|
||||
extern "C" bool eq_bn254(BN254::projective_t *point1, BN254::projective_t *point2)
|
||||
extern "C" bool eq_bn254(BN254::projective_t* point1, BN254::projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BN254::point_field_t::zero()) && (point1->y == BN254::point_field_t::zero()) && (point1->z == BN254::point_field_t::zero())) &&
|
||||
!((point2->x == BN254::point_field_t::zero()) && (point2->y == BN254::point_field_t::zero()) && (point2->z == BN254::point_field_t::zero()));
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BN254::point_field_t::zero()) && (point1->y == BN254::point_field_t::zero()) &&
|
||||
(point1->z == BN254::point_field_t::zero())) &&
|
||||
!((point2->x == BN254::point_field_t::zero()) && (point2->y == BN254::point_field_t::zero()) &&
|
||||
(point2->z == BN254::point_field_t::zero()));
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective_t *point2)
|
||||
extern "C" bool eq_g2_bn254(BN254::g2_projective_t* point1, BN254::g2_projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) && (point1->z == BN254::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) && (point2->z == BN254::g2_point_field_t::zero()));
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) &&
|
||||
(point1->z == BN254::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) &&
|
||||
(point2->z == BN254::g2_point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" BN254::g2_projective_t random_g2_projective_bn254()
|
||||
{
|
||||
return BN254::g2_projective_t::rand_host();
|
||||
}
|
||||
extern "C" BN254::g2_projective_t random_g2_projective_bn254() { return BN254::g2_projective_t::rand_host(); }
|
||||
|
||||
extern "C" BN254::g2_affine_t g2_projective_to_affine_bn254(BN254::g2_projective_t *point1)
|
||||
extern "C" BN254::g2_affine_t g2_projective_to_affine_bn254(BN254::g2_projective_t* point1)
|
||||
{
|
||||
return BN254::g2_projective_t::to_affine(*point1);
|
||||
}
|
||||
|
||||
extern "C" BN254::g2_projective_t g2_projective_from_affine_bn254(BN254::g2_affine_t *point1)
|
||||
extern "C" BN254::g2_projective_t g2_projective_from_affine_bn254(BN254::g2_affine_t* point1)
|
||||
{
|
||||
return BN254::g2_projective_t::from_affine(*point1);
|
||||
}
|
||||
|
||||
extern "C" bool g2_projective_is_on_curve_bn254(BN254::g2_projective_t *point1)
|
||||
extern "C" bool g2_projective_is_on_curve_bn254(BN254::g2_projective_t* point1)
|
||||
{
|
||||
return BN254::g2_projective_t::is_on_curve(*point1);
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "projective.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
@@ -1,88 +1,70 @@
|
||||
#ifndef _BN254_VEC_MULT
|
||||
#define _BN254_VEC_MULT
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
extern "C" int32_t vec_mod_mult_point_bn254(BN254::projective_t *inout,
|
||||
BN254::scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
extern "C" int32_t vec_mod_mult_point_bn254(
|
||||
BN254::projective_t* inout, BN254::scalar_t* scalar_vec, size_t n_elments, size_t device_id, cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
try {
|
||||
// TODO: device_id
|
||||
vector_mod_mult<BN254::projective_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t vec_mod_mult_scalar_bn254(BN254::scalar_t *inout,
|
||||
BN254::scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
extern "C" int32_t vec_mod_mult_scalar_bn254(
|
||||
BN254::scalar_t* inout, BN254::scalar_t* scalar_vec, size_t n_elments, size_t device_id, cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
try {
|
||||
// TODO: device_id
|
||||
vector_mod_mult<BN254::scalar_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t vec_mod_mult_device_scalar_bn254(
|
||||
BN254::scalar_t *inout,
|
||||
BN254::scalar_t *scalar_vec,
|
||||
size_t n_elements,
|
||||
size_t device_id
|
||||
) {
|
||||
BN254::scalar_t* inout, BN254::scalar_t* scalar_vec, size_t n_elements, size_t device_id)
|
||||
{
|
||||
try {
|
||||
vector_mod_mult_device<BN254::scalar_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elements);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error &ex) {
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t matrix_vec_mod_mult_bn254(BN254::scalar_t *matrix_flattened,
|
||||
BN254::scalar_t *input,
|
||||
BN254::scalar_t *output,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
extern "C" int32_t matrix_vec_mod_mult_bn254(
|
||||
BN254::scalar_t* matrix_flattened,
|
||||
BN254::scalar_t* input,
|
||||
BN254::scalar_t* output,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
try {
|
||||
// TODO: device_id
|
||||
matrix_mod_mult<BN254::scalar_t>(matrix_flattened, input, output, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#if defined(G2_DEFINED)
|
||||
#include "../../primitives/extension_field.cuh"
|
||||
#endif
|
||||
|
||||
#include "params.cuh"
|
||||
|
||||
namespace ${CURVE_NAME_U} {
|
||||
typedef Field<PARAMS_${CURVE_NAME_U}::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_${CURVE_NAME_U}::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{ PARAMS_${CURVE_NAME_U}::weierstrass_b };
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_${CURVE_NAME_U}::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_${CURVE_NAME_U}::weierstrass_b_g2_re },
|
||||
point_field_t{ PARAMS_${CURVE_NAME_U}::weierstrass_b_g2_im }};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
}
|
||||
25
icicle/curves/curve_template/curve_config.cuh.tmpl
Normal file
25
icicle/curves/curve_template/curve_config.cuh.tmpl
Normal file
@@ -0,0 +1,25 @@
|
||||
#pragma once
|
||||
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#if defined(G2_DEFINED)
|
||||
#include "../../primitives/extension_field.cuh"
|
||||
#endif
|
||||
|
||||
#include "params.cuh"
|
||||
|
||||
namespace ${CURVE_NAME_U} {
|
||||
typedef Field<PARAMS_${CURVE_NAME_U}::fp_config> scalar_field_t;
|
||||
typedef scalar_field_t scalar_t;
|
||||
typedef Field<PARAMS_${CURVE_NAME_U}::fq_config> point_field_t;
|
||||
static constexpr point_field_t b = point_field_t{PARAMS_${CURVE_NAME_U}::weierstrass_b};
|
||||
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
#if defined(G2_DEFINED)
|
||||
typedef ExtensionField<PARAMS_${CURVE_NAME_U}::fq_config> g2_point_field_t;
|
||||
static constexpr g2_point_field_t b_g2 = g2_point_field_t{
|
||||
point_field_t{PARAMS_${CURVE_NAME_U}::weierstrass_b_g2_re}, point_field_t{PARAMS_${CURVE_NAME_U}::weierstrass_b_g2_im}};
|
||||
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
|
||||
typedef Affine<g2_point_field_t> g2_affine_t;
|
||||
#endif
|
||||
}
|
||||
@@ -1,567 +0,0 @@
|
||||
#ifndef _${CURVE_NAME_U}_LDE
|
||||
#define _${CURVE_NAME_U}_LDE
|
||||
#include <cuda.h>
|
||||
#include "../../appUtils/ntt/lde.cu"
|
||||
#include "../../appUtils/ntt/ntt.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../../utils/mont.cuh"
|
||||
|
||||
|
||||
|
||||
extern "C" ${CURVE_NAME_U}::scalar_t* build_domain_cuda_${CURVE_NAME_L}(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
if (inverse) {
|
||||
return fill_twiddle_factors_array(domain_size, ${CURVE_NAME_U}::scalar_t::omega_inv(logn), stream);
|
||||
} else {
|
||||
return fill_twiddle_factors_array(domain_size, ${CURVE_NAME_U}::scalar_t::omega(logn), stream);
|
||||
}
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t *arr, uint32_t n, bool inverse, Decimation decimation, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_template<${CURVE_NAME_U}::scalar_t,${CURVE_NAME_U}::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ecntt_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *arr, uint32_t n, bool inverse, Decimation decimation, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_template<${CURVE_NAME_U}::projective_t,${CURVE_NAME_U}::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_batch_template<${CURVE_NAME_U}::scalar_t,${CURVE_NAME_U}::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ecntt_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return ntt_end2end_batch_template<${CURVE_NAME_U}::projective_t,${CURVE_NAME_U}::scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t *d_evaluations, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
${CURVE_NAME_U}::scalar_t* _null = nullptr;
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_scalars_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_evaluations, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned n,
|
||||
unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
${CURVE_NAME_U}::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_scalars_on_coset_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t *d_evaluations, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned n, ${CURVE_NAME_U}::scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_scalars_batch_on_coset_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_evaluations, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned n,
|
||||
unsigned batch_size, ${CURVE_NAME_U}::scalar_t* coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t *d_evaluations, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
${CURVE_NAME_U}::scalar_t* _null = nullptr;
|
||||
return interpolate(d_out, d_evaluations, d_domain, n, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int interpolate_points_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t* d_evaluations, ${CURVE_NAME_U}::scalar_t* d_domain,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
${CURVE_NAME_U}::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t *d_coefficients, ${CURVE_NAME_U}::scalar_t *d_domain,
|
||||
unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
${CURVE_NAME_U}::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_coefficients, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
${CURVE_NAME_U}::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t *d_coefficients, ${CURVE_NAME_U}::scalar_t *d_domain,
|
||||
unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
${CURVE_NAME_U}::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t* d_coefficients, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
${CURVE_NAME_U}::scalar_t* _null = nullptr;
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_on_coset_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t *d_coefficients, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, ${CURVE_NAME_U}::scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_scalars_on_coset_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_coefficients, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, ${CURVE_NAME_U}::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_on_coset_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t *d_coefficients, ${CURVE_NAME_U}::scalar_t *d_domain, unsigned domain_size,
|
||||
unsigned n, ${CURVE_NAME_U}::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int evaluate_points_on_coset_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::projective_t* d_coefficients, ${CURVE_NAME_U}::scalar_t* d_domain, unsigned domain_size,
|
||||
unsigned n, unsigned batch_size, ${CURVE_NAME_U}::scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_inplace_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_inout, ${CURVE_NAME_U}::scalar_t* d_twiddles,
|
||||
unsigned n, unsigned batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
|
||||
cudaStreamCreate(&stream);
|
||||
${CURVE_NAME_U}::scalar_t* _null = nullptr;
|
||||
ntt_inplace_batch_template(d_inout, d_twiddles, n, batch_size, inverse, false, _null, stream, true);
|
||||
return CUDA_SUCCESS; //TODO: we should implement this https://leimao.github.io/blog/Proper-CUDA-Error-Checking/
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int ntt_inplace_coset_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_inout, ${CURVE_NAME_U}::scalar_t* d_twiddles,
|
||||
unsigned n, unsigned batch_size, bool inverse, bool is_coset, ${CURVE_NAME_U}::scalar_t* coset, size_t device_id = 0, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
ntt_inplace_batch_template(d_inout, d_twiddles, n, batch_size, inverse, is_coset, coset, stream, true);
|
||||
return CUDA_SUCCESS; //TODO: we should implement this https://leimao.github.io/blog/Proper-CUDA-Error-Checking/
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int sub_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_in1, ${CURVE_NAME_U}::scalar_t* d_in2, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return sub_polys(d_out, d_in1, d_in2, n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int add_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_in1, ${CURVE_NAME_U}::scalar_t* d_in2, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return add_polys(d_out, d_in1, d_in2, n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int to_montgomery_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery(d_inout, n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int from_montgomery_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery(d_inout, n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int to_montgomery_proj_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 3 * n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int from_montgomery_proj_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 3 * n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int to_montgomery_aff_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return to_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 2 * n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int from_montgomery_aff_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
|
||||
{
|
||||
try
|
||||
{
|
||||
cudaStreamCreate(&stream);
|
||||
return from_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 2 * n, stream);
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
/**
 * C entry point: runs `to_montgomery` in place over `n` G2 projective points.
 * The buffer is reinterpreted as `point_field_t` elements and `6 * n` of them are processed.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Points to convert in place.
 * @param n Number of points.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `to_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int to_montgomery_proj_points_g2_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = to_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 6 * n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `from_montgomery` in place over `n` G2 projective points.
 * The buffer is reinterpreted as `point_field_t` elements and `6 * n` of them are processed.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Points to convert in place.
 * @param n Number of points.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `from_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int from_montgomery_proj_points_g2_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = from_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 6 * n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `to_montgomery` in place over `n` G2 affine points.
 * The buffer is reinterpreted as `point_field_t` elements and `4 * n` of them are processed.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Points to convert in place.
 * @param n Number of points.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `to_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int to_montgomery_aff_points_g2_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = to_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 4 * n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `from_montgomery` in place over `n` G2 affine points.
 * The buffer is reinterpreted as `point_field_t` elements and `4 * n` of them are processed.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Points to convert in place.
 * @param n Number of points.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `from_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int from_montgomery_aff_points_g2_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = from_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 4 * n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
#endif
|
||||
|
||||
/**
 * C entry point: calls `reverse_order` on `arr` with `n` elements and waits for the stream to finish.
 * `logn` is floor(log2(n)), now computed with integer shifts: the previous `log(n) / log(2)`
 * could truncate one too low from floating-point rounding and was undefined for `n <= 0`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param arr Scalars to reorder in place.
 * @param n Number of elements; `n <= 0` yields `logn == 0`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return 0 on success, -1 if a `std::runtime_error` was caught.
 */
extern "C" int reverse_order_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  uint32_t logn = 0;
  for (unsigned remaining = n > 0 ? unsigned(n) : 0u; remaining > 1; remaining >>= 1)
    ++logn;

  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = 0;
  try {
    reverse_order(arr, n, logn, stream);
    cudaStreamSynchronize(stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: calls `reverse_order_batch` on `arr` with per-item size `n` and `batch_size` items.
 * `logn` is floor(log2(n)), now computed with integer shifts: the previous `log(n) / log(2)`
 * could truncate one too low from floating-point rounding and was undefined for `n <= 0`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param arr Scalars to reorder in place.
 * @param n Size passed to `reverse_order_batch`; `n <= 0` yields `logn == 0`.
 * @param batch_size Batch count passed to `reverse_order_batch`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return 0 on success, -1 if a `std::runtime_error` was caught.
 */
extern "C" int reverse_order_scalars_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  uint32_t logn = 0;
  for (unsigned remaining = n > 0 ? unsigned(n) : 0u; remaining > 1; remaining >>= 1)
    ++logn;

  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = 0;
  try {
    reverse_order_batch(arr, n, logn, batch_size, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: calls `reverse_order` on `arr` with `n` projective points.
 * `logn` is floor(log2(n)), now computed with integer shifts: the previous `log(n) / log(2)`
 * could truncate one too low from floating-point rounding and was undefined for `n <= 0`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param arr Points to reorder in place.
 * @param n Number of elements; `n <= 0` yields `logn == 0`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return 0 on success, -1 if a `std::runtime_error` was caught.
 */
extern "C" int reverse_order_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  uint32_t logn = 0;
  for (unsigned remaining = n > 0 ? unsigned(n) : 0u; remaining > 1; remaining >>= 1)
    ++logn;

  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = 0;
  try {
    reverse_order(arr, n, logn, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: calls `reverse_order_batch` on `arr` with per-item size `n` and `batch_size` items.
 * `logn` is floor(log2(n)), now computed with integer shifts: the previous `log(n) / log(2)`
 * could truncate one too low from floating-point rounding and was undefined for `n <= 0`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param arr Points to reorder in place.
 * @param n Size passed to `reverse_order_batch`; `n <= 0` yields `logn == 0`.
 * @param batch_size Batch count passed to `reverse_order_batch`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return 0 on success, -1 if a `std::runtime_error` was caught.
 */
extern "C" int reverse_order_points_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  uint32_t logn = 0;
  for (unsigned remaining = n > 0 ? unsigned(n) : 0u; remaining > 1; remaining >>= 1)
    ++logn;

  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = 0;
  try {
    reverse_order_batch(arr, n, logn, batch_size, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
#endif
|
||||
592
icicle/curves/curve_template/lde.cu.tmpl
Normal file
592
icicle/curves/curve_template/lde.cu.tmpl
Normal file
@@ -0,0 +1,592 @@
|
||||
#ifndef _${CURVE_NAME_U}_LDE
|
||||
#define _${CURVE_NAME_U}_LDE
|
||||
#include "../../appUtils/ntt/lde.cu"
|
||||
#include "../../appUtils/ntt/ntt.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "../../utils/mont.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include <cuda.h>
|
||||
|
||||
/**
 * C entry point: builds a twiddle-factor array via `fill_twiddle_factors_array`, seeded with
 * `scalar_t::omega_inv(logn)` when `inverse` is set and `scalar_t::omega(logn)` otherwise.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param domain_size Size forwarded to `fill_twiddle_factors_array`.
 * @param logn Argument for `omega` / `omega_inv`.
 * @param inverse Selects `omega_inv` instead of `omega`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Pointer returned by `fill_twiddle_factors_array`, or nullptr if a `std::runtime_error` was caught.
 */
extern "C" ${CURVE_NAME_U}::scalar_t* build_domain_cuda_${CURVE_NAME_L}(
  uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  ${CURVE_NAME_U}::scalar_t* twiddles = nullptr;
  try {
    if (inverse) {
      twiddles = fill_twiddle_factors_array(domain_size, ${CURVE_NAME_U}::scalar_t::omega_inv(logn), stream);
    } else {
      twiddles = fill_twiddle_factors_array(domain_size, ${CURVE_NAME_U}::scalar_t::omega(logn), stream);
    }
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    twiddles = nullptr;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return twiddles;
}
|
||||
|
||||
/**
 * C entry point: forwards to `ntt_end2end_template<scalar_t, scalar_t>` for `n` scalars.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param arr Scalars passed to the NTT.
 * @param n Number of elements.
 * @param inverse Forwarded to the template.
 * @param decimation Currently unused.
 * @param device_id Currently unused (see TODO).
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `ntt_end2end_template`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int ntt_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* arr, uint32_t n, bool inverse, Decimation decimation, size_t device_id = 0, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = ntt_end2end_template<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `ntt_end2end_template<projective_t, scalar_t>` for `n` points.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param arr Projective points passed to the NTT.
 * @param n Number of elements.
 * @param inverse Forwarded to the template.
 * @param decimation Currently unused.
 * @param device_id Currently unused (see TODO).
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `ntt_end2end_template`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int ecntt_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::projective_t* arr,
  uint32_t n,
  bool inverse,
  Decimation decimation,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = ntt_end2end_template<${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `ntt_end2end_batch_template<scalar_t, scalar_t>`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param arr Scalars passed to the batched NTT.
 * @param arr_size Forwarded as the array size.
 * @param batch_size Forwarded as the batch size.
 * @param inverse Forwarded to the template.
 * @param device_id Currently unused (see TODO).
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `ntt_end2end_batch_template`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int ntt_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* arr,
  uint32_t arr_size,
  uint32_t batch_size,
  bool inverse,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = ntt_end2end_batch_template<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::scalar_t>(
      arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `ntt_end2end_batch_template<projective_t, scalar_t>`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param arr Projective points passed to the batched NTT.
 * @param arr_size Forwarded as the array size.
 * @param batch_size Forwarded as the batch size.
 * @param inverse Forwarded to the template.
 * @param device_id Currently unused (see TODO).
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `ntt_end2end_batch_template`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int ecntt_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::projective_t* arr,
  uint32_t arr_size,
  uint32_t batch_size,
  bool inverse,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = ntt_end2end_batch_template<${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::scalar_t>(
      arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `interpolate` with the coset flag `false` and a null coset pointer.
 * Unlike most wrappers in this file, this one does not create a stream; it uses `stream` as passed.
 * @param d_out Output buffer forwarded to `interpolate`.
 * @param d_evaluations Input evaluations forwarded to `interpolate`.
 * @param d_domain Domain forwarded to `interpolate`.
 * @param n Size forwarded to `interpolate`.
 * @param device_id Currently unused.
 * @param stream Stream forwarded to `interpolate`.
 * @return Result of `interpolate`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int interpolate_scalars_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* d_out,
  ${CURVE_NAME_U}::scalar_t* d_evaluations,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned n,
  unsigned device_id = 0,
  cudaStream_t stream = 0)
{
  int status = -1; // stays -1 if the call below throws
  try {
    ${CURVE_NAME_U}::scalar_t* no_coset = nullptr;
    status = interpolate(d_out, d_evaluations, d_domain, n, false, no_coset, stream);
  } catch (const std::runtime_error& err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `interpolate_batch` with the coset flag `false` and a null coset pointer.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output buffer forwarded to `interpolate_batch`.
 * @param d_evaluations Input evaluations.
 * @param d_domain Domain.
 * @param n Size forwarded to `interpolate_batch`.
 * @param batch_size Batch size forwarded to `interpolate_batch`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `interpolate_batch`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int interpolate_scalars_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* d_out,
  ${CURVE_NAME_U}::scalar_t* d_evaluations,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned n,
  unsigned batch_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    ${CURVE_NAME_U}::scalar_t* _null = nullptr;
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `interpolate` with the coset flag `true` and `coset_powers`.
 * Unlike most wrappers in this file, this one does not create a stream; it uses `stream` as passed.
 * @param d_out Output buffer forwarded to `interpolate`.
 * @param d_evaluations Input evaluations forwarded to `interpolate`.
 * @param d_domain Domain forwarded to `interpolate`.
 * @param n Size forwarded to `interpolate`.
 * @param coset_powers Coset powers forwarded to `interpolate`.
 * @param device_id Currently unused.
 * @param stream Stream forwarded to `interpolate`.
 * @return Result of `interpolate`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int interpolate_scalars_on_coset_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* d_out,
  ${CURVE_NAME_U}::scalar_t* d_evaluations,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned n,
  ${CURVE_NAME_U}::scalar_t* coset_powers,
  unsigned device_id = 0,
  cudaStream_t stream = 0)
{
  int status = -1; // stays -1 if the call below throws
  try {
    status = interpolate(d_out, d_evaluations, d_domain, n, true, coset_powers, stream);
  } catch (const std::runtime_error& err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `interpolate_batch` with the coset flag `true` and `coset_powers`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output buffer forwarded to `interpolate_batch`.
 * @param d_evaluations Input evaluations.
 * @param d_domain Domain.
 * @param n Size forwarded to `interpolate_batch`.
 * @param batch_size Batch size forwarded to `interpolate_batch`.
 * @param coset_powers Coset powers forwarded to `interpolate_batch`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `interpolate_batch`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int interpolate_scalars_batch_on_coset_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* d_out,
  ${CURVE_NAME_U}::scalar_t* d_evaluations,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned n,
  unsigned batch_size,
  ${CURVE_NAME_U}::scalar_t* coset_powers,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, true, coset_powers, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards projective-point evaluations to `interpolate` with the coset flag
 * `false` and a null coset pointer.
 * Unlike most wrappers in this file, this one does not create a stream; it uses `stream` as passed.
 * @param d_out Output points forwarded to `interpolate`.
 * @param d_evaluations Input evaluations forwarded to `interpolate`.
 * @param d_domain Scalar domain forwarded to `interpolate`.
 * @param n Size forwarded to `interpolate`.
 * @param device_id Currently unused.
 * @param stream Stream forwarded to `interpolate`.
 * @return Result of `interpolate`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int interpolate_points_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::projective_t* d_out,
  ${CURVE_NAME_U}::projective_t* d_evaluations,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned n,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  int status = -1; // stays -1 if the call below throws
  try {
    ${CURVE_NAME_U}::scalar_t* no_coset = nullptr;
    status = interpolate(d_out, d_evaluations, d_domain, n, false, no_coset, stream);
  } catch (const std::runtime_error& err) {
    printf("error %s", err.what());
  }
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards projective-point evaluations to `interpolate_batch` with the coset
 * flag `false` and a null coset pointer.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output points.
 * @param d_evaluations Input evaluations.
 * @param d_domain Scalar domain.
 * @param n Size forwarded to `interpolate_batch`.
 * @param batch_size Batch size forwarded to `interpolate_batch`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `interpolate_batch`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int interpolate_points_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::projective_t* d_out,
  ${CURVE_NAME_U}::projective_t* d_evaluations,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned n,
  unsigned batch_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    ${CURVE_NAME_U}::scalar_t* _null = nullptr;
    status = interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, false, _null, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `evaluate` with the coset flag `false` and a null coset pointer.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output buffer forwarded to `evaluate`.
 * @param d_coefficients Input coefficients.
 * @param d_domain Domain.
 * @param domain_size Domain size forwarded to `evaluate`.
 * @param n Size forwarded to `evaluate`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `evaluate`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_scalars_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* d_out,
  ${CURVE_NAME_U}::scalar_t* d_coefficients,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    ${CURVE_NAME_U}::scalar_t* _null = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `evaluate_batch` with the coset flag `false` and a null coset pointer.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output buffer forwarded to `evaluate_batch`.
 * @param d_coefficients Input coefficients.
 * @param d_domain Domain.
 * @param domain_size Domain size forwarded to `evaluate_batch`.
 * @param n Size forwarded to `evaluate_batch`.
 * @param batch_size Batch size forwarded to `evaluate_batch`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `evaluate_batch`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_scalars_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* d_out,
  ${CURVE_NAME_U}::scalar_t* d_coefficients,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned batch_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    ${CURVE_NAME_U}::scalar_t* _null = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards projective-point coefficients to `evaluate` with the coset flag
 * `false` and a null coset pointer.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output points.
 * @param d_coefficients Input coefficients.
 * @param d_domain Scalar domain.
 * @param domain_size Domain size forwarded to `evaluate`.
 * @param n Size forwarded to `evaluate`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `evaluate`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_points_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::projective_t* d_out,
  ${CURVE_NAME_U}::projective_t* d_coefficients,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    ${CURVE_NAME_U}::scalar_t* _null = nullptr;
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards projective-point coefficients to `evaluate_batch` with the coset
 * flag `false` and a null coset pointer.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output points.
 * @param d_coefficients Input coefficients.
 * @param d_domain Scalar domain.
 * @param domain_size Domain size forwarded to `evaluate_batch`.
 * @param n Size forwarded to `evaluate_batch`.
 * @param batch_size Batch size forwarded to `evaluate_batch`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `evaluate_batch`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_points_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::projective_t* d_out,
  ${CURVE_NAME_U}::projective_t* d_coefficients,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned batch_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    ${CURVE_NAME_U}::scalar_t* _null = nullptr;
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `evaluate` with the coset flag `true` and `coset_powers`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output buffer forwarded to `evaluate`.
 * @param d_coefficients Input coefficients.
 * @param d_domain Domain.
 * @param domain_size Domain size forwarded to `evaluate`.
 * @param n Size forwarded to `evaluate`.
 * @param coset_powers Coset powers forwarded to `evaluate`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `evaluate`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_scalars_on_coset_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* d_out,
  ${CURVE_NAME_U}::scalar_t* d_coefficients,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  ${CURVE_NAME_U}::scalar_t* coset_powers,
  unsigned device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `evaluate_batch` with the coset flag `true` and `coset_powers`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output buffer forwarded to `evaluate_batch`.
 * @param d_coefficients Input coefficients.
 * @param d_domain Domain.
 * @param domain_size Domain size forwarded to `evaluate_batch`.
 * @param n Size forwarded to `evaluate_batch`.
 * @param batch_size Batch size forwarded to `evaluate_batch`.
 * @param coset_powers Coset powers forwarded to `evaluate_batch`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `evaluate_batch`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_scalars_on_coset_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* d_out,
  ${CURVE_NAME_U}::scalar_t* d_coefficients,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned batch_size,
  ${CURVE_NAME_U}::scalar_t* coset_powers,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards projective-point coefficients to `evaluate` with the coset flag
 * `true` and `coset_powers`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output points.
 * @param d_coefficients Input coefficients.
 * @param d_domain Scalar domain.
 * @param domain_size Domain size forwarded to `evaluate`.
 * @param n Size forwarded to `evaluate`.
 * @param coset_powers Coset powers forwarded to `evaluate`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `evaluate`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_points_on_coset_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::projective_t* d_out,
  ${CURVE_NAME_U}::projective_t* d_coefficients,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  ${CURVE_NAME_U}::scalar_t* coset_powers,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards projective-point coefficients to `evaluate_batch` with the coset
 * flag `true` and `coset_powers`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output points.
 * @param d_coefficients Input coefficients.
 * @param d_domain Scalar domain.
 * @param domain_size Domain size forwarded to `evaluate_batch`.
 * @param n Size forwarded to `evaluate_batch`.
 * @param batch_size Batch size forwarded to `evaluate_batch`.
 * @param coset_powers Coset powers forwarded to `evaluate_batch`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `evaluate_batch`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int evaluate_points_on_coset_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::projective_t* d_out,
  ${CURVE_NAME_U}::projective_t* d_coefficients,
  ${CURVE_NAME_U}::scalar_t* d_domain,
  unsigned domain_size,
  unsigned n,
  unsigned batch_size,
  ${CURVE_NAME_U}::scalar_t* coset_powers,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: calls `ntt_inplace_batch_template` with the coset flag `false`, a null coset
 * pointer and a trailing `true` argument (meaning of the last flag not visible here — confirm
 * against the template).
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Scalars transformed in place.
 * @param d_twiddles Twiddle factors forwarded to the template.
 * @param n Size forwarded to the template.
 * @param batch_size Batch size forwarded to the template.
 * @param inverse Forwarded to the template.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return `CUDA_SUCCESS` unless a `std::runtime_error` was caught, in which case -1.
 */
extern "C" int ntt_inplace_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* d_inout,
  ${CURVE_NAME_U}::scalar_t* d_twiddles,
  unsigned n,
  unsigned batch_size,
  bool inverse,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  // TODO: we should implement this https://leimao.github.io/blog/Proper-CUDA-Error-Checking/
  int status = CUDA_SUCCESS;
  try {
    ${CURVE_NAME_U}::scalar_t* _null = nullptr;
    ntt_inplace_batch_template(d_inout, d_twiddles, n, batch_size, inverse, false, _null, stream, true);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: calls `ntt_inplace_batch_template` with caller-supplied `is_coset` / `coset`
 * and a trailing `true` argument (meaning of the last flag not visible here — confirm against
 * the template).
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Scalars transformed in place.
 * @param d_twiddles Twiddle factors forwarded to the template.
 * @param n Size forwarded to the template.
 * @param batch_size Batch size forwarded to the template.
 * @param inverse Forwarded to the template.
 * @param is_coset Forwarded to the template.
 * @param coset Forwarded to the template.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return `CUDA_SUCCESS` unless a `std::runtime_error` was caught, in which case -1.
 */
extern "C" int ntt_inplace_coset_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* d_inout,
  ${CURVE_NAME_U}::scalar_t* d_twiddles,
  unsigned n,
  unsigned batch_size,
  bool inverse,
  bool is_coset,
  ${CURVE_NAME_U}::scalar_t* coset,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  // TODO: we should implement this https://leimao.github.io/blog/Proper-CUDA-Error-Checking/
  int status = CUDA_SUCCESS;
  try {
    ntt_inplace_batch_template(d_inout, d_twiddles, n, batch_size, inverse, is_coset, coset, stream, true);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `sub_polys(d_out, d_in1, d_in2, n, stream)`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output buffer.
 * @param d_in1 First operand.
 * @param d_in2 Second operand.
 * @param n Number of elements.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `sub_polys`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int sub_scalars_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_in1, ${CURVE_NAME_U}::scalar_t* d_in2, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = sub_polys(d_out, d_in1, d_in2, n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: forwards to `add_polys(d_out, d_in1, d_in2, n, stream)`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_out Output buffer.
 * @param d_in1 First operand.
 * @param d_in2 Second operand.
 * @param n Number of elements.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `add_polys`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int add_scalars_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* d_out, ${CURVE_NAME_U}::scalar_t* d_in1, ${CURVE_NAME_U}::scalar_t* d_in2, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = add_polys(d_out, d_in1, d_in2, n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `to_montgomery` in place over `n` scalars.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Scalars to convert in place.
 * @param n Number of scalars.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `to_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int to_montgomery_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = to_montgomery(d_inout, n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `from_montgomery` in place over `n` scalars.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Scalars to convert in place.
 * @param n Number of scalars.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `from_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int from_montgomery_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = from_montgomery(d_inout, n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `to_montgomery` in place over `n` projective points.
 * The buffer is reinterpreted as `point_field_t` elements and `3 * n` of them are processed.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Points to convert in place.
 * @param n Number of points.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `to_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int to_montgomery_proj_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = to_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 3 * n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `from_montgomery` in place over `n` projective points.
 * The buffer is reinterpreted as `point_field_t` elements and `3 * n` of them are processed.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Points to convert in place.
 * @param n Number of points.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `from_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int from_montgomery_proj_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = from_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 3 * n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `to_montgomery` in place over `n` affine points.
 * The buffer is reinterpreted as `point_field_t` elements and `2 * n` of them are processed.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Points to convert in place.
 * @param n Number of points.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `to_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int to_montgomery_aff_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = to_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 2 * n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `from_montgomery` in place over `n` affine points.
 * The buffer is reinterpreted as `point_field_t` elements and `2 * n` of them are processed.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Points to convert in place.
 * @param n Number of points.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `from_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int from_montgomery_aff_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = from_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 2 * n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
/**
 * C entry point: runs `to_montgomery` in place over `n` G2 projective points.
 * The buffer is reinterpreted as `point_field_t` elements and `6 * n` of them are processed.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Points to convert in place.
 * @param n Number of points.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `to_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int
to_montgomery_proj_points_g2_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = to_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 6 * n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `from_montgomery` in place over `n` G2 projective points.
 * The buffer is reinterpreted as `point_field_t` elements and `6 * n` of them are processed.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Points to convert in place.
 * @param n Number of points.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `from_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int
from_montgomery_proj_points_g2_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = from_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 6 * n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `to_montgomery` in place over `n` G2 affine points.
 * The buffer is reinterpreted as `point_field_t` elements and `4 * n` of them are processed.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Points to convert in place.
 * @param n Number of points.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `to_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int to_montgomery_aff_points_g2_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = to_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 4 * n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `from_montgomery` in place over `n` G2 affine points.
 * The buffer is reinterpreted as `point_field_t` elements and `4 * n` of them are processed.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param d_inout Points to convert in place.
 * @param n Number of points.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return Result of `from_montgomery`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int
from_montgomery_aff_points_g2_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_affine_t* d_inout, unsigned n, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status;
  try {
    status = from_montgomery((${CURVE_NAME_U}::point_field_t*)d_inout, 4 * n, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
#endif
|
||||
|
||||
/**
 * C entry point: calls `reverse_order` on `arr` with `n` elements and waits for the stream to finish.
 * `logn` is floor(log2(n)), now computed with integer shifts: the previous `log(n) / log(2)`
 * could truncate one too low from floating-point rounding and was undefined for `n <= 0`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param arr Scalars to reorder in place.
 * @param n Number of elements; `n <= 0` yields `logn == 0`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return 0 on success, -1 if a `std::runtime_error` was caught.
 */
extern "C" int
reverse_order_scalars_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  uint32_t logn = 0;
  for (unsigned remaining = n > 0 ? unsigned(n) : 0u; remaining > 1; remaining >>= 1)
    ++logn;

  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = 0;
  try {
    reverse_order(arr, n, logn, stream);
    cudaStreamSynchronize(stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: calls `reverse_order_batch` on `arr` with per-item size `n` and `batch_size` items.
 * `logn` is floor(log2(n)), now computed with integer shifts: the previous `log(n) / log(2)`
 * could truncate one too low from floating-point rounding and was undefined for `n <= 0`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param arr Scalars to reorder in place.
 * @param n Size passed to `reverse_order_batch`; `n <= 0` yields `logn == 0`.
 * @param batch_size Batch count passed to `reverse_order_batch`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return 0 on success, -1 if a `std::runtime_error` was caught.
 */
extern "C" int reverse_order_scalars_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  uint32_t logn = 0;
  for (unsigned remaining = n > 0 ? unsigned(n) : 0u; remaining > 1; remaining >>= 1)
    ++logn;

  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = 0;
  try {
    reverse_order_batch(arr, n, logn, batch_size, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: calls `reverse_order` on `arr` with `n` projective points.
 * `logn` is floor(log2(n)), now computed with integer shifts: the previous `log(n) / log(2)`
 * could truncate one too low from floating-point rounding and was undefined for `n <= 0`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param arr Points to reorder in place.
 * @param n Number of elements; `n <= 0` yields `logn == 0`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return 0 on success, -1 if a `std::runtime_error` was caught.
 */
extern "C" int
reverse_order_points_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
  uint32_t logn = 0;
  for (unsigned remaining = n > 0 ? unsigned(n) : 0u; remaining > 1; remaining >>= 1)
    ++logn;

  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = 0;
  try {
    reverse_order(arr, n, logn, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: calls `reverse_order_batch` on `arr` with per-item size `n` and `batch_size` items.
 * `logn` is floor(log2(n)), now computed with integer shifts: the previous `log(n) / log(2)`
 * could truncate one too low from floating-point rounding and was undefined for `n <= 0`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed before returning instead of being leaked on every call.
 * @param arr Points to reorder in place.
 * @param n Size passed to `reverse_order_batch`; `n <= 0` yields `logn == 0`.
 * @param batch_size Batch count passed to `reverse_order_batch`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return 0 on success, -1 if a `std::runtime_error` was caught.
 */
extern "C" int reverse_order_points_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  uint32_t logn = 0;
  for (unsigned remaining = n > 0 ? unsigned(n) : 0u; remaining > 1; remaining >>= 1)
    ++logn;

  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = 0;
  try {
    reverse_order_batch(arr, n, logn, batch_size, stream);
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    status = -1;
  }
  // Safe with work still queued: CUDA releases the stream once that work has completed.
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
#endif
|
||||
@@ -1,186 +0,0 @@
|
||||
#ifndef _${CURVE_NAME_U}_MSM
|
||||
#define _${CURVE_NAME_U}_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include <stdexcept>
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
|
||||
|
||||
/**
 * C entry point: runs `large_msm` over `count` scalar/point pairs and waits for completion.
 * The two boolean arguments are both `false` here (the `commit_*` wrappers pass `true, false`);
 * NOTE(review): the first presumably marks inputs as already on device — confirm against `large_msm`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed after synchronizing instead of being leaked on every call.
 * @param out Result point.
 * @param points Affine bases.
 * @param scalars Scalars.
 * @param count Number of scalar/point pairs.
 * @param large_bucket_factor Forwarded to `large_msm`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return `CUDA_SUCCESS`, or -1 if a `std::runtime_error` was caught.
 */
extern "C"
int msm_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *out, ${CURVE_NAME_U}::affine_t points[],
                    ${CURVE_NAME_U}::scalar_t scalars[], size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = CUDA_SUCCESS;
  try
  {
    large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::affine_t>(scalars, points, count, out, false, false, large_bucket_factor, stream);
    cudaStreamSynchronize(stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `batched_large_msm` and waits for completion.
 * The boolean argument is `false` here (the `commit_batch_*` wrappers pass `true`);
 * NOTE(review): presumably marks inputs as already on device — confirm against `batched_large_msm`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed after synchronizing instead of being leaked on every call.
 * @param out Result points.
 * @param points Affine bases.
 * @param scalars Scalars.
 * @param batch_size Number of MSMs in the batch.
 * @param msm_size Size of each MSM.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return `CUDA_SUCCESS`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int msm_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* out, ${CURVE_NAME_U}::affine_t points[],
                              ${CURVE_NAME_U}::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = CUDA_SUCCESS;
  try
  {
    batched_large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
    cudaStreamSynchronize(stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Commit to a polynomial using the MSM.
 * Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
 * The incoming `stream` value is overwritten by a stream created here; that stream is destroyed
 * after synchronizing instead of being leaked on every call.
 * @param d_out Output point to write the result to.
 * @param d_scalars Scalars for the MSM. Must be on device.
 * @param d_points Points for the MSM. Must be on device.
 * @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
 * @param large_bucket_factor Forwarded to `large_msm`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return `CUDA_SUCCESS`, or -1 if a `std::runtime_error` was caught.
 */
extern "C"
int commit_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::scalar_t* d_scalars, ${CURVE_NAME_U}::affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = CUDA_SUCCESS;
  try
  {
    large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
    cudaStreamSynchronize(stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Commit to a batch of polynomials using the MSM.
 * Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
 * The incoming `stream` value is overwritten by a stream created here; that stream is destroyed
 * after synchronizing instead of being leaked on every call.
 * @param d_out Output points to write the results to.
 * @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
 * @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
 * @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
 * @param batch_size Size of the batch.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return `CUDA_SUCCESS`, or -1 if a `std::runtime_error` was caught.
 */
extern "C"
int commit_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* d_out, ${CURVE_NAME_U}::scalar_t* d_scalars, ${CURVE_NAME_U}::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = CUDA_SUCCESS;
  try
  {
    batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
    cudaStreamSynchronize(stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
/**
 * C entry point: runs `large_msm` over `count` scalar / G2-point pairs and waits for completion.
 * The two boolean arguments are both `false` here (the `commit_g2_*` wrapper passes `true, false`);
 * NOTE(review): the first presumably marks inputs as already on device — confirm against `large_msm`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed after synchronizing instead of being leaked on every call.
 * @param out Result G2 point.
 * @param points G2 affine bases.
 * @param scalars Scalars.
 * @param count Number of scalar/point pairs.
 * @param large_bucket_factor Forwarded to `large_msm`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return `CUDA_SUCCESS`, or -1 if a `std::runtime_error` was caught.
 */
extern "C"
int msm_g2_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t *out, ${CURVE_NAME_U}::g2_affine_t points[],
                    ${CURVE_NAME_U}::scalar_t scalars[], size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = CUDA_SUCCESS;
  try
  {
    large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::g2_projective_t, ${CURVE_NAME_U}::g2_affine_t>(scalars, points, count, out, false, false, large_bucket_factor, stream);
    cudaStreamSynchronize(stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * C entry point: runs `batched_large_msm` over G2 points and waits for completion.
 * The boolean argument is `false` here (the `commit_batch_g2_*` wrapper passes `true`);
 * NOTE(review): presumably marks inputs as already on device — confirm against `batched_large_msm`.
 * The incoming `stream` value is overwritten by a stream created here; that stream is now
 * destroyed after synchronizing instead of being leaked on every call.
 * @param out Result G2 points.
 * @param points G2 affine bases.
 * @param scalars Scalars.
 * @param batch_size Number of MSMs in the batch.
 * @param msm_size Size of each MSM.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return `CUDA_SUCCESS`, or -1 if a `std::runtime_error` was caught.
 */
extern "C" int msm_batch_g2_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t* out, ${CURVE_NAME_U}::g2_affine_t points[],
                              ${CURVE_NAME_U}::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = CUDA_SUCCESS;
  try
  {
    batched_large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::g2_projective_t, ${CURVE_NAME_U}::g2_affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
    cudaStreamSynchronize(stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Commit to a polynomial using the MSM in G2 group.
 * Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
 * The incoming `stream` value is overwritten by a stream created here; that stream is destroyed
 * after synchronizing instead of being leaked on every call.
 * @param d_out Output G2 point to write the result to.
 * @param d_scalars Scalars for the MSM. Must be on device.
 * @param d_points G2 affine points for the MSM. Must be on device.
 * @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
 * @param large_bucket_factor Forwarded to `large_msm`.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return `CUDA_SUCCESS`, or -1 if a `std::runtime_error` was caught.
 */
extern "C"
int commit_g2_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t* d_out, ${CURVE_NAME_U}::scalar_t* d_scalars, ${CURVE_NAME_U}::g2_affine_t* d_points, size_t count, unsigned large_bucket_factor, size_t device_id = 0, cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = CUDA_SUCCESS;
  try
  {
    large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
    cudaStreamSynchronize(stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
|
||||
/**
 * Commit to a batch of polynomials using the MSM.
 * Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
 * The incoming `stream` value is overwritten by a stream created here; that stream is destroyed
 * after synchronizing instead of being leaked on every call.
 * @param d_out Output G2 points to write the results to.
 * @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
 * @param d_points G2 affine points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
 * @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
 * @param batch_size Size of the batch.
 * @param device_id Currently unused.
 * @param stream Replaced by a locally created stream (used as-is only if creation fails).
 * @return `CUDA_SUCCESS`, or -1 if a `std::runtime_error` was caught.
 */
extern "C"
int commit_batch_g2_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t* d_out, ${CURVE_NAME_U}::scalar_t* d_scalars, ${CURVE_NAME_U}::g2_affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  // Only destroy the stream if this call actually created it.
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  int status = CUDA_SUCCESS;
  try
  {
    batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
    cudaStreamSynchronize(stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
    status = -1;
  }
  if (owns_stream) cudaStreamDestroy(stream);
  return status;
}
|
||||
#endif
|
||||
#endif
|
||||
216
icicle/curves/curve_template/msm.cu.tmpl
Normal file
216
icicle/curves/curve_template/msm.cu.tmpl
Normal file
@@ -0,0 +1,216 @@
|
||||
#ifndef _${CURVE_NAME_U}_MSM
|
||||
#define _${CURVE_NAME_U}_MSM
|
||||
#include "../../appUtils/msm/msm.cu"
|
||||
#include "curve_config.cuh"
|
||||
#include <cuda.h>
|
||||
#include <stdexcept>
|
||||
|
||||
/**
 * C-ABI entry point that runs a single MSM through `large_msm` and waits for it to finish.
 * NOTE(review): the fifth `large_msm` argument is `false` here and `true` in the `commit_*` wrappers, whose docs
 * require device-resident inputs; presumably it is an "inputs on device" flag. Confirm against `large_msm`.
 * @param out Destination for the resulting projective point.
 * @param points Affine points (bases) of the MSM.
 * @param scalars Scalars of the MSM.
 * @param count Element count forwarded to `large_msm`.
 * @param large_bucket_factor Forwarded unchanged to `large_msm`.
 * @param device_id Currently ignored.
 * @param stream Overwritten on entry: the MSM always runs on a stream created inside this call, so the value
 * passed by the caller is not used.
 * @return `CUDA_SUCCESS` when no `std::runtime_error` is thrown, `-1` otherwise.
 */
extern "C" int msm_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::projective_t* out,
  ${CURVE_NAME_U}::affine_t points[],
  ${CURVE_NAME_U}::scalar_t scalars[],
  size_t count,
  unsigned large_bucket_factor,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  // Track whether the per-call stream really exists so it is released exactly once on every exit path
  // (previously it was created on each call and never destroyed).
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  try {
    large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::affine_t>(
      scalars, points, count, out, false, false, large_bucket_factor, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    if (owns_stream) cudaStreamDestroy(stream);
    return -1;
  }
}
|
||||
|
||||
/**
 * C-ABI entry point that runs a batch of MSMs through `batched_large_msm` and waits for it to finish.
 * NOTE(review): the sixth `batched_large_msm` argument is `false` here and `true` in the `commit_batch_*`
 * wrappers, whose docs require device-resident inputs; presumably it is an "inputs on device" flag. Confirm
 * against `batched_large_msm`.
 * @param out Destination for the resulting projective points.
 * @param points Affine points (bases) of the MSMs.
 * @param scalars Scalars of the MSMs.
 * @param batch_size Number of MSMs in the batch.
 * @param msm_size Size of each MSM, forwarded to `batched_large_msm`.
 * @param device_id Currently ignored.
 * @param stream Overwritten on entry: the MSMs always run on a stream created inside this call, so the value
 * passed by the caller is not used.
 * @return `CUDA_SUCCESS` when no `std::runtime_error` is thrown, `-1` otherwise.
 */
extern "C" int msm_batch_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::projective_t* out,
  ${CURVE_NAME_U}::affine_t points[],
  ${CURVE_NAME_U}::scalar_t scalars[],
  size_t batch_size,
  size_t msm_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  // Track whether the per-call stream really exists so it is released exactly once on every exit path
  // (previously it was created on each call and never destroyed).
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  try {
    batched_large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::affine_t>(
      scalars, points, batch_size, msm_size, out, false, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    if (owns_stream) cudaStreamDestroy(stream);
    return -1;
  }
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points Points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C" int commit_cuda_${CURVE_NAME_L}(
|
||||
${CURVE_NAME_U}::projective_t* d_out,
|
||||
${CURVE_NAME_U}::scalar_t* d_scalars,
|
||||
${CURVE_NAME_U}::affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C" int commit_batch_cuda_${CURVE_NAME_L}(
|
||||
${CURVE_NAME_U}::projective_t* d_out,
|
||||
${CURVE_NAME_U}::scalar_t* d_scalars,
|
||||
${CURVE_NAME_U}::affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
/**
 * C-ABI entry point that runs a single G2 MSM through `large_msm` and waits for it to finish.
 * NOTE(review): the fifth `large_msm` argument is `false` here and `true` in the `commit_*` wrappers, whose docs
 * require device-resident inputs; presumably it is an "inputs on device" flag. Confirm against `large_msm`.
 * @param out Destination for the resulting G2 projective point.
 * @param points G2 affine points (bases) of the MSM.
 * @param scalars Scalars of the MSM.
 * @param count Element count forwarded to `large_msm`.
 * @param large_bucket_factor Forwarded unchanged to `large_msm`.
 * @param device_id Currently ignored.
 * @param stream Overwritten on entry: the MSM always runs on a stream created inside this call, so the value
 * passed by the caller is not used.
 * @return `CUDA_SUCCESS` when no `std::runtime_error` is thrown, `-1` otherwise.
 */
extern "C" int msm_g2_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::g2_projective_t* out,
  ${CURVE_NAME_U}::g2_affine_t points[],
  ${CURVE_NAME_U}::scalar_t scalars[],
  size_t count,
  unsigned large_bucket_factor,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  // Track whether the per-call stream really exists so it is released exactly once on every exit path
  // (previously it was created on each call and never destroyed).
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  try {
    large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::g2_projective_t, ${CURVE_NAME_U}::g2_affine_t>(
      scalars, points, count, out, false, false, large_bucket_factor, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    if (owns_stream) cudaStreamDestroy(stream);
    return -1;
  }
}
|
||||
|
||||
/**
 * C-ABI entry point that runs a batch of G2 MSMs through `batched_large_msm` and waits for it to finish.
 * NOTE(review): the sixth `batched_large_msm` argument is `false` here and `true` in the `commit_batch_*`
 * wrappers, whose docs require device-resident inputs; presumably it is an "inputs on device" flag. Confirm
 * against `batched_large_msm`.
 * @param out Destination for the resulting G2 projective points.
 * @param points G2 affine points (bases) of the MSMs.
 * @param scalars Scalars of the MSMs.
 * @param batch_size Number of MSMs in the batch.
 * @param msm_size Size of each MSM, forwarded to `batched_large_msm`.
 * @param device_id Currently ignored.
 * @param stream Overwritten on entry: the MSMs always run on a stream created inside this call, so the value
 * passed by the caller is not used.
 * @return `CUDA_SUCCESS` when no `std::runtime_error` is thrown, `-1` otherwise.
 */
extern "C" int msm_batch_g2_cuda_${CURVE_NAME_L}(
  ${CURVE_NAME_U}::g2_projective_t* out,
  ${CURVE_NAME_U}::g2_affine_t points[],
  ${CURVE_NAME_U}::scalar_t scalars[],
  size_t batch_size,
  size_t msm_size,
  size_t device_id = 0,
  cudaStream_t stream = 0)
{
  // TODO: use device_id when working with multiple devices
  (void)device_id;
  // Track whether the per-call stream really exists so it is released exactly once on every exit path
  // (previously it was created on each call and never destroyed).
  const bool owns_stream = cudaStreamCreate(&stream) == cudaSuccess;
  try {
    batched_large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::g2_projective_t, ${CURVE_NAME_U}::g2_affine_t>(
      scalars, points, batch_size, msm_size, out, false, stream);
    cudaStreamSynchronize(stream);
    if (owns_stream) cudaStreamDestroy(stream);
    return CUDA_SUCCESS;
  } catch (const std::runtime_error& ex) {
    printf("error %s", ex.what());
    if (owns_stream) cudaStreamDestroy(stream);
    return -1;
  }
}
|
||||
|
||||
/**
|
||||
* Commit to a polynomial using the MSM in G2 group.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut G2 point to write the result to.
|
||||
* @param d_scalars Scalars for the MSM. Must be on device.
|
||||
* @param d_points G2 affine points for the MSM. Must be on device.
|
||||
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
|
||||
*/
|
||||
extern "C" int commit_g2_cuda_${CURVE_NAME_L}(
|
||||
${CURVE_NAME_U}::g2_projective_t* d_out,
|
||||
${CURVE_NAME_U}::scalar_t* d_scalars,
|
||||
${CURVE_NAME_U}::g2_affine_t* d_points,
|
||||
size_t count,
|
||||
unsigned large_bucket_factor,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, large_bucket_factor, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commit to a batch of polynomials using the MSM.
|
||||
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or
|
||||
* points.
|
||||
* @param d_out Ouptut G2 point to write the results to.
|
||||
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
|
||||
* @param d_points G2 affine points for the MSMs. Must be on device. It is assumed that this set of bases is used for
|
||||
* each MSM.
|
||||
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
|
||||
* @param batch_size Size of the batch.
|
||||
*/
|
||||
extern "C" int commit_batch_g2_cuda_${CURVE_NAME_L}(
|
||||
${CURVE_NAME_U}::g2_projective_t* d_out,
|
||||
${CURVE_NAME_U}::scalar_t* d_scalars,
|
||||
${CURVE_NAME_U}::g2_affine_t* d_points,
|
||||
size_t count,
|
||||
size_t batch_size,
|
||||
size_t device_id = 0,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try {
|
||||
cudaStreamCreate(&stream);
|
||||
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what());
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
@@ -6,7 +6,7 @@ namespace PARAMS_${curve_name_U} {
|
||||
static constexpr unsigned limbs_count = ${fp_num_limbs};
|
||||
static constexpr unsigned omegas_count = ${num_omegas};
|
||||
static constexpr unsigned modulus_bit_count = ${fp_modulus_bit_count};
|
||||
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {${fp_modulus}};
|
||||
static constexpr storage<limbs_count> modulus_2 = {${fp_modulus_2}};
|
||||
static constexpr storage<limbs_count> modulus_4 = {${fp_modulus_4}};
|
||||
@@ -1,70 +0,0 @@
|
||||
#include <cuda.h>
|
||||
#include "curve_config.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
|
||||
/** C-ABI wrapper returning the result of `projective_t::rand_host()` for this curve. */
extern "C" ${CURVE_NAME_U}::projective_t random_projective_${CURVE_NAME_L}()
{
  ${CURVE_NAME_U}::projective_t sample = ${CURVE_NAME_U}::projective_t::rand_host();
  return sample;
}
|
||||
|
||||
/** C-ABI wrapper returning the result of `projective_t::zero()` for this curve. */
extern "C" ${CURVE_NAME_U}::projective_t projective_zero_${CURVE_NAME_L}()
{
  ${CURVE_NAME_U}::projective_t zero_point = ${CURVE_NAME_U}::projective_t::zero();
  return zero_point;
}
|
||||
|
||||
/**
 * C-ABI wrapper around `projective_t::is_on_curve`.
 * @param point1 Pointer to the point to check; it is dereferenced without a null check.
 * @return Whatever `projective_t::is_on_curve` reports for `*point1`.
 */
extern "C" bool projective_is_on_curve_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *point1)
{
  ${CURVE_NAME_U}::projective_t &candidate = *point1;
  const bool on_curve = ${CURVE_NAME_U}::projective_t::is_on_curve(candidate);
  return on_curve;
}
|
||||
|
||||
/**
 * C-ABI wrapper around `projective_t::to_affine`.
 * @param point1 Pointer to the projective point to convert; it is dereferenced without a null check.
 * @return The result of `projective_t::to_affine(*point1)`.
 */
extern "C" ${CURVE_NAME_U}::affine_t projective_to_affine_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *point1)
{
  ${CURVE_NAME_U}::projective_t &source = *point1;
  return ${CURVE_NAME_U}::projective_t::to_affine(source);
}
|
||||
|
||||
/**
 * C-ABI wrapper around `projective_t::from_affine`.
 * @param point1 Pointer to the affine point to convert; it is dereferenced without a null check.
 * @return The result of `projective_t::from_affine(*point1)`.
 */
extern "C" ${CURVE_NAME_U}::projective_t projective_from_affine_${CURVE_NAME_L}(${CURVE_NAME_U}::affine_t *point1)
{
  ${CURVE_NAME_U}::affine_t &source = *point1;
  return ${CURVE_NAME_U}::projective_t::from_affine(source);
}
|
||||
|
||||
/** C-ABI wrapper returning the result of `scalar_field_t::rand_host()` for this curve. */
extern "C" ${CURVE_NAME_U}::scalar_field_t random_scalar_${CURVE_NAME_L}()
{
  ${CURVE_NAME_U}::scalar_field_t sample = ${CURVE_NAME_U}::scalar_field_t::rand_host();
  return sample;
}
|
||||
|
||||
/**
 * C-ABI equality check for two projective points.
 * Returns true only when `*point1 == *point2` and neither point has all three coordinates (x, y, z) equal to
 * `point_field_t::zero()`.
 * @param point1 First point; dereferenced without a null check.
 * @param point2 Second point; dereferenced without a null check.
 */
extern "C" bool eq_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *point1, ${CURVE_NAME_U}::projective_t *point2)
{
  // Same evaluation order as a single short-circuit chain: equality first, then each all-zero check.
  if (!(*point1 == *point2)) return false;

  const bool first_all_zero = (point1->x == ${CURVE_NAME_U}::point_field_t::zero()) &&
                              (point1->y == ${CURVE_NAME_U}::point_field_t::zero()) &&
                              (point1->z == ${CURVE_NAME_U}::point_field_t::zero());
  if (first_all_zero) return false;

  const bool second_all_zero = (point2->x == ${CURVE_NAME_U}::point_field_t::zero()) &&
                               (point2->y == ${CURVE_NAME_U}::point_field_t::zero()) &&
                               (point2->z == ${CURVE_NAME_U}::point_field_t::zero());
  return !second_all_zero;
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
/**
 * C-ABI equality check for two G2 projective points.
 * Returns true only when `*point1 == *point2` and neither point has all three coordinates (x, y, z) equal to
 * `g2_point_field_t::zero()`.
 * @param point1 First point; dereferenced without a null check.
 * @param point2 Second point; dereferenced without a null check.
 */
extern "C" bool eq_g2_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t *point1, ${CURVE_NAME_U}::g2_projective_t *point2)
{
  // Same evaluation order as a single short-circuit chain: equality first, then each all-zero check.
  if (!(*point1 == *point2)) return false;

  const bool first_all_zero = (point1->x == ${CURVE_NAME_U}::g2_point_field_t::zero()) &&
                              (point1->y == ${CURVE_NAME_U}::g2_point_field_t::zero()) &&
                              (point1->z == ${CURVE_NAME_U}::g2_point_field_t::zero());
  if (first_all_zero) return false;

  const bool second_all_zero = (point2->x == ${CURVE_NAME_U}::g2_point_field_t::zero()) &&
                               (point2->y == ${CURVE_NAME_U}::g2_point_field_t::zero()) &&
                               (point2->z == ${CURVE_NAME_U}::g2_point_field_t::zero());
  return !second_all_zero;
}
|
||||
|
||||
/** C-ABI wrapper returning the result of `g2_projective_t::rand_host()` for this curve. */
extern "C" ${CURVE_NAME_U}::g2_projective_t random_g2_projective_${CURVE_NAME_L}()
{
  ${CURVE_NAME_U}::g2_projective_t sample = ${CURVE_NAME_U}::g2_projective_t::rand_host();
  return sample;
}
|
||||
|
||||
/**
 * C-ABI wrapper around `g2_projective_t::to_affine`.
 * @param point1 Pointer to the G2 projective point to convert; it is dereferenced without a null check.
 * @return The result of `g2_projective_t::to_affine(*point1)`.
 */
extern "C" ${CURVE_NAME_U}::g2_affine_t g2_projective_to_affine_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t *point1)
{
  ${CURVE_NAME_U}::g2_projective_t &source = *point1;
  return ${CURVE_NAME_U}::g2_projective_t::to_affine(source);
}
|
||||
|
||||
/**
 * C-ABI wrapper around `g2_projective_t::from_affine`.
 * @param point1 Pointer to the G2 affine point to convert; it is dereferenced without a null check.
 * @return The result of `g2_projective_t::from_affine(*point1)`.
 */
extern "C" ${CURVE_NAME_U}::g2_projective_t g2_projective_from_affine_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_affine_t *point1)
{
  ${CURVE_NAME_U}::g2_affine_t &source = *point1;
  return ${CURVE_NAME_U}::g2_projective_t::from_affine(source);
}
|
||||
|
||||
/**
 * C-ABI wrapper around `g2_projective_t::is_on_curve`.
 * @param point1 Pointer to the G2 point to check; it is dereferenced without a null check.
 * @return Whatever `g2_projective_t::is_on_curve` reports for `*point1`.
 */
extern "C" bool g2_projective_is_on_curve_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t *point1)
{
  ${CURVE_NAME_U}::g2_projective_t &candidate = *point1;
  const bool on_curve = ${CURVE_NAME_U}::g2_projective_t::is_on_curve(candidate);
  return on_curve;
}
|
||||
|
||||
#endif
|
||||
62
icicle/curves/curve_template/projective.cu.tmpl
Normal file
62
icicle/curves/curve_template/projective.cu.tmpl
Normal file
@@ -0,0 +1,62 @@
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include <cuda.h>
|
||||
|
||||
/** C-ABI wrapper returning the result of `projective_t::rand_host()` for this curve. */
extern "C" ${CURVE_NAME_U}::projective_t random_projective_${CURVE_NAME_L}()
{
  ${CURVE_NAME_U}::projective_t sample = ${CURVE_NAME_U}::projective_t::rand_host();
  return sample;
}
|
||||
|
||||
/** C-ABI wrapper returning the result of `projective_t::zero()` for this curve. */
extern "C" ${CURVE_NAME_U}::projective_t projective_zero_${CURVE_NAME_L}()
{
  ${CURVE_NAME_U}::projective_t zero_point = ${CURVE_NAME_U}::projective_t::zero();
  return zero_point;
}
|
||||
|
||||
/**
 * C-ABI wrapper around `projective_t::is_on_curve`.
 * @param point1 Pointer to the point to check; it is dereferenced without a null check.
 * @return Whatever `projective_t::is_on_curve` reports for `*point1`.
 */
extern "C" bool projective_is_on_curve_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* point1)
{
  ${CURVE_NAME_U}::projective_t& candidate = *point1;
  const bool on_curve = ${CURVE_NAME_U}::projective_t::is_on_curve(candidate);
  return on_curve;
}
|
||||
|
||||
/**
 * C-ABI wrapper around `projective_t::to_affine`.
 * @param point1 Pointer to the projective point to convert; it is dereferenced without a null check.
 * @return The result of `projective_t::to_affine(*point1)`.
 */
extern "C" ${CURVE_NAME_U}::affine_t projective_to_affine_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* point1)
{
  ${CURVE_NAME_U}::projective_t& source = *point1;
  return ${CURVE_NAME_U}::projective_t::to_affine(source);
}
|
||||
|
||||
/**
 * C-ABI wrapper around `projective_t::from_affine`.
 * @param point1 Pointer to the affine point to convert; it is dereferenced without a null check.
 * @return The result of `projective_t::from_affine(*point1)`.
 */
extern "C" ${CURVE_NAME_U}::projective_t projective_from_affine_${CURVE_NAME_L}(${CURVE_NAME_U}::affine_t* point1)
{
  ${CURVE_NAME_U}::affine_t& source = *point1;
  return ${CURVE_NAME_U}::projective_t::from_affine(source);
}
|
||||
|
||||
/** C-ABI wrapper returning the result of `scalar_field_t::rand_host()` for this curve. */
extern "C" ${CURVE_NAME_U}::scalar_field_t random_scalar_${CURVE_NAME_L}()
{
  ${CURVE_NAME_U}::scalar_field_t sample = ${CURVE_NAME_U}::scalar_field_t::rand_host();
  return sample;
}
|
||||
|
||||
/**
 * C-ABI equality check for two projective points.
 * Returns true only when `*point1 == *point2` and neither point has all three coordinates (x, y, z) equal to
 * `point_field_t::zero()`.
 * @param point1 First point; dereferenced without a null check.
 * @param point2 Second point; dereferenced without a null check.
 */
extern "C" bool eq_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* point1, ${CURVE_NAME_U}::projective_t* point2)
{
  // Same evaluation order as a single short-circuit chain: equality first, then each all-zero check.
  if (!(*point1 == *point2)) return false;

  const bool first_all_zero = (point1->x == ${CURVE_NAME_U}::point_field_t::zero()) &&
                              (point1->y == ${CURVE_NAME_U}::point_field_t::zero()) &&
                              (point1->z == ${CURVE_NAME_U}::point_field_t::zero());
  if (first_all_zero) return false;

  const bool second_all_zero = (point2->x == ${CURVE_NAME_U}::point_field_t::zero()) &&
                               (point2->y == ${CURVE_NAME_U}::point_field_t::zero()) &&
                               (point2->z == ${CURVE_NAME_U}::point_field_t::zero());
  return !second_all_zero;
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
/**
 * C-ABI equality check for two G2 projective points.
 * Returns true only when `*point1 == *point2` and neither point has all three coordinates (x, y, z) equal to
 * `g2_point_field_t::zero()`.
 * @param point1 First point; dereferenced without a null check.
 * @param point2 Second point; dereferenced without a null check.
 */
extern "C" bool eq_g2_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t* point1, ${CURVE_NAME_U}::g2_projective_t* point2)
{
  // Same evaluation order as a single short-circuit chain: equality first, then each all-zero check.
  if (!(*point1 == *point2)) return false;

  const bool first_all_zero = (point1->x == ${CURVE_NAME_U}::g2_point_field_t::zero()) &&
                              (point1->y == ${CURVE_NAME_U}::g2_point_field_t::zero()) &&
                              (point1->z == ${CURVE_NAME_U}::g2_point_field_t::zero());
  if (first_all_zero) return false;

  const bool second_all_zero = (point2->x == ${CURVE_NAME_U}::g2_point_field_t::zero()) &&
                               (point2->y == ${CURVE_NAME_U}::g2_point_field_t::zero()) &&
                               (point2->z == ${CURVE_NAME_U}::g2_point_field_t::zero());
  return !second_all_zero;
}
|
||||
|
||||
/** C-ABI wrapper returning the result of `g2_projective_t::rand_host()` for this curve. */
extern "C" ${CURVE_NAME_U}::g2_projective_t random_g2_projective_${CURVE_NAME_L}()
{
  ${CURVE_NAME_U}::g2_projective_t sample = ${CURVE_NAME_U}::g2_projective_t::rand_host();
  return sample;
}
|
||||
|
||||
/**
 * C-ABI wrapper around `g2_projective_t::to_affine`.
 * @param point1 Pointer to the G2 projective point to convert; it is dereferenced without a null check.
 * @return The result of `g2_projective_t::to_affine(*point1)`.
 */
extern "C" ${CURVE_NAME_U}::g2_affine_t g2_projective_to_affine_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t* point1)
{
  ${CURVE_NAME_U}::g2_projective_t& source = *point1;
  return ${CURVE_NAME_U}::g2_projective_t::to_affine(source);
}
|
||||
|
||||
/**
 * C-ABI wrapper around `g2_projective_t::from_affine`.
 * @param point1 Pointer to the G2 affine point to convert; it is dereferenced without a null check.
 * @return The result of `g2_projective_t::from_affine(*point1)`.
 */
extern "C" ${CURVE_NAME_U}::g2_projective_t g2_projective_from_affine_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_affine_t* point1)
{
  ${CURVE_NAME_U}::g2_affine_t& source = *point1;
  return ${CURVE_NAME_U}::g2_projective_t::from_affine(source);
}
|
||||
|
||||
/**
 * C-ABI wrapper around `g2_projective_t::is_on_curve`.
 * @param point1 Pointer to the G2 point to check; it is dereferenced without a null check.
 * @return Whatever `g2_projective_t::is_on_curve` reports for `*point1`.
 */
extern "C" bool g2_projective_is_on_curve_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t* point1)
{
  ${CURVE_NAME_U}::g2_projective_t& candidate = *point1;
  const bool on_curve = ${CURVE_NAME_U}::g2_projective_t::is_on_curve(candidate);
  return on_curve;
}
|
||||
|
||||
#endif
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "projective.cu"
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "projective.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
@@ -1,88 +1,70 @@
|
||||
#ifndef _${CURVE_NAME_U}_VEC_MULT
|
||||
#define _${CURVE_NAME_U}_VEC_MULT
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../utils/storage.cuh"
|
||||
#include "curve_config.cuh"
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
|
||||
|
||||
extern "C" int32_t vec_mod_mult_point_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *inout,
|
||||
${CURVE_NAME_U}::scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
extern "C" int32_t vec_mod_mult_point_${CURVE_NAME_L}(
|
||||
${CURVE_NAME_U}::projective_t* inout, ${CURVE_NAME_U}::scalar_t* scalar_vec, size_t n_elments, size_t device_id, cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
try {
|
||||
// TODO: device_id
|
||||
vector_mod_mult<${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t vec_mod_mult_scalar_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t *inout,
|
||||
${CURVE_NAME_U}::scalar_t *scalar_vec,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
extern "C" int32_t vec_mod_mult_scalar_${CURVE_NAME_L}(
|
||||
${CURVE_NAME_U}::scalar_t* inout, ${CURVE_NAME_U}::scalar_t* scalar_vec, size_t n_elments, size_t device_id, cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
try {
|
||||
// TODO: device_id
|
||||
vector_mod_mult<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::scalar_t>(scalar_vec, inout, inout, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t vec_mod_mult_device_scalar_${CURVE_NAME_L}(
|
||||
${CURVE_NAME_U}::scalar_t *inout,
|
||||
${CURVE_NAME_U}::scalar_t *scalar_vec,
|
||||
size_t n_elements,
|
||||
size_t device_id
|
||||
) {
|
||||
${CURVE_NAME_U}::scalar_t* inout, ${CURVE_NAME_U}::scalar_t* scalar_vec, size_t n_elements, size_t device_id)
|
||||
{
|
||||
try {
|
||||
vector_mod_mult_device<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::scalar_t>(scalar_vec, inout, inout, n_elements);
|
||||
return CUDA_SUCCESS;
|
||||
} catch (const std::runtime_error &ex) {
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" int32_t matrix_vec_mod_mult_${CURVE_NAME_L}(${CURVE_NAME_U}::scalar_t *matrix_flattened,
|
||||
${CURVE_NAME_U}::scalar_t *input,
|
||||
${CURVE_NAME_U}::scalar_t *output,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
extern "C" int32_t matrix_vec_mod_mult_${CURVE_NAME_L}(
|
||||
${CURVE_NAME_U}::scalar_t* matrix_flattened,
|
||||
${CURVE_NAME_U}::scalar_t* input,
|
||||
${CURVE_NAME_U}::scalar_t* output,
|
||||
size_t n_elments,
|
||||
size_t device_id,
|
||||
cudaStream_t stream = 0)
|
||||
{
|
||||
// TODO: use device_id when working with multiple devices
|
||||
(void)device_id;
|
||||
try
|
||||
{
|
||||
try {
|
||||
// TODO: device_id
|
||||
matrix_mod_mult<${CURVE_NAME_U}::scalar_t>(matrix_flattened, input, output, n_elments, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
{
|
||||
} catch (const std::runtime_error& ex) {
|
||||
printf("error %s", ex.what()); // TODO: error code and message
|
||||
return -1;
|
||||
}
|
||||
@@ -1,3 +1,3 @@
|
||||
#include "bls12_381/supported_operations.cu"
|
||||
#include "bls12_377/supported_operations.cu"
|
||||
#include "bls12_381/supported_operations.cu"
|
||||
#include "bn254/supported_operations.cu"
|
||||
@@ -3,21 +3,22 @@
|
||||
#include "field.cuh"
|
||||
|
||||
template <class FF>
|
||||
class Affine {
|
||||
public:
|
||||
FF x;
|
||||
FF y;
|
||||
class Affine
|
||||
{
|
||||
public:
|
||||
FF x;
|
||||
FF y;
|
||||
|
||||
static HOST_DEVICE_INLINE Affine neg(const Affine &point) {
|
||||
return {point.x, FF::neg(point.y)};
|
||||
}
|
||||
static HOST_DEVICE_INLINE Affine neg(const Affine& point) { return {point.x, FF::neg(point.y)}; }
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Affine& xs, const Affine& ys) {
|
||||
return (xs.x == ys.x) && (xs.y == ys.y);
|
||||
}
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Affine& xs, const Affine& ys)
|
||||
{
|
||||
return (xs.x == ys.x) && (xs.y == ys.y);
|
||||
}
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Affine& point) {
|
||||
os << "x: " << point.x << "; y: " << point.y;
|
||||
return os;
|
||||
}
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Affine& point)
|
||||
{
|
||||
os << "x: " << point.x << "; y: " << point.y;
|
||||
return os;
|
||||
}
|
||||
};
|
||||
|
||||
@@ -2,143 +2,157 @@
|
||||
|
||||
#include "field.cuh"
|
||||
|
||||
#define HOST_INLINE __host__ __forceinline__
|
||||
#define DEVICE_INLINE __device__ __forceinline__
|
||||
#define HOST_INLINE __host__ __forceinline__
|
||||
#define DEVICE_INLINE __device__ __forceinline__
|
||||
#define HOST_DEVICE_INLINE __host__ __device__ __forceinline__
|
||||
|
||||
template <typename CONFIG> class ExtensionField {
|
||||
private:
|
||||
typedef typename Field<CONFIG>::Wide FWide;
|
||||
template <typename CONFIG>
|
||||
class ExtensionField
|
||||
{
|
||||
private:
|
||||
typedef typename Field<CONFIG>::Wide FWide;
|
||||
|
||||
struct ExtensionWide {
|
||||
FWide real;
|
||||
FWide imaginary;
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionWide operator+(ExtensionWide xs, const ExtensionWide& ys) {
|
||||
return ExtensionWide { xs.real + ys.real, xs.imaginary + ys.imaginary };
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionWide operator-(ExtensionWide xs, const ExtensionWide& ys) {
|
||||
return ExtensionWide { xs.real - ys.real, xs.imaginary - ys.imaginary };
|
||||
}
|
||||
};
|
||||
struct ExtensionWide {
|
||||
FWide real;
|
||||
FWide imaginary;
|
||||
|
||||
public:
|
||||
typedef Field<CONFIG> FF;
|
||||
static constexpr unsigned TLC = 2 * CONFIG::limbs_count;
|
||||
|
||||
FF real;
|
||||
FF imaginary;
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField zero() {
|
||||
return ExtensionField { FF::zero(), FF::zero() };
|
||||
friend HOST_DEVICE_INLINE ExtensionWide operator+(ExtensionWide xs, const ExtensionWide& ys)
|
||||
{
|
||||
return ExtensionWide{xs.real + ys.real, xs.imaginary + ys.imaginary};
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField one() {
|
||||
return ExtensionField { FF::one(), FF::zero() };
|
||||
friend HOST_DEVICE_INLINE ExtensionWide operator-(ExtensionWide xs, const ExtensionWide& ys)
|
||||
{
|
||||
return ExtensionWide{xs.real - ys.real, xs.imaginary - ys.imaginary};
|
||||
}
|
||||
};
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField generator_x() {
|
||||
return ExtensionField { FF { CONFIG::g2_gen_x_re }, FF { CONFIG::g2_gen_x_im } };
|
||||
}
|
||||
public:
|
||||
typedef Field<CONFIG> FF;
|
||||
static constexpr unsigned TLC = 2 * CONFIG::limbs_count;
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField generator_y() {
|
||||
return ExtensionField { FF { CONFIG::g2_gen_y_re }, FF { CONFIG::g2_gen_y_im } };
|
||||
}
|
||||
FF real;
|
||||
FF imaginary;
|
||||
|
||||
static HOST_INLINE ExtensionField rand_host() {
|
||||
return ExtensionField { FF::rand_host(), FF::rand_host() };
|
||||
}
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField zero() { return ExtensionField{FF::zero(), FF::zero()}; }
|
||||
|
||||
template <unsigned REDUCTION_SIZE = 1> static constexpr HOST_DEVICE_INLINE ExtensionField sub_modulus(const ExtensionField &xs) {
|
||||
return ExtensionField { FF::sub_modulus<REDUCTION_SIZE>(&xs.real), FF::sub_modulus<REDUCTION_SIZE>(&xs.imaginary) };
|
||||
}
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField one() { return ExtensionField{FF::one(), FF::zero()}; }
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const ExtensionField& xs) {
|
||||
os << "{ Real: " << xs.real << " }; { Imaginary: " << xs.imaginary << " }";
|
||||
return os;
|
||||
}
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField generator_x()
|
||||
{
|
||||
return ExtensionField{FF{CONFIG::g2_gen_x_re}, FF{CONFIG::g2_gen_x_im}};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const ExtensionField& ys) {
|
||||
return ExtensionField { xs.real + ys.real, xs.imaginary + ys.imaginary };
|
||||
}
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField generator_y()
|
||||
{
|
||||
return ExtensionField{FF{CONFIG::g2_gen_y_re}, FF{CONFIG::g2_gen_y_im}};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const ExtensionField& ys) {
|
||||
return ExtensionField { xs.real - ys.real, xs.imaginary - ys.imaginary };
|
||||
}
|
||||
static HOST_INLINE ExtensionField rand_host() { return ExtensionField{FF::rand_host(), FF::rand_host()}; }
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const ExtensionField& ys) {
|
||||
FWide real_prod = FF::mul_wide(xs.real, ys.real);
|
||||
FWide imaginary_prod = FF::mul_wide(xs.imaginary, ys.imaginary);
|
||||
FWide prod_of_sums = FF::mul_wide(xs.real + xs.imaginary, ys.real + ys.imaginary);
|
||||
FWide i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FWide::neg(i_sq_times_im) : i_sq_times_im;
|
||||
return ExtensionWide { real_prod + i_sq_times_im, prod_of_sums - real_prod - imaginary_prod };
|
||||
}
|
||||
template <unsigned REDUCTION_SIZE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField sub_modulus(const ExtensionField& xs)
|
||||
{
|
||||
return ExtensionField{FF::sub_modulus<REDUCTION_SIZE>(&xs.real), FF::sub_modulus<REDUCTION_SIZE>(&xs.imaginary)};
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField reduce(const ExtensionWide& xs) {
|
||||
return ExtensionField { FF::template reduce<MODULUS_MULTIPLE>(xs.real), FF::template reduce<MODULUS_MULTIPLE>(xs.imaginary) };
|
||||
}
|
||||
friend std::ostream& operator<<(std::ostream& os, const ExtensionField& xs)
|
||||
{
|
||||
os << "{ Real: " << xs.real << " }; { Imaginary: " << xs.imaginary << " }";
|
||||
return os;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator*(const ExtensionField& xs, const ExtensionField& ys) {
|
||||
ExtensionWide xy = mul_wide(xs, ys);
|
||||
return reduce(xy);
|
||||
}
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const ExtensionField& ys)
|
||||
{
|
||||
return ExtensionField{xs.real + ys.real, xs.imaginary + ys.imaginary};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const ExtensionField& xs, const ExtensionField& ys) {
|
||||
return (xs.real == ys.real) && (xs.imaginary == ys.imaginary);
|
||||
}
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const ExtensionField& ys)
|
||||
{
|
||||
return ExtensionField{xs.real - ys.real, xs.imaginary - ys.imaginary};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator!=(const ExtensionField& xs, const ExtensionField& ys) {
|
||||
return !(xs == ys);
|
||||
}
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const ExtensionField& ys)
|
||||
{
|
||||
FWide real_prod = FF::mul_wide(xs.real, ys.real);
|
||||
FWide imaginary_prod = FF::mul_wide(xs.imaginary, ys.imaginary);
|
||||
FWide prod_of_sums = FF::mul_wide(xs.real + xs.imaginary, ys.real + ys.imaginary);
|
||||
FWide i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FWide::neg(i_sq_times_im) : i_sq_times_im;
|
||||
return ExtensionWide{real_prod + i_sq_times_im, prod_of_sums - real_prod - imaginary_prod};
|
||||
}
|
||||
|
||||
template <const ExtensionField& multiplier>
|
||||
static HOST_DEVICE_INLINE ExtensionField mul_const(const ExtensionField &xs) {
|
||||
static constexpr FF mul_real = multiplier.real;
|
||||
static constexpr FF mul_imaginary = multiplier.imaginary;
|
||||
const FF xs_real = xs.real;
|
||||
const FF xs_imaginary = xs.imaginary;
|
||||
FF real_prod = FF::template mul_const<mul_real>(xs_real);
|
||||
FF imaginary_prod = FF::template mul_const<mul_imaginary>(xs_imaginary);
|
||||
FF re_im = FF::template mul_const<mul_real>(xs_imaginary);
|
||||
FF im_re = FF::template mul_const<mul_imaginary>(xs_real);
|
||||
FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
|
||||
return ExtensionField { real_prod + i_sq_times_im, re_im + im_re };
|
||||
}
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField reduce(const ExtensionWide& xs)
|
||||
{
|
||||
return ExtensionField{
|
||||
FF::template reduce<MODULUS_MULTIPLE>(xs.real), FF::template reduce<MODULUS_MULTIPLE>(xs.imaginary)};
|
||||
}
|
||||
|
||||
template <uint32_t mutliplier, unsigned REDUCTION_SIZE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField &xs) {
|
||||
return { FF::template mul_unsigned<mutliplier>(xs.real), FF::template mul_unsigned<mutliplier>(xs.imaginary) };
|
||||
}
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator*(const ExtensionField& xs, const ExtensionField& ys)
|
||||
{
|
||||
ExtensionWide xy = mul_wide(xs, ys);
|
||||
return reduce(xy);
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide sqr_wide(const ExtensionField& xs) {
|
||||
// TODO: change to a more efficient squaring
|
||||
return mul_wide<MODULUS_MULTIPLE>(xs, xs);
|
||||
}
|
||||
friend HOST_DEVICE_INLINE bool operator==(const ExtensionField& xs, const ExtensionField& ys)
|
||||
{
|
||||
return (xs.real == ys.real) && (xs.imaginary == ys.imaginary);
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField sqr(const ExtensionField& xs) {
|
||||
// TODO: change to a more efficient squaring
|
||||
return xs * xs;
|
||||
}
|
||||
friend HOST_DEVICE_INLINE bool operator!=(const ExtensionField& xs, const ExtensionField& ys) { return !(xs == ys); }
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField neg(const ExtensionField& xs) {
|
||||
return ExtensionField { FF::neg(xs.real), FF::neg(xs.imaginary) };
|
||||
}
|
||||
template <const ExtensionField& multiplier>
|
||||
static HOST_DEVICE_INLINE ExtensionField mul_const(const ExtensionField& xs)
|
||||
{
|
||||
static constexpr FF mul_real = multiplier.real;
|
||||
static constexpr FF mul_imaginary = multiplier.imaginary;
|
||||
const FF xs_real = xs.real;
|
||||
const FF xs_imaginary = xs.imaginary;
|
||||
FF real_prod = FF::template mul_const<mul_real>(xs_real);
|
||||
FF imaginary_prod = FF::template mul_const<mul_imaginary>(xs_imaginary);
|
||||
FF re_im = FF::template mul_const<mul_real>(xs_imaginary);
|
||||
FF im_re = FF::template mul_const<mul_imaginary>(xs_real);
|
||||
FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
|
||||
return ExtensionField{real_prod + i_sq_times_im, re_im + im_re};
|
||||
}
|
||||
|
||||
// inverse assumes that xs is nonzero
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField inverse(const ExtensionField& xs) {
|
||||
ExtensionField xs_conjugate = { xs.real, FF::neg(xs.imaginary) };
|
||||
FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(FF::sqr(xs.imaginary));
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
|
||||
// TODO: wide here
|
||||
FF xs_norm_squared = FF::sqr(xs.real) - i_sq_times_im;
|
||||
return xs_conjugate * ExtensionField { FF::inverse(xs_norm_squared), FF::zero() };
|
||||
}
|
||||
template <uint32_t mutliplier, unsigned REDUCTION_SIZE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField& xs)
|
||||
{
|
||||
return {FF::template mul_unsigned<mutliplier>(xs.real), FF::template mul_unsigned<mutliplier>(xs.imaginary)};
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide sqr_wide(const ExtensionField& xs)
|
||||
{
|
||||
// TODO: change to a more efficient squaring
|
||||
return mul_wide<MODULUS_MULTIPLE>(xs, xs);
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField sqr(const ExtensionField& xs)
|
||||
{
|
||||
// TODO: change to a more efficient squaring
|
||||
return xs * xs;
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField neg(const ExtensionField& xs)
|
||||
{
|
||||
return ExtensionField{FF::neg(xs.real), FF::neg(xs.imaginary)};
|
||||
}
|
||||
|
||||
// inverse assumes that xs is nonzero
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField inverse(const ExtensionField& xs)
|
||||
{
|
||||
ExtensionField xs_conjugate = {xs.real, FF::neg(xs.imaginary)};
|
||||
FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(FF::sqr(xs.imaginary));
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
|
||||
// TODO: wide here
|
||||
FF xs_norm_squared = FF::sqr(xs.real) - i_sq_times_im;
|
||||
return xs_conjugate * ExtensionField{FF::inverse(xs_norm_squared), FF::zero()};
|
||||
}
|
||||
};
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,49 +1,61 @@
|
||||
#include <cuda.h>
|
||||
#include "../curves/bls12_381/curve_config.cuh"
|
||||
#include "../curves/bls12_377/curve_config.cuh"
|
||||
#include "../curves/bls12_381/curve_config.cuh"
|
||||
#include "../curves/bn254/curve_config.cuh"
|
||||
#include "projective.cuh"
|
||||
#include <cuda.h>
|
||||
|
||||
extern "C" bool eq_bls12_381(BLS12_381::projective_t *point1, BLS12_381::projective_t *point2)
|
||||
extern "C" bool eq_bls12_381(BLS12_381::projective_t* point1, BLS12_381::projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_381::point_field_t::zero()) && (point1->y == BLS12_381::point_field_t::zero()) && (point1->z == BLS12_381::point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_381::point_field_t::zero()) && (point2->y == BLS12_381::point_field_t::zero()) && (point2->z == BLS12_381::point_field_t::zero()));
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_381::point_field_t::zero()) && (point1->y == BLS12_381::point_field_t::zero()) &&
|
||||
(point1->z == BLS12_381::point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_381::point_field_t::zero()) && (point2->y == BLS12_381::point_field_t::zero()) &&
|
||||
(point2->z == BLS12_381::point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" bool eq_bls12_377(BLS12_377::projective_t *point1, BLS12_377::projective_t *point2)
|
||||
extern "C" bool eq_bls12_377(BLS12_377::projective_t* point1, BLS12_377::projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_377::point_field_t::zero()) && (point1->y == BLS12_377::point_field_t::zero()) && (point1->z == BLS12_377::point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_377::point_field_t::zero()) && (point2->y == BLS12_377::point_field_t::zero()) && (point2->z == BLS12_377::point_field_t::zero()));
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_377::point_field_t::zero()) && (point1->y == BLS12_377::point_field_t::zero()) &&
|
||||
(point1->z == BLS12_377::point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_377::point_field_t::zero()) && (point2->y == BLS12_377::point_field_t::zero()) &&
|
||||
(point2->z == BLS12_377::point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" bool eq_bn254(BN254::projective_t *point1, BN254::projective_t *point2)
|
||||
extern "C" bool eq_bn254(BN254::projective_t* point1, BN254::projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BN254::point_field_t::zero()) && (point1->y == BN254::point_field_t::zero()) && (point1->z == BN254::point_field_t::zero())) &&
|
||||
!((point2->x == BN254::point_field_t::zero()) && (point2->y == BN254::point_field_t::zero()) && (point2->z == BN254::point_field_t::zero()));
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BN254::point_field_t::zero()) && (point1->y == BN254::point_field_t::zero()) &&
|
||||
(point1->z == BN254::point_field_t::zero())) &&
|
||||
!((point2->x == BN254::point_field_t::zero()) && (point2->y == BN254::point_field_t::zero()) &&
|
||||
(point2->z == BN254::point_field_t::zero()));
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t *point1, BLS12_381::g2_projective_t *point2)
|
||||
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t* point1, BLS12_381::g2_projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_381::g2_point_field_t::zero()) && (point1->y == BLS12_381::g2_point_field_t::zero()) && (point1->z == BLS12_381::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_381::g2_point_field_t::zero()) && (point2->y == BLS12_381::g2_point_field_t::zero()) && (point2->z == BLS12_381::g2_point_field_t::zero()));
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_381::g2_point_field_t::zero()) && (point1->y == BLS12_381::g2_point_field_t::zero()) &&
|
||||
(point1->z == BLS12_381::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_381::g2_point_field_t::zero()) && (point2->y == BLS12_381::g2_point_field_t::zero()) &&
|
||||
(point2->z == BLS12_381::g2_point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t *point1, BLS12_377::g2_projective_t *point2)
|
||||
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t* point1, BLS12_377::g2_projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_377::g2_point_field_t::zero()) && (point1->y == BLS12_377::g2_point_field_t::zero()) && (point1->z == BLS12_377::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_377::g2_point_field_t::zero()) && (point2->y == BLS12_377::g2_point_field_t::zero()) && (point2->z == BLS12_377::g2_point_field_t::zero()));
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BLS12_377::g2_point_field_t::zero()) && (point1->y == BLS12_377::g2_point_field_t::zero()) &&
|
||||
(point1->z == BLS12_377::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BLS12_377::g2_point_field_t::zero()) && (point2->y == BLS12_377::g2_point_field_t::zero()) &&
|
||||
(point2->z == BLS12_377::g2_point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective_t *point2)
|
||||
extern "C" bool eq_g2_bn254(BN254::g2_projective_t* point1, BN254::g2_projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) && (point1->z == BN254::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) && (point2->z == BN254::g2_point_field_t::zero()));
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) &&
|
||||
(point1->z == BN254::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) &&
|
||||
(point2->z == BN254::g2_point_field_t::zero()));
|
||||
}
|
||||
#endif
|
||||
@@ -3,170 +3,164 @@
|
||||
#include "affine.cuh"
|
||||
|
||||
template <typename FF, class SCALAR_FF, const FF& B_VALUE>
|
||||
class Projective {
|
||||
class Projective
|
||||
{
|
||||
friend Affine<FF>;
|
||||
|
||||
public:
|
||||
FF x;
|
||||
FF y;
|
||||
FF z;
|
||||
public:
|
||||
FF x;
|
||||
FF y;
|
||||
FF z;
|
||||
|
||||
static HOST_DEVICE_INLINE Projective zero() {
|
||||
return {FF::zero(), FF::one(), FF::zero()};
|
||||
}
|
||||
static HOST_DEVICE_INLINE Projective zero() { return {FF::zero(), FF::one(), FF::zero()}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Affine<FF> to_affine(const Projective &point) {
|
||||
FF denom = FF::inverse(point.z);
|
||||
return {point.x * denom, point.y * denom};
|
||||
}
|
||||
static HOST_DEVICE_INLINE Affine<FF> to_affine(const Projective& point)
|
||||
{
|
||||
FF denom = FF::inverse(point.z);
|
||||
return {point.x * denom, point.y * denom};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Projective from_affine(const Affine<FF> &point) {
|
||||
return {point.x, point.y, FF::one()};
|
||||
}
|
||||
static HOST_DEVICE_INLINE Projective from_affine(const Affine<FF>& point) { return {point.x, point.y, FF::one()}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Projective generator() {
|
||||
return {FF::generator_x(), FF::generator_y(), FF::one()};
|
||||
}
|
||||
static HOST_DEVICE_INLINE Projective generator() { return {FF::generator_x(), FF::generator_y(), FF::one()}; }
|
||||
|
||||
static HOST_DEVICE_INLINE Projective neg(const Projective &point) {
|
||||
return {point.x, FF::neg(point.y), point.z};
|
||||
}
|
||||
static HOST_DEVICE_INLINE Projective neg(const Projective& point) { return {point.x, FF::neg(point.y), point.z}; }
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Projective& p2) {
|
||||
const FF X1 = p1.x; // < 2
|
||||
const FF Y1 = p1.y; // < 2
|
||||
const FF Z1 = p1.z; // < 2
|
||||
const FF X2 = p2.x; // < 2
|
||||
const FF Y2 = p2.y; // < 2
|
||||
const FF Z2 = p2.z; // < 2
|
||||
const FF t00 = X1 * X2; // t00 ← X1 · X2 < 2
|
||||
const FF t01 = Y1 * Y2; // t01 ← Y1 · Y2 < 2
|
||||
const FF t02 = Z1 * Z2; // t02 ← Z1 · Z2 < 2
|
||||
const FF t03 = X1 + Y1; // t03 ← X1 + Y1 < 4
|
||||
const FF t04 = X2 + Y2; // t04 ← X2 + Y2 < 4
|
||||
const FF t05 = t03 * t04; // t03 ← t03 · t04 < 3
|
||||
const FF t06 = t00 + t01; // t06 ← t00 + t01 < 4
|
||||
const FF t07 = t05 - t06; // t05 ← t05 − t06 < 2
|
||||
const FF t08 = Y1 + Z1; // t08 ← Y1 + Z1 < 4
|
||||
const FF t09 = Y2 + Z2; // t09 ← Y2 + Z2 < 4
|
||||
const FF t10 = t08 * t09; // t10 ← t08 · t09 < 3
|
||||
const FF t11 = t01 + t02; // t11 ← t01 + t02 < 4
|
||||
const FF t12 = t10 - t11; // t12 ← t10 − t11 < 2
|
||||
const FF t13 = X1 + Z1; // t13 ← X1 + Z1 < 4
|
||||
const FF t14 = X2 + Z2; // t14 ← X2 + Z2 < 4
|
||||
const FF t15 = t13 * t14; // t15 ← t13 · t14 < 3
|
||||
const FF t16 = t00 + t02; // t16 ← t00 + t02 < 4
|
||||
const FF t17 = t15 - t16; // t17 ← t15 − t16 < 2
|
||||
const FF t18 = t00 + t00; // t18 ← t00 + t00 < 2
|
||||
const FF t19 = t18 + t00; // t19 ← t18 + t00 < 2
|
||||
const FF t20 = FF::template mul_unsigned<3>(
|
||||
FF::template mul_const<B_VALUE>(t02)); // t20 ← b3 · t02 < 2
|
||||
const FF t21 = t01 + t20; // t21 ← t01 + t20 < 2
|
||||
const FF t22 = t01 - t20; // t22 ← t01 − t20 < 2
|
||||
const FF t23 = FF::template mul_unsigned<3>(
|
||||
FF::template mul_const<B_VALUE>(t17)); // t23 ← b3 · t17 < 2
|
||||
const auto t24 = FF::mul_wide(t12, t23); // t24 ← t12 · t23 < 2
|
||||
const auto t25 = FF::mul_wide(t07, t22); // t25 ← t07 · t22 < 2
|
||||
const FF X3 = FF::reduce(t25 - t24); // X3 ← t25 − t24 < 2
|
||||
const auto t27 = FF::mul_wide(t23, t19); // t27 ← t23 · t19 < 2
|
||||
const auto t28 = FF::mul_wide(t22, t21); // t28 ← t22 · t21 < 2
|
||||
const FF Y3 = FF::reduce(t28 + t27); // Y3 ← t28 + t27 < 2
|
||||
const auto t30 = FF::mul_wide(t19, t07); // t30 ← t19 · t07 < 2
|
||||
const auto t31 = FF::mul_wide(t21, t12); // t31 ← t21 · t12 < 2
|
||||
const FF Z3 = FF::reduce(t31 + t30); // Z3 ← t31 + t30 < 2
|
||||
return {X3, Y3, Z3};
|
||||
}
|
||||
friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Projective& p2)
|
||||
{
|
||||
const FF X1 = p1.x; // < 2
|
||||
const FF Y1 = p1.y; // < 2
|
||||
const FF Z1 = p1.z; // < 2
|
||||
const FF X2 = p2.x; // < 2
|
||||
const FF Y2 = p2.y; // < 2
|
||||
const FF Z2 = p2.z; // < 2
|
||||
const FF t00 = X1 * X2; // t00 ← X1 · X2 < 2
|
||||
const FF t01 = Y1 * Y2; // t01 ← Y1 · Y2 < 2
|
||||
const FF t02 = Z1 * Z2; // t02 ← Z1 · Z2 < 2
|
||||
const FF t03 = X1 + Y1; // t03 ← X1 + Y1 < 4
|
||||
const FF t04 = X2 + Y2; // t04 ← X2 + Y2 < 4
|
||||
const FF t05 = t03 * t04; // t03 ← t03 · t04 < 3
|
||||
const FF t06 = t00 + t01; // t06 ← t00 + t01 < 4
|
||||
const FF t07 = t05 - t06; // t05 ← t05 − t06 < 2
|
||||
const FF t08 = Y1 + Z1; // t08 ← Y1 + Z1 < 4
|
||||
const FF t09 = Y2 + Z2; // t09 ← Y2 + Z2 < 4
|
||||
const FF t10 = t08 * t09; // t10 ← t08 · t09 < 3
|
||||
const FF t11 = t01 + t02; // t11 ← t01 + t02 < 4
|
||||
const FF t12 = t10 - t11; // t12 ← t10 − t11 < 2
|
||||
const FF t13 = X1 + Z1; // t13 ← X1 + Z1 < 4
|
||||
const FF t14 = X2 + Z2; // t14 ← X2 + Z2 < 4
|
||||
const FF t15 = t13 * t14; // t15 ← t13 · t14 < 3
|
||||
const FF t16 = t00 + t02; // t16 ← t00 + t02 < 4
|
||||
const FF t17 = t15 - t16; // t17 ← t15 − t16 < 2
|
||||
const FF t18 = t00 + t00; // t18 ← t00 + t00 < 2
|
||||
const FF t19 = t18 + t00; // t19 ← t18 + t00 < 2
|
||||
const FF t20 = FF::template mul_unsigned<3>(FF::template mul_const<B_VALUE>(t02)); // t20 ← b3 · t02 < 2
|
||||
const FF t21 = t01 + t20; // t21 ← t01 + t20 < 2
|
||||
const FF t22 = t01 - t20; // t22 ← t01 − t20 < 2
|
||||
const FF t23 = FF::template mul_unsigned<3>(FF::template mul_const<B_VALUE>(t17)); // t23 ← b3 · t17 < 2
|
||||
const auto t24 = FF::mul_wide(t12, t23); // t24 ← t12 · t23 < 2
|
||||
const auto t25 = FF::mul_wide(t07, t22); // t25 ← t07 · t22 < 2
|
||||
const FF X3 = FF::reduce(t25 - t24); // X3 ← t25 − t24 < 2
|
||||
const auto t27 = FF::mul_wide(t23, t19); // t27 ← t23 · t19 < 2
|
||||
const auto t28 = FF::mul_wide(t22, t21); // t28 ← t22 · t21 < 2
|
||||
const FF Y3 = FF::reduce(t28 + t27); // Y3 ← t28 + t27 < 2
|
||||
const auto t30 = FF::mul_wide(t19, t07); // t30 ← t19 · t07 < 2
|
||||
const auto t31 = FF::mul_wide(t21, t12); // t31 ← t21 · t12 < 2
|
||||
const FF Z3 = FF::reduce(t31 + t30); // Z3 ← t31 + t30 < 2
|
||||
return {X3, Y3, Z3};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator-(Projective p1, const Projective& p2) {
|
||||
return p1 + neg(p2);
|
||||
}
|
||||
friend HOST_DEVICE_INLINE Projective operator-(Projective p1, const Projective& p2) { return p1 + neg(p2); }
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Affine<FF>& p2) {
|
||||
const FF X1 = p1.x; // < 2
|
||||
const FF Y1 = p1.y; // < 2
|
||||
const FF Z1 = p1.z; // < 2
|
||||
const FF X2 = p2.x; // < 2
|
||||
const FF Y2 = p2.y; // < 2
|
||||
const FF t00 = X1 * X2; // t00 ← X1 · X2 < 2
|
||||
const FF t01 = Y1 * Y2; // t01 ← Y1 · Y2 < 2
|
||||
const FF t02 = Z1; // t02 ← Z1 < 2
|
||||
const FF t03 = X1 + Y1; // t03 ← X1 + Y1 < 4
|
||||
const FF t04 = X2 + Y2; // t04 ← X2 + Y2 < 4
|
||||
const FF t05 = t03 * t04; // t03 ← t03 · t04 < 3
|
||||
const FF t06 = t00 + t01; // t06 ← t00 + t01 < 4
|
||||
const FF t07 = t05 - t06; // t05 ← t05 − t06 < 2
|
||||
const FF t08 = Y1 + Z1; // t08 ← Y1 + Z1 < 4
|
||||
const FF t09 = Y2 + FF::one(); // t09 ← Y2 + 1 < 4
|
||||
const FF t10 = t08 * t09; // t10 ← t08 · t09 < 3
|
||||
const FF t11 = t01 + t02; // t11 ← t01 + t02 < 4
|
||||
const FF t12 = t10 - t11; // t12 ← t10 − t11 < 2
|
||||
const FF t13 = X1 + Z1; // t13 ← X1 + Z1 < 4
|
||||
const FF t14 = X2 + FF::one(); // t14 ← X2 + 1 < 4
|
||||
const FF t15 = t13 * t14; // t15 ← t13 · t14 < 3
|
||||
const FF t16 = t00 + t02; // t16 ← t00 + t02 < 4
|
||||
const FF t17 = t15 - t16; // t17 ← t15 − t16 < 2
|
||||
const FF t18 = t00 + t00; // t18 ← t00 + t00 < 2
|
||||
const FF t19 = t18 + t00; // t19 ← t18 + t00 < 2
|
||||
const FF t20 = FF::template mul_unsigned<3>(
|
||||
FF::template mul_const<B_VALUE>(t02)); // t20 ← b3 · t02 < 2
|
||||
const FF t21 = t01 + t20; // t21 ← t01 + t20 < 2
|
||||
const FF t22 = t01 - t20; // t22 ← t01 − t20 < 2
|
||||
const FF t23 = FF::template mul_unsigned<3>(
|
||||
FF::template mul_const<B_VALUE>(t17)); // t23 ← b3 · t17 < 2
|
||||
const auto t24 = FF::mul_wide(t12, t23); // t24 ← t12 · t23 < 2
|
||||
const auto t25 = FF::mul_wide(t07, t22); // t25 ← t07 · t22 < 2
|
||||
const FF X3 = FF::reduce(t25 - t24); // X3 ← t25 − t24 < 2
|
||||
const auto t27 = FF::mul_wide(t23, t19); // t27 ← t23 · t19 < 2
|
||||
const auto t28 = FF::mul_wide(t22, t21); // t28 ← t22 · t21 < 2
|
||||
const FF Y3 = FF::reduce(t28 + t27); // Y3 ← t28 + t27 < 2
|
||||
const auto t30 = FF::mul_wide(t19, t07); // t30 ← t19 · t07 < 2
|
||||
const auto t31 = FF::mul_wide(t21, t12); // t31 ← t21 · t12 < 2
|
||||
const FF Z3 = FF::reduce(t31 + t30); // Z3 ← t31 + t30 < 2
|
||||
return {X3, Y3, Z3};
|
||||
}
|
||||
friend HOST_DEVICE_INLINE Projective operator+(Projective p1, const Affine<FF>& p2)
|
||||
{
|
||||
const FF X1 = p1.x; // < 2
|
||||
const FF Y1 = p1.y; // < 2
|
||||
const FF Z1 = p1.z; // < 2
|
||||
const FF X2 = p2.x; // < 2
|
||||
const FF Y2 = p2.y; // < 2
|
||||
const FF t00 = X1 * X2; // t00 ← X1 · X2 < 2
|
||||
const FF t01 = Y1 * Y2; // t01 ← Y1 · Y2 < 2
|
||||
const FF t02 = Z1; // t02 ← Z1 < 2
|
||||
const FF t03 = X1 + Y1; // t03 ← X1 + Y1 < 4
|
||||
const FF t04 = X2 + Y2; // t04 ← X2 + Y2 < 4
|
||||
const FF t05 = t03 * t04; // t03 ← t03 · t04 < 3
|
||||
const FF t06 = t00 + t01; // t06 ← t00 + t01 < 4
|
||||
const FF t07 = t05 - t06; // t05 ← t05 − t06 < 2
|
||||
const FF t08 = Y1 + Z1; // t08 ← Y1 + Z1 < 4
|
||||
const FF t09 = Y2 + FF::one(); // t09 ← Y2 + 1 < 4
|
||||
const FF t10 = t08 * t09; // t10 ← t08 · t09 < 3
|
||||
const FF t11 = t01 + t02; // t11 ← t01 + t02 < 4
|
||||
const FF t12 = t10 - t11; // t12 ← t10 − t11 < 2
|
||||
const FF t13 = X1 + Z1; // t13 ← X1 + Z1 < 4
|
||||
const FF t14 = X2 + FF::one(); // t14 ← X2 + 1 < 4
|
||||
const FF t15 = t13 * t14; // t15 ← t13 · t14 < 3
|
||||
const FF t16 = t00 + t02; // t16 ← t00 + t02 < 4
|
||||
const FF t17 = t15 - t16; // t17 ← t15 − t16 < 2
|
||||
const FF t18 = t00 + t00; // t18 ← t00 + t00 < 2
|
||||
const FF t19 = t18 + t00; // t19 ← t18 + t00 < 2
|
||||
const FF t20 = FF::template mul_unsigned<3>(FF::template mul_const<B_VALUE>(t02)); // t20 ← b3 · t02 < 2
|
||||
const FF t21 = t01 + t20; // t21 ← t01 + t20 < 2
|
||||
const FF t22 = t01 - t20; // t22 ← t01 − t20 < 2
|
||||
const FF t23 = FF::template mul_unsigned<3>(FF::template mul_const<B_VALUE>(t17)); // t23 ← b3 · t17 < 2
|
||||
const auto t24 = FF::mul_wide(t12, t23); // t24 ← t12 · t23 < 2
|
||||
const auto t25 = FF::mul_wide(t07, t22); // t25 ← t07 · t22 < 2
|
||||
const FF X3 = FF::reduce(t25 - t24); // X3 ← t25 − t24 < 2
|
||||
const auto t27 = FF::mul_wide(t23, t19); // t27 ← t23 · t19 < 2
|
||||
const auto t28 = FF::mul_wide(t22, t21); // t28 ← t22 · t21 < 2
|
||||
const FF Y3 = FF::reduce(t28 + t27); // Y3 ← t28 + t27 < 2
|
||||
const auto t30 = FF::mul_wide(t19, t07); // t30 ← t19 · t07 < 2
|
||||
const auto t31 = FF::mul_wide(t21, t12); // t31 ← t21 · t12 < 2
|
||||
const FF Z3 = FF::reduce(t31 + t30); // Z3 ← t31 + t30 < 2
|
||||
return {X3, Y3, Z3};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator-(Projective p1, const Affine<FF>& p2) {
|
||||
return p1 + Affine<FF>::neg(p2);
|
||||
}
|
||||
friend HOST_DEVICE_INLINE Projective operator-(Projective p1, const Affine<FF>& p2)
|
||||
{
|
||||
return p1 + Affine<FF>::neg(p2);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Projective operator*(SCALAR_FF scalar, const Projective& point) {
|
||||
Projective res = zero();
|
||||
#ifdef __CUDA_ARCH__
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < SCALAR_FF::NBITS; i++) {
|
||||
if (i > 0) {
|
||||
res = res + res;
|
||||
}
|
||||
if (scalar.get_scalar_digit(SCALAR_FF::NBITS - i - 1, 1)) {
|
||||
res = res + point;
|
||||
}
|
||||
}
|
||||
return res;
|
||||
friend HOST_DEVICE_INLINE Projective operator*(SCALAR_FF scalar, const Projective& point)
|
||||
{
|
||||
Projective res = zero();
|
||||
#ifdef __CUDA_ARCH__
|
||||
#pragma unroll
|
||||
#endif
|
||||
for (int i = 0; i < SCALAR_FF::NBITS; i++) {
|
||||
if (i > 0) { res = res + res; }
|
||||
if (scalar.get_scalar_digit(SCALAR_FF::NBITS - i - 1, 1)) { res = res + point; }
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Projective& p1, const Projective& p2) {
|
||||
return (p1.x * p2.z == p2.x * p1.z) && (p1.y * p2.z == p2.y * p1.z);
|
||||
}
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Projective& p1, const Projective& p2)
|
||||
{
|
||||
return (p1.x * p2.z == p2.x * p1.z) && (p1.y * p2.z == p2.y * p1.z);
|
||||
}
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Projective& point) {
|
||||
os << "Point { x: " << point.x << "; y: " << point.y << "; z: " << point.z << " }";
|
||||
return os;
|
||||
}
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Projective& point)
|
||||
{
|
||||
os << "Point { x: " << point.x << "; y: " << point.y << "; z: " << point.z << " }";
|
||||
return os;
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool is_zero(const Projective &point) {
|
||||
return point.x == FF::zero() && point.y != FF::zero() && point.z == FF::zero();
|
||||
}
|
||||
static HOST_DEVICE_INLINE bool is_zero(const Projective& point)
|
||||
{
|
||||
return point.x == FF::zero() && point.y != FF::zero() && point.z == FF::zero();
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE bool is_on_curve(const Projective &point) {
|
||||
if (is_zero(point))
|
||||
return true;
|
||||
bool eq_holds = (FF::template mul_const<B_VALUE>(FF::sqr(point.z) * point.z) + FF::sqr(point.x) * point.x == point.z * FF::sqr(point.y));
|
||||
return point.z != FF::zero() && eq_holds;
|
||||
}
|
||||
static HOST_DEVICE_INLINE bool is_on_curve(const Projective& point)
|
||||
{
|
||||
if (is_zero(point)) return true;
|
||||
bool eq_holds =
|
||||
(FF::template mul_const<B_VALUE>(FF::sqr(point.z) * point.z) + FF::sqr(point.x) * point.x ==
|
||||
point.z * FF::sqr(point.y));
|
||||
return point.z != FF::zero() && eq_holds;
|
||||
}
|
||||
|
||||
static HOST_INLINE Projective rand_host() {
|
||||
SCALAR_FF rand_scalar = SCALAR_FF::rand_host();
|
||||
return rand_scalar * generator();
|
||||
}
|
||||
static HOST_INLINE Projective rand_host()
|
||||
{
|
||||
SCALAR_FF rand_scalar = SCALAR_FF::rand_host();
|
||||
return rand_scalar * generator();
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1,62 +1,65 @@
|
||||
#include "test_kernels.cuh"
|
||||
#include <boost/multiprecision/cpp_int.hpp>
|
||||
#include <cuda_runtime.h>
|
||||
#include <gtest/gtest.h>
|
||||
#include "test_kernels.cuh"
|
||||
#include <iostream>
|
||||
#include <boost/multiprecision/cpp_int.hpp>
|
||||
namespace mp = boost::multiprecision;
|
||||
|
||||
template <class T>
|
||||
int device_populate_random(T* d_elements, unsigned n) {
|
||||
T h_elements[n];
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
h_elements[i] = T::rand_host();
|
||||
return cudaMemcpy(d_elements, h_elements, sizeof(T) * n, cudaMemcpyHostToDevice);
|
||||
int device_populate_random(T* d_elements, unsigned n)
|
||||
{
|
||||
T h_elements[n];
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
h_elements[i] = T::rand_host();
|
||||
return cudaMemcpy(d_elements, h_elements, sizeof(T) * n, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
int device_set(T* d_elements, T el, unsigned n) {
|
||||
T h_elements[n];
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
h_elements[i] = el;
|
||||
return cudaMemcpy(d_elements, h_elements, sizeof(T) * n, cudaMemcpyHostToDevice);
|
||||
int device_set(T* d_elements, T el, unsigned n)
|
||||
{
|
||||
T h_elements[n];
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
h_elements[i] = el;
|
||||
return cudaMemcpy(d_elements, h_elements, sizeof(T) * n, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
mp::int1024_t convert_to_boost_mp(uint32_t *a, uint32_t length)
|
||||
mp::int1024_t convert_to_boost_mp(uint32_t* a, uint32_t length)
|
||||
{
|
||||
mp::int1024_t res = 0;
|
||||
for (uint32_t i = 0; i < length; i++)
|
||||
{
|
||||
for (uint32_t i = 0; i < length; i++) {
|
||||
res += (mp::int1024_t)(a[i]) << 32 * i;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
class PrimitivesTest : public ::testing::Test {
|
||||
class PrimitivesTest : public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
static const unsigned n = 1 << 4;
|
||||
|
||||
projective_t *points1{};
|
||||
projective_t *points2{};
|
||||
g2_projective_t *g2_points1{};
|
||||
g2_projective_t *g2_points2{};
|
||||
scalar_field_t *scalars1{};
|
||||
scalar_field_t *scalars2{};
|
||||
projective_t *zero_points{};
|
||||
g2_projective_t *g2_zero_points{};
|
||||
scalar_field_t *zero_scalars{};
|
||||
scalar_field_t *one_scalars{};
|
||||
affine_t *aff_points{};
|
||||
g2_affine_t *g2_aff_points{};
|
||||
projective_t *res_points1{};
|
||||
projective_t *res_points2{};
|
||||
g2_projective_t *g2_res_points1{};
|
||||
g2_projective_t *g2_res_points2{};
|
||||
scalar_field_t *res_scalars1{};
|
||||
scalar_field_t *res_scalars2{};
|
||||
scalar_field_t::Wide *res_scalars_wide{};
|
||||
scalar_field_t::Wide *res_scalars_wide_full{};
|
||||
projective_t* points1{};
|
||||
projective_t* points2{};
|
||||
g2_projective_t* g2_points1{};
|
||||
g2_projective_t* g2_points2{};
|
||||
scalar_field_t* scalars1{};
|
||||
scalar_field_t* scalars2{};
|
||||
projective_t* zero_points{};
|
||||
g2_projective_t* g2_zero_points{};
|
||||
scalar_field_t* zero_scalars{};
|
||||
scalar_field_t* one_scalars{};
|
||||
affine_t* aff_points{};
|
||||
g2_affine_t* g2_aff_points{};
|
||||
projective_t* res_points1{};
|
||||
projective_t* res_points2{};
|
||||
g2_projective_t* g2_res_points1{};
|
||||
g2_projective_t* g2_res_points2{};
|
||||
scalar_field_t* res_scalars1{};
|
||||
scalar_field_t* res_scalars2{};
|
||||
scalar_field_t::Wide* res_scalars_wide{};
|
||||
scalar_field_t::Wide* res_scalars_wide_full{};
|
||||
|
||||
PrimitivesTest() {
|
||||
PrimitivesTest()
|
||||
{
|
||||
assert(!cudaDeviceReset());
|
||||
assert(!cudaMallocManaged(&points1, n * sizeof(projective_t)));
|
||||
assert(!cudaMallocManaged(&points2, n * sizeof(projective_t)));
|
||||
@@ -80,7 +83,8 @@ protected:
|
||||
assert(!cudaMallocManaged(&res_scalars_wide_full, n * sizeof(scalar_field_t::Wide)));
|
||||
}
|
||||
|
||||
~PrimitivesTest() override {
|
||||
~PrimitivesTest() override
|
||||
{
|
||||
cudaFree(points1);
|
||||
cudaFree(points2);
|
||||
cudaFree(g2_points1);
|
||||
@@ -106,7 +110,8 @@ protected:
|
||||
cudaDeviceReset();
|
||||
}
|
||||
|
||||
void SetUp() override {
|
||||
void SetUp() override
|
||||
{
|
||||
ASSERT_EQ(device_populate_random<projective_t>(points1, n), cudaSuccess);
|
||||
ASSERT_EQ(device_populate_random<projective_t>(points2, n), cudaSuccess);
|
||||
ASSERT_EQ(device_populate_random<g2_projective_t>(g2_points1, n), cudaSuccess);
|
||||
@@ -130,32 +135,37 @@ protected:
|
||||
}
|
||||
};
|
||||
|
||||
TEST_F(PrimitivesTest, FieldAdditionSubtractionCancel) {
|
||||
TEST_F(PrimitivesTest, FieldAdditionSubtractionCancel)
|
||||
{
|
||||
ASSERT_EQ(vec_add(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_sub(res_scalars1, scalars2, res_scalars2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(scalars1[i], res_scalars2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, FieldZeroAddition) {
|
||||
TEST_F(PrimitivesTest, FieldZeroAddition)
|
||||
{
|
||||
ASSERT_EQ(vec_add(scalars1, zero_scalars, res_scalars1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(scalars1[i], res_scalars1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, FieldAdditionHostDeviceEq) {
|
||||
TEST_F(PrimitivesTest, FieldAdditionHostDeviceEq)
|
||||
{
|
||||
ASSERT_EQ(vec_add(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(scalars1[i] + scalars2[i], res_scalars1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationByOne) {
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationByOne)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, one_scalars, res_scalars1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(scalars1[i], res_scalars1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationByMinusOne) {
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationByMinusOne)
|
||||
{
|
||||
ASSERT_EQ(vec_neg(one_scalars, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars1, res_scalars1, res_scalars2, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(scalars1, res_scalars2, res_scalars1, n), cudaSuccess);
|
||||
@@ -163,82 +173,95 @@ TEST_F(PrimitivesTest, FieldMultiplicationByMinusOne) {
|
||||
ASSERT_EQ(res_scalars1[i], zero_scalars[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationByZero) {
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationByZero)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, zero_scalars, res_scalars1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(zero_scalars[i], res_scalars1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationInverseCancel) {
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationInverseCancel)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(field_vec_inv(scalars2, res_scalars2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(scalars1[i], res_scalars1[i] * res_scalars2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationHostDeviceEq) {
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationHostDeviceEq)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(scalars1[i] * scalars2[i], res_scalars1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationByTwoEqSum) {
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationByTwoEqSum)
|
||||
{
|
||||
ASSERT_EQ(vec_add(one_scalars, one_scalars, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, scalars1, res_scalars2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(res_scalars2[i], scalars1[i] + scalars1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, FieldSqrHostDeviceEq) {
|
||||
TEST_F(PrimitivesTest, FieldSqrHostDeviceEq)
|
||||
{
|
||||
ASSERT_EQ(field_vec_sqr(scalars1, res_scalars1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(scalars1[i] * scalars1[i], res_scalars1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationSqrEq) {
|
||||
TEST_F(PrimitivesTest, FieldMultiplicationSqrEq)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, scalars1, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(field_vec_sqr(scalars1, res_scalars2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(res_scalars1[i], res_scalars2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECRandomPointsAreOnCurve) {
|
||||
TEST_F(PrimitivesTest, ECRandomPointsAreOnCurve)
|
||||
{
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_PRED1(projective_t::is_on_curve, points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECPointAdditionSubtractionCancel) {
|
||||
TEST_F(PrimitivesTest, ECPointAdditionSubtractionCancel)
|
||||
{
|
||||
ASSERT_EQ(vec_add(points1, points2, res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_sub(res_points1, points2, res_points2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(points1[i], res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECPointZeroAddition) {
|
||||
TEST_F(PrimitivesTest, ECPointZeroAddition)
|
||||
{
|
||||
ASSERT_EQ(vec_add(points1, zero_points, res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(points1[i], res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECPointAdditionHostDeviceEq) {
|
||||
TEST_F(PrimitivesTest, ECPointAdditionHostDeviceEq)
|
||||
{
|
||||
ASSERT_EQ(vec_add(points1, points2, res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(points1[i] + points2[i], res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationHostDeviceEq) {
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationHostDeviceEq)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, points1, res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(scalars1[i] * points1[i], res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationByOne) {
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationByOne)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(one_scalars, points1, res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(points1[i], res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationByMinusOne) {
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationByMinusOne)
|
||||
{
|
||||
ASSERT_EQ(vec_neg(one_scalars, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, points1, res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_neg(points1, res_points2, n), cudaSuccess);
|
||||
@@ -246,14 +269,16 @@ TEST_F(PrimitivesTest, ECScalarMultiplicationByMinusOne) {
|
||||
ASSERT_EQ(res_points1[i], res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationByTwo) {
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationByTwo)
|
||||
{
|
||||
ASSERT_EQ(vec_add(one_scalars, one_scalars, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, points1, res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ((one_scalars[i] + one_scalars[i]) * points1[i], res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationInverseCancel) {
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationInverseCancel)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, points1, res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(field_vec_inv(scalars1, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, res_points1, res_points2, n), cudaSuccess);
|
||||
@@ -261,7 +286,8 @@ TEST_F(PrimitivesTest, ECScalarMultiplicationInverseCancel) {
|
||||
ASSERT_EQ(points1[i], res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationIsDistributiveOverMultiplication) {
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationIsDistributiveOverMultiplication)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, points1, res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars2, res_points1, res_points2, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
@@ -270,7 +296,8 @@ TEST_F(PrimitivesTest, ECScalarMultiplicationIsDistributiveOverMultiplication) {
|
||||
ASSERT_EQ(res_points1[i], res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationIsDistributiveOverAddition) {
|
||||
TEST_F(PrimitivesTest, ECScalarMultiplicationIsDistributiveOverAddition)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, points1, res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars2, points1, res_points2, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
@@ -278,13 +305,15 @@ TEST_F(PrimitivesTest, ECScalarMultiplicationIsDistributiveOverAddition) {
|
||||
ASSERT_EQ(res_scalars1[i] * points1[i], res_points1[i] + res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECProjectiveToAffine) {
|
||||
TEST_F(PrimitivesTest, ECProjectiveToAffine)
|
||||
{
|
||||
ASSERT_EQ(point_vec_to_affine(points1, aff_points, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(points1[i], projective_t::from_affine(aff_points[i]));
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECMixedPointAddition) {
|
||||
TEST_F(PrimitivesTest, ECMixedPointAddition)
|
||||
{
|
||||
ASSERT_EQ(point_vec_to_affine(points2, aff_points, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(points1, aff_points, res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(points1, points2, res_points2, n), cudaSuccess);
|
||||
@@ -292,7 +321,8 @@ TEST_F(PrimitivesTest, ECMixedPointAddition) {
|
||||
ASSERT_EQ(res_points1[i], res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, ECMixedAdditionOfNegatedPointEqSubtraction) {
|
||||
TEST_F(PrimitivesTest, ECMixedAdditionOfNegatedPointEqSubtraction)
|
||||
{
|
||||
ASSERT_EQ(point_vec_to_affine(points2, aff_points, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_sub(points1, aff_points, res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_neg(points2, res_points2, n), cudaSuccess);
|
||||
@@ -300,117 +330,100 @@ TEST_F(PrimitivesTest, ECMixedAdditionOfNegatedPointEqSubtraction) {
|
||||
ASSERT_EQ(res_points1[i], points1[i] + res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, MP_LSB_MULT) {
|
||||
TEST_F(PrimitivesTest, MP_LSB_MULT)
|
||||
{
|
||||
// LSB multiply, check correctness of first TLC + 1 digits result.
|
||||
ASSERT_EQ(mp_lsb_mult(scalars1, scalars2, res_scalars_wide), cudaSuccess);
|
||||
std::cout << "first GPU lsb mult output = 0x";
|
||||
for (int i=0; i<2*scalar_field_t::TLC; i++)
|
||||
{
|
||||
for (int i = 0; i < 2 * scalar_field_t::TLC; i++) {
|
||||
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i];
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
|
||||
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
|
||||
std::cout << "first GPU full mult output = 0x";
|
||||
for (int i=0; i<2*scalar_field_t::TLC; i++)
|
||||
{
|
||||
for (int i = 0; i < 2 * scalar_field_t::TLC; i++) {
|
||||
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i];
|
||||
}
|
||||
std::cout << std::endl;
|
||||
for (int j = 0; j < n; j++)
|
||||
{
|
||||
for (int i=0; i<scalar_field_t::TLC + 1; i++)
|
||||
{
|
||||
for (int j = 0; j < n; j++) {
|
||||
for (int i = 0; i < scalar_field_t::TLC + 1; i++) {
|
||||
ASSERT_EQ(res_scalars_wide_full[j].limbs_storage.limbs[i], res_scalars_wide[j].limbs_storage.limbs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, MP_MSB_MULT) {
|
||||
TEST_F(PrimitivesTest, MP_MSB_MULT)
|
||||
{
|
||||
// MSB multiply, take n msb bits of multiplication, assert that the error is up to 1.
|
||||
ASSERT_EQ(mp_msb_mult(scalars1, scalars2, res_scalars_wide), cudaSuccess);
|
||||
std::cout << "first GPU msb mult output = 0x";
|
||||
for (int i=2*scalar_field_t::TLC - 1; i >=0 ; i--)
|
||||
{
|
||||
for (int i = 2 * scalar_field_t::TLC - 1; i >= 0; i--) {
|
||||
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
|
||||
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
|
||||
std::cout << "first GPU full mult output = 0x";
|
||||
for (int i=2*scalar_field_t::TLC - 1; i >=0 ; i--)
|
||||
{
|
||||
for (int i = 2 * scalar_field_t::TLC - 1; i >= 0; i--) {
|
||||
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i] << " ";
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
|
||||
for (int i=0; i < 2*scalar_field_t::TLC - 1; i++)
|
||||
{
|
||||
for (int i = 0; i < 2 * scalar_field_t::TLC - 1; i++) {
|
||||
if (res_scalars_wide_full[0].limbs_storage.limbs[i] == res_scalars_wide[0].limbs_storage.limbs[i])
|
||||
std::cout << "matched word idx = " << i << std::endl;
|
||||
std::cout << "matched word idx = " << i << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, INGO_MP_MULT) {
|
||||
TEST_F(PrimitivesTest, INGO_MP_MULT)
|
||||
{
|
||||
// MSB multiply, take n msb bits of multiplication, assert that the error is up to 1.
|
||||
ASSERT_EQ(ingo_mp_mult(scalars1, scalars2, res_scalars_wide), cudaSuccess);
|
||||
std::cout << "INGO = 0x";
|
||||
for (int i=0; i < 2*scalar_field_t::TLC ; i++)
|
||||
{
|
||||
for (int i = 0; i < 2 * scalar_field_t::TLC; i++) {
|
||||
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
|
||||
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
|
||||
std::cout << "ZKSYNC = 0x";
|
||||
for (int i=0; i < 2*scalar_field_t::TLC ; i++)
|
||||
{
|
||||
for (int i = 0; i < 2 * scalar_field_t::TLC; i++) {
|
||||
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i] << " ";
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
|
||||
for (int i=0; i < 2*scalar_field_t::TLC - 1; i++)
|
||||
{
|
||||
for (int i = 0; i < 2 * scalar_field_t::TLC - 1; i++) {
|
||||
if (res_scalars_wide_full[0].limbs_storage.limbs[i] == res_scalars_wide[0].limbs_storage.limbs[i])
|
||||
std::cout << "matched word idx = " << i << std::endl;
|
||||
std::cout << "matched word idx = " << i << std::endl;
|
||||
}
|
||||
for (int j=0; j<n; j++)
|
||||
{
|
||||
for (int i=0; i < 2*scalar_field_t::TLC - 1; i++)
|
||||
{
|
||||
for (int j = 0; j < n; j++) {
|
||||
for (int i = 0; i < 2 * scalar_field_t::TLC - 1; i++) {
|
||||
ASSERT_EQ(res_scalars_wide_full[j].limbs_storage.limbs[i], res_scalars_wide[j].limbs_storage.limbs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
TEST_F(PrimitivesTest, INGO_MP_MSB_MULT) {
|
||||
TEST_F(PrimitivesTest, INGO_MP_MSB_MULT)
|
||||
{
|
||||
// MSB multiply, take n msb bits of multiplication, assert that the error is up to 1.
|
||||
ASSERT_EQ(ingo_mp_msb_mult(scalars1, scalars2, res_scalars_wide, n), cudaSuccess);
|
||||
std::cout << "INGO MSB = 0x";
|
||||
for (int i=2*scalar_field_t::TLC - 1; i >= 0 ; i--)
|
||||
{
|
||||
for (int i = 2 * scalar_field_t::TLC - 1; i >= 0; i--) {
|
||||
std::cout << std::hex << res_scalars_wide[0].limbs_storage.limbs[i] << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
ASSERT_EQ(mp_mult(scalars1, scalars2, res_scalars_wide_full), cudaSuccess);
|
||||
std::cout << "ZKSYNC = 0x";
|
||||
for (int i=2*scalar_field_t::TLC - 1; i >= 0 ; i--)
|
||||
{
|
||||
for (int i = 2 * scalar_field_t::TLC - 1; i >= 0; i--) {
|
||||
std::cout << std::hex << res_scalars_wide_full[0].limbs_storage.limbs[i] << " ";
|
||||
}
|
||||
|
||||
std::cout << std::endl;
|
||||
|
||||
|
||||
|
||||
// for (int i=scalar_field::TLC; i < 2*scalar_field::TLC - 1; i++)
|
||||
// {
|
||||
// ASSERT_EQ(in_bound, true);
|
||||
@@ -428,9 +441,8 @@ TEST_F(PrimitivesTest, INGO_MP_MSB_MULT) {
|
||||
mp::int1024_t res_mp = 0;
|
||||
mp::int1024_t res_gpu = 0;
|
||||
uint32_t num_limbs = scalar_field_t::TLC;
|
||||
|
||||
for (int j=0; j<n; j++)
|
||||
{
|
||||
|
||||
for (int j = 0; j < n; j++) {
|
||||
uint32_t* scalar1_limbs = scalars1[j].limbs_storage.limbs;
|
||||
uint32_t* scalar2_limbs = scalars2[j].limbs_storage.limbs;
|
||||
scalar_1_mp = convert_to_boost_mp(scalar1_limbs, num_limbs);
|
||||
@@ -438,24 +450,24 @@ TEST_F(PrimitivesTest, INGO_MP_MSB_MULT) {
|
||||
res_mp = scalar_1_mp * scalar_2_mp;
|
||||
res_mp = res_mp >> (num_limbs * 32);
|
||||
res_gpu = convert_to_boost_mp(&(res_scalars_wide[j]).limbs_storage.limbs[num_limbs], num_limbs);
|
||||
std::cout << "res mp = " << res_mp << std::endl;
|
||||
std::cout << "res mp = " << res_mp << std::endl;
|
||||
std::cout << "res gpu = " << res_gpu << std::endl;
|
||||
std::cout << "error = " << res_mp - res_gpu << std::endl;
|
||||
bool upper_bound = res_gpu <= res_mp;
|
||||
bool lower_bound = res_gpu > (res_mp - num_limbs);
|
||||
bool in_bound = upper_bound && lower_bound;
|
||||
|
||||
|
||||
|
||||
ASSERT_EQ(in_bound, true);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, INGO_MP_MOD_MULT) {
|
||||
std::cout << " taking num limbs " << std::endl;
|
||||
TEST_F(PrimitivesTest, INGO_MP_MOD_MULT)
|
||||
{
|
||||
std::cout << " taking num limbs " << std::endl;
|
||||
uint32_t num_limbs = scalar_field_t::TLC;
|
||||
std::cout << " calling gpu... = " << std::endl;
|
||||
std::cout << " calling gpu... = " << std::endl;
|
||||
ASSERT_EQ(ingo_mp_mod_mult(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
std::cout << " gpu call done " << std::endl;
|
||||
std::cout << " gpu call done " << std::endl;
|
||||
// mp testing
|
||||
mp::int1024_t scalar_1_mp = 0;
|
||||
mp::int1024_t scalar_2_mp = 0;
|
||||
@@ -463,10 +475,8 @@ TEST_F(PrimitivesTest, INGO_MP_MOD_MULT) {
|
||||
mp::int1024_t res_gpu = 0;
|
||||
mp::int1024_t p = convert_to_boost_mp(scalar_field_t::get_modulus().limbs, num_limbs);
|
||||
std::cout << " p = " << p << std::endl;
|
||||
|
||||
|
||||
for (int j=0; j<n; j++)
|
||||
{
|
||||
|
||||
for (int j = 0; j < n; j++) {
|
||||
uint32_t* scalar1_limbs = scalars1[j].limbs_storage.limbs;
|
||||
uint32_t* scalar2_limbs = scalars2[j].limbs_storage.limbs;
|
||||
scalar_1_mp = convert_to_boost_mp(scalar1_limbs, num_limbs);
|
||||
@@ -475,51 +485,57 @@ TEST_F(PrimitivesTest, INGO_MP_MOD_MULT) {
|
||||
// std::cout << " s2 = " << scalar_2_mp << std::endl;
|
||||
res_mp = (scalar_1_mp * scalar_2_mp) % p;
|
||||
res_gpu = convert_to_boost_mp((res_scalars1[j]).limbs_storage.limbs, num_limbs);
|
||||
std::cout << "res mp = " << res_mp << std::endl;
|
||||
std::cout << "res mp = " << res_mp << std::endl;
|
||||
std::cout << "res gpu = " << res_gpu << std::endl;
|
||||
std::cout << "error = " << res_mp - res_gpu << std::endl;
|
||||
ASSERT_EQ(res_gpu, res_mp);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECRandomPointsAreOnCurve) {
|
||||
TEST_F(PrimitivesTest, G2ECRandomPointsAreOnCurve)
|
||||
{
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_PRED1(g2_projective_t::is_on_curve, g2_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECPointAdditionSubtractionCancel) {
|
||||
TEST_F(PrimitivesTest, G2ECPointAdditionSubtractionCancel)
|
||||
{
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_sub(g2_res_points1, g2_points2, g2_res_points2, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECPointZeroAddition) {
|
||||
TEST_F(PrimitivesTest, G2ECPointZeroAddition)
|
||||
{
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_zero_points, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECPointAdditionHostDeviceEq) {
|
||||
TEST_F(PrimitivesTest, G2ECPointAdditionHostDeviceEq)
|
||||
{
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i] + g2_points2[i], g2_res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationHostDeviceEq) {
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationHostDeviceEq)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(scalars1[i] * g2_points1[i], g2_res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByOne) {
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByOne)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(one_scalars, points1, res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByMinusOne) {
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByMinusOne)
|
||||
{
|
||||
ASSERT_EQ(vec_neg(one_scalars, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_neg(g2_points1, g2_res_points2, n), cudaSuccess);
|
||||
@@ -527,14 +543,16 @@ TEST_F(PrimitivesTest, G2ECScalarMultiplicationByMinusOne) {
|
||||
ASSERT_EQ(g2_res_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByTwo) {
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationByTwo)
|
||||
{
|
||||
ASSERT_EQ(vec_add(one_scalars, one_scalars, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ((one_scalars[i] + one_scalars[i]) * g2_points1[i], g2_res_points1[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationInverseCancel) {
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationInverseCancel)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(field_vec_inv(scalars1, res_scalars1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(res_scalars1, g2_res_points1, g2_res_points2, n), cudaSuccess);
|
||||
@@ -542,7 +560,8 @@ TEST_F(PrimitivesTest, G2ECScalarMultiplicationInverseCancel) {
|
||||
ASSERT_EQ(g2_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverMultiplication) {
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverMultiplication)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars2, g2_res_points1, g2_res_points2, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
@@ -551,7 +570,8 @@ TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverMultiplication)
|
||||
ASSERT_EQ(g2_res_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverAddition) {
|
||||
TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverAddition)
|
||||
{
|
||||
ASSERT_EQ(vec_mul(scalars1, g2_points1, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_mul(scalars2, g2_points1, g2_res_points2, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(scalars1, scalars2, res_scalars1, n), cudaSuccess);
|
||||
@@ -559,13 +579,15 @@ TEST_F(PrimitivesTest, G2ECScalarMultiplicationIsDistributiveOverAddition) {
|
||||
ASSERT_EQ(res_scalars1[i] * g2_points1[i], g2_res_points1[i] + g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECProjectiveToAffine) {
|
||||
TEST_F(PrimitivesTest, G2ECProjectiveToAffine)
|
||||
{
|
||||
ASSERT_EQ(point_vec_to_affine(g2_points1, g2_aff_points, n), cudaSuccess);
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
ASSERT_EQ(g2_points1[i], g2_projective_t::from_affine(g2_aff_points[i]));
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECMixedPointAddition) {
|
||||
TEST_F(PrimitivesTest, G2ECMixedPointAddition)
|
||||
{
|
||||
ASSERT_EQ(point_vec_to_affine(g2_points2, g2_aff_points, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_aff_points, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_add(g2_points1, g2_points2, g2_res_points2, n), cudaSuccess);
|
||||
@@ -573,7 +595,8 @@ TEST_F(PrimitivesTest, G2ECMixedPointAddition) {
|
||||
ASSERT_EQ(g2_res_points1[i], g2_res_points2[i]);
|
||||
}
|
||||
|
||||
TEST_F(PrimitivesTest, G2ECMixedAdditionOfNegatedPointEqSubtraction) {
|
||||
TEST_F(PrimitivesTest, G2ECMixedAdditionOfNegatedPointEqSubtraction)
|
||||
{
|
||||
ASSERT_EQ(point_vec_to_affine(g2_points2, g2_aff_points, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_sub(g2_points1, g2_aff_points, g2_res_points1, n), cudaSuccess);
|
||||
ASSERT_EQ(vec_neg(g2_points2, g2_res_points2, n), cudaSuccess);
|
||||
@@ -581,7 +604,8 @@ TEST_F(PrimitivesTest, G2ECMixedAdditionOfNegatedPointEqSubtraction) {
|
||||
ASSERT_EQ(g2_res_points1[i], g2_points1[i] + g2_res_points2[i]);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
::testing::InitGoogleTest(&argc, argv);
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
@@ -5,189 +5,195 @@
|
||||
|
||||
// TODO: change the curve depending on env variable
|
||||
#include "../curves/bn254/curve_config.cuh"
|
||||
#include "projective.cuh"
|
||||
#include "extension_field.cuh"
|
||||
#include "projective.cuh"
|
||||
|
||||
#endif
|
||||
|
||||
using namespace BN254;
|
||||
|
||||
template <class T1, class T2>
|
||||
__global__ void add_elements_kernel(const T1 *x, const T2 *y, T1 *result, const unsigned count) {
|
||||
__global__ void add_elements_kernel(const T1* x, const T2* y, T1* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count)
|
||||
return;
|
||||
if (gid >= count) return;
|
||||
result[gid] = x[gid] + y[gid];
|
||||
}
|
||||
|
||||
template <class T1, class T2> int vec_add(const T1 *x, const T2 *y, T1 *result, const unsigned count) {
|
||||
template <class T1, class T2>
|
||||
int vec_add(const T1* x, const T2* y, T1* result, const unsigned count)
|
||||
{
|
||||
add_elements_kernel<T1, T2><<<(count - 1) / 32 + 1, 32>>>(x, y, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class T1, class T2>
|
||||
__global__ void sub_elements_kernel(const T1 *x, const T2 *y, T1 *result, const unsigned count) {
|
||||
__global__ void sub_elements_kernel(const T1* x, const T2* y, T1* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count)
|
||||
return;
|
||||
if (gid >= count) return;
|
||||
result[gid] = x[gid] - y[gid];
|
||||
}
|
||||
|
||||
template <class T1, class T2> int vec_sub(const T1 *x, const T2 *y, T1 *result, const unsigned count) {
|
||||
template <class T1, class T2>
|
||||
int vec_sub(const T1* x, const T2* y, T1* result, const unsigned count)
|
||||
{
|
||||
sub_elements_kernel<T1, T2><<<(count - 1) / 32 + 1, 32>>>(x, y, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void neg_elements_kernel(const T *x, T *result, const unsigned count) {
|
||||
__global__ void neg_elements_kernel(const T* x, T* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count)
|
||||
return;
|
||||
if (gid >= count) return;
|
||||
result[gid] = T::neg(x[gid]);
|
||||
}
|
||||
|
||||
template <class T> int vec_neg(const T *x, T *result, const unsigned count) {
|
||||
template <class T>
|
||||
int vec_neg(const T* x, T* result, const unsigned count)
|
||||
{
|
||||
neg_elements_kernel<T><<<(count - 1) / 32 + 1, 32>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class F, class G>
|
||||
__global__ void mul_elements_kernel(const F *x, const G *y, G *result, const unsigned count) {
|
||||
__global__ void mul_elements_kernel(const F* x, const G* y, G* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count)
|
||||
return;
|
||||
if (gid >= count) return;
|
||||
result[gid] = x[gid] * y[gid];
|
||||
}
|
||||
|
||||
template <class F, class G> int vec_mul(const F *x, const G *y, G *result, const unsigned count) {
|
||||
template <class F, class G>
|
||||
int vec_mul(const F* x, const G* y, G* result, const unsigned count)
|
||||
{
|
||||
mul_elements_kernel<F, G><<<(count - 1) / 32 + 1, 32>>>(x, y, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
__global__ void inv_field_elements_kernel(const scalar_field_t *x, scalar_field_t *result, const unsigned count) {
|
||||
__global__ void inv_field_elements_kernel(const scalar_field_t* x, scalar_field_t* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count)
|
||||
return;
|
||||
if (gid >= count) return;
|
||||
result[gid] = scalar_field_t::inverse(x[gid]);
|
||||
}
|
||||
|
||||
int field_vec_inv(const scalar_field_t *x, scalar_field_t *result, const unsigned count) {
|
||||
int field_vec_inv(const scalar_field_t* x, scalar_field_t* result, const unsigned count)
|
||||
{
|
||||
inv_field_elements_kernel<<<(count - 1) / 32 + 1, 32>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
__global__ void sqr_field_elements_kernel(const scalar_field_t *x, scalar_field_t *result, const unsigned count) {
|
||||
__global__ void sqr_field_elements_kernel(const scalar_field_t* x, scalar_field_t* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count)
|
||||
return;
|
||||
if (gid >= count) return;
|
||||
result[gid] = scalar_field_t::sqr(x[gid]);
|
||||
}
|
||||
|
||||
int field_vec_sqr(const scalar_field_t *x, scalar_field_t *result, const unsigned count) {
|
||||
int field_vec_sqr(const scalar_field_t* x, scalar_field_t* result, const unsigned count)
|
||||
{
|
||||
sqr_field_elements_kernel<<<(count - 1) / 32 + 1, 32>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class P, class A>
|
||||
__global__ void to_affine_points_kernel(const P *x, A *result, const unsigned count) {
|
||||
__global__ void to_affine_points_kernel(const P* x, A* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count)
|
||||
return;
|
||||
if (gid >= count) return;
|
||||
result[gid] = P::to_affine(x[gid]);
|
||||
}
|
||||
|
||||
template <class P, class A> int point_vec_to_affine(const P *x, A *result, const unsigned count) {
|
||||
template <class P, class A>
|
||||
int point_vec_to_affine(const P* x, A* result, const unsigned count)
|
||||
{
|
||||
to_affine_points_kernel<P, A><<<(count - 1) / 32 + 1, 32>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
|
||||
__global__ void mp_mult_kernel(const scalar_field_t *x, const scalar_field_t *y, scalar_field_t::Wide *result) {
|
||||
__global__ void mp_mult_kernel(const scalar_field_t* x, const scalar_field_t* y, scalar_field_t::Wide* result)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
scalar_field_t::multiply_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
|
||||
}
|
||||
|
||||
|
||||
int mp_mult(const scalar_field_t *x, scalar_field_t *y, scalar_field_t::Wide *result)
|
||||
int mp_mult(const scalar_field_t* x, scalar_field_t* y, scalar_field_t::Wide* result)
|
||||
{
|
||||
mp_mult_kernel<<<1, 32>>>(x, y, result);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
|
||||
|
||||
__global__ void mp_lsb_mult_kernel(const scalar_field_t *x, const scalar_field_t *y, scalar_field_t::Wide *result) {
|
||||
__global__ void mp_lsb_mult_kernel(const scalar_field_t* x, const scalar_field_t* y, scalar_field_t::Wide* result)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
scalar_field_t::multiply_lsb_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
|
||||
}
|
||||
|
||||
|
||||
int mp_lsb_mult(const scalar_field_t *x, scalar_field_t *y, scalar_field_t::Wide *result)
|
||||
int mp_lsb_mult(const scalar_field_t* x, scalar_field_t* y, scalar_field_t::Wide* result)
|
||||
{
|
||||
mp_lsb_mult_kernel<<<1, 32>>>(x, y, result);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
__global__ void mp_msb_mult_kernel(const scalar_field_t *x, const scalar_field_t *y, scalar_field_t::Wide *result) {
|
||||
__global__ void mp_msb_mult_kernel(const scalar_field_t* x, const scalar_field_t* y, scalar_field_t::Wide* result)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
scalar_field_t::multiply_msb_raw_device(x[gid].limbs_storage, y[gid].limbs_storage, result[gid].limbs_storage);
|
||||
}
|
||||
|
||||
|
||||
int mp_msb_mult(const scalar_field_t *x, scalar_field_t *y, scalar_field_t::Wide *result)
|
||||
int mp_msb_mult(const scalar_field_t* x, scalar_field_t* y, scalar_field_t::Wide* result)
|
||||
{
|
||||
mp_msb_mult_kernel<<<1, 1>>>(x, y, result);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
|
||||
// One thread per element: hands the limb storage of x[i], y[i] and result[i] to
// scalar_field_t::ingo_multiply_raw_device. There is no bounds check, so the launch
// must not create more threads than there are elements.
__global__ void ingo_mp_mult_kernel(const scalar_field_t* x, const scalar_field_t* y, scalar_field_t::Wide* result)
{
  const unsigned idx = threadIdx.x + blockDim.x * blockIdx.x;
  scalar_field_t::ingo_multiply_raw_device(x[idx].limbs_storage, y[idx].limbs_storage, result[idx].limbs_storage);
}
|
||||
|
||||
|
||||
int ingo_mp_mult(const scalar_field_t *x, scalar_field_t *y, scalar_field_t::Wide *result)
|
||||
int ingo_mp_mult(const scalar_field_t* x, scalar_field_t* y, scalar_field_t::Wide* result)
|
||||
{
|
||||
ingo_mp_mult_kernel<<<1, 32>>>(x, y, result);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
|
||||
// One thread per element: hands the limb storage of x[i], y[i] and result[i] to
// scalar_field_t::ingo_msb_multiply_raw_device. There is no bounds check, so the launch
// must not create more threads than there are elements.
__global__ void ingo_mp_msb_mult_kernel(const scalar_field_t* x, const scalar_field_t* y, scalar_field_t::Wide* result)
{
  const unsigned idx = threadIdx.x + blockDim.x * blockIdx.x;
  scalar_field_t::ingo_msb_multiply_raw_device(x[idx].limbs_storage, y[idx].limbs_storage, result[idx].limbs_storage);
}
|
||||
|
||||
|
||||
int ingo_mp_msb_mult(const scalar_field_t *x, scalar_field_t *y, scalar_field_t::Wide *result, const unsigned n)
|
||||
int ingo_mp_msb_mult(const scalar_field_t* x, scalar_field_t* y, scalar_field_t::Wide* result, const unsigned n)
|
||||
{
|
||||
ingo_mp_msb_mult_kernel<<<1, n>>>(x, y, result);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
|
||||
// One thread per element: result[i] = x[i] * y[i] using scalar_field_t's operator*.
// There is no bounds check, so the launch must not create more threads than there are elements.
__global__ void ingo_mp_mod_mult_kernel(const scalar_field_t* x, const scalar_field_t* y, scalar_field_t* result)
{
  const unsigned idx = threadIdx.x + blockDim.x * blockIdx.x;
  result[idx] = x[idx] * y[idx];
}
|
||||
|
||||
|
||||
int ingo_mp_mod_mult(const scalar_field_t *x, scalar_field_t *y, scalar_field_t *result, const unsigned n)
|
||||
int ingo_mp_mod_mult(const scalar_field_t* x, scalar_field_t* y, scalar_field_t* result, const unsigned n)
|
||||
{
|
||||
ingo_mp_mod_mult_kernel<<<1, n>>>(x, y, result);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
@@ -2,39 +2,30 @@
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
struct cuda_ctx {
|
||||
int device_id;
|
||||
cudaMemPool_t mempool;
|
||||
cudaStream_t stream;
|
||||
int device_id;
|
||||
cudaMemPool_t mempool;
|
||||
cudaStream_t stream;
|
||||
|
||||
cuda_ctx(int gpu_id) {
|
||||
gpu_id = gpu_id;
|
||||
cudaMemPoolProps pool_props;
|
||||
pool_props.allocType = cudaMemAllocationTypePinned;
|
||||
pool_props.handleTypes = cudaMemHandleTypePosixFileDescriptor;
|
||||
pool_props.location.type = cudaMemLocationTypeDevice;
|
||||
pool_props.location.id = device_id;
|
||||
cuda_ctx(int gpu_id)
|
||||
{
|
||||
gpu_id = gpu_id;
|
||||
cudaMemPoolProps pool_props;
|
||||
pool_props.allocType = cudaMemAllocationTypePinned;
|
||||
pool_props.handleTypes = cudaMemHandleTypePosixFileDescriptor;
|
||||
pool_props.location.type = cudaMemLocationTypeDevice;
|
||||
pool_props.location.id = device_id;
|
||||
|
||||
cudaMemPoolCreate(&mempool, &pool_props);
|
||||
cudaStreamCreate(&stream);
|
||||
}
|
||||
cudaMemPoolCreate(&mempool, &pool_props);
|
||||
cudaStreamCreate(&stream);
|
||||
}
|
||||
|
||||
void set_device() {
|
||||
cudaSetDevice(device_id);
|
||||
}
|
||||
void set_device() { cudaSetDevice(device_id); }
|
||||
|
||||
void sync_stream() {
|
||||
cudaStreamSynchronize(stream);
|
||||
}
|
||||
|
||||
void malloc(void *ptr, size_t bytesize) {
|
||||
cudaMallocFromPoolAsync(&ptr, bytesize, mempool, stream);
|
||||
}
|
||||
|
||||
void free(void *ptr) {
|
||||
cudaFreeAsync(ptr, stream);
|
||||
}
|
||||
void sync_stream() { cudaStreamSynchronize(stream); }
|
||||
|
||||
void malloc(void* ptr, size_t bytesize) { cudaMallocFromPoolAsync(&ptr, bytesize, mempool, stream); }
|
||||
|
||||
void free(void* ptr) { cudaFreeAsync(ptr, stream); }
|
||||
};
|
||||
|
||||
// -- Proposed Function Tops --------------------------------------------------
|
||||
|
||||
@@ -5,85 +5,92 @@
|
||||
|
||||
namespace host_math {
|
||||
|
||||
// return x + y with uint32_t operands
|
||||
static __host__ uint32_t add(const uint32_t x, const uint32_t y) { return x + y; }
|
||||
// return x + y with uint32_t operands
|
||||
static __host__ uint32_t add(const uint32_t x, const uint32_t y) { return x + y; }
|
||||
|
||||
// return x + y + carry with uint32_t operands
|
||||
static __host__ uint32_t addc(const uint32_t x, const uint32_t y, const uint32_t carry) { return x + y + carry; }
|
||||
// return x + y + carry with uint32_t operands
|
||||
static __host__ uint32_t addc(const uint32_t x, const uint32_t y, const uint32_t carry) { return x + y + carry; }
|
||||
|
||||
// return x + y and carry out with uint32_t operands
|
||||
static __host__ uint32_t add_cc(const uint32_t x, const uint32_t y, uint32_t &carry) {
|
||||
uint32_t result;
|
||||
result = x + y;
|
||||
carry = x > result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return x + y + carry and carry out with uint32_t operands
|
||||
static __host__ uint32_t addc_cc(const uint32_t x, const uint32_t y, uint32_t &carry) {
|
||||
const uint32_t result = x + y + carry;
|
||||
carry = carry && x >= result || !carry && x > result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return x - y with uint32_t operands
|
||||
static __host__ uint32_t sub(const uint32_t x, const uint32_t y) { return x - y; }
|
||||
|
||||
// return x - y - borrow with uint32_t operands
|
||||
static __host__ uint32_t subc(const uint32_t x, const uint32_t y, const uint32_t borrow) { return x - y - borrow; }
|
||||
|
||||
// return x - y and borrow out with uint32_t operands
|
||||
static __host__ uint32_t sub_cc(const uint32_t x, const uint32_t y, uint32_t &borrow) {
|
||||
uint32_t result;
|
||||
result = x - y;
|
||||
borrow = x < result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return x - y - borrow and borrow out with uint32_t operands
|
||||
static __host__ uint32_t subc_cc(const uint32_t x, const uint32_t y, uint32_t &borrow) {
|
||||
const uint32_t result = x - y - borrow;
|
||||
borrow = borrow && x <= result || !borrow && x < result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return x * y + z + carry and carry out with uint32_t operands
|
||||
static __host__ uint32_t madc_cc(const uint32_t x, const uint32_t y, const uint32_t z, uint32_t &carry) {
|
||||
uint32_t result;
|
||||
uint64_t r = static_cast<uint64_t>(x) * y + z + carry;
|
||||
carry = r >> 32;
|
||||
result = r & 0xffffffff;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
template <unsigned OPS_COUNT = UINT32_MAX, bool CARRY_IN = false, bool CARRY_OUT = false> struct carry_chain {
|
||||
unsigned index;
|
||||
|
||||
constexpr __host__ __forceinline__ carry_chain() : index(0) {}
|
||||
|
||||
__host__ __forceinline__ uint32_t add(const uint32_t x, const uint32_t y, uint32_t &carry) {
|
||||
index++;
|
||||
if (index == 1 && OPS_COUNT == 1 && !CARRY_IN && !CARRY_OUT)
|
||||
return host_math::add(x, y);
|
||||
else if (index == 1 && !CARRY_IN)
|
||||
return host_math::add_cc(x, y, carry);
|
||||
else if (index < OPS_COUNT || CARRY_OUT)
|
||||
return host_math::addc_cc(x, y, carry);
|
||||
else
|
||||
return host_math::addc(x, y, carry);
|
||||
// return x + y and carry out with uint32_t operands
|
||||
static __host__ uint32_t add_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
uint32_t result;
|
||||
result = x + y;
|
||||
carry = x > result;
|
||||
return result;
|
||||
}
|
||||
|
||||
__host__ __forceinline__ uint32_t sub(const uint32_t x, const uint32_t y, uint32_t &carry) {
|
||||
index++;
|
||||
if (index == 1 && OPS_COUNT == 1 && !CARRY_IN && !CARRY_OUT)
|
||||
return host_math::sub(x, y);
|
||||
else if (index == 1 && !CARRY_IN)
|
||||
return host_math::sub_cc(x, y, carry);
|
||||
else if (index < OPS_COUNT || CARRY_OUT)
|
||||
return host_math::subc_cc(x, y, carry);
|
||||
else
|
||||
return host_math::subc(x, y, carry);
|
||||
// return x + y + carry and carry out with uint32_t operands
|
||||
static __host__ uint32_t addc_cc(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
const uint32_t result = x + y + carry;
|
||||
carry = carry && x >= result || !carry && x > result;
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
// return x - y with uint32_t operands
|
||||
static __host__ uint32_t sub(const uint32_t x, const uint32_t y) { return x - y; }
|
||||
|
||||
// return x - y - borrow with uint32_t operands
|
||||
static __host__ uint32_t subc(const uint32_t x, const uint32_t y, const uint32_t borrow) { return x - y - borrow; }
|
||||
|
||||
// return x - y and borrow out with uint32_t operands
|
||||
static __host__ uint32_t sub_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
{
|
||||
uint32_t result;
|
||||
result = x - y;
|
||||
borrow = x < result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return x - y - borrow and borrow out with uint32_t operands
|
||||
static __host__ uint32_t subc_cc(const uint32_t x, const uint32_t y, uint32_t& borrow)
|
||||
{
|
||||
const uint32_t result = x - y - borrow;
|
||||
borrow = borrow && x <= result || !borrow && x < result;
|
||||
return result;
|
||||
}
|
||||
|
||||
// return x * y + z + carry and carry out with uint32_t operands
|
||||
static __host__ uint32_t madc_cc(const uint32_t x, const uint32_t y, const uint32_t z, uint32_t& carry)
|
||||
{
|
||||
uint32_t result;
|
||||
uint64_t r = static_cast<uint64_t>(x) * y + z + carry;
|
||||
carry = r >> 32;
|
||||
result = r & 0xffffffff;
|
||||
return result;
|
||||
}
|
||||
|
||||
template <unsigned OPS_COUNT = UINT32_MAX, bool CARRY_IN = false, bool CARRY_OUT = false>
|
||||
struct carry_chain {
|
||||
unsigned index;
|
||||
|
||||
constexpr __host__ __forceinline__ carry_chain() : index(0) {}
|
||||
|
||||
__host__ __forceinline__ uint32_t add(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
index++;
|
||||
if (index == 1 && OPS_COUNT == 1 && !CARRY_IN && !CARRY_OUT)
|
||||
return host_math::add(x, y);
|
||||
else if (index == 1 && !CARRY_IN)
|
||||
return host_math::add_cc(x, y, carry);
|
||||
else if (index < OPS_COUNT || CARRY_OUT)
|
||||
return host_math::addc_cc(x, y, carry);
|
||||
else
|
||||
return host_math::addc(x, y, carry);
|
||||
}
|
||||
|
||||
__host__ __forceinline__ uint32_t sub(const uint32_t x, const uint32_t y, uint32_t& carry)
|
||||
{
|
||||
index++;
|
||||
if (index == 1 && OPS_COUNT == 1 && !CARRY_IN && !CARRY_OUT)
|
||||
return host_math::sub(x, y);
|
||||
else if (index == 1 && !CARRY_IN)
|
||||
return host_math::sub_cc(x, y, carry);
|
||||
else if (index < OPS_COUNT || CARRY_OUT)
|
||||
return host_math::subc_cc(x, y, carry);
|
||||
else
|
||||
return host_math::subc(x, y, carry);
|
||||
}
|
||||
};
|
||||
} // namespace host_math
|
||||
|
||||
@@ -3,23 +3,25 @@
|
||||
#include "../appUtils/vector_manipulation/ve_mod_mult.cuh"
|
||||
|
||||
template <typename E>
|
||||
int convert_montgomery(E *d_inout, size_t n_elments, bool is_into, cudaStream_t stream)
|
||||
{
|
||||
// Set the grid and block dimensions
|
||||
int num_threads = MAX_THREADS_PER_BLOCK;
|
||||
int num_blocks = (n_elments + num_threads - 1) / num_threads;
|
||||
E mont = is_into ? E::montgomery_r() : E::montgomery_r_inv();
|
||||
template_normalize_kernel<<<num_blocks, num_threads, 0, stream>>>(d_inout, n_elments, mont);
|
||||
int convert_montgomery(E* d_inout, size_t n_elments, bool is_into, cudaStream_t stream)
|
||||
{
|
||||
// Set the grid and block dimensions
|
||||
int num_threads = MAX_THREADS_PER_BLOCK;
|
||||
int num_blocks = (n_elments + num_threads - 1) / num_threads;
|
||||
E mont = is_into ? E::montgomery_r() : E::montgomery_r_inv();
|
||||
template_normalize_kernel<<<num_blocks, num_threads, 0, stream>>>(d_inout, n_elments, mont);
|
||||
|
||||
return 0; //TODO: void with propper error handling
|
||||
return 0; // TODO: void with propper error handling
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
int to_montgomery(E* d_inout, unsigned n, cudaStream_t stream) {
|
||||
return convert_montgomery(d_inout, n, true, stream);
|
||||
int to_montgomery(E* d_inout, unsigned n, cudaStream_t stream)
|
||||
{
|
||||
return convert_montgomery(d_inout, n, true, stream);
|
||||
}
|
||||
|
||||
template <typename E>
|
||||
int from_montgomery(E* d_inout, unsigned n, cudaStream_t stream){
|
||||
return convert_montgomery(d_inout, n, false, stream);
|
||||
int from_montgomery(E* d_inout, unsigned n, cudaStream_t stream)
|
||||
{
|
||||
return convert_montgomery(d_inout, n, false, stream);
|
||||
}
|
||||
@@ -1,73 +1,63 @@
|
||||
#pragma once
|
||||
template < class F > class Element {
|
||||
public:
|
||||
int v;
|
||||
__device__ __host__ Element < F > () {
|
||||
v = 0;
|
||||
}
|
||||
__device__ __host__ Element < F > (int r) {
|
||||
v = r % F::q;
|
||||
if (r == F::q) v = F::q;
|
||||
}
|
||||
__device__ __host__ Element < F > operator + (Element < F >
|
||||
const & obj) {
|
||||
Element < F > res;
|
||||
res.v = (v + obj.v) % F::q;
|
||||
return res;
|
||||
}
|
||||
__device__ __host__ Element < F > operator - (Element < F >
|
||||
const & obj) {
|
||||
Element < F > res;
|
||||
res.v = (v - obj.v) % F::q;
|
||||
if (res.v < 0) {
|
||||
res.v = F::q + res.v;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
template <class F>
|
||||
class Element
|
||||
{
|
||||
public:
|
||||
int v;
|
||||
__device__ __host__ Element<F>() { v = 0; }
|
||||
__device__ __host__ Element<F>(int r)
|
||||
{
|
||||
v = r % F::q;
|
||||
if (r == F::q) v = F::q;
|
||||
}
|
||||
__device__ __host__ Element<F> operator+(Element<F> const& obj)
|
||||
{
|
||||
Element<F> res;
|
||||
res.v = (v + obj.v) % F::q;
|
||||
return res;
|
||||
}
|
||||
__device__ __host__ Element<F> operator-(Element<F> const& obj)
|
||||
{
|
||||
Element<F> res;
|
||||
res.v = (v - obj.v) % F::q;
|
||||
if (res.v < 0) { res.v = F::q + res.v; }
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
template < class F > class Scalar {
|
||||
public:
|
||||
int v;
|
||||
__device__ __host__ Scalar < F > () {
|
||||
v = 0;
|
||||
}
|
||||
__device__ __host__ Scalar < F > (int r) {
|
||||
v = r % F::q;
|
||||
}
|
||||
__device__ __host__ Scalar < F > operator + (Scalar < F >
|
||||
const & obj) {
|
||||
Scalar < F > res;
|
||||
res.v = (v + obj.v) % F::q;
|
||||
return res;
|
||||
}
|
||||
__device__ __host__ Scalar < F > operator * (Scalar < F >
|
||||
const & obj) {
|
||||
Scalar < F > res;
|
||||
res.v = (v * obj.v) % F::q;
|
||||
return res;
|
||||
}
|
||||
__device__ __host__ Element < F > operator * (Element < F >
|
||||
const & obj) {
|
||||
Element < F > res;
|
||||
res.v = (v * obj.v) % F::q;
|
||||
return res;
|
||||
}
|
||||
Scalar < F > operator - (Scalar < F > const & obj) {
|
||||
Scalar < F > res;
|
||||
res.v = (v - obj.v) % F::q;
|
||||
if (res.v < 0) {
|
||||
res.v = F::q + res.v;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
bool operator < (Scalar < F > const & obj) {
|
||||
return v < obj.v;
|
||||
}
|
||||
static Scalar<F> one(){
|
||||
return Scalar<F>(1);
|
||||
}
|
||||
static Scalar<F> zero(){
|
||||
return Scalar<F>(0);
|
||||
}
|
||||
template <class F>
|
||||
class Scalar
|
||||
{
|
||||
public:
|
||||
int v;
|
||||
__device__ __host__ Scalar<F>() { v = 0; }
|
||||
__device__ __host__ Scalar<F>(int r) { v = r % F::q; }
|
||||
__device__ __host__ Scalar<F> operator+(Scalar<F> const& obj)
|
||||
{
|
||||
Scalar<F> res;
|
||||
res.v = (v + obj.v) % F::q;
|
||||
return res;
|
||||
}
|
||||
__device__ __host__ Scalar<F> operator*(Scalar<F> const& obj)
|
||||
{
|
||||
Scalar<F> res;
|
||||
res.v = (v * obj.v) % F::q;
|
||||
return res;
|
||||
}
|
||||
__device__ __host__ Element<F> operator*(Element<F> const& obj)
|
||||
{
|
||||
Element<F> res;
|
||||
res.v = (v * obj.v) % F::q;
|
||||
return res;
|
||||
}
|
||||
Scalar<F> operator-(Scalar<F> const& obj)
|
||||
{
|
||||
Scalar<F> res;
|
||||
res.v = (v - obj.v) % F::q;
|
||||
if (res.v < 0) { res.v = F::q + res.v; }
|
||||
return res;
|
||||
}
|
||||
bool operator<(Scalar<F> const& obj) { return v < obj.v; }
|
||||
static Scalar<F> one() { return Scalar<F>(1); }
|
||||
static Scalar<F> zero() { return Scalar<F>(0); }
|
||||
};
|
||||
@@ -4,238 +4,279 @@
|
||||
|
||||
namespace ptx {
|
||||
|
||||
__device__ __forceinline__ uint32_t add(const uint32_t x, const uint32_t y) {
|
||||
uint32_t result;
|
||||
asm("add.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t add(const uint32_t x, const uint32_t y)
|
||||
{
|
||||
uint32_t result;
|
||||
asm("add.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t add_cc(const uint32_t x, const uint32_t y) {
|
||||
uint32_t result;
|
||||
asm volatile("add.cc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t add_cc(const uint32_t x, const uint32_t y)
|
||||
{
|
||||
uint32_t result;
|
||||
asm volatile("add.cc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t addc(const uint32_t x, const uint32_t y) {
|
||||
uint32_t result;
|
||||
asm volatile("addc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t addc(const uint32_t x, const uint32_t y)
|
||||
{
|
||||
uint32_t result;
|
||||
asm volatile("addc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t addc_cc(const uint32_t x, const uint32_t y) {
|
||||
uint32_t result;
|
||||
asm volatile("addc.cc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t addc_cc(const uint32_t x, const uint32_t y)
|
||||
{
|
||||
uint32_t result;
|
||||
asm volatile("addc.cc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t sub(const uint32_t x, const uint32_t y) {
|
||||
uint32_t result;
|
||||
asm("sub.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t sub(const uint32_t x, const uint32_t y)
|
||||
{
|
||||
uint32_t result;
|
||||
asm("sub.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t sub_cc(const uint32_t x, const uint32_t y) {
|
||||
uint32_t result;
|
||||
asm volatile("sub.cc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t sub_cc(const uint32_t x, const uint32_t y)
|
||||
{
|
||||
uint32_t result;
|
||||
asm volatile("sub.cc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t subc(const uint32_t x, const uint32_t y) {
|
||||
uint32_t result;
|
||||
asm volatile("subc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t subc(const uint32_t x, const uint32_t y)
|
||||
{
|
||||
uint32_t result;
|
||||
asm volatile("subc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t subc_cc(const uint32_t x, const uint32_t y) {
|
||||
uint32_t result;
|
||||
asm volatile("subc.cc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t subc_cc(const uint32_t x, const uint32_t y)
|
||||
{
|
||||
uint32_t result;
|
||||
asm volatile("subc.cc.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t mul_lo(const uint32_t x, const uint32_t y) {
|
||||
uint32_t result;
|
||||
asm("mul.lo.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t mul_lo(const uint32_t x, const uint32_t y)
|
||||
{
|
||||
uint32_t result;
|
||||
asm("mul.lo.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t mul_hi(const uint32_t x, const uint32_t y) {
|
||||
uint32_t result;
|
||||
asm("mul.hi.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t mul_hi(const uint32_t x, const uint32_t y)
|
||||
{
|
||||
uint32_t result;
|
||||
asm("mul.hi.u32 %0, %1, %2;" : "=r"(result) : "r"(x), "r"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t mad_lo(const uint32_t x, const uint32_t y, const uint32_t z) {
|
||||
uint32_t result;
|
||||
asm("mad.lo.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t mad_lo(const uint32_t x, const uint32_t y, const uint32_t z)
|
||||
{
|
||||
uint32_t result;
|
||||
asm("mad.lo.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t mad_hi(const uint32_t x, const uint32_t y, const uint32_t z) {
|
||||
uint32_t result;
|
||||
asm("mad.hi.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t mad_hi(const uint32_t x, const uint32_t y, const uint32_t z)
|
||||
{
|
||||
uint32_t result;
|
||||
asm("mad.hi.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t mad_lo_cc(const uint32_t x, const uint32_t y, const uint32_t z) {
|
||||
uint32_t result;
|
||||
asm volatile("mad.lo.cc.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t mad_lo_cc(const uint32_t x, const uint32_t y, const uint32_t z)
|
||||
{
|
||||
uint32_t result;
|
||||
asm volatile("mad.lo.cc.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t mad_hi_cc(const uint32_t x, const uint32_t y, const uint32_t z) {
|
||||
uint32_t result;
|
||||
asm volatile("mad.hi.cc.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t mad_hi_cc(const uint32_t x, const uint32_t y, const uint32_t z)
|
||||
{
|
||||
uint32_t result;
|
||||
asm volatile("mad.hi.cc.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t madc_lo(const uint32_t x, const uint32_t y, const uint32_t z) {
|
||||
uint32_t result;
|
||||
asm volatile("madc.lo.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t madc_lo(const uint32_t x, const uint32_t y, const uint32_t z)
|
||||
{
|
||||
uint32_t result;
|
||||
asm volatile("madc.lo.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t madc_hi(const uint32_t x, const uint32_t y, const uint32_t z) {
|
||||
uint32_t result;
|
||||
asm volatile("madc.hi.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t madc_hi(const uint32_t x, const uint32_t y, const uint32_t z)
|
||||
{
|
||||
uint32_t result;
|
||||
asm volatile("madc.hi.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t madc_lo_cc(const uint32_t x, const uint32_t y, const uint32_t z) {
|
||||
uint32_t result;
|
||||
asm volatile("madc.lo.cc.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t madc_lo_cc(const uint32_t x, const uint32_t y, const uint32_t z)
|
||||
{
|
||||
uint32_t result;
|
||||
asm volatile("madc.lo.cc.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint32_t madc_hi_cc(const uint32_t x, const uint32_t y, const uint32_t z) {
|
||||
uint32_t result;
|
||||
asm volatile("madc.hi.cc.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint32_t madc_hi_cc(const uint32_t x, const uint32_t y, const uint32_t z)
|
||||
{
|
||||
uint32_t result;
|
||||
asm volatile("madc.hi.cc.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(x), "r"(y), "r"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t mov_b64(uint32_t lo, uint32_t hi) {
|
||||
uint64_t result;
|
||||
asm("mov.b64 %0, {%1,%2};" : "=l"(result) : "r"(lo), "r"(hi));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t mov_b64(uint32_t lo, uint32_t hi)
|
||||
{
|
||||
uint64_t result;
|
||||
asm("mov.b64 %0, {%1,%2};" : "=l"(result) : "r"(lo), "r"(hi));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Gives u64 overloads a dedicated namespace.
|
||||
// Callers should know exactly what they're calling (no implicit conversions).
|
||||
namespace u64 {
|
||||
// Gives u64 overloads a dedicated namespace.
|
||||
// Callers should know exactly what they're calling (no implicit conversions).
|
||||
namespace u64 {
|
||||
|
||||
__device__ __forceinline__ uint64_t add(const uint64_t x, const uint64_t y) {
|
||||
uint64_t result;
|
||||
asm("add.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t add(const uint64_t x, const uint64_t y)
|
||||
{
|
||||
uint64_t result;
|
||||
asm("add.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t add_cc(const uint64_t x, const uint64_t y) {
|
||||
uint64_t result;
|
||||
asm volatile("add.cc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t add_cc(const uint64_t x, const uint64_t y)
|
||||
{
|
||||
uint64_t result;
|
||||
asm volatile("add.cc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t addc(const uint64_t x, const uint64_t y) {
|
||||
uint64_t result;
|
||||
asm volatile("addc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t addc(const uint64_t x, const uint64_t y)
|
||||
{
|
||||
uint64_t result;
|
||||
asm volatile("addc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t addc_cc(const uint64_t x, const uint64_t y) {
|
||||
uint64_t result;
|
||||
asm volatile("addc.cc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t addc_cc(const uint64_t x, const uint64_t y)
|
||||
{
|
||||
uint64_t result;
|
||||
asm volatile("addc.cc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t sub(const uint64_t x, const uint64_t y) {
|
||||
uint64_t result;
|
||||
asm("sub.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t sub(const uint64_t x, const uint64_t y)
|
||||
{
|
||||
uint64_t result;
|
||||
asm("sub.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t sub_cc(const uint64_t x, const uint64_t y) {
|
||||
uint64_t result;
|
||||
asm volatile("sub.cc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t sub_cc(const uint64_t x, const uint64_t y)
|
||||
{
|
||||
uint64_t result;
|
||||
asm volatile("sub.cc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t subc(const uint64_t x, const uint64_t y) {
|
||||
uint64_t result;
|
||||
asm volatile("subc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t subc(const uint64_t x, const uint64_t y)
|
||||
{
|
||||
uint64_t result;
|
||||
asm volatile("subc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t subc_cc(const uint64_t x, const uint64_t y) {
|
||||
uint64_t result;
|
||||
asm volatile("subc.cc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t subc_cc(const uint64_t x, const uint64_t y)
|
||||
{
|
||||
uint64_t result;
|
||||
asm volatile("subc.cc.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t mul_lo(const uint64_t x, const uint64_t y) {
|
||||
uint64_t result;
|
||||
asm("mul.lo.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t mul_lo(const uint64_t x, const uint64_t y)
|
||||
{
|
||||
uint64_t result;
|
||||
asm("mul.lo.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t mul_hi(const uint64_t x, const uint64_t y) {
|
||||
uint64_t result;
|
||||
asm("mul.hi.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t mul_hi(const uint64_t x, const uint64_t y)
|
||||
{
|
||||
uint64_t result;
|
||||
asm("mul.hi.u64 %0, %1, %2;" : "=l"(result) : "l"(x), "l"(y));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t mad_lo(const uint64_t x, const uint64_t y, const uint64_t z) {
|
||||
uint64_t result;
|
||||
asm("mad.lo.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t mad_lo(const uint64_t x, const uint64_t y, const uint64_t z)
|
||||
{
|
||||
uint64_t result;
|
||||
asm("mad.lo.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t mad_hi(const uint64_t x, const uint64_t y, const uint64_t z) {
|
||||
uint64_t result;
|
||||
asm("mad.hi.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t mad_hi(const uint64_t x, const uint64_t y, const uint64_t z)
|
||||
{
|
||||
uint64_t result;
|
||||
asm("mad.hi.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t mad_lo_cc(const uint64_t x, const uint64_t y, const uint64_t z) {
|
||||
uint64_t result;
|
||||
asm volatile("mad.lo.cc.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t mad_lo_cc(const uint64_t x, const uint64_t y, const uint64_t z)
|
||||
{
|
||||
uint64_t result;
|
||||
asm volatile("mad.lo.cc.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t mad_hi_cc(const uint64_t x, const uint64_t y, const uint64_t z) {
|
||||
uint64_t result;
|
||||
asm volatile("mad.hi.cc.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t mad_hi_cc(const uint64_t x, const uint64_t y, const uint64_t z)
|
||||
{
|
||||
uint64_t result;
|
||||
asm volatile("mad.hi.cc.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t madc_lo(const uint64_t x, const uint64_t y, const uint64_t z) {
|
||||
uint64_t result;
|
||||
asm volatile("madc.lo.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t madc_lo(const uint64_t x, const uint64_t y, const uint64_t z)
|
||||
{
|
||||
uint64_t result;
|
||||
asm volatile("madc.lo.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t madc_hi(const uint64_t x, const uint64_t y, const uint64_t z) {
|
||||
uint64_t result;
|
||||
asm volatile("madc.hi.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t madc_hi(const uint64_t x, const uint64_t y, const uint64_t z)
|
||||
{
|
||||
uint64_t result;
|
||||
asm volatile("madc.hi.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t madc_lo_cc(const uint64_t x, const uint64_t y, const uint64_t z) {
|
||||
uint64_t result;
|
||||
asm volatile("madc.lo.cc.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t madc_lo_cc(const uint64_t x, const uint64_t y, const uint64_t z)
|
||||
{
|
||||
uint64_t result;
|
||||
asm volatile("madc.lo.cc.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
__device__ __forceinline__ uint64_t madc_hi_cc(const uint64_t x, const uint64_t y, const uint64_t z) {
|
||||
uint64_t result;
|
||||
asm volatile("madc.hi.cc.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
__device__ __forceinline__ uint64_t madc_hi_cc(const uint64_t x, const uint64_t y, const uint64_t z)
|
||||
{
|
||||
uint64_t result;
|
||||
asm volatile("madc.hi.cc.u64 %0, %1, %2, %3;" : "=l"(result) : "l"(x), "l"(y), "l"(z));
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace u64
|
||||
} // namespace u64
|
||||
|
||||
__device__ __forceinline__ void bar_arrive(const unsigned name, const unsigned count) {
|
||||
asm volatile("bar.arrive %0, %1;" : : "r"(name), "r"(count) : "memory");
|
||||
}
|
||||
__device__ __forceinline__ void bar_arrive(const unsigned name, const unsigned count)
|
||||
{
|
||||
asm volatile("bar.arrive %0, %1;" : : "r"(name), "r"(count) : "memory");
|
||||
}
|
||||
|
||||
__device__ __forceinline__ void bar_sync(const unsigned name, const unsigned count) { asm volatile("bar.sync %0, %1;" : : "r"(name), "r"(count) : "memory"); }
|
||||
__device__ __forceinline__ void bar_sync(const unsigned name, const unsigned count)
|
||||
{
|
||||
asm volatile("bar.sync %0, %1;" : : "r"(name), "r"(count) : "memory");
|
||||
}
|
||||
|
||||
} // namespace ptx
|
||||
@@ -1,15 +1,15 @@
|
||||
// based on https://leimao.github.io/blog/CUDA-Shared-Memory-Templated-Kernel/
|
||||
// may be outdated, but only worked like that
|
||||
// may be outdated, but only worked like that
|
||||
|
||||
// -------------------------------------------------------------
|
||||
// cuDPP -- CUDA Data Parallel Primitives library
|
||||
// -------------------------------------------------------------
|
||||
// $Revision: 5636 $
|
||||
// $Date: 2009-07-02 13:39:38 +1000 (Thu, 02 Jul 2009) $
|
||||
// -------------------------------------------------------------
|
||||
// This source code is distributed under the terms of license.txt
|
||||
// -------------------------------------------------------------
|
||||
// This source code is distributed under the terms of license.txt
|
||||
// in the root directory of this source distribution.
|
||||
// -------------------------------------------------------------
|
||||
// -------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* @file
|
||||
@@ -18,18 +18,18 @@
|
||||
* @brief Shared memory declaration struct for templatized types.
|
||||
*
|
||||
* Because dynamically sized shared memory arrays are declared "extern" in CUDA,
|
||||
* we can't templatize their types directly. To get around this, we declare a
|
||||
* simple wrapper struct that will declare the extern array with a different
|
||||
* we can't templatize their types directly. To get around this, we declare a
|
||||
* simple wrapper struct that will declare the extern array with a different
|
||||
* name depending on the type. This avoids linker errors about multiple
|
||||
* definitions.
|
||||
*
|
||||
* To use dynamically allocated shared memory in a templatized __global__ or
|
||||
*
|
||||
* To use dynamically allocated shared memory in a templatized __global__ or
|
||||
* __device__ function, just replace code like this:
|
||||
*
|
||||
* <pre>
|
||||
* template<class T>
|
||||
* __global__ void
|
||||
* foo( T* d_out, T* d_in)
|
||||
* foo( T* d_out, T* d_in)
|
||||
* {
|
||||
* // Shared mem size is determined by the host app at run time
|
||||
* extern __shared__ T sdata[];
|
||||
@@ -38,12 +38,12 @@
|
||||
* ...
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
*
|
||||
* With this
|
||||
* <pre>
|
||||
* template<class T>
|
||||
* __global__ void
|
||||
* foo( T* d_out, T* d_in)
|
||||
* foo( T* d_out, T* d_in)
|
||||
* {
|
||||
* // Shared mem size is determined by the host app at run time
|
||||
* SharedMemory<T> smem;
|
||||
@@ -58,33 +58,32 @@
|
||||
#ifndef _SHAREDMEM_H_
|
||||
#define _SHAREDMEM_H_
|
||||
|
||||
#include "../curves/bls12_381/curve_config.cuh"
|
||||
#include "../curves/bls12_377/curve_config.cuh"
|
||||
#include "../curves/bls12_381/curve_config.cuh"
|
||||
#include "../curves/bn254/curve_config.cuh"
|
||||
|
||||
/** @brief Wrapper class for templatized dynamic shared memory arrays.
|
||||
*
|
||||
* This struct uses template specialization on the type \a T to declare
|
||||
* a differently named dynamic shared memory array for each type
|
||||
* (\code extern __shared__ T s_type[] \endcode).
|
||||
*
|
||||
* Currently there are specializations for the following types:
|
||||
* \c int, \c uint, \c char, \c uchar, \c short, \c ushort, \c long,
|
||||
* \c unsigned long, \c bool, \c float, and \c double. One can also specialize it
|
||||
* for user defined types.
|
||||
*/
|
||||
*
|
||||
* This struct uses template specialization on the type \a T to declare
|
||||
* a differently named dynamic shared memory array for each type
|
||||
* (\code extern __shared__ T s_type[] \endcode).
|
||||
*
|
||||
* Currently there are specializations for the following types:
|
||||
* \c int, \c uint, \c char, \c uchar, \c short, \c ushort, \c long,
|
||||
* \c unsigned long, \c bool, \c float, and \c double. One can also specialize it
|
||||
* for user defined types.
|
||||
*/
|
||||
template <typename T>
|
||||
struct SharedMemory
|
||||
{
|
||||
//! @brief Return a pointer to the runtime-sized shared memory array.
|
||||
//! @returns Pointer to runtime-sized shared memory array
|
||||
__device__ T* getPointer()
|
||||
{
|
||||
extern __device__ void Error_UnsupportedType(); // Ensure that we won't compile any un-specialized types
|
||||
Error_UnsupportedType();
|
||||
return (T*)0;
|
||||
}
|
||||
// TODO: Use operator overloading to make this class look like a regular array
|
||||
struct SharedMemory {
|
||||
//! @brief Return a pointer to the runtime-sized shared memory array.
|
||||
//! @returns Pointer to runtime-sized shared memory array
|
||||
__device__ T* getPointer()
|
||||
{
|
||||
extern __device__ void Error_UnsupportedType(); // Ensure that we won't compile any un-specialized types
|
||||
Error_UnsupportedType();
|
||||
return (T*)0;
|
||||
}
|
||||
// TODO: Use operator overloading to make this class look like a regular array
|
||||
};
|
||||
|
||||
// Following are the specializations for the following types.
|
||||
@@ -92,124 +91,183 @@ struct SharedMemory
|
||||
// One could also specialize it for user-defined types.
|
||||
|
||||
template <>
|
||||
struct SharedMemory <int>
|
||||
{
|
||||
__device__ int* getPointer() { extern __shared__ int s_int[]; return s_int; }
|
||||
struct SharedMemory<int> {
|
||||
__device__ int* getPointer()
|
||||
{
|
||||
extern __shared__ int s_int[];
|
||||
return s_int;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <unsigned int>
|
||||
{
|
||||
__device__ unsigned int* getPointer() { extern __shared__ unsigned int s_uint[]; return s_uint; }
|
||||
struct SharedMemory<unsigned int> {
|
||||
__device__ unsigned int* getPointer()
|
||||
{
|
||||
extern __shared__ unsigned int s_uint[];
|
||||
return s_uint;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <char>
|
||||
{
|
||||
__device__ char* getPointer() { extern __shared__ char s_char[]; return s_char; }
|
||||
struct SharedMemory<char> {
|
||||
__device__ char* getPointer()
|
||||
{
|
||||
extern __shared__ char s_char[];
|
||||
return s_char;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <unsigned char>
|
||||
{
|
||||
__device__ unsigned char* getPointer() { extern __shared__ unsigned char s_uchar[]; return s_uchar; }
|
||||
struct SharedMemory<unsigned char> {
|
||||
__device__ unsigned char* getPointer()
|
||||
{
|
||||
extern __shared__ unsigned char s_uchar[];
|
||||
return s_uchar;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <short>
|
||||
{
|
||||
__device__ short* getPointer() { extern __shared__ short s_short[]; return s_short; }
|
||||
struct SharedMemory<short> {
|
||||
__device__ short* getPointer()
|
||||
{
|
||||
extern __shared__ short s_short[];
|
||||
return s_short;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <unsigned short>
|
||||
{
|
||||
__device__ unsigned short* getPointer() { extern __shared__ unsigned short s_ushort[]; return s_ushort; }
|
||||
struct SharedMemory<unsigned short> {
|
||||
__device__ unsigned short* getPointer()
|
||||
{
|
||||
extern __shared__ unsigned short s_ushort[];
|
||||
return s_ushort;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <long>
|
||||
{
|
||||
__device__ long* getPointer() { extern __shared__ long s_long[]; return s_long; }
|
||||
struct SharedMemory<long> {
|
||||
__device__ long* getPointer()
|
||||
{
|
||||
extern __shared__ long s_long[];
|
||||
return s_long;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <unsigned long>
|
||||
{
|
||||
__device__ unsigned long* getPointer() { extern __shared__ unsigned long s_ulong[]; return s_ulong; }
|
||||
struct SharedMemory<unsigned long> {
|
||||
__device__ unsigned long* getPointer()
|
||||
{
|
||||
extern __shared__ unsigned long s_ulong[];
|
||||
return s_ulong;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <long long>
|
||||
{
|
||||
__device__ long long* getPointer() { extern __shared__ long long s_longlong[]; return s_longlong; }
|
||||
struct SharedMemory<long long> {
|
||||
__device__ long long* getPointer()
|
||||
{
|
||||
extern __shared__ long long s_longlong[];
|
||||
return s_longlong;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <unsigned long long>
|
||||
{
|
||||
__device__ unsigned long long* getPointer() { extern __shared__ unsigned long long s_ulonglong[]; return s_ulonglong; }
|
||||
struct SharedMemory<unsigned long long> {
|
||||
__device__ unsigned long long* getPointer()
|
||||
{
|
||||
extern __shared__ unsigned long long s_ulonglong[];
|
||||
return s_ulonglong;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <bool>
|
||||
{
|
||||
__device__ bool* getPointer() { extern __shared__ bool s_bool[]; return s_bool; }
|
||||
struct SharedMemory<bool> {
|
||||
__device__ bool* getPointer()
|
||||
{
|
||||
extern __shared__ bool s_bool[];
|
||||
return s_bool;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <float>
|
||||
{
|
||||
__device__ float* getPointer() { extern __shared__ float s_float[]; return s_float; }
|
||||
struct SharedMemory<float> {
|
||||
__device__ float* getPointer()
|
||||
{
|
||||
extern __shared__ float s_float[];
|
||||
return s_float;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <double>
|
||||
{
|
||||
__device__ double* getPointer() { extern __shared__ double s_double[]; return s_double; }
|
||||
struct SharedMemory<double> {
|
||||
__device__ double* getPointer()
|
||||
{
|
||||
extern __shared__ double s_double[];
|
||||
return s_double;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <uchar4>
|
||||
{
|
||||
__device__ uchar4* getPointer() { extern __shared__ uchar4 s_uchar4[]; return s_uchar4; }
|
||||
struct SharedMemory<uchar4> {
|
||||
__device__ uchar4* getPointer()
|
||||
{
|
||||
extern __shared__ uchar4 s_uchar4[];
|
||||
return s_uchar4;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <BLS12_381::scalar_t>
|
||||
{
|
||||
__device__ BLS12_381::scalar_t* getPointer() { extern __shared__ BLS12_381::scalar_t s_scalar_t_bls12_381[]; return s_scalar_t_bls12_381; }
|
||||
struct SharedMemory<BLS12_381::scalar_t> {
|
||||
__device__ BLS12_381::scalar_t* getPointer()
|
||||
{
|
||||
extern __shared__ BLS12_381::scalar_t s_scalar_t_bls12_381[];
|
||||
return s_scalar_t_bls12_381;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <BLS12_381::projective_t>
|
||||
{
|
||||
__device__ BLS12_381::projective_t* getPointer() { extern __shared__ BLS12_381::projective_t s_projective_t_bls12_381[]; return s_projective_t_bls12_381; }
|
||||
struct SharedMemory<BLS12_381::projective_t> {
|
||||
__device__ BLS12_381::projective_t* getPointer()
|
||||
{
|
||||
extern __shared__ BLS12_381::projective_t s_projective_t_bls12_381[];
|
||||
return s_projective_t_bls12_381;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <BLS12_377::scalar_t>
|
||||
{
|
||||
__device__ BLS12_377::scalar_t* getPointer() { extern __shared__ BLS12_377::scalar_t s_scalar_t_bls12_377[]; return s_scalar_t_bls12_377; }
|
||||
struct SharedMemory<BLS12_377::scalar_t> {
|
||||
__device__ BLS12_377::scalar_t* getPointer()
|
||||
{
|
||||
extern __shared__ BLS12_377::scalar_t s_scalar_t_bls12_377[];
|
||||
return s_scalar_t_bls12_377;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <BLS12_377::projective_t>
|
||||
{
|
||||
__device__ BLS12_377::projective_t* getPointer() { extern __shared__ BLS12_377::projective_t s_projective_t_bls12_377[]; return s_projective_t_bls12_377; }
|
||||
};
|
||||
|
||||
|
||||
template <>
|
||||
struct SharedMemory <BN254::scalar_t>
|
||||
{
|
||||
__device__ BN254::scalar_t* getPointer() { extern __shared__ BN254::scalar_t s_scalar_t_bn254[]; return s_scalar_t_bn254; }
|
||||
struct SharedMemory<BLS12_377::projective_t> {
|
||||
__device__ BLS12_377::projective_t* getPointer()
|
||||
{
|
||||
extern __shared__ BLS12_377::projective_t s_projective_t_bls12_377[];
|
||||
return s_projective_t_bls12_377;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory <BN254::projective_t>
|
||||
{
|
||||
__device__ BN254::projective_t* getPointer() { extern __shared__ BN254::projective_t s_projective_t_bn254[]; return s_projective_t_bn254; }
|
||||
struct SharedMemory<BN254::scalar_t> {
|
||||
__device__ BN254::scalar_t* getPointer()
|
||||
{
|
||||
extern __shared__ BN254::scalar_t s_scalar_t_bn254[];
|
||||
return s_scalar_t_bn254;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory<BN254::projective_t> {
|
||||
__device__ BN254::projective_t* getPointer()
|
||||
{
|
||||
extern __shared__ BN254::projective_t s_projective_t_bn254[];
|
||||
return s_projective_t_bn254;
|
||||
}
|
||||
};
|
||||
#endif //_SHAREDMEM_H_
|
||||
|
||||
|
||||
@@ -3,11 +3,15 @@
|
||||
|
||||
#define LIMBS_ALIGNMENT(x) ((x) % 4 == 0 ? 16 : ((x) % 2 == 0 ? 8 : 4))
|
||||
|
||||
template <unsigned LIMBS_COUNT> struct __align__(LIMBS_ALIGNMENT(LIMBS_COUNT)) storage {
|
||||
template <unsigned LIMBS_COUNT>
|
||||
struct __align__(LIMBS_ALIGNMENT(LIMBS_COUNT)) storage
|
||||
{
|
||||
static constexpr unsigned LC = LIMBS_COUNT;
|
||||
uint32_t limbs[LIMBS_COUNT];
|
||||
};
|
||||
|
||||
template <unsigned OMEGAS_COUNT, unsigned LIMBS_COUNT> struct __align__(LIMBS_ALIGNMENT(LIMBS_COUNT)) storage_array {
|
||||
storage<LIMBS_COUNT> storages[OMEGAS_COUNT];
|
||||
template <unsigned OMEGAS_COUNT, unsigned LIMBS_COUNT>
|
||||
struct __align__(LIMBS_ALIGNMENT(LIMBS_COUNT)) storage_array
|
||||
{
|
||||
storage<LIMBS_COUNT> storages[OMEGAS_COUNT];
|
||||
};
|
||||
@@ -1,13 +1,10 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_CURVE_NAME_L::{Fq as Fq_CURVE_NAME_U, Fr as Fr_CURVE_NAME_U, G1Affine as G1Affine_CURVE_NAME_U, G1Projective as G1Projective_CURVE_NAME_U};
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger_limbs_q, BigInteger_limbs_p, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
@@ -143,7 +140,6 @@ impl Point_CURVE_NAME_U {
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_CURVE_NAME_U {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_CURVE_NAME_U::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
@@ -155,7 +151,6 @@ impl Point_CURVE_NAME_U {
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_CURVE_NAME_U) -> Point_CURVE_NAME_U {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
|
||||
@@ -1,13 +1,10 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_CURVE_NAME_L::{Fq as Fq_CURVE_NAME_U, Fr as Fr_CURVE_NAME_U, G1Affine as G1Affine_CURVE_NAME_U, G1Projective as G1Projective_CURVE_NAME_U};
|
||||
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger_limbs_p, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
|
||||
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
|
||||
|
||||
@@ -1,14 +1,9 @@
|
||||
use std::ffi::{c_int, c_uint};
|
||||
|
||||
use rand::{rngs::StdRng, RngCore, SeedableRng};
|
||||
|
||||
|
||||
use crate::curves::CURVE_NAME_L::*;
|
||||
|
||||
use ark_CURVE_NAME_L::{Fr as Fr_CURVE_NAME_U, G1Projective as G1Projective_CURVE_NAME_U};
|
||||
use ark_ff::PrimeField;
|
||||
use ark_std::UniformRand;
|
||||
|
||||
use rustacuda::prelude::*;
|
||||
use rustacuda_core::DevicePointer;
|
||||
use rustacuda::memory::{DeviceBox, CopyDestination, DeviceCopy};
|
||||
|
||||
@@ -1,15 +1,12 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_bls12_377::{Fq as Fq_BLS12_377, Fr as Fr_BLS12_377, G1Affine as G1Affine_BLS12_377, G1Projective as G1Projective_BLS12_377};
|
||||
|
||||
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use ark_bls12_377::{Fq as Fq_BLS12_377, G1Affine as G1Affine_BLS12_377, G1Projective as G1Projective_BLS12_377};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
|
||||
|
||||
use ark_ff::{BigInteger256, BigInteger384, PrimeField};
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::ffi::c_uint;
|
||||
use std::mem::transmute;
|
||||
|
||||
#[derive(Debug, PartialEq, Copy, Clone)]
|
||||
#[repr(C)]
|
||||
@@ -27,9 +24,7 @@ impl<const NUM_LIMBS: usize> Default for Field_BLS12_377<NUM_LIMBS> {
|
||||
|
||||
impl<const NUM_LIMBS: usize> Field_BLS12_377<NUM_LIMBS> {
|
||||
pub fn zero() -> Self {
|
||||
Field_BLS12_377 {
|
||||
s: [0u32; NUM_LIMBS],
|
||||
}
|
||||
Field_BLS12_377 { s: [0u32; NUM_LIMBS] }
|
||||
}
|
||||
|
||||
pub fn one() -> Self {
|
||||
@@ -41,7 +36,10 @@ impl<const NUM_LIMBS: usize> Field_BLS12_377<NUM_LIMBS> {
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.s
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.map(|s| {
|
||||
s.to_le_bytes()
|
||||
.to_vec()
|
||||
})
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
@@ -50,7 +48,9 @@ impl<const NUM_LIMBS: usize> Field_BLS12_377<NUM_LIMBS> {
|
||||
pub const BASE_LIMBS_BLS12_377: usize = 12;
|
||||
pub const SCALAR_LIMBS_BLS12_377: usize = 8;
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type BaseField_BLS12_377 = Field_BLS12_377<BASE_LIMBS_BLS12_377>;
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type ScalarField_BLS12_377 = Field_BLS12_377<SCALAR_LIMBS_BLS12_377>;
|
||||
|
||||
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
|
||||
@@ -60,7 +60,9 @@ fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
|
||||
padded[..val.len()].copy_from_slice(&val);
|
||||
padded
|
||||
}
|
||||
n if n == NUM_LIMBS => val.try_into().unwrap(),
|
||||
n if n == NUM_LIMBS => val
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
_ => panic!("slice has too many elements"),
|
||||
}
|
||||
}
|
||||
@@ -77,7 +79,11 @@ impl BaseField_BLS12_377 {
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger384 {
|
||||
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
BigInteger384::new(
|
||||
u32_vec_to_u64_vec(&self.limbs())
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger384) -> Self {
|
||||
@@ -91,7 +97,11 @@ impl ScalarField_BLS12_377 {
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
BigInteger256::new(
|
||||
u32_vec_to_u64_vec(&self.limbs())
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger256) -> Self {
|
||||
@@ -136,25 +146,41 @@ impl Point_BLS12_377 {
|
||||
|
||||
pub fn to_ark(&self) -> G1Projective_BLS12_377 {
|
||||
//TODO: generic conversion
|
||||
self.to_ark_affine().into_projective()
|
||||
self.to_ark_affine()
|
||||
.into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_BLS12_377 {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let proj_x_field = Fq_BLS12_377::from_le_bytes_mod_order(
|
||||
&self
|
||||
.x
|
||||
.to_bytes_le(),
|
||||
);
|
||||
let proj_y_field = Fq_BLS12_377::from_le_bytes_mod_order(
|
||||
&self
|
||||
.y
|
||||
.to_bytes_le(),
|
||||
);
|
||||
let proj_z_field = Fq_BLS12_377::from_le_bytes_mod_order(
|
||||
&self
|
||||
.z
|
||||
.to_bytes_le(),
|
||||
);
|
||||
let inverse_z = proj_z_field
|
||||
.inverse()
|
||||
.unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_BLS12_377::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_BLS12_377) -> Point_BLS12_377 {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_inv = ark
|
||||
.z
|
||||
.inverse()
|
||||
.unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point_BLS12_377 {
|
||||
@@ -196,17 +222,19 @@ impl PointAffineNoInfinity_BLS12_377 {
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinity_BLS12_377 {
|
||||
x: BaseField_BLS12_377 {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_BLS12_377 {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
x: BaseField_BLS12_377 { s: get_fixed_limbs(x) },
|
||||
y: BaseField_BLS12_377 { s: get_fixed_limbs(y) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
[
|
||||
self.x
|
||||
.limbs(),
|
||||
self.y
|
||||
.limbs(),
|
||||
]
|
||||
.concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> Point_BLS12_377 {
|
||||
@@ -218,13 +246,31 @@ impl PointAffineNoInfinity_BLS12_377 {
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_BLS12_377 {
|
||||
G1Affine_BLS12_377::new(Fq_BLS12_377::new(self.x.to_ark()), Fq_BLS12_377::new(self.y.to_ark()), false)
|
||||
G1Affine_BLS12_377::new(
|
||||
Fq_BLS12_377::new(
|
||||
self.x
|
||||
.to_ark(),
|
||||
),
|
||||
Fq_BLS12_377::new(
|
||||
self.y
|
||||
.to_ark(),
|
||||
),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_BLS12_377 {
|
||||
G1Affine_BLS12_377::new(
|
||||
Fq_BLS12_377::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_BLS12_377::from_repr(self.y.to_ark()).unwrap(),
|
||||
Fq_BLS12_377::from_repr(
|
||||
self.x
|
||||
.to_ark(),
|
||||
)
|
||||
.unwrap(),
|
||||
Fq_BLS12_377::from_repr(
|
||||
self.y
|
||||
.to_ark(),
|
||||
)
|
||||
.unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
@@ -242,30 +288,35 @@ impl Point_BLS12_377 {
|
||||
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
Point_BLS12_377 {
|
||||
x: BaseField_BLS12_377 {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_BLS12_377 {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
z: BaseField_BLS12_377 {
|
||||
s: get_fixed_limbs(z),
|
||||
},
|
||||
x: BaseField_BLS12_377 { s: get_fixed_limbs(x) },
|
||||
y: BaseField_BLS12_377 { s: get_fixed_limbs(y) },
|
||||
z: BaseField_BLS12_377 { s: get_fixed_limbs(z) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> Point_BLS12_377 {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BASE_LIMBS_BLS12_377, "length must be 3 * {}", BASE_LIMBS_BLS12_377);
|
||||
assert_eq!(
|
||||
l,
|
||||
3 * BASE_LIMBS_BLS12_377,
|
||||
"length must be 3 * {}",
|
||||
BASE_LIMBS_BLS12_377
|
||||
);
|
||||
Point_BLS12_377 {
|
||||
x: BaseField_BLS12_377 {
|
||||
s: value[..BASE_LIMBS_BLS12_377].try_into().unwrap(),
|
||||
s: value[..BASE_LIMBS_BLS12_377]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
},
|
||||
y: BaseField_BLS12_377 {
|
||||
s: value[BASE_LIMBS_BLS12_377..BASE_LIMBS_BLS12_377 * 2].try_into().unwrap(),
|
||||
s: value[BASE_LIMBS_BLS12_377..BASE_LIMBS_BLS12_377 * 2]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
},
|
||||
z: BaseField_BLS12_377 {
|
||||
s: value[BASE_LIMBS_BLS12_377 * 2..].try_into().unwrap(),
|
||||
s: value[BASE_LIMBS_BLS12_377 * 2..]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -273,16 +324,21 @@ impl Point_BLS12_377 {
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity_BLS12_377 {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity_BLS12_377 {
|
||||
x: BaseField_BLS12_377::from_ark(ark_affine.x.into_repr()),
|
||||
y: BaseField_BLS12_377::from_ark(ark_affine.y.into_repr()),
|
||||
x: BaseField_BLS12_377::from_ark(
|
||||
ark_affine
|
||||
.x
|
||||
.into_repr(),
|
||||
),
|
||||
y: BaseField_BLS12_377::from_ark(
|
||||
ark_affine
|
||||
.y
|
||||
.into_repr(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_BLS12_377 {
|
||||
PointAffineNoInfinity_BLS12_377 {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
PointAffineNoInfinity_BLS12_377 { x: self.x, y: self.y }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -294,12 +350,9 @@ impl ScalarField_BLS12_377 {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ark_bls12_377::{Fr as Fr_BLS12_377};
|
||||
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::bls12_377::{Point_BLS12_377, ScalarField_BLS12_377}};
|
||||
use crate::curves::bls12_377::{Point_BLS12_377, ScalarField_BLS12_377};
|
||||
|
||||
#[test]
|
||||
fn test_ark_scalar_convert() {
|
||||
@@ -329,4 +382,4 @@ mod tests {
|
||||
);
|
||||
assert!(left != right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,16 +1,13 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_bls12_381::{Fq as Fq_BLS12_381, Fr as Fr_BLS12_381, G1Affine as G1Affine_BLS12_381, G1Projective as G1Projective_BLS12_381};
|
||||
|
||||
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use ark_bls12_381::{Fq as Fq_BLS12_381, G1Affine as G1Affine_BLS12_381, G1Projective as G1Projective_BLS12_381};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
|
||||
use serde::{Serialize, Deserialize};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
|
||||
|
||||
use ark_ff::{BigInteger256, BigInteger384, PrimeField};
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use std::ffi::c_uint;
|
||||
use std::mem::transmute;
|
||||
|
||||
#[derive(Debug, PartialEq, Copy, Clone)]
|
||||
#[repr(C)]
|
||||
@@ -28,9 +25,7 @@ impl<const NUM_LIMBS: usize> Default for Field_BLS12_381<NUM_LIMBS> {
|
||||
|
||||
impl<const NUM_LIMBS: usize> Field_BLS12_381<NUM_LIMBS> {
|
||||
pub fn zero() -> Self {
|
||||
Field_BLS12_381 {
|
||||
s: [0u32; NUM_LIMBS],
|
||||
}
|
||||
Field_BLS12_381 { s: [0u32; NUM_LIMBS] }
|
||||
}
|
||||
|
||||
pub fn one() -> Self {
|
||||
@@ -42,7 +37,10 @@ impl<const NUM_LIMBS: usize> Field_BLS12_381<NUM_LIMBS> {
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.s
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.map(|s| {
|
||||
s.to_le_bytes()
|
||||
.to_vec()
|
||||
})
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
@@ -51,7 +49,9 @@ impl<const NUM_LIMBS: usize> Field_BLS12_381<NUM_LIMBS> {
|
||||
pub const BASE_LIMBS_BLS12_381: usize = 12;
|
||||
pub const SCALAR_LIMBS_BLS12_381: usize = 8;
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type BaseField_BLS12_381 = Field_BLS12_381<BASE_LIMBS_BLS12_381>;
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type ScalarField_BLS12_381 = Field_BLS12_381<SCALAR_LIMBS_BLS12_381>;
|
||||
|
||||
impl Serialize for ScalarField_BLS12_381 {
|
||||
@@ -59,7 +59,8 @@ impl Serialize for ScalarField_BLS12_381 {
|
||||
where
|
||||
S: serde::Serializer,
|
||||
{
|
||||
self.s.serialize(serializer)
|
||||
self.s
|
||||
.serialize(serializer)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,7 +81,9 @@ fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
|
||||
padded[..val.len()].copy_from_slice(&val);
|
||||
padded
|
||||
}
|
||||
n if n == NUM_LIMBS => val.try_into().unwrap(),
|
||||
n if n == NUM_LIMBS => val
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
_ => panic!("slice has too many elements"),
|
||||
}
|
||||
}
|
||||
@@ -97,7 +100,11 @@ impl BaseField_BLS12_381 {
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger384 {
|
||||
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
BigInteger384::new(
|
||||
u32_vec_to_u64_vec(&self.limbs())
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger384) -> Self {
|
||||
@@ -111,7 +118,11 @@ impl ScalarField_BLS12_381 {
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
BigInteger256::new(
|
||||
u32_vec_to_u64_vec(&self.limbs())
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger256) -> Self {
|
||||
@@ -156,25 +167,41 @@ impl Point_BLS12_381 {
|
||||
|
||||
pub fn to_ark(&self) -> G1Projective_BLS12_381 {
|
||||
//TODO: generic conversion
|
||||
self.to_ark_affine().into_projective()
|
||||
self.to_ark_affine()
|
||||
.into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_BLS12_381 {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let proj_x_field = Fq_BLS12_381::from_le_bytes_mod_order(
|
||||
&self
|
||||
.x
|
||||
.to_bytes_le(),
|
||||
);
|
||||
let proj_y_field = Fq_BLS12_381::from_le_bytes_mod_order(
|
||||
&self
|
||||
.y
|
||||
.to_bytes_le(),
|
||||
);
|
||||
let proj_z_field = Fq_BLS12_381::from_le_bytes_mod_order(
|
||||
&self
|
||||
.z
|
||||
.to_bytes_le(),
|
||||
);
|
||||
let inverse_z = proj_z_field
|
||||
.inverse()
|
||||
.unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_BLS12_381::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_BLS12_381) -> Point_BLS12_381 {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_inv = ark
|
||||
.z
|
||||
.inverse()
|
||||
.unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point_BLS12_381 {
|
||||
@@ -216,17 +243,19 @@ impl PointAffineNoInfinity_BLS12_381 {
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinity_BLS12_381 {
|
||||
x: BaseField_BLS12_381 {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_BLS12_381 {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
x: BaseField_BLS12_381 { s: get_fixed_limbs(x) },
|
||||
y: BaseField_BLS12_381 { s: get_fixed_limbs(y) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
[
|
||||
self.x
|
||||
.limbs(),
|
||||
self.y
|
||||
.limbs(),
|
||||
]
|
||||
.concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> Point_BLS12_381 {
|
||||
@@ -238,13 +267,31 @@ impl PointAffineNoInfinity_BLS12_381 {
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_BLS12_381 {
|
||||
G1Affine_BLS12_381::new(Fq_BLS12_381::new(self.x.to_ark()), Fq_BLS12_381::new(self.y.to_ark()), false)
|
||||
G1Affine_BLS12_381::new(
|
||||
Fq_BLS12_381::new(
|
||||
self.x
|
||||
.to_ark(),
|
||||
),
|
||||
Fq_BLS12_381::new(
|
||||
self.y
|
||||
.to_ark(),
|
||||
),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_BLS12_381 {
|
||||
G1Affine_BLS12_381::new(
|
||||
Fq_BLS12_381::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_BLS12_381::from_repr(self.y.to_ark()).unwrap(),
|
||||
Fq_BLS12_381::from_repr(
|
||||
self.x
|
||||
.to_ark(),
|
||||
)
|
||||
.unwrap(),
|
||||
Fq_BLS12_381::from_repr(
|
||||
self.y
|
||||
.to_ark(),
|
||||
)
|
||||
.unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
@@ -262,30 +309,35 @@ impl Point_BLS12_381 {
|
||||
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
Point_BLS12_381 {
|
||||
x: BaseField_BLS12_381 {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_BLS12_381 {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
z: BaseField_BLS12_381 {
|
||||
s: get_fixed_limbs(z),
|
||||
},
|
||||
x: BaseField_BLS12_381 { s: get_fixed_limbs(x) },
|
||||
y: BaseField_BLS12_381 { s: get_fixed_limbs(y) },
|
||||
z: BaseField_BLS12_381 { s: get_fixed_limbs(z) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_xy_limbs(value: &[u32]) -> Point_BLS12_381 {
|
||||
let l = value.len();
|
||||
assert_eq!(l, 3 * BASE_LIMBS_BLS12_381, "length must be 3 * {}", BASE_LIMBS_BLS12_381);
|
||||
assert_eq!(
|
||||
l,
|
||||
3 * BASE_LIMBS_BLS12_381,
|
||||
"length must be 3 * {}",
|
||||
BASE_LIMBS_BLS12_381
|
||||
);
|
||||
Point_BLS12_381 {
|
||||
x: BaseField_BLS12_381 {
|
||||
s: value[..BASE_LIMBS_BLS12_381].try_into().unwrap(),
|
||||
s: value[..BASE_LIMBS_BLS12_381]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
},
|
||||
y: BaseField_BLS12_381 {
|
||||
s: value[BASE_LIMBS_BLS12_381..BASE_LIMBS_BLS12_381 * 2].try_into().unwrap(),
|
||||
s: value[BASE_LIMBS_BLS12_381..BASE_LIMBS_BLS12_381 * 2]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
},
|
||||
z: BaseField_BLS12_381 {
|
||||
s: value[BASE_LIMBS_BLS12_381 * 2..].try_into().unwrap(),
|
||||
s: value[BASE_LIMBS_BLS12_381 * 2..]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -293,16 +345,21 @@ impl Point_BLS12_381 {
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity_BLS12_381 {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity_BLS12_381 {
|
||||
x: BaseField_BLS12_381::from_ark(ark_affine.x.into_repr()),
|
||||
y: BaseField_BLS12_381::from_ark(ark_affine.y.into_repr()),
|
||||
x: BaseField_BLS12_381::from_ark(
|
||||
ark_affine
|
||||
.x
|
||||
.into_repr(),
|
||||
),
|
||||
y: BaseField_BLS12_381::from_ark(
|
||||
ark_affine
|
||||
.y
|
||||
.into_repr(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_BLS12_381 {
|
||||
PointAffineNoInfinity_BLS12_381 {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
PointAffineNoInfinity_BLS12_381 { x: self.x, y: self.y }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -314,12 +371,10 @@ impl ScalarField_BLS12_381 {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ark_bls12_381::{Fr as Fr_BLS12_381};
|
||||
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::bls12_381::{Point_BLS12_381, ScalarField_BLS12_381}};
|
||||
use crate::curves::bls12_381::{Point_BLS12_381, ScalarField_BLS12_381};
|
||||
|
||||
#[test]
|
||||
fn test_ark_scalar_convert() {
|
||||
@@ -349,4 +404,4 @@ mod tests {
|
||||
);
|
||||
assert!(left != right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,15 +1,12 @@
|
||||
use std::ffi::c_uint;
|
||||
|
||||
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
|
||||
|
||||
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
|
||||
use ark_bn254::{Fq as Fq_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
|
||||
use ark_ec::AffineCurve;
|
||||
use ark_ff::{BigInteger256, PrimeField};
|
||||
use std::mem::transmute;
|
||||
use ark_ff::Field;
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}};
|
||||
|
||||
use ark_ff::{BigInteger256, PrimeField};
|
||||
use rustacuda_core::DeviceCopy;
|
||||
use rustacuda_derive::DeviceCopy;
|
||||
use std::ffi::c_uint;
|
||||
use std::mem::transmute;
|
||||
|
||||
#[derive(Debug, PartialEq, Copy, Clone)]
|
||||
#[repr(C)]
|
||||
@@ -27,9 +24,7 @@ impl<const NUM_LIMBS: usize> Default for Field_BN254<NUM_LIMBS> {
|
||||
|
||||
impl<const NUM_LIMBS: usize> Field_BN254<NUM_LIMBS> {
|
||||
pub fn zero() -> Self {
|
||||
Field_BN254 {
|
||||
s: [0u32; NUM_LIMBS],
|
||||
}
|
||||
Field_BN254 { s: [0u32; NUM_LIMBS] }
|
||||
}
|
||||
|
||||
pub fn one() -> Self {
|
||||
@@ -41,7 +36,10 @@ impl<const NUM_LIMBS: usize> Field_BN254<NUM_LIMBS> {
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.s
|
||||
.iter()
|
||||
.map(|s| s.to_le_bytes().to_vec())
|
||||
.map(|s| {
|
||||
s.to_le_bytes()
|
||||
.to_vec()
|
||||
})
|
||||
.flatten()
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
@@ -50,7 +48,9 @@ impl<const NUM_LIMBS: usize> Field_BN254<NUM_LIMBS> {
|
||||
pub const BASE_LIMBS_BN254: usize = 8;
|
||||
pub const SCALAR_LIMBS_BN254: usize = 8;
|
||||
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type BaseField_BN254 = Field_BN254<BASE_LIMBS_BN254>;
|
||||
#[allow(non_camel_case_types)]
|
||||
pub type ScalarField_BN254 = Field_BN254<SCALAR_LIMBS_BN254>;
|
||||
|
||||
fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
|
||||
@@ -60,7 +60,9 @@ fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
|
||||
padded[..val.len()].copy_from_slice(&val);
|
||||
padded
|
||||
}
|
||||
n if n == NUM_LIMBS => val.try_into().unwrap(),
|
||||
n if n == NUM_LIMBS => val
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
_ => panic!("slice has too many elements"),
|
||||
}
|
||||
}
|
||||
@@ -71,7 +73,11 @@ impl ScalarField_BN254 {
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> BigInteger256 {
|
||||
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
|
||||
BigInteger256::new(
|
||||
u32_vec_to_u64_vec(&self.limbs())
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: BigInteger256) -> Self {
|
||||
@@ -116,25 +122,41 @@ impl Point_BN254 {
|
||||
|
||||
pub fn to_ark(&self) -> G1Projective_BN254 {
|
||||
//TODO: generic conversion
|
||||
self.to_ark_affine().into_projective()
|
||||
self.to_ark_affine()
|
||||
.into_projective()
|
||||
}
|
||||
|
||||
pub fn to_ark_affine(&self) -> G1Affine_BN254 {
|
||||
//TODO: generic conversion
|
||||
use ark_ff::Field;
|
||||
use std::ops::Mul;
|
||||
let proj_x_field = Fq_BN254::from_le_bytes_mod_order(&self.x.to_bytes_le());
|
||||
let proj_y_field = Fq_BN254::from_le_bytes_mod_order(&self.y.to_bytes_le());
|
||||
let proj_z_field = Fq_BN254::from_le_bytes_mod_order(&self.z.to_bytes_le());
|
||||
let inverse_z = proj_z_field.inverse().unwrap();
|
||||
let proj_x_field = Fq_BN254::from_le_bytes_mod_order(
|
||||
&self
|
||||
.x
|
||||
.to_bytes_le(),
|
||||
);
|
||||
let proj_y_field = Fq_BN254::from_le_bytes_mod_order(
|
||||
&self
|
||||
.y
|
||||
.to_bytes_le(),
|
||||
);
|
||||
let proj_z_field = Fq_BN254::from_le_bytes_mod_order(
|
||||
&self
|
||||
.z
|
||||
.to_bytes_le(),
|
||||
);
|
||||
let inverse_z = proj_z_field
|
||||
.inverse()
|
||||
.unwrap();
|
||||
let aff_x = proj_x_field.mul(inverse_z);
|
||||
let aff_y = proj_y_field.mul(inverse_z);
|
||||
G1Affine_BN254::new(aff_x, aff_y, false)
|
||||
}
|
||||
|
||||
pub fn from_ark(ark: G1Projective_BN254) -> Point_BN254 {
|
||||
use ark_ff::Field;
|
||||
let z_inv = ark.z.inverse().unwrap();
|
||||
let z_inv = ark
|
||||
.z
|
||||
.inverse()
|
||||
.unwrap();
|
||||
let z_invsq = z_inv * z_inv;
|
||||
let z_invq3 = z_invsq * z_inv;
|
||||
Point_BN254 {
|
||||
@@ -176,17 +198,19 @@ impl PointAffineNoInfinity_BN254 {
|
||||
///From u32 limbs x,y
|
||||
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
|
||||
PointAffineNoInfinity_BN254 {
|
||||
x: BaseField_BN254 {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_BN254 {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
x: BaseField_BN254 { s: get_fixed_limbs(x) },
|
||||
y: BaseField_BN254 { s: get_fixed_limbs(y) },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn limbs(&self) -> Vec<u32> {
|
||||
[self.x.limbs(), self.y.limbs()].concat()
|
||||
[
|
||||
self.x
|
||||
.limbs(),
|
||||
self.y
|
||||
.limbs(),
|
||||
]
|
||||
.concat()
|
||||
}
|
||||
|
||||
pub fn to_projective(&self) -> Point_BN254 {
|
||||
@@ -198,13 +222,31 @@ impl PointAffineNoInfinity_BN254 {
|
||||
}
|
||||
|
||||
pub fn to_ark(&self) -> G1Affine_BN254 {
|
||||
G1Affine_BN254::new(Fq_BN254::new(self.x.to_ark()), Fq_BN254::new(self.y.to_ark()), false)
|
||||
G1Affine_BN254::new(
|
||||
Fq_BN254::new(
|
||||
self.x
|
||||
.to_ark(),
|
||||
),
|
||||
Fq_BN254::new(
|
||||
self.y
|
||||
.to_ark(),
|
||||
),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn to_ark_repr(&self) -> G1Affine_BN254 {
|
||||
G1Affine_BN254::new(
|
||||
Fq_BN254::from_repr(self.x.to_ark()).unwrap(),
|
||||
Fq_BN254::from_repr(self.y.to_ark()).unwrap(),
|
||||
Fq_BN254::from_repr(
|
||||
self.x
|
||||
.to_ark(),
|
||||
)
|
||||
.unwrap(),
|
||||
Fq_BN254::from_repr(
|
||||
self.y
|
||||
.to_ark(),
|
||||
)
|
||||
.unwrap(),
|
||||
false,
|
||||
)
|
||||
}
|
||||
@@ -222,15 +264,9 @@ impl Point_BN254 {
|
||||
|
||||
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
|
||||
Point_BN254 {
|
||||
x: BaseField_BN254 {
|
||||
s: get_fixed_limbs(x),
|
||||
},
|
||||
y: BaseField_BN254 {
|
||||
s: get_fixed_limbs(y),
|
||||
},
|
||||
z: BaseField_BN254 {
|
||||
s: get_fixed_limbs(z),
|
||||
},
|
||||
x: BaseField_BN254 { s: get_fixed_limbs(x) },
|
||||
y: BaseField_BN254 { s: get_fixed_limbs(y) },
|
||||
z: BaseField_BN254 { s: get_fixed_limbs(z) },
|
||||
}
|
||||
}
|
||||
|
||||
@@ -239,13 +275,19 @@ impl Point_BN254 {
|
||||
assert_eq!(l, 3 * BASE_LIMBS_BN254, "length must be 3 * {}", BASE_LIMBS_BN254);
|
||||
Point_BN254 {
|
||||
x: BaseField_BN254 {
|
||||
s: value[..BASE_LIMBS_BN254].try_into().unwrap(),
|
||||
s: value[..BASE_LIMBS_BN254]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
},
|
||||
y: BaseField_BN254 {
|
||||
s: value[BASE_LIMBS_BN254..BASE_LIMBS_BN254 * 2].try_into().unwrap(),
|
||||
s: value[BASE_LIMBS_BN254..BASE_LIMBS_BN254 * 2]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
},
|
||||
z: BaseField_BN254 {
|
||||
s: value[BASE_LIMBS_BN254 * 2..].try_into().unwrap(),
|
||||
s: value[BASE_LIMBS_BN254 * 2..]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -253,16 +295,21 @@ impl Point_BN254 {
|
||||
pub fn to_affine(&self) -> PointAffineNoInfinity_BN254 {
|
||||
let ark_affine = self.to_ark_affine();
|
||||
PointAffineNoInfinity_BN254 {
|
||||
x: BaseField_BN254::from_ark(ark_affine.x.into_repr()),
|
||||
y: BaseField_BN254::from_ark(ark_affine.y.into_repr()),
|
||||
x: BaseField_BN254::from_ark(
|
||||
ark_affine
|
||||
.x
|
||||
.into_repr(),
|
||||
),
|
||||
y: BaseField_BN254::from_ark(
|
||||
ark_affine
|
||||
.y
|
||||
.into_repr(),
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinity_BN254 {
|
||||
PointAffineNoInfinity_BN254 {
|
||||
x: self.x,
|
||||
y: self.y,
|
||||
}
|
||||
PointAffineNoInfinity_BN254 { x: self.x, y: self.y }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -274,12 +321,10 @@ impl ScalarField_BN254 {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use ark_bn254::{Fr as Fr_BN254};
|
||||
|
||||
use crate::{utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec}, curves::bn254::{Point_BN254, ScalarField_BN254}};
|
||||
use crate::curves::bn254::{Point_BN254, ScalarField_BN254};
|
||||
|
||||
#[test]
|
||||
fn test_ark_scalar_convert() {
|
||||
@@ -302,11 +347,7 @@ mod tests {
|
||||
assert_eq!(left, right);
|
||||
let right = Point_BN254::from_limbs(&[0; 8], &[2, 0, 0, 0, 0, 0, 0, 0], &[0; 8]);
|
||||
assert_eq!(left, right);
|
||||
let right = Point_BN254::from_limbs(
|
||||
&[2, 0, 0, 0, 0, 0, 0, 0],
|
||||
&[0; 8],
|
||||
&[1, 0, 0, 0, 0, 0, 0, 0],
|
||||
);
|
||||
let right = Point_BN254::from_limbs(&[2, 0, 0, 0, 0, 0, 0, 0], &[0; 8], &[1, 0, 0, 0, 0, 0, 0, 0]);
|
||||
assert!(left != right);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
pub mod bls12_381;
|
||||
pub mod bls12_377;
|
||||
pub mod bls12_381;
|
||||
pub mod bn254;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
pub mod curves;
|
||||
pub mod test_bls12_377;
|
||||
pub mod test_bls12_381;
|
||||
pub mod test_bn254;
|
||||
pub mod utils;
|
||||
pub mod curves;
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
11
src/utils.rs
11
src/utils.rs
@@ -1,5 +1,5 @@
|
||||
use rand::RngCore;
|
||||
use rand::rngs::StdRng;
|
||||
use rand::RngCore;
|
||||
use rand::SeedableRng;
|
||||
|
||||
pub fn from_limbs<T>(limbs: Vec<u32>, chunk_size: usize, f: fn(&[u32]) -> T) -> Vec<T> {
|
||||
@@ -33,7 +33,8 @@ pub fn u64_vec_to_u32_vec(arr_u64: &[u64]) -> Vec<u32> {
|
||||
arr_u32
|
||||
}
|
||||
|
||||
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> { //TOOD: this func is universal
|
||||
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> {
|
||||
//TOOD: this func is universal
|
||||
let rng: Box<dyn RngCore> = match seed {
|
||||
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
|
||||
None => Box::new(rand::thread_rng()),
|
||||
@@ -45,7 +46,7 @@ pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> { //TOOD: this func is uni
|
||||
mod tests {
|
||||
use ark_ff::BigInteger256;
|
||||
|
||||
use crate::curves::bls12_381::{ScalarField_BLS12_381 as ScalarField};
|
||||
use crate::curves::bls12_381::ScalarField_BLS12_381 as ScalarField;
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -54,7 +55,9 @@ mod tests {
|
||||
let arr_u32 = [1, 0x0fffffff, 3, 0x2fffffff, 5, 0x4fffffff, 7, 0x6fffffff];
|
||||
|
||||
let s = ScalarField::from_ark_transmute(BigInteger256::new(
|
||||
u32_vec_to_u64_vec(&arr_u32).try_into().unwrap(),
|
||||
u32_vec_to_u64_vec(&arr_u32)
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
))
|
||||
.limbs();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user