mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-11 00:18:08 -05:00
Compare commits
52 Commits
v2.4.0
...
sumcheck-e
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e02de2f485 | ||
|
|
7a32a88d6d | ||
|
|
4cc5d2d71c | ||
|
|
6e3eecd8ca | ||
|
|
0c340924eb | ||
|
|
9d461be0bd | ||
|
|
c123940abb | ||
|
|
af4ab88f3a | ||
|
|
1ae1fa892c | ||
|
|
bf89f96dff | ||
|
|
ebf9fc4740 | ||
|
|
b56187452f | ||
|
|
ba41d3e97b | ||
|
|
7fe2a11941 | ||
|
|
5144c15845 | ||
|
|
013f1efa09 | ||
|
|
b2b3702b20 | ||
|
|
5b8e22953b | ||
|
|
452aea71af | ||
|
|
356b13f650 | ||
|
|
2952401e40 | ||
|
|
cf9579d4f9 | ||
|
|
a50af2f06e | ||
|
|
bdf4f1ee24 | ||
|
|
9af6bb5666 | ||
|
|
da31adefce | ||
|
|
9b39e771ea | ||
|
|
828fc9c006 | ||
|
|
53395312e3 | ||
|
|
709e6a12e5 | ||
|
|
0e067b296f | ||
|
|
b9409d2109 | ||
|
|
19fe6f6d5d | ||
|
|
be4b636494 | ||
|
|
9a63f17e1b | ||
|
|
425dd2c38d | ||
|
|
0c6fe543fa | ||
|
|
c5ed01b52c | ||
|
|
57ac6a13dc | ||
|
|
7833324d9c | ||
|
|
2111c3a91a | ||
|
|
009a17af11 | ||
|
|
4b50103151 | ||
|
|
e24f74c97a | ||
|
|
fa5064784f | ||
|
|
f0b86b9c06 | ||
|
|
79719fcb0f | ||
|
|
792a2e5f97 | ||
|
|
f6fee8e201 | ||
|
|
f5492a97fb | ||
|
|
2ffda0ffac | ||
|
|
2af9e44219 |
2
.github/workflows/codespell.yml
vendored
2
.github/workflows/codespell.yml
vendored
@@ -4,7 +4,7 @@ on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
- V2
|
||||
|
||||
jobs:
|
||||
spelling-checker:
|
||||
|
||||
40
.github/workflows/cpp_cuda.yml
vendored
40
.github/workflows/cpp_cuda.yml
vendored
@@ -4,11 +4,11 @@ on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
- V2
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
- V2
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: if [[ $(find ./ \( -path ./icicle/build -prune -o -path ./**/target -prune -o -path ./examples -prune \) -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file 2>&1) ]]; then echo "Please run clang-format"; exit 1; fi
|
||||
|
||||
test-linux:
|
||||
test-linux-curve:
|
||||
name: Test on Linux
|
||||
runs-on: [self-hosted, Linux, X64, icicle]
|
||||
needs: [check-changed-files, check-format]
|
||||
@@ -39,14 +39,36 @@ jobs:
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
- name: Build
|
||||
- name: Build curve
|
||||
working-directory: ./icicle
|
||||
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: |
|
||||
mkdir -p build
|
||||
cmake -DBUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release -DCURVE=${{ matrix.curve }} -DG2_DEFINED=ON -S . -B build
|
||||
cmake --build build
|
||||
- name: Run C++ Tests
|
||||
working-directory: ./icicle/build
|
||||
mkdir -p build && rm -rf build/*
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=ON -DCURVE=${{ matrix.curve }} -DG2=ON -S . -B build
|
||||
cmake --build build -j
|
||||
- name: Run C++ curve Tests
|
||||
working-directory: ./icicle/build/tests
|
||||
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: ctest
|
||||
|
||||
test-linux-field:
|
||||
name: Test on Linux
|
||||
runs-on: [self-hosted, Linux, X64, icicle]
|
||||
needs: [check-changed-files, check-format]
|
||||
strategy:
|
||||
matrix:
|
||||
field: [babybear]
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
- name: Build field
|
||||
working-directory: ./icicle
|
||||
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: |
|
||||
mkdir -p build && rm -rf build/*
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=ON -DFIELD=${{ matrix.field }} -DEXT_FIELD=ON -S . -B build
|
||||
cmake --build build -j
|
||||
- name: Run C++ field Tests
|
||||
working-directory: ./icicle/build/tests
|
||||
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: ctest
|
||||
4
.github/workflows/examples.yml
vendored
4
.github/workflows/examples.yml
vendored
@@ -11,11 +11,11 @@ on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
- V2
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
- V2
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
|
||||
12
.github/workflows/golang.yml
vendored
12
.github/workflows/golang.yml
vendored
@@ -4,11 +4,11 @@ on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
- V2
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
- V2
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -50,13 +50,15 @@ jobs:
|
||||
- name: Build
|
||||
working-directory: ./wrappers/golang
|
||||
if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: ./build.sh ${{ matrix.curve }} ON ON # builds a single curve with G2 and ECNTT enabled
|
||||
run: ./build.sh ${{ matrix.curve }} -g2 -ecntt # builds a single curve with G2 and ECNTT enabled
|
||||
- name: Upload ICICLE lib artifacts
|
||||
uses: actions/upload-artifact@v4
|
||||
if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
with:
|
||||
name: icicle-builds-${{ matrix.curve }}-${{ github.workflow }}-${{ github.sha }}
|
||||
path: icicle/build/libingo_${{ matrix.curve }}.a
|
||||
path: |
|
||||
icicle/build/src/curves/libingo_curve_${{ matrix.curve }}.a
|
||||
icicle/build/src/fields/libingo_field_${{ matrix.curve }}.a
|
||||
retention-days: 1
|
||||
|
||||
test-linux:
|
||||
@@ -74,7 +76,7 @@ jobs:
|
||||
uses: actions/download-artifact@v4
|
||||
if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
with:
|
||||
path: ./icicle/build/
|
||||
path: ./icicle/build/src
|
||||
merge-multiple: true
|
||||
- name: Run Tests
|
||||
working-directory: ./wrappers/golang
|
||||
|
||||
10
.github/workflows/rust.yml
vendored
10
.github/workflows/rust.yml
vendored
@@ -4,11 +4,11 @@ on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
- V2
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
- V2
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -60,7 +60,11 @@ jobs:
|
||||
if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
# Running tests from the root workspace will run all workspace members' tests by default
|
||||
# We need to limit the number of threads to avoid running out of memory on weaker machines
|
||||
run: cargo test --release --verbose --features=g2 -- --test-threads=2
|
||||
run: cargo test --workspace --exclude icicle-babybear --release --verbose --features=g2 -- --test-threads=2
|
||||
- name: Run baby bear tests
|
||||
working-directory: ./wrappers/rust/icicle-fields/icicle-babybear
|
||||
if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: cargo test --release --verbose
|
||||
|
||||
build-windows:
|
||||
name: Build on Windows
|
||||
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -16,6 +16,6 @@
|
||||
**/Cargo.lock
|
||||
**/icicle/build/
|
||||
**/wrappers/rust/icicle-cuda-runtime/src/bindings.rs
|
||||
**/build
|
||||
**/build*
|
||||
**/icicle/appUtils/large_ntt/work
|
||||
icicle/appUtils/large_ntt/work/test_ntt
|
||||
|
||||
27
docs/docs/icicle/polynomials/ffi.uml
Normal file
27
docs/docs/icicle/polynomials/ffi.uml
Normal file
@@ -0,0 +1,27 @@
|
||||
@startuml
|
||||
skinparam componentStyle uml2
|
||||
|
||||
' Define Components
|
||||
component "C++ Template\nComponent" as CppTemplate {
|
||||
[Parameterizable Interface]
|
||||
}
|
||||
component "C API Wrapper\nComponent" as CApiWrapper {
|
||||
[C API Interface]
|
||||
}
|
||||
component "Rust Code\nComponent" as RustCode {
|
||||
[Macro Interface\n(Template Instantiation)]
|
||||
}
|
||||
|
||||
' Define Artifact
|
||||
artifact "Static Library\n«artifact»" as StaticLib
|
||||
|
||||
' Connections
|
||||
CppTemplate -down-> CApiWrapper : Instantiates
|
||||
CApiWrapper .down.> StaticLib : Compiles into
|
||||
RustCode -left-> StaticLib : Links against\nand calls via FFI
|
||||
|
||||
' Notes
|
||||
note right of CppTemplate : Generic C++\ntemplate implementation
|
||||
note right of CApiWrapper : Exposes C API for FFI\nto Rust/Go
|
||||
note right of RustCode : Uses macros to\ninstantiate templates
|
||||
@enduml
|
||||
86
docs/docs/icicle/polynomials/hw_backends.uml
Normal file
86
docs/docs/icicle/polynomials/hw_backends.uml
Normal file
@@ -0,0 +1,86 @@
|
||||
@startuml
|
||||
|
||||
' Define Interface for Polynomial Backend Operations
|
||||
interface IPolynomialBackend {
|
||||
+add()
|
||||
+subtract()
|
||||
+multiply()
|
||||
+divide()
|
||||
+evaluate()
|
||||
}
|
||||
|
||||
' Define Interface for Polynomial Context (State Management)
|
||||
interface IPolynomialContext {
|
||||
+initFromCoeffs()
|
||||
+initFromEvals()
|
||||
+getCoeffs()
|
||||
+getEvals()
|
||||
}
|
||||
|
||||
' PolynomialAPI now uses two strategies: Backend and Context
|
||||
class PolynomialAPI {
|
||||
-backendStrategy: IPolynomialBackend
|
||||
-contextStrategy: IPolynomialContext
|
||||
-setBackendStrategy(IPolynomialBackend)
|
||||
-setContextStrategy(IPolynomialContext)
|
||||
+add()
|
||||
+subtract()
|
||||
+multiply()
|
||||
+divide()
|
||||
+evaluate()
|
||||
}
|
||||
|
||||
' Backend Implementations
|
||||
class GPUPolynomialBackend implements IPolynomialBackend {
|
||||
#gpuResources: Resource
|
||||
+add()
|
||||
+subtract()
|
||||
+multiply()
|
||||
+divide()
|
||||
+evaluate()
|
||||
}
|
||||
|
||||
class ZPUPolynomialBackend implements IPolynomialBackend {
|
||||
#zpuResources: Resource
|
||||
+add()
|
||||
+subtract()
|
||||
+multiply()
|
||||
+divide()
|
||||
+evaluate()
|
||||
}
|
||||
|
||||
class TracerPolynomialBackend implements IPolynomialBackend {
|
||||
#traceData: Data
|
||||
+add()
|
||||
+subtract()
|
||||
+multiply()
|
||||
+divide()
|
||||
+evaluate()
|
||||
}
|
||||
|
||||
' Context Implementations (Placeholder for actual implementation)
|
||||
class GPUContext implements IPolynomialContext {
|
||||
+initFromCoeffs()
|
||||
+initFromEvals()
|
||||
+getCoeffs()
|
||||
+getEvals()
|
||||
}
|
||||
|
||||
class ZPUContext implements IPolynomialContext {
|
||||
+initFromCoeffs()
|
||||
+initFromEvals()
|
||||
+getCoeffs()
|
||||
+getEvals()
|
||||
}
|
||||
|
||||
class TracerContext implements IPolynomialContext {
|
||||
+initFromCoeffs()
|
||||
+initFromEvals()
|
||||
+getCoeffs()
|
||||
+getEvals()
|
||||
}
|
||||
|
||||
' Relationships
|
||||
PolynomialAPI o-- IPolynomialBackend : uses
|
||||
PolynomialAPI o-- IPolynomialContext : uses
|
||||
@enduml
|
||||
@@ -89,7 +89,6 @@ int main(int argc, char** argv)
|
||||
true, // is_a_on_device
|
||||
true, // is_b_on_device
|
||||
true, // is_result_on_device
|
||||
false, // is_montgomery
|
||||
false // is_async
|
||||
};
|
||||
CHK_IF_RETURN(vec_ops::Mul(GpuA, GpuB, NTT_SIZE, config, MulGpu));
|
||||
|
||||
@@ -8,12 +8,11 @@ icicle-cuda-runtime = { path = "../../../wrappers/rust/icicle-cuda-runtime" }
|
||||
icicle-core = { path = "../../../wrappers/rust/icicle-core" }
|
||||
icicle-bn254 = { path = "../../../wrappers/rust/icicle-curves/icicle-bn254", features = ["g2"] }
|
||||
icicle-bls12-377 = { path = "../../../wrappers/rust/icicle-curves/icicle-bls12-377" }
|
||||
ark-bn254 = { version = "0.4.0", optional = true}
|
||||
ark-bls12-377 = { version = "0.4.0", optional = true}
|
||||
ark-ec = { version = "0.4.0", optional = true}
|
||||
ark-bn254 = { version = "0.4.0", optional = true }
|
||||
ark-bls12-377 = { version = "0.4.0", optional = true }
|
||||
ark-ec = { version = "0.4.0", optional = true }
|
||||
clap = { version = "<=4.4.12", features = ["derive"] }
|
||||
|
||||
[features]
|
||||
arkworks = ["ark-bn254", "ark-bls12-377", "ark-ec", "icicle-core/arkworks", "icicle-bn254/arkworks", "icicle-bls12-377/arkworks"]
|
||||
profile = []
|
||||
g2 = []
|
||||
|
||||
@@ -4,7 +4,10 @@ use icicle_bls12_377::curve::{
|
||||
CurveCfg as BLS12377CurveCfg, G1Projective as BLS12377G1Projective, ScalarCfg as BLS12377ScalarCfg,
|
||||
};
|
||||
|
||||
use icicle_cuda_runtime::{memory::HostOrDeviceSlice, stream::CudaStream};
|
||||
use icicle_cuda_runtime::{
|
||||
memory::{DeviceVec, HostSlice},
|
||||
stream::CudaStream,
|
||||
};
|
||||
|
||||
use icicle_core::{curve::Curve, msm, traits::GenerateRandom};
|
||||
|
||||
@@ -57,18 +60,18 @@ fn main() {
|
||||
log_size, size
|
||||
);
|
||||
// Setting Bn254 points and scalars
|
||||
let points = HostOrDeviceSlice::Host(upper_points[..size].to_vec());
|
||||
let g2_points = HostOrDeviceSlice::Host(g2_upper_points[..size].to_vec());
|
||||
let scalars = HostOrDeviceSlice::Host(upper_scalars[..size].to_vec());
|
||||
let points = HostSlice::from_slice(&upper_points[..size]);
|
||||
let g2_points = HostSlice::from_slice(&g2_upper_points[..size]);
|
||||
let scalars = HostSlice::from_slice(&upper_scalars[..size]);
|
||||
|
||||
// Setting bls12377 points and scalars
|
||||
// let points_bls12377 = &upper_points_bls12377[..size];
|
||||
let points_bls12377 = HostOrDeviceSlice::Host(upper_points_bls12377[..size].to_vec()); // &upper_points_bls12377[..size];
|
||||
let scalars_bls12377 = HostOrDeviceSlice::Host(upper_scalars_bls12377[..size].to_vec());
|
||||
let points_bls12377 = HostSlice::from_slice(&upper_points_bls12377[..size]); // &upper_points_bls12377[..size];
|
||||
let scalars_bls12377 = HostSlice::from_slice(&upper_scalars_bls12377[..size]);
|
||||
|
||||
println!("Configuring bn254 MSM...");
|
||||
let mut msm_results: HostOrDeviceSlice<'_, G1Projective> = HostOrDeviceSlice::cuda_malloc(1).unwrap();
|
||||
let mut g2_msm_results: HostOrDeviceSlice<'_, G2Projective> = HostOrDeviceSlice::cuda_malloc(1).unwrap();
|
||||
let mut msm_results = DeviceVec::<G1Projective>::cuda_malloc(1).unwrap();
|
||||
let mut g2_msm_results = DeviceVec::<G2Projective>::cuda_malloc(1).unwrap();
|
||||
let stream = CudaStream::create().unwrap();
|
||||
let g2_stream = CudaStream::create().unwrap();
|
||||
let mut cfg = msm::MSMConfig::default();
|
||||
@@ -82,8 +85,7 @@ fn main() {
|
||||
g2_cfg.is_async = true;
|
||||
|
||||
println!("Configuring bls12377 MSM...");
|
||||
let mut msm_results_bls12377: HostOrDeviceSlice<'_, BLS12377G1Projective> =
|
||||
HostOrDeviceSlice::cuda_malloc(1).unwrap();
|
||||
let mut msm_results_bls12377 = DeviceVec::<BLS12377G1Projective>::cuda_malloc(1).unwrap();
|
||||
let stream_bls12377 = CudaStream::create().unwrap();
|
||||
let mut cfg_bls12377 = msm::MSMConfig::default();
|
||||
cfg_bls12377
|
||||
@@ -94,7 +96,7 @@ fn main() {
|
||||
println!("Executing bn254 MSM on device...");
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
msm::msm(&scalars, &points, &cfg, &mut msm_results).unwrap();
|
||||
msm::msm(scalars, points, &cfg, &mut msm_results[..]).unwrap();
|
||||
#[cfg(feature = "profile")]
|
||||
println!(
|
||||
"ICICLE BN254 MSM on size 2^{log_size} took: {} ms",
|
||||
@@ -102,16 +104,16 @@ fn main() {
|
||||
.elapsed()
|
||||
.as_millis()
|
||||
);
|
||||
msm::msm(&scalars, &g2_points, &g2_cfg, &mut g2_msm_results).unwrap();
|
||||
msm::msm(scalars, g2_points, &g2_cfg, &mut g2_msm_results[..]).unwrap();
|
||||
|
||||
println!("Executing bls12377 MSM on device...");
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
msm::msm(
|
||||
&scalars_bls12377,
|
||||
&points_bls12377,
|
||||
scalars_bls12377,
|
||||
points_bls12377,
|
||||
&cfg_bls12377,
|
||||
&mut msm_results_bls12377,
|
||||
&mut msm_results_bls12377[..],
|
||||
)
|
||||
.unwrap();
|
||||
#[cfg(feature = "profile")]
|
||||
@@ -134,10 +136,10 @@ fn main() {
|
||||
.synchronize()
|
||||
.unwrap();
|
||||
msm_results
|
||||
.copy_to_host(&mut msm_host_result[..])
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut msm_host_result[..]))
|
||||
.unwrap();
|
||||
g2_msm_results
|
||||
.copy_to_host(&mut g2_msm_host_result[..])
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut g2_msm_host_result[..]))
|
||||
.unwrap();
|
||||
println!("bn254 result: {:#?}", msm_host_result);
|
||||
println!("G2 bn254 result: {:#?}", g2_msm_host_result);
|
||||
@@ -146,7 +148,7 @@ fn main() {
|
||||
.synchronize()
|
||||
.unwrap();
|
||||
msm_results_bls12377
|
||||
.copy_to_host(&mut msm_host_result_bls12377[..])
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut msm_host_result_bls12377[..]))
|
||||
.unwrap();
|
||||
println!("bls12377 result: {:#?}", msm_host_result_bls12377);
|
||||
|
||||
@@ -154,23 +156,19 @@ fn main() {
|
||||
{
|
||||
println!("Checking against arkworks...");
|
||||
let ark_points: Vec<Bn254G1Affine> = points
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|&point| point.to_ark())
|
||||
.collect();
|
||||
let ark_scalars: Vec<Bn254Fr> = scalars
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|scalar| scalar.to_ark())
|
||||
.collect();
|
||||
|
||||
let ark_points_bls12377: Vec<Bls12377G1Affine> = points_bls12377
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|point| point.to_ark())
|
||||
.collect();
|
||||
let ark_scalars_bls12377: Vec<Bls12377Fr> = scalars_bls12377
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|scalar| scalar.to_ark())
|
||||
.collect();
|
||||
|
||||
@@ -2,7 +2,11 @@ use icicle_bn254::curve::{ScalarCfg, ScalarField};
|
||||
|
||||
use icicle_bls12_377::curve::{ScalarCfg as BLS12377ScalarCfg, ScalarField as BLS12377ScalarField};
|
||||
|
||||
use icicle_cuda_runtime::{device_context::DeviceContext, memory::HostOrDeviceSlice, stream::CudaStream};
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::DeviceContext,
|
||||
memory::{DeviceVec, HostSlice},
|
||||
stream::CudaStream,
|
||||
};
|
||||
|
||||
use icicle_core::{
|
||||
ntt::{self, NTT},
|
||||
@@ -41,14 +45,13 @@ fn main() {
|
||||
);
|
||||
// Setting Bn254 points and scalars
|
||||
println!("Generating random inputs on host for bn254...");
|
||||
let scalars = HostOrDeviceSlice::Host(ScalarCfg::generate_random(size));
|
||||
let mut ntt_results: HostOrDeviceSlice<'_, ScalarField> = HostOrDeviceSlice::cuda_malloc(size).unwrap();
|
||||
let scalars = ScalarCfg::generate_random(size);
|
||||
let mut ntt_results = DeviceVec::<ScalarField>::cuda_malloc(size).unwrap();
|
||||
|
||||
// Setting bls12377 points and scalars
|
||||
println!("Generating random inputs on host for bls12377...");
|
||||
let scalars_bls12377 = HostOrDeviceSlice::Host(BLS12377ScalarCfg::generate_random(size));
|
||||
let mut ntt_results_bls12377: HostOrDeviceSlice<'_, BLS12377ScalarField> =
|
||||
HostOrDeviceSlice::cuda_malloc(size).unwrap();
|
||||
let scalars_bls12377 = BLS12377ScalarCfg::generate_random(size);
|
||||
let mut ntt_results_bls12377 = DeviceVec::<BLS12377ScalarField>::cuda_malloc(size).unwrap();
|
||||
|
||||
println!("Setting up bn254 Domain...");
|
||||
let icicle_omega = <Bn254Fr as FftField>::get_root_of_unity(
|
||||
@@ -86,7 +89,13 @@ fn main() {
|
||||
println!("Executing bn254 NTT on device...");
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
ntt::ntt(&scalars, ntt::NTTDir::kForward, &cfg, &mut ntt_results).unwrap();
|
||||
ntt::ntt(
|
||||
HostSlice::from_slice(&scalars),
|
||||
ntt::NTTDir::kForward,
|
||||
&cfg,
|
||||
&mut ntt_results[..],
|
||||
)
|
||||
.unwrap();
|
||||
#[cfg(feature = "profile")]
|
||||
println!(
|
||||
"ICICLE BN254 NTT on size 2^{log_size} took: {} μs",
|
||||
@@ -99,10 +108,10 @@ fn main() {
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
ntt::ntt(
|
||||
&scalars_bls12377,
|
||||
HostSlice::from_slice(&scalars_bls12377),
|
||||
ntt::NTTDir::kForward,
|
||||
&cfg_bls12377,
|
||||
&mut ntt_results_bls12377,
|
||||
&mut ntt_results_bls12377[..],
|
||||
)
|
||||
.unwrap();
|
||||
#[cfg(feature = "profile")]
|
||||
@@ -119,7 +128,7 @@ fn main() {
|
||||
.unwrap();
|
||||
let mut host_bn254_results = vec![ScalarField::zero(); size];
|
||||
ntt_results
|
||||
.copy_to_host(&mut host_bn254_results[..])
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut host_bn254_results[..]))
|
||||
.unwrap();
|
||||
|
||||
stream_bls12377
|
||||
@@ -127,19 +136,17 @@ fn main() {
|
||||
.unwrap();
|
||||
let mut host_bls12377_results = vec![BLS12377ScalarField::zero(); size];
|
||||
ntt_results_bls12377
|
||||
.copy_to_host(&mut host_bls12377_results[..])
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut host_bls12377_results[..]))
|
||||
.unwrap();
|
||||
|
||||
println!("Checking against arkworks...");
|
||||
let mut ark_scalars: Vec<Bn254Fr> = scalars
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|scalar| scalar.to_ark())
|
||||
.collect();
|
||||
let bn254_domain = <Radix2EvaluationDomain<Bn254Fr> as EvaluationDomain<Bn254Fr>>::new(size).unwrap();
|
||||
|
||||
let mut ark_scalars_bls12377: Vec<Bls12377Fr> = scalars_bls12377
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|scalar| scalar.to_ark())
|
||||
.collect();
|
||||
|
||||
@@ -4,7 +4,7 @@ use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
|
||||
use icicle_core::poseidon::{load_optimized_poseidon_constants, poseidon_hash_many, PoseidonConfig};
|
||||
use icicle_core::traits::FieldImpl;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
|
||||
#[cfg(feature = "profile")]
|
||||
use std::time::Instant;
|
||||
@@ -25,23 +25,29 @@ fn main() {
|
||||
|
||||
println!("Running Icicle Examples: Rust Poseidon Hash");
|
||||
let arity = 2u32;
|
||||
println!("---------------------- Loading optimized Poseidon constants for arity={} ------------------------", arity);
|
||||
println!(
|
||||
"---------------------- Loading optimized Poseidon constants for arity={} ------------------------",
|
||||
arity
|
||||
);
|
||||
let ctx = DeviceContext::default();
|
||||
let constants = load_optimized_poseidon_constants::<F>(arity, &ctx).unwrap();
|
||||
let config = PoseidonConfig::default();
|
||||
|
||||
println!("---------------------- Input size 2^{}={} ------------------------", size, test_size);
|
||||
let inputs = vec![F::one(); test_size * arity as usize];
|
||||
let outputs = vec![F::zero(); test_size];
|
||||
let mut input_slice = HostOrDeviceSlice::on_host(inputs);
|
||||
let mut output_slice = HostOrDeviceSlice::on_host(outputs);
|
||||
println!(
|
||||
"---------------------- Input size 2^{}={} ------------------------",
|
||||
size, test_size
|
||||
);
|
||||
let mut inputs = vec![F::one(); test_size * arity as usize];
|
||||
let mut outputs = vec![F::zero(); test_size];
|
||||
let input_slice = HostSlice::from_mut_slice(&mut inputs);
|
||||
let output_slice = HostSlice::from_mut_slice(&mut outputs);
|
||||
|
||||
println!("Executing BLS12-381 Poseidon Hash on device...");
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
poseidon_hash_many::<F>(
|
||||
&mut input_slice,
|
||||
&mut output_slice,
|
||||
input_slice,
|
||||
output_slice,
|
||||
test_size as u32,
|
||||
arity as u32,
|
||||
&constants,
|
||||
@@ -49,5 +55,10 @@ fn main() {
|
||||
)
|
||||
.unwrap();
|
||||
#[cfg(feature = "profile")]
|
||||
println!("ICICLE BLS12-381 Poseidon Hash on size 2^{size} took: {} μs", start.elapsed().as_micros());
|
||||
}
|
||||
println!(
|
||||
"ICICLE BLS12-381 Poseidon Hash on size 2^{size} took: {} μs",
|
||||
start
|
||||
.elapsed()
|
||||
.as_micros()
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1,169 +1,59 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
# GoogleTest requires at least C++14
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||
|
||||
if("$ENV{ICICLE_PIC}" STREQUAL "OFF" OR ICICLE_PIC STREQUAL "OFF")
|
||||
message(WARNING "Note that PIC (position-independent code) is disabled.")
|
||||
else()
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
endif()
|
||||
|
||||
# add the target cuda architectures
|
||||
# each additional architecture increases the compilation time and output file size
|
||||
if(${CMAKE_VERSION} VERSION_LESS "3.24.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
|
||||
else()
|
||||
find_program(_nvidia_smi "nvidia-smi")
|
||||
|
||||
if(_nvidia_smi)
|
||||
set(DETECT_GPU_COUNT_NVIDIA_SMI 0)
|
||||
|
||||
# execute nvidia-smi -L to get a short list of GPUs available
|
||||
exec_program(${_nvidia_smi_path} ARGS -L
|
||||
OUTPUT_VARIABLE _nvidia_smi_out
|
||||
RETURN_VALUE _nvidia_smi_ret)
|
||||
|
||||
# process the stdout of nvidia-smi
|
||||
if(_nvidia_smi_ret EQUAL 0)
|
||||
# convert string with newlines to list of strings
|
||||
string(REGEX REPLACE "\n" ";" _nvidia_smi_out "${_nvidia_smi_out}")
|
||||
|
||||
foreach(_line ${_nvidia_smi_out})
|
||||
if(_line MATCHES "^GPU [0-9]+:")
|
||||
math(EXPR DETECT_GPU_COUNT_NVIDIA_SMI "${DETECT_GPU_COUNT_NVIDIA_SMI}+1")
|
||||
|
||||
# the UUID is not very useful for the user, remove it
|
||||
string(REGEX REPLACE " \\(UUID:.*\\)" "" _gpu_info "${_line}")
|
||||
|
||||
if(NOT _gpu_info STREQUAL "")
|
||||
list(APPEND DETECT_GPU_INFO "${_gpu_info}")
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
check_num_gpu_info(${DETECT_GPU_COUNT_NVIDIA_SMI} DETECT_GPU_INFO)
|
||||
set(DETECT_GPU_COUNT ${DETECT_GPU_COUNT_NVIDIA_SMI})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# ##
|
||||
if(DETECT_GPU_COUNT GREATER 0)
|
||||
set(CMAKE_CUDA_ARCHITECTURES native) # do native
|
||||
else()
|
||||
# no GPUs found, like on Github CI runners
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50) # some safe value
|
||||
endif()
|
||||
endif()
|
||||
|
||||
project(icicle LANGUAGES CUDA CXX)
|
||||
# Check CUDA version and, if possible, enable multi-threaded compilation
|
||||
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.2")
|
||||
message(STATUS "Using multi-threaded CUDA compilation.")
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --split-compile 0")
|
||||
else()
|
||||
message(STATUS "Can't use multi-threaded CUDA compilation.")
|
||||
endif()
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
|
||||
set(CMAKE_CUDA_FLAGS_RELEASE "")
|
||||
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -lineinfo")
|
||||
include_directories("${CMAKE_SOURCE_DIR}")
|
||||
|
||||
include(cmake/Common.cmake)
|
||||
include(cmake/FieldsCommon.cmake)
|
||||
include(cmake/CurvesCommon.cmake)
|
||||
|
||||
# when adding a new curve/field, append its name to the end of this list
|
||||
set(SUPPORTED_CURVES bn254;bls12_381;bls12_377;bw6_761;grumpkin)
|
||||
set(SUPPORTED_CURVES_WITH_POSEIDON bn254;bls12_381;bls12_377;bw6_761;grumpkin)
|
||||
SET(SUPPORTED_CURVES_WITHOUT_NTT grumpkin)
|
||||
set_env()
|
||||
set_gpu_env()
|
||||
|
||||
set(IS_CURVE_SUPPORTED FALSE)
|
||||
set(I 0)
|
||||
foreach (SUPPORTED_CURVE ${SUPPORTED_CURVES})
|
||||
math(EXPR I "${I} + 1")
|
||||
if (CURVE STREQUAL SUPPORTED_CURVE)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DCURVE_ID=${I}")
|
||||
set(IS_CURVE_SUPPORTED TRUE)
|
||||
endif ()
|
||||
endforeach()
|
||||
option(DEVMODE "Enable development mode" OFF)
|
||||
option(EXT_FIELD "Build extension field" OFF)
|
||||
option(G2 "Build G2" OFF)
|
||||
option(ECNTT "Build ECNTT" OFF)
|
||||
option(BUILD_HASH "Build hash functions" OFF)
|
||||
option(BUILD_TESTS "Build unit tests" OFF)
|
||||
option(BUILD_BENCHMARKS "Build benchmarks" OFF)
|
||||
# add options here
|
||||
|
||||
if (NOT IS_CURVE_SUPPORTED)
|
||||
message( FATAL_ERROR "The value of CURVE variable: ${CURVE} is not one of the supported curves: ${SUPPORTED_CURVES}" )
|
||||
if((DEFINED CURVE) AND (DEFINED FIELD))
|
||||
message( FATAL_ERROR "CURVE and FIELD cannot be defined at the same time" )
|
||||
endif ()
|
||||
|
||||
if (DEVMODE STREQUAL "ON")
|
||||
if (DEVMODE)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -O0 --ptxas-options=-O0 --ptxas-options=-allow-expensive-optimizations=false -DDEVMODE=ON")
|
||||
endif ()
|
||||
|
||||
if (G2_DEFINED STREQUAL "ON")
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DG2_DEFINED=ON")
|
||||
if(DEFINED FIELD)
|
||||
check_field()
|
||||
add_subdirectory(src/fields)
|
||||
endif ()
|
||||
|
||||
if (ECNTT_DEFINED STREQUAL "ON")
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DECNTT_DEFINED=ON")
|
||||
if(DEFINED CURVE)
|
||||
check_curve()
|
||||
set(FIELD ${CURVE})
|
||||
add_subdirectory(src/fields)
|
||||
add_subdirectory(src/curves)
|
||||
endif ()
|
||||
|
||||
option(BUILD_TESTS "Build tests" OFF)
|
||||
|
||||
if (NOT BUILD_TESTS)
|
||||
|
||||
message(STATUS "Building without tests.")
|
||||
|
||||
if (CURVE IN_LIST SUPPORTED_CURVES_WITH_POSEIDON)
|
||||
list(APPEND ICICLE_SOURCES appUtils/poseidon/poseidon.cu)
|
||||
list(APPEND ICICLE_SOURCES appUtils/tree/merkle.cu)
|
||||
endif()
|
||||
|
||||
if (NOT CURVE IN_LIST SUPPORTED_CURVES_WITHOUT_NTT)
|
||||
list(APPEND ICICLE_SOURCES appUtils/ntt/ntt.cu)
|
||||
list(APPEND ICICLE_SOURCES appUtils/ntt/kernel_ntt.cu)
|
||||
if(ECNTT_DEFINED STREQUAL "ON")
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DECNTT_DEFINED=ON")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_library(
|
||||
icicle
|
||||
utils/vec_ops.cu
|
||||
utils/mont.cu
|
||||
primitives/field.cu
|
||||
primitives/projective.cu
|
||||
appUtils/msm/msm.cu
|
||||
${ICICLE_SOURCES}
|
||||
)
|
||||
set_target_properties(icicle PROPERTIES OUTPUT_NAME "ingo_${CURVE}")
|
||||
target_compile_definitions(icicle PRIVATE CURVE=${CURVE})
|
||||
|
||||
else()
|
||||
|
||||
message(STATUS "Building tests.")
|
||||
|
||||
include(FetchContent)
|
||||
FetchContent_Declare(
|
||||
googletest
|
||||
URL https://github.com/google/googletest/archive/refs/tags/v1.13.0.zip
|
||||
)
|
||||
# For Windows: Prevent overriding the parent project's compiler/linker settings
|
||||
|
||||
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
|
||||
FetchContent_MakeAvailable(googletest)
|
||||
|
||||
enable_testing()
|
||||
|
||||
add_executable(
|
||||
runner
|
||||
tests/runner.cu
|
||||
)
|
||||
|
||||
target_link_libraries(
|
||||
runner
|
||||
GTest::gtest_main
|
||||
)
|
||||
|
||||
include(GoogleTest)
|
||||
set_target_properties(runner PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
|
||||
gtest_discover_tests(runner)
|
||||
|
||||
if (G2)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DG2")
|
||||
endif ()
|
||||
|
||||
if (EXT_FIELD)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DEXT_FIELD")
|
||||
endif ()
|
||||
|
||||
if(BUILD_HASH)
|
||||
add_subdirectory(src/hash)
|
||||
endif ()
|
||||
|
||||
if (BUILD_TESTS)
|
||||
add_subdirectory(tests)
|
||||
endif()
|
||||
|
||||
if (BUILD_BENCHMARKS)
|
||||
add_subdirectory(benchmarks)
|
||||
endif()
|
||||
|
||||
5
icicle/benchmarks/CMakeLists.txt
Normal file
5
icicle/benchmarks/CMakeLists.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
|
||||
add_executable(benches benches.cu)
|
||||
target_link_libraries(benches benchmark::benchmark)
|
||||
target_include_directories(benches PUBLIC ${CMAKE_SOURCE_DIR}/include/)
|
||||
find_package(benchmark REQUIRED)
|
||||
25
icicle/benchmarks/README.md
Normal file
25
icicle/benchmarks/README.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# How to use benchmarks
|
||||
|
||||
ICICLE uses [google benchmarks](https://github.com/google/benchmark) to measure the performance of primitives.
|
||||
|
||||
To run benchmarks, make sure you have everything installed to run ICICLE (see top-level README for that). Next, you need to install google benchmarks library as described in their [documentation](https://github.com/google/benchmark?tab=readme-ov-file#installation). When running benchmarks, export the path to this installation:
|
||||
|
||||
```
|
||||
export CMAKE_PREFIX_PATH=$CMAKE_PREFIX_PATH:<path-to-google-benchmarks-build-folder>
|
||||
```
|
||||
|
||||
Then to benchmark field arithmetic, say, on `baby_bear` field, run:
|
||||
|
||||
```
|
||||
cmake -UCURVE -UFIELD -UG2 -UEXT_FIELD -DFIELD=babybear -DEXT_FIELD=ON -S . -B build;
|
||||
cmake --build build;
|
||||
build/benches --benchmark_counters_tabular=true
|
||||
```
|
||||
|
||||
`-U` parameters are needed to clear variables from previous runs and `EXT_FIELD` can be disabled if benhcmarking the extension field is not needed. To benchmark a curve, say, `bn254`, change the first `cmake` call to:
|
||||
|
||||
```
|
||||
cmake -UCURVE -UFIELD -UG2 -UEXT_FIELD -DCURVE=bn254 -S . -B build;
|
||||
```
|
||||
|
||||
Benchmarks measure throughput of very cheap operations like field multiplication or EC addition by repeating them very many times in parallel, so throughput is the main metric to look at.
|
||||
6
icicle/benchmarks/benches.cu
Normal file
6
icicle/benchmarks/benches.cu
Normal file
@@ -0,0 +1,6 @@
|
||||
#include "field_benchmarks.cu"
|
||||
#ifdef CURVE_ID
|
||||
#include "curve_benchmarks.cu"
|
||||
#endif
|
||||
|
||||
BENCHMARK_MAIN();
|
||||
79
icicle/benchmarks/curve_benchmarks.cu
Normal file
79
icicle/benchmarks/curve_benchmarks.cu
Normal file
@@ -0,0 +1,79 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include "utils/test_functions.cuh"
|
||||
#include "curves/curve_config.cuh"
|
||||
|
||||
using namespace curve_config;
|
||||
using namespace benchmark;
|
||||
|
||||
static void BM_MixedECAdd(State& state)
|
||||
{
|
||||
constexpr int N = 128;
|
||||
int n = state.range(0) / N;
|
||||
projective_t* points1;
|
||||
affine_t* points2;
|
||||
assert(!cudaMalloc(&points1, n * sizeof(projective_t)));
|
||||
assert(!cudaMalloc(&points2, n * sizeof(affine_t)));
|
||||
|
||||
projective_t* h_points1 = (projective_t*)malloc(n * sizeof(projective_t));
|
||||
affine_t* h_points2 = (affine_t*)malloc(n * sizeof(affine_t));
|
||||
projective_t::RandHostMany(h_points1, n);
|
||||
projective_t::RandHostManyAffine(h_points2, n);
|
||||
cudaMemcpy(points1, h_points1, sizeof(projective_t) * n, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(points2, h_points2, sizeof(affine_t) * n, cudaMemcpyHostToDevice);
|
||||
|
||||
for (auto _ : state) {
|
||||
cudaEvent_t start, stop;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start);
|
||||
assert((vec_add<projective_t, affine_t, N>(points1, points2, points1, n)) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(0) == cudaSuccess);
|
||||
cudaEventRecord(stop);
|
||||
|
||||
float milliseconds = 0;
|
||||
cudaEventElapsedTime(&milliseconds, start, stop);
|
||||
|
||||
state.SetIterationTime((double)(milliseconds / 1000));
|
||||
}
|
||||
state.counters["Throughput"] = Counter(state.range(0), Counter::kIsRate | Counter::kIsIterationInvariant);
|
||||
cudaFree(points1);
|
||||
cudaFree(points2);
|
||||
}
|
||||
|
||||
static void BM_FullECAdd(benchmark::State& state)
|
||||
{
|
||||
constexpr int N = 128;
|
||||
int n = state.range(0) / N;
|
||||
projective_t* points1;
|
||||
projective_t* points2;
|
||||
assert(!cudaMalloc(&points1, n * sizeof(projective_t)));
|
||||
assert(!cudaMalloc(&points2, n * sizeof(projective_t)));
|
||||
|
||||
projective_t* h_points1 = (projective_t*)malloc(n * sizeof(projective_t));
|
||||
projective_t* h_points2 = (projective_t*)malloc(n * sizeof(projective_t));
|
||||
projective_t::RandHostMany(h_points1, n);
|
||||
projective_t::RandHostMany(h_points2, n);
|
||||
cudaMemcpy(points1, h_points1, sizeof(projective_t) * n, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(points2, h_points2, sizeof(projective_t) * n, cudaMemcpyHostToDevice);
|
||||
|
||||
for (auto _ : state) {
|
||||
cudaEvent_t start, stop;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start);
|
||||
assert((vec_add<projective_t, projective_t, N>(points1, points2, points1, n)) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(0) == cudaSuccess);
|
||||
cudaEventRecord(stop);
|
||||
|
||||
float milliseconds = 0;
|
||||
cudaEventElapsedTime(&milliseconds, start, stop);
|
||||
|
||||
state.SetIterationTime((double)(milliseconds / 1000));
|
||||
}
|
||||
state.counters["Throughput"] = Counter(state.range(0), Counter::kIsRate | Counter::kIsIterationInvariant);
|
||||
cudaFree(points1);
|
||||
cudaFree(points2);
|
||||
}
|
||||
|
||||
BENCHMARK(BM_FullECAdd)->Range(1 << 27, 1 << 27)->Unit(benchmark::kMillisecond);
|
||||
BENCHMARK(BM_MixedECAdd)->Range(1 << 27, 1 << 27)->Unit(benchmark::kMillisecond);
|
||||
108
icicle/benchmarks/field_benchmarks.cu
Normal file
108
icicle/benchmarks/field_benchmarks.cu
Normal file
@@ -0,0 +1,108 @@
|
||||
#include <benchmark/benchmark.h>
|
||||
#include "utils/test_functions.cuh"
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
using namespace field_config;
|
||||
using namespace benchmark;
|
||||
|
||||
template <class T>
|
||||
static void BM_FieldAdd(State& state)
|
||||
{
|
||||
constexpr int N = 256;
|
||||
int n = state.range(0) / N;
|
||||
T* scalars1;
|
||||
T* scalars2;
|
||||
assert(!cudaMalloc(&scalars1, n * sizeof(T)));
|
||||
assert(!cudaMalloc(&scalars2, n * sizeof(T)));
|
||||
|
||||
assert(device_populate_random<T>(scalars1, n) == cudaSuccess);
|
||||
assert(device_populate_random<T>(scalars2, n) == cudaSuccess);
|
||||
|
||||
for (auto _ : state) {
|
||||
cudaEvent_t start, stop;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start);
|
||||
assert((vec_add<T, T, N>(scalars1, scalars2, scalars1, n)) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(0) == cudaSuccess);
|
||||
cudaEventRecord(stop);
|
||||
|
||||
float milliseconds = 0;
|
||||
cudaEventElapsedTime(&milliseconds, start, stop);
|
||||
|
||||
state.SetIterationTime((double)(milliseconds / 1000));
|
||||
}
|
||||
state.counters["Throughput"] = Counter(state.range(0), Counter::kIsRate | Counter::kIsIterationInvariant);
|
||||
cudaFree(scalars1);
|
||||
cudaFree(scalars2);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static void BM_FieldMul(State& state)
|
||||
{
|
||||
constexpr int N = 128;
|
||||
int n = state.range(0) / N;
|
||||
T* scalars1;
|
||||
T* scalars2;
|
||||
assert(!cudaMalloc(&scalars1, n * sizeof(T)));
|
||||
assert(!cudaMalloc(&scalars2, n * sizeof(T)));
|
||||
|
||||
assert(device_populate_random<T>(scalars1, n) == cudaSuccess);
|
||||
assert(device_populate_random<T>(scalars2, n) == cudaSuccess);
|
||||
|
||||
for (auto _ : state) {
|
||||
cudaEvent_t start, stop;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start);
|
||||
assert((vec_mul<T, T, N>(scalars1, scalars2, scalars1, n)) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(0) == cudaSuccess);
|
||||
cudaEventRecord(stop);
|
||||
|
||||
float milliseconds = 0;
|
||||
cudaEventElapsedTime(&milliseconds, start, stop);
|
||||
|
||||
state.SetIterationTime((double)(milliseconds / 1000));
|
||||
}
|
||||
state.counters["Throughput"] = Counter(state.range(0), Counter::kIsRate | Counter::kIsIterationInvariant);
|
||||
cudaFree(scalars1);
|
||||
cudaFree(scalars2);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static void BM_FieldSqr(State& state)
|
||||
{
|
||||
constexpr int N = 128;
|
||||
int n = state.range(0) / N;
|
||||
T* scalars;
|
||||
assert(!cudaMalloc(&scalars, n * sizeof(T)));
|
||||
|
||||
assert(device_populate_random<T>(scalars, n) == cudaSuccess);
|
||||
|
||||
for (auto _ : state) {
|
||||
cudaEvent_t start, stop;
|
||||
cudaEventCreate(&start);
|
||||
cudaEventCreate(&stop);
|
||||
cudaEventRecord(start);
|
||||
assert((field_vec_sqr<T, N>(scalars, scalars, n)) == cudaSuccess);
|
||||
assert(cudaStreamSynchronize(0) == cudaSuccess);
|
||||
cudaEventRecord(stop);
|
||||
|
||||
float milliseconds = 0;
|
||||
cudaEventElapsedTime(&milliseconds, start, stop);
|
||||
|
||||
state.SetIterationTime((double)(milliseconds / 1000));
|
||||
}
|
||||
state.counters["Throughput"] = Counter(state.range(0), Counter::kIsRate | Counter::kIsIterationInvariant);
|
||||
cudaFree(scalars);
|
||||
}
|
||||
|
||||
BENCHMARK(BM_FieldAdd<scalar_t>)->Range(1 << 28, 1 << 28)->Unit(kMicrosecond);
|
||||
BENCHMARK(BM_FieldMul<scalar_t>)->Range(1 << 27, 1 << 27)->Unit(kMicrosecond);
|
||||
BENCHMARK(BM_FieldSqr<scalar_t>)->Range(1 << 27, 1 << 27)->Unit(kMicrosecond);
|
||||
|
||||
#ifdef EXT_FIELD
|
||||
BENCHMARK(BM_FieldAdd<extension_t>)->Range(1 << 28, 1 << 28)->Unit(kMicrosecond);
|
||||
BENCHMARK(BM_FieldMul<extension_t>)->Range(1 << 27, 1 << 27)->Unit(kMicrosecond);
|
||||
BENCHMARK(BM_FieldSqr<extension_t>)->Range(1 << 27, 1 << 27)->Unit(kMicrosecond);
|
||||
#endif
|
||||
72
icicle/cmake/Common.cmake
Normal file
72
icicle/cmake/Common.cmake
Normal file
@@ -0,0 +1,72 @@
|
||||
function(set_env)
|
||||
set(CMAKE_CXX_STANDARD 17 PARENT_SCOPE)
|
||||
set(CMAKE_CUDA_STANDARD 17 PARENT_SCOPE)
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE PARENT_SCOPE)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE PARENT_SCOPE)
|
||||
|
||||
if("$ENV{ICICLE_PIC}" STREQUAL "OFF" OR ICICLE_PIC STREQUAL "OFF")
|
||||
message(WARNING "Note that PIC (position-independent code) is disabled.")
|
||||
else()
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
function(set_gpu_env)
|
||||
# add the target cuda architectures
|
||||
# each additional architecture increases the compilation time and output file size
|
||||
if(${CMAKE_VERSION} VERSION_LESS "3.24.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH} PARENT_SCOPE)
|
||||
else()
|
||||
find_program(_nvidia_smi "nvidia-smi")
|
||||
|
||||
if(_nvidia_smi)
|
||||
set(DETECT_GPU_COUNT_NVIDIA_SMI 0)
|
||||
|
||||
# execute nvidia-smi -L to get a short list of GPUs available
|
||||
exec_program(${_nvidia_smi_path} ARGS -L
|
||||
OUTPUT_VARIABLE _nvidia_smi_out
|
||||
RETURN_VALUE _nvidia_smi_ret)
|
||||
|
||||
# process the stdout of nvidia-smi
|
||||
if(_nvidia_smi_ret EQUAL 0)
|
||||
# convert string with newlines to list of strings
|
||||
string(REGEX REPLACE "\n" ";" _nvidia_smi_out "${_nvidia_smi_out}")
|
||||
|
||||
foreach(_line ${_nvidia_smi_out})
|
||||
if(_line MATCHES "^GPU [0-9]+:")
|
||||
math(EXPR DETECT_GPU_COUNT_NVIDIA_SMI "${DETECT_GPU_COUNT_NVIDIA_SMI}+1")
|
||||
|
||||
# the UUID is not very useful for the user, remove it
|
||||
string(REGEX REPLACE " \\(UUID:.*\\)" "" _gpu_info "${_line}")
|
||||
|
||||
if(NOT _gpu_info STREQUAL "")
|
||||
list(APPEND DETECT_GPU_INFO "${_gpu_info}")
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
check_num_gpu_info(${DETECT_GPU_COUNT_NVIDIA_SMI} DETECT_GPU_INFO)
|
||||
set(DETECT_GPU_COUNT ${DETECT_GPU_COUNT_NVIDIA_SMI})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# ##
|
||||
if(DETECT_GPU_COUNT GREATER 0)
|
||||
set(CMAKE_CUDA_ARCHITECTURES native PARENT_SCOPE) # do native
|
||||
else()
|
||||
# no GPUs found, like on Github CI runners
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 PARENT_SCOPE) # some safe value
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Check CUDA version and, if possible, enable multi-threaded compilation
|
||||
if(CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL "12.2")
|
||||
message(STATUS "Using multi-threaded CUDA compilation.")
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --split-compile 0" PARENT_SCOPE)
|
||||
else()
|
||||
message(STATUS "Can't use multi-threaded CUDA compilation.")
|
||||
endif()
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr" PARENT_SCOPE)
|
||||
set(CMAKE_CUDA_FLAGS_RELEASE "" PARENT_SCOPE)
|
||||
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -lineinfo" PARENT_SCOPE)
|
||||
endfunction()
|
||||
17
icicle/cmake/CurvesCommon.cmake
Normal file
17
icicle/cmake/CurvesCommon.cmake
Normal file
@@ -0,0 +1,17 @@
|
||||
function(check_curve)
|
||||
set(SUPPORTED_CURVES bn254;bls12_381;bls12_377;bw6_761;grumpkin)
|
||||
|
||||
set(IS_CURVE_SUPPORTED FALSE)
|
||||
set(I 0)
|
||||
foreach (SUPPORTED_CURVE ${SUPPORTED_CURVES})
|
||||
math(EXPR I "${I} + 1")
|
||||
if (CURVE STREQUAL SUPPORTED_CURVE)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DCURVE_ID=${I} -DFIELD_ID=${I}" PARENT_SCOPE)
|
||||
set(IS_CURVE_SUPPORTED TRUE)
|
||||
endif ()
|
||||
endforeach()
|
||||
|
||||
if (NOT IS_CURVE_SUPPORTED)
|
||||
message( FATAL_ERROR "The value of CURVE variable: ${CURVE} is not one of the supported curves: ${SUPPORTED_CURVES}" )
|
||||
endif ()
|
||||
endfunction()
|
||||
17
icicle/cmake/FieldsCommon.cmake
Normal file
17
icicle/cmake/FieldsCommon.cmake
Normal file
@@ -0,0 +1,17 @@
|
||||
function(check_field)
|
||||
set(SUPPORTED_FIELDS babybear)
|
||||
|
||||
set(IS_FIELD_SUPPORTED FALSE)
|
||||
set(I 1000)
|
||||
foreach (SUPPORTED_FIELD ${SUPPORTED_FIELDS})
|
||||
math(EXPR I "${I} + 1")
|
||||
if (FIELD STREQUAL SUPPORTED_FIELD)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DFIELD_ID=${I}" PARENT_SCOPE)
|
||||
set(IS_FIELD_SUPPORTED TRUE)
|
||||
endif ()
|
||||
endforeach()
|
||||
|
||||
if (NOT IS_FIELD_SUPPORTED)
|
||||
message( FATAL_ERROR "The value of FIELD variable: ${FIELD} is not one of the supported fields: ${SUPPORTED_FIELDS}" )
|
||||
endif ()
|
||||
endfunction()
|
||||
@@ -1,6 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include "field.cuh"
|
||||
#include "gpu-utils/sharedmem.cuh"
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include <iostream>
|
||||
|
||||
template <class FF>
|
||||
class Affine
|
||||
@@ -33,4 +35,13 @@ public:
|
||||
os << "x: " << point.x << "; y: " << point.y;
|
||||
return os;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
template <class FF>
|
||||
struct SharedMemory<Affine<FF>> {
|
||||
__device__ Affine<FF>* getPointer()
|
||||
{
|
||||
extern __shared__ Affine<FF> s_affine_[];
|
||||
return s_affine_;
|
||||
}
|
||||
};
|
||||
@@ -1,36 +1,38 @@
|
||||
#pragma once
|
||||
#ifndef INDEX_H
|
||||
#define INDEX_H
|
||||
#ifndef CURVE_CONFIG_H
|
||||
#define CURVE_CONFIG_H
|
||||
|
||||
#define BN254 1
|
||||
#define BLS12_381 2
|
||||
#define BLS12_377 3
|
||||
#define BW6_761 4
|
||||
#define GRUMPKIN 5
|
||||
|
||||
#include "primitives/field.cuh"
|
||||
#include "primitives/projective.cuh"
|
||||
#if defined(G2_DEFINED)
|
||||
#include "primitives/extension_field.cuh"
|
||||
#endif
|
||||
#include "fields/id.h"
|
||||
#include "curves/projective.cuh"
|
||||
|
||||
#if CURVE_ID == BN254
|
||||
#include "bn254_params.cuh"
|
||||
#include "curves/params/bn254.cuh"
|
||||
using namespace bn254;
|
||||
|
||||
#elif CURVE_ID == BLS12_381
|
||||
#include "bls12_381_params.cuh"
|
||||
#include "curves/params/bls12_381.cuh"
|
||||
using namespace bls12_381;
|
||||
|
||||
#elif CURVE_ID == BLS12_377
|
||||
#include "bls12_377_params.cuh"
|
||||
#include "curves/params/bls12_377.cuh"
|
||||
using namespace bls12_377;
|
||||
|
||||
#elif CURVE_ID == BW6_761
|
||||
#include "bw6_761_params.cuh"
|
||||
#include "curves/params/bw6_761.cuh"
|
||||
using namespace bw6_761;
|
||||
|
||||
#elif CURVE_ID == GRUMPKIN
|
||||
#include "grumpkin_params.cuh"
|
||||
#include "curves/params/grumpkin.cuh"
|
||||
using namespace grumpkin;
|
||||
#endif
|
||||
|
||||
#include "fields/field_config.cuh"
|
||||
using field_config::scalar_t;
|
||||
|
||||
#ifdef G2
|
||||
#include "fields/quadratic_extension.cuh"
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @namespace curve_config
|
||||
* Namespace with type definitions for short Weierstrass pairing-friendly [elliptic
|
||||
@@ -38,14 +40,11 @@ using namespace grumpkin;
|
||||
* with the `-DCURVE` env variable passed during build.
|
||||
*/
|
||||
namespace curve_config {
|
||||
/**
|
||||
* Scalar field of the curve. Is always a prime field.
|
||||
*/
|
||||
typedef Field<fp_config> scalar_t;
|
||||
/**
|
||||
* Base field of G1 curve. Is always a prime field.
|
||||
*/
|
||||
typedef Field<fq_config> point_field_t;
|
||||
|
||||
static constexpr point_field_t generator_x = point_field_t{g1_gen_x};
|
||||
static constexpr point_field_t generator_y = point_field_t{g1_gen_y};
|
||||
static constexpr point_field_t b = point_field_t{weierstrass_b};
|
||||
@@ -59,7 +58,7 @@ namespace curve_config {
|
||||
*/
|
||||
typedef Affine<point_field_t> affine_t;
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
#ifdef G2
|
||||
#if CURVE_ID == BW6_761
|
||||
typedef point_field_t g2_point_field_t;
|
||||
static constexpr g2_point_field_t g2_generator_x = g2_point_field_t{g2_gen_x};
|
||||
@@ -74,6 +73,7 @@ namespace curve_config {
|
||||
static constexpr g2_point_field_t g2_b =
|
||||
g2_point_field_t{point_field_t{weierstrass_b_g2_re}, point_field_t{weierstrass_b_g2_im}};
|
||||
#endif
|
||||
|
||||
/**
|
||||
* [Projective representation](https://hyperelliptic.org/EFD/g1p/auto-shortw-projective.html) of G2 curve.
|
||||
*/
|
||||
40
icicle/include/curves/params/bls12_377.cuh
Normal file
40
icicle/include/curves/params/bls12_377.cuh
Normal file
@@ -0,0 +1,40 @@
|
||||
#pragma once
|
||||
#ifndef BLS12_377_PARAMS_H
|
||||
#define BLS12_377_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/snark_fields/bls12_377_base.cuh"
|
||||
|
||||
namespace bls12_377 {
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512,
|
||||
0xbd37cb5c, 0x188282c8, 0xaa9d41bb, 0x85951e2c,
|
||||
0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36,
|
||||
0x4fb82305, 0x6d182ad4, 0xca3e52d9, 0xbd7fb348,
|
||||
0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {0x7c005196, 0x74e3e48f, 0xbb535402, 0x71889f52,
|
||||
0x57db6b9b, 0x7ea501f5, 0x203e5031, 0xc565f071,
|
||||
0xa3841d01, 0xc89630a2, 0x71c785fe, 0x018480be};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {0x6ea16afe, 0xb26bfefa, 0xbff76fe6, 0x5cf89984,
|
||||
0x0799c9de, 0xe7223ece, 0x6651cecb, 0x532777ee,
|
||||
0xb1b140d5, 0x70dc5a51, 0xe7004031, 0x00ea6040};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {0x09fd4ddf, 0xf0940944, 0x6d8c7c2e, 0xf2cf8888,
|
||||
0xf832d204, 0xe458c282, 0x74b49a58, 0xde03ed72,
|
||||
0xcbb2efb4, 0xd960736b, 0x5d446f7b, 0x00690d66};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {0x85eb8f93, 0xd9a1cdd1, 0x5e52270b, 0x4279b83f,
|
||||
0xcee304c2, 0x2463b01a, 0x3d591bf1, 0x61ef11ac,
|
||||
0x151a70aa, 0x9e549da3, 0xd2835518, 0x00f8169f};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
|
||||
0x9999999a, 0x1c9ed999, 0x1ccccccd, 0x0dd39e5c, 0x3c6bf800, 0x129207b6,
|
||||
0xcd5fd889, 0xdc7b4f91, 0x7460c589, 0x43bd0373, 0xdb0fd6f3, 0x010222f6};
|
||||
} // namespace bls12_377
|
||||
|
||||
#endif
|
||||
40
icicle/include/curves/params/bls12_381.cuh
Normal file
40
icicle/include/curves/params/bls12_381.cuh
Normal file
@@ -0,0 +1,40 @@
|
||||
#pragma once
|
||||
#ifndef BLS12_381_PARAMS_H
|
||||
#define BLS12_381_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/snark_fields/bls12_381_base.cuh"
|
||||
|
||||
namespace bls12_381 {
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f,
|
||||
0x171bac58, 0xa14e3a3f, 0x9774b905, 0xc3688c4f,
|
||||
0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744,
|
||||
0x2c04b3ed, 0x00db18cb, 0xd5d00af6, 0xfcf5e095,
|
||||
0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0x0bac0326,
|
||||
0x7ae3d177, 0xb4510b64, 0xfa403b02, 0xc6e47ad4,
|
||||
0x2dc51051, 0x26080527, 0xf08f0a91, 0x024aa2b2};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112,
|
||||
0xdc7f5049, 0xb5da61bb, 0x9920b61a, 0x596bd0d0,
|
||||
0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {0x08b82801, 0xe1935486, 0x3baca289, 0x923ac9cc,
|
||||
0x5160d12c, 0x6d429a69, 0x8cbdd3a7, 0xadfd9baa,
|
||||
0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0x0ce5d527};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27,
|
||||
0x572e99ab, 0x267492ab, 0x85a763af, 0xcb3e287e,
|
||||
0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x0606c4a0};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000004, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
|
||||
0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
|
||||
0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
} // namespace bls12_381
|
||||
|
||||
#endif
|
||||
31
icicle/include/curves/params/bn254.cuh
Normal file
31
icicle/include/curves/params/bn254.cuh
Normal file
@@ -0,0 +1,31 @@
|
||||
#pragma once
|
||||
#ifndef BN254_PARAMS_H
|
||||
#define BN254_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/snark_fields/bn254_base.cuh"
|
||||
|
||||
namespace bn254 {
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {0xd992f6ed, 0x46debd5c, 0xf75edadd, 0x674322d4,
|
||||
0x5e5c4479, 0x426a0066, 0x121f1e76, 0x1800deef};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {0xaef312c2, 0x97e485b7, 0x35a9e712, 0xf1aa4933,
|
||||
0x31fb5d25, 0x7260bfb7, 0x920d483a, 0x198e9393};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {0x66fa7daa, 0x4ce6cc01, 0x0c43d37b, 0xe3d1e769,
|
||||
0x8dcb408f, 0x4aab7180, 0xdb8c6deb, 0x12c85ea5};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {0xd122975b, 0x55acdadc, 0x70b38ef3, 0xbc4b3133,
|
||||
0x690c3395, 0xec9e99ad, 0x585ff075, 0x090689d0};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000003, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
|
||||
0x24a138e5, 0x3267e6dc, 0x59dbefa3, 0xb5b4c5e5, 0x1be06ac3, 0x81be1899, 0xceb8aaae, 0x2b149d40};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
|
||||
0x85c315d2, 0xe4a2bd06, 0xe52d1852, 0xa74fa084, 0xeed8fdf4, 0xcd2cafad, 0x3af0fed4, 0x009713b0};
|
||||
} // namespace bn254
|
||||
|
||||
#endif
|
||||
37
icicle/include/curves/params/bw6_761.cuh
Normal file
37
icicle/include/curves/params/bw6_761.cuh
Normal file
@@ -0,0 +1,37 @@
|
||||
#pragma once
|
||||
#ifndef BW6_761_PARAMS_H
|
||||
#define BW6_761_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/snark_fields/bw6_761_base.cuh"
|
||||
|
||||
namespace bw6_761 {
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_x = {
|
||||
0x66e5b43d, 0x4088f3af, 0xa6af603f, 0x055928ac, 0x56133e82, 0x6750dd03, 0x280ca27f, 0x03758f9a,
|
||||
0xc9ea0971, 0x5bd71fa0, 0x47729b90, 0xa17a54ce, 0x94c2e746, 0x11dbfcd2, 0xc15520ac, 0x79017ffa,
|
||||
0x85f56fc7, 0xee05c54b, 0x551b27f0, 0xe6a0cfb7, 0xa477beae, 0xb277ce98, 0x0ea190c8, 0x01075b02};
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_y = {
|
||||
0xb4e95363, 0xbafc8f2d, 0x0b20d2a1, 0xad1cb2be, 0xcad0fb93, 0xb2b08119, 0xb3053253, 0x9f9df141,
|
||||
0x6fc2cdd4, 0xbe3fb90b, 0x717a4c55, 0xcc685d31, 0x71b5b806, 0xc5b8fa17, 0xaf7e0dba, 0x265909f1,
|
||||
0xa2e573a3, 0x1a7348d2, 0x884c9ec6, 0x0f952589, 0x45cc2a42, 0xe6fd637b, 0x0a6fc574, 0x0058b84e};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x = {
|
||||
0xcd025f1c, 0xa830c194, 0xe1bf995b, 0x6410cf4f, 0xc2ad54b0, 0x00e96efb, 0x3cd208d7, 0xce6948cb,
|
||||
0x00e1b6ba, 0x963317a3, 0xac70e7c7, 0xc5bbcae9, 0xf09feb58, 0x734ec3f1, 0xab3da268, 0x26b41c5d,
|
||||
0x13890f6d, 0x4c062010, 0xc5a7115f, 0xd61053aa, 0x69d660f9, 0xc852a82e, 0x41d9b816, 0x01101332};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y = {
|
||||
0x28c73b61, 0xeb70a167, 0xf9eac689, 0x91ec0594, 0x3c5a02a5, 0x58aa2d3a, 0x504affc7, 0x3ea96fcd,
|
||||
0xffa82300, 0x8906c170, 0xd2c712b8, 0x64f293db, 0x33293fef, 0x94c97eb7, 0x0b95a59c, 0x0a1d86c8,
|
||||
0x53ffe316, 0x81a78e27, 0xcec2181c, 0x26b7cf9a, 0xe4b6d2dc, 0x8179eb10, 0x7761369f, 0x0017c335};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {
|
||||
0x0000008a, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
|
||||
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
|
||||
0x8087be41, 0x528275ef, 0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824};
|
||||
static constexpr storage<fq_config::limbs_count> g2_weierstrass_b = {
|
||||
0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
} // namespace bw6_761
|
||||
|
||||
#endif
|
||||
@@ -2,13 +2,11 @@
|
||||
#ifndef GRUMPKIN_PARAMS_H
|
||||
#define GRUMPKIN_PARAMS_H
|
||||
|
||||
#include "utils/storage.cuh"
|
||||
#include "bn254_params.cuh"
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/snark_fields/bn254_scalar.cuh"
|
||||
|
||||
namespace grumpkin {
|
||||
typedef bn254::fq_config fp_config;
|
||||
typedef bn254::fp_config fq_config;
|
||||
|
||||
// G1 generator
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
@@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include "affine.cuh"
|
||||
#include "gpu-utils/sharedmem.cuh"
|
||||
|
||||
template <typename FF, class SCALAR_FF, const FF& B_VALUE, const FF& GENERATOR_X, const FF& GENERATOR_Y>
|
||||
class Projective
|
||||
@@ -8,6 +9,9 @@ class Projective
|
||||
friend Affine<FF>;
|
||||
|
||||
public:
|
||||
typedef Affine<FF> Aff;
|
||||
typedef SCALAR_FF Scalar;
|
||||
|
||||
static constexpr unsigned SCALAR_FF_NBITS = SCALAR_FF::NBITS;
|
||||
static constexpr unsigned FF_NBITS = FF::NBITS;
|
||||
|
||||
@@ -23,7 +27,10 @@ public:
|
||||
return {point.x * denom, point.y * denom};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Projective from_affine(const Affine<FF>& point) { return {point.x, point.y, FF::one()}; }
|
||||
static HOST_DEVICE_INLINE Projective from_affine(const Affine<FF>& point)
|
||||
{
|
||||
return point == Affine<FF>::zero() ? zero() : Projective{point.x, point.y, FF::one()};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Projective ToMontgomery(const Projective& point)
|
||||
{
|
||||
@@ -221,3 +228,12 @@ public:
|
||||
out[i] = (i % size < 100) ? to_affine(rand_host()) : out[i - 100];
|
||||
}
|
||||
};
|
||||
|
||||
template <typename FF, class SCALAR_FF, const FF& B_VALUE, const FF& GENERATOR_X, const FF& GENERATOR_Y>
|
||||
struct SharedMemory<Projective<FF, SCALAR_FF, B_VALUE, GENERATOR_X, GENERATOR_Y>> {
|
||||
__device__ Projective<FF, SCALAR_FF, B_VALUE, GENERATOR_X, GENERATOR_Y>* getPointer()
|
||||
{
|
||||
extern __shared__ Projective<FF, SCALAR_FF, B_VALUE, GENERATOR_X, GENERATOR_Y> s_projective_[];
|
||||
return s_projective_;
|
||||
}
|
||||
};
|
||||
@@ -18,10 +18,13 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "utils/error_handler.cuh"
|
||||
#include "utils/host_math.cuh"
|
||||
#include "utils/ptx.cuh"
|
||||
#include "utils/storage.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "gpu-utils/sharedmem.cuh"
|
||||
#include "host_math.cuh"
|
||||
#include "ptx.cuh"
|
||||
#include "storage.cuh"
|
||||
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <random>
|
||||
@@ -244,10 +247,10 @@ public:
|
||||
add_sub_u32_device(const uint32_t* x, const uint32_t* y, uint32_t* r, size_t n = (TLC >> 1))
|
||||
{
|
||||
r[0] = SUBTRACT ? ptx::sub_cc(x[0], y[0]) : ptx::add_cc(x[0], y[0]);
|
||||
for (unsigned i = 1; i < (CARRY_OUT ? n : n - 1); i++)
|
||||
for (unsigned i = 1; i < n; i++)
|
||||
r[i] = SUBTRACT ? ptx::subc_cc(x[i], y[i]) : ptx::addc_cc(x[i], y[i]);
|
||||
if (!CARRY_OUT) {
|
||||
r[n - 1] = SUBTRACT ? ptx::subc(x[n - 1], y[n - 1]) : ptx::addc(x[n - 1], y[n - 1]);
|
||||
ptx::addc(0, 0);
|
||||
return 0;
|
||||
}
|
||||
return SUBTRACT ? ptx::subc(0, 0) : ptx::addc(0, 0);
|
||||
@@ -466,67 +469,84 @@ public:
|
||||
*/
|
||||
static DEVICE_INLINE void multiply_msb_raw_device(const ff_storage& as, const ff_storage& bs, ff_wide_storage& rs)
|
||||
{
|
||||
const uint32_t* a = as.limbs;
|
||||
const uint32_t* b = bs.limbs;
|
||||
uint32_t* even = rs.limbs;
|
||||
__align__(16) uint32_t odd[2 * TLC - 2];
|
||||
if constexpr (TLC > 1) {
|
||||
const uint32_t* a = as.limbs;
|
||||
const uint32_t* b = bs.limbs;
|
||||
uint32_t* even = rs.limbs;
|
||||
__align__(16) uint32_t odd[2 * TLC - 2];
|
||||
|
||||
even[TLC - 1] = ptx::mul_hi(a[TLC - 2], b[0]);
|
||||
odd[TLC - 2] = ptx::mul_lo(a[TLC - 1], b[0]);
|
||||
odd[TLC - 1] = ptx::mul_hi(a[TLC - 1], b[0]);
|
||||
size_t i;
|
||||
UNROLL
|
||||
for (i = 2; i < TLC - 1; i += 2) {
|
||||
mad_row_msb<true>(&even[TLC - 2], &odd[TLC - 2], &a[TLC - i - 1], b[i - 1], i + 1);
|
||||
mad_row_msb<false>(&odd[TLC - 2], &even[TLC - 2], &a[TLC - i - 2], b[i], i + 2);
|
||||
even[TLC - 1] = ptx::mul_hi(a[TLC - 2], b[0]);
|
||||
odd[TLC - 2] = ptx::mul_lo(a[TLC - 1], b[0]);
|
||||
odd[TLC - 1] = ptx::mul_hi(a[TLC - 1], b[0]);
|
||||
size_t i;
|
||||
UNROLL
|
||||
for (i = 2; i < TLC - 1; i += 2) {
|
||||
mad_row_msb<true>(&even[TLC - 2], &odd[TLC - 2], &a[TLC - i - 1], b[i - 1], i + 1);
|
||||
mad_row_msb<false>(&odd[TLC - 2], &even[TLC - 2], &a[TLC - i - 2], b[i], i + 2);
|
||||
}
|
||||
mad_row(&even[TLC], &odd[TLC - 2], a, b[TLC - 1]);
|
||||
|
||||
// merge |even| and |odd|
|
||||
ptx::add_cc(even[TLC - 1], odd[TLC - 2]);
|
||||
for (i = TLC - 1; i < 2 * TLC - 2; i++)
|
||||
even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
|
||||
even[i + 1] = ptx::addc(even[i + 1], 0);
|
||||
} else {
|
||||
multiply_raw_device(as, bs, rs);
|
||||
}
|
||||
mad_row(&even[TLC], &odd[TLC - 2], a, b[TLC - 1]);
|
||||
|
||||
// merge |even| and |odd|
|
||||
ptx::add_cc(even[TLC - 1], odd[TLC - 2]);
|
||||
for (i = TLC - 1; i < 2 * TLC - 2; i++)
|
||||
even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
|
||||
even[i + 1] = ptx::addc(even[i + 1], 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* A function that computes the low half of the fused multiply-and-add \f$ rs = as \cdot bs + cs \f$.
|
||||
* A function that computes the low half of the fused multiply-and-add \f$ rs = as \cdot bs + cs \f$ where
|
||||
* \f$ bs = 2^{32*nof_limbs} \f$.
|
||||
*
|
||||
* For efficiency, this method does not include terms that are too large. Namely, limb product \f$ a_i \cdot b_j \f$
|
||||
* is excluded if \f$ i + j > TLC - 1 \f$ and only the lower half is included if \f$ i + j = TLC - 1 \f$. All other
|
||||
* limb products are included.
|
||||
*/
|
||||
static DEVICE_INLINE void
|
||||
multiply_and_add_lsb_raw_device(const ff_storage& as, const ff_storage& bs, ff_storage& cs, ff_storage& rs)
|
||||
multiply_and_add_lsb_neg_modulus_raw_device(const ff_storage& as, ff_storage& cs, ff_storage& rs)
|
||||
{
|
||||
ff_storage bs = get_neg_modulus();
|
||||
const uint32_t* a = as.limbs;
|
||||
const uint32_t* b = bs.limbs;
|
||||
uint32_t* c = cs.limbs;
|
||||
uint32_t* even = rs.limbs;
|
||||
__align__(16) uint32_t odd[TLC - 1];
|
||||
size_t i;
|
||||
// `b[0]` is \f$ 2^{32} \f$ minus the last limb of prime modulus. Because most scalar (and some base) primes
|
||||
// are necessarily NTT-friendly, `b[0]` often turns out to be \f$ 2^{32} - 1 \f$. This actually leads to
|
||||
// less efficient SASS generated by nvcc, so this case needed separate handling.
|
||||
if (b[0] == UINT32_MAX) {
|
||||
add_sub_u32_device<true, false>(cs.limbs, a, even, TLC);
|
||||
for (i = 0; i < TLC - 1; i++)
|
||||
odd[i] = a[i];
|
||||
} else {
|
||||
mul_n_and_add(even, a, b[0], cs.limbs, TLC);
|
||||
mul_n(odd, a + 1, b[0], TLC - 1);
|
||||
}
|
||||
mad_row_lsb(&even[2], &odd[0], a, b[1], TLC - 1);
|
||||
UNROLL
|
||||
for (i = 2; i < TLC - 1; i += 2) {
|
||||
mad_row_lsb(&odd[i], &even[i], a, b[i], TLC - i);
|
||||
mad_row_lsb(&even[i + 2], &odd[i], a, b[i + 1], TLC - i - 1);
|
||||
}
|
||||
|
||||
// merge |even| and |odd|
|
||||
even[1] = ptx::add_cc(even[1], odd[0]);
|
||||
for (i = 1; i < TLC - 2; i++)
|
||||
even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
|
||||
even[i + 1] = ptx::addc(even[i + 1], odd[i]);
|
||||
if constexpr (TLC > 2) {
|
||||
__align__(16) uint32_t odd[TLC - 1];
|
||||
size_t i;
|
||||
// `b[0]` is \f$ 2^{32} \f$ minus the last limb of prime modulus. Because most scalar (and some base) primes
|
||||
// are necessarily NTT-friendly, `b[0]` often turns out to be \f$ 2^{32} - 1 \f$. This actually leads to
|
||||
// less efficient SASS generated by nvcc, so this case needed separate handling.
|
||||
if (b[0] == UINT32_MAX) {
|
||||
add_sub_u32_device<true, false>(c, a, even, TLC);
|
||||
for (i = 0; i < TLC - 1; i++)
|
||||
odd[i] = a[i];
|
||||
} else {
|
||||
mul_n_and_add(even, a, b[0], c, TLC);
|
||||
mul_n(odd, a + 1, b[0], TLC - 1);
|
||||
}
|
||||
mad_row_lsb(&even[2], &odd[0], a, b[1], TLC - 1);
|
||||
UNROLL
|
||||
for (i = 2; i < TLC - 1; i += 2) {
|
||||
mad_row_lsb(&odd[i], &even[i], a, b[i], TLC - i);
|
||||
mad_row_lsb(&even[i + 2], &odd[i], a, b[i + 1], TLC - i - 1);
|
||||
}
|
||||
|
||||
// merge |even| and |odd|
|
||||
even[1] = ptx::add_cc(even[1], odd[0]);
|
||||
for (i = 1; i < TLC - 2; i++)
|
||||
even[i + 1] = ptx::addc_cc(even[i + 1], odd[i]);
|
||||
even[i + 1] = ptx::addc(even[i + 1], odd[i]);
|
||||
} else if (TLC == 2) {
|
||||
even[0] = ptx::mad_lo(a[0], b[0], c[0]);
|
||||
even[1] = ptx::mad_hi(a[0], b[0], c[0]);
|
||||
even[1] = ptx::mad_lo(a[0], b[1], even[1]);
|
||||
even[1] = ptx::mad_lo(a[1], b[0], even[1]);
|
||||
} else if (TLC == 1) {
|
||||
even[0] = ptx::mad_lo(a[0], b[0], c[0]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -599,29 +619,47 @@ public:
|
||||
const uint32_t* a = as.limbs;
|
||||
const uint32_t* b = bs.limbs;
|
||||
uint32_t* r = rs.limbs;
|
||||
// Next two lines multiply high and low halves of operands (\f$ a_{lo} \cdot b_{lo}; a_{hi} \cdot b_{hi} \$f) and
|
||||
// write the results into `r`.
|
||||
multiply_short_raw_device(a, b, r);
|
||||
multiply_short_raw_device(&a[TLC >> 1], &b[TLC >> 1], &r[TLC]);
|
||||
__align__(16) uint32_t middle_part[TLC];
|
||||
__align__(16) uint32_t diffs[TLC];
|
||||
// Differences of halves \f$ a_{hi} - a_{lo}; b_{lo} - b_{hi} \$f are written into `diffs`, signs written to
|
||||
// `carry1` and `carry2`.
|
||||
uint32_t carry1 = add_sub_u32_device<true, true>(&a[TLC >> 1], a, diffs);
|
||||
uint32_t carry2 = add_sub_u32_device<true, true>(b, &b[TLC >> 1], &diffs[TLC >> 1]);
|
||||
// Compute the "middle part" of Karatsuba: \f$ a_{lo} \cdot b_{hi} + b_{lo} \cdot a_{hi} \f$.
|
||||
// This is where the assumption about unset high bit of `a` and `b` is relevant.
|
||||
multiply_and_add_short_raw_device(diffs, &diffs[TLC >> 1], middle_part, r, &r[TLC]);
|
||||
// Corrections that need to be performed when differences are negative.
|
||||
// Again, carry doesn't need to be propagated due to unset high bits of `a` and `b`.
|
||||
if (carry1) add_sub_u32_device<true, false>(&middle_part[TLC >> 1], &diffs[TLC >> 1], &middle_part[TLC >> 1]);
|
||||
if (carry2) add_sub_u32_device<true, false>(&middle_part[TLC >> 1], diffs, &middle_part[TLC >> 1]);
|
||||
// Now that middle part is fully correct, it can be added to the result.
|
||||
add_sub_u32_device<false, true>(&r[TLC >> 1], middle_part, &r[TLC >> 1], TLC);
|
||||
if constexpr (TLC > 2) {
|
||||
// Next two lines multiply high and low halves of operands (\f$ a_{lo} \cdot b_{lo}; a_{hi} \cdot b_{hi} \$f) and
|
||||
// write the results into `r`.
|
||||
multiply_short_raw_device(a, b, r);
|
||||
multiply_short_raw_device(&a[TLC >> 1], &b[TLC >> 1], &r[TLC]);
|
||||
__align__(16) uint32_t middle_part[TLC];
|
||||
__align__(16) uint32_t diffs[TLC];
|
||||
// Differences of halves \f$ a_{hi} - a_{lo}; b_{lo} - b_{hi} \$f are written into `diffs`, signs written to
|
||||
// `carry1` and `carry2`.
|
||||
uint32_t carry1 = add_sub_u32_device<true, true>(&a[TLC >> 1], a, diffs);
|
||||
uint32_t carry2 = add_sub_u32_device<true, true>(b, &b[TLC >> 1], &diffs[TLC >> 1]);
|
||||
// Compute the "middle part" of Karatsuba: \f$ a_{lo} \cdot b_{hi} + b_{lo} \cdot a_{hi} \f$.
|
||||
// This is where the assumption about unset high bit of `a` and `b` is relevant.
|
||||
multiply_and_add_short_raw_device(diffs, &diffs[TLC >> 1], middle_part, r, &r[TLC]);
|
||||
// Corrections that need to be performed when differences are negative.
|
||||
// Again, carry doesn't need to be propagated due to unset high bits of `a` and `b`.
|
||||
if (carry1) add_sub_u32_device<true, false>(&middle_part[TLC >> 1], &diffs[TLC >> 1], &middle_part[TLC >> 1]);
|
||||
if (carry2) add_sub_u32_device<true, false>(&middle_part[TLC >> 1], diffs, &middle_part[TLC >> 1]);
|
||||
// Now that middle part is fully correct, it can be added to the result.
|
||||
add_sub_u32_device<false, true>(&r[TLC >> 1], middle_part, &r[TLC >> 1], TLC);
|
||||
|
||||
// Carry from adding middle part has to be propagated to the highest limb.
|
||||
for (size_t i = TLC + (TLC >> 1); i < 2 * TLC; i++)
|
||||
r[i] = ptx::addc_cc(r[i], 0);
|
||||
// Carry from adding middle part has to be propagated to the highest limb.
|
||||
for (size_t i = TLC + (TLC >> 1); i < 2 * TLC; i++)
|
||||
r[i] = ptx::addc_cc(r[i], 0);
|
||||
} else if (TLC == 2) {
|
||||
__align__(8) uint32_t odd[2];
|
||||
r[0] = ptx::mul_lo(a[0], b[0]);
|
||||
r[1] = ptx::mul_hi(a[0], b[0]);
|
||||
r[2] = ptx::mul_lo(a[1], b[1]);
|
||||
r[3] = ptx::mul_hi(a[1], b[1]);
|
||||
odd[0] = ptx::mul_lo(a[0], b[1]);
|
||||
odd[1] = ptx::mul_hi(a[0], b[1]);
|
||||
odd[0] = ptx::mad_lo(a[1], b[0], odd[0]);
|
||||
odd[1] = ptx::mad_hi(a[1], b[0], odd[1]);
|
||||
r[1] = ptx::add_cc(r[1], odd[0]);
|
||||
r[2] = ptx::addc_cc(r[2], odd[1]);
|
||||
r[3] = ptx::addc(r[3], 0);
|
||||
} else if (TLC == 1) {
|
||||
r[0] = ptx::mul_lo(a[0], b[0]);
|
||||
r[1] = ptx::mul_hi(a[0], b[0]);
|
||||
}
|
||||
}
|
||||
|
||||
static HOST_INLINE void multiply_raw_host(const ff_storage& as, const ff_storage& bs, ff_wide_storage& rs)
|
||||
@@ -647,13 +685,13 @@ public:
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE void
|
||||
multiply_and_add_lsb_raw(const ff_storage& as, const ff_storage& bs, ff_storage& cs, ff_storage& rs)
|
||||
multiply_and_add_lsb_neg_modulus_raw(const ff_storage& as, ff_storage& cs, ff_storage& rs)
|
||||
{
|
||||
#ifdef __CUDA_ARCH__
|
||||
return multiply_and_add_lsb_raw_device(as, bs, cs, rs);
|
||||
return multiply_and_add_lsb_neg_modulus_raw_device(as, cs, rs);
|
||||
#else
|
||||
Wide r_wide = {};
|
||||
multiply_raw_host(as, bs, r_wide.limbs_storage);
|
||||
multiply_raw_host(as, get_neg_modulus(), r_wide.limbs_storage);
|
||||
Field r = Wide::get_lower(r_wide);
|
||||
add_limbs<false>(cs, r.limbs_storage, rs);
|
||||
#endif
|
||||
@@ -784,7 +822,7 @@ public:
|
||||
Field xs_lo = Wide::get_lower(xs);
|
||||
// Here we need to compute the lsb of `xs - l \cdot p` and to make use of fused multiply-and-add, we rewrite it as
|
||||
// `xs + l \cdot (2^{32 \cdot TLC}-p)` which is the same as original (up to higher limbs which we don't care about).
|
||||
multiply_and_add_lsb_raw(l_hi.limbs_storage, get_neg_modulus(), xs_lo.limbs_storage, r.limbs_storage);
|
||||
multiply_and_add_lsb_neg_modulus_raw(l_hi.limbs_storage, xs_lo.limbs_storage, r.limbs_storage);
|
||||
ff_storage r_reduced = {};
|
||||
uint32_t carry;
|
||||
// As mentioned, either 2 or 1 reduction can be performed depending on the field in question.
|
||||
@@ -888,21 +926,24 @@ public:
|
||||
return rs;
|
||||
}
|
||||
|
||||
// Assumes the number is even!
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE Field div2(const Field& xs)
|
||||
{
|
||||
const uint32_t* x = xs.limbs_storage.limbs;
|
||||
Field rs = {};
|
||||
uint32_t* r = rs.limbs_storage.limbs;
|
||||
if constexpr (TLC > 1) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
UNROLL
|
||||
UNROLL
|
||||
#endif
|
||||
for (unsigned i = 0; i < TLC - 1; i++) {
|
||||
for (unsigned i = 0; i < TLC - 1; i++) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
r[i] = __funnelshift_rc(x[i], x[i + 1], 1);
|
||||
r[i] = __funnelshift_rc(x[i], x[i + 1], 1);
|
||||
#else
|
||||
r[i] = (x[i] >> 1) | (x[i + 1] << 31);
|
||||
r[i] = (x[i] >> 1) | (x[i + 1] << 31);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
r[TLC - 1] = x[TLC - 1] >> 1;
|
||||
return sub_modulus<MODULUS_MULTIPLE>(rs);
|
||||
@@ -962,4 +1003,13 @@ struct std::hash<Field<CONFIG>> {
|
||||
hash ^= std::hash<uint32_t>()(key.limbs_storage.limbs[i]) + 0x9e3779b9 + (hash << 6) + (hash >> 2);
|
||||
return hash;
|
||||
}
|
||||
};
|
||||
|
||||
template <class CONFIG>
|
||||
struct SharedMemory<Field<CONFIG>> {
|
||||
__device__ Field<CONFIG>* getPointer()
|
||||
{
|
||||
extern __shared__ Field<CONFIG> s_scalar_[];
|
||||
return s_scalar_;
|
||||
}
|
||||
};
|
||||
51
icicle/include/fields/field_config.cuh
Normal file
51
icicle/include/fields/field_config.cuh
Normal file
@@ -0,0 +1,51 @@
|
||||
#pragma once
|
||||
#ifndef FIELD_CONFIG_H
|
||||
#define FIELD_CONFIG_H
|
||||
|
||||
#include "fields/id.h"
|
||||
#include "fields/field.cuh"
|
||||
|
||||
#if FIELD_ID == BN254
|
||||
#include "fields/snark_fields/bn254_scalar.cuh"
|
||||
using bn254::fp_config;
|
||||
#elif FIELD_ID == BLS12_381
|
||||
#include "fields/snark_fields/bls12_381_scalar.cuh"
|
||||
using bls12_381::fp_config;
|
||||
#elif FIELD_ID == BLS12_377
|
||||
#include "fields/snark_fields/bls12_377_scalar.cuh"
|
||||
using bls12_377::fp_config;
|
||||
#elif FIELD_ID == BW6_761
|
||||
#include "fields/snark_fields/bls12_377_base.cuh"
|
||||
typedef bls12_377::fq_config fp_config;
|
||||
#elif FIELD_ID == GRUMPKIN
|
||||
#include "fields/snark_fields/bn254_base.cuh"
|
||||
typedef bn254::fq_config fp_config;
|
||||
|
||||
#elif FIELD_ID == BABY_BEAR
|
||||
#include "fields/stark_fields/baby_bear.cuh"
|
||||
using baby_bear::fp_config;
|
||||
#ifdef EXT_FIELD
|
||||
#include "fields/quartic_extension.cuh"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/**
|
||||
* @namespace field_config
|
||||
* Namespace with type definitions for finite fields. Here, concrete types are created in accordance
|
||||
* with the `-DFIELD` env variable passed during build.
|
||||
*/
|
||||
namespace field_config {
|
||||
/**
|
||||
* Scalar field. Is always a prime field.
|
||||
*/
|
||||
typedef Field<fp_config> scalar_t;
|
||||
|
||||
#ifdef EXT_FIELD
|
||||
/**
|
||||
* Extension field of `scalar_t` enabled if `-DEXT_FIELD` env variable is.
|
||||
*/
|
||||
typedef ExtensionField<fp_config> extension_t;
|
||||
#endif
|
||||
} // namespace field_config
|
||||
|
||||
#endif
|
||||
@@ -4,7 +4,7 @@
|
||||
|
||||
#include <cstdint>
|
||||
#include <cuda_runtime.h>
|
||||
#include "common.cuh"
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
namespace host_math {
|
||||
|
||||
// return x + y with uint32_t operands
|
||||
13
icicle/include/fields/id.h
Normal file
13
icicle/include/fields/id.h
Normal file
@@ -0,0 +1,13 @@
|
||||
#pragma once
|
||||
#ifndef FIELD_ID_H
|
||||
#define FIELD_ID_H
|
||||
|
||||
#define BN254 1
|
||||
#define BLS12_381 2
|
||||
#define BLS12_377 3
|
||||
#define BW6_761 4
|
||||
#define GRUMPKIN 5
|
||||
|
||||
#define BABY_BEAR 1001
|
||||
|
||||
#endif
|
||||
@@ -1,12 +1,15 @@
|
||||
#pragma once
|
||||
|
||||
#include "field.cuh"
|
||||
#include "common.cuh"
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "gpu-utils/sharedmem.cuh"
|
||||
|
||||
template <typename CONFIG>
|
||||
class ExtensionField
|
||||
{
|
||||
private:
|
||||
friend Field<CONFIG>;
|
||||
|
||||
typedef typename Field<CONFIG>::Wide FWide;
|
||||
|
||||
struct ExtensionWide {
|
||||
@@ -47,6 +50,12 @@ public:
|
||||
|
||||
static HOST_INLINE ExtensionField rand_host() { return ExtensionField{FF::rand_host(), FF::rand_host()}; }
|
||||
|
||||
static void RandHostMany(ExtensionField* out, int size)
|
||||
{
|
||||
for (int i = 0; i < size; i++)
|
||||
out[i] = rand_host();
|
||||
}
|
||||
|
||||
template <unsigned REDUCTION_SIZE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField sub_modulus(const ExtensionField& xs)
|
||||
{
|
||||
@@ -69,15 +78,47 @@ public:
|
||||
return ExtensionField{xs.real - ys.real, xs.imaginary - ys.imaginary};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator+(FF xs, const ExtensionField& ys)
|
||||
{
|
||||
return ExtensionField{xs + ys.real, ys.imaginary};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator-(FF xs, const ExtensionField& ys)
|
||||
{
|
||||
return ExtensionField{xs - ys.real, FF::neg(ys.imaginary)};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const FF& ys)
|
||||
{
|
||||
return ExtensionField{xs.real + ys, xs.imaginary};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const FF& ys)
|
||||
{
|
||||
return ExtensionField{xs.real - ys, xs.imaginary};
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const ExtensionField& ys)
|
||||
{
|
||||
FWide real_prod = FF::mul_wide(xs.real, ys.real);
|
||||
FWide imaginary_prod = FF::mul_wide(xs.imaginary, ys.imaginary);
|
||||
FWide prod_of_sums = FF::mul_wide(xs.real + xs.imaginary, ys.real + ys.imaginary);
|
||||
FWide i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FWide::neg(i_sq_times_im) : i_sq_times_im;
|
||||
return ExtensionWide{real_prod + i_sq_times_im, prod_of_sums - real_prod - imaginary_prod};
|
||||
FWide nonresidue_times_im = FF::template mul_unsigned<CONFIG::nonresidue>(imaginary_prod);
|
||||
nonresidue_times_im = CONFIG::nonresidue_is_negative ? FWide::neg(nonresidue_times_im) : nonresidue_times_im;
|
||||
return ExtensionWide{real_prod + nonresidue_times_im, prod_of_sums - real_prod - imaginary_prod};
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const FF& ys)
|
||||
{
|
||||
return ExtensionWide{FF::mul_wide(xs.real, ys), FF::mul_wide(xs.imaginary, ys)};
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const FF& xs, const ExtensionField& ys)
|
||||
{
|
||||
return mul_wide(ys, xs);
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
@@ -87,7 +128,8 @@ public:
|
||||
FF::template reduce<MODULUS_MULTIPLE>(xs.real), FF::template reduce<MODULUS_MULTIPLE>(xs.imaginary)};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator*(const ExtensionField& xs, const ExtensionField& ys)
|
||||
template <class T1, class T2>
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator*(const T1& xs, const T2& ys)
|
||||
{
|
||||
ExtensionWide xy = mul_wide(xs, ys);
|
||||
return reduce(xy);
|
||||
@@ -111,9 +153,9 @@ public:
|
||||
FF imaginary_prod = FF::template mul_const<mul_imaginary>(xs_imaginary);
|
||||
FF re_im = FF::template mul_const<mul_real>(xs_imaginary);
|
||||
FF im_re = FF::template mul_const<mul_imaginary>(xs_real);
|
||||
FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(imaginary_prod);
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
|
||||
return ExtensionField{real_prod + i_sq_times_im, re_im + im_re};
|
||||
FF nonresidue_times_im = FF::template mul_unsigned<CONFIG::nonresidue>(imaginary_prod);
|
||||
nonresidue_times_im = CONFIG::nonresidue_is_negative ? FF::neg(nonresidue_times_im) : nonresidue_times_im;
|
||||
return ExtensionField{real_prod + nonresidue_times_im, re_im + im_re};
|
||||
}
|
||||
|
||||
template <uint32_t multiplier, unsigned REDUCTION_SIZE = 1>
|
||||
@@ -142,14 +184,23 @@ public:
|
||||
return ExtensionField{FF::neg(xs.real), FF::neg(xs.imaginary)};
|
||||
}
|
||||
|
||||
// inverse assumes that xs is nonzero
|
||||
// inverse of zero is set to be zero which is what we want most of the time
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField inverse(const ExtensionField& xs)
|
||||
{
|
||||
ExtensionField xs_conjugate = {xs.real, FF::neg(xs.imaginary)};
|
||||
FF i_sq_times_im = FF::template mul_unsigned<CONFIG::i_squared>(FF::sqr(xs.imaginary));
|
||||
i_sq_times_im = CONFIG::i_squared_is_negative ? FF::neg(i_sq_times_im) : i_sq_times_im;
|
||||
FF nonresidue_times_im = FF::template mul_unsigned<CONFIG::nonresidue>(FF::sqr(xs.imaginary));
|
||||
nonresidue_times_im = CONFIG::nonresidue_is_negative ? FF::neg(nonresidue_times_im) : nonresidue_times_im;
|
||||
// TODO: wide here
|
||||
FF xs_norm_squared = FF::sqr(xs.real) - i_sq_times_im;
|
||||
FF xs_norm_squared = FF::sqr(xs.real) - nonresidue_times_im;
|
||||
return xs_conjugate * ExtensionField{FF::inverse(xs_norm_squared), FF::zero()};
|
||||
}
|
||||
};
|
||||
|
||||
template <class CONFIG>
|
||||
struct SharedMemory<ExtensionField<CONFIG>> {
|
||||
__device__ ExtensionField<CONFIG>* getPointer()
|
||||
{
|
||||
extern __shared__ ExtensionField<CONFIG> s_ext2_scalar_[];
|
||||
return s_ext2_scalar_;
|
||||
}
|
||||
};
|
||||
257
icicle/include/fields/quartic_extension.cuh
Normal file
257
icicle/include/fields/quartic_extension.cuh
Normal file
@@ -0,0 +1,257 @@
|
||||
#pragma once
|
||||
|
||||
#include "field.cuh"
|
||||
#include "gpu-utils/modifiers.cuh"
|
||||
#include "gpu-utils/sharedmem.cuh"
|
||||
|
||||
template <typename CONFIG>
|
||||
class ExtensionField
|
||||
{
|
||||
private:
|
||||
typedef typename Field<CONFIG>::Wide FWide;
|
||||
|
||||
struct ExtensionWide {
|
||||
FWide real;
|
||||
FWide im1;
|
||||
FWide im2;
|
||||
FWide im3;
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionWide operator+(ExtensionWide xs, const ExtensionWide& ys)
|
||||
{
|
||||
return ExtensionWide{xs.real + ys.real, xs.im1 + ys.im1, xs.im2 + ys.im2, xs.im3 + ys.im3};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionWide operator-(ExtensionWide xs, const ExtensionWide& ys)
|
||||
{
|
||||
return ExtensionWide{xs.real - ys.real, xs.im1 - ys.im1, xs.im2 - ys.im2, xs.im3 - ys.im3};
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
typedef Field<CONFIG> FF;
|
||||
static constexpr unsigned TLC = 4 * CONFIG::limbs_count;
|
||||
|
||||
FF real;
|
||||
FF im1;
|
||||
FF im2;
|
||||
FF im3;
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField zero()
|
||||
{
|
||||
return ExtensionField{FF::zero(), FF::zero(), FF::zero(), FF::zero()};
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField one()
|
||||
{
|
||||
return ExtensionField{FF::one(), FF::zero(), FF::zero(), FF::zero()};
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField ToMontgomery(const ExtensionField& xs)
|
||||
{
|
||||
return ExtensionField{
|
||||
xs.real * FF{CONFIG::montgomery_r}, xs.im1 * FF{CONFIG::montgomery_r}, xs.im2 * FF{CONFIG::montgomery_r},
|
||||
xs.im3 * FF{CONFIG::montgomery_r}};
|
||||
}
|
||||
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField FromMontgomery(const ExtensionField& xs)
|
||||
{
|
||||
return ExtensionField{
|
||||
xs.real * FF{CONFIG::montgomery_r_inv}, xs.im1 * FF{CONFIG::montgomery_r_inv},
|
||||
xs.im2 * FF{CONFIG::montgomery_r_inv}, xs.im3 * FF{CONFIG::montgomery_r_inv}};
|
||||
}
|
||||
|
||||
static HOST_INLINE ExtensionField rand_host()
|
||||
{
|
||||
return ExtensionField{FF::rand_host(), FF::rand_host(), FF::rand_host(), FF::rand_host()};
|
||||
}
|
||||
|
||||
static void RandHostMany(ExtensionField* out, int size)
|
||||
{
|
||||
for (int i = 0; i < size; i++)
|
||||
out[i] = rand_host();
|
||||
}
|
||||
|
||||
template <unsigned REDUCTION_SIZE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField sub_modulus(const ExtensionField& xs)
|
||||
{
|
||||
return ExtensionField{
|
||||
FF::sub_modulus<REDUCTION_SIZE>(&xs.real), FF::sub_modulus<REDUCTION_SIZE>(&xs.im1),
|
||||
FF::sub_modulus<REDUCTION_SIZE>(&xs.im2), FF::sub_modulus<REDUCTION_SIZE>(&xs.im3)};
|
||||
}
|
||||
|
||||
friend std::ostream& operator<<(std::ostream& os, const ExtensionField& xs)
|
||||
{
|
||||
os << "{ Real: " << xs.real << " }; { Im1: " << xs.im1 << " }; { Im2: " << xs.im2 << " }; { Im3: " << xs.im3
|
||||
<< " };";
|
||||
return os;
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const ExtensionField& ys)
|
||||
{
|
||||
return ExtensionField{xs.real + ys.real, xs.im1 + ys.im1, xs.im2 + ys.im2, xs.im3 + ys.im3};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const ExtensionField& ys)
|
||||
{
|
||||
return ExtensionField{xs.real - ys.real, xs.im1 - ys.im1, xs.im2 - ys.im2, xs.im3 - ys.im3};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator+(FF xs, const ExtensionField& ys)
|
||||
{
|
||||
return ExtensionField{xs + ys.real, ys.im1, ys.im2, ys.im3};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator-(FF xs, const ExtensionField& ys)
|
||||
{
|
||||
return ExtensionField{xs - ys.real, FF::neg(ys.im1), FF::neg(ys.im2), FF::neg(ys.im3)};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator+(ExtensionField xs, const FF& ys)
|
||||
{
|
||||
return ExtensionField{xs.real + ys, xs.im1, xs.im2, xs.im3};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator-(ExtensionField xs, const FF& ys)
|
||||
{
|
||||
return ExtensionField{xs.real - ys, xs.im1, xs.im2, xs.im3};
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const ExtensionField& ys)
|
||||
{
|
||||
if (CONFIG::nonresidue_is_negative)
|
||||
return ExtensionWide{
|
||||
FF::mul_wide(xs.real, ys.real) -
|
||||
FF::template mul_unsigned<CONFIG::nonresidue>(
|
||||
FF::mul_wide(xs.im1, ys.im3) + FF::mul_wide(xs.im2, ys.im2) + FF::mul_wide(xs.im3, ys.im1)),
|
||||
FF::mul_wide(xs.real, ys.im1) + FF::mul_wide(xs.im1, ys.real) -
|
||||
FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im2, ys.im3) + FF::mul_wide(xs.im3, ys.im2)),
|
||||
FF::mul_wide(xs.real, ys.im2) + FF::mul_wide(xs.im1, ys.im1) + FF::mul_wide(xs.im2, ys.real) -
|
||||
FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im3, ys.im3)),
|
||||
FF::mul_wide(xs.real, ys.im3) + FF::mul_wide(xs.im1, ys.im2) + FF::mul_wide(xs.im2, ys.im1) +
|
||||
FF::mul_wide(xs.im3, ys.real)};
|
||||
else
|
||||
return ExtensionWide{
|
||||
FF::mul_wide(xs.real, ys.real) +
|
||||
FF::template mul_unsigned<CONFIG::nonresidue>(
|
||||
FF::mul_wide(xs.im1, ys.im3) + FF::mul_wide(xs.im2, ys.im2) + FF::mul_wide(xs.im3, ys.im1)),
|
||||
FF::mul_wide(xs.real, ys.im1) + FF::mul_wide(xs.im1, ys.real) +
|
||||
FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im2, ys.im3) + FF::mul_wide(xs.im3, ys.im2)),
|
||||
FF::mul_wide(xs.real, ys.im2) + FF::mul_wide(xs.im1, ys.im1) + FF::mul_wide(xs.im2, ys.real) +
|
||||
FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im3, ys.im3)),
|
||||
FF::mul_wide(xs.real, ys.im3) + FF::mul_wide(xs.im1, ys.im2) + FF::mul_wide(xs.im2, ys.im1) +
|
||||
FF::mul_wide(xs.im3, ys.real)};
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const ExtensionField& xs, const FF& ys)
|
||||
{
|
||||
return ExtensionWide{
|
||||
FF::mul_wide(xs.real, ys), FF::mul_wide(xs.im1, ys), FF::mul_wide(xs.im2, ys), FF::mul_wide(xs.im3, ys)};
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide mul_wide(const FF& xs, const ExtensionField& ys)
|
||||
{
|
||||
return ExtensionWide{
|
||||
FF::mul_wide(xs, ys.real), FF::mul_wide(xs, ys.im1), FF::mul_wide(xs, ys.im2), FF::mul_wide(xs, ys.im3)};
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField reduce(const ExtensionWide& xs)
|
||||
{
|
||||
return ExtensionField{
|
||||
FF::template reduce<MODULUS_MULTIPLE>(xs.real), FF::template reduce<MODULUS_MULTIPLE>(xs.im1),
|
||||
FF::template reduce<MODULUS_MULTIPLE>(xs.im2), FF::template reduce<MODULUS_MULTIPLE>(xs.im3)};
|
||||
}
|
||||
|
||||
template <class T1, class T2>
|
||||
friend HOST_DEVICE_INLINE ExtensionField operator*(const T1& xs, const T2& ys)
|
||||
{
|
||||
ExtensionWide xy = mul_wide(xs, ys);
|
||||
return reduce(xy);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const ExtensionField& xs, const ExtensionField& ys)
|
||||
{
|
||||
return (xs.real == ys.real) && (xs.im1 == ys.im1) && (xs.im2 == ys.im2) && (xs.im3 == ys.im3);
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator!=(const ExtensionField& xs, const ExtensionField& ys) { return !(xs == ys); }
|
||||
|
||||
template <uint32_t multiplier, unsigned REDUCTION_SIZE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField mul_unsigned(const ExtensionField& xs)
|
||||
{
|
||||
return {
|
||||
FF::template mul_unsigned<multiplier>(xs.real), FF::template mul_unsigned<multiplier>(xs.im1),
|
||||
FF::template mul_unsigned<multiplier>(xs.im2), FF::template mul_unsigned<multiplier>(xs.im3)};
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionWide sqr_wide(const ExtensionField& xs)
|
||||
{
|
||||
// TODO: change to a more efficient squaring
|
||||
return mul_wide<MODULUS_MULTIPLE>(xs, xs);
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField sqr(const ExtensionField& xs)
|
||||
{
|
||||
// TODO: change to a more efficient squaring
|
||||
return xs * xs;
|
||||
}
|
||||
|
||||
template <unsigned MODULUS_MULTIPLE = 1>
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField neg(const ExtensionField& xs)
|
||||
{
|
||||
return {FF::neg(xs.real), FF::neg(xs.im1), FF::neg(xs.im2), FF::neg(xs.im3)};
|
||||
}
|
||||
|
||||
// inverse of zero is set to be zero which is what we want most of the time
|
||||
static constexpr HOST_DEVICE_INLINE ExtensionField inverse(const ExtensionField& xs)
|
||||
{
|
||||
FF x, x0, x2;
|
||||
if (CONFIG::nonresidue_is_negative) {
|
||||
x0 = FF::reduce(
|
||||
FF::sqr_wide(xs.real) +
|
||||
FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im1, xs.im3 + xs.im3) - FF::sqr_wide(xs.im2)));
|
||||
x2 = FF::reduce(
|
||||
FF::mul_wide(xs.real, xs.im2 + xs.im2) - FF::sqr_wide(xs.im1) +
|
||||
FF::template mul_unsigned<CONFIG::nonresidue>(FF::sqr_wide(xs.im3)));
|
||||
x = FF::reduce(FF::sqr_wide(x0) + FF::template mul_unsigned<CONFIG::nonresidue>(FF::sqr_wide(x2)));
|
||||
} else {
|
||||
x0 = FF::reduce(
|
||||
FF::sqr_wide(xs.real) -
|
||||
FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im1, xs.im3 + xs.im3) - FF::sqr_wide(xs.im2)));
|
||||
x2 = FF::reduce(
|
||||
FF::mul_wide(xs.real, xs.im2 + xs.im2) - FF::sqr_wide(xs.im1) -
|
||||
FF::template mul_unsigned<CONFIG::nonresidue>(FF::sqr_wide(xs.im3)));
|
||||
x = FF::reduce(FF::sqr_wide(x0) - FF::template mul_unsigned<CONFIG::nonresidue>(FF::sqr_wide(x2)));
|
||||
}
|
||||
FF x_inv = FF::inverse(x);
|
||||
x0 = x0 * x_inv;
|
||||
x2 = x2 * x_inv;
|
||||
return {
|
||||
FF::reduce(
|
||||
(CONFIG::nonresidue_is_negative
|
||||
? (FF::mul_wide(xs.real, x0) + FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im2, x2)))
|
||||
: (FF::mul_wide(xs.real, x0)) - FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im2, x2)))),
|
||||
FF::reduce(
|
||||
(CONFIG::nonresidue_is_negative
|
||||
? FWide::neg(FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im3, x2)))
|
||||
: FF::template mul_unsigned<CONFIG::nonresidue>(FF::mul_wide(xs.im3, x2))) -
|
||||
FF::mul_wide(xs.im1, x0)),
|
||||
FF::reduce(FF::mul_wide(xs.im2, x0) - FF::mul_wide(xs.real, x2)),
|
||||
FF::reduce(FF::mul_wide(xs.im1, x2) - FF::mul_wide(xs.im3, x0)),
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
template <class CONFIG>
|
||||
struct SharedMemory<ExtensionField<CONFIG>> {
|
||||
__device__ ExtensionField<CONFIG>* getPointer()
|
||||
{
|
||||
extern __shared__ ExtensionField<CONFIG> s_ext4_scalar_[];
|
||||
return s_ext4_scalar_;
|
||||
}
|
||||
};
|
||||
@@ -1,196 +1,10 @@
|
||||
#pragma once
|
||||
#ifndef BLS12_377_PARAMS_H
|
||||
#define BLS12_377_PARAMS_H
|
||||
#ifndef BLS12_377_BASE_PARAMS_H
|
||||
#define BLS12_377_BASE_PARAMS_H
|
||||
|
||||
#include "utils/storage.cuh"
|
||||
#include "fields/storage.cuh"
|
||||
|
||||
namespace bls12_377 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 47;
|
||||
static constexpr unsigned modulus_bit_count = 253;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
|
||||
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd,
|
||||
0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb,
|
||||
0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0xf5ee7fff, 0x2ffffffe, 0xa6558901,
|
||||
0xa3c84ffe, 0x9f4bb2e1, 0x65d35aa9, 0xed549aa1};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3,
|
||||
0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7,
|
||||
0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f,
|
||||
0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b,
|
||||
0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xfffffff3, 0x7d1c7fff, 0x6ffffff2, 0x7257f50f,
|
||||
0x512c0fee, 0x16d81575, 0x2bbb9a9d, 0x0d4bda32};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x1beeec02, 0x4122dd1a, 0x74fee875, 0xbd1eae95,
|
||||
0x27b28e2f, 0x838557e2, 0x2290c02c, 0x07b30191};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e},
|
||||
{0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765, 0x970dec00, 0x23ed1347, 0x00000000, 0x00000000},
|
||||
{0xfbfa0a01, 0x0f830f7e, 0xd75769a0, 0x20f8b46c, 0xf05d5033, 0x7108bd18, 0x0788de01, 0x07405e08},
|
||||
{0x60b9bdae, 0xc78085a6, 0x789094f5, 0x3116ec22, 0xce87d660, 0x0a02a81d, 0xc2a94856, 0x0ead8236},
|
||||
{0x3e83a7cc, 0x6ffc39d9, 0x958a0a74, 0x117d996e, 0x0b92e8c9, 0xc242289d, 0x29d977d6, 0x0484efb4},
|
||||
{0x0111ec3f, 0x15455b00, 0xc5f6be6f, 0x6b62d7af, 0x337f2d07, 0xfcba0365, 0x43fccd26, 0x0f151842},
|
||||
{0xc31ec69b, 0x57951b2e, 0x2a37ce1f, 0x3e0a4be7, 0xcf3b198a, 0x960aeb4a, 0x341fd5cd, 0x04fb0673},
|
||||
{0xa921851f, 0x71c1b78e, 0x7808f239, 0x3c26340c, 0x976fb990, 0xbcc8f69b, 0xe880dc71, 0x06a5edb2},
|
||||
{0xc0f5679e, 0x7619eab5, 0x0dc0b9cd, 0x1f4cd10e, 0xbf6a480a, 0x7e1b70aa, 0x7f5461bb, 0x0ffc66da},
|
||||
{0xec5cbab2, 0x8159806d, 0x498264a3, 0x14ea1333, 0xe3abfaa6, 0x56bbe1d8, 0x02aa031f, 0x09d2b5c4},
|
||||
{0xc010c48a, 0xd2aa9562, 0x3b004b60, 0x447e5c11, 0x11e243bb, 0xd5a21c13, 0x0ab418b1, 0x01eab23e},
|
||||
{0xacff6986, 0x08715ee8, 0xa93924d0, 0xab01878a, 0x6e9ae5c4, 0xbfbc5e71, 0x26b08d6e, 0x0f8000bf},
|
||||
{0x3ddbc679, 0x06bc13b0, 0x615256ce, 0x7269a1f1, 0x1f5221a2, 0xf7716fbf, 0x8c66c14f, 0x0fa1f02c},
|
||||
{0x906f531f, 0xdd40f131, 0x30728eff, 0xb06b29c7, 0x88839294, 0xc891fd19, 0x646978e8, 0x04e88447},
|
||||
{0x6e259cdc, 0xb1e4b769, 0x00514e5e, 0xbcb0b709, 0x05113e7f, 0x74edb7c0, 0xe92e22af, 0x10c88511},
|
||||
{0x240ede5b, 0xebb2e898, 0x42cd84c6, 0xc2639185, 0x9408f956, 0xf79e8391, 0x94e87a7d, 0x06872fa1},
|
||||
{0x260678ff, 0xf8522249, 0xa8de9973, 0x6148cb16, 0x5a4e8d56, 0x5750f3f4, 0xbaeaf0c3, 0x0e805156},
|
||||
{0x3d766f80, 0x1b4b71cf, 0x1069012d, 0x47d21195, 0x9151ebec, 0x5635235f, 0x2b13c808, 0x093f7d91},
|
||||
{0x4637701d, 0x0848f958, 0x4c8353af, 0x8a750076, 0x0ef6174a, 0x485f4e4f, 0xf38db632, 0x078d97a1},
|
||||
{0x66a16869, 0x50c487c1, 0xd1fd4525, 0x380a66ab, 0x265e8539, 0xd455a01a, 0x064b5334, 0x0cd62875},
|
||||
{0x3358eb25, 0xdbc547bc, 0x722037db, 0x8909d398, 0x5e705b6d, 0x8b7075b5, 0x9bdaf407, 0x02694bb2},
|
||||
{0xf45b9621, 0x102fbfb0, 0xf04faac0, 0xe80f4241, 0x7ca61177, 0x0b830bfd, 0x7033169d, 0x10521892},
|
||||
{0xcc943028, 0xed2576ad, 0xfa4c6090, 0x846e49bc, 0x0049d8e6, 0xc74c1865, 0x665d7be5, 0x0e9c5a12},
|
||||
{0xafeb494b, 0x97319dcd, 0x1d78404c, 0xab30c83e, 0xf26ffe90, 0x452d8a48, 0xa36452c7, 0x0bfc2e92},
|
||||
{0xedc626c3, 0xf30e312d, 0xcf1f3a94, 0x8367a7ca, 0x917a1b28, 0x621e15e1, 0xf2e93b82, 0x07cd59f8},
|
||||
{0xf02ba42c, 0x553085d9, 0x1119b10d, 0x59662159, 0x6b8ea03f, 0xaa670958, 0x7ce92983, 0x066f6f5f},
|
||||
{0x4dd87a5e, 0xf423a283, 0xd9a4c364, 0x1fe46601, 0xbfdc7e9b, 0xda4addbf, 0x3bf94b2b, 0x0a7f2bd8},
|
||||
{0xe5f8848a, 0x270a2326, 0xa727567d, 0x97d14afa, 0x48746fc7, 0x1a3a5a4e, 0xa42f077a, 0x0044e4b1},
|
||||
{0x20b7298a, 0xd7652451, 0x65013b06, 0xc7c9a0b7, 0xad0d8457, 0x479b82a9, 0x0c99f5ce, 0x0bef1e5a},
|
||||
{0x1912f7fa, 0x77d7da1d, 0x299fd7d6, 0xbcb7a5b2, 0x142a4480, 0x705e45dd, 0xb492dbd8, 0x0dc835fd},
|
||||
{0xa0234d2d, 0xe943054c, 0xe5f5be5e, 0x673b0ee0, 0x5048a19a, 0xcdd48e41, 0xabc3cb99, 0x0997d277},
|
||||
{0xa9966ac4, 0x1ae0ea67, 0xda83fb3b, 0x4e2dbb1c, 0x0b51380e, 0xf77cf749, 0xb28a7670, 0x048b4b0e},
|
||||
{0xb14361d4, 0x7f1db43f, 0x25ab6d51, 0x7927e578, 0x383bf21e, 0xb43e52a5, 0xd27fa99f, 0x077595e9},
|
||||
{0xa90a2740, 0xfe3ca4f0, 0x512a7c7a, 0xd259ff36, 0xb41fe696, 0xbca3176a, 0xf33132ce, 0x05bd5ea3},
|
||||
{0xf284f768, 0xdeee484b, 0xe26a0475, 0x2a02e015, 0x88d968c2, 0xf0eb4925, 0x82a391c9, 0x0620ce9e},
|
||||
{0xbd83a3da, 0xd3b69b29, 0xe02ce197, 0x9543950f, 0xc2f87783, 0x80799665, 0xc15be215, 0x11ce8199},
|
||||
{0x1b29736e, 0x8f267f19, 0x1d5a0c3a, 0xa2e04d58, 0x1ae99514, 0x76803064, 0x57f7c806, 0x12129439},
|
||||
{0xf32d6bac, 0xa0b973d4, 0xf0d81b72, 0xae951889, 0x2e2daa0a, 0x51dbe098, 0x40d9af8f, 0x04679474},
|
||||
{0x22df9f13, 0x56313de8, 0x599e7536, 0xe2e75200, 0x6d163e50, 0xa1b4fce7, 0xc8111763, 0x0aec2172},
|
||||
{0x355dd694, 0x4258374d, 0x44c76a20, 0x5c31e8ac, 0xaa5fd062, 0x9b473969, 0x1a37b6b4, 0x0a693d77},
|
||||
{0x44ddbbdc, 0xbafb92a6, 0x26b01974, 0x63c7a02d, 0x5f28a274, 0x0ff86e13, 0x867f2e29, 0x0a7b462a},
|
||||
{0xd5fba57b, 0x90684fea, 0xe0defe98, 0xed237883, 0x030ae924, 0xc502b692, 0xe7a1ec2c, 0x08aa58e8},
|
||||
{0x5e9020dd, 0xade9d4b4, 0x87db8813, 0x489259d2, 0x25051238, 0x5ddce740, 0xb5bc4d11, 0x0c775db1},
|
||||
{0x293f8481, 0xd52cc17a, 0x6f133205, 0x041178fb, 0xb2961832, 0xbbc70d18, 0x481760cd, 0x073d34d1},
|
||||
{0xfdacff58, 0x8215b91d, 0x98331645, 0xd8d9177d, 0x439e803c, 0xe85223ad, 0xcca42c1f, 0x04aa8ef0},
|
||||
{0x01ab3a4d, 0x006f60fa, 0x814ba450, 0xe6600e15, 0xdf9eb147, 0xbde4df36, 0x33760d7b, 0x055d58fa},
|
||||
{0xec2a895e, 0x476ef4a4, 0x63e3f04a, 0x9b506ee3, 0xd1a8a12f, 0x60c69477, 0x0cb92cc1, 0x11d4b7f6}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e},
|
||||
{0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99, 0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e},
|
||||
{0xd60fb046, 0xc9fa190c, 0xc5b4674e, 0xdb5c179b, 0xbc7b8726, 0x2b2bce0b, 0xbf6e69bf, 0x0e4eb338},
|
||||
{0x8ffc4ed5, 0x74732d1f, 0xb7f2eefc, 0x42d9f590, 0xa24dd4dd, 0xf70461e5, 0xef64676f, 0x03b6eba4},
|
||||
{0x102bbab0, 0x5a21f98a, 0x8d8e2efb, 0xa6a147a9, 0x7612906f, 0x0eb4f005, 0x47d8d2e3, 0x0e1a5481},
|
||||
{0xd01e5aa8, 0x6e509add, 0x6e3f123d, 0xe1582468, 0x8274db24, 0xbd6313ee, 0xd173a634, 0x05d5836e},
|
||||
{0xe975c0cf, 0x6aab3344, 0x6f1dc38e, 0xca362e0e, 0x1dd1743a, 0x2fe72cda, 0xc1b4c4c2, 0x0c1c956e},
|
||||
{0xec89a64f, 0x59fe97a0, 0xe8de5d4c, 0x579617d7, 0xc9c1ea7b, 0x256a305b, 0x53fa131b, 0x01ffae4e},
|
||||
{0x29bcb088, 0x463a73ff, 0xe1438e80, 0xee9e9a5e, 0x3c9369e4, 0x2a00951f, 0x80a32052, 0x09711183},
|
||||
{0x4bec8dd2, 0xa36899db, 0x96393687, 0x2946872e, 0x842df3c8, 0xd4b5734f, 0x5f5cd8fb, 0x0834098f},
|
||||
{0xe3c711b9, 0x4bc485f6, 0x648d1d7e, 0xf43a2598, 0xee88abaa, 0x7f981a0e, 0xec6a3f27, 0x0c88c9c3},
|
||||
{0x49046b52, 0x42bcc6c2, 0x56ab9ecc, 0xcc77294a, 0xe4df3ddd, 0x02ecb41a, 0x67f76726, 0x0e567d22},
|
||||
{0x91c64fc2, 0x1cc56cc3, 0xd16a490b, 0x8cb71e65, 0x14fac366, 0x984be37e, 0xa25d7ba5, 0x0a08e032},
|
||||
{0xd4f5941e, 0x966d9739, 0xe5772a73, 0x5805deb6, 0x5c1f970c, 0xe4eb0d33, 0xbdf35409, 0x039715db},
|
||||
{0xcc6518ac, 0x8419686c, 0x9c7a2366, 0x96dec3a8, 0x71724384, 0xefbfcac6, 0xaf34c239, 0x0c44b99a},
|
||||
{0xc18ff4fd, 0xcb66fe1b, 0x86c8d586, 0x588e18b3, 0x1dfab57c, 0xc6e6d2a3, 0x7d7d4efd, 0x10918ad2},
|
||||
{0x97a18f58, 0x56d6cf22, 0xd0d7abd9, 0x11710758, 0x5eb7a9c5, 0xd1a6608b, 0xc4937e38, 0x04059bdb},
|
||||
{0x4b1b63a9, 0x12998cbc, 0xcf420c9f, 0x0f780c6c, 0x129289ad, 0xa5e48723, 0x240a141d, 0x0a3a1223},
|
||||
{0x00db2b48, 0xa43c0e02, 0x933d10ee, 0x76585489, 0xc0ba6a80, 0x12d64af1, 0x2fad8d8e, 0x01940f43},
|
||||
{0x1d75bec9, 0xe29ef6c0, 0xd4b0183b, 0xead287a2, 0xedfd3795, 0x75a017cf, 0x64427c8e, 0x107f8d0f},
|
||||
{0xa26c8c12, 0xa6f4e1d1, 0xf6610f7e, 0x13571553, 0x56701caf, 0xd95e5df6, 0x2263d69d, 0x050e7b89},
|
||||
{0xc161761f, 0x271d7caf, 0xc369a371, 0xf1001d6f, 0x00e60f51, 0x65286415, 0xb74d14b8, 0x00b918f9},
|
||||
{0x03ad3139, 0x01d3f431, 0xa137ce16, 0xe56f6002, 0x1deb42e8, 0x97f53369, 0xaa37cddd, 0x033fa9ac},
|
||||
{0x60cf1330, 0x840f913b, 0x1df5ed87, 0x5610cde6, 0x72b36ddf, 0x858381b0, 0x6f64e0b7, 0x109bf66c},
|
||||
{0x930cee0b, 0x432d3626, 0xf26e8ba3, 0x55ed3efb, 0x14c5457f, 0x802eebcc, 0xe2310f22, 0x00d300e3},
|
||||
{0x4b9ac952, 0x3d29f5ba, 0xc8ea8f94, 0x7c7f2662, 0xcefc3052, 0x736ccb63, 0x0981f3cb, 0x04bfce2f},
|
||||
{0x5d4e643c, 0x3da791ea, 0x85bff013, 0xb6a956ef, 0xd73de6a3, 0x86c629a8, 0x6b8c48a9, 0x0a5a5f55},
|
||||
{0x49c6284a, 0x9ba6aa00, 0xeacbdc63, 0x0b8429fb, 0xedafdf37, 0x9b9c6c5b, 0xad0c78c6, 0x009907e8},
|
||||
{0x3e47b53f, 0x50380ce2, 0x3a9613fc, 0x6ea3c2d3, 0x4c87ab50, 0xfe743105, 0xd192221c, 0x07871979},
|
||||
{0xe978594b, 0x4ddd3320, 0x3abe3f79, 0xe5f36fbe, 0xe4dcff8e, 0x5dba9ef2, 0x7105148f, 0x0bfc27e2},
|
||||
{0x498fb549, 0xd5993cd5, 0x09da9272, 0x718adcee, 0x72bd5bc0, 0x9e03cbb4, 0xc592813f, 0x07206942},
|
||||
{0x78fd3239, 0xaf29730b, 0x40c3e723, 0xbd907ac9, 0x77f214f7, 0x5dcc0aad, 0xb05fb3a1, 0x02d958da},
|
||||
{0xdf80223d, 0x55f432c9, 0x11a2fed9, 0x23daf2f6, 0x41ae8c34, 0x9e43e003, 0x95f22373, 0x0d51533b},
|
||||
{0x7998b62c, 0xbb53132b, 0x22c9b4aa, 0x064a9186, 0x71d61334, 0xd56de253, 0x04e416f6, 0x10fcf25f},
|
||||
{0xdddb58ec, 0x41f8042f, 0x10886d85, 0x7dd54384, 0x622ff4b4, 0x19544f90, 0x050cc539, 0x02f0b49a},
|
||||
{0xa39b02a3, 0x8a3de898, 0xdc94422c, 0x068b2992, 0xf493db31, 0x1c5f019a, 0x11b0f668, 0x066b1790},
|
||||
{0x78500f1a, 0x98310dd7, 0x735ccb27, 0x1c6050bf, 0xb2081df4, 0x07b6fa7f, 0xfa0f1e20, 0x003edf24},
|
||||
{0x89b0ca6f, 0xb4d938e2, 0x2c897570, 0x0214eb59, 0x2d4cf27a, 0x56c45327, 0x3ed546a4, 0x10a2f358},
|
||||
{0xef01ed78, 0xf2828212, 0xf103c9ca, 0xa66094ac, 0x7a2d5573, 0xdceb481d, 0x8af46aab, 0x0190fcde},
|
||||
{0x526bf9fc, 0x023031cc, 0x79c209ba, 0x0e4136c0, 0x3ec42e5c, 0xe5234df1, 0x1d455234, 0x00cb9592},
|
||||
{0x33bf2a1c, 0x842b0c9c, 0xa29b9236, 0x1fd43c95, 0xc06795d3, 0x6b37a603, 0x0c1b712a, 0x00017b17},
|
||||
{0xaf858193, 0x2b955be2, 0x5fb5e378, 0xa513d8be, 0xa326aeb9, 0x88c4ebeb, 0xf3d45990, 0x00c378e2},
|
||||
{0x6464580f, 0x33e6c8c0, 0x3c4aa09f, 0x9d560eb3, 0xcc98f404, 0xb3f1a899, 0x8ca24b48, 0x012c1ea5},
|
||||
{0xe3b4dc56, 0xa0594a67, 0x91b698e1, 0xc8e6b582, 0x8df78057, 0x711cadbf, 0x396466f8, 0x0049abdf},
|
||||
{0x4ffa086a, 0xecc89610, 0xca06afc6, 0x4db82291, 0x8f3a6426, 0x9ae7c68c, 0x2a874432, 0x0b3dae8c},
|
||||
{0x3b3625b6, 0x1e62401f, 0x28471e5a, 0xd0692164, 0x5cad6b77, 0xb85aa9ec, 0xaa95acf2, 0x063e4b66},
|
||||
{0xb9112c51, 0x2542c2b2, 0x6e23b3ce, 0x36ead8da, 0x76476754, 0x9a268d13, 0xa1ad7cf1, 0x121f44ad}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af},
|
||||
{0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06},
|
||||
{0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2},
|
||||
{0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e, 0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08},
|
||||
{0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346, 0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33},
|
||||
{0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22, 0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9},
|
||||
{0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210, 0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93},
|
||||
{0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87, 0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9},
|
||||
{0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3, 0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab},
|
||||
{0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61, 0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85},
|
||||
{0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af, 0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1},
|
||||
{0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57, 0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8},
|
||||
{0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab, 0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003},
|
||||
{0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054, 0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1},
|
||||
{0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9, 0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007},
|
||||
{0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54, 0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3},
|
||||
{0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29, 0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08},
|
||||
{0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094, 0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3},
|
||||
{0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9, 0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309},
|
||||
{0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164, 0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433},
|
||||
{0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31, 0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9},
|
||||
{0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198, 0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513},
|
||||
{0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b, 0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539},
|
||||
{0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5, 0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b},
|
||||
{0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651, 0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555},
|
||||
{0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8, 0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559},
|
||||
{0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3, 0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c},
|
||||
{0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9, 0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d},
|
||||
{0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4, 0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e},
|
||||
{0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e},
|
||||
{0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e},
|
||||
{0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e},
|
||||
{0x7af74001, 0xa2117fff, 0x232ac481, 0x2b8e9efe, 0x2bdd8972, 0x139dfa73, 0x90d6f2a7, 0x12ab655e},
|
||||
{0xbd7ba001, 0x56117fff, 0x79956241, 0xc29c8afe, 0xc40a9cb9, 0xba2923c8, 0x9581cbfe, 0x12ab655e},
|
||||
{0xdebdd001, 0x30117fff, 0xa4cab121, 0x8e2380fe, 0x9021265d, 0x8d6eb873, 0x97d738aa, 0x12ab655e},
|
||||
{0xef5ee801, 0x1d117fff, 0xba655891, 0x73e6fbfe, 0xf62c6b2f, 0x771182c8, 0x9901ef00, 0x12ab655e},
|
||||
{0xf7af7401, 0x13917fff, 0xc532ac49, 0x66c8b97e, 0xa9320d98, 0x6be2e7f3, 0x99974a2b, 0x12ab655e},
|
||||
{0xfbd7ba01, 0x0ed17fff, 0xca995625, 0xe039983e, 0x02b4decc, 0xe64b9a89, 0x99e1f7c0, 0x12ab655e},
|
||||
{0xfdebdd01, 0x0c717fff, 0xcd4cab13, 0x1cf2079e, 0xaf764767, 0xa37ff3d3, 0x9a074e8b, 0x12ab655e},
|
||||
{0xfef5ee81, 0x0b417fff, 0xcea6558a, 0x3b4e3f4e, 0x05d6fbb4, 0x021a2079, 0x9a19f9f1, 0x12ab655e},
|
||||
{0xff7af741, 0x8aa97fff, 0xcf532ac5, 0xca7c5b26, 0xb10755da, 0xb16736cb, 0x9a234fa3, 0x12ab655e},
|
||||
{0xffbd7ba1, 0x4a5d7fff, 0xcfa99563, 0x12136912, 0x069f82ee, 0x090dc1f5, 0x9a27fa7d, 0x12ab655e},
|
||||
{0xffdebdd1, 0x2a377fff, 0xcfd4cab2, 0xb5def008, 0xb16b9977, 0xb4e10789, 0x9a2a4fe9, 0x12ab655e},
|
||||
{0xffef5ee9, 0x9a247fff, 0xcfea6559, 0x87c4b383, 0x06d1a4bc, 0x0acaaa54, 0x9a2b7aa0, 0x12ab655e},
|
||||
{0xfff7af75, 0x521affff, 0x4ff532ad, 0xf0b79541, 0x3184aa5e, 0x35bf7bb9, 0x9a2c0ffb, 0x12ab655e},
|
||||
{0xfffbd7bb, 0x2e163fff, 0x0ffa9957, 0x25310620, 0xc6de2d30, 0xcb39e46b, 0x9a2c5aa8, 0x12ab655e},
|
||||
{0xfffdebde, 0x1c13dfff, 0x6ffd4cac, 0xbf6dbe8f, 0x118aee98, 0x95f718c5, 0x9a2c7fff, 0x12ab655e}}};
|
||||
};
|
||||
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
static constexpr unsigned omegas_count = 48;
|
||||
@@ -521,41 +335,11 @@ namespace bls12_377 {
|
||||
{0xfffdebde, 0x0ff7ffff, 0x0fffa3d3, 0x8e4c751f, 0x6bcccc32, 0xb7275e5b, 0xdc08ab03, 0x0321276d, 0x28f6304f,
|
||||
0xdd22a6ac, 0x17c50a31, 0x01ae3a46}}};
|
||||
|
||||
// i^2, the square of the imaginary unit for the extension field
|
||||
static constexpr uint32_t i_squared = 5;
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 5;
|
||||
// true if nonresidue is negative
|
||||
static constexpr bool nonresidue_is_negative = true;
|
||||
};
|
||||
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_x = {0xb21be9ef, 0xeab9b16e, 0xffcd394e, 0xd5481512,
|
||||
0xbd37cb5c, 0x188282c8, 0xaa9d41bb, 0x85951e2c,
|
||||
0xbf87ff54, 0xc8fc6225, 0xfe740a67, 0x008848de};
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x559c8ea6, 0xfd82de55, 0x34a9591a, 0xc2fe3d36,
|
||||
0x4fb82305, 0x6d182ad4, 0xca3e52d9, 0xbd7fb348,
|
||||
0x30afeec4, 0x1f674f5d, 0xc5102eff, 0x01914a69};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {0x7c005196, 0x74e3e48f, 0xbb535402, 0x71889f52,
|
||||
0x57db6b9b, 0x7ea501f5, 0x203e5031, 0xc565f071,
|
||||
0xa3841d01, 0xc89630a2, 0x71c785fe, 0x018480be};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {0x6ea16afe, 0xb26bfefa, 0xbff76fe6, 0x5cf89984,
|
||||
0x0799c9de, 0xe7223ece, 0x6651cecb, 0x532777ee,
|
||||
0xb1b140d5, 0x70dc5a51, 0xe7004031, 0x00ea6040};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {0x09fd4ddf, 0xf0940944, 0x6d8c7c2e, 0xf2cf8888,
|
||||
0xf832d204, 0xe458c282, 0x74b49a58, 0xde03ed72,
|
||||
0xcbb2efb4, 0xd960736b, 0x5d446f7b, 0x00690d66};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {0x85eb8f93, 0xd9a1cdd1, 0x5e52270b, 0x4279b83f,
|
||||
0xcee304c2, 0x2463b01a, 0x3d591bf1, 0x61ef11ac,
|
||||
0x151a70aa, 0x9e549da3, 0xd2835518, 0x00f8169f};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
|
||||
0x9999999a, 0x1c9ed999, 0x1ccccccd, 0x0dd39e5c, 0x3c6bf800, 0x129207b6,
|
||||
0xcd5fd889, 0xdc7b4f91, 0x7460c589, 0x43bd0373, 0xdb0fd6f3, 0x010222f6};
|
||||
} // namespace bls12_377
|
||||
|
||||
#endif
|
||||
#endif
|
||||
195
icicle/include/fields/snark_fields/bls12_377_scalar.cuh
Normal file
195
icicle/include/fields/snark_fields/bls12_377_scalar.cuh
Normal file
@@ -0,0 +1,195 @@
|
||||
#pragma once
|
||||
#ifndef BLS12_377_SCALAR_PARAMS_H
|
||||
#define BLS12_377_SCALAR_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
|
||||
namespace bls12_377 {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned omegas_count = 47;
|
||||
static constexpr unsigned modulus_bit_count = 253;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe,
|
||||
0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0x00000002, 0x14230000, 0xa0000002, 0xb354edfd,
|
||||
0xb86f6002, 0xc1689a3c, 0x34594aac, 0x2556cabd};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000004, 0x28460000, 0x40000004, 0x66a9dbfb,
|
||||
0x70dec005, 0x82d13479, 0x68b29559, 0x4aad957a};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0xffffffff, 0xf5ee7fff, 0x2ffffffe, 0xa6558901,
|
||||
0xa3c84ffe, 0x9f4bb2e1, 0x65d35aa9, 0xed549aa1};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x00000001, 0x14230000, 0xe0000002, 0xc7dd4d2f, 0x8585d003, 0x08ee1bd4, 0xe57fc56e, 0x7e7557e3,
|
||||
0x483a709d, 0x1fdebb41, 0x5678f4e6, 0x8ea77334, 0xc19c3ec5, 0xd717de29, 0xe2340781, 0x015c8d01};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x00000002, 0x28460000, 0xc0000004, 0x8fba9a5f, 0x0b0ba007, 0x11dc37a9, 0xcaff8adc, 0xfceaafc7,
|
||||
0x9074e13a, 0x3fbd7682, 0xacf1e9cc, 0x1d4ee668, 0x83387d8b, 0xae2fbc53, 0xc4680f03, 0x02b91a03};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x00000004, 0x508c0000, 0x80000008, 0x1f7534bf, 0x1617400f, 0x23b86f52, 0x95ff15b8, 0xf9d55f8f,
|
||||
0x20e9c275, 0x7f7aed05, 0x59e3d398, 0x3a9dccd1, 0x0670fb16, 0x5c5f78a7, 0x88d01e07, 0x05723407};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x151e79ea, 0xf5204c21, 0x8d69e258, 0xfd0a180b,
|
||||
0xfaa80548, 0xe4e51e49, 0xc40b2c9e, 0x36d9491e};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xfffffff3, 0x7d1c7fff, 0x6ffffff2, 0x7257f50f,
|
||||
0x512c0fee, 0x16d81575, 0x2bbb9a9d, 0x0d4bda32};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x1beeec02, 0x4122dd1a, 0x74fee875, 0xbd1eae95,
|
||||
0x27b28e2f, 0x838557e2, 0x2290c02c, 0x07b30191};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e},
|
||||
{0x00000001, 0x8f1a4000, 0xb0000001, 0xcf664765, 0x970dec00, 0x23ed1347, 0x00000000, 0x00000000},
|
||||
{0xfbfa0a01, 0x0f830f7e, 0xd75769a0, 0x20f8b46c, 0xf05d5033, 0x7108bd18, 0x0788de01, 0x07405e08},
|
||||
{0x60b9bdae, 0xc78085a6, 0x789094f5, 0x3116ec22, 0xce87d660, 0x0a02a81d, 0xc2a94856, 0x0ead8236},
|
||||
{0x3e83a7cc, 0x6ffc39d9, 0x958a0a74, 0x117d996e, 0x0b92e8c9, 0xc242289d, 0x29d977d6, 0x0484efb4},
|
||||
{0x0111ec3f, 0x15455b00, 0xc5f6be6f, 0x6b62d7af, 0x337f2d07, 0xfcba0365, 0x43fccd26, 0x0f151842},
|
||||
{0xc31ec69b, 0x57951b2e, 0x2a37ce1f, 0x3e0a4be7, 0xcf3b198a, 0x960aeb4a, 0x341fd5cd, 0x04fb0673},
|
||||
{0xa921851f, 0x71c1b78e, 0x7808f239, 0x3c26340c, 0x976fb990, 0xbcc8f69b, 0xe880dc71, 0x06a5edb2},
|
||||
{0xc0f5679e, 0x7619eab5, 0x0dc0b9cd, 0x1f4cd10e, 0xbf6a480a, 0x7e1b70aa, 0x7f5461bb, 0x0ffc66da},
|
||||
{0xec5cbab2, 0x8159806d, 0x498264a3, 0x14ea1333, 0xe3abfaa6, 0x56bbe1d8, 0x02aa031f, 0x09d2b5c4},
|
||||
{0xc010c48a, 0xd2aa9562, 0x3b004b60, 0x447e5c11, 0x11e243bb, 0xd5a21c13, 0x0ab418b1, 0x01eab23e},
|
||||
{0xacff6986, 0x08715ee8, 0xa93924d0, 0xab01878a, 0x6e9ae5c4, 0xbfbc5e71, 0x26b08d6e, 0x0f8000bf},
|
||||
{0x3ddbc679, 0x06bc13b0, 0x615256ce, 0x7269a1f1, 0x1f5221a2, 0xf7716fbf, 0x8c66c14f, 0x0fa1f02c},
|
||||
{0x906f531f, 0xdd40f131, 0x30728eff, 0xb06b29c7, 0x88839294, 0xc891fd19, 0x646978e8, 0x04e88447},
|
||||
{0x6e259cdc, 0xb1e4b769, 0x00514e5e, 0xbcb0b709, 0x05113e7f, 0x74edb7c0, 0xe92e22af, 0x10c88511},
|
||||
{0x240ede5b, 0xebb2e898, 0x42cd84c6, 0xc2639185, 0x9408f956, 0xf79e8391, 0x94e87a7d, 0x06872fa1},
|
||||
{0x260678ff, 0xf8522249, 0xa8de9973, 0x6148cb16, 0x5a4e8d56, 0x5750f3f4, 0xbaeaf0c3, 0x0e805156},
|
||||
{0x3d766f80, 0x1b4b71cf, 0x1069012d, 0x47d21195, 0x9151ebec, 0x5635235f, 0x2b13c808, 0x093f7d91},
|
||||
{0x4637701d, 0x0848f958, 0x4c8353af, 0x8a750076, 0x0ef6174a, 0x485f4e4f, 0xf38db632, 0x078d97a1},
|
||||
{0x66a16869, 0x50c487c1, 0xd1fd4525, 0x380a66ab, 0x265e8539, 0xd455a01a, 0x064b5334, 0x0cd62875},
|
||||
{0x3358eb25, 0xdbc547bc, 0x722037db, 0x8909d398, 0x5e705b6d, 0x8b7075b5, 0x9bdaf407, 0x02694bb2},
|
||||
{0xf45b9621, 0x102fbfb0, 0xf04faac0, 0xe80f4241, 0x7ca61177, 0x0b830bfd, 0x7033169d, 0x10521892},
|
||||
{0xcc943028, 0xed2576ad, 0xfa4c6090, 0x846e49bc, 0x0049d8e6, 0xc74c1865, 0x665d7be5, 0x0e9c5a12},
|
||||
{0xafeb494b, 0x97319dcd, 0x1d78404c, 0xab30c83e, 0xf26ffe90, 0x452d8a48, 0xa36452c7, 0x0bfc2e92},
|
||||
{0xedc626c3, 0xf30e312d, 0xcf1f3a94, 0x8367a7ca, 0x917a1b28, 0x621e15e1, 0xf2e93b82, 0x07cd59f8},
|
||||
{0xf02ba42c, 0x553085d9, 0x1119b10d, 0x59662159, 0x6b8ea03f, 0xaa670958, 0x7ce92983, 0x066f6f5f},
|
||||
{0x4dd87a5e, 0xf423a283, 0xd9a4c364, 0x1fe46601, 0xbfdc7e9b, 0xda4addbf, 0x3bf94b2b, 0x0a7f2bd8},
|
||||
{0xe5f8848a, 0x270a2326, 0xa727567d, 0x97d14afa, 0x48746fc7, 0x1a3a5a4e, 0xa42f077a, 0x0044e4b1},
|
||||
{0x20b7298a, 0xd7652451, 0x65013b06, 0xc7c9a0b7, 0xad0d8457, 0x479b82a9, 0x0c99f5ce, 0x0bef1e5a},
|
||||
{0x1912f7fa, 0x77d7da1d, 0x299fd7d6, 0xbcb7a5b2, 0x142a4480, 0x705e45dd, 0xb492dbd8, 0x0dc835fd},
|
||||
{0xa0234d2d, 0xe943054c, 0xe5f5be5e, 0x673b0ee0, 0x5048a19a, 0xcdd48e41, 0xabc3cb99, 0x0997d277},
|
||||
{0xa9966ac4, 0x1ae0ea67, 0xda83fb3b, 0x4e2dbb1c, 0x0b51380e, 0xf77cf749, 0xb28a7670, 0x048b4b0e},
|
||||
{0xb14361d4, 0x7f1db43f, 0x25ab6d51, 0x7927e578, 0x383bf21e, 0xb43e52a5, 0xd27fa99f, 0x077595e9},
|
||||
{0xa90a2740, 0xfe3ca4f0, 0x512a7c7a, 0xd259ff36, 0xb41fe696, 0xbca3176a, 0xf33132ce, 0x05bd5ea3},
|
||||
{0xf284f768, 0xdeee484b, 0xe26a0475, 0x2a02e015, 0x88d968c2, 0xf0eb4925, 0x82a391c9, 0x0620ce9e},
|
||||
{0xbd83a3da, 0xd3b69b29, 0xe02ce197, 0x9543950f, 0xc2f87783, 0x80799665, 0xc15be215, 0x11ce8199},
|
||||
{0x1b29736e, 0x8f267f19, 0x1d5a0c3a, 0xa2e04d58, 0x1ae99514, 0x76803064, 0x57f7c806, 0x12129439},
|
||||
{0xf32d6bac, 0xa0b973d4, 0xf0d81b72, 0xae951889, 0x2e2daa0a, 0x51dbe098, 0x40d9af8f, 0x04679474},
|
||||
{0x22df9f13, 0x56313de8, 0x599e7536, 0xe2e75200, 0x6d163e50, 0xa1b4fce7, 0xc8111763, 0x0aec2172},
|
||||
{0x355dd694, 0x4258374d, 0x44c76a20, 0x5c31e8ac, 0xaa5fd062, 0x9b473969, 0x1a37b6b4, 0x0a693d77},
|
||||
{0x44ddbbdc, 0xbafb92a6, 0x26b01974, 0x63c7a02d, 0x5f28a274, 0x0ff86e13, 0x867f2e29, 0x0a7b462a},
|
||||
{0xd5fba57b, 0x90684fea, 0xe0defe98, 0xed237883, 0x030ae924, 0xc502b692, 0xe7a1ec2c, 0x08aa58e8},
|
||||
{0x5e9020dd, 0xade9d4b4, 0x87db8813, 0x489259d2, 0x25051238, 0x5ddce740, 0xb5bc4d11, 0x0c775db1},
|
||||
{0x293f8481, 0xd52cc17a, 0x6f133205, 0x041178fb, 0xb2961832, 0xbbc70d18, 0x481760cd, 0x073d34d1},
|
||||
{0xfdacff58, 0x8215b91d, 0x98331645, 0xd8d9177d, 0x439e803c, 0xe85223ad, 0xcca42c1f, 0x04aa8ef0},
|
||||
{0x01ab3a4d, 0x006f60fa, 0x814ba450, 0xe6600e15, 0xdf9eb147, 0xbde4df36, 0x33760d7b, 0x055d58fa},
|
||||
{0xec2a895e, 0x476ef4a4, 0x63e3f04a, 0x9b506ee3, 0xd1a8a12f, 0x60c69477, 0x0cb92cc1, 0x11d4b7f6}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x00000000, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e},
|
||||
{0x00000000, 0x7af74000, 0x1fffffff, 0x8a442f99, 0xc529c400, 0x3cc739d6, 0x9a2ca556, 0x12ab655e},
|
||||
{0xd60fb046, 0xc9fa190c, 0xc5b4674e, 0xdb5c179b, 0xbc7b8726, 0x2b2bce0b, 0xbf6e69bf, 0x0e4eb338},
|
||||
{0x8ffc4ed5, 0x74732d1f, 0xb7f2eefc, 0x42d9f590, 0xa24dd4dd, 0xf70461e5, 0xef64676f, 0x03b6eba4},
|
||||
{0x102bbab0, 0x5a21f98a, 0x8d8e2efb, 0xa6a147a9, 0x7612906f, 0x0eb4f005, 0x47d8d2e3, 0x0e1a5481},
|
||||
{0xd01e5aa8, 0x6e509add, 0x6e3f123d, 0xe1582468, 0x8274db24, 0xbd6313ee, 0xd173a634, 0x05d5836e},
|
||||
{0xe975c0cf, 0x6aab3344, 0x6f1dc38e, 0xca362e0e, 0x1dd1743a, 0x2fe72cda, 0xc1b4c4c2, 0x0c1c956e},
|
||||
{0xec89a64f, 0x59fe97a0, 0xe8de5d4c, 0x579617d7, 0xc9c1ea7b, 0x256a305b, 0x53fa131b, 0x01ffae4e},
|
||||
{0x29bcb088, 0x463a73ff, 0xe1438e80, 0xee9e9a5e, 0x3c9369e4, 0x2a00951f, 0x80a32052, 0x09711183},
|
||||
{0x4bec8dd2, 0xa36899db, 0x96393687, 0x2946872e, 0x842df3c8, 0xd4b5734f, 0x5f5cd8fb, 0x0834098f},
|
||||
{0xe3c711b9, 0x4bc485f6, 0x648d1d7e, 0xf43a2598, 0xee88abaa, 0x7f981a0e, 0xec6a3f27, 0x0c88c9c3},
|
||||
{0x49046b52, 0x42bcc6c2, 0x56ab9ecc, 0xcc77294a, 0xe4df3ddd, 0x02ecb41a, 0x67f76726, 0x0e567d22},
|
||||
{0x91c64fc2, 0x1cc56cc3, 0xd16a490b, 0x8cb71e65, 0x14fac366, 0x984be37e, 0xa25d7ba5, 0x0a08e032},
|
||||
{0xd4f5941e, 0x966d9739, 0xe5772a73, 0x5805deb6, 0x5c1f970c, 0xe4eb0d33, 0xbdf35409, 0x039715db},
|
||||
{0xcc6518ac, 0x8419686c, 0x9c7a2366, 0x96dec3a8, 0x71724384, 0xefbfcac6, 0xaf34c239, 0x0c44b99a},
|
||||
{0xc18ff4fd, 0xcb66fe1b, 0x86c8d586, 0x588e18b3, 0x1dfab57c, 0xc6e6d2a3, 0x7d7d4efd, 0x10918ad2},
|
||||
{0x97a18f58, 0x56d6cf22, 0xd0d7abd9, 0x11710758, 0x5eb7a9c5, 0xd1a6608b, 0xc4937e38, 0x04059bdb},
|
||||
{0x4b1b63a9, 0x12998cbc, 0xcf420c9f, 0x0f780c6c, 0x129289ad, 0xa5e48723, 0x240a141d, 0x0a3a1223},
|
||||
{0x00db2b48, 0xa43c0e02, 0x933d10ee, 0x76585489, 0xc0ba6a80, 0x12d64af1, 0x2fad8d8e, 0x01940f43},
|
||||
{0x1d75bec9, 0xe29ef6c0, 0xd4b0183b, 0xead287a2, 0xedfd3795, 0x75a017cf, 0x64427c8e, 0x107f8d0f},
|
||||
{0xa26c8c12, 0xa6f4e1d1, 0xf6610f7e, 0x13571553, 0x56701caf, 0xd95e5df6, 0x2263d69d, 0x050e7b89},
|
||||
{0xc161761f, 0x271d7caf, 0xc369a371, 0xf1001d6f, 0x00e60f51, 0x65286415, 0xb74d14b8, 0x00b918f9},
|
||||
{0x03ad3139, 0x01d3f431, 0xa137ce16, 0xe56f6002, 0x1deb42e8, 0x97f53369, 0xaa37cddd, 0x033fa9ac},
|
||||
{0x60cf1330, 0x840f913b, 0x1df5ed87, 0x5610cde6, 0x72b36ddf, 0x858381b0, 0x6f64e0b7, 0x109bf66c},
|
||||
{0x930cee0b, 0x432d3626, 0xf26e8ba3, 0x55ed3efb, 0x14c5457f, 0x802eebcc, 0xe2310f22, 0x00d300e3},
|
||||
{0x4b9ac952, 0x3d29f5ba, 0xc8ea8f94, 0x7c7f2662, 0xcefc3052, 0x736ccb63, 0x0981f3cb, 0x04bfce2f},
|
||||
{0x5d4e643c, 0x3da791ea, 0x85bff013, 0xb6a956ef, 0xd73de6a3, 0x86c629a8, 0x6b8c48a9, 0x0a5a5f55},
|
||||
{0x49c6284a, 0x9ba6aa00, 0xeacbdc63, 0x0b8429fb, 0xedafdf37, 0x9b9c6c5b, 0xad0c78c6, 0x009907e8},
|
||||
{0x3e47b53f, 0x50380ce2, 0x3a9613fc, 0x6ea3c2d3, 0x4c87ab50, 0xfe743105, 0xd192221c, 0x07871979},
|
||||
{0xe978594b, 0x4ddd3320, 0x3abe3f79, 0xe5f36fbe, 0xe4dcff8e, 0x5dba9ef2, 0x7105148f, 0x0bfc27e2},
|
||||
{0x498fb549, 0xd5993cd5, 0x09da9272, 0x718adcee, 0x72bd5bc0, 0x9e03cbb4, 0xc592813f, 0x07206942},
|
||||
{0x78fd3239, 0xaf29730b, 0x40c3e723, 0xbd907ac9, 0x77f214f7, 0x5dcc0aad, 0xb05fb3a1, 0x02d958da},
|
||||
{0xdf80223d, 0x55f432c9, 0x11a2fed9, 0x23daf2f6, 0x41ae8c34, 0x9e43e003, 0x95f22373, 0x0d51533b},
|
||||
{0x7998b62c, 0xbb53132b, 0x22c9b4aa, 0x064a9186, 0x71d61334, 0xd56de253, 0x04e416f6, 0x10fcf25f},
|
||||
{0xdddb58ec, 0x41f8042f, 0x10886d85, 0x7dd54384, 0x622ff4b4, 0x19544f90, 0x050cc539, 0x02f0b49a},
|
||||
{0xa39b02a3, 0x8a3de898, 0xdc94422c, 0x068b2992, 0xf493db31, 0x1c5f019a, 0x11b0f668, 0x066b1790},
|
||||
{0x78500f1a, 0x98310dd7, 0x735ccb27, 0x1c6050bf, 0xb2081df4, 0x07b6fa7f, 0xfa0f1e20, 0x003edf24},
|
||||
{0x89b0ca6f, 0xb4d938e2, 0x2c897570, 0x0214eb59, 0x2d4cf27a, 0x56c45327, 0x3ed546a4, 0x10a2f358},
|
||||
{0xef01ed78, 0xf2828212, 0xf103c9ca, 0xa66094ac, 0x7a2d5573, 0xdceb481d, 0x8af46aab, 0x0190fcde},
|
||||
{0x526bf9fc, 0x023031cc, 0x79c209ba, 0x0e4136c0, 0x3ec42e5c, 0xe5234df1, 0x1d455234, 0x00cb9592},
|
||||
{0x33bf2a1c, 0x842b0c9c, 0xa29b9236, 0x1fd43c95, 0xc06795d3, 0x6b37a603, 0x0c1b712a, 0x00017b17},
|
||||
{0xaf858193, 0x2b955be2, 0x5fb5e378, 0xa513d8be, 0xa326aeb9, 0x88c4ebeb, 0xf3d45990, 0x00c378e2},
|
||||
{0x6464580f, 0x33e6c8c0, 0x3c4aa09f, 0x9d560eb3, 0xcc98f404, 0xb3f1a899, 0x8ca24b48, 0x012c1ea5},
|
||||
{0xe3b4dc56, 0xa0594a67, 0x91b698e1, 0xc8e6b582, 0x8df78057, 0x711cadbf, 0x396466f8, 0x0049abdf},
|
||||
{0x4ffa086a, 0xecc89610, 0xca06afc6, 0x4db82291, 0x8f3a6426, 0x9ae7c68c, 0x2a874432, 0x0b3dae8c},
|
||||
{0x3b3625b6, 0x1e62401f, 0x28471e5a, 0xd0692164, 0x5cad6b77, 0xb85aa9ec, 0xaa95acf2, 0x063e4b66},
|
||||
{0xb9112c51, 0x2542c2b2, 0x6e23b3ce, 0x36ead8da, 0x76476754, 0x9a268d13, 0xa1ad7cf1, 0x121f44ad}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x00000001, 0x8508c000, 0x68000000, 0xacd53b7f, 0x2e1bd800, 0x305a268f, 0x4d1652ab, 0x0955b2af},
|
||||
{0x00000001, 0xc78d2000, 0x1c000000, 0x033fd93f, 0xc529c401, 0xc88739d6, 0xf3a17c00, 0x0e008c06},
|
||||
{0x00000001, 0xe8cf5000, 0xf6000000, 0x2e75281e, 0x90b0ba01, 0x949dc37a, 0xc6e710ab, 0x1055f8b2},
|
||||
{0x00000001, 0xf9706800, 0xe3000000, 0x440fcf8e, 0x76743501, 0xfaa9084c, 0xb089db00, 0x1180af08},
|
||||
{0x00000001, 0x01c0f400, 0xd9800001, 0x4edd2346, 0x6955f281, 0xadaeaab5, 0xa55b402b, 0x12160a33},
|
||||
{0x00000001, 0x05e93a00, 0xd4c00001, 0x5443cd22, 0xe2c6d141, 0x07317be9, 0x1fc3f2c1, 0x1260b7c9},
|
||||
{0x00000001, 0x07fd5d00, 0xd2600001, 0x56f72210, 0x1f7f40a1, 0xb3f2e484, 0xdcf84c0b, 0x12860e93},
|
||||
{0x00000001, 0x09076e80, 0xd1300001, 0x5850cc87, 0x3ddb7851, 0x0a5398d1, 0x3b9278b1, 0x1298b9f9},
|
||||
{0x00000001, 0x098c7740, 0x50980001, 0x58fda1c3, 0xcd099429, 0xb583f2f7, 0xeadf8f03, 0x12a20fab},
|
||||
{0x00000001, 0x09cefba0, 0x104c0001, 0x59540c61, 0x14a0a215, 0x0b1c200b, 0x42861a2d, 0x12a6ba85},
|
||||
{0x00000001, 0x09f03dd0, 0xf0260001, 0x597f41af, 0xb86c290b, 0xb5e83694, 0xee595fc1, 0x12a90ff1},
|
||||
{0x00000001, 0x0a00dee8, 0x60130001, 0x5994dc57, 0x8a51ec86, 0x0b4e41d9, 0x4443028c, 0x12aa3aa8},
|
||||
{0x00000001, 0x0a092f74, 0x18098001, 0xd99fa9ab, 0xf344ce43, 0x3601477b, 0x6f37d3f1, 0x12aad003},
|
||||
{0x00000001, 0x0a0d57ba, 0xf404c001, 0x99a51054, 0x27be3f22, 0xcb5aca4d, 0x04b23ca3, 0x12ab1ab1},
|
||||
{0x00000001, 0x0a0f6bdd, 0xe2026001, 0xf9a7c3a9, 0xc1faf791, 0x16078bb5, 0xcf6f70fd, 0x12ab4007},
|
||||
{0x80000001, 0x0a1075ee, 0x59013001, 0xa9a91d54, 0x0f1953c9, 0xbb5dec6a, 0x34ce0b29, 0x12ab52b3},
|
||||
{0x40000001, 0x0a10faf7, 0x94809801, 0x81a9ca29, 0x35a881e5, 0x0e091cc4, 0xe77d5840, 0x12ab5c08},
|
||||
{0xa0000001, 0x0a113d7b, 0x32404c01, 0x6daa2094, 0x48f018f3, 0x375eb4f1, 0xc0d4fecb, 0x12ab60b3},
|
||||
{0xd0000001, 0x0a115ebd, 0x81202601, 0x63aa4bc9, 0xd293e47a, 0xcc098107, 0x2d80d210, 0x12ab6309},
|
||||
{0xe8000001, 0x0a116f5e, 0x28901301, 0xdeaa6164, 0x1765ca3d, 0x965ee713, 0xe3d6bbb3, 0x12ab6433},
|
||||
{0x74000001, 0x0a1177af, 0x7c480981, 0x9c2a6c31, 0xb9cebd1f, 0xfb899a18, 0x3f01b084, 0x12ab64c9},
|
||||
{0xba000001, 0x0a117bd7, 0x262404c1, 0x7aea7198, 0x8b033690, 0xae1ef39b, 0xec972aed, 0x12ab6513},
|
||||
{0xdd000001, 0x0a117deb, 0x7b120261, 0xea4a744b, 0xf39d7348, 0x0769a05c, 0x4361e822, 0x12ab6539},
|
||||
{0xee800001, 0x0a117ef5, 0x25890131, 0x21fa75a5, 0xa7ea91a5, 0x340ef6bd, 0xeec746bc, 0x12ab654b},
|
||||
{0xf7400001, 0x0a117f7a, 0xfac48099, 0x3dd27651, 0x021120d3, 0x4a61a1ee, 0x4479f609, 0x12ab6555},
|
||||
{0x7ba00001, 0x0a117fbd, 0x6562404d, 0x4bbe76a8, 0x2f24686a, 0xd58af786, 0xef534daf, 0x12ab6559},
|
||||
{0xbdd00001, 0x0a117fde, 0x9ab12027, 0xd2b476d3, 0x45ae0c35, 0x1b1fa252, 0x44bff983, 0x12ab655c},
|
||||
{0x5ee80001, 0x0a117fef, 0x35589014, 0x962f76e9, 0x50f2de1b, 0xbde9f7b8, 0x6f764f6c, 0x12ab655d},
|
||||
{0xaf740001, 0x8a117ff7, 0x02ac480a, 0x77ecf6f4, 0x5695470e, 0x8f4f226b, 0x04d17a61, 0x12ab655e},
|
||||
{0xd7ba0001, 0xca117ffb, 0x69562405, 0xe8cbb6f9, 0xd9667b87, 0xf801b7c4, 0x4f7f0fdb, 0x12ab655e},
|
||||
{0xebdd0001, 0x6a117ffd, 0x1cab1203, 0xa13b16fc, 0x9acf15c4, 0x2c5b0271, 0x74d5da99, 0x12ab655e},
|
||||
{0xf5ee8001, 0x3a117ffe, 0x76558902, 0xfd72c6fd, 0xfb8362e2, 0xc687a7c7, 0x87813ff7, 0x12ab655e},
|
||||
{0x7af74001, 0xa2117fff, 0x232ac481, 0x2b8e9efe, 0x2bdd8972, 0x139dfa73, 0x90d6f2a7, 0x12ab655e},
|
||||
{0xbd7ba001, 0x56117fff, 0x79956241, 0xc29c8afe, 0xc40a9cb9, 0xba2923c8, 0x9581cbfe, 0x12ab655e},
|
||||
{0xdebdd001, 0x30117fff, 0xa4cab121, 0x8e2380fe, 0x9021265d, 0x8d6eb873, 0x97d738aa, 0x12ab655e},
|
||||
{0xef5ee801, 0x1d117fff, 0xba655891, 0x73e6fbfe, 0xf62c6b2f, 0x771182c8, 0x9901ef00, 0x12ab655e},
|
||||
{0xf7af7401, 0x13917fff, 0xc532ac49, 0x66c8b97e, 0xa9320d98, 0x6be2e7f3, 0x99974a2b, 0x12ab655e},
|
||||
{0xfbd7ba01, 0x0ed17fff, 0xca995625, 0xe039983e, 0x02b4decc, 0xe64b9a89, 0x99e1f7c0, 0x12ab655e},
|
||||
{0xfdebdd01, 0x0c717fff, 0xcd4cab13, 0x1cf2079e, 0xaf764767, 0xa37ff3d3, 0x9a074e8b, 0x12ab655e},
|
||||
{0xfef5ee81, 0x0b417fff, 0xcea6558a, 0x3b4e3f4e, 0x05d6fbb4, 0x021a2079, 0x9a19f9f1, 0x12ab655e},
|
||||
{0xff7af741, 0x8aa97fff, 0xcf532ac5, 0xca7c5b26, 0xb10755da, 0xb16736cb, 0x9a234fa3, 0x12ab655e},
|
||||
{0xffbd7ba1, 0x4a5d7fff, 0xcfa99563, 0x12136912, 0x069f82ee, 0x090dc1f5, 0x9a27fa7d, 0x12ab655e},
|
||||
{0xffdebdd1, 0x2a377fff, 0xcfd4cab2, 0xb5def008, 0xb16b9977, 0xb4e10789, 0x9a2a4fe9, 0x12ab655e},
|
||||
{0xffef5ee9, 0x9a247fff, 0xcfea6559, 0x87c4b383, 0x06d1a4bc, 0x0acaaa54, 0x9a2b7aa0, 0x12ab655e},
|
||||
{0xfff7af75, 0x521affff, 0x4ff532ad, 0xf0b79541, 0x3184aa5e, 0x35bf7bb9, 0x9a2c0ffb, 0x12ab655e},
|
||||
{0xfffbd7bb, 0x2e163fff, 0x0ffa9957, 0x25310620, 0xc6de2d30, 0xcb39e46b, 0x9a2c5aa8, 0x12ab655e},
|
||||
{0xfffdebde, 0x1c13dfff, 0x6ffd4cac, 0xbf6dbe8f, 0x118aee98, 0x95f718c5, 0x9a2c7fff, 0x12ab655e}}};
|
||||
};
|
||||
} // namespace bls12_377
|
||||
|
||||
#endif
|
||||
61
icicle/include/fields/snark_fields/bls12_381_base.cuh
Normal file
61
icicle/include/fields/snark_fields/bls12_381_base.cuh
Normal file
@@ -0,0 +1,61 @@
|
||||
#pragma once
|
||||
#ifndef BLS12_381_BASE_PARAMS_H
|
||||
#define BLS12_381_BASE_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
|
||||
namespace bls12_381 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
static constexpr unsigned modulus_bit_count = 381;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe,
|
||||
0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd,
|
||||
0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709,
|
||||
0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa,
|
||||
0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13,
|
||||
0x0d2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x00005555, 0x46010000, 0x4eac0000, 0xe1540001,
|
||||
0x094f09db, 0x98cf2d5f, 0x0c7aed40, 0x9b88b47b,
|
||||
0xbcb45328, 0xb4e45849, 0xc6801965, 0xe5feee15};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7,
|
||||
0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x0002fffd, 0x76090000, 0xc40c0002, 0xebf4000b,
|
||||
0x53c758ba, 0x5f489857, 0x70525745, 0x77ce5853,
|
||||
0xa256ec6d, 0x5c071a97, 0xfa80e493, 0x15f65ec3};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x380b4820, 0xf4d38259, 0xd898fafb, 0x7fe11274,
|
||||
0x14956dc8, 0x343ea979, 0x58a88de9, 0x1797ab14,
|
||||
0x3c4f538b, 0xed5e6427, 0xe8fb0ce9, 0x14fec701};
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 1;
|
||||
// true if nonresidue is negative
|
||||
static constexpr bool nonresidue_is_negative = true;
|
||||
};
|
||||
} // namespace bls12_381
|
||||
|
||||
#endif
|
||||
@@ -1,8 +1,8 @@
|
||||
#pragma once
|
||||
#ifndef BLS12_381_PARAMS_H
|
||||
#define BLS12_381_PARAMS_H
|
||||
#ifndef BLS12_381_SCALAR_PARAMS_H
|
||||
#define BLS12_381_SCALAR_PARAMS_H
|
||||
|
||||
#include "utils/storage.cuh"
|
||||
#include "fields/storage.cuh"
|
||||
|
||||
namespace bls12_381 {
|
||||
struct fp_config {
|
||||
@@ -145,88 +145,6 @@ namespace bls12_381 {
|
||||
{0x00000003, 0x00034801, 0x588313f9, 0x4079f3f8, 0xa32e27f5, 0xdffedd77, 0x41c22ea1, 0x73eda752},
|
||||
{0x00000002, 0x0001a400, 0xac40b7fc, 0x4a1bcbfd, 0xd667fffd, 0x099c5abf, 0xb5afd5f5, 0x73eda752}}};
|
||||
};
|
||||
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 12;
|
||||
static constexpr unsigned modulus_bit_count = 381;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe,
|
||||
0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xffff5556, 0x73fdffff, 0x62a7ffff, 0x3d57fffd,
|
||||
0xed61ec48, 0xce61a541, 0xe70a257e, 0xc8ee9709,
|
||||
0x869759ae, 0x96374f6c, 0x72ffcd34, 0x340223d4};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0xfffeaaac, 0xe7fbffff, 0xc54ffffe, 0x7aaffffa,
|
||||
0xdac3d890, 0x9cc34a83, 0xce144afd, 0x91dd2e13,
|
||||
0x0d2eb35d, 0x2c6e9ed9, 0xe5ff9a69, 0x680447a8};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x00005555, 0x46010000, 0x4eac0000, 0xe1540001,
|
||||
0x094f09db, 0x98cf2d5f, 0x0c7aed40, 0x9b88b47b,
|
||||
0xbcb45328, 0xb4e45849, 0xc6801965, 0xe5feee15};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xffffaaab, 0xb9feffff, 0xb153ffff, 0x1eabfffe, 0xf6b0f624, 0x6730d2a0, 0xf38512bf, 0x64774b84,
|
||||
0x434bacd7, 0x4b1ba7b6, 0x397fe69a, 0x1a0111ea, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x1c718e39, 0x26aa0000, 0x76382eab, 0x7ced6b1d, 0x62113cfd, 0x162c3383, 0x3e71b743, 0x66bf91ed,
|
||||
0x7091a049, 0x292e85a8, 0x86185c7b, 0x1d68619c, 0x0978ef01, 0xf5314933, 0x16ddca6e, 0x50a62cfd,
|
||||
0x349e8bd0, 0x66e59e49, 0x0e7046b4, 0xe2dc90e5, 0xa22f25e9, 0x4bd278ea, 0xb8c35fc7, 0x02a437a4};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x38e31c72, 0x4d540000, 0xec705d56, 0xf9dad63a, 0xc42279fa, 0x2c586706, 0x7ce36e86, 0xcd7f23da,
|
||||
0xe1234092, 0x525d0b50, 0x0c30b8f6, 0x3ad0c339, 0x12f1de02, 0xea629266, 0x2dbb94dd, 0xa14c59fa,
|
||||
0x693d17a0, 0xcdcb3c92, 0x1ce08d68, 0xc5b921ca, 0x445e4bd3, 0x97a4f1d5, 0x7186bf8e, 0x05486f49};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x71c638e4, 0x9aa80000, 0xd8e0baac, 0xf3b5ac75, 0x8844f3f5, 0x58b0ce0d, 0xf9c6dd0c, 0x9afe47b4,
|
||||
0xc2468125, 0xa4ba16a1, 0x186171ec, 0x75a18672, 0x25e3bc04, 0xd4c524cc, 0x5b7729bb, 0x4298b3f4,
|
||||
0xd27a2f41, 0x9b967924, 0x39c11ad1, 0x8b724394, 0x88bc97a7, 0x2f49e3aa, 0xe30d7f1d, 0x0a90de92};
|
||||
static constexpr storage<limbs_count> m = {0xd59646e8, 0xec4f881f, 0x8163c701, 0x4e65c59e, 0x80a19de7, 0x2f7d1dc7,
|
||||
0x7fda82a5, 0xa46e09d0, 0x331e9ae8, 0x38a0406c, 0xcf327917, 0x2760d74b};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0x0002fffd, 0x76090000, 0xc40c0002, 0xebf4000b,
|
||||
0x53c758ba, 0x5f489857, 0x70525745, 0x77ce5853,
|
||||
0xa256ec6d, 0x5c071a97, 0xfa80e493, 0x15f65ec3};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x380b4820, 0xf4d38259, 0xd898fafb, 0x7fe11274,
|
||||
0x14956dc8, 0x343ea979, 0x58a88de9, 0x1797ab14,
|
||||
0x3c4f538b, 0xed5e6427, 0xe8fb0ce9, 0x14fec701};
|
||||
// i^2, the square of the imaginary unit for the extension field
|
||||
static constexpr uint32_t i_squared = 1;
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
};
|
||||
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_x = {0xdb22c6bb, 0xfb3af00a, 0xf97a1aef, 0x6c55e83f,
|
||||
0x171bac58, 0xa14e3a3f, 0x9774b905, 0xc3688c4f,
|
||||
0x4fa9ac0f, 0x2695638c, 0x3197d794, 0x17f1d3a7};
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x46c5e7e1, 0x0caa2329, 0xa2888ae4, 0xd03cc744,
|
||||
0x2c04b3ed, 0x00db18cb, 0xd5d00af6, 0xfcf5e095,
|
||||
0x741d8ae4, 0xa09e30ed, 0xe3aaa0f1, 0x08b3f481};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {0xc121bdb8, 0xd48056c8, 0xa805bbef, 0x0bac0326,
|
||||
0x7ae3d177, 0xb4510b64, 0xfa403b02, 0xc6e47ad4,
|
||||
0x2dc51051, 0x26080527, 0xf08f0a91, 0x024aa2b2};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {0x5d042b7e, 0xe5ac7d05, 0x13945d57, 0x334cf112,
|
||||
0xdc7f5049, 0xb5da61bb, 0x9920b61a, 0x596bd0d0,
|
||||
0x88274f65, 0x7dacd3a0, 0x52719f60, 0x13e02b60};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {0x08b82801, 0xe1935486, 0x3baca289, 0x923ac9cc,
|
||||
0x5160d12c, 0x6d429a69, 0x8cbdd3a7, 0xadfd9baa,
|
||||
0xda2e351a, 0x8cc9cdc6, 0x727d6e11, 0x0ce5d527};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {0xf05f79be, 0xaaa9075f, 0x5cec1da1, 0x3f370d27,
|
||||
0x572e99ab, 0x267492ab, 0x85a763af, 0xcb3e287e,
|
||||
0x2bc28b99, 0x32acd2b0, 0x2ea734cc, 0x0606c4a0};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000004, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
|
||||
0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
|
||||
0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
} // namespace bls12_381
|
||||
|
||||
#endif
|
||||
49
icicle/include/fields/snark_fields/bn254_base.cuh
Normal file
49
icicle/include/fields/snark_fields/bn254_base.cuh
Normal file
@@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
#ifndef BN254_BASE_PARAMS_H
|
||||
#define BN254_BASE_PARAMS_H
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
|
||||
namespace bn254 {
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned modulus_bit_count = 254;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522,
|
||||
0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45,
|
||||
0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x278302b9, 0xc3df73e9, 0x978e3572, 0x687e956e,
|
||||
0x7e7ea7a2, 0x47afba49, 0x1ece5fd6, 0xcf9bb18d};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95,
|
||||
0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a,
|
||||
0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55,
|
||||
0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17,
|
||||
0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xc58f0d9d, 0xd35d438d, 0xf5c70b3d, 0x0a78eb28,
|
||||
0x7879462c, 0x666ea36f, 0x9a07df2f, 0x0e0a77c1};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x014afa37, 0xed84884a, 0x0278edf8, 0xeb202285,
|
||||
0xb74492d9, 0xcf63e9cf, 0x59e5c639, 0x2e671571};
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 1;
|
||||
// true if nonresidue is negative
|
||||
static constexpr bool nonresidue_is_negative = true;
|
||||
};
|
||||
} // namespace bn254
|
||||
|
||||
#endif
|
||||
@@ -1,8 +1,8 @@
|
||||
#pragma once
|
||||
#ifndef BN254_PARAMS_H
|
||||
#define BN254_PARAMS_H
|
||||
#ifndef BN254_SCALAR_PARAMS_H
|
||||
#define BN254_SCALAR_PARAMS_H
|
||||
|
||||
#include "utils/storage.cuh"
|
||||
#include "fields/storage.cuh"
|
||||
|
||||
namespace bn254 {
|
||||
struct fp_config {
|
||||
@@ -133,67 +133,6 @@ namespace bn254 {
|
||||
{0x73c14d83, 0x0cb3e36b, 0x733c6782, 0xf808dca3, 0x7778a18c, 0x921c407f, 0xd4a7d1cd, 0x30644e6c},
|
||||
{0xb1e0a6c2, 0xa84aec7f, 0xf67aec09, 0x101e6275, 0xfc7cfcf5, 0xa536431a, 0xdaecb8fb, 0x30644e6f}}};
|
||||
};
|
||||
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 8;
|
||||
static constexpr unsigned modulus_bit_count = 254;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
static constexpr storage<limbs_count> modulus = {0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91,
|
||||
0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xb0f9fa8e, 0x7841182d, 0xd0e3951a, 0x2f02d522,
|
||||
0x0302b0bb, 0x70a08b6d, 0xc2634053, 0x60c89ce5};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x61f3f51c, 0xf082305b, 0xa1c72a34, 0x5e05aa45,
|
||||
0x06056176, 0xe14116da, 0x84c680a6, 0xc19139cb};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x278302b9, 0xc3df73e9, 0x978e3572, 0x687e956e,
|
||||
0x7e7ea7a2, 0x47afba49, 0x1ece5fd6, 0xcf9bb18d};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {
|
||||
0xd87cfd47, 0x3c208c16, 0x6871ca8d, 0x97816a91, 0x8181585d, 0xb85045b6, 0xe131a029, 0x30644e72,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {
|
||||
0x275d69b1, 0x3b5458a2, 0x09eac101, 0xa602072d, 0x6d96cadc, 0x4a50189c, 0x7a1242c8, 0x04689e95,
|
||||
0x34c6b38d, 0x26edfa5c, 0x16375606, 0xb00b8551, 0x0348d21c, 0x599a6f7c, 0x763cbf9c, 0x0925c4b8};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {
|
||||
0x4ebad362, 0x76a8b144, 0x13d58202, 0x4c040e5a, 0xdb2d95b9, 0x94a03138, 0xf4248590, 0x08d13d2a,
|
||||
0x698d671a, 0x4ddbf4b8, 0x2c6eac0c, 0x60170aa2, 0x0691a439, 0xb334def8, 0xec797f38, 0x124b8970};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {
|
||||
0x9d75a6c4, 0xed516288, 0x27ab0404, 0x98081cb4, 0xb65b2b72, 0x29406271, 0xe8490b21, 0x11a27a55,
|
||||
0xd31ace34, 0x9bb7e970, 0x58dd5818, 0xc02e1544, 0x0d234872, 0x6669bdf0, 0xd8f2fe71, 0x249712e1};
|
||||
static constexpr storage<limbs_count> m = {0x19bf90e5, 0x6f3aed8a, 0x67cd4c08, 0xae965e17,
|
||||
0x68073013, 0xab074a58, 0x623a04a7, 0x54a47462};
|
||||
static constexpr storage<limbs_count> one = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xc58f0d9d, 0xd35d438d, 0xf5c70b3d, 0x0a78eb28,
|
||||
0x7879462c, 0x666ea36f, 0x9a07df2f, 0x0e0a77c1};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x014afa37, 0xed84884a, 0x0278edf8, 0xeb202285,
|
||||
0xb74492d9, 0xcf63e9cf, 0x59e5c639, 0x2e671571};
|
||||
// i^2, the square of the imaginary unit for the extension field
|
||||
static constexpr uint32_t i_squared = 1;
|
||||
// true if i^2 is negative
|
||||
static constexpr bool i_squared_is_negative = true;
|
||||
};
|
||||
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_x = {0x00000001, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_y = {0x00000002, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x_re = {0xd992f6ed, 0x46debd5c, 0xf75edadd, 0x674322d4,
|
||||
0x5e5c4479, 0x426a0066, 0x121f1e76, 0x1800deef};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x_im = {0xaef312c2, 0x97e485b7, 0x35a9e712, 0xf1aa4933,
|
||||
0x31fb5d25, 0x7260bfb7, 0x920d483a, 0x198e9393};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y_re = {0x66fa7daa, 0x4ce6cc01, 0x0c43d37b, 0xe3d1e769,
|
||||
0x8dcb408f, 0x4aab7180, 0xdb8c6deb, 0x12c85ea5};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y_im = {0xd122975b, 0x55acdadc, 0x70b38ef3, 0xbc4b3133,
|
||||
0x690c3395, 0xec9e99ad, 0x585ff075, 0x090689d0};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {0x00000003, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_re = {
|
||||
0x24a138e5, 0x3267e6dc, 0x59dbefa3, 0xb5b4c5e5, 0x1be06ac3, 0x81be1899, 0xceb8aaae, 0x2b149d40};
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b_g2_im = {
|
||||
0x85c315d2, 0xe4a2bd06, 0xe52d1852, 0xa74fa084, 0xeed8fdf4, 0xcd2cafad, 0x3af0fed4, 0x009713b0};
|
||||
} // namespace bn254
|
||||
|
||||
#endif
|
||||
@@ -1,13 +1,10 @@
|
||||
#pragma once
|
||||
#ifndef BW6_761_PARAMS_H
|
||||
#define BW6_761_PARAMS_H
|
||||
#ifndef BW6_761_BASE_BASE_H
|
||||
#define BW6_761_BASE_BASE_H
|
||||
|
||||
#include "utils/storage.cuh"
|
||||
#include "bls12_377_params.cuh"
|
||||
#include "fields/storage.cuh"
|
||||
|
||||
namespace bw6_761 {
|
||||
typedef bls12_377::fq_config fp_config;
|
||||
|
||||
struct fq_config {
|
||||
static constexpr unsigned limbs_count = 24;
|
||||
static constexpr unsigned modulus_bit_count = 761;
|
||||
@@ -77,33 +74,6 @@ namespace bw6_761 {
|
||||
0x7695ef18, 0x5e763565, 0x4fae56bb, 0x226022c2, 0xb70d7652, 0x80e7f067, 0x72116b89, 0x435a8b4a,
|
||||
0x5d84e0d4, 0xac258fd6, 0x4427c7b2, 0x47ee8ac5, 0xd04e621b, 0x478c4048, 0x2add3e93, 0x00e0aa7d};
|
||||
};
|
||||
|
||||
// G1 and G2 generators
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_x = {
|
||||
0x66e5b43d, 0x4088f3af, 0xa6af603f, 0x055928ac, 0x56133e82, 0x6750dd03, 0x280ca27f, 0x03758f9a,
|
||||
0xc9ea0971, 0x5bd71fa0, 0x47729b90, 0xa17a54ce, 0x94c2e746, 0x11dbfcd2, 0xc15520ac, 0x79017ffa,
|
||||
0x85f56fc7, 0xee05c54b, 0x551b27f0, 0xe6a0cfb7, 0xa477beae, 0xb277ce98, 0x0ea190c8, 0x01075b02};
|
||||
static constexpr storage<fq_config::limbs_count> g1_gen_y = {
|
||||
0xb4e95363, 0xbafc8f2d, 0x0b20d2a1, 0xad1cb2be, 0xcad0fb93, 0xb2b08119, 0xb3053253, 0x9f9df141,
|
||||
0x6fc2cdd4, 0xbe3fb90b, 0x717a4c55, 0xcc685d31, 0x71b5b806, 0xc5b8fa17, 0xaf7e0dba, 0x265909f1,
|
||||
0xa2e573a3, 0x1a7348d2, 0x884c9ec6, 0x0f952589, 0x45cc2a42, 0xe6fd637b, 0x0a6fc574, 0x0058b84e};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_x = {
|
||||
0xcd025f1c, 0xa830c194, 0xe1bf995b, 0x6410cf4f, 0xc2ad54b0, 0x00e96efb, 0x3cd208d7, 0xce6948cb,
|
||||
0x00e1b6ba, 0x963317a3, 0xac70e7c7, 0xc5bbcae9, 0xf09feb58, 0x734ec3f1, 0xab3da268, 0x26b41c5d,
|
||||
0x13890f6d, 0x4c062010, 0xc5a7115f, 0xd61053aa, 0x69d660f9, 0xc852a82e, 0x41d9b816, 0x01101332};
|
||||
static constexpr storage<fq_config::limbs_count> g2_gen_y = {
|
||||
0x28c73b61, 0xeb70a167, 0xf9eac689, 0x91ec0594, 0x3c5a02a5, 0x58aa2d3a, 0x504affc7, 0x3ea96fcd,
|
||||
0xffa82300, 0x8906c170, 0xd2c712b8, 0x64f293db, 0x33293fef, 0x94c97eb7, 0x0b95a59c, 0x0a1d86c8,
|
||||
0x53ffe316, 0x81a78e27, 0xcec2181c, 0x26b7cf9a, 0xe4b6d2dc, 0x8179eb10, 0x7761369f, 0x0017c335};
|
||||
|
||||
static constexpr storage<fq_config::limbs_count> weierstrass_b = {
|
||||
0x0000008a, 0xf49d0000, 0x70000082, 0xe6913e68, 0xeaf0a437, 0x160cf8ae, 0x5667a8f8, 0x98a116c2,
|
||||
0x73ebff2e, 0x71dcd3dc, 0x12f9fd90, 0x8689c8ed, 0x25b42304, 0x03cebaff, 0xe584e919, 0x707ba638,
|
||||
0x8087be41, 0x528275ef, 0x81d14688, 0xb926186a, 0x04faff3e, 0xd187c940, 0xfb83ce0a, 0x0122e824};
|
||||
static constexpr storage<fq_config::limbs_count> g2_weierstrass_b = {
|
||||
0x00000004, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
|
||||
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000};
|
||||
} // namespace bw6_761
|
||||
|
||||
#endif
|
||||
#endif
|
||||
52
icicle/include/fields/stark_fields/baby_bear.cuh
Normal file
52
icicle/include/fields/stark_fields/baby_bear.cuh
Normal file
@@ -0,0 +1,52 @@
|
||||
#pragma once
|
||||
|
||||
#include "fields/storage.cuh"
|
||||
#include "fields/field.cuh"
|
||||
|
||||
namespace baby_bear {
|
||||
struct fp_config {
|
||||
static constexpr unsigned limbs_count = 1;
|
||||
static constexpr unsigned omegas_count = 28;
|
||||
static constexpr unsigned modulus_bit_count = 31;
|
||||
static constexpr unsigned num_of_reductions = 1;
|
||||
|
||||
static constexpr storage<limbs_count> modulus = {0x78000001};
|
||||
static constexpr storage<limbs_count> modulus_2 = {0xf0000002};
|
||||
static constexpr storage<limbs_count> modulus_4 = {0x00000000};
|
||||
static constexpr storage<limbs_count> neg_modulus = {0x87ffffff};
|
||||
static constexpr storage<2 * limbs_count> modulus_wide = {0x78000001, 0x00000000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared = {0xf0000001, 0x38400000};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_2 = {0xe0000002, 0x70800001};
|
||||
static constexpr storage<2 * limbs_count> modulus_squared_4 = {0xc0000004, 0xe1000003};
|
||||
|
||||
static constexpr storage<limbs_count> m = {0x88888887};
|
||||
static constexpr storage<limbs_count> one = {0x00000001};
|
||||
static constexpr storage<limbs_count> zero = {0x00000000};
|
||||
static constexpr storage<limbs_count> montgomery_r = {0xffffffe};
|
||||
static constexpr storage<limbs_count> montgomery_r_inv = {0x38400000};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega = {
|
||||
{{0x78000000}, {0x10faa3e0}, {0x6b615c47}, {0x21ceed5a}, {0x2c1c3348}, {0x36c54c86}, {0x701dd01c},
|
||||
{0x56a9a28e}, {0x03e4cabf}, {0x5bacde79}, {0x1eb53838}, {0x1cd781af}, {0x0961a0b7}, {0x65098a87},
|
||||
{0x77851a0b}, {0x5bcba331}, {0x053fc0f5}, {0x5bf816e5}, {0x4bb124ab}, {0x571e9d4e}, {0x313732cb},
|
||||
{0x28aca172}, {0x4e319b52}, {0x45692d95}, {0x14ff4ba1}, {0x00004951}, {0x00000089}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {
|
||||
{{0x78000000}, {0x67055c21}, {0x5ee99486}, {0x0bb4c4e4}, {0x4ab33b27}, {0x044b4497}, {0x410e23aa},
|
||||
{0x08a7ee2b}, {0x563cb93d}, {0x3d70b4b7}, {0x77d999f1}, {0x6ceb65b5}, {0x49e7f635}, {0x0eae3a8c},
|
||||
{0x238b8a78}, {0x70d71b0a}, {0x0eaacc45}, {0x5af0f193}, {0x47303308}, {0x573cbfad}, {0x29ff72c0},
|
||||
{0x05af9dac}, {0x00ef24df}, {0x26985530}, {0x22d1ce4b}, {0x08359375}, {0x2cabe994}}};
|
||||
|
||||
static constexpr storage_array<omegas_count, limbs_count> inv = {
|
||||
{{0x3c000001}, {0x5a000001}, {0x69000001}, {0x70800001}, {0x74400001}, {0x76200001}, {0x77100001},
|
||||
{0x77880001}, {0x77c40001}, {0x77e20001}, {0x77f10001}, {0x77f88001}, {0x77fc4001}, {0x77fe2001},
|
||||
{0x77ff1001}, {0x77ff8801}, {0x77ffc401}, {0x77ffe201}, {0x77fff101}, {0x77fff881}, {0x77fffc41},
|
||||
{0x77fffe21}, {0x77ffff11}, {0x77ffff89}, {0x77ffffc5}, {0x77ffffe3}, {0x77fffff2}}};
|
||||
|
||||
// nonresidue to generate the extension field
|
||||
static constexpr uint32_t nonresidue = 11;
|
||||
// true if nonresidue is negative.
|
||||
// TODO: we're very confused by plonky3 and risc0 having different nonresidues: 11 and -11 respectively
|
||||
static constexpr bool nonresidue_is_negative = true;
|
||||
};
|
||||
} // namespace baby_bear
|
||||
@@ -58,8 +58,6 @@
|
||||
#ifndef _SHAREDMEM_H_
|
||||
#define _SHAREDMEM_H_
|
||||
|
||||
#include "curves/curve_config.cuh"
|
||||
|
||||
/** @brief Wrapper class for templatized dynamic shared memory arrays.
|
||||
*
|
||||
* This struct uses template specialization on the type \a T to declare
|
||||
@@ -77,8 +75,8 @@ struct SharedMemory {
|
||||
//! @returns Pointer to runtime-sized shared memory array
|
||||
__device__ T* getPointer()
|
||||
{
|
||||
extern __device__ void Error_UnsupportedType(); // Ensure that we won't compile any un-specialized types
|
||||
Error_UnsupportedType();
|
||||
// extern __device__ void Error_UnsupportedType(); // Ensure that we won't compile any un-specialized types
|
||||
// Error_UnsupportedType();
|
||||
return (T*)0;
|
||||
}
|
||||
// TODO: Use operator overloading to make this class look like a regular array
|
||||
@@ -214,24 +212,6 @@ struct SharedMemory<uchar4> {
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory<curve_config::scalar_t> {
|
||||
__device__ curve_config::scalar_t* getPointer()
|
||||
{
|
||||
extern __shared__ curve_config::scalar_t s_scalar_[];
|
||||
return s_scalar_;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct SharedMemory<curve_config::projective_t> {
|
||||
__device__ curve_config::projective_t* getPointer()
|
||||
{
|
||||
extern __shared__ curve_config::projective_t s_projective_[];
|
||||
return s_projective_;
|
||||
}
|
||||
};
|
||||
|
||||
#endif //_SHAREDMEM_H_
|
||||
|
||||
// Leave this at the end of the file
|
||||
@@ -3,8 +3,8 @@
|
||||
#define KECCAK_H
|
||||
|
||||
#include <cstdint>
|
||||
#include "utils/device_context.cuh"
|
||||
#include "utils/error_handler.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
namespace keccak {
|
||||
/**
|
||||
@@ -4,12 +4,11 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "primitives/affine.cuh"
|
||||
#include "primitives/field.cuh"
|
||||
#include "primitives/projective.cuh"
|
||||
#include "utils/device_context.cuh"
|
||||
#include "utils/error_handler.cuh"
|
||||
#include "curves/affine.cuh"
|
||||
#include "curves/projective.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
/**
|
||||
* @namespace msm
|
||||
@@ -87,7 +86,7 @@ namespace msm {
|
||||
* @return Default value of [MSMConfig](@ref MSMConfig).
|
||||
*/
|
||||
template <typename A>
|
||||
MSMConfig DefaultMSMConfig();
|
||||
MSMConfig DefaultMSMConfig(const device_context::DeviceContext& ctx = device_context::get_default_device_context());
|
||||
|
||||
/**
|
||||
* A function that computes MSM: \f$ MSM(s_i, P_i) = \sum_{i=1}^N s_i \cdot P_i \f$.
|
||||
@@ -4,10 +4,9 @@
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "utils/device_context.cuh"
|
||||
#include "utils/error_handler.cuh"
|
||||
#include "utils/sharedmem.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "gpu-utils/sharedmem.cuh"
|
||||
#include "utils/utils_kernels.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
@@ -56,6 +55,15 @@ namespace ntt {
|
||||
template <typename S>
|
||||
cudaError_t ReleaseDomain(device_context::DeviceContext& ctx);
|
||||
|
||||
/* Returns the basic root of unity Wn corresponding to the basic root used to initialize the domain.
|
||||
* Useful when computing NTT on cosets. In that case we must use the root W_2n that is between W_n and W_n+1.
|
||||
* @param logn log size of the required root.
|
||||
* @param ctx Details related to the device such as its id and stream id.
|
||||
* @return Wn root of unity corresponding to logn and the basic root used for initDomain(root)
|
||||
*/
|
||||
template <typename S>
|
||||
S GetRootOfUnity(uint64_t logn, device_context::DeviceContext& ctx);
|
||||
|
||||
/**
|
||||
* @enum NTTDir
|
||||
* Whether to perform normal forward NTT, or inverse NTT (iNTT). Mathematically, forward NTT computes polynomial
|
||||
@@ -130,7 +138,8 @@ namespace ntt {
|
||||
* @return Default value of [NTTConfig](@ref NTTConfig).
|
||||
*/
|
||||
template <typename S>
|
||||
NTTConfig<S> DefaultNTTConfig();
|
||||
NTTConfig<S>
|
||||
DefaultNTTConfig(const device_context::DeviceContext& ctx = device_context::get_default_device_context());
|
||||
|
||||
/**
|
||||
* A function that computes NTT or iNTT in-place. It's necessary to call [InitDomain](@ref InitDomain) with an
|
||||
@@ -3,7 +3,7 @@
|
||||
#define _NTT_IMPL_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include "appUtils/ntt/ntt.cuh" // for enum Ordering
|
||||
#include "ntt/ntt.cuh" // for enum Ordering
|
||||
|
||||
namespace mxntt {
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
#pragma once
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "fields/field_config.cuh"
|
||||
#include "polynomials/polynomials.h"
|
||||
|
||||
using device_context::DeviceContext;
|
||||
|
||||
namespace polynomials {
|
||||
template <typename C = scalar_t, typename D = C, typename I = C>
|
||||
class CUDAPolynomialFactory : public AbstractPolynomialFactory<C, D, I>
|
||||
{
|
||||
std::vector<DeviceContext> m_device_contexts; // device-id --> device context
|
||||
std::vector<cudaStream_t> m_device_streams; // device-id --> device stream. Storing the streams here as workaround
|
||||
// since DeviceContext has a reference to a stream.
|
||||
|
||||
public:
|
||||
CUDAPolynomialFactory();
|
||||
~CUDAPolynomialFactory();
|
||||
std::shared_ptr<IPolynomialContext<C, D, I>> create_context() override;
|
||||
std::shared_ptr<IPolynomialBackend<C, D, I>> create_backend() override;
|
||||
};
|
||||
} // namespace polynomials
|
||||
49
icicle/include/polynomials/polynomial_abstract_factory.h
Normal file
49
icicle/include/polynomials/polynomial_abstract_factory.h
Normal file
@@ -0,0 +1,49 @@
|
||||
#pragma once
|
||||
|
||||
#include "polynomial_context.h"
|
||||
#include "polynomial_backend.h"
|
||||
#include <memory> // For std::shared_ptr
|
||||
|
||||
namespace polynomials {
|
||||
|
||||
/**
|
||||
* @brief Abstract factory for creating polynomial contexts and backends.
|
||||
*
|
||||
* The `AbstractPolynomialFactory` serves as an interface for factories capable of creating
|
||||
* instances of `IPolynomialContext` and `IPolynomialBackend`. This design allows for the
|
||||
* decoupling of object creation from their usage, facilitating the implementation of various
|
||||
* computational strategies (e.g., GPU, ZPU) without altering client code. Each concrete factory
|
||||
* is expected to provide tailored implementations of polynomial contexts and backends that
|
||||
* are optimized for specific computational environments.
|
||||
*
|
||||
* @tparam C Type of the coefficients.
|
||||
* @tparam D Domain type, representing the input space of the polynomial.
|
||||
* @tparam I Image type, representing the output space of the polynomial.
|
||||
*/
|
||||
template <typename C, typename D, typename I>
|
||||
class AbstractPolynomialFactory
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* @brief Creates and returns a shared pointer to an `IPolynomialContext` instance.
|
||||
*
|
||||
* @return std::shared_ptr<IPolynomialContext<C, D, I>> A shared pointer to the created
|
||||
* polynomial context instance.
|
||||
*/
|
||||
virtual std::shared_ptr<IPolynomialContext<C, D, I>> create_context() = 0;
|
||||
|
||||
/**
|
||||
* @brief Creates and returns a shared pointer to an `IPolynomialBackend` instance.
|
||||
*
|
||||
* @return std::shared_ptr<IPolynomialBackend<C, D, I>> A shared pointer to the created
|
||||
* polynomial backend instance.
|
||||
*/
|
||||
virtual std::shared_ptr<IPolynomialBackend<C, D, I>> create_backend() = 0;
|
||||
|
||||
/**
|
||||
* @brief Virtual destructor for the `AbstractPolynomialFactory`.
|
||||
*/
|
||||
virtual ~AbstractPolynomialFactory() = default;
|
||||
};
|
||||
|
||||
} // namespace polynomials
|
||||
72
icicle/include/polynomials/polynomial_backend.h
Normal file
72
icicle/include/polynomials/polynomial_backend.h
Normal file
@@ -0,0 +1,72 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint> // for uint64_t, int64_t
|
||||
|
||||
namespace polynomials {
|
||||
|
||||
/**
|
||||
* @brief Interface for the polynomial computational backend.
|
||||
*
|
||||
* The `IPolynomialBackend` interface defines the set of operations for polynomial arithmetic
|
||||
* and manipulation that can be performed on a given computational device or platform (e.g., GPU, ZPU).
|
||||
* This interface abstracts the computational logic, allowing for implementation-specific optimizations
|
||||
* and hardware utilization. It interacts closely with `IPolynomialContext` to manage polynomial data
|
||||
* states and perform computations.
|
||||
*
|
||||
* @tparam C Type of the coefficients.
|
||||
* @tparam D Domain type, representing the input space of the polynomial.
|
||||
* @tparam I Image type, representing the output space of the polynomial.
|
||||
*/
|
||||
template <typename C, typename D, typename I>
|
||||
class IPolynomialBackend
|
||||
{
|
||||
public:
|
||||
IPolynomialBackend() = default;
|
||||
virtual ~IPolynomialBackend() = default;
|
||||
|
||||
typedef std::shared_ptr<IPolynomialContext<C, D, I>> PolyContext;
|
||||
|
||||
// Initialization methods
|
||||
virtual void from_coefficients(PolyContext p, uint64_t nof_coefficients, const C* coefficients = nullptr) = 0;
|
||||
virtual void from_rou_evaluations(PolyContext p, uint64_t nof_evaluations, const I* evaluations = nullptr) = 0;
|
||||
virtual void clone(PolyContext out, PolyContext in) = 0;
|
||||
|
||||
// Arithmetic operations
|
||||
virtual void add(PolyContext& out, PolyContext op_a, PolyContext op_b) = 0;
|
||||
virtual void subtract(PolyContext out, PolyContext op_a, PolyContext op_b) = 0;
|
||||
virtual void multiply(PolyContext out, PolyContext op_a, PolyContext op_b) = 0;
|
||||
virtual void multiply(PolyContext out, PolyContext p, D scalar) = 0; // scalar multiplication
|
||||
virtual void divide(PolyContext Quotient_out, PolyContext Remainder_out, PolyContext op_a, PolyContext op_b) = 0;
|
||||
virtual void quotient(PolyContext out, PolyContext op_a, PolyContext op_b) = 0;
|
||||
virtual void remainder(PolyContext out, PolyContext op_a, PolyContext op_b) = 0;
|
||||
virtual void divide_by_vanishing_polynomial(PolyContext out, PolyContext op_a, uint64_t vanishing_poly_degree) = 0;
|
||||
|
||||
// Operations specific to monomials
|
||||
virtual void add_monomial_inplace(PolyContext& poly, C monomial_coeff, uint64_t monomial) = 0;
|
||||
virtual void sub_monomial_inplace(PolyContext& poly, C monomial_coeff, uint64_t monomial) = 0;
|
||||
|
||||
// Utility methods
|
||||
virtual void slice(PolyContext out, PolyContext in, uint64_t offset, uint64_t stride, uint64_t size) = 0;
|
||||
virtual int64_t degree(PolyContext op) = 0;
|
||||
|
||||
// Method to access mutable storage within the context
|
||||
void* get_context_storage_mutable(PolyContext ctxt) { return ctxt->get_storage_mutable(); }
|
||||
const void* get_context_storage_immutable(PolyContext ctxt) { return ctxt->get_storage_immutable(); }
|
||||
|
||||
// Evaluation methods
|
||||
virtual I evaluate(PolyContext op, const D& domain_x) = 0;
|
||||
virtual void evaluate_on_domain(PolyContext op, const D* domain, uint64_t size, I* evaluations /*OUT*/) = 0;
|
||||
|
||||
// Methods to copy coefficients to host memory
|
||||
virtual C copy_coefficient_to_host(PolyContext op, uint64_t coeff_idx) = 0;
|
||||
virtual int64_t
|
||||
copy_coefficients_to_host(PolyContext op, C* host_coeffs, int64_t start_idx = 0, int64_t end_idx = -1) = 0;
|
||||
|
||||
// Methods to get views of coefficients and evaluations, including device id
|
||||
virtual std::tuple<IntegrityPointer<C>, uint64_t /*size*/, uint64_t /*device_id*/>
|
||||
get_coefficients_view(PolyContext p) = 0;
|
||||
virtual std::tuple<IntegrityPointer<I>, uint64_t /*size*/, uint64_t /*device_id*/>
|
||||
get_rou_evaluations_view(PolyContext p, uint64_t nof_evaluations = 0, bool is_reversed = false) = 0;
|
||||
};
|
||||
|
||||
} // namespace polynomials
|
||||
93
icicle/include/polynomials/polynomial_context.h
Normal file
93
icicle/include/polynomials/polynomial_context.h
Normal file
@@ -0,0 +1,93 @@
|
||||
#pragma once
|
||||
|
||||
#include <utility> // for std::pair
|
||||
#include <tuple> // for std::tuple
|
||||
#include <iostream> // for std::ostream
|
||||
#include <algorithm> // for std::max
|
||||
#include <cstdint> // for uint64_t, etc.
|
||||
#include <memory>
|
||||
#include "utils/integrity_pointer.h"
|
||||
|
||||
namespace polynomials {
|
||||
|
||||
template <typename Coeff, typename Domain, typename Image>
|
||||
class IPolynomialBackend;
|
||||
|
||||
/**
|
||||
* @brief Interface for polynomial context, encapsulating state, memory, and device context.
|
||||
*
|
||||
* This interface is designed to manage the state of polynomials including their coefficients and
|
||||
* evaluations in both natural and reversed order. It supports operations for converting between
|
||||
* these forms, allocating and releasing resources, and accessing the underlying data. The context
|
||||
* abstracts over the specifics of memory and execution context, allowing polynomials to be managed
|
||||
* in a way that is agnostic to the underlying hardware or software infrastructure.
|
||||
*
|
||||
* @tparam C Type of the coefficients.
|
||||
* @tparam D Domain type, representing the input space of the polynomial.
|
||||
* @tparam I Image type, representing the output space of the polynomial.
|
||||
*/
|
||||
template <typename C, typename D, typename I>
|
||||
class IPolynomialContext
|
||||
{
|
||||
public:
|
||||
friend class IPolynomialBackend<C, D, I>;
|
||||
|
||||
// Enumerates the possible states of a polynomial context.
|
||||
enum State { Invalid, Coefficients, EvaluationsOnRou_Natural, EvaluationsOnRou_Reversed };
|
||||
|
||||
// The size of the largest element among coefficients and evaluations.
|
||||
static constexpr size_t ElementSize = std::max(sizeof(C), sizeof(I));
|
||||
|
||||
/**
|
||||
* @brief Construct a new IPolynomialContext object.
|
||||
*/
|
||||
IPolynomialContext() : m_id{s_id++} {}
|
||||
|
||||
/**
|
||||
* @brief Virtual destructor for IPolynomialContext.
|
||||
*/
|
||||
virtual ~IPolynomialContext() = default;
|
||||
|
||||
// Methods for initializing the context from coefficients or evaluations.
|
||||
virtual void from_coefficients(uint64_t nof_coefficients, const C* coefficients = nullptr) = 0;
|
||||
virtual void from_rou_evaluations(uint64_t nof_evaluations, const I* evaluations = nullptr) = 0;
|
||||
|
||||
// Method for cloning the context from another instance.
|
||||
virtual void clone(IPolynomialContext& from) = 0;
|
||||
|
||||
// Methods for resource management.
|
||||
virtual void allocate(uint64_t nof_elements, State init_state = State::Coefficients, bool memset_zeros = true) = 0;
|
||||
virtual void release() = 0;
|
||||
|
||||
// Methods for transforming between coefficients and evaluations.
|
||||
virtual void transform_to_coefficients(uint64_t nof_coefficients = 0) = 0;
|
||||
virtual void transform_to_evaluations(uint64_t nof_evaluations = 0, bool is_reversed = false) = 0;
|
||||
|
||||
// Accessors for the state and number of elements.
|
||||
virtual State get_state() const = 0;
|
||||
virtual uint64_t get_nof_elements() const = 0;
|
||||
|
||||
// Methods to get direct access to coefficients and evaluations.
|
||||
virtual std::pair<const C*, uint64_t> get_coefficients() = 0;
|
||||
virtual std::pair<const I*, uint64_t> get_rou_evaluations() = 0;
|
||||
|
||||
// Methods to get views of coefficients and evaluations, including device id.
|
||||
virtual std::tuple<IntegrityPointer<C>, uint64_t /*size*/, uint64_t /*device_id*/> get_coefficients_view() = 0;
|
||||
virtual std::tuple<IntegrityPointer<I>, uint64_t /*size*/, uint64_t /*device_id*/>
|
||||
get_rou_evaluations_view(uint64_t nof_evaluations = 0, bool is_reversed = false) = 0;
|
||||
|
||||
// Method for printing the context state to an output stream.
|
||||
virtual void print(std::ostream& os) = 0;
|
||||
|
||||
protected:
|
||||
// Provides mutable access to the underlying storage for backend computations.
|
||||
virtual void* get_storage_mutable() = 0;
|
||||
virtual const void* get_storage_immutable() = 0;
|
||||
|
||||
// Static and instance variables for debug id management.
|
||||
static inline uint64_t s_id = 0; // Global id counter.
|
||||
|
||||
public:
|
||||
const uint64_t m_id;
|
||||
};
|
||||
} // namespace polynomials
|
||||
133
icicle/include/polynomials/polynomials.h
Normal file
133
icicle/include/polynomials/polynomials.h
Normal file
@@ -0,0 +1,133 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include "utils/integrity_pointer.h"
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
#include "polynomial_context.h"
|
||||
#include "polynomial_backend.h"
|
||||
#include "polynomial_abstract_factory.h"
|
||||
|
||||
using namespace field_config;
|
||||
|
||||
namespace polynomials {
|
||||
|
||||
/**
|
||||
* @brief Represents a polynomial and provides operations for polynomial arithmetic, evaluation, and manipulation.
|
||||
*
|
||||
* This class models a polynomial with coefficients of type `Coeff`, defined over a domain `Domain` and producing
|
||||
* outputs of type `Image`. It supports a range of operations including basic arithmetic (addition, subtraction,
|
||||
* multiplication, division), evaluation at points or over domains, and manipulation (slicing, adding monomials).
|
||||
* The implementation abstracts over the specifics of computation and storage through the use of an abstract factory,
|
||||
* contexts, and backends, allowing for efficient execution across various computational environments.
|
||||
*
|
||||
* @tparam Coeff Type of the coefficients of the polynomial.
|
||||
* @tparam Domain Type representing the input space of the polynomial (defaults to `Coeff`).
|
||||
* @tparam Image Type representing the output space of the polynomial (defaults to `Coeff`).
|
||||
*/
|
||||
template <typename Coeff, typename Domain = Coeff, typename Image = Coeff>
|
||||
class Polynomial
|
||||
{
|
||||
public:
|
||||
// Initialization (coefficients/evaluations can reside on host or device)
|
||||
static Polynomial from_coefficients(const Coeff* coefficients, uint64_t nof_coefficients);
|
||||
static Polynomial from_rou_evaluations(const Image* evaluations, uint64_t nof_evaluations);
|
||||
|
||||
// Clone the polynomial
|
||||
Polynomial clone() const;
|
||||
|
||||
// Arithmetic ops
|
||||
Polynomial operator+(const Polynomial& rhs) const;
|
||||
Polynomial& operator+=(const Polynomial& rhs);
|
||||
|
||||
Polynomial operator-(const Polynomial& rhs) const;
|
||||
|
||||
Polynomial operator*(const Polynomial& rhs) const;
|
||||
Polynomial operator*(const Domain& scalar) const; // scalar multiplication
|
||||
template <typename C, typename D, typename I>
|
||||
friend Polynomial<C, D, I> operator*(const D& scalar, const Polynomial<C, D, I>& rhs);
|
||||
|
||||
std::pair<Polynomial, Polynomial> divide(const Polynomial& rhs) const; // returns (Q(x), R(x))
|
||||
Polynomial operator/(const Polynomial& rhs) const; // returns Quotient Q(x) for A(x) = Q(x)B(x) + R(x)
|
||||
Polynomial operator%(const Polynomial& rhs) const; // returns Remainder R(x) for A(x) = Q(x)B(x) + R(x)
|
||||
Polynomial divide_by_vanishing_polynomial(uint64_t degree) const;
|
||||
|
||||
// arithmetic ops with monomial
|
||||
Polynomial& add_monomial_inplace(Coeff monomial_coeff, uint64_t monomial = 0);
|
||||
Polynomial& sub_monomial_inplace(Coeff monomial_coeff, uint64_t monomial = 0);
|
||||
|
||||
// Slicing and selecting even or odd components.
|
||||
Polynomial slice(uint64_t offset, uint64_t stride, uint64_t size = 0 /*0 means take all elements*/);
|
||||
Polynomial even();
|
||||
Polynomial odd();
|
||||
|
||||
// Note: Following ops cannot be traced. Calling them invokes polynomial evaluation
|
||||
|
||||
// Evaluation methods
|
||||
Image operator()(const Domain& x) const;
|
||||
Image evaluate(const Domain& x) const;
|
||||
void evaluate_on_domain(Domain* domain, uint64_t size, Image* evals /*OUT*/) const; // caller allocates memory
|
||||
|
||||
// Method to obtain the degree of the polynomial
|
||||
int64_t degree();
|
||||
|
||||
// Methods for copying coefficients to host memory.
|
||||
Coeff copy_coefficient_to_host(uint64_t idx) const; // single coefficient
|
||||
// caller is allocating output memory. If coeff==nullptr, returning nof_coeff only
|
||||
int64_t copy_coefficients_to_host(Coeff* host_coeffs = nullptr, int64_t start_idx = 0, int64_t end_idx = -1) const;
|
||||
|
||||
// Methods for obtaining a view of the coefficients or evaluations
|
||||
std::tuple<IntegrityPointer<Coeff>, uint64_t /*size*/, uint64_t /*device_id*/> get_coefficients_view();
|
||||
std::tuple<IntegrityPointer<Image>, uint64_t /*size*/, uint64_t /*device_id*/>
|
||||
get_rou_evaluations_view(uint64_t nof_evaluations = 0, bool is_reversed = false);
|
||||
|
||||
// Overload stream insertion operator for printing.
|
||||
friend std::ostream& operator<<(std::ostream& os, Polynomial& poly)
|
||||
{
|
||||
poly.m_context->print(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
// Static method to initialize the polynomial class with a factory for context and backend creation.
|
||||
static void initialize(std::shared_ptr<AbstractPolynomialFactory<Coeff, Domain, Image>> factory)
|
||||
{
|
||||
std::atexit(cleanup);
|
||||
s_factory = factory;
|
||||
}
|
||||
|
||||
// Cleanup method for releasing factory resources.
|
||||
static void cleanup() { s_factory = nullptr; }
|
||||
|
||||
private:
|
||||
// The context of the polynomial, encapsulating its state.
|
||||
std::shared_ptr<IPolynomialContext<Coeff, Domain, Image>> m_context = nullptr;
|
||||
// The computational backend for the polynomial operations.
|
||||
std::shared_ptr<IPolynomialBackend<Coeff, Domain, Image>> m_backend = nullptr;
|
||||
|
||||
// Factory for constructing the context and backend instances.
|
||||
static inline std::shared_ptr<AbstractPolynomialFactory<Coeff, Domain, Image>> s_factory = nullptr;
|
||||
|
||||
public:
|
||||
Polynomial();
|
||||
~Polynomial() = default;
|
||||
|
||||
// Ensures polynomials can be moved but not copied, to manage resources efficiently.
|
||||
Polynomial(Polynomial&&) = default;
|
||||
Polynomial& operator=(Polynomial&&) = default;
|
||||
Polynomial(const Polynomial&) = delete;
|
||||
Polynomial& operator=(const Polynomial&) = delete;
|
||||
|
||||
std::shared_ptr<IPolynomialContext<Coeff, Domain, Image>> get_context() { return m_context; }
|
||||
};
|
||||
|
||||
// explicit instantiation
|
||||
|
||||
// Friend operator to allow multiplication with a scalar from the left-hand side
|
||||
template <typename C = scalar_t, typename D = C, typename I = C>
|
||||
Polynomial<C, D, I> operator*(const D& scalar, const Polynomial<C, D, I>& rhs);
|
||||
|
||||
// External template instantiation to ensure the template is compiled for specific types.
|
||||
extern template class Polynomial<scalar_t>;
|
||||
|
||||
} // namespace polynomials
|
||||
247
icicle/include/polynomials/polynomials_c_api.h
Normal file
247
icicle/include/polynomials/polynomials_c_api.h
Normal file
@@ -0,0 +1,247 @@
|
||||
#pragma once
|
||||
|
||||
#include "polynomials.h"
|
||||
#include "fields/field_config.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "utils/integrity_pointer.h"
|
||||
|
||||
namespace polynomials {
|
||||
extern "C" {
|
||||
|
||||
// Defines a polynomial instance based on the scalar type from the FIELD configuration.
|
||||
typedef Polynomial<scalar_t> PolynomialInst;
|
||||
|
||||
// Constructs a polynomial from a set of coefficients.
|
||||
// coeffs: Array of coefficients.
|
||||
// size: Number of coefficients in the array.
|
||||
// Returns a pointer to the newly created polynomial instance.
|
||||
PolynomialInst* CONCAT_EXPAND(FIELD, polynomial_create_from_coefficients)(scalar_t* coeffs, size_t size)
|
||||
{
|
||||
auto result = new PolynomialInst(PolynomialInst::from_coefficients(coeffs, size));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Constructs a polynomial from evaluations at the roots of unity.
|
||||
// evals: Array of evaluations.
|
||||
// size: Number of evaluations in the array.
|
||||
// Returns a pointer to the newly created polynomial instance.
|
||||
PolynomialInst* CONCAT_EXPAND(FIELD, polynomial_create_from_rou_evaluations)(scalar_t* evals, size_t size)
|
||||
{
|
||||
auto result = new PolynomialInst(PolynomialInst::from_rou_evaluations(evals, size));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Clones an existing polynomial instance.
|
||||
// p: Pointer to the polynomial instance to clone.
|
||||
// Returns a pointer to the cloned polynomial instance.
|
||||
PolynomialInst* CONCAT_EXPAND(FIELD, polynomial_clone)(const PolynomialInst* p)
|
||||
{
|
||||
auto result = new PolynomialInst(p->clone());
|
||||
return result;
|
||||
}
|
||||
|
||||
// Deletes a polynomial instance, freeing its memory.
|
||||
// instance: Pointer to the polynomial instance to delete.
|
||||
void CONCAT_EXPAND(FIELD, polynomial_delete)(PolynomialInst* instance) { delete instance; }
|
||||
|
||||
// Adds two polynomials.
|
||||
// a, b: Pointers to the polynomial instances to add.
|
||||
// Returns a pointer to the resulting polynomial instance.
|
||||
PolynomialInst* CONCAT_EXPAND(FIELD, polynomial_add)(const PolynomialInst* a, const PolynomialInst* b)
|
||||
{
|
||||
auto result = new PolynomialInst(std::move(*a + *b));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Adds a polynomial to another in place.
|
||||
// a: Pointer to the polynomial to add to.
|
||||
// b: Pointer to the polynomial to add.
|
||||
void CONCAT_EXPAND(FIELD, polynomial_add_inplace)(PolynomialInst* a, const PolynomialInst* b) { *a += *b; }
|
||||
|
||||
// Subtracts one polynomial from another.
|
||||
// a, b: Pointers to the polynomial instances (minuend and subtrahend, respectively).
|
||||
// Returns a pointer to the resulting polynomial instance.
|
||||
|
||||
PolynomialInst* CONCAT_EXPAND(FIELD, polynomial_subtract)(const PolynomialInst* a, const PolynomialInst* b)
|
||||
{
|
||||
auto result = new PolynomialInst(std::move(*a - *b));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Multiplies two polynomials.
|
||||
// a, b: Pointers to the polynomial instances to multiply.
|
||||
// Returns a pointer to the resulting polynomial instance.
|
||||
PolynomialInst* CONCAT_EXPAND(FIELD, polynomial_multiply)(const PolynomialInst* a, const PolynomialInst* b)
|
||||
{
|
||||
auto result = new PolynomialInst(std::move(*a * *b));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Multiplies a polynomial by a scalar coefficient.
|
||||
// a: Pointer to the polynomial instance.
|
||||
// coeff: Scalar coefficient to multiply by.
|
||||
// Returns a pointer to the resulting polynomial instance.
|
||||
PolynomialInst* CONCAT_EXPAND(FIELD, polynomial_multiply_by_coeff)(const PolynomialInst* a, const scalar_t* coeff)
|
||||
{
|
||||
auto result = new PolynomialInst(std::move(*a * *coeff));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Divides one polynomial by another, returning both quotient and remainder.
|
||||
// a, b: Pointers to the polynomial instances (dividend and divisor, respectively).
|
||||
// q: Output parameter for the quotient.
|
||||
// r: Output parameter for the remainder.
|
||||
void CONCAT_EXPAND(FIELD, polynomial_division)(
|
||||
const PolynomialInst* a, const PolynomialInst* b, PolynomialInst** q /*OUT*/, PolynomialInst** r /*OUT*/)
|
||||
{
|
||||
auto [_q, _r] = a->divide(*b);
|
||||
*q = new PolynomialInst(std::move(_q));
|
||||
*r = new PolynomialInst(std::move(_r));
|
||||
}
|
||||
|
||||
// Calculates the quotient of dividing one polynomial by another.
|
||||
// a, b: Pointers to the polynomial instances (dividend and divisor, respectively).
|
||||
// Returns a pointer to the resulting quotient polynomial instance.
|
||||
PolynomialInst* CONCAT_EXPAND(FIELD, polynomial_quotient)(const PolynomialInst* a, const PolynomialInst* b)
|
||||
{
|
||||
auto result = new PolynomialInst(std::move(*a / *b));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Calculates the remainder of dividing one polynomial by another.
|
||||
// a, b: Pointers to the polynomial instances (dividend and divisor, respectively).
|
||||
// Returns a pointer to the resulting remainder polynomial instance.
|
||||
|
||||
PolynomialInst* CONCAT_EXPAND(FIELD, polynomial_remainder)(const PolynomialInst* a, const PolynomialInst* b)
|
||||
{
|
||||
auto result = new PolynomialInst(std::move(*a % *b));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Divides a polynomial by a vanishing polynomial of a given degree, over rou domain.
|
||||
// p: Pointer to the polynomial instance.
|
||||
// vanishing_poly_degree: Degree of the vanishing polynomial.
|
||||
// Returns a pointer to the resulting polynomial instance.
|
||||
PolynomialInst*
|
||||
CONCAT_EXPAND(FIELD, polynomial_divide_by_vanishing)(const PolynomialInst* p, uint64_t vanishing_poly_degree)
|
||||
{
|
||||
auto result = new PolynomialInst(std::move(p->divide_by_vanishing_polynomial(vanishing_poly_degree)));
|
||||
return result;
|
||||
}
|
||||
|
||||
// Adds a monomial to a polynomial in place.
|
||||
// p: Pointer to the polynomial instance.
|
||||
// monomial_coeff: Coefficient of the monomial to add.
|
||||
// monomial: Degree of the monomial to add.
|
||||
void CONCAT_EXPAND(FIELD, polynomial_add_monomial_inplace)(
|
||||
PolynomialInst* p, const scalar_t* monomial_coeff, uint64_t monomial)
|
||||
{
|
||||
p->add_monomial_inplace(*monomial_coeff, monomial);
|
||||
}
|
||||
|
||||
// Subtracts a monomial from a polynomial in place.
|
||||
// p: Pointer to the polynomial instance.
|
||||
// monomial_coeff: Coefficient of the monomial to subtract.
|
||||
// monomial: Degree of the monomial to subtract.
|
||||
void CONCAT_EXPAND(FIELD, polynomial_sub_monomial_inplace)(
|
||||
PolynomialInst* p, const scalar_t* monomial_coeff, uint64_t monomial)
|
||||
{
|
||||
p->sub_monomial_inplace(*monomial_coeff, monomial);
|
||||
}
|
||||
|
||||
// Evaluates a polynomial at a given point.
|
||||
// p: Pointer to the polynomial instance.
|
||||
// x: Point at which to evaluate the polynomial.
|
||||
// Returns the evaluation result.
|
||||
scalar_t CONCAT_EXPAND(FIELD, polynomial_evaluate)(const PolynomialInst* p, const scalar_t& x)
|
||||
{
|
||||
return p->evaluate(x);
|
||||
}
|
||||
|
||||
// Evaluates a polynomial on a domain of points.
|
||||
// p: Pointer to the polynomial instance.
|
||||
// domain: Array of points constituting the domain.
|
||||
// domain_size: Number of points in the domain.
|
||||
// evals: Output array for the evaluations.
|
||||
void CONCAT_EXPAND(FIELD, polynomial_evaluate_on_domain)(
|
||||
const PolynomialInst* p, scalar_t* domain, uint64_t domain_size, scalar_t* evals /*OUT*/)
|
||||
{
|
||||
return p->evaluate_on_domain(domain, domain_size, evals);
|
||||
}
|
||||
|
||||
// Returns the degree of a polynomial.
|
||||
// p: Pointer to the polynomial instance.
|
||||
// Returns the degree of the polynomial.
|
||||
int64_t CONCAT_EXPAND(FIELD, polynomial_degree)(PolynomialInst* p) { return p->degree(); }
|
||||
|
||||
// Copies a single coefficient of a polynomial to host memory.
|
||||
// p: Pointer to the polynomial instance.
|
||||
// idx: Index of the coefficient to copy.
|
||||
// Returns the coefficient value.
|
||||
scalar_t CONCAT_EXPAND(FIELD, polynomial_copy_single_coeff_to_host)(PolynomialInst* p, uint64_t idx)
|
||||
{
|
||||
return p->copy_coefficient_to_host(idx);
|
||||
}
|
||||
|
||||
// Copies a range of polynomial coefficients to host memory.
|
||||
// p: Pointer to the polynomial instance.
|
||||
// host_memory: Array to copy the coefficients into. If NULL, not copying.
|
||||
// start_idx: Start index of the range to copy.
|
||||
// end_idx: End index of the range to copy.
|
||||
// Returns the number of coefficients copied. if host_memory is NULL, returns number of coefficients.
|
||||
int64_t CONCAT_EXPAND(FIELD, polynomial_coeffs_to_host)(
|
||||
PolynomialInst* p, scalar_t* host_memory, uint64_t start_idx, uint64_t end_idx)
|
||||
{
|
||||
return p->copy_coefficients_to_host(host_memory, start_idx, end_idx);
|
||||
}
|
||||
|
||||
// Retrieves a device-memory view of the polynomial coefficients.
|
||||
// p: Pointer to the polynomial instance.
|
||||
// size: Output parameter for the size of the view.
|
||||
// device_id: Output parameter for the device ID.
|
||||
// Returns a pointer to an integrity pointer encapsulating the coefficients view.
|
||||
IntegrityPointer<scalar_t>* CONCAT_EXPAND(FIELD, polynomial_get_coeff_view)(
|
||||
PolynomialInst* p, uint64_t* size /*OUT*/, uint64_t* device_id /*OUT*/)
|
||||
{
|
||||
auto [coeffs, _size, _device_id] = p->get_coefficients_view();
|
||||
*size = _size;
|
||||
*device_id = _device_id;
|
||||
return new IntegrityPointer<scalar_t>(std::move(coeffs));
|
||||
}
|
||||
|
||||
// Retrieves a device-memory view of the polynomial's evaluations on the roots of unity.
|
||||
// p: Pointer to the polynomial instance.
|
||||
// nof_evals: Number of evaluations.
|
||||
// is_reversed: Whether the evaluations are in reversed order.
|
||||
// size: Output parameter for the size of the view.
|
||||
// device_id: Output parameter for the device ID.
|
||||
// Returns a pointer to an integrity pointer encapsulating the evaluations view.
|
||||
IntegrityPointer<scalar_t>* CONCAT_EXPAND(FIELD, polynomial_get_rou_evaluations_view)(
|
||||
PolynomialInst* p, uint64_t nof_evals, bool is_reversed, uint64_t* size /*OUT*/, uint64_t* device_id /*OUT*/)
|
||||
{
|
||||
auto [rou_evals, _size, _device_id] = p->get_rou_evaluations_view(nof_evals, is_reversed);
|
||||
*size = _size;
|
||||
*device_id = _device_id;
|
||||
return new IntegrityPointer<scalar_t>(std::move(rou_evals));
|
||||
}
|
||||
|
||||
// Reads the pointer from an integrity pointer.
|
||||
// p: Pointer to the integrity pointer.
|
||||
// Returns the raw pointer if still valid, otherwise NULL.
|
||||
const scalar_t* CONCAT_EXPAND(FIELD, polynomial_intergrity_ptr_get)(IntegrityPointer<scalar_t>* p)
|
||||
{
|
||||
return p->get();
|
||||
}
|
||||
|
||||
// Checks if an integrity pointer is still valid.
|
||||
// p: Pointer to the integrity pointer.
|
||||
// Returns true if the pointer is valid, false otherwise.
|
||||
bool CONCAT_EXPAND(FIELD, polynomial_intergrity_ptr_is_valid)(IntegrityPointer<scalar_t>* p) { return p->isValid(); }
|
||||
|
||||
// Destroys an integrity pointer, freeing its resources.
|
||||
// p: Pointer to the integrity pointer to destroy.
|
||||
void CONCAT_EXPAND(FIELD, polynomial_intergrity_ptr_destroy)(IntegrityPointer<scalar_t>* p) { delete p; }
|
||||
|
||||
} // extern "C"
|
||||
|
||||
} // namespace polynomials
|
||||
@@ -4,9 +4,8 @@
|
||||
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include "utils/device_context.cuh"
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "utils/error_handler.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
/**
|
||||
@@ -2,10 +2,10 @@
|
||||
#ifndef MERKLE_H
|
||||
#define MERKLE_H
|
||||
|
||||
#include "utils/device_context.cuh"
|
||||
#include "utils/error_handler.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "appUtils/poseidon/poseidon.cuh"
|
||||
#include "poseidon/poseidon.cuh"
|
||||
|
||||
#include <iostream>
|
||||
#include <math.h>
|
||||
102
icicle/include/utils/integrity_pointer.h
Normal file
102
icicle/include/utils/integrity_pointer.h
Normal file
@@ -0,0 +1,102 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
|
||||
/**
|
||||
* @brief A template class that wraps a raw pointer with additional checks for data integrity.
|
||||
*
|
||||
* IntegrityPointer is designed to wrap a raw pointer and associate it with a validation
|
||||
* mechanism based on a counter. This counter is monitored via a std::weak_ptr, allowing
|
||||
* the IntegrityPointer to check if the data it points to has potentially been invalidated.
|
||||
* It is intended for scenarios where there's a need to ensure the integrity of the pointed-to
|
||||
* data throughout the lifetime of the pointer, particularly useful in complex systems where
|
||||
* data validity can change over time due to external factors.
|
||||
*
|
||||
* Usage involves providing the raw pointer to be wrapped, a std::weak_ptr to a counter, and
|
||||
* the expected value of that counter. The IntegrityPointer can then be used much like a normal
|
||||
* pointer, with the addition of integrity checks before access.
|
||||
*
|
||||
* @tparam T The type of the pointed-to object.
|
||||
*/
|
||||
|
||||
template <typename T>
|
||||
class IntegrityPointer
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructs an IntegrityPointer wrapping a raw pointer with a validity check based on a counter.
|
||||
*
|
||||
* @param ptr A raw pointer to the data of type T.
|
||||
* @param counterWeakPtr A std::weak_ptr to an int counter, used for validation.
|
||||
* @param expectedCounterValue The expected value of the counter for the pointer to be considered valid.
|
||||
*/
|
||||
IntegrityPointer(const T* ptr, std::weak_ptr<int> counterWeakPtr, int expectedCounterValue)
|
||||
: m_ptr(ptr), m_counterWeakPtr(counterWeakPtr), m_expectedCounterValue(expectedCounterValue)
|
||||
{
|
||||
}
|
||||
IntegrityPointer(const IntegrityPointer& other) = default;
|
||||
IntegrityPointer(IntegrityPointer&& other) = default;
|
||||
|
||||
/**
|
||||
* Retrieves the raw pointer. Use with caution, as direct access bypasses validity checks.
|
||||
* @return T* The raw pointer to the data.
|
||||
*/
|
||||
const T* get() const { return isValid() ? m_ptr : nullptr; }
|
||||
|
||||
/**
|
||||
* Dereferences the pointer. Throws std::runtime_error if the pointer is invalid.
|
||||
* @return A reference to the data pointed to by the raw pointer.
|
||||
*/
|
||||
const T& operator*() const
|
||||
{
|
||||
assertValid();
|
||||
return *m_ptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provides access to the member of the pointed-to object. Throws std::runtime_error if the pointer is invalid.
|
||||
* @return T* The raw pointer to the data.
|
||||
*/
|
||||
const T* operator->() const
|
||||
{
|
||||
assertValid();
|
||||
return m_ptr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks whether the pointer is still considered valid by comparing the current value of the counter
|
||||
* to the expected value.
|
||||
* @return true if the pointer is valid, false otherwise.
|
||||
*/
|
||||
bool isValid() const
|
||||
{
|
||||
if (auto counterSharedPtr = m_counterWeakPtr.lock()) { return *counterSharedPtr == m_expectedCounterValue; }
|
||||
return false;
|
||||
}
|
||||
|
||||
private:
|
||||
const T* m_ptr; ///< The raw pointer to the data.
|
||||
std::weak_ptr<int> m_counterWeakPtr; ///< A weak pointer to the counter used for validation.
|
||||
const int m_expectedCounterValue; ///< The expected value of the counter for the pointer to be valid.
|
||||
|
||||
/**
|
||||
* Asserts the validity of the pointer. Throws std::runtime_error if the pointer is invalid.
|
||||
*/
|
||||
void assertValid() const
|
||||
{
|
||||
if (!isValid()) {
|
||||
logInvalidAccess();
|
||||
throw std::runtime_error("Attempted to access invalidated IntegrityPointer.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Logs an attempt to access an invalidated pointer.
|
||||
*/
|
||||
static void logInvalidAccess()
|
||||
{
|
||||
std::cerr << "Warning: Attempted to access invalidated IntegrityPointer." << std::endl;
|
||||
}
|
||||
};
|
||||
142
icicle/include/utils/test_functions.cuh
Normal file
142
icicle/include/utils/test_functions.cuh
Normal file
@@ -0,0 +1,142 @@
|
||||
#pragma once
|
||||
|
||||
template <class T1, class T2, int N_REP>
|
||||
__global__ void add_elements_kernel(const T1* x, const T2* y, T1* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
T1 res = x[gid];
|
||||
T2 y_gid = y[gid];
|
||||
for (int i = 0; i < N_REP; i++)
|
||||
res = res + y_gid;
|
||||
result[gid] = res;
|
||||
}
|
||||
|
||||
template <class T1, class T2, int N_REP = 1>
|
||||
int vec_add(const T1* x, const T2* y, T1* result, const unsigned count)
|
||||
{
|
||||
add_elements_kernel<T1, T2, N_REP><<<(count - 1) / 256 + 1, 256>>>(x, y, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return (error || (N_REP > 1)) ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class T1, class T2>
|
||||
__global__ void sub_elements_kernel(const T1* x, const T2* y, T1* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
result[gid] = x[gid] - y[gid];
|
||||
}
|
||||
|
||||
template <class T1, class T2>
|
||||
int vec_sub(const T1* x, const T2* y, T1* result, const unsigned count)
|
||||
{
|
||||
sub_elements_kernel<T1, T2><<<(count - 1) / 256 + 1, 256>>>(x, y, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void neg_elements_kernel(const T* x, T* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
result[gid] = T::neg(x[gid]);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
int vec_neg(const T* x, T* result, const unsigned count)
|
||||
{
|
||||
neg_elements_kernel<T><<<(count - 1) / 256 + 1, 256>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class F, class G, int N_REP>
|
||||
__global__ void mul_elements_kernel(const F* x, const G* y, G* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
F x_gid = x[gid];
|
||||
G res = y[gid];
|
||||
for (int i = 0; i < N_REP; i++)
|
||||
res = res * x_gid;
|
||||
result[gid] = res;
|
||||
}
|
||||
|
||||
template <class F, class G, int N_REP = 1>
|
||||
int vec_mul(const F* x, const G* y, G* result, const unsigned count)
|
||||
{
|
||||
mul_elements_kernel<F, G, N_REP><<<(count - 1) / 256 + 1, 256>>>(x, y, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return (error || (N_REP > 1)) ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class F>
|
||||
__global__ void inv_field_elements_kernel(const F* x, F* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
result[gid] = F::inverse(x[gid]);
|
||||
}
|
||||
|
||||
template <class F>
|
||||
int field_vec_inv(const F* x, F* result, const unsigned count)
|
||||
{
|
||||
inv_field_elements_kernel<<<(count - 1) / 256 + 1, 256>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class F, int N_REP>
|
||||
__global__ void sqr_field_elements_kernel(const F* x, F* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
F x_gid = x[gid];
|
||||
for (int i = 0; i < N_REP; i++)
|
||||
x_gid = F::sqr(x_gid);
|
||||
result[gid] = x_gid;
|
||||
}
|
||||
|
||||
template <class F, int N_REP = 1>
|
||||
int field_vec_sqr(const F* x, F* result, const unsigned count)
|
||||
{
|
||||
sqr_field_elements_kernel<F, N_REP><<<(count - 1) / 256 + 1, 256>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return (error || (N_REP > 1)) ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class P, class A>
|
||||
__global__ void to_affine_points_kernel(const P* x, A* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
result[gid] = P::to_affine(x[gid]);
|
||||
}
|
||||
|
||||
template <class P, class A>
|
||||
int point_vec_to_affine(const P* x, A* result, const unsigned count)
|
||||
{
|
||||
to_affine_points_kernel<P, A><<<(count - 1) / 256 + 1, 256>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
int device_populate_random(T* d_elements, unsigned n)
|
||||
{
|
||||
T* h_elements = (T*)malloc(n * sizeof(T));
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
h_elements[i] = T::rand_host();
|
||||
return cudaMemcpy(d_elements, h_elements, sizeof(T) * n, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
int device_set(T* d_elements, T el, unsigned n)
|
||||
{
|
||||
T* h_elements = (T*)malloc(n * sizeof(T));
|
||||
for (unsigned i = 0; i < n; i++)
|
||||
h_elements[i] = el;
|
||||
return cudaMemcpy(d_elements, h_elements, sizeof(T) * n, cudaMemcpyHostToDevice);
|
||||
}
|
||||
@@ -2,7 +2,7 @@
|
||||
#ifndef LDE_H
|
||||
#define LDE_H
|
||||
|
||||
#include "device_context.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
/**
|
||||
* @namespace vec_ops
|
||||
@@ -24,9 +24,6 @@ namespace vec_ops {
|
||||
|
||||
bool is_result_on_device; /**< If true, output is preserved on device, otherwise on host. Default value: false. */
|
||||
|
||||
bool is_result_montgomery_form; /**< True if `result` vector should be in Montgomery form and false otherwise.
|
||||
* Default value: false. */
|
||||
|
||||
bool is_async; /**< Whether to run the vector operations asynchronously. If set to `true`, the function will be
|
||||
* non-blocking and you'd need to synchronize it explicitly by running
|
||||
* `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, the
|
||||
@@ -46,7 +43,6 @@ namespace vec_ops {
|
||||
false, // is_a_on_device
|
||||
false, // is_b_on_device
|
||||
false, // is_result_on_device
|
||||
false, // is_result_montgomery_form
|
||||
false, // is_async
|
||||
};
|
||||
return config;
|
||||
@@ -64,7 +60,7 @@ namespace vec_ops {
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
template <typename E, typename S>
|
||||
cudaError_t Mul(S* vec_a, E* vec_b, int n, VecOpsConfig<E>& config, E* result);
|
||||
cudaError_t Mul(const S* vec_a, const E* vec_b, int n, VecOpsConfig<E>& config, E* result);
|
||||
|
||||
/**
|
||||
* A function that adds two vectors element-wise.
|
||||
@@ -79,7 +75,7 @@ namespace vec_ops {
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
template <typename E>
|
||||
cudaError_t Add(E* vec_a, E* vec_b, int n, VecOpsConfig<E>& config, E* result);
|
||||
cudaError_t Add(const E* vec_a, const E* vec_b, int n, VecOpsConfig<E>& config, E* result);
|
||||
|
||||
/**
|
||||
* A function that subtracts two vectors element-wise.
|
||||
@@ -94,7 +90,7 @@ namespace vec_ops {
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
template <typename E>
|
||||
cudaError_t Sub(E* vec_a, E* vec_b, int n, VecOpsConfig<E>& config, E* result);
|
||||
cudaError_t Sub(const E* vec_a, const E* vec_b, int n, VecOpsConfig<E>& config, E* result);
|
||||
|
||||
/**
|
||||
* Transposes an input matrix out-of-place inside GPU.
|
||||
@@ -1,10 +0,0 @@
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "field.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
using namespace curve_config;
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, GenerateScalars)(scalar_t* scalars, int size)
|
||||
{
|
||||
scalar_t::RandHostMany(scalars, size);
|
||||
}
|
||||
@@ -1,113 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "curves/curve_config.cuh"
|
||||
|
||||
using namespace curve_config;
|
||||
|
||||
template <class T1, class T2>
|
||||
__global__ void add_elements_kernel(const T1* x, const T2* y, T1* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
result[gid] = x[gid] + y[gid];
|
||||
}
|
||||
|
||||
template <class T1, class T2>
|
||||
int vec_add(const T1* x, const T2* y, T1* result, const unsigned count)
|
||||
{
|
||||
add_elements_kernel<T1, T2><<<(count - 1) / 32 + 1, 32>>>(x, y, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class T1, class T2>
|
||||
__global__ void sub_elements_kernel(const T1* x, const T2* y, T1* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
result[gid] = x[gid] - y[gid];
|
||||
}
|
||||
|
||||
template <class T1, class T2>
|
||||
int vec_sub(const T1* x, const T2* y, T1* result, const unsigned count)
|
||||
{
|
||||
sub_elements_kernel<T1, T2><<<(count - 1) / 32 + 1, 32>>>(x, y, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class T>
|
||||
__global__ void neg_elements_kernel(const T* x, T* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
result[gid] = T::neg(x[gid]);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
int vec_neg(const T* x, T* result, const unsigned count)
|
||||
{
|
||||
neg_elements_kernel<T><<<(count - 1) / 32 + 1, 32>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class F, class G>
|
||||
__global__ void mul_elements_kernel(const F* x, const G* y, G* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
result[gid] = x[gid] * y[gid];
|
||||
}
|
||||
|
||||
template <class F, class G>
|
||||
int vec_mul(const F* x, const G* y, G* result, const unsigned count)
|
||||
{
|
||||
mul_elements_kernel<F, G><<<(count - 1) / 32 + 1, 32>>>(x, y, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
__global__ void inv_field_elements_kernel(const scalar_t* x, scalar_t* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
result[gid] = scalar_t::inverse(x[gid]);
|
||||
}
|
||||
|
||||
int field_vec_inv(const scalar_t* x, scalar_t* result, const unsigned count)
|
||||
{
|
||||
inv_field_elements_kernel<<<(count - 1) / 32 + 1, 32>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
__global__ void sqr_field_elements_kernel(const scalar_t* x, scalar_t* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
result[gid] = scalar_t::sqr(x[gid]);
|
||||
}
|
||||
|
||||
int field_vec_sqr(const scalar_t* x, scalar_t* result, const unsigned count)
|
||||
{
|
||||
sqr_field_elements_kernel<<<(count - 1) / 32 + 1, 32>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
|
||||
template <class P, class A>
|
||||
__global__ void to_affine_points_kernel(const P* x, A* result, const unsigned count)
|
||||
{
|
||||
const unsigned gid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (gid >= count) return;
|
||||
result[gid] = P::to_affine(x[gid]);
|
||||
}
|
||||
|
||||
template <class P, class A>
|
||||
int point_vec_to_affine(const P* x, A* result, const unsigned count)
|
||||
{
|
||||
to_affine_points_kernel<P, A><<<(count - 1) / 32 + 1, 32>>>(x, result, count);
|
||||
int error = cudaGetLastError();
|
||||
return error ? error : cudaDeviceSynchronize();
|
||||
}
|
||||
25
icicle/src/curves/CMakeLists.txt
Normal file
25
icicle/src/curves/CMakeLists.txt
Normal file
@@ -0,0 +1,25 @@
|
||||
if (G2)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DG2")
|
||||
endif ()
|
||||
|
||||
set(TARGET icicle_curve)
|
||||
set(FIELD_TARGET icicle_field)
|
||||
|
||||
set(SRC ${CMAKE_SOURCE_DIR}/src)
|
||||
|
||||
set(CURVE_SOURCE ${SRC}/curves/extern.cu)
|
||||
list(APPEND CURVE_SOURCE ${SRC}/msm/extern.cu)
|
||||
if(G2)
|
||||
list(APPEND CURVE_SOURCE ${SRC}/curves/extern_g2.cu)
|
||||
list(APPEND CURVE_SOURCE ${SRC}/msm/extern_g2.cu)
|
||||
endif()
|
||||
if(ECNTT)
|
||||
list(APPEND CURVE_SOURCE ${SRC}/ntt/extern_ecntt.cu)
|
||||
list(APPEND CURVE_SOURCE ${SRC}/ntt/kernel_ntt.cu)
|
||||
endif()
|
||||
|
||||
add_library(${TARGET} STATIC ${CURVE_SOURCE})
|
||||
target_include_directories(${TARGET} PUBLIC ${CMAKE_SOURCE_DIR}/include/)
|
||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "ingo_curve_${CURVE}")
|
||||
target_compile_definitions(${TARGET} PUBLIC CURVE=${CURVE})
|
||||
target_link_libraries(${TARGET} PRIVATE ${FIELD_TARGET})
|
||||
51
icicle/src/curves/extern.cu
Normal file
51
icicle/src/curves/extern.cu
Normal file
@@ -0,0 +1,51 @@
|
||||
#include "curves/curve_config.cuh"
|
||||
|
||||
using namespace curve_config;
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "utils/mont.cuh"
|
||||
|
||||
extern "C" bool CONCAT_EXPAND(CURVE, Eq)(projective_t* point1, projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == point_field_t::zero()) && (point1->y == point_field_t::zero()) &&
|
||||
(point1->z == point_field_t::zero())) &&
|
||||
!((point2->x == point_field_t::zero()) && (point2->y == point_field_t::zero()) &&
|
||||
(point2->z == point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, ToAffine)(projective_t* point, affine_t* point_out)
|
||||
{
|
||||
*point_out = projective_t::to_affine(*point);
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, GenerateProjectivePoints)(projective_t* points, int size)
|
||||
{
|
||||
projective_t::RandHostMany(points, size);
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, GenerateAffinePoints)(affine_t* points, int size)
|
||||
{
|
||||
projective_t::RandHostManyAffine(points, size);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, AffineConvertMontgomery)(
|
||||
affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx)
|
||||
{
|
||||
if (is_into) {
|
||||
return mont::ToMontgomery(d_inout, n, ctx.stream, d_inout);
|
||||
} else {
|
||||
return mont::FromMontgomery(d_inout, n, ctx.stream, d_inout);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, ProjectiveConvertMontgomery)(
|
||||
projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx)
|
||||
{
|
||||
if (is_into) {
|
||||
return mont::ToMontgomery(d_inout, n, ctx.stream, d_inout);
|
||||
} else {
|
||||
return mont::FromMontgomery(d_inout, n, ctx.stream, d_inout);
|
||||
}
|
||||
}
|
||||
@@ -1,37 +1,10 @@
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "projective.cuh"
|
||||
#include <cuda.h>
|
||||
|
||||
using namespace curve_config;
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
using namespace curve_config;
|
||||
|
||||
extern "C" bool CONCAT_EXPAND(CURVE, Eq)(projective_t* point1, projective_t* point2)
|
||||
{
|
||||
return (*point1 == *point2) &&
|
||||
!((point1->x == point_field_t::zero()) && (point1->y == point_field_t::zero()) &&
|
||||
(point1->z == point_field_t::zero())) &&
|
||||
!((point2->x == point_field_t::zero()) && (point2->y == point_field_t::zero()) &&
|
||||
(point2->z == point_field_t::zero()));
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, ToAffine)(projective_t* point, affine_t* point_out)
|
||||
{
|
||||
*point_out = projective_t::to_affine(*point);
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, GenerateProjectivePoints)(projective_t* points, int size)
|
||||
{
|
||||
projective_t::RandHostMany(points, size);
|
||||
}
|
||||
|
||||
extern "C" void CONCAT_EXPAND(CURVE, GenerateAffinePoints)(affine_t* points, int size)
|
||||
{
|
||||
projective_t::RandHostManyAffine(points, size);
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
|
||||
using namespace curve_config;
|
||||
#include "utils/mont.cuh"
|
||||
|
||||
extern "C" bool CONCAT_EXPAND(CURVE, G2Eq)(g2_projective_t* point1, g2_projective_t* point2)
|
||||
{
|
||||
@@ -57,4 +30,22 @@ extern "C" void CONCAT_EXPAND(CURVE, G2GenerateAffinePoints)(g2_affine_t* points
|
||||
g2_projective_t::RandHostManyAffine(points, size);
|
||||
}
|
||||
|
||||
#endif
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, G2AffineConvertMontgomery)(
|
||||
g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx)
|
||||
{
|
||||
if (is_into) {
|
||||
return mont::ToMontgomery(d_inout, n, ctx.stream, d_inout);
|
||||
} else {
|
||||
return mont::FromMontgomery(d_inout, n, ctx.stream, d_inout);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, G2ProjectiveConvertMontgomery)(
|
||||
g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx)
|
||||
{
|
||||
if (is_into) {
|
||||
return mont::ToMontgomery(d_inout, n, ctx.stream, d_inout);
|
||||
} else {
|
||||
return mont::FromMontgomery(d_inout, n, ctx.stream, d_inout);
|
||||
}
|
||||
}
|
||||
39
icicle/src/fields/CMakeLists.txt
Normal file
39
icicle/src/fields/CMakeLists.txt
Normal file
@@ -0,0 +1,39 @@
|
||||
if (EXT_FIELD)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DEXT_FIELD")
|
||||
endif ()
|
||||
|
||||
SET(SUPPORTED_FIELDS_WITHOUT_NTT grumpkin)
|
||||
|
||||
set(TARGET icicle_field)
|
||||
|
||||
set(SRC ${CMAKE_SOURCE_DIR}/src)
|
||||
|
||||
set(FIELD_SOURCE ${SRC}/fields/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/vec_ops/extern.cu)
|
||||
if(EXT_FIELD)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/fields/extern_extension.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/ntt/extern_extension.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/vec_ops/extern_extension.cu)
|
||||
endif()
|
||||
|
||||
set(POLYNOMIAL_SOURCE_FILES
|
||||
${SRC}/polynomials/polynomials.cu
|
||||
${SRC}/polynomials/cuda_backend/polynomial_cuda_backend.cu)
|
||||
|
||||
list(APPEND FIELD_SOURCE ${POLYNOMIAL_SOURCE_FILES})
|
||||
|
||||
# TODO: impl poseidon for small fields. note that it needs to be defined for the extension field!
|
||||
if (DEFINED CURVE)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/poseidon.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/poseidon/tree/merkle.cu)
|
||||
endif()
|
||||
|
||||
if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_NTT)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/ntt/extern.cu)
|
||||
list(APPEND FIELD_SOURCE ${SRC}/ntt/kernel_ntt.cu)
|
||||
endif()
|
||||
|
||||
add_library(${TARGET} STATIC ${FIELD_SOURCE})
|
||||
target_include_directories(${TARGET} PUBLIC ${CMAKE_SOURCE_DIR}/include/)
|
||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "ingo_field_${FIELD}")
|
||||
target_compile_definitions(${TARGET} PUBLIC FIELD=${FIELD})
|
||||
22
icicle/src/fields/extern.cu
Normal file
22
icicle/src/fields/extern.cu
Normal file
@@ -0,0 +1,22 @@
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
using namespace field_config;
|
||||
|
||||
#include "utils/mont.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
extern "C" void CONCAT_EXPAND(FIELD, GenerateScalars)(scalar_t* scalars, int size)
|
||||
{
|
||||
scalar_t::RandHostMany(scalars, size);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, ScalarConvertMontgomery)(
|
||||
scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx)
|
||||
{
|
||||
if (is_into) {
|
||||
return mont::ToMontgomery(d_inout, n, ctx.stream, d_inout);
|
||||
} else {
|
||||
return mont::FromMontgomery(d_inout, n, ctx.stream, d_inout);
|
||||
}
|
||||
}
|
||||
22
icicle/src/fields/extern_extension.cu
Normal file
22
icicle/src/fields/extern_extension.cu
Normal file
@@ -0,0 +1,22 @@
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
using namespace field_config;
|
||||
|
||||
#include "utils/mont.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
extern "C" void CONCAT_EXPAND(FIELD, ExtensionGenerateScalars)(extension_t* scalars, int size)
|
||||
{
|
||||
extension_t::RandHostMany(scalars, size);
|
||||
}
|
||||
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, ExtensionScalarConvertMontgomery)(
|
||||
extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx)
|
||||
{
|
||||
if (is_into) {
|
||||
return mont::ToMontgomery(d_inout, n, ctx.stream, d_inout);
|
||||
} else {
|
||||
return mont::FromMontgomery(d_inout, n, ctx.stream, d_inout);
|
||||
}
|
||||
}
|
||||
5
icicle/src/hash/CMakeLists.txt
Normal file
5
icicle/src/hash/CMakeLists.txt
Normal file
@@ -0,0 +1,5 @@
|
||||
set(TARGET icicle_hash)
|
||||
|
||||
add_library(${TARGET} STATIC keccak/keccak.cu)
|
||||
target_include_directories(${TARGET} PUBLIC ${CMAKE_SOURCE_DIR}/include/)
|
||||
set_target_properties(${TARGET} PROPERTIES OUTPUT_NAME "ingo_hash")
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "keccak.cuh"
|
||||
#include "hash/keccak/keccak.cuh"
|
||||
|
||||
namespace keccak {
|
||||
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
|
||||
BIN
icicle/src/hash/keccak/test_keccak
Executable file
BIN
icicle/src/hash/keccak/test_keccak
Executable file
Binary file not shown.
43
icicle/src/msm/extern.cu
Normal file
43
icicle/src/msm/extern.cu
Normal file
@@ -0,0 +1,43 @@
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
using namespace curve_config;
|
||||
using namespace field_config;
|
||||
|
||||
#include "msm.cu"
|
||||
#include "utils/utils.h"
|
||||
|
||||
namespace msm {
|
||||
/**
|
||||
* Extern "C" version of [PrecomputeMSMBases](@ref PrecomputeMSMBases) function with the following values of template
|
||||
* parameters (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `A` is the [affine representation](@ref affine_t) of curve points;
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, PrecomputeMSMBases)(
|
||||
affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
affine_t* output_bases)
|
||||
{
|
||||
return PrecomputeMSMBases<affine_t, projective_t>(
|
||||
bases, bases_size, precompute_factor, _c, are_bases_on_device, ctx, output_bases);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extern "C" version of [MSM](@ref MSM) function with the following values of template parameters
|
||||
* (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `S` is the [scalar field](@ref scalar_t) of the curve;
|
||||
* - `A` is the [affine representation](@ref affine_t) of curve points;
|
||||
* - `P` is the [projective representation](@ref projective_t) of curve points.
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, MSMCuda)(
|
||||
const scalar_t* scalars, const affine_t* points, int msm_size, MSMConfig& config, projective_t* out)
|
||||
{
|
||||
return MSM<scalar_t, affine_t, projective_t>(scalars, points, msm_size, config, out);
|
||||
}
|
||||
} // namespace msm
|
||||
43
icicle/src/msm/extern_g2.cu
Normal file
43
icicle/src/msm/extern_g2.cu
Normal file
@@ -0,0 +1,43 @@
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
using namespace curve_config;
|
||||
using namespace field_config;
|
||||
|
||||
#include "msm.cu"
|
||||
#include "utils/utils.h"
|
||||
|
||||
namespace msm {
|
||||
/**
|
||||
* Extern "C" version of [PrecomputeMSMBases](@ref PrecomputeMSMBases) function with the following values of template
|
||||
* parameters (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `A` is the [affine representation](@ref g2_affine_t) of G2 curve points;
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, G2PrecomputeMSMBases)(
|
||||
g2_affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
g2_affine_t* output_bases)
|
||||
{
|
||||
return PrecomputeMSMBases<g2_affine_t, g2_projective_t>(
|
||||
bases, bases_size, precompute_factor, _c, are_bases_on_device, ctx, output_bases);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extern "C" version of [MSM](@ref MSM) function with the following values of template parameters
|
||||
* (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `S` is the [scalar field](@ref scalar_t) of the curve;
|
||||
* - `A` is the [affine representation](@ref g2_affine_t) of G2 curve points;
|
||||
* - `P` is the [projective representation](@ref g2_projective_t) of G2 curve points.
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, G2MSMCuda)(
|
||||
const scalar_t* scalars, const g2_affine_t* points, int msm_size, MSMConfig& config, g2_projective_t* out)
|
||||
{
|
||||
return MSM<scalar_t, g2_affine_t, g2_projective_t>(scalars, points, msm_size, config, out);
|
||||
}
|
||||
} // namespace msm
|
||||
@@ -1,4 +1,4 @@
|
||||
#include "msm.cuh"
|
||||
#include "msm/msm.cuh"
|
||||
|
||||
#include <cooperative_groups.h>
|
||||
#include <cub/device/device_radix_sort.cuh>
|
||||
@@ -10,13 +10,11 @@
|
||||
#include <stdexcept>
|
||||
#include <vector>
|
||||
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "primitives/affine.cuh"
|
||||
#include "primitives/field.cuh"
|
||||
#include "primitives/projective.cuh"
|
||||
#include "utils/error_handler.cuh"
|
||||
#include "curves/affine.cuh"
|
||||
#include "curves/projective.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
#include "utils/mont.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
namespace msm {
|
||||
|
||||
@@ -831,9 +829,8 @@ namespace msm {
|
||||
} // namespace
|
||||
|
||||
template <typename A>
|
||||
MSMConfig DefaultMSMConfig()
|
||||
MSMConfig DefaultMSMConfig(const device_context::DeviceContext& ctx)
|
||||
{
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
MSMConfig config = {
|
||||
ctx, // ctx
|
||||
0, // points_size
|
||||
@@ -853,6 +850,9 @@ namespace msm {
|
||||
return config;
|
||||
}
|
||||
|
||||
// explicit instantiation to avoid having to include this file
|
||||
template MSMConfig DefaultMSMConfig<scalar_t>(const device_context::DeviceContext& ctx);
|
||||
|
||||
template <typename S, typename A, typename P>
|
||||
cudaError_t MSM(const S* scalars, const A* points, int msm_size, MSMConfig& config, P* results)
|
||||
{
|
||||
@@ -906,85 +906,4 @@ namespace msm {
|
||||
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
/**
|
||||
* Extern "C" version of [PrecomputeMSMBases](@ref PrecomputeMSMBases) function with the following values of
|
||||
* template parameters (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `A` is the [affine representation](@ref affine_t) of curve points;
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, PrecomputeMSMBases)(
|
||||
curve_config::affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
curve_config::affine_t* output_bases)
|
||||
{
|
||||
return PrecomputeMSMBases<curve_config::affine_t, curve_config::projective_t>(
|
||||
bases, bases_size, precompute_factor, _c, are_bases_on_device, ctx, output_bases);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extern "C" version of [MSM](@ref MSM) function with the following values of template parameters
|
||||
* (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `S` is the [scalar field](@ref scalar_t) of the curve;
|
||||
* - `A` is the [affine representation](@ref affine_t) of curve points;
|
||||
* - `P` is the [projective representation](@ref projective_t) of curve points.
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, MSMCuda)(
|
||||
const curve_config::scalar_t* scalars,
|
||||
const curve_config::affine_t* points,
|
||||
int msm_size,
|
||||
MSMConfig& config,
|
||||
curve_config::projective_t* out)
|
||||
{
|
||||
return MSM<curve_config::scalar_t, curve_config::affine_t, curve_config::projective_t>(
|
||||
scalars, points, msm_size, config, out);
|
||||
}
|
||||
|
||||
#if defined(G2_DEFINED)
|
||||
|
||||
/**
|
||||
* Extern "C" version of [PrecomputeMSMBases](@ref PrecomputeMSMBases) function with the following values of
|
||||
* template parameters (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `A` is the [affine representation](@ref g2_affine_t) of G2 curve points;
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, G2PrecomputeMSMBases)(
|
||||
curve_config::g2_affine_t* bases,
|
||||
int bases_size,
|
||||
int precompute_factor,
|
||||
int _c,
|
||||
bool are_bases_on_device,
|
||||
device_context::DeviceContext& ctx,
|
||||
curve_config::g2_affine_t* output_bases)
|
||||
{
|
||||
return PrecomputeMSMBases<curve_config::g2_affine_t, curve_config::g2_projective_t>(
|
||||
bases, bases_size, precompute_factor, _c, are_bases_on_device, ctx, output_bases);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extern "C" version of [MSM](@ref MSM) function with the following values of template parameters
|
||||
* (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `S` is the [scalar field](@ref scalar_t) of the curve;
|
||||
* - `A` is the [affine representation](@ref g2_affine_t) of G2 curve points;
|
||||
* - `P` is the [projective representation](@ref g2_projective_t) of G2 curve points.
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, G2MSMCuda)(
|
||||
const curve_config::scalar_t* scalars,
|
||||
const curve_config::g2_affine_t* points,
|
||||
int msm_size,
|
||||
MSMConfig& config,
|
||||
curve_config::g2_projective_t* out)
|
||||
{
|
||||
return MSM<curve_config::scalar_t, curve_config::g2_affine_t, curve_config::g2_projective_t>(
|
||||
scalars, points, msm_size, config, out);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace msm
|
||||
} // namespace msm
|
||||
@@ -6,10 +6,12 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "primitives/field.cuh"
|
||||
#include "primitives/projective.cuh"
|
||||
#include "utils/device_context.cuh"
|
||||
#include "curves/bn254/bn254.cuh"
|
||||
#include "fields/field.cuh"
|
||||
#include "curves/projective.cuh"
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
|
||||
using namespace bn254;
|
||||
|
||||
class Dummy_Scalar
|
||||
{
|
||||
@@ -111,9 +113,9 @@ public:
|
||||
|
||||
// switch between dummy and real:
|
||||
|
||||
typedef curve_config::scalar_t test_scalar;
|
||||
typedef curve_config::projective_t test_projective;
|
||||
typedef curve_config::affine_t test_affine;
|
||||
typedef scalar_t test_scalar;
|
||||
typedef projective_t test_projective;
|
||||
typedef affine_t test_affine;
|
||||
|
||||
// typedef Dummy_Scalar test_scalar;
|
||||
// typedef Dummy_Projective test_projective;
|
||||
@@ -136,7 +138,7 @@ int main()
|
||||
|
||||
// projective_t *short_res = (projective_t*)malloc(sizeof(projective_t));
|
||||
// test_projective *large_res = (test_projective*)malloc(sizeof(test_projective));
|
||||
test_projective large_res[batch_size];
|
||||
test_projective large_res[2];
|
||||
// test_projective batched_large_res[batch_size];
|
||||
// fake_point *large_res = (fake_point*)malloc(sizeof(fake_point));
|
||||
// fake_point batched_large_res[256];
|
||||
@@ -195,7 +197,9 @@ int main()
|
||||
printf("No Big Triangle : %.3f seconds.\n", elapsed1.count() * 1e-9);
|
||||
config.is_big_triangle = true;
|
||||
config.are_results_on_device = false;
|
||||
std::cout << test_projective::to_affine(large_res[0]) << std::endl;
|
||||
cudaMemcpy(&large_res[1], large_res_d, sizeof(test_projective), cudaMemcpyDeviceToHost);
|
||||
std::cout << test_projective::to_affine(large_res[1]) << " " << test_projective::is_on_curve(large_res[1])
|
||||
<< std::endl;
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
msm::MSM<test_scalar, test_affine, test_projective>(scalars_d, points_d, msm_size, config, large_res);
|
||||
// test_reduce_triangle(scalars);
|
||||
@@ -208,10 +212,6 @@ int main()
|
||||
cudaStreamSynchronize(stream);
|
||||
cudaStreamDestroy(stream);
|
||||
|
||||
std::cout << test_projective::to_affine(large_res[0]) << std::endl;
|
||||
|
||||
cudaMemcpy(&large_res[1], large_res_d, sizeof(test_projective), cudaMemcpyDeviceToHost);
|
||||
|
||||
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
|
||||
|
||||
// std::cout<<"final results batched large"<<std::endl;
|
||||
@@ -1,13 +1,13 @@
|
||||
build_verification:
|
||||
mkdir -p work
|
||||
nvcc -o work/test_verification -I. -I.. -I../.. -I../ntt tests/verification.cu -std=c++17
|
||||
nvcc -o work/test_verification -I. -I../../include tests/verification.cu -std=c++17
|
||||
|
||||
test_verification: build_verification
|
||||
work/test_verification
|
||||
|
||||
build_verification_ecntt:
|
||||
mkdir -p work
|
||||
nvcc -o work/test_verification_ecntt -I. -I.. -I../.. -I../ntt tests/verification.cu -std=c++17 -DECNTT_DEFINED
|
||||
nvcc -o work/test_verification_ecntt -I. -I../../include tests/verification.cu -std=c++17 -DECNTT
|
||||
|
||||
test_verification_ecntt: build_verification_ecntt
|
||||
work/test_verification_ecntt
|
||||
47
icicle/src/ntt/extern.cu
Normal file
47
icicle/src/ntt/extern.cu
Normal file
@@ -0,0 +1,47 @@
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
using namespace field_config;
|
||||
|
||||
#include "ntt.cu"
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
namespace ntt {
|
||||
/**
|
||||
* Extern "C" version of [InitDomain](@ref InitDomain) function with the following
|
||||
* value of template parameter (where the field is given by `-DFIELD` env variable during build):
|
||||
* - `S` is the [field](@ref scalar_t) - either a scalar field of the elliptic curve or a
|
||||
* stand-alone "STARK field";
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, InitializeDomain)(
|
||||
scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode)
|
||||
{
|
||||
return InitDomain(*primitive_root, ctx, fast_twiddles_mode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extern "C" version of [NTT](@ref NTT) function with the following values of template parameters
|
||||
* (where the field is given by `-DFIELD` env variable during build):
|
||||
* - `S` is the [field](@ref scalar_t) - either a scalar field of the elliptic curve or a
|
||||
* stand-alone "STARK field";
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, NTTCuda)(
|
||||
const scalar_t* input, int size, NTTDir dir, NTTConfig<scalar_t>& config, scalar_t* output)
|
||||
{
|
||||
return NTT<scalar_t, scalar_t>(input, size, dir, config, output);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extern "C" version of [ReleaseDomain](@ref ReleaseDomain) function with the following values of template parameters
|
||||
* (where the field is given by `-DFIELD` env variable during build):
|
||||
* - `S` is the [field](@ref scalar_t) - either a scalar field of the elliptic curve or a
|
||||
* stand-alone "STARK field";
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, ReleaseDomain)(device_context::DeviceContext& ctx)
|
||||
{
|
||||
return ReleaseDomain<scalar_t>(ctx);
|
||||
}
|
||||
} // namespace ntt
|
||||
25
icicle/src/ntt/extern_ecntt.cu
Normal file
25
icicle/src/ntt/extern_ecntt.cu
Normal file
@@ -0,0 +1,25 @@
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
using namespace curve_config;
|
||||
using namespace field_config;
|
||||
|
||||
#include "ntt.cu"
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
namespace ntt {
|
||||
/**
|
||||
* Extern "C" version of [NTT](@ref NTT) function with the following values of template parameters
|
||||
* (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `S` is the [projective representation](@ref projective_t) of the curve (i.e. EC NTT is computed);
|
||||
* - `E` is the [scalar field](@ref scalar_t) of the curve;
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, ECNTTCuda)(
|
||||
const projective_t* input, int size, NTTDir dir, NTTConfig<scalar_t>& config, projective_t* output)
|
||||
{
|
||||
return NTT<scalar_t, projective_t>(input, size, dir, config, output);
|
||||
}
|
||||
} // namespace ntt
|
||||
23
icicle/src/ntt/extern_extension.cu
Normal file
23
icicle/src/ntt/extern_extension.cu
Normal file
@@ -0,0 +1,23 @@
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
using namespace field_config;
|
||||
|
||||
#include "ntt.cu"
|
||||
|
||||
#include "gpu-utils/device_context.cuh"
|
||||
#include "utils/utils.h"
|
||||
|
||||
namespace ntt {
|
||||
/**
|
||||
* Extern "C" version of [NTT](@ref NTT) function with the following values of template parameters
|
||||
* (where the field is given by `-DFIELD` env variable during build):
|
||||
* - `E` is the [field](@ref scalar_t);
|
||||
* - `S` is the [extension](@ref extension_t) of `E` of appropriate degree;
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(FIELD, ExtensionNTTCuda)(
|
||||
const extension_t* input, int size, NTTDir dir, NTTConfig<scalar_t>& config, extension_t* output)
|
||||
{
|
||||
return NTT<scalar_t, extension_t>(input, size, dir, config, output);
|
||||
}
|
||||
} // namespace ntt
|
||||
@@ -1,8 +1,10 @@
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
#include "appUtils/ntt/thread_ntt.cu"
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "utils/sharedmem.cuh"
|
||||
#include "appUtils/ntt/ntt.cuh" // for ntt::Ordering
|
||||
using namespace field_config;
|
||||
|
||||
#include "thread_ntt.cu"
|
||||
#include "gpu-utils/sharedmem.cuh"
|
||||
#include "ntt/ntt.cuh" // for ntt::Ordering
|
||||
|
||||
namespace mxntt {
|
||||
|
||||
@@ -998,27 +1000,27 @@ namespace mxntt {
|
||||
|
||||
// Explicit instantiation for scalar type
|
||||
template cudaError_t generate_external_twiddles_generic(
|
||||
const curve_config::scalar_t& basic_root,
|
||||
curve_config::scalar_t* external_twiddles,
|
||||
curve_config::scalar_t*& internal_twiddles,
|
||||
curve_config::scalar_t*& basic_twiddles,
|
||||
const scalar_t& basic_root,
|
||||
scalar_t* external_twiddles,
|
||||
scalar_t*& internal_twiddles,
|
||||
scalar_t*& basic_twiddles,
|
||||
uint32_t log_size,
|
||||
cudaStream_t& stream);
|
||||
|
||||
template cudaError_t generate_external_twiddles_fast_twiddles_mode(
|
||||
const curve_config::scalar_t& basic_root,
|
||||
curve_config::scalar_t* external_twiddles,
|
||||
curve_config::scalar_t*& internal_twiddles,
|
||||
curve_config::scalar_t*& basic_twiddles,
|
||||
const scalar_t& basic_root,
|
||||
scalar_t* external_twiddles,
|
||||
scalar_t*& internal_twiddles,
|
||||
scalar_t*& basic_twiddles,
|
||||
uint32_t log_size,
|
||||
cudaStream_t& stream);
|
||||
|
||||
template cudaError_t mixed_radix_ntt<curve_config::scalar_t, curve_config::scalar_t>(
|
||||
const curve_config::scalar_t* d_input,
|
||||
curve_config::scalar_t* d_output,
|
||||
curve_config::scalar_t* external_twiddles,
|
||||
curve_config::scalar_t* internal_twiddles,
|
||||
curve_config::scalar_t* basic_twiddles,
|
||||
template cudaError_t mixed_radix_ntt<scalar_t, scalar_t>(
|
||||
const scalar_t* d_input,
|
||||
scalar_t* d_output,
|
||||
scalar_t* external_twiddles,
|
||||
scalar_t* internal_twiddles,
|
||||
scalar_t* basic_twiddles,
|
||||
int ntt_size,
|
||||
int max_logn,
|
||||
int batch_size,
|
||||
@@ -1026,17 +1028,37 @@ namespace mxntt {
|
||||
bool is_inverse,
|
||||
bool fast_tw,
|
||||
ntt::Ordering ordering,
|
||||
curve_config::scalar_t* arbitrary_coset,
|
||||
scalar_t* arbitrary_coset,
|
||||
int coset_gen_index,
|
||||
cudaStream_t cuda_stream);
|
||||
|
||||
#if defined(EXT_FIELD)
|
||||
template cudaError_t mixed_radix_ntt<extension_t, scalar_t>(
|
||||
const extension_t* d_input,
|
||||
extension_t* d_output,
|
||||
scalar_t* external_twiddles,
|
||||
scalar_t* internal_twiddles,
|
||||
scalar_t* basic_twiddles,
|
||||
int ntt_size,
|
||||
int max_logn,
|
||||
int batch_size,
|
||||
bool columns_batch,
|
||||
bool is_inverse,
|
||||
bool fast_tw,
|
||||
ntt::Ordering ordering,
|
||||
scalar_t* arbitrary_coset,
|
||||
int coset_gen_index,
|
||||
cudaStream_t cuda_stream);
|
||||
#endif
|
||||
|
||||
// TODO: we may reintroduce mixed-radix ECNTT based on upcoming benching PR
|
||||
// #if defined(ECNTT_DEFINED)
|
||||
// template cudaError_t mixed_radix_ntt<curve_config::projective_t, curve_config::scalar_t>(
|
||||
// curve_config::projective_t* d_input,
|
||||
// curve_config::projective_t* d_output,
|
||||
// curve_config::scalar_t* external_twiddles,
|
||||
// curve_config::scalar_t* internal_twiddles,
|
||||
// curve_config::scalar_t* basic_twiddles,
|
||||
// #if defined(ECNTT)
|
||||
// template cudaError_t mixed_radix_ntt<projective_t, scalar_t>(
|
||||
// projective_t* d_input,
|
||||
// projective_t* d_output,
|
||||
// scalar_t* external_twiddles,
|
||||
// scalar_t* internal_twiddles,
|
||||
// scalar_t* basic_twiddles,
|
||||
// int ntt_size,
|
||||
// int max_logn,
|
||||
// int batch_size,
|
||||
@@ -1044,8 +1066,8 @@ namespace mxntt {
|
||||
// bool is_inverse,
|
||||
// bool fast_tw,
|
||||
// ntt::Ordering ordering,
|
||||
// curve_config::scalar_t* arbitrary_coset,
|
||||
// scalar_t* arbitrary_coset,
|
||||
// int coset_gen_index,
|
||||
// cudaStream_t cuda_stream);
|
||||
// #endif // ECNTT_DEFINED
|
||||
// #endif // ECNTT
|
||||
} // namespace mxntt
|
||||
@@ -1,19 +1,27 @@
|
||||
#include "ntt.cuh"
|
||||
#include "fields/field_config.cuh"
|
||||
|
||||
using namespace field_config;
|
||||
|
||||
#include "ntt/ntt.cuh"
|
||||
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <type_traits>
|
||||
|
||||
#include "curves/curve_config.cuh"
|
||||
#include "utils/sharedmem.cuh"
|
||||
#include "gpu-utils/sharedmem.cuh"
|
||||
#include "utils/utils_kernels.cuh"
|
||||
#include "utils/utils.h"
|
||||
#include "appUtils/ntt/ntt_impl.cuh"
|
||||
#include "appUtils/ntt/ntt.cuh" // for ntt::Ordering
|
||||
#include "ntt/ntt_impl.cuh"
|
||||
|
||||
#include <mutex>
|
||||
|
||||
#define IS_ECNTT std::is_same_v<E, curve_config::projective_t>
|
||||
#ifdef CURVE_ID
|
||||
#include "curves/curve_config.cuh"
|
||||
using namespace curve_config;
|
||||
#define IS_ECNTT std::is_same_v<E, projective_t>
|
||||
#else
|
||||
#define IS_ECNTT false
|
||||
#endif
|
||||
|
||||
namespace ntt {
|
||||
|
||||
@@ -404,6 +412,9 @@ namespace ntt {
|
||||
template <typename U>
|
||||
friend cudaError_t ReleaseDomain(device_context::DeviceContext& ctx);
|
||||
|
||||
template <typename U>
|
||||
friend U GetRootOfUnity<U>(uint64_t logn, device_context::DeviceContext& ctx);
|
||||
|
||||
template <typename U, typename E>
|
||||
friend cudaError_t NTT<U, E>(const E* input, int size, NTTDir dir, NTTConfig<U>& config, E* output);
|
||||
};
|
||||
@@ -528,6 +539,22 @@ namespace ntt {
|
||||
return CHK_LAST();
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
S GetRootOfUnity(uint64_t logn, device_context::DeviceContext& ctx)
|
||||
{
|
||||
Domain<S>& domain = domains_for_devices<S>[ctx.device_id];
|
||||
if (logn > domain.max_log_size) {
|
||||
std::ostringstream oss;
|
||||
oss << "NTT log_size=" << logn
|
||||
<< " is too large for the domain. Consider generating your domain with a higher order root of unity.\n";
|
||||
THROW_ICICLE_ERR(IcicleError_t::InvalidArgument, oss.str().c_str());
|
||||
}
|
||||
const size_t twiddles_idx = 1ULL << (domain.max_log_size - logn);
|
||||
return domain.twiddles[twiddles_idx];
|
||||
}
|
||||
// explicit instantiation to avoid having to include this file
|
||||
template scalar_t GetRootOfUnity(uint64_t logn, device_context::DeviceContext& ctx);
|
||||
|
||||
template <typename S>
|
||||
static bool is_choosing_radix2_algorithm(int logn, int batch_size, const NTTConfig<S>& config)
|
||||
{
|
||||
@@ -588,7 +615,6 @@ namespace ntt {
|
||||
break;
|
||||
case Ordering::kRN:
|
||||
case Ordering::kMN:
|
||||
dit = true;
|
||||
reverse_input = false;
|
||||
}
|
||||
|
||||
@@ -706,9 +732,8 @@ namespace ntt {
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
NTTConfig<S> DefaultNTTConfig()
|
||||
NTTConfig<S> DefaultNTTConfig(const device_context::DeviceContext& ctx)
|
||||
{
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
NTTConfig<S> config = {
|
||||
ctx, // ctx
|
||||
S::one(), // coset_gen
|
||||
@@ -722,63 +747,6 @@ namespace ntt {
|
||||
};
|
||||
return config;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extern "C" version of [InitDomain](@ref InitDomain) function with the following
|
||||
* value of template parameter (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `S` is the [scalar field](@ref scalar_t) of the curve;
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, InitializeDomain)(
|
||||
curve_config::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode)
|
||||
{
|
||||
return InitDomain(*primitive_root, ctx, fast_twiddles_mode);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extern "C" version of [NTT](@ref NTT) function with the following values of template parameters
|
||||
* (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `S` and `E` are both the [scalar field](@ref scalar_t) of the curve;
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, NTTCuda)(
|
||||
const curve_config::scalar_t* input,
|
||||
int size,
|
||||
NTTDir dir,
|
||||
NTTConfig<curve_config::scalar_t>& config,
|
||||
curve_config::scalar_t* output)
|
||||
{
|
||||
return NTT<curve_config::scalar_t, curve_config::scalar_t>(input, size, dir, config, output);
|
||||
}
|
||||
|
||||
/**
|
||||
* Extern "C" version of [ReleaseDomain](@ref ReleaseDomain) function with the following values of template parameters
|
||||
* (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `S` is the [scalar field](@ref scalar_t) of the curve;
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, ReleaseDomain)(device_context::DeviceContext& ctx)
|
||||
{
|
||||
return ReleaseDomain<curve_config::scalar_t>(ctx);
|
||||
}
|
||||
|
||||
#if defined(ECNTT_DEFINED)
|
||||
/**
|
||||
* Extern "C" version of [NTT](@ref NTT) function with the following values of template parameters
|
||||
* (where the curve is given by `-DCURVE` env variable during build):
|
||||
* - `S` is the [projective representation](@ref projective_t) of the curve (i.e. EC NTT is computed);
|
||||
* - `E` is the [scalar field](@ref scalar_t) of the curve;
|
||||
* @return `cudaSuccess` if the execution was successful and an error code otherwise.
|
||||
*/
|
||||
extern "C" cudaError_t CONCAT_EXPAND(CURVE, ECNTTCuda)(
|
||||
const curve_config::projective_t* input,
|
||||
int size,
|
||||
NTTDir dir,
|
||||
NTTConfig<curve_config::scalar_t>& config,
|
||||
curve_config::projective_t* output)
|
||||
{
|
||||
return NTT<curve_config::scalar_t, curve_config::projective_t>(input, size, dir, config, output);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// explicit instantiation to avoid having to include this file
|
||||
template NTTConfig<scalar_t> DefaultNTTConfig(const device_context::DeviceContext& ctx);
|
||||
} // namespace ntt
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user