mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-08 04:34:02 -05:00
Feat/m31 (#547)
This PR adds support of the m31 Field --------- Co-authored-by: Jeremy Felder <jeremy.felder1@gmail.com>
This commit is contained in:
2
.github/workflows/cpp_cuda.yml
vendored
2
.github/workflows/cpp_cuda.yml
vendored
@@ -73,6 +73,8 @@ jobs:
|
|||||||
build_args: -DEXT_FIELD=ON
|
build_args: -DEXT_FIELD=ON
|
||||||
- name: stark252
|
- name: stark252
|
||||||
build_args: -DEXT_FIELD=OFF
|
build_args: -DEXT_FIELD=OFF
|
||||||
|
- name: m31
|
||||||
|
build_args: -DEXT_FIELD=ON
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout Repo
|
- name: Checkout Repo
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
|
|||||||
11
.github/workflows/rust.yml
vendored
11
.github/workflows/rust.yml
vendored
@@ -62,8 +62,8 @@ jobs:
|
|||||||
# We need to limit the number of threads to avoid running out of memory on weaker machines
|
# We need to limit the number of threads to avoid running out of memory on weaker machines
|
||||||
# ignored tests are polynomial tests. Since they conflict with NTT tests, they are executed separately
|
# ignored tests are polynomial tests. Since they conflict with NTT tests, they are executed separately
|
||||||
run: |
|
run: |
|
||||||
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --release --verbose --features=g2 -- --test-threads=2 --ignored
|
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --exclude icicle-m31 --release --verbose --features=g2 -- --test-threads=2 --ignored
|
||||||
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --release --verbose --features=g2 -- --test-threads=2
|
cargo test --workspace --exclude icicle-babybear --exclude icicle-stark252 --exclude icicle-m31 --release --verbose --features=g2 -- --test-threads=2
|
||||||
|
|
||||||
- name: Run baby bear tests
|
- name: Run baby bear tests
|
||||||
working-directory: ./wrappers/rust/icicle-fields/icicle-babybear
|
working-directory: ./wrappers/rust/icicle-fields/icicle-babybear
|
||||||
@@ -79,6 +79,13 @@ jobs:
|
|||||||
cargo test --release --verbose -- --ignored
|
cargo test --release --verbose -- --ignored
|
||||||
cargo test --release --verbose
|
cargo test --release --verbose
|
||||||
|
|
||||||
|
- name: Run m31 tests
|
||||||
|
working-directory: ./wrappers/rust/icicle-fields/icicle-m31
|
||||||
|
if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||||
|
run: |
|
||||||
|
cargo test --release --verbose -- --ignored
|
||||||
|
cargo test --release --verbose
|
||||||
|
|
||||||
# build-windows:
|
# build-windows:
|
||||||
# name: Build on Windows
|
# name: Build on Windows
|
||||||
# runs-on: windows-2022
|
# runs-on: windows-2022
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
function(check_field)
|
function(check_field)
|
||||||
set(SUPPORTED_FIELDS babybear;stark252)
|
set(SUPPORTED_FIELDS babybear;stark252;m31)
|
||||||
|
|
||||||
set(IS_FIELD_SUPPORTED FALSE)
|
set(IS_FIELD_SUPPORTED FALSE)
|
||||||
set(I 1000)
|
set(I 1000)
|
||||||
|
|||||||
@@ -19,6 +19,74 @@
|
|||||||
extern "C" cudaError_t babybear_extension_ntt_cuda(
|
extern "C" cudaError_t babybear_extension_ntt_cuda(
|
||||||
const babybear::extension_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::extension_t* output);
|
const babybear::extension_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::extension_t* output);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_initialize_domain(
|
||||||
|
babybear::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_ntt_cuda(
|
||||||
|
const babybear::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::scalar_t* output);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_release_domain(device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_scalar_convert_montgomery(
|
||||||
|
babybear::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_extension_mul_cuda(
|
||||||
|
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_extension_add_cuda(
|
||||||
|
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_extension_accumulate_cuda(
|
||||||
|
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_extension_sub_cuda(
|
||||||
|
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_extension_transpose_matrix_cuda(
|
||||||
|
const babybear::extension_t* input,
|
||||||
|
uint32_t row_size,
|
||||||
|
uint32_t column_size,
|
||||||
|
babybear::extension_t* output,
|
||||||
|
device_context::DeviceContext& ctx,
|
||||||
|
bool on_device,
|
||||||
|
bool is_async);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_extension_bit_reverse_cuda(
|
||||||
|
const babybear::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::extension_t* output);
|
||||||
|
|
||||||
|
|
||||||
|
extern "C" void babybear_extension_generate_scalars(babybear::extension_t* scalars, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_extension_scalar_convert_montgomery(
|
||||||
|
babybear::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_mul_cuda(
|
||||||
|
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_add_cuda(
|
||||||
|
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_accumulate_cuda(
|
||||||
|
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_sub_cuda(
|
||||||
|
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_transpose_matrix_cuda(
|
||||||
|
const babybear::scalar_t* input,
|
||||||
|
uint32_t row_size,
|
||||||
|
uint32_t column_size,
|
||||||
|
babybear::scalar_t* output,
|
||||||
|
device_context::DeviceContext& ctx,
|
||||||
|
bool on_device,
|
||||||
|
bool is_async);
|
||||||
|
|
||||||
|
extern "C" cudaError_t babybear_bit_reverse_cuda(
|
||||||
|
const babybear::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, babybear::scalar_t* output);
|
||||||
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_create_poseidon2_constants_cuda(
|
extern "C" cudaError_t babybear_create_poseidon2_constants_cuda(
|
||||||
int width,
|
int width,
|
||||||
int alpha,
|
int alpha,
|
||||||
@@ -50,67 +118,4 @@ extern "C" cudaError_t babybear_release_poseidon2_constants_cuda(
|
|||||||
poseidon2::Poseidon2Constants<babybear::scalar_t>* constants,
|
poseidon2::Poseidon2Constants<babybear::scalar_t>* constants,
|
||||||
device_context::DeviceContext& ctx);
|
device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_mul_cuda(
|
|
||||||
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_add_cuda(
|
|
||||||
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_accumulate_cuda(
|
|
||||||
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_sub_cuda(
|
|
||||||
babybear::scalar_t* vec_a, babybear::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_transpose_matrix_cuda(
|
|
||||||
const babybear::scalar_t* input,
|
|
||||||
uint32_t row_size,
|
|
||||||
uint32_t column_size,
|
|
||||||
babybear::scalar_t* output,
|
|
||||||
device_context::DeviceContext& ctx,
|
|
||||||
bool on_device,
|
|
||||||
bool is_async);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_bit_reverse_cuda(
|
|
||||||
const babybear::scalar_t* input,
|
|
||||||
uint64_t n,
|
|
||||||
vec_ops::BitReverseConfig& config,
|
|
||||||
babybear::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_scalar_convert_montgomery(
|
|
||||||
babybear::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_initialize_domain(
|
|
||||||
babybear::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_ntt_cuda(
|
|
||||||
const babybear::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<babybear::scalar_t>& config, babybear::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_release_domain(device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" void babybear_extension_generate_scalars(babybear::extension_t* scalars, int size);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_extension_scalar_convert_montgomery(
|
|
||||||
babybear::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_extension_mul_cuda(
|
|
||||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_extension_add_cuda(
|
|
||||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_extension_sub_cuda(
|
|
||||||
babybear::extension_t* vec_a, babybear::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, babybear::extension_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t babybear_extension_transpose_matrix_cuda(
|
|
||||||
const babybear::extension_t* input,
|
|
||||||
uint32_t row_size,
|
|
||||||
uint32_t column_size,
|
|
||||||
babybear::extension_t* output,
|
|
||||||
device_context::DeviceContext& ctx,
|
|
||||||
bool on_device,
|
|
||||||
bool is_async);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -16,6 +16,20 @@
|
|||||||
#include "poseidon/poseidon.cuh"
|
#include "poseidon/poseidon.cuh"
|
||||||
#include "poseidon/tree/merkle.cuh"
|
#include "poseidon/tree/merkle.cuh"
|
||||||
|
|
||||||
|
extern "C" bool bls12_377_g2_eq(bls12_377::g2_projective_t* point1, bls12_377::g2_projective_t* point2);
|
||||||
|
|
||||||
|
extern "C" void bls12_377_g2_to_affine(bls12_377::g2_projective_t* point, bls12_377::g2_affine_t* point_out);
|
||||||
|
|
||||||
|
extern "C" void bls12_377_g2_generate_projective_points(bls12_377::g2_projective_t* points, int size);
|
||||||
|
|
||||||
|
extern "C" void bls12_377_g2_generate_affine_points(bls12_377::g2_affine_t* points, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_377_g2_affine_convert_montgomery(
|
||||||
|
bls12_377::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_377_g2_projective_convert_montgomery(
|
||||||
|
bls12_377::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_g2_precompute_msm_bases_cuda(
|
extern "C" cudaError_t bls12_377_g2_precompute_msm_bases_cuda(
|
||||||
bls12_377::g2_affine_t* bases,
|
bls12_377::g2_affine_t* bases,
|
||||||
int msm_size,
|
int msm_size,
|
||||||
@@ -34,20 +48,6 @@ extern "C" cudaError_t bls12_377_precompute_msm_bases_cuda(
|
|||||||
extern "C" cudaError_t bls12_377_msm_cuda(
|
extern "C" cudaError_t bls12_377_msm_cuda(
|
||||||
const bls12_377::scalar_t* scalars, const bls12_377::affine_t* points, int msm_size, msm::MSMConfig& config, bls12_377::projective_t* out);
|
const bls12_377::scalar_t* scalars, const bls12_377::affine_t* points, int msm_size, msm::MSMConfig& config, bls12_377::projective_t* out);
|
||||||
|
|
||||||
extern "C" bool bls12_377_g2_eq(bls12_377::g2_projective_t* point1, bls12_377::g2_projective_t* point2);
|
|
||||||
|
|
||||||
extern "C" void bls12_377_g2_to_affine(bls12_377::g2_projective_t* point, bls12_377::g2_affine_t* point_out);
|
|
||||||
|
|
||||||
extern "C" void bls12_377_g2_generate_projective_points(bls12_377::g2_projective_t* points, int size);
|
|
||||||
|
|
||||||
extern "C" void bls12_377_g2_generate_affine_points(bls12_377::g2_affine_t* points, int size);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_g2_affine_convert_montgomery(
|
|
||||||
bls12_377::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_g2_projective_convert_montgomery(
|
|
||||||
bls12_377::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_ecntt_cuda(
|
extern "C" cudaError_t bls12_377_ecntt_cuda(
|
||||||
const bls12_377::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::projective_t* output);
|
const bls12_377::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::projective_t* output);
|
||||||
|
|
||||||
@@ -65,6 +65,44 @@ extern "C" cudaError_t bls12_377_affine_convert_montgomery(
|
|||||||
extern "C" cudaError_t bls12_377_projective_convert_montgomery(
|
extern "C" cudaError_t bls12_377_projective_convert_montgomery(
|
||||||
bls12_377::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
bls12_377::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_377_initialize_domain(
|
||||||
|
bls12_377::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_377_ntt_cuda(
|
||||||
|
const bls12_377::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::scalar_t* output);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_377_release_domain(device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" void bls12_377_generate_scalars(bls12_377::scalar_t* scalars, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_377_scalar_convert_montgomery(
|
||||||
|
bls12_377::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_377_mul_cuda(
|
||||||
|
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_377_add_cuda(
|
||||||
|
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_377_accumulate_cuda(
|
||||||
|
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_377_sub_cuda(
|
||||||
|
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_377_transpose_matrix_cuda(
|
||||||
|
const bls12_377::scalar_t* input,
|
||||||
|
uint32_t row_size,
|
||||||
|
uint32_t column_size,
|
||||||
|
bls12_377::scalar_t* output,
|
||||||
|
device_context::DeviceContext& ctx,
|
||||||
|
bool on_device,
|
||||||
|
bool is_async);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_377_bit_reverse_cuda(
|
||||||
|
const bls12_377::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bls12_377::scalar_t* output);
|
||||||
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_create_optimized_poseidon_constants_cuda(
|
extern "C" cudaError_t bls12_377_create_optimized_poseidon_constants_cuda(
|
||||||
int arity,
|
int arity,
|
||||||
int full_rounds_half,
|
int full_rounds_half,
|
||||||
@@ -92,44 +130,4 @@ extern "C" cudaError_t bls12_377_build_poseidon_merkle_tree(
|
|||||||
poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
|
poseidon::PoseidonConstants<bls12_377::scalar_t>& constants,
|
||||||
merkle::TreeBuilderConfig& config);
|
merkle::TreeBuilderConfig& config);
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_mul_cuda(
|
|
||||||
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_add_cuda(
|
|
||||||
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_accumulate_cuda(
|
|
||||||
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_sub_cuda(
|
|
||||||
bls12_377::scalar_t* vec_a, bls12_377::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_377::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_transpose_matrix_cuda(
|
|
||||||
const bls12_377::scalar_t* input,
|
|
||||||
uint32_t row_size,
|
|
||||||
uint32_t column_size,
|
|
||||||
bls12_377::scalar_t* output,
|
|
||||||
device_context::DeviceContext& ctx,
|
|
||||||
bool on_device,
|
|
||||||
bool is_async);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_bit_reverse_cuda(
|
|
||||||
const bls12_377::scalar_t* input,
|
|
||||||
uint64_t n,
|
|
||||||
vec_ops::BitReverseConfig& config,
|
|
||||||
bls12_377::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" void bls12_377_generate_scalars(bls12_377::scalar_t* scalars, int size);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_scalar_convert_montgomery(
|
|
||||||
bls12_377::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_initialize_domain(
|
|
||||||
bls12_377::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_ntt_cuda(
|
|
||||||
const bls12_377::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_377::scalar_t>& config, bls12_377::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_377_release_domain(device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -16,6 +16,20 @@
|
|||||||
#include "poseidon/poseidon.cuh"
|
#include "poseidon/poseidon.cuh"
|
||||||
#include "poseidon/tree/merkle.cuh"
|
#include "poseidon/tree/merkle.cuh"
|
||||||
|
|
||||||
|
extern "C" bool bls12_381_g2_eq(bls12_381::g2_projective_t* point1, bls12_381::g2_projective_t* point2);
|
||||||
|
|
||||||
|
extern "C" void bls12_381_g2_to_affine(bls12_381::g2_projective_t* point, bls12_381::g2_affine_t* point_out);
|
||||||
|
|
||||||
|
extern "C" void bls12_381_g2_generate_projective_points(bls12_381::g2_projective_t* points, int size);
|
||||||
|
|
||||||
|
extern "C" void bls12_381_g2_generate_affine_points(bls12_381::g2_affine_t* points, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_381_g2_affine_convert_montgomery(
|
||||||
|
bls12_381::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_381_g2_projective_convert_montgomery(
|
||||||
|
bls12_381::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_g2_precompute_msm_bases_cuda(
|
extern "C" cudaError_t bls12_381_g2_precompute_msm_bases_cuda(
|
||||||
bls12_381::g2_affine_t* bases,
|
bls12_381::g2_affine_t* bases,
|
||||||
int msm_size,
|
int msm_size,
|
||||||
@@ -34,20 +48,6 @@ extern "C" cudaError_t bls12_381_precompute_msm_bases_cuda(
|
|||||||
extern "C" cudaError_t bls12_381_msm_cuda(
|
extern "C" cudaError_t bls12_381_msm_cuda(
|
||||||
const bls12_381::scalar_t* scalars, const bls12_381::affine_t* points, int msm_size, msm::MSMConfig& config, bls12_381::projective_t* out);
|
const bls12_381::scalar_t* scalars, const bls12_381::affine_t* points, int msm_size, msm::MSMConfig& config, bls12_381::projective_t* out);
|
||||||
|
|
||||||
extern "C" bool bls12_381_g2_eq(bls12_381::g2_projective_t* point1, bls12_381::g2_projective_t* point2);
|
|
||||||
|
|
||||||
extern "C" void bls12_381_g2_to_affine(bls12_381::g2_projective_t* point, bls12_381::g2_affine_t* point_out);
|
|
||||||
|
|
||||||
extern "C" void bls12_381_g2_generate_projective_points(bls12_381::g2_projective_t* points, int size);
|
|
||||||
|
|
||||||
extern "C" void bls12_381_g2_generate_affine_points(bls12_381::g2_affine_t* points, int size);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_g2_affine_convert_montgomery(
|
|
||||||
bls12_381::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_g2_projective_convert_montgomery(
|
|
||||||
bls12_381::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_ecntt_cuda(
|
extern "C" cudaError_t bls12_381_ecntt_cuda(
|
||||||
const bls12_381::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_381::scalar_t>& config, bls12_381::projective_t* output);
|
const bls12_381::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_381::scalar_t>& config, bls12_381::projective_t* output);
|
||||||
|
|
||||||
@@ -65,6 +65,44 @@ extern "C" cudaError_t bls12_381_affine_convert_montgomery(
|
|||||||
extern "C" cudaError_t bls12_381_projective_convert_montgomery(
|
extern "C" cudaError_t bls12_381_projective_convert_montgomery(
|
||||||
bls12_381::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
bls12_381::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_381_initialize_domain(
|
||||||
|
bls12_381::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_381_ntt_cuda(
|
||||||
|
const bls12_381::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_381::scalar_t>& config, bls12_381::scalar_t* output);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_381_release_domain(device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" void bls12_381_generate_scalars(bls12_381::scalar_t* scalars, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_381_scalar_convert_montgomery(
|
||||||
|
bls12_381::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_381_mul_cuda(
|
||||||
|
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_381_add_cuda(
|
||||||
|
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_381_accumulate_cuda(
|
||||||
|
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_381_sub_cuda(
|
||||||
|
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_381_transpose_matrix_cuda(
|
||||||
|
const bls12_381::scalar_t* input,
|
||||||
|
uint32_t row_size,
|
||||||
|
uint32_t column_size,
|
||||||
|
bls12_381::scalar_t* output,
|
||||||
|
device_context::DeviceContext& ctx,
|
||||||
|
bool on_device,
|
||||||
|
bool is_async);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bls12_381_bit_reverse_cuda(
|
||||||
|
const bls12_381::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bls12_381::scalar_t* output);
|
||||||
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_create_optimized_poseidon_constants_cuda(
|
extern "C" cudaError_t bls12_381_create_optimized_poseidon_constants_cuda(
|
||||||
int arity,
|
int arity,
|
||||||
int full_rounds_half,
|
int full_rounds_half,
|
||||||
@@ -92,44 +130,4 @@ extern "C" cudaError_t bls12_381_build_poseidon_merkle_tree(
|
|||||||
poseidon::PoseidonConstants<bls12_381::scalar_t>& constants,
|
poseidon::PoseidonConstants<bls12_381::scalar_t>& constants,
|
||||||
merkle::TreeBuilderConfig& config);
|
merkle::TreeBuilderConfig& config);
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_mul_cuda(
|
|
||||||
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_add_cuda(
|
|
||||||
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_accumulate_cuda(
|
|
||||||
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_sub_cuda(
|
|
||||||
bls12_381::scalar_t* vec_a, bls12_381::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bls12_381::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_transpose_matrix_cuda(
|
|
||||||
const bls12_381::scalar_t* input,
|
|
||||||
uint32_t row_size,
|
|
||||||
uint32_t column_size,
|
|
||||||
bls12_381::scalar_t* output,
|
|
||||||
device_context::DeviceContext& ctx,
|
|
||||||
bool on_device,
|
|
||||||
bool is_async);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_bit_reverse_cuda(
|
|
||||||
const bls12_381::scalar_t* input,
|
|
||||||
uint64_t n,
|
|
||||||
vec_ops::BitReverseConfig& config,
|
|
||||||
bls12_381::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" void bls12_381_generate_scalars(bls12_381::scalar_t* scalars, int size);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_scalar_convert_montgomery(
|
|
||||||
bls12_381::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_initialize_domain(
|
|
||||||
bls12_381::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_ntt_cuda(
|
|
||||||
const bls12_381::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bls12_381::scalar_t>& config, bls12_381::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bls12_381_release_domain(device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -17,6 +17,20 @@
|
|||||||
#include "poseidon/tree/merkle.cuh"
|
#include "poseidon/tree/merkle.cuh"
|
||||||
#include "poseidon2/poseidon2.cuh"
|
#include "poseidon2/poseidon2.cuh"
|
||||||
|
|
||||||
|
extern "C" bool bn254_g2_eq(bn254::g2_projective_t* point1, bn254::g2_projective_t* point2);
|
||||||
|
|
||||||
|
extern "C" void bn254_g2_to_affine(bn254::g2_projective_t* point, bn254::g2_affine_t* point_out);
|
||||||
|
|
||||||
|
extern "C" void bn254_g2_generate_projective_points(bn254::g2_projective_t* points, int size);
|
||||||
|
|
||||||
|
extern "C" void bn254_g2_generate_affine_points(bn254::g2_affine_t* points, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bn254_g2_affine_convert_montgomery(
|
||||||
|
bn254::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bn254_g2_projective_convert_montgomery(
|
||||||
|
bn254::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_g2_precompute_msm_bases_cuda(
|
extern "C" cudaError_t bn254_g2_precompute_msm_bases_cuda(
|
||||||
bn254::g2_affine_t* bases,
|
bn254::g2_affine_t* bases,
|
||||||
int msm_size,
|
int msm_size,
|
||||||
@@ -35,20 +49,6 @@ extern "C" cudaError_t bn254_precompute_msm_bases_cuda(
|
|||||||
extern "C" cudaError_t bn254_msm_cuda(
|
extern "C" cudaError_t bn254_msm_cuda(
|
||||||
const bn254::scalar_t* scalars, const bn254::affine_t* points, int msm_size, msm::MSMConfig& config, bn254::projective_t* out);
|
const bn254::scalar_t* scalars, const bn254::affine_t* points, int msm_size, msm::MSMConfig& config, bn254::projective_t* out);
|
||||||
|
|
||||||
extern "C" bool bn254_g2_eq(bn254::g2_projective_t* point1, bn254::g2_projective_t* point2);
|
|
||||||
|
|
||||||
extern "C" void bn254_g2_to_affine(bn254::g2_projective_t* point, bn254::g2_affine_t* point_out);
|
|
||||||
|
|
||||||
extern "C" void bn254_g2_generate_projective_points(bn254::g2_projective_t* points, int size);
|
|
||||||
|
|
||||||
extern "C" void bn254_g2_generate_affine_points(bn254::g2_affine_t* points, int size);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_g2_affine_convert_montgomery(
|
|
||||||
bn254::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_g2_projective_convert_montgomery(
|
|
||||||
bn254::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_ecntt_cuda(
|
extern "C" cudaError_t bn254_ecntt_cuda(
|
||||||
const bn254::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bn254::scalar_t>& config, bn254::projective_t* output);
|
const bn254::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bn254::scalar_t>& config, bn254::projective_t* output);
|
||||||
|
|
||||||
@@ -66,6 +66,44 @@ extern "C" cudaError_t bn254_affine_convert_montgomery(
|
|||||||
extern "C" cudaError_t bn254_projective_convert_montgomery(
|
extern "C" cudaError_t bn254_projective_convert_montgomery(
|
||||||
bn254::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
bn254::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bn254_initialize_domain(
|
||||||
|
bn254::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bn254_ntt_cuda(
|
||||||
|
const bn254::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bn254::scalar_t>& config, bn254::scalar_t* output);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bn254_release_domain(device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" void bn254_generate_scalars(bn254::scalar_t* scalars, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bn254_scalar_convert_montgomery(
|
||||||
|
bn254::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bn254_mul_cuda(
|
||||||
|
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bn254_add_cuda(
|
||||||
|
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bn254_accumulate_cuda(
|
||||||
|
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bn254_sub_cuda(
|
||||||
|
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bn254_transpose_matrix_cuda(
|
||||||
|
const bn254::scalar_t* input,
|
||||||
|
uint32_t row_size,
|
||||||
|
uint32_t column_size,
|
||||||
|
bn254::scalar_t* output,
|
||||||
|
device_context::DeviceContext& ctx,
|
||||||
|
bool on_device,
|
||||||
|
bool is_async);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bn254_bit_reverse_cuda(
|
||||||
|
const bn254::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bn254::scalar_t* output);
|
||||||
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_create_poseidon2_constants_cuda(
|
extern "C" cudaError_t bn254_create_poseidon2_constants_cuda(
|
||||||
int width,
|
int width,
|
||||||
int alpha,
|
int alpha,
|
||||||
@@ -124,44 +162,4 @@ extern "C" cudaError_t bn254_build_poseidon_merkle_tree(
|
|||||||
poseidon::PoseidonConstants<bn254::scalar_t>& constants,
|
poseidon::PoseidonConstants<bn254::scalar_t>& constants,
|
||||||
merkle::TreeBuilderConfig& config);
|
merkle::TreeBuilderConfig& config);
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_mul_cuda(
|
|
||||||
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_add_cuda(
|
|
||||||
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_accumulate_cuda(
|
|
||||||
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_sub_cuda(
|
|
||||||
bn254::scalar_t* vec_a, bn254::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bn254::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_transpose_matrix_cuda(
|
|
||||||
const bn254::scalar_t* input,
|
|
||||||
uint32_t row_size,
|
|
||||||
uint32_t column_size,
|
|
||||||
bn254::scalar_t* output,
|
|
||||||
device_context::DeviceContext& ctx,
|
|
||||||
bool on_device,
|
|
||||||
bool is_async);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_bit_reverse_cuda(
|
|
||||||
const bn254::scalar_t* input,
|
|
||||||
uint64_t n,
|
|
||||||
vec_ops::BitReverseConfig& config,
|
|
||||||
bn254::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" void bn254_generate_scalars(bn254::scalar_t* scalars, int size);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_scalar_convert_montgomery(
|
|
||||||
bn254::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_initialize_domain(
|
|
||||||
bn254::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_ntt_cuda(
|
|
||||||
const bn254::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bn254::scalar_t>& config, bn254::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bn254_release_domain(device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -16,6 +16,20 @@
|
|||||||
#include "poseidon/poseidon.cuh"
|
#include "poseidon/poseidon.cuh"
|
||||||
#include "poseidon/tree/merkle.cuh"
|
#include "poseidon/tree/merkle.cuh"
|
||||||
|
|
||||||
|
extern "C" bool bw6_761_g2_eq(bw6_761::g2_projective_t* point1, bw6_761::g2_projective_t* point2);
|
||||||
|
|
||||||
|
extern "C" void bw6_761_g2_to_affine(bw6_761::g2_projective_t* point, bw6_761::g2_affine_t* point_out);
|
||||||
|
|
||||||
|
extern "C" void bw6_761_g2_generate_projective_points(bw6_761::g2_projective_t* points, int size);
|
||||||
|
|
||||||
|
extern "C" void bw6_761_g2_generate_affine_points(bw6_761::g2_affine_t* points, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bw6_761_g2_affine_convert_montgomery(
|
||||||
|
bw6_761::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bw6_761_g2_projective_convert_montgomery(
|
||||||
|
bw6_761::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_g2_precompute_msm_bases_cuda(
|
extern "C" cudaError_t bw6_761_g2_precompute_msm_bases_cuda(
|
||||||
bw6_761::g2_affine_t* bases,
|
bw6_761::g2_affine_t* bases,
|
||||||
int msm_size,
|
int msm_size,
|
||||||
@@ -34,20 +48,6 @@ extern "C" cudaError_t bw6_761_precompute_msm_bases_cuda(
|
|||||||
extern "C" cudaError_t bw6_761_msm_cuda(
|
extern "C" cudaError_t bw6_761_msm_cuda(
|
||||||
const bw6_761::scalar_t* scalars, const bw6_761::affine_t* points, int msm_size, msm::MSMConfig& config, bw6_761::projective_t* out);
|
const bw6_761::scalar_t* scalars, const bw6_761::affine_t* points, int msm_size, msm::MSMConfig& config, bw6_761::projective_t* out);
|
||||||
|
|
||||||
extern "C" bool bw6_761_g2_eq(bw6_761::g2_projective_t* point1, bw6_761::g2_projective_t* point2);
|
|
||||||
|
|
||||||
extern "C" void bw6_761_g2_to_affine(bw6_761::g2_projective_t* point, bw6_761::g2_affine_t* point_out);
|
|
||||||
|
|
||||||
extern "C" void bw6_761_g2_generate_projective_points(bw6_761::g2_projective_t* points, int size);
|
|
||||||
|
|
||||||
extern "C" void bw6_761_g2_generate_affine_points(bw6_761::g2_affine_t* points, int size);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_g2_affine_convert_montgomery(
|
|
||||||
bw6_761::g2_affine_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_g2_projective_convert_montgomery(
|
|
||||||
bw6_761::g2_projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_ecntt_cuda(
|
extern "C" cudaError_t bw6_761_ecntt_cuda(
|
||||||
const bw6_761::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bw6_761::scalar_t>& config, bw6_761::projective_t* output);
|
const bw6_761::projective_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bw6_761::scalar_t>& config, bw6_761::projective_t* output);
|
||||||
|
|
||||||
@@ -65,6 +65,44 @@ extern "C" cudaError_t bw6_761_affine_convert_montgomery(
|
|||||||
extern "C" cudaError_t bw6_761_projective_convert_montgomery(
|
extern "C" cudaError_t bw6_761_projective_convert_montgomery(
|
||||||
bw6_761::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
bw6_761::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bw6_761_initialize_domain(
|
||||||
|
bw6_761::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bw6_761_ntt_cuda(
|
||||||
|
const bw6_761::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bw6_761::scalar_t>& config, bw6_761::scalar_t* output);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bw6_761_release_domain(device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" void bw6_761_generate_scalars(bw6_761::scalar_t* scalars, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bw6_761_scalar_convert_montgomery(
|
||||||
|
bw6_761::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bw6_761_mul_cuda(
|
||||||
|
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bw6_761_add_cuda(
|
||||||
|
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bw6_761_accumulate_cuda(
|
||||||
|
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bw6_761_sub_cuda(
|
||||||
|
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bw6_761_transpose_matrix_cuda(
|
||||||
|
const bw6_761::scalar_t* input,
|
||||||
|
uint32_t row_size,
|
||||||
|
uint32_t column_size,
|
||||||
|
bw6_761::scalar_t* output,
|
||||||
|
device_context::DeviceContext& ctx,
|
||||||
|
bool on_device,
|
||||||
|
bool is_async);
|
||||||
|
|
||||||
|
extern "C" cudaError_t bw6_761_bit_reverse_cuda(
|
||||||
|
const bw6_761::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, bw6_761::scalar_t* output);
|
||||||
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_create_optimized_poseidon_constants_cuda(
|
extern "C" cudaError_t bw6_761_create_optimized_poseidon_constants_cuda(
|
||||||
int arity,
|
int arity,
|
||||||
int full_rounds_half,
|
int full_rounds_half,
|
||||||
@@ -92,44 +130,4 @@ extern "C" cudaError_t bw6_761_build_poseidon_merkle_tree(
|
|||||||
poseidon::PoseidonConstants<bw6_761::scalar_t>& constants,
|
poseidon::PoseidonConstants<bw6_761::scalar_t>& constants,
|
||||||
merkle::TreeBuilderConfig& config);
|
merkle::TreeBuilderConfig& config);
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_mul_cuda(
|
|
||||||
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_add_cuda(
|
|
||||||
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_accumulate_cuda(
|
|
||||||
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_sub_cuda(
|
|
||||||
bw6_761::scalar_t* vec_a, bw6_761::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, bw6_761::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_transpose_matrix_cuda(
|
|
||||||
const bw6_761::scalar_t* input,
|
|
||||||
uint32_t row_size,
|
|
||||||
uint32_t column_size,
|
|
||||||
bw6_761::scalar_t* output,
|
|
||||||
device_context::DeviceContext& ctx,
|
|
||||||
bool on_device,
|
|
||||||
bool is_async);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_bit_reverse_cuda(
|
|
||||||
const bw6_761::scalar_t* input,
|
|
||||||
uint64_t n,
|
|
||||||
vec_ops::BitReverseConfig& config,
|
|
||||||
bw6_761::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" void bw6_761_generate_scalars(bw6_761::scalar_t* scalars, int size);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_scalar_convert_montgomery(
|
|
||||||
bw6_761::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_initialize_domain(
|
|
||||||
bw6_761::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_ntt_cuda(
|
|
||||||
const bw6_761::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<bw6_761::scalar_t>& config, bw6_761::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" cudaError_t bw6_761_release_domain(device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -38,6 +38,36 @@ extern "C" cudaError_t grumpkin_affine_convert_montgomery(
|
|||||||
extern "C" cudaError_t grumpkin_projective_convert_montgomery(
|
extern "C" cudaError_t grumpkin_projective_convert_montgomery(
|
||||||
grumpkin::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
grumpkin::projective_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" void grumpkin_generate_scalars(grumpkin::scalar_t* scalars, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t grumpkin_scalar_convert_montgomery(
|
||||||
|
grumpkin::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t grumpkin_mul_cuda(
|
||||||
|
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t grumpkin_add_cuda(
|
||||||
|
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t grumpkin_accumulate_cuda(
|
||||||
|
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||||
|
|
||||||
|
extern "C" cudaError_t grumpkin_sub_cuda(
|
||||||
|
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t grumpkin_transpose_matrix_cuda(
|
||||||
|
const grumpkin::scalar_t* input,
|
||||||
|
uint32_t row_size,
|
||||||
|
uint32_t column_size,
|
||||||
|
grumpkin::scalar_t* output,
|
||||||
|
device_context::DeviceContext& ctx,
|
||||||
|
bool on_device,
|
||||||
|
bool is_async);
|
||||||
|
|
||||||
|
extern "C" cudaError_t grumpkin_bit_reverse_cuda(
|
||||||
|
const grumpkin::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, grumpkin::scalar_t* output);
|
||||||
|
|
||||||
|
|
||||||
extern "C" cudaError_t grumpkin_create_optimized_poseidon_constants_cuda(
|
extern "C" cudaError_t grumpkin_create_optimized_poseidon_constants_cuda(
|
||||||
int arity,
|
int arity,
|
||||||
int full_rounds_half,
|
int full_rounds_half,
|
||||||
@@ -65,36 +95,4 @@ extern "C" cudaError_t grumpkin_build_poseidon_merkle_tree(
|
|||||||
poseidon::PoseidonConstants<grumpkin::scalar_t>& constants,
|
poseidon::PoseidonConstants<grumpkin::scalar_t>& constants,
|
||||||
merkle::TreeBuilderConfig& config);
|
merkle::TreeBuilderConfig& config);
|
||||||
|
|
||||||
extern "C" cudaError_t grumpkin_mul_cuda(
|
|
||||||
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t grumpkin_add_cuda(
|
|
||||||
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t grumpkin_accumulate_cuda(
|
|
||||||
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
|
||||||
|
|
||||||
extern "C" cudaError_t grumpkin_sub_cuda(
|
|
||||||
grumpkin::scalar_t* vec_a, grumpkin::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, grumpkin::scalar_t* result);
|
|
||||||
|
|
||||||
extern "C" cudaError_t grumpkin_transpose_matrix_cuda(
|
|
||||||
const grumpkin::scalar_t* input,
|
|
||||||
uint32_t row_size,
|
|
||||||
uint32_t column_size,
|
|
||||||
grumpkin::scalar_t* output,
|
|
||||||
device_context::DeviceContext& ctx,
|
|
||||||
bool on_device,
|
|
||||||
bool is_async);
|
|
||||||
|
|
||||||
extern "C" cudaError_t grumpkin_bit_reverse_cuda(
|
|
||||||
const grumpkin::scalar_t* input,
|
|
||||||
uint64_t n,
|
|
||||||
vec_ops::BitReverseConfig& config,
|
|
||||||
grumpkin::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" void grumpkin_generate_scalars(grumpkin::scalar_t* scalars, int size);
|
|
||||||
|
|
||||||
extern "C" cudaError_t grumpkin_scalar_convert_montgomery(
|
|
||||||
grumpkin::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
75
icicle/include/api/m31.h
Normal file
75
icicle/include/api/m31.h
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
// WARNING: This file is auto-generated by a script.
|
||||||
|
// Any changes made to this file may be overwritten.
|
||||||
|
// Please modify the code generation script instead.
|
||||||
|
// Path to the code generation script: scripts/gen_c_api.py
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#ifndef M31_API_H
|
||||||
|
#define M31_API_H
|
||||||
|
|
||||||
|
#include <cuda_runtime.h>
|
||||||
|
#include "gpu-utils/device_context.cuh"
|
||||||
|
#include "fields/stark_fields/m31.cuh"
|
||||||
|
#include "vec_ops/vec_ops.cuh"
|
||||||
|
|
||||||
|
extern "C" void m31_generate_scalars(m31::scalar_t* scalars, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_scalar_convert_montgomery(
|
||||||
|
m31::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_extension_mul_cuda(
|
||||||
|
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_extension_add_cuda(
|
||||||
|
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_extension_accumulate_cuda(
|
||||||
|
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_extension_sub_cuda(
|
||||||
|
m31::extension_t* vec_a, m31::extension_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::extension_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_extension_transpose_matrix_cuda(
|
||||||
|
const m31::extension_t* input,
|
||||||
|
uint32_t row_size,
|
||||||
|
uint32_t column_size,
|
||||||
|
m31::extension_t* output,
|
||||||
|
device_context::DeviceContext& ctx,
|
||||||
|
bool on_device,
|
||||||
|
bool is_async);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_extension_bit_reverse_cuda(
|
||||||
|
const m31::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, m31::extension_t* output);
|
||||||
|
|
||||||
|
|
||||||
|
extern "C" void m31_extension_generate_scalars(m31::extension_t* scalars, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_extension_scalar_convert_montgomery(
|
||||||
|
m31::extension_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_mul_cuda(
|
||||||
|
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_add_cuda(
|
||||||
|
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_accumulate_cuda(
|
||||||
|
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_sub_cuda(
|
||||||
|
m31::scalar_t* vec_a, m31::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, m31::scalar_t* result);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_transpose_matrix_cuda(
|
||||||
|
const m31::scalar_t* input,
|
||||||
|
uint32_t row_size,
|
||||||
|
uint32_t column_size,
|
||||||
|
m31::scalar_t* output,
|
||||||
|
device_context::DeviceContext& ctx,
|
||||||
|
bool on_device,
|
||||||
|
bool is_async);
|
||||||
|
|
||||||
|
extern "C" cudaError_t m31_bit_reverse_cuda(
|
||||||
|
const m31::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, m31::scalar_t* output);
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
@@ -13,6 +13,19 @@
|
|||||||
#include "ntt/ntt.cuh"
|
#include "ntt/ntt.cuh"
|
||||||
#include "vec_ops/vec_ops.cuh"
|
#include "vec_ops/vec_ops.cuh"
|
||||||
|
|
||||||
|
extern "C" cudaError_t stark252_initialize_domain(
|
||||||
|
stark252::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
||||||
|
|
||||||
|
extern "C" cudaError_t stark252_ntt_cuda(
|
||||||
|
const stark252::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<stark252::scalar_t>& config, stark252::scalar_t* output);
|
||||||
|
|
||||||
|
extern "C" cudaError_t stark252_release_domain(device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
|
extern "C" void stark252_generate_scalars(stark252::scalar_t* scalars, int size);
|
||||||
|
|
||||||
|
extern "C" cudaError_t stark252_scalar_convert_montgomery(
|
||||||
|
stark252::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
||||||
|
|
||||||
extern "C" cudaError_t stark252_mul_cuda(
|
extern "C" cudaError_t stark252_mul_cuda(
|
||||||
stark252::scalar_t* vec_a, stark252::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, stark252::scalar_t* result);
|
stark252::scalar_t* vec_a, stark252::scalar_t* vec_b, int n, vec_ops::VecOpsConfig& config, stark252::scalar_t* result);
|
||||||
|
|
||||||
@@ -35,22 +48,7 @@ extern "C" cudaError_t stark252_transpose_matrix_cuda(
|
|||||||
bool is_async);
|
bool is_async);
|
||||||
|
|
||||||
extern "C" cudaError_t stark252_bit_reverse_cuda(
|
extern "C" cudaError_t stark252_bit_reverse_cuda(
|
||||||
const stark252::scalar_t* input,
|
const stark252::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, stark252::scalar_t* output);
|
||||||
uint64_t n,
|
|
||||||
vec_ops::BitReverseConfig& config,
|
|
||||||
stark252::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" void stark252_generate_scalars(stark252::scalar_t* scalars, int size);
|
|
||||||
|
|
||||||
extern "C" cudaError_t stark252_scalar_convert_montgomery(
|
|
||||||
stark252::scalar_t* d_inout, size_t n, bool is_into, device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
extern "C" cudaError_t stark252_initialize_domain(
|
|
||||||
stark252::scalar_t* primitive_root, device_context::DeviceContext& ctx, bool fast_twiddles_mode);
|
|
||||||
|
|
||||||
extern "C" cudaError_t stark252_ntt_cuda(
|
|
||||||
const stark252::scalar_t* input, int size, ntt::NTTDir dir, ntt::NTTConfig<stark252::scalar_t>& config, stark252::scalar_t* output);
|
|
||||||
|
|
||||||
extern "C" cudaError_t stark252_release_domain(device_context::DeviceContext& ctx);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -17,4 +17,7 @@ extern "C" cudaError_t ${FIELD}_transpose_matrix_cuda(
|
|||||||
${FIELD}::scalar_t* output,
|
${FIELD}::scalar_t* output,
|
||||||
device_context::DeviceContext& ctx,
|
device_context::DeviceContext& ctx,
|
||||||
bool on_device,
|
bool on_device,
|
||||||
bool is_async);
|
bool is_async);
|
||||||
|
|
||||||
|
extern "C" cudaError_t ${FIELD}_bit_reverse_cuda(
|
||||||
|
const ${FIELD}::scalar_t* input, uint64_t n, vec_ops::BitReverseConfig& config, ${FIELD}::scalar_t* output);
|
||||||
|
|||||||
@@ -17,4 +17,7 @@ extern "C" cudaError_t ${FIELD}_extension_transpose_matrix_cuda(
|
|||||||
${FIELD}::extension_t* output,
|
${FIELD}::extension_t* output,
|
||||||
device_context::DeviceContext& ctx,
|
device_context::DeviceContext& ctx,
|
||||||
bool on_device,
|
bool on_device,
|
||||||
bool is_async);
|
bool is_async);
|
||||||
|
|
||||||
|
extern "C" cudaError_t ${FIELD}_extension_bit_reverse_cuda(
|
||||||
|
const ${FIELD}::extension_t* input, uint64_t n, vec_ops::BitReverseConfig& config, ${FIELD}::extension_t* output);
|
||||||
|
|||||||
@@ -22,7 +22,7 @@
|
|||||||
typedef Affine<point_field_t> affine_t;
|
typedef Affine<point_field_t> affine_t;
|
||||||
|
|
||||||
#define G2_CURVE_DEFINITIONS \
|
#define G2_CURVE_DEFINITIONS \
|
||||||
typedef ExtensionField<fq_config> g2_point_field_t; \
|
typedef ExtensionField<fq_config, point_field_t> g2_point_field_t; \
|
||||||
static constexpr g2_point_field_t g2_generator_x = \
|
static constexpr g2_point_field_t g2_generator_x = \
|
||||||
g2_point_field_t{point_field_t{g2_gen_x_re}, point_field_t{g2_gen_x_im}}; \
|
g2_point_field_t{point_field_t{g2_gen_x_re}, point_field_t{g2_gen_x_im}}; \
|
||||||
static constexpr g2_point_field_t g2_generator_y = \
|
static constexpr g2_point_field_t g2_generator_y = \
|
||||||
|
|||||||
@@ -33,6 +33,9 @@ namespace field_config = babybear;
|
|||||||
#elif FIELD_ID == STARK_252
|
#elif FIELD_ID == STARK_252
|
||||||
#include "fields/stark_fields/stark252.cuh"
|
#include "fields/stark_fields/stark252.cuh"
|
||||||
namespace field_config = stark252;
|
namespace field_config = stark252;
|
||||||
|
#elif FIELD_ID == M31
|
||||||
|
#include "fields/stark_fields/m31.cuh"
|
||||||
|
namespace field_config = m31;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -10,5 +10,6 @@
|
|||||||
|
|
||||||
#define BABY_BEAR 1001
|
#define BABY_BEAR 1001
|
||||||
#define STARK_252 1002
|
#define STARK_252 1002
|
||||||
|
#define M31 1003
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
@@ -4,13 +4,13 @@
|
|||||||
#include "gpu-utils/modifiers.cuh"
|
#include "gpu-utils/modifiers.cuh"
|
||||||
#include "gpu-utils/sharedmem.cuh"
|
#include "gpu-utils/sharedmem.cuh"
|
||||||
|
|
||||||
template <typename CONFIG>
|
template <typename CONFIG, class T>
|
||||||
class ExtensionField
|
class ExtensionField
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
friend Field<CONFIG>;
|
friend T;
|
||||||
|
|
||||||
typedef typename Field<CONFIG>::Wide FWide;
|
typedef typename T::Wide FWide;
|
||||||
|
|
||||||
struct ExtensionWide {
|
struct ExtensionWide {
|
||||||
FWide real;
|
FWide real;
|
||||||
@@ -28,7 +28,7 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef Field<CONFIG> FF;
|
typedef T FF;
|
||||||
static constexpr unsigned TLC = 2 * CONFIG::limbs_count;
|
static constexpr unsigned TLC = 2 * CONFIG::limbs_count;
|
||||||
|
|
||||||
FF real;
|
FF real;
|
||||||
@@ -196,11 +196,11 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class CONFIG>
|
template <typename CONFIG, class T>
|
||||||
struct SharedMemory<ExtensionField<CONFIG>> {
|
struct SharedMemory<ExtensionField<CONFIG, T>> {
|
||||||
__device__ ExtensionField<CONFIG>* getPointer()
|
__device__ ExtensionField<CONFIG, T>* getPointer()
|
||||||
{
|
{
|
||||||
extern __shared__ ExtensionField<CONFIG> s_ext2_scalar_[];
|
extern __shared__ ExtensionField<CONFIG, T> s_ext2_scalar_[];
|
||||||
return s_ext2_scalar_;
|
return s_ext2_scalar_;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -4,11 +4,11 @@
|
|||||||
#include "gpu-utils/modifiers.cuh"
|
#include "gpu-utils/modifiers.cuh"
|
||||||
#include "gpu-utils/sharedmem.cuh"
|
#include "gpu-utils/sharedmem.cuh"
|
||||||
|
|
||||||
template <typename CONFIG>
|
template <typename CONFIG, class T>
|
||||||
class ExtensionField
|
class ExtensionField
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
typedef typename Field<CONFIG>::Wide FWide;
|
typedef typename T::Wide FWide;
|
||||||
|
|
||||||
struct ExtensionWide {
|
struct ExtensionWide {
|
||||||
FWide real;
|
FWide real;
|
||||||
@@ -28,7 +28,7 @@ private:
|
|||||||
};
|
};
|
||||||
|
|
||||||
public:
|
public:
|
||||||
typedef Field<CONFIG> FF;
|
typedef T FF;
|
||||||
static constexpr unsigned TLC = 4 * CONFIG::limbs_count;
|
static constexpr unsigned TLC = 4 * CONFIG::limbs_count;
|
||||||
|
|
||||||
FF real;
|
FF real;
|
||||||
@@ -49,15 +49,14 @@ public:
|
|||||||
static constexpr HOST_DEVICE_INLINE ExtensionField to_montgomery(const ExtensionField& xs)
|
static constexpr HOST_DEVICE_INLINE ExtensionField to_montgomery(const ExtensionField& xs)
|
||||||
{
|
{
|
||||||
return ExtensionField{
|
return ExtensionField{
|
||||||
xs.real * FF{CONFIG::montgomery_r}, xs.im1 * FF{CONFIG::montgomery_r}, xs.im2 * FF{CONFIG::montgomery_r},
|
FF::to_montgomery(xs.real), FF::to_montgomery(xs.im1), FF::to_montgomery(xs.im2), FF::to_montgomery(xs.im3)};
|
||||||
xs.im3 * FF{CONFIG::montgomery_r}};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr HOST_DEVICE_INLINE ExtensionField from_montgomery(const ExtensionField& xs)
|
static constexpr HOST_DEVICE_INLINE ExtensionField from_montgomery(const ExtensionField& xs)
|
||||||
{
|
{
|
||||||
return ExtensionField{
|
return ExtensionField{
|
||||||
xs.real * FF{CONFIG::montgomery_r_inv}, xs.im1 * FF{CONFIG::montgomery_r_inv},
|
FF::from_montgomery(xs.real), FF::from_montgomery(xs.im1), FF::from_montgomery(xs.im2),
|
||||||
xs.im2 * FF{CONFIG::montgomery_r_inv}, xs.im3 * FF{CONFIG::montgomery_r_inv}};
|
FF::from_montgomery(xs.im3)};
|
||||||
}
|
}
|
||||||
|
|
||||||
static HOST_INLINE ExtensionField rand_host()
|
static HOST_INLINE ExtensionField rand_host()
|
||||||
@@ -247,11 +246,11 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class CONFIG>
|
template <class CONFIG, class T>
|
||||||
struct SharedMemory<ExtensionField<CONFIG>> {
|
struct SharedMemory<ExtensionField<CONFIG, T>> {
|
||||||
__device__ ExtensionField<CONFIG>* getPointer()
|
__device__ ExtensionField<CONFIG, T>* getPointer()
|
||||||
{
|
{
|
||||||
extern __shared__ ExtensionField<CONFIG> s_ext4_scalar_[];
|
extern __shared__ ExtensionField<CONFIG, T> s_ext4_scalar_[];
|
||||||
return s_ext4_scalar_;
|
return s_ext4_scalar_;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -58,5 +58,5 @@ namespace babybear {
|
|||||||
/**
|
/**
|
||||||
* Extension field of `scalar_t` enabled if `-DEXT_FIELD` env variable is.
|
* Extension field of `scalar_t` enabled if `-DEXT_FIELD` env variable is.
|
||||||
*/
|
*/
|
||||||
typedef ExtensionField<fp_config> extension_t;
|
typedef ExtensionField<fp_config, scalar_t> extension_t;
|
||||||
} // namespace babybear
|
} // namespace babybear
|
||||||
|
|||||||
224
icicle/include/fields/stark_fields/m31.cuh
Normal file
224
icicle/include/fields/stark_fields/m31.cuh
Normal file
@@ -0,0 +1,224 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "fields/storage.cuh"
|
||||||
|
#include "fields/field.cuh"
|
||||||
|
#include "fields/quartic_extension.cuh"
|
||||||
|
|
||||||
|
namespace m31 {
|
||||||
|
template <class CONFIG>
|
||||||
|
class MersenneField : public Field<CONFIG>
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
HOST_DEVICE_INLINE MersenneField(const MersenneField& other) : Field<CONFIG>(other) {}
|
||||||
|
HOST_DEVICE_INLINE MersenneField(const uint32_t& x = 0) : Field<CONFIG>({x}) {}
|
||||||
|
HOST_DEVICE_INLINE MersenneField(storage<CONFIG::limbs_count> x) : Field<CONFIG>{x} {}
|
||||||
|
HOST_DEVICE_INLINE MersenneField(const Field<CONFIG>& other) : Field<CONFIG>(other) {}
|
||||||
|
|
||||||
|
static constexpr HOST_DEVICE_INLINE MersenneField zero() { return MersenneField(CONFIG::zero.limbs[0]); }
|
||||||
|
|
||||||
|
static constexpr HOST_DEVICE_INLINE MersenneField one() { return MersenneField(CONFIG::one.limbs[0]); }
|
||||||
|
|
||||||
|
static constexpr HOST_DEVICE_INLINE MersenneField from(uint32_t value) { return MersenneField(value); }
|
||||||
|
|
||||||
|
static HOST_INLINE MersenneField rand_host() { return MersenneField(Field<CONFIG>::rand_host()); }
|
||||||
|
|
||||||
|
static void rand_host_many(MersenneField* out, int size)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < size; i++)
|
||||||
|
out[i] = rand_host();
|
||||||
|
}
|
||||||
|
|
||||||
|
HOST_DEVICE_INLINE MersenneField& operator=(const Field<CONFIG>& other)
|
||||||
|
{
|
||||||
|
if (this != &other) { Field<CONFIG>::operator=(other); }
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
HOST_DEVICE_INLINE uint32_t get_limb() const { return this->limbs_storage.limbs[0]; }
|
||||||
|
|
||||||
|
// The `Wide` struct represents a redundant 32-bit form of the Mersenne Field.
|
||||||
|
struct Wide {
|
||||||
|
uint32_t storage;
|
||||||
|
static constexpr HOST_DEVICE_INLINE Wide from_field(const MersenneField& xs)
|
||||||
|
{
|
||||||
|
Wide out{};
|
||||||
|
out.storage = xs.get_limb();
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
static constexpr HOST_DEVICE_INLINE Wide from_number(const uint32_t& xs)
|
||||||
|
{
|
||||||
|
Wide out{};
|
||||||
|
out.storage = xs;
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
friend HOST_DEVICE_INLINE Wide operator+(Wide xs, const Wide& ys)
|
||||||
|
{
|
||||||
|
uint64_t tmp = (uint64_t)xs.storage + ys.storage; // max: 2^33 - 2 = 2^32(1) + (2^32 - 2)
|
||||||
|
tmp = ((tmp >> 32) << 1) + (uint32_t)(tmp); // 2(1)+(2^32-2) = 2^32(1)+(0)
|
||||||
|
return from_number((uint32_t)((tmp >> 32) << 1) + (uint32_t)(tmp)); // max: 2(1) + 0 = 2
|
||||||
|
}
|
||||||
|
friend HOST_DEVICE_INLINE Wide operator-(Wide xs, const Wide& ys)
|
||||||
|
{
|
||||||
|
uint64_t tmp = CONFIG::modulus_3 + xs.storage -
|
||||||
|
ys.storage; // max: 3(2^31-1) + 2^32-1 - 0 = 2^33 + 2^31-4 = 2^32(2) + (2^31-4)
|
||||||
|
return from_number(((uint32_t)(tmp >> 32) << 1) + (uint32_t)(tmp)); // max: 2(2)+(2^31-4) = 2^31
|
||||||
|
}
|
||||||
|
template <unsigned MODULUS_MULTIPLE = 1>
|
||||||
|
static constexpr HOST_DEVICE_INLINE Wide neg(const Wide& xs)
|
||||||
|
{
|
||||||
|
uint64_t tmp = CONFIG::modulus_3 - xs.storage; // max: 3(2^31-1) - 0 = 2^32(1) + (2^31 - 3)
|
||||||
|
return from_number(((uint32_t)(tmp >> 32) << 1) + (uint32_t)(tmp)); // max: 2(1)+(2^31-3) = 2^31 - 1
|
||||||
|
}
|
||||||
|
friend HOST_DEVICE_INLINE Wide operator*(Wide xs, const Wide& ys)
|
||||||
|
{
|
||||||
|
uint64_t t1 = (uint64_t)xs.storage * ys.storage; // max: 2^64 - 2^33+1 = 2^32(2^32 - 2) + 1
|
||||||
|
t1 = ((t1 >> 32) << 1) + (uint32_t)(t1); // max: 2(2^32 - 2) + 1 = 2^32(1) + (2^32 - 3)
|
||||||
|
return from_number((((uint32_t)(t1 >> 32)) << 1) + (uint32_t)(t1)); // max: 2(1) - (2^32 - 3) = 2^32 - 1
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static constexpr HOST_DEVICE_INLINE MersenneField div2(const MersenneField& xs, const uint32_t& power = 1)
|
||||||
|
{
|
||||||
|
uint32_t t = xs.get_limb();
|
||||||
|
return MersenneField{{((t >> power) | (t << (31 - power))) & MersenneField::get_modulus().limbs[0]}};
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr HOST_DEVICE_INLINE MersenneField neg(const MersenneField& xs)
|
||||||
|
{
|
||||||
|
uint32_t t = xs.get_limb();
|
||||||
|
return MersenneField{{t == 0 ? t : MersenneField::get_modulus().limbs[0] - t}};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <unsigned MODULUS_MULTIPLE = 1>
|
||||||
|
static constexpr HOST_DEVICE_INLINE MersenneField reduce(Wide xs)
|
||||||
|
{
|
||||||
|
const uint32_t modulus = MersenneField::get_modulus().limbs[0];
|
||||||
|
uint32_t tmp = (xs.storage >> 31) + (xs.storage & modulus); // max: 1 + 2^31-1 = 2^31
|
||||||
|
tmp = (xs.storage >> 31) + (xs.storage & modulus); // max: 1 + 0 = 1
|
||||||
|
return MersenneField{{tmp == modulus ? 0 : tmp}};
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr HOST_DEVICE_INLINE MersenneField inverse(const MersenneField& x)
|
||||||
|
{
|
||||||
|
uint32_t xs = x.limbs_storage.limbs[0];
|
||||||
|
if (xs <= 1) return xs;
|
||||||
|
uint32_t a = 1, b = 0, y = xs, z = MersenneField::get_modulus().limbs[0], e, m = z;
|
||||||
|
while (1) {
|
||||||
|
#ifdef __CUDA_ARCH__
|
||||||
|
e = __ffs(y) - 1;
|
||||||
|
#else
|
||||||
|
e = __builtin_ctz(y);
|
||||||
|
#endif
|
||||||
|
y >>= e;
|
||||||
|
if (a >= m) {
|
||||||
|
a = (a & m) + (a >> 31);
|
||||||
|
if (a == m) a = 0;
|
||||||
|
}
|
||||||
|
a = ((a >> e) | (a << (31 - e))) & m;
|
||||||
|
if (y == 1) return a;
|
||||||
|
e = a + b;
|
||||||
|
b = a;
|
||||||
|
a = e;
|
||||||
|
e = y + z;
|
||||||
|
z = y;
|
||||||
|
y = e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
friend HOST_DEVICE_INLINE MersenneField operator+(MersenneField xs, const MersenneField& ys)
|
||||||
|
{
|
||||||
|
uint32_t m = MersenneField::get_modulus().limbs[0];
|
||||||
|
uint32_t t = xs.get_limb() + ys.get_limb();
|
||||||
|
if (t > m) t = (t & m) + (t >> 31);
|
||||||
|
if (t == m) t = 0;
|
||||||
|
return MersenneField{{t}};
|
||||||
|
}
|
||||||
|
|
||||||
|
friend HOST_DEVICE_INLINE MersenneField operator-(MersenneField xs, const MersenneField& ys)
|
||||||
|
{
|
||||||
|
return xs + neg(ys);
|
||||||
|
}
|
||||||
|
|
||||||
|
friend HOST_DEVICE_INLINE MersenneField operator*(MersenneField xs, const MersenneField& ys)
|
||||||
|
{
|
||||||
|
uint64_t x = (uint64_t)(xs.get_limb()) * ys.get_limb();
|
||||||
|
uint32_t t = ((x >> 31) + (x & MersenneField::get_modulus().limbs[0]));
|
||||||
|
uint32_t m = MersenneField::get_modulus().limbs[0];
|
||||||
|
if (t > m) t = (t & m) + (t >> 31);
|
||||||
|
if (t > m) t = (t & m) + (t >> 31);
|
||||||
|
if (t == m) t = 0;
|
||||||
|
return MersenneField{{t}};
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr HOST_DEVICE_INLINE Wide mul_wide(const MersenneField& xs, const MersenneField& ys)
|
||||||
|
{
|
||||||
|
return Wide::from_field(xs) * Wide::from_field(ys);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <unsigned MODULUS_MULTIPLE = 1>
|
||||||
|
static constexpr HOST_DEVICE_INLINE Wide sqr_wide(const MersenneField& xs)
|
||||||
|
{
|
||||||
|
return mul_wide(xs, xs);
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr HOST_DEVICE_INLINE MersenneField sqr(const MersenneField& xs) { return xs * xs; }
|
||||||
|
|
||||||
|
static constexpr HOST_DEVICE_INLINE MersenneField to_montgomery(const MersenneField& xs) { return xs; }
|
||||||
|
|
||||||
|
static constexpr HOST_DEVICE_INLINE MersenneField from_montgomery(const MersenneField& xs) { return xs; }
|
||||||
|
|
||||||
|
static constexpr HOST_DEVICE_INLINE MersenneField pow(MersenneField base, int exp)
|
||||||
|
{
|
||||||
|
MersenneField res = one();
|
||||||
|
while (exp > 0) {
|
||||||
|
if (exp & 1) res = res * base;
|
||||||
|
base = base * base;
|
||||||
|
exp >>= 1;
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
struct fp_config {
|
||||||
|
static constexpr unsigned limbs_count = 1;
|
||||||
|
static constexpr unsigned omegas_count = 1;
|
||||||
|
static constexpr unsigned modulus_bit_count = 31;
|
||||||
|
static constexpr unsigned num_of_reductions = 1;
|
||||||
|
|
||||||
|
static constexpr storage<limbs_count> modulus = {0x7fffffff};
|
||||||
|
static constexpr storage<limbs_count> modulus_2 = {0xfffffffe};
|
||||||
|
static constexpr uint64_t modulus_3 = 0x17ffffffd;
|
||||||
|
static constexpr storage<limbs_count> modulus_4 = {0xfffffffc};
|
||||||
|
static constexpr storage<limbs_count> neg_modulus = {0x87ffffff};
|
||||||
|
static constexpr storage<2 * limbs_count> modulus_wide = {0x7fffffff, 0x00000000};
|
||||||
|
static constexpr storage<2 * limbs_count> modulus_squared = {0x00000001, 0x3fffffff};
|
||||||
|
static constexpr storage<2 * limbs_count> modulus_squared_2 = {0x00000002, 0x7ffffffe};
|
||||||
|
static constexpr storage<2 * limbs_count> modulus_squared_4 = {0x00000004, 0xfffffffc};
|
||||||
|
|
||||||
|
static constexpr storage<limbs_count> m = {0x80000001};
|
||||||
|
static constexpr storage<limbs_count> one = {0x00000001};
|
||||||
|
static constexpr storage<limbs_count> zero = {0x00000000};
|
||||||
|
static constexpr storage<limbs_count> montgomery_r = {0x00000001};
|
||||||
|
static constexpr storage<limbs_count> montgomery_r_inv = {0x00000001};
|
||||||
|
|
||||||
|
static constexpr storage_array<omegas_count, limbs_count> omega = {{{0x7ffffffe}}};
|
||||||
|
|
||||||
|
static constexpr storage_array<omegas_count, limbs_count> omega_inv = {{{0x7ffffffe}}};
|
||||||
|
|
||||||
|
static constexpr storage_array<omegas_count, limbs_count> inv = {{{0x40000000}}};
|
||||||
|
|
||||||
|
// nonresidue to generate the extension field
|
||||||
|
static constexpr uint32_t nonresidue = 11;
|
||||||
|
// true if nonresidue is negative.
|
||||||
|
static constexpr bool nonresidue_is_negative = false;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scalar field. Is always a prime field.
|
||||||
|
*/
|
||||||
|
typedef MersenneField<fp_config> scalar_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extension field of `scalar_t` enabled if `-DEXT_FIELD` env variable is.
|
||||||
|
*/
|
||||||
|
typedef ExtensionField<fp_config, scalar_t> extension_t;
|
||||||
|
} // namespace m31
|
||||||
@@ -2,8 +2,8 @@ if (EXT_FIELD)
|
|||||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DEXT_FIELD")
|
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DEXT_FIELD")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
SET(SUPPORTED_FIELDS_WITHOUT_NTT grumpkin)
|
SET(SUPPORTED_FIELDS_WITHOUT_NTT grumpkin;m31)
|
||||||
SET(SUPPORTED_FIELDS_WITHOUT_POSEIDON2 bls12_381;bls12_377;grumpkin;bw6_761;stark252)
|
SET(SUPPORTED_FIELDS_WITHOUT_POSEIDON2 bls12_381;bls12_377;grumpkin;bw6_761;stark252;m31)
|
||||||
|
|
||||||
set(TARGET icicle_field)
|
set(TARGET icicle_field)
|
||||||
|
|
||||||
@@ -13,7 +13,9 @@ set(FIELD_SOURCE ${SRC}/fields/extern.cu)
|
|||||||
list(APPEND FIELD_SOURCE ${SRC}/vec_ops/extern.cu)
|
list(APPEND FIELD_SOURCE ${SRC}/vec_ops/extern.cu)
|
||||||
if(EXT_FIELD)
|
if(EXT_FIELD)
|
||||||
list(APPEND FIELD_SOURCE ${SRC}/fields/extern_extension.cu)
|
list(APPEND FIELD_SOURCE ${SRC}/fields/extern_extension.cu)
|
||||||
list(APPEND FIELD_SOURCE ${SRC}/ntt/extern_extension.cu)
|
if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_NTT)
|
||||||
|
list(APPEND FIELD_SOURCE ${SRC}/ntt/extern_extension.cu)
|
||||||
|
endif()
|
||||||
list(APPEND FIELD_SOURCE ${SRC}/vec_ops/extern_extension.cu)
|
list(APPEND FIELD_SOURCE ${SRC}/vec_ops/extern_extension.cu)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ target_link_libraries(runner GTest::gtest_main)
|
|||||||
set_target_properties(runner PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
set_target_properties(runner PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||||
|
|
||||||
# polynomial test-bench
|
# polynomial test-bench
|
||||||
set(POLY_UNSUPPORTED_FIELD grumpkin)
|
set(POLY_UNSUPPORTED_FIELD grumpkin;m31)
|
||||||
if (NOT FIELD IN_LIST POLY_UNSUPPORTED_FIELD)
|
if (NOT FIELD IN_LIST POLY_UNSUPPORTED_FIELD)
|
||||||
add_executable(polynomial_tb polynomial_test.cu)
|
add_executable(polynomial_tb polynomial_test.cu)
|
||||||
target_link_libraries(polynomial_tb GTest::gtest_main icicle_field pthread)
|
target_link_libraries(polynomial_tb GTest::gtest_main icicle_field pthread)
|
||||||
|
|||||||
@@ -64,6 +64,12 @@ FIELDS_CONFIG = {
|
|||||||
"field_ext.h",
|
"field_ext.h",
|
||||||
"vec_ops_ext.h",
|
"vec_ops_ext.h",
|
||||||
"ntt_ext.h",
|
"ntt_ext.h",
|
||||||
|
},
|
||||||
|
"m31": {
|
||||||
|
"ntt_ext.h",
|
||||||
|
"ntt.h",
|
||||||
|
"poseidon.h",
|
||||||
|
"poseidon2.h",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -23,6 +23,8 @@ then
|
|||||||
echo "find ./ \( -path ./icicle/build -prune -o -path ./**/target -prune -o -path ./examples -prune \) -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -style=file"
|
echo "find ./ \( -path ./icicle/build -prune -o -path ./**/target -prune -o -path ./examples -prune \) -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -style=file"
|
||||||
echo ""
|
echo ""
|
||||||
status=1
|
status=1
|
||||||
|
else
|
||||||
|
echo "🟩 Icicle Core format is fine"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Run go fmt across all Golang packages
|
# Run go fmt across all Golang packages
|
||||||
@@ -32,6 +34,8 @@ then
|
|||||||
echo "Please commit the formatted files"
|
echo "Please commit the formatted files"
|
||||||
echo ""
|
echo ""
|
||||||
status=1
|
status=1
|
||||||
|
else
|
||||||
|
echo "🟩 Golang files format is fine"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Run cargo fmt on Rust files
|
# Run cargo fmt on Rust files
|
||||||
@@ -42,6 +46,8 @@ then
|
|||||||
echo "Please go to wrappers/rust and format the Rust files using the following command:"
|
echo "Please go to wrappers/rust and format the Rust files using the following command:"
|
||||||
echo "find . -path ./icicle-curves/icicle-curve-template -prune -o -name target -prune -o -iname *.rs -print | xargs cargo fmt --check --"
|
echo "find . -path ./icicle-curves/icicle-curve-template -prune -o -name target -prune -o -iname *.rs -print | xargs cargo fmt --check --"
|
||||||
status=1
|
status=1
|
||||||
|
else
|
||||||
|
echo "🟩 Rust files format is fine"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
exit $status
|
exit $status
|
||||||
@@ -9,6 +9,7 @@ members = [
|
|||||||
"icicle-curves/icicle-bn254",
|
"icicle-curves/icicle-bn254",
|
||||||
"icicle-curves/icicle-grumpkin",
|
"icicle-curves/icicle-grumpkin",
|
||||||
"icicle-fields/icicle-babybear",
|
"icicle-fields/icicle-babybear",
|
||||||
|
"icicle-fields/icicle-m31",
|
||||||
"icicle-fields/icicle-stark252",
|
"icicle-fields/icicle-stark252",
|
||||||
"icicle-hash",
|
"icicle-hash",
|
||||||
]
|
]
|
||||||
|
|||||||
19
wrappers/rust/icicle-fields/icicle-m31/Cargo.toml
Normal file
19
wrappers/rust/icicle-fields/icicle-m31/Cargo.toml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
[package]
|
||||||
|
name = "icicle-m31"
|
||||||
|
version.workspace = true
|
||||||
|
edition.workspace = true
|
||||||
|
authors.workspace = true
|
||||||
|
description = "Rust wrapper for the CUDA implementation of m31 prime field by Ingonyama"
|
||||||
|
homepage.workspace = true
|
||||||
|
repository.workspace = true
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
icicle-core = { workspace = true }
|
||||||
|
icicle-cuda-runtime = { workspace = true }
|
||||||
|
|
||||||
|
[build-dependencies]
|
||||||
|
cmake = "0.1.50"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = []
|
||||||
|
devmode = ["icicle-core/devmode"]
|
||||||
29
wrappers/rust/icicle-fields/icicle-m31/build.rs
Normal file
29
wrappers/rust/icicle-fields/icicle-m31/build.rs
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
use cmake::Config;
|
||||||
|
use std::env;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
println!("cargo:rerun-if-env-changed=CXXFLAGS");
|
||||||
|
println!("cargo:rerun-if-changed=../../../../icicle");
|
||||||
|
|
||||||
|
// Base config
|
||||||
|
let mut config = Config::new("../../../../icicle/");
|
||||||
|
config
|
||||||
|
.define("FIELD", "m31")
|
||||||
|
.define("CMAKE_BUILD_TYPE", "Release")
|
||||||
|
.define("EXT_FIELD", "ON");
|
||||||
|
|
||||||
|
if let Ok(cuda_arch) = env::var("CUDA_ARCH") {
|
||||||
|
config.define("CUDA_ARCH", &cuda_arch);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Build
|
||||||
|
let out_dir = config
|
||||||
|
.build_target("icicle_field")
|
||||||
|
.build();
|
||||||
|
|
||||||
|
println!("cargo:rustc-link-search={}/build/lib", out_dir.display());
|
||||||
|
|
||||||
|
println!("cargo:rustc-link-lib=ingo_field_m31");
|
||||||
|
println!("cargo:rustc-link-lib=stdc++");
|
||||||
|
println!("cargo:rustc-link-lib=cudart");
|
||||||
|
}
|
||||||
33
wrappers/rust/icicle-fields/icicle-m31/src/field.rs
Normal file
33
wrappers/rust/icicle-fields/icicle-m31/src/field.rs
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
use icicle_core::field::{Field, MontgomeryConvertibleField};
|
||||||
|
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
|
||||||
|
use icicle_core::{impl_field, impl_scalar_field};
|
||||||
|
use icicle_cuda_runtime::device::check_device;
|
||||||
|
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||||
|
use icicle_cuda_runtime::error::CudaError;
|
||||||
|
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
|
||||||
|
|
||||||
|
pub(crate) const SCALAR_LIMBS: usize = 1;
|
||||||
|
pub(crate) const EXTENSION_LIMBS: usize = 4;
|
||||||
|
|
||||||
|
impl_scalar_field!("m31", m31, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
|
||||||
|
impl_scalar_field!(
|
||||||
|
"m31_extension",
|
||||||
|
m31_extension,
|
||||||
|
EXTENSION_LIMBS,
|
||||||
|
ExtensionField,
|
||||||
|
ExtensionCfg,
|
||||||
|
Fr
|
||||||
|
);
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::{ExtensionField, ScalarField};
|
||||||
|
use icicle_core::impl_field_tests;
|
||||||
|
use icicle_core::tests::*;
|
||||||
|
|
||||||
|
impl_field_tests!(ScalarField);
|
||||||
|
mod extension {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
impl_field_tests!(ExtensionField);
|
||||||
|
}
|
||||||
|
}
|
||||||
2
wrappers/rust/icicle-fields/icicle-m31/src/lib.rs
Normal file
2
wrappers/rust/icicle-fields/icicle-m31/src/lib.rs
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
pub mod field;
|
||||||
|
pub mod vec_ops;
|
||||||
26
wrappers/rust/icicle-fields/icicle-m31/src/vec_ops/mod.rs
Normal file
26
wrappers/rust/icicle-fields/icicle-m31/src/vec_ops/mod.rs
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
use crate::field::{ExtensionCfg, ExtensionField, ScalarCfg, ScalarField};
|
||||||
|
|
||||||
|
use icicle_core::error::IcicleResult;
|
||||||
|
use icicle_core::impl_vec_ops_field;
|
||||||
|
use icicle_core::traits::IcicleResultWrap;
|
||||||
|
use icicle_core::vec_ops::{BitReverseConfig, VecOps, VecOpsConfig};
|
||||||
|
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||||
|
use icicle_cuda_runtime::error::CudaError;
|
||||||
|
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||||
|
|
||||||
|
impl_vec_ops_field!("m31", m31, ScalarField, ScalarCfg);
|
||||||
|
impl_vec_ops_field!("m31_extension", m31_extension, ExtensionField, ExtensionCfg);
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub(crate) mod tests {
|
||||||
|
use crate::field::{ExtensionField, ScalarField};
|
||||||
|
use icicle_core::impl_vec_add_tests;
|
||||||
|
use icicle_core::vec_ops::tests::*;
|
||||||
|
|
||||||
|
impl_vec_add_tests!(ScalarField);
|
||||||
|
mod extension {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
impl_vec_add_tests!(ExtensionField);
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user