Compare commits

...

23 Commits

Author SHA1 Message Date
hadaringonyama
b39b529463 no device sync 2023-07-05 11:07:07 +03:00
hadaringonyama
86bee3af42 works without the sort - temp fix 2023-07-05 10:55:59 +03:00
hadaringonyama
dbd5cd4cbb fixing streams - WIP 2023-07-05 09:50:04 +03:00
hadaringonyama
655f014dc2 merge with dev passes all tests 2023-07-02 16:13:51 +03:00
hadaringonyama
18c7cad89c Merge remote-tracking branch 'origin/dev' into msm-performance 2023-07-02 14:49:19 +03:00
hadaringonyama
1e9f628235 c=16 works 2023-07-02 12:07:46 +03:00
hadaringonyama
2e45ed1bd4 reduction - simple implementation, c=8 works 2023-06-29 19:12:53 +03:00
hadaringonyama
e828d1da2a c=8 works 2023-06-28 16:23:13 +03:00
hadaringonyama
233927668c triangle block mix fix 2023-06-28 12:31:38 +03:00
guy-ingo
1866df60f1 new kernels implementation, no correctness 2023-06-26 18:25:38 +03:00
guy-ingo
ccc8892a52 rectangle kernel works for powers of 2 2023-06-26 17:56:02 +03:00
guy-ingo
67e4ee2864 triangle kernel works as expected 2023-06-26 16:51:42 +03:00
guy-ingo
6c5fe47e55 big triangle replacment works 2023-06-25 15:28:56 +03:00
hadaringonyama
ed9de3d1e9 signed digits working 2023-06-20 14:48:13 +03:00
hadaringonyama
d01e0dbfb1 signed + top bm - WIP 2023-06-18 18:10:11 +03:00
hadaringonyama
6aa6fe0c1c adding sort by bucket size 2023-06-18 11:27:07 +03:00
hadaringonyama
a64df640de temp 2023-05-30 09:37:15 +03:00
hadaringonyama
1b2b9f2826 code cleaning 2023-05-29 09:22:10 +03:00
hadaringonyama
0a36a545bf remove short msm from extern call 2023-05-28 16:24:04 +03:00
hadaringonyama
407273dee3 bugs fixed 2023-05-28 16:13:47 +03:00
hadaringonyama
f55bd30e13 bugs fixed 2023-05-28 15:53:02 +03:00
HadarIngonyama
071c24ce5a supporting new curves (#74)
* Fix for local machines GoogleTest and CMake (#70)

GoogleTest fix, updated readme

* Supporting Additional Curves (#72)

* init commit - changes for supporting new curves

* refactor + additional curve (bls12-377 works, bn254 - not yet)

* general refactor + curves script + fixing bn245

* revert unnecessary changes + refactor new curve script

* add README and fix limbs_p=limbs_q case in python script

---------

Co-authored-by: Vitalii Hnatyk <vhnatyk@gmail.com>
Co-authored-by: guy-ingo <106763145+guy-ingo@users.noreply.github.com>
2023-05-15 15:51:48 +03:00
DmytroTym
08c34a5183 Fix for local machines GoogleTest and CMake (#70) (#73)
GoogleTest fix, updated readme

Co-authored-by: Vitalii Hnatyk <vhnatyk@gmail.com>
2023-05-15 15:23:06 +03:00
10 changed files with 1146 additions and 94 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -3,7 +3,7 @@
#pragma once
template <typename S, typename P, typename A>
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, cudaStream_t stream);
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, bool big_triangle, cudaStream_t stream);
template <typename S, typename P, typename A>
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device, cudaStream_t stream);
@@ -12,7 +12,7 @@ template <typename S, typename P, typename A>
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream);
template <typename S, typename P, typename A>
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, cudaStream_t stream);
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, bool big_triangle, cudaStream_t stream);
template <typename S, typename P, typename A>
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, cudaStream_t stream);

View File

@@ -5,15 +5,19 @@
#include "../../utils/cuda_utils.cuh"
#include "../../primitives/projective.cuh"
#include "../../primitives/field.cuh"
#include "../../curves/bls12_381/curve_config.cuh"
// #include "../../curves/bls12_377/curve_config.cuh"
#include "../../curves/bn254/curve_config.cuh"
using namespace BLS12_381;
// using namespace BLS12_377;
using namespace BN254;
class Dummy_Scalar {
public:
static constexpr unsigned NBITS = 32;
unsigned x;
// unsigned p = 10;
unsigned p = 1<<30;
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar) {
os << scalar.x;
@@ -25,7 +29,7 @@ class Dummy_Scalar {
}
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2) {
return {p1.x+p2.x};
return {(p1.x+p2.x)%p1.p};
}
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) {
@@ -36,10 +40,11 @@ class Dummy_Scalar {
return (p1.x == p2);
}
// static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar &scalar) {
// return {Dummy_Scalar::neg(point.x)};
// }
static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar &scalar) {
return {scalar.p-scalar.x};
}
static HOST_INLINE Dummy_Scalar rand_host() {
// return {(unsigned)rand()%10};
return {(unsigned)rand()};
}
};
@@ -53,6 +58,10 @@ class Dummy_Projective {
return {0};
}
static HOST_DEVICE_INLINE Dummy_Projective one() {
return {1};
}
static HOST_DEVICE_INLINE Dummy_Projective to_affine(const Dummy_Projective &point) {
return {point.x};
}
@@ -61,9 +70,9 @@ class Dummy_Projective {
return {point.x};
}
// static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
// return {Dummy_Scalar::neg(point.x)};
// }
static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
return {Dummy_Scalar::neg(point.x)};
}
friend HOST_DEVICE_INLINE Dummy_Projective operator+(Dummy_Projective p1, const Dummy_Projective& p2) {
return {p1.x+p2.x};
@@ -119,62 +128,68 @@ typedef affine_t test_affine;
int main()
{
unsigned batch_size = 4;
unsigned msm_size = 1<<15;
unsigned batch_size = 1;
unsigned msm_size = 1<<24;
unsigned N = batch_size*msm_size;
test_scalar *scalars = new test_scalar[N];
test_affine *points = new test_affine[N];
for (unsigned i=0;i<N;i++){
scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
// scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
points[i] = (i%msm_size < 10)? test_projective::to_affine(test_projective::rand_host()): points[i-10];
// scalars[i] = test_scalar::rand_host();
scalars[i] = test_scalar::rand_host();
// points[i] = test_projective::to_affine(test_projective::rand_host());
}
std::cout<<"finished generating"<<std::endl;
// projective_t *short_res = (projective_t*)malloc(sizeof(projective_t));
// test_projective *large_res = (test_projective*)malloc(sizeof(test_projective));
test_projective large_res[batch_size];
test_projective batched_large_res[batch_size];
test_projective large_res[batch_size*2];
// test_projective batched_large_res[batch_size];
// fake_point *large_res = (fake_point*)malloc(sizeof(fake_point));
// fake_point batched_large_res[256];
// short_msm<scalar_t, projective_t, affine_t>(scalars, points, N, short_res);
for (unsigned i=0;i<batch_size;i++){
large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
// for (unsigned i=0;i<batch_size;i++){
// large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
// std::cout<<"final result large"<<std::endl;
// std::cout<<test_projective::to_affine(*large_res)<<std::endl;
}
// }
auto begin = std::chrono::high_resolution_clock::now();
batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
// large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false);
// batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false, true,0);
// std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res+1, false, false,0);
// test_reduce_triangle(scalars);
// test_reduce_rectangle(scalars);
// test_reduce_single(scalars);
auto end = std::chrono::high_resolution_clock::now();
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
std::cout<<test_projective::to_affine(large_res[1])<<std::endl;
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
std::cout<<"final results batched large"<<std::endl;
bool success = true;
for (unsigned i = 0; i < batch_size; i++)
{
std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
std::cout<<"good"<<std::endl;
}
else{
std::cout<<"miss"<<std::endl;
std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
success = false;
}
}
if (success){
std::cout<<"success!"<<std::endl;
}
// std::cout<<"final results batched large"<<std::endl;
// bool success = true;
// for (unsigned i = 0; i < batch_size; i++)
// {
// std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
// if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
// std::cout<<"good"<<std::endl;
// }
// else{
// std::cout<<"miss"<<std::endl;
// std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
// success = false;
// }
// }
// if (success){
// std::cout<<"success!"<<std::endl;
// }
// std::cout<<batched_large_res[0]<<std::endl;
// std::cout<<batched_large_res[1]<<std::endl;

View File

@@ -12,7 +12,7 @@ int msm_cuda_bls12_377(BLS12_377::projective_t *out, BLS12_377::affine_t points[
{
try
{
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false, stream);
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false, false, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -53,7 +53,7 @@ extern "C" int msm_batch_cuda_bls12_377(BLS12_377::projective_t* out, BLS12_377:
{
try
{
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(d_scalars, d_points, count, d_out, true, stream);
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(d_scalars, d_points, count, d_out, true, false, stream);
cudaStreamSynchronize(stream);
return 0;
}

View File

@@ -12,7 +12,7 @@ int msm_cuda_bls12_381(BLS12_381::projective_t *out, BLS12_381::affine_t points[
{
try
{
large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, count, out, false, stream);
large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, count, out, false, false, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -52,7 +52,7 @@ extern "C" int msm_batch_cuda_bls12_381(BLS12_381::projective_t* out, BLS12_381:
{
try
{
large_msm(d_scalars, d_points, count, d_out, true, stream);
large_msm(d_scalars, d_points, count, d_out, true, false, stream);
cudaStreamSynchronize(stream);
return 0;
}

View File

@@ -2,4 +2,4 @@
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"
#include "poseidon.cu"
#include "poseidon.cu"

View File

@@ -12,7 +12,7 @@ int msm_cuda_bn254(BN254::projective_t *out, BN254::affine_t points[],
{
try
{
large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, count, out, false, stream);
large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, count, out, false, false, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -52,7 +52,7 @@ extern "C" int msm_batch_cuda_bn254(BN254::projective_t* out, BN254::affine_t po
{
try
{
large_msm(d_scalars, d_points, count, d_out, true, stream);
large_msm(d_scalars, d_points, count, d_out, true, false, stream);
cudaStreamSynchronize(stream);
return 0;
}

View File

@@ -16,4 +16,4 @@ extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective
!((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) && (point1->z == BN254::g2_point_field_t::zero())) &&
!((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) && (point2->z == BN254::g2_point_field_t::zero()));
}
#endif
#endif

View File

@@ -11,7 +11,7 @@ int msm_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *out, ${CURVE_NAME_U}
{
try
{
large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::affine_t>(scalars, points, count, out, false, stream);
large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::affine_t>(scalars, points, count, out, false, false, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -52,7 +52,7 @@ extern "C" int msm_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* out
{
try
{
large_msm(d_scalars, d_points, count, d_out, true, stream);
large_msm(d_scalars, d_points, count, d_out, true, false, stream);
cudaStreamSynchronize(stream);
return 0;
}

View File

@@ -16,4 +16,4 @@ extern "C" bool eq_g2_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t *point1,
!((point1->x == ${CURVE_NAME_U}::g2_point_field_t::zero()) && (point1->y == ${CURVE_NAME_U}::g2_point_field_t::zero()) && (point1->z == ${CURVE_NAME_U}::g2_point_field_t::zero())) &&
!((point2->x == ${CURVE_NAME_U}::g2_point_field_t::zero()) && (point2->y == ${CURVE_NAME_U}::g2_point_field_t::zero()) && (point2->z == ${CURVE_NAME_U}::g2_point_field_t::zero()));
}
#endif
#endif