mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-14 09:58:02 -05:00
Compare commits
23 Commits
hadar_ref8
...
msm-fix16
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b39b529463 | ||
|
|
86bee3af42 | ||
|
|
dbd5cd4cbb | ||
|
|
655f014dc2 | ||
|
|
18c7cad89c | ||
|
|
1e9f628235 | ||
|
|
2e45ed1bd4 | ||
|
|
e828d1da2a | ||
|
|
233927668c | ||
|
|
1866df60f1 | ||
|
|
ccc8892a52 | ||
|
|
67e4ee2864 | ||
|
|
6c5fe47e55 | ||
|
|
ed9de3d1e9 | ||
|
|
d01e0dbfb1 | ||
|
|
6aa6fe0c1c | ||
|
|
a64df640de | ||
|
|
1b2b9f2826 | ||
|
|
0a36a545bf | ||
|
|
407273dee3 | ||
|
|
f55bd30e13 | ||
|
|
071c24ce5a | ||
|
|
08c34a5183 |
File diff suppressed because it is too large
Load Diff
@@ -3,7 +3,7 @@
|
||||
#pragma once
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, cudaStream_t stream);
|
||||
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, bool big_triangle, cudaStream_t stream);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device, cudaStream_t stream);
|
||||
@@ -12,7 +12,7 @@ template <typename S, typename P, typename A>
|
||||
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, cudaStream_t stream);
|
||||
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, bool big_triangle, cudaStream_t stream);
|
||||
|
||||
template <typename S, typename P, typename A>
|
||||
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, cudaStream_t stream);
|
||||
|
||||
@@ -5,15 +5,19 @@
|
||||
#include "../../utils/cuda_utils.cuh"
|
||||
#include "../../primitives/projective.cuh"
|
||||
#include "../../primitives/field.cuh"
|
||||
#include "../../curves/bls12_381/curve_config.cuh"
|
||||
// #include "../../curves/bls12_377/curve_config.cuh"
|
||||
#include "../../curves/bn254/curve_config.cuh"
|
||||
|
||||
using namespace BLS12_381;
|
||||
// using namespace BLS12_377;
|
||||
using namespace BN254;
|
||||
|
||||
class Dummy_Scalar {
|
||||
public:
|
||||
static constexpr unsigned NBITS = 32;
|
||||
|
||||
unsigned x;
|
||||
// unsigned p = 10;
|
||||
unsigned p = 1<<30;
|
||||
|
||||
friend HOST_INLINE std::ostream& operator<<(std::ostream& os, const Dummy_Scalar& scalar) {
|
||||
os << scalar.x;
|
||||
@@ -25,7 +29,7 @@ class Dummy_Scalar {
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Scalar operator+(Dummy_Scalar p1, const Dummy_Scalar& p2) {
|
||||
return {p1.x+p2.x};
|
||||
return {(p1.x+p2.x)%p1.p};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE bool operator==(const Dummy_Scalar& p1, const Dummy_Scalar& p2) {
|
||||
@@ -36,10 +40,11 @@ class Dummy_Scalar {
|
||||
return (p1.x == p2);
|
||||
}
|
||||
|
||||
// static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar &scalar) {
|
||||
// return {Dummy_Scalar::neg(point.x)};
|
||||
// }
|
||||
static HOST_DEVICE_INLINE Dummy_Scalar neg(const Dummy_Scalar &scalar) {
|
||||
return {scalar.p-scalar.x};
|
||||
}
|
||||
static HOST_INLINE Dummy_Scalar rand_host() {
|
||||
// return {(unsigned)rand()%10};
|
||||
return {(unsigned)rand()};
|
||||
}
|
||||
};
|
||||
@@ -53,6 +58,10 @@ class Dummy_Projective {
|
||||
return {0};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective one() {
|
||||
return {1};
|
||||
}
|
||||
|
||||
static HOST_DEVICE_INLINE Dummy_Projective to_affine(const Dummy_Projective &point) {
|
||||
return {point.x};
|
||||
}
|
||||
@@ -61,9 +70,9 @@ class Dummy_Projective {
|
||||
return {point.x};
|
||||
}
|
||||
|
||||
// static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
|
||||
// return {Dummy_Scalar::neg(point.x)};
|
||||
// }
|
||||
static HOST_DEVICE_INLINE Dummy_Projective neg(const Dummy_Projective &point) {
|
||||
return {Dummy_Scalar::neg(point.x)};
|
||||
}
|
||||
|
||||
friend HOST_DEVICE_INLINE Dummy_Projective operator+(Dummy_Projective p1, const Dummy_Projective& p2) {
|
||||
return {p1.x+p2.x};
|
||||
@@ -119,62 +128,68 @@ typedef affine_t test_affine;
|
||||
|
||||
int main()
|
||||
{
|
||||
unsigned batch_size = 4;
|
||||
unsigned msm_size = 1<<15;
|
||||
unsigned batch_size = 1;
|
||||
unsigned msm_size = 1<<24;
|
||||
unsigned N = batch_size*msm_size;
|
||||
|
||||
test_scalar *scalars = new test_scalar[N];
|
||||
test_affine *points = new test_affine[N];
|
||||
|
||||
for (unsigned i=0;i<N;i++){
|
||||
scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
|
||||
// scalars[i] = (i%msm_size < 10)? test_scalar::rand_host() : scalars[i-10];
|
||||
points[i] = (i%msm_size < 10)? test_projective::to_affine(test_projective::rand_host()): points[i-10];
|
||||
// scalars[i] = test_scalar::rand_host();
|
||||
scalars[i] = test_scalar::rand_host();
|
||||
// points[i] = test_projective::to_affine(test_projective::rand_host());
|
||||
}
|
||||
std::cout<<"finished generating"<<std::endl;
|
||||
|
||||
// projective_t *short_res = (projective_t*)malloc(sizeof(projective_t));
|
||||
// test_projective *large_res = (test_projective*)malloc(sizeof(test_projective));
|
||||
test_projective large_res[batch_size];
|
||||
test_projective batched_large_res[batch_size];
|
||||
test_projective large_res[batch_size*2];
|
||||
// test_projective batched_large_res[batch_size];
|
||||
// fake_point *large_res = (fake_point*)malloc(sizeof(fake_point));
|
||||
// fake_point batched_large_res[256];
|
||||
|
||||
|
||||
// short_msm<scalar_t, projective_t, affine_t>(scalars, points, N, short_res);
|
||||
for (unsigned i=0;i<batch_size;i++){
|
||||
large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
|
||||
// for (unsigned i=0;i<batch_size;i++){
|
||||
// large_msm<test_scalar, test_projective, test_affine>(scalars+msm_size*i, points+msm_size*i, msm_size, large_res+i, false);
|
||||
// std::cout<<"final result large"<<std::endl;
|
||||
// std::cout<<test_projective::to_affine(*large_res)<<std::endl;
|
||||
}
|
||||
// }
|
||||
auto begin = std::chrono::high_resolution_clock::now();
|
||||
batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
|
||||
// large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false);
|
||||
// batched_large_msm<test_scalar, test_projective, test_affine>(scalars, points, batch_size, msm_size, batched_large_res, false);
|
||||
large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res, false, true,0);
|
||||
// std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
|
||||
large_msm<test_scalar, test_projective, test_affine>(scalars, points, msm_size, large_res+1, false, false,0);
|
||||
// test_reduce_triangle(scalars);
|
||||
// test_reduce_rectangle(scalars);
|
||||
// test_reduce_single(scalars);
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::nanoseconds>(end - begin);
|
||||
printf("Time measured: %.3f seconds.\n", elapsed.count() * 1e-9);
|
||||
std::cout<<test_projective::to_affine(large_res[0])<<std::endl;
|
||||
std::cout<<test_projective::to_affine(large_res[1])<<std::endl;
|
||||
|
||||
// reference_msm<test_affine, test_scalar, test_projective>(scalars, points, msm_size);
|
||||
|
||||
std::cout<<"final results batched large"<<std::endl;
|
||||
bool success = true;
|
||||
for (unsigned i = 0; i < batch_size; i++)
|
||||
{
|
||||
std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
|
||||
if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
|
||||
std::cout<<"good"<<std::endl;
|
||||
}
|
||||
else{
|
||||
std::cout<<"miss"<<std::endl;
|
||||
std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
|
||||
success = false;
|
||||
}
|
||||
}
|
||||
if (success){
|
||||
std::cout<<"success!"<<std::endl;
|
||||
}
|
||||
// std::cout<<"final results batched large"<<std::endl;
|
||||
// bool success = true;
|
||||
// for (unsigned i = 0; i < batch_size; i++)
|
||||
// {
|
||||
// std::cout<<test_projective::to_affine(batched_large_res[i])<<std::endl;
|
||||
// if (test_projective::to_affine(large_res[i])==test_projective::to_affine(batched_large_res[i])){
|
||||
// std::cout<<"good"<<std::endl;
|
||||
// }
|
||||
// else{
|
||||
// std::cout<<"miss"<<std::endl;
|
||||
// std::cout<<test_projective::to_affine(large_res[i])<<std::endl;
|
||||
// success = false;
|
||||
// }
|
||||
// }
|
||||
// if (success){
|
||||
// std::cout<<"success!"<<std::endl;
|
||||
// }
|
||||
|
||||
// std::cout<<batched_large_res[0]<<std::endl;
|
||||
// std::cout<<batched_large_res[1]<<std::endl;
|
||||
|
||||
@@ -12,7 +12,7 @@ int msm_cuda_bls12_377(BLS12_377::projective_t *out, BLS12_377::affine_t points[
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false, stream);
|
||||
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false, false, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -53,7 +53,7 @@ extern "C" int msm_batch_cuda_bls12_377(BLS12_377::projective_t* out, BLS12_377:
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(d_scalars, d_points, count, d_out, true, stream);
|
||||
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(d_scalars, d_points, count, d_out, true, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ int msm_cuda_bls12_381(BLS12_381::projective_t *out, BLS12_381::affine_t points[
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, count, out, false, stream);
|
||||
large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, count, out, false, false, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -52,7 +52,7 @@ extern "C" int msm_batch_cuda_bls12_381(BLS12_381::projective_t* out, BLS12_381:
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm(d_scalars, d_points, count, d_out, true, stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -2,4 +2,4 @@
|
||||
#include "lde.cu"
|
||||
#include "msm.cu"
|
||||
#include "ve_mod_mult.cu"
|
||||
#include "poseidon.cu"
|
||||
#include "poseidon.cu"
|
||||
|
||||
@@ -12,7 +12,7 @@ int msm_cuda_bn254(BN254::projective_t *out, BN254::affine_t points[],
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, count, out, false, stream);
|
||||
large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, count, out, false, false, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -52,7 +52,7 @@ extern "C" int msm_batch_cuda_bn254(BN254::projective_t* out, BN254::affine_t po
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm(d_scalars, d_points, count, d_out, true, stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -16,4 +16,4 @@ extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective
|
||||
!((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) && (point1->z == BN254::g2_point_field_t::zero())) &&
|
||||
!((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) && (point2->z == BN254::g2_point_field_t::zero()));
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@@ -11,7 +11,7 @@ int msm_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t *out, ${CURVE_NAME_U}
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::affine_t>(scalars, points, count, out, false, stream);
|
||||
large_msm<${CURVE_NAME_U}::scalar_t, ${CURVE_NAME_U}::projective_t, ${CURVE_NAME_U}::affine_t>(scalars, points, count, out, false, false, stream);
|
||||
return CUDA_SUCCESS;
|
||||
}
|
||||
catch (const std::runtime_error &ex)
|
||||
@@ -52,7 +52,7 @@ extern "C" int msm_batch_cuda_${CURVE_NAME_L}(${CURVE_NAME_U}::projective_t* out
|
||||
{
|
||||
try
|
||||
{
|
||||
large_msm(d_scalars, d_points, count, d_out, true, stream);
|
||||
large_msm(d_scalars, d_points, count, d_out, true, false, stream);
|
||||
cudaStreamSynchronize(stream);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -16,4 +16,4 @@ extern "C" bool eq_g2_${CURVE_NAME_L}(${CURVE_NAME_U}::g2_projective_t *point1,
|
||||
!((point1->x == ${CURVE_NAME_U}::g2_point_field_t::zero()) && (point1->y == ${CURVE_NAME_U}::g2_point_field_t::zero()) && (point1->z == ${CURVE_NAME_U}::g2_point_field_t::zero())) &&
|
||||
!((point2->x == ${CURVE_NAME_U}::g2_point_field_t::zero()) && (point2->y == ${CURVE_NAME_U}::g2_point_field_t::zero()) && (point2->z == ${CURVE_NAME_U}::g2_point_field_t::zero()));
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user