From 80ed03771e7df3c5b8fe5042b3bb654a0c98f95e Mon Sep 17 00:00:00 2001 From: Koren-Brand Date: Wed, 5 Jun 2024 13:21:39 +0300 Subject: [PATCH] basic pipenger cpu works --- icicle_v3/backend/cpu/src/cpu_device_api.cpp | 3 + icicle_v3/backend/cpu/src/curve/cpu_msm.cpp | 177 +++++++++++++++++-- icicle_v3/include/icicle/curves/projective.h | 2 +- icicle_v3/tests/test_curve_api.cpp | 17 +- 4 files changed, 174 insertions(+), 25 deletions(-) diff --git a/icicle_v3/backend/cpu/src/cpu_device_api.cpp b/icicle_v3/backend/cpu/src/cpu_device_api.cpp index e40f49ab..712b19fc 100644 --- a/icicle_v3/backend/cpu/src/cpu_device_api.cpp +++ b/icicle_v3/backend/cpu/src/cpu_device_api.cpp @@ -105,3 +105,6 @@ public: }; REGISTER_DEVICE_API("CPU", CpuDeviceAPI); + +class CpuRefDeviceAPI : public CpuDeviceAPI {}; +REGISTER_DEVICE_API("CPU_REF", CpuRefDeviceAPI); diff --git a/icicle_v3/backend/cpu/src/curve/cpu_msm.cpp b/icicle_v3/backend/cpu/src/curve/cpu_msm.cpp index df171388..a42294e4 100644 --- a/icicle_v3/backend/cpu/src/curve/cpu_msm.cpp +++ b/icicle_v3/backend/cpu/src/curve/cpu_msm.cpp @@ -9,18 +9,163 @@ using namespace curve_config; using namespace icicle; -template -eIcicleError -cpu_msm(const Device& device, const S* scalars, const A* bases, int msm_size, const MSMConfig& config, P* results) +uint32_t** msm_bucket_coeffs( + const scalar_t* scalars, + const unsigned int msm_size, + const unsigned int c, + const unsigned int num_windows) { - for (auto batch_idx = 0; batch_idx < config.batch_size; ++batch_idx) { - P res = P::zero(); - const S* batch_scalars = scalars + msm_size * batch_idx; - const A* batch_bases = config.are_bases_shared ? bases : bases + msm_size * batch_idx; - for (auto i = 0; i < msm_size; ++i) { - res = res + P::from_affine(batch_bases[i]) * batch_scalars[i]; + /** + * Split msm scalars to c-wide coefficients for use in the bucket method + * @param scalars - original scalar array + * @param msm_size - length of the above array + * @param c - window-size (inverse to number of buckets) + * @param num_windows - NBITS/c + * @param coefficients - output array of the decomposed scalar + * @return status of function success / failure in the case of invalid arguments + */ + // TODO add check that c divides NBITS + uint32_t** coefficients = new uint32_t*[msm_size]; + for (int i = 0; i < msm_size; i++) + { + coefficients[i] = new uint32_t[num_windows]; + for (int w = 0; w < num_windows; w++) + { + coefficients[i][w] = scalars[i].get_scalar_digit(w, c); } - results[batch_idx] = res; + } + return coefficients; +} + +projective_t** msm_bucket_accumulator( + const scalar_t* scalars, + const affine_t* bases, + const unsigned int c, + const unsigned int num_windows, + int msm_size) +{ + /** + * Accumulate into the different buckets + * @param scalars - original scalars given from the msm result + * @param bases - point bases to add + * @param c - width of windows to split scalars above + * @param msm_size - number of scalars to add + * @param buckets - points array containing all buckets + */ + uint32_t** coefficients = msm_bucket_coeffs(scalars, msm_size, c, num_windows); + + uint32_t num_buckets = 1< 0; i--) + { + if (!projective_t::is_zero(buckets[w][i])) partial_sum = partial_sum + buckets[w][i]; + window_sums[w] = window_sums[w] + partial_sum; + } + } + return window_sums; +} + +projective_t msm_final_sum( + projective_t* window_sums, + const unsigned int c, + const unsigned int num_windows) +{ + projective_t result = window_sums[num_windows - 1]; + for (int w = num_windows - 2; w >= 0; w--) + { + for (int dbl = 0; dbl < c; dbl++) + { + result = projective_t::dbl(result); + } + result = result + window_sums[w]; + } + return result; +} + +void msm_delete_arrays( + projective_t** buckets, + projective_t* windows, + const unsigned int num_windows) +{ + for (int w = 0; w < num_windows; w++) + { + delete[] buckets[w]; + } + delete[] buckets; + delete[] windows; +} + +// Double and add +eIcicleError cpu_msm( + const Device& device, + const scalar_t* scalars, // COMMENT it assumes no negative scalar inputs + const affine_t* bases, + int msm_size, + const MSMConfig& config, + projective_t* results) +{ + const unsigned int c = 15; // TODO integrate into msm config + const int num_windows = (scalar_t::NBITS / c) + ((scalar_t::NBITS % c != 0)? 1 : 0); + + projective_t** buckets = msm_bucket_accumulator(scalars, bases, c, num_windows, msm_size); + projective_t* window_sums = msm_window_sum(buckets, c, num_windows); + projective_t res = msm_final_sum(window_sums, c, num_windows); + // COMMENT do I need to delete the buckets manually or is it handled automatically when the function finishes? + results[0] = res; + msm_delete_arrays(buckets, window_sums, num_windows); + return eIcicleError::SUCCESS; +} + +eIcicleError cpu_msm_ref( + const Device& device, + const scalar_t* scalars, + const affine_t* bases, + int msm_size, + const MSMConfig& config, + projective_t* results) +{ + projective_t res = projective_t::zero(); + for (auto i = 0; i < msm_size; ++i) { + res = res + projective_t::from_affine(bases[i]) * scalars[i]; } return eIcicleError::SUCCESS; } @@ -29,15 +174,11 @@ template eIcicleError cpu_msm_precompute_bases( const Device& device, const A* input_bases, int nof_bases, const MSMConfig& config, A* output_bases) { - ICICLE_ASSERT(!config.are_points_on_device && !config.are_scalars_on_device); - memcpy(output_bases, input_bases, sizeof(A) * nof_bases); - return eIcicleError::SUCCESS; + return eIcicleError::API_NOT_IMPLEMENTED; } -REGISTER_MSM_BACKEND("CPU", (cpu_msm)); REGISTER_MSM_PRE_COMPUTE_BASES_BACKEND("CPU", cpu_msm_precompute_bases); +REGISTER_MSM_BACKEND("CPU", (cpu_msm)); -#ifdef G2 -REGISTER_MSM_G2_BACKEND("CPU", (cpu_msm)); -REGISTER_MSM_G2_PRE_COMPUTE_BASES_BACKEND("CPU", cpu_msm_precompute_bases); -#endif // G2 \ No newline at end of file +REGISTER_MSM_PRE_COMPUTE_BASES_BACKEND("CPU_REF", cpu_msm_precompute_bases); +REGISTER_MSM_BACKEND("CPU_REF", cpu_msm_ref); diff --git a/icicle_v3/include/icicle/curves/projective.h b/icicle_v3/include/icicle/curves/projective.h index 0055c008..75d50b45 100644 --- a/icicle_v3/include/icicle/curves/projective.h +++ b/icicle_v3/include/icicle/curves/projective.h @@ -22,7 +22,7 @@ public: FF x; FF y; FF z; - + static HOST_DEVICE_INLINE Projective copy(const Projective& other) { return {other.x, other.y, other.z}; } static HOST_DEVICE_INLINE Projective zero() { return {FF::zero(), FF::one(), FF::zero()}; } static HOST_DEVICE_INLINE Affine to_affine(const Projective& point) diff --git a/icicle_v3/tests/test_curve_api.cpp b/icicle_v3/tests/test_curve_api.cpp index 473d9c94..258a4d3e 100644 --- a/icicle_v3/tests/test_curve_api.cpp +++ b/icicle_v3/tests/test_curve_api.cpp @@ -154,8 +154,11 @@ TEST_F(CurveApiTest, ecntt) auto input = std::make_unique(N); projective_t::rand_host_many(input.get(), N); - auto out_main = std::make_unique(N); - auto out_ref = std::make_unique(N); + scalar_t::rand_host_many(scalars.get(), N); + projective_t::rand_host_many_affine(bases.get(), N); + projective_t result_cpu{}; + projective_t result_cpu_dbl_n_add{}; + projective_t result_cpu_ref{}; // TODO Yuval should be projective auto run = [&](const std::string& dev_type, projective_t* out, const char* msg, bool measure, int iters) { Device dev = {dev_type, 0}; @@ -174,10 +177,12 @@ TEST_F(CurveApiTest, ecntt) ntt_release_domain(); }; - run(s_main_target, out_main.get(), "ecntt", VERBOSE /*=measure*/, 1 /*=iters*/); - run(s_ref_target, out_ref.get(), "ecntt", VERBOSE /*=measure*/, 1 /*=iters*/); - // ASSERT_EQ(0, memcmp(out_main.get(), out_ref.get(), N * sizeof(projective_t))); // TODO ucomment when CPU is - // implemented + // run("CPU", &result_cpu_dbl_n_add, "CPU msm", false /*=measure*/, 1 /*=iters*/); // warmup + run("CPU", &result_cpu, "CPU msm", VERBOSE /*=measure*/, 1 /*=iters*/); + run("CPU_REF", &result_cpu_ref, "CPU_REF msm", VERBOSE /*=measure*/, 1 /*=iters*/); + // TODO test something + + ASSERT_EQ(result_cpu,result_cpu_ref); } #endif // ECNTT