add vec_div api

Yuval Shekel
2024-07-16 16:59:57 +03:00
parent 028f59cef0
commit f13f24cff2
5 changed files with 63 additions and 9 deletions

View File

@@ -48,6 +48,32 @@ cpu_vector_mul(const Device& device, const T* vec_a, const T* vec_b, uint64_t n,
REGISTER_VECTOR_MUL_BACKEND("CPU", cpu_vector_mul<scalar_t>);
/*********************************** DIV ***********************************/
template <typename T>
eIcicleError
cpu_vector_div(const Device& device, const T* vec_a, const T* vec_b, uint64_t n, const VecOpsConfig& config, T* output)
{
  for (uint64_t i = 0; i < n; ++i) {
    output[i] = vec_a[i] * T::inverse(vec_b[i]);
  }
  return eIcicleError::SUCCESS;
}
REGISTER_VECTOR_DIV_BACKEND("CPU", cpu_vector_div<scalar_t>);
/*********************************** MUL BY SCALAR***********************************/
template <typename T>
eIcicleError cpu_scalar_mul(
const Device& device, const T* scalar_a, const T* vec_b, uint64_t n, const VecOpsConfig& config, T* output)
{
  for (uint64_t i = 0; i < n; ++i) {
    output[i] = *scalar_a * vec_b[i];
  }
  return eIcicleError::SUCCESS;
}
REGISTER_SCALAR_MUL_BACKEND("CPU", cpu_scalar_mul<scalar_t>);
/*********************************** CONVERT MONTGOMERY ***********************************/
template <typename T>
eIcicleError cpu_convert_montgomery(
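For context on the CPU kernel above: finite fields have no native division, so cpu_vector_div computes output[i] = vec_a[i] * inverse(vec_b[i]). A minimal standalone sketch of the same pattern over a toy prime field (all names below are illustrative stand-ins, not part of the icicle API; inversion uses Fermat's little theorem):

#include <cstdint>
#include <cstdio>

// Toy prime field F_p, p = 2^31 - 1, standing in for icicle's scalar_t.
struct Fp {
  static constexpr uint64_t p = 2147483647ULL;
  uint64_t v;
  Fp operator*(Fp o) const { return {v * o.v % p}; } // products fit in 64 bits since p < 2^31
  static Fp inverse(Fp a) // a^(p-2) mod p, by Fermat's little theorem
  {
    uint64_t r = 1, b = a.v, e = p - 2;
    while (e) {
      if (e & 1) r = r * b % p;
      b = b * b % p;
      e >>= 1;
    }
    return {r};
  }
};

// Mirrors cpu_vector_div above: out[i] = a[i] / b[i], computed as a[i] * b[i]^-1.
void vector_div_sketch(const Fp* a, const Fp* b, uint64_t n, Fp* out)
{
  for (uint64_t i = 0; i < n; ++i)
    out[i] = a[i] * Fp::inverse(b[i]);
}

int main()
{
  Fp a[2] = {{6}, {10}}, b[2] = {{3}, {5}}, out[2];
  vector_div_sketch(a, b, 2, out);
  printf("%llu %llu\n", (unsigned long long)out[0].v, (unsigned long long)out[1].v); // prints "2 2"
  return 0;
}

Note that per-element Fermat inversion costs a full exponentiation; Montgomery's batch-inversion trick could amortize that to one inversion per vector, but the backend above keeps the straightforward per-element form.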

View File

@@ -137,6 +137,14 @@ __global__ void div_element_wise_kernel(const E* element_vec1, const E* element_
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  if (tid < n) { result[tid] = element_vec1[tid] * E::inverse(element_vec2[tid]); }
}
template <typename E>
eIcicleError
div_cuda(const Device& device, const E* vec_a, const E* vec_b, int n, const VecOpsConfig& config, E* result)
{
  cudaError_t err = vec_op<E, div_element_wise_kernel>(vec_a, vec_b, n, n, config, result, n);
  return translateCudaError(err);
}
/*============================== transpose ==============================*/
template <typename E>
@@ -334,6 +342,7 @@ eIcicleError slice_cuda(
REGISTER_VECTOR_ADD_BACKEND("CUDA", add_cuda<scalar_t>);
REGISTER_VECTOR_SUB_BACKEND("CUDA", sub_cuda<scalar_t>);
REGISTER_VECTOR_MUL_BACKEND("CUDA", mul_cuda<scalar_t>);
REGISTER_VECTOR_DIV_BACKEND("CUDA", div_cuda<scalar_t>);
REGISTER_SCALAR_MUL_BACKEND("CUDA", mul_scalar_cuda<scalar_t>);
REGISTER_MATRIX_TRANSPOSE_BACKEND("CUDA", matrix_transpose_cuda<scalar_t>);
REGISTER_BIT_REVERSE_BACKEND("CUDA", bit_reverse_cuda<scalar_t>);
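The CUDA path mirrors the CPU semantics but assigns one thread per element, exactly as in div_element_wise_kernel above. A self-contained sketch of that launch pattern, with a toy field standing in for scalar_t (illustrative only; icicle's real vec_op wrapper additionally handles streams, device pointers, and async execution per VecOpsConfig):

#include <cstdint>
#include <cstdio>

// Toy prime field standing in for icicle's scalar_t (illustrative only).
struct Fp {
  static constexpr uint64_t p = 2147483647ULL; // 2^31 - 1
  uint64_t v;
  __device__ Fp operator*(Fp o) const { return {v * o.v % p}; }
  __device__ static Fp inverse(Fp a) // a^(p-2) mod p, by Fermat's little theorem
  {
    uint64_t r = 1, b = a.v, e = p - 2;
    while (e) {
      if (e & 1) r = r * b % p;
      b = b * b % p;
      e >>= 1;
    }
    return {r};
  }
};

// One thread per element, same shape as div_element_wise_kernel above.
__global__ void div_kernel(const Fp* a, const Fp* b, int n, Fp* out)
{
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  if (tid < n) { out[tid] = a[tid] * Fp::inverse(b[tid]); }
}

int main()
{
  const int n = 2;
  Fp ha[n] = {{6}, {10}}, hb[n] = {{3}, {5}}, hout[n];
  Fp *da, *db, *dout;
  cudaMalloc(&da, n * sizeof(Fp));
  cudaMalloc(&db, n * sizeof(Fp));
  cudaMalloc(&dout, n * sizeof(Fp));
  cudaMemcpy(da, ha, n * sizeof(Fp), cudaMemcpyHostToDevice);
  cudaMemcpy(db, hb, n * sizeof(Fp), cudaMemcpyHostToDevice);
  div_kernel<<<1, 256>>>(da, db, n, dout);
  cudaMemcpy(hout, dout, n * sizeof(Fp), cudaMemcpyDeviceToHost); // implicit sync
  printf("%llu %llu\n", (unsigned long long)hout[0].v, (unsigned long long)hout[1].v); // prints "2 2"
  cudaFree(da); cudaFree(db); cudaFree(dout);
  return 0;
}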

View File

@@ -1,6 +1,5 @@
#pragma once
#include "stdint.h"
// #include "../../../src/vec_ops/vec_ops.cu" // TODO Yuval: avoid this
namespace polynomials {
@@ -88,12 +87,4 @@ namespace polynomials {
  r[tid] = r[tid] - monomial_coeff * b_coeff;
}
/*============================== slice ==============================*/
template <typename T>
__global__ void slice_kernel(const T* in, T* out, int offset, int stride, int size)
{
  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  if (tid < size) { out[tid] = in[offset + tid * stride]; }
}
} // namespace polynomials

View File

@@ -56,6 +56,9 @@ namespace icicle {
template <typename T>
eIcicleError vector_mul(const T* vec_a, const T* vec_b, uint64_t size, const VecOpsConfig& config, T* output);
template <typename T>
eIcicleError vector_div(const T* vec_a, const T* vec_b, uint64_t size, const VecOpsConfig& config, T* output);
template <typename T>
eIcicleError convert_montgomery(const T* input, uint64_t size, bool is_into, const VecOpsConfig& config, T* output);
@@ -115,6 +118,16 @@ namespace icicle {
    }(); \
  }
void register_vector_div(const std::string& deviceType, scalarVectorOpImpl impl);
#define REGISTER_VECTOR_DIV_BACKEND(DEVICE_TYPE, FUNC) \
  namespace { \
    static bool UNIQUE(_reg_vec_div) = []() -> bool { \
      register_vector_div(DEVICE_TYPE, FUNC); \
      return true; \
    }(); \
  }
void register_scalar_mul(const std::string& deviceType, scalarVectorOpImpl impl);
#define REGISTER_SCALAR_MUL_BACKEND(DEVICE_TYPE, FUNC) \

View File

@@ -102,6 +102,21 @@ namespace icicle {
}
#endif // EXT_FIELD
/*********************************** DIV ***********************************/
ICICLE_DISPATCHER_INST(VectorDivDispatcher, vector_div, scalarVectorOpImpl);
extern "C" eIcicleError CONCAT_EXPAND(FIELD, vector_div)(
const scalar_t* vec_a, const scalar_t* vec_b, uint64_t n, const VecOpsConfig& config, scalar_t* output)
{
  return VectorDivDispatcher::execute(vec_a, vec_b, n, config, output);
}
template <>
eIcicleError
vector_div(const scalar_t* vec_a, const scalar_t* vec_b, uint64_t n, const VecOpsConfig& config, scalar_t* output)
{
  return CONCAT_EXPAND(FIELD, vector_div)(vec_a, vec_b, n, config, output);
}
/*********************************** MUL BY SCALAR ***********************************/
ICICLE_DISPATCHER_INST(ScalarMulDispatcher, scalar_mul, scalarVectorOpImpl);
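The extern "C" wrapper above uses CONCAT_EXPAND(FIELD, vector_div) so each field build exports a distinctly named C symbol (e.g. bn254_vector_div when FIELD expands to bn254), while the template specialization keeps a uniform C++ entry point. A self-contained sketch of that token-pasting trick (the macro definitions below are illustrative; icicle keeps its own in a utils header):

#include <cstdio>

// FIELD is normally a build-time flag (e.g. -DFIELD=bn254); hard-coded here for the sketch.
#define FIELD bn254

#define CONCAT_DIRECT(a, b) a##_##b
#define CONCAT_EXPAND(a, b) CONCAT_DIRECT(a, b) // extra level forces FIELD to expand before pasting

// Emits the C symbol bn254_vector_div; integer division stands in for field division.
extern "C" int CONCAT_EXPAND(FIELD, vector_div)(int a, int b) { return a / b; }

int main()
{
  printf("%d\n", bn254_vector_div(10, 5)); // prints "2" — the pasted symbol is callable by its full name
  return 0;
}

This is why each field library can coexist in one binary: the C ABI surface is namespaced per field by the preprocessor, and the templated vector_div<scalar_t> simply forwards to whichever symbol the current build defines.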