mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-10 07:57:56 -05:00
add vec_div api
This commit is contained in:
@@ -48,6 +48,32 @@ cpu_vector_mul(const Device& device, const T* vec_a, const T* vec_b, uint64_t n,
|
||||
|
||||
REGISTER_VECTOR_MUL_BACKEND("CPU", cpu_vector_mul<scalar_t>);
|
||||
|
||||
/*********************************** DIV ***********************************/

// Element-wise field division on the CPU backend:
//   output[i] = vec_a[i] / vec_b[i], realized as vec_a[i] * vec_b[i]^-1.
// NOTE(review): assumes no zero elements in vec_b — T::inverse(0) is
// undefined for a field element; confirm callers guarantee this.
template <typename T>
eIcicleError
cpu_vector_div(const Device& device, const T* vec_a, const T* vec_b, uint64_t n, const VecOpsConfig& config, T* output)
{
  // `device` and `config` are accepted for signature uniformity with the
  // other CPU vec-ops backends; the scalar loop below does not consult them.
  uint64_t idx = 0;
  while (idx < n) {
    output[idx] = vec_a[idx] * T::inverse(vec_b[idx]);
    ++idx;
  }
  return eIcicleError::SUCCESS;
}

REGISTER_VECTOR_DIV_BACKEND("CPU", cpu_vector_div<scalar_t>);
|
||||
|
||||
/*********************************** MUL BY SCALAR***********************************/

// Multiplies every element of vec_b by the single scalar pointed to by
// scalar_a: output[i] = (*scalar_a) * vec_b[i].
template <typename T>
eIcicleError cpu_scalar_mul(
  const Device& device, const T* scalar_a, const T* vec_b, uint64_t n, const VecOpsConfig& config, T* output)
{
  // The scalar is intentionally re-read each iteration: hoisting `*scalar_a`
  // out of the loop would change behavior if `output` ever aliases the
  // scalar's storage, so the dereference stays inside the loop body.
  for (uint64_t k = 0; k != n; ++k) {
    output[k] = *scalar_a * vec_b[k];
  }
  return eIcicleError::SUCCESS;
}

REGISTER_SCALAR_MUL_BACKEND("CPU", cpu_scalar_mul<scalar_t>);
|
||||
|
||||
/*********************************** CONVERT MONTGOMERY ***********************************/
|
||||
template <typename T>
|
||||
eIcicleError cpu_convert_montgomery(
|
||||
|
||||
@@ -137,6 +137,14 @@ __global__ void div_element_wise_kernel(const E* element_vec1, const E* element_
|
||||
int tid = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
if (tid < n) { result[tid] = element_vec1[tid] * E::inverse(element_vec2[tid]); }
|
||||
}
|
||||
|
||||
// Host-side wrapper for element-wise division on the CUDA backend: launches
// div_element_wise_kernel through the shared vec_op launcher and maps the
// resulting cudaError_t onto the icicle error enum.
// NOTE(review): `n` is forwarded three times — presumably sizes of both
// inputs and of the output as vec_op's signature requires; confirm against
// vec_op's declaration. Also note `n` is `int` here while the public API
// uses uint64_t — verify no narrowing for very large vectors.
template <typename E>
eIcicleError
div_cuda(const Device& device, const E* vec_a, const E* vec_b, int n, const VecOpsConfig& config, E* result)
{
  return translateCudaError(vec_op<E, div_element_wise_kernel>(vec_a, vec_b, n, n, config, result, n));
}
|
||||
/*============================== transpose ==============================*/
|
||||
|
||||
template <typename E>
|
||||
@@ -334,6 +342,7 @@ eIcicleError slice_cuda(
|
||||
// Register the CUDA implementations with the runtime dispatcher so that
// vec-ops calls issued on a "CUDA" device resolve to the wrappers above.
REGISTER_VECTOR_ADD_BACKEND("CUDA", add_cuda<scalar_t>);
REGISTER_VECTOR_SUB_BACKEND("CUDA", sub_cuda<scalar_t>);
REGISTER_VECTOR_MUL_BACKEND("CUDA", mul_cuda<scalar_t>);
REGISTER_VECTOR_DIV_BACKEND("CUDA", div_cuda<scalar_t>);
REGISTER_SCALAR_MUL_BACKEND("CUDA", mul_scalar_cuda<scalar_t>);
REGISTER_MATRIX_TRANSPOSE_BACKEND("CUDA", matrix_transpose_cuda<scalar_t>);
REGISTER_BIT_REVERSE_BACKEND("CUDA", bit_reverse_cuda<scalar_t>);
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
#pragma once
|
||||
#include "stdint.h"
|
||||
// #include "../../../src/vec_ops/vec_ops.cu" // TODO Yuval: avoid this
|
||||
|
||||
namespace polynomials {
|
||||
|
||||
@@ -88,12 +87,4 @@ namespace polynomials {
|
||||
r[tid] = r[tid] - monomial_coeff * b_coeff;
|
||||
}
|
||||
|
||||
/*============================== slice ==============================*/

// Gathers `size` strided elements from `in`, starting at `offset`, into a
// dense output: out[i] = in[offset + i * stride]. One thread per output
// element; threads beyond `size` exit immediately (grid-tail guard).
template <typename T>
__global__ void slice_kernel(const T* in, T* out, int offset, int stride, int size)
{
  const int idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (idx >= size) return;
  out[idx] = in[offset + idx * stride];
}
|
||||
|
||||
} // namespace polynomials
|
||||
@@ -56,6 +56,9 @@ namespace icicle {
|
||||
// Element-wise multiplication: output[i] = vec_a[i] * vec_b[i].
template <typename T>
eIcicleError vector_mul(const T* vec_a, const T* vec_b, uint64_t size, const VecOpsConfig& config, T* output);

// Element-wise division: output[i] = vec_a[i] / vec_b[i].
// NOTE(review): behavior for zero divisors is backend-defined (the CPU
// backend multiplies by T::inverse(vec_b[i])) — confirm callers avoid zeros.
template <typename T>
eIcicleError vector_div(const T* vec_a, const T* vec_b, uint64_t size, const VecOpsConfig& config, T* output);

// Converts `size` elements into (is_into == true) or out of Montgomery form.
// NOTE(review): direction semantics of `is_into` inferred from the name —
// confirm against the backend implementation.
template <typename T>
eIcicleError convert_montgomery(const T* input, uint64_t size, bool is_into, const VecOpsConfig& config, T* output);
|
||||
|
||||
@@ -115,6 +118,16 @@ namespace icicle {
|
||||
}(); \
|
||||
}
|
||||
|
||||
// Registers a backend implementation of vector_div for the given device type.
void register_vector_div(const std::string& deviceType, scalarVectorOpImpl impl);

// Self-registration hook: expands to a translation-unit-local static whose
// initializer registers FUNC as the vector_div backend for DEVICE_TYPE at
// static-initialization time (same pattern as the other *_BACKEND macros).
#define REGISTER_VECTOR_DIV_BACKEND(DEVICE_TYPE, FUNC)                                                                 \
  namespace {                                                                                                          \
    static bool UNIQUE(_reg_vec_div) = []() -> bool {                                                                  \
      register_vector_div(DEVICE_TYPE, FUNC);                                                                          \
      return true;                                                                                                     \
    }();                                                                                                               \
  }
|
||||
|
||||
void register_scalar_mul(const std::string& deviceType, scalarVectorOpImpl impl);
|
||||
|
||||
#define REGISTER_SCALAR_MUL_BACKEND(DEVICE_TYPE, FUNC) \
|
||||
|
||||
@@ -102,6 +102,21 @@ namespace icicle {
|
||||
}
|
||||
#endif // EXT_FIELD
|
||||
|
||||
/*********************************** DIV ***********************************/
// Dispatcher instance routing vector_div calls to the backend registered for
// the currently selected device.
ICICLE_DISPATCHER_INST(VectorDivDispatcher, vector_div, scalarVectorOpImpl);

// C-ABI entry point (symbol name is FIELD-prefixed via CONCAT_EXPAND),
// forwarding straight to the dispatcher; used by non-C++ language bindings.
extern "C" eIcicleError CONCAT_EXPAND(FIELD, vector_div)(
  const scalar_t* vec_a, const scalar_t* vec_b, uint64_t n, const VecOpsConfig& config, scalar_t* output)
{
  return VectorDivDispatcher::execute(vec_a, vec_b, n, config, output);
}

// C++ template-API specialization for scalar_t: thin wrapper over the C
// entry point above, so both APIs share a single dispatch path.
template <>
eIcicleError
vector_div(const scalar_t* vec_a, const scalar_t* vec_b, uint64_t n, const VecOpsConfig& config, scalar_t* output)
{
  return CONCAT_EXPAND(FIELD, vector_div)(vec_a, vec_b, n, config, output);
}
|
||||
/*********************************** MUL BY SCALAR ***********************************/
|
||||
ICICLE_DISPATCHER_INST(ScalarMulDispatcher, scalar_mul, scalarVectorOpImpl);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user