Files
icicle/goicicle/templates/hfiles/ntt.h.tmpl
2023-08-27 14:08:56 +03:00

182 lines
7.9 KiB
Cheetah

#include <cuda.h>
#include <stdbool.h>
// ntt.h
#ifndef _{{.CurveNameUpperCase}}_NTT_H
#define _{{.CurveNameUpperCase}}_NTT_H
#ifdef __cplusplus
extern "C" {
#endif
// Incomplete declaration of {{.CurveNameUpperCase}} projective and affine structs
typedef struct {{.CurveNameUpperCase}}_projective_t {{.CurveNameUpperCase}}_projective_t;
typedef struct {{.CurveNameUpperCase}}_affine_t {{.CurveNameUpperCase}}_affine_t;
typedef struct {{.CurveNameUpperCase}}_scalar_t {{.CurveNameUpperCase}}_scalar_t;
typedef struct {{.CurveNameUpperCase}}_g2_projective_t {{.CurveNameUpperCase}}_g2_projective_t;
typedef struct {{.CurveNameUpperCase}}_g2_affine_t {{.CurveNameUpperCase}}_g2_affine_t;
int ntt_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* arr, uint32_t n, bool inverse, size_t device_id);
int ntt_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
int ecntt_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* arr, uint32_t n, bool inverse, size_t device_id);
int ecntt_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id);
{{.CurveNameUpperCase}}_scalar_t*
build_domain_cuda_{{.CurveNameLowerCase}}(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id, size_t stream);
int interpolate_scalars_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_evaluations,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned n,
unsigned device_id,
size_t stream);
int interpolate_scalars_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_evaluations,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int interpolate_points_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_projective_t* d_evaluations,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned n,
size_t device_id,
size_t stream);
int interpolate_points_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_projective_t* d_evaluations,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int interpolate_scalars_on_coset_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_evaluations,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned n,
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int interpolate_scalars_batch_on_coset_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_evaluations,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned n,
unsigned batch_size,
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_scalars_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned device_id,
size_t stream);
int evaluate_scalars_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int evaluate_points_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_projective_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
size_t device_id,
size_t stream);
int evaluate_points_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_projective_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
size_t device_id,
size_t stream);
int evaluate_scalars_on_coset_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
unsigned device_id,
size_t stream);
int evaluate_scalars_on_coset_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out,
{{.CurveNameUpperCase}}_scalar_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_points_on_coset_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_projective_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int evaluate_points_on_coset_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* d_out,
{{.CurveNameUpperCase}}_projective_t* d_coefficients,
{{.CurveNameUpperCase}}_scalar_t* d_domain,
unsigned domain_size,
unsigned n,
unsigned batch_size,
{{.CurveNameUpperCase}}_scalar_t* coset_powers,
size_t device_id,
size_t stream);
int reverse_order_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* arr, int n, size_t device_id, size_t stream);
int reverse_order_scalars_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* arr, int n, int batch_size, size_t device_id, size_t stream);
int reverse_order_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* arr, int n, size_t device_id, size_t stream);
int reverse_order_points_batch_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_projective_t* arr, int n, int batch_size, size_t device_id, size_t stream);
int add_scalars_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_in1, {{.CurveNameUpperCase}}_scalar_t* d_in2, unsigned n, size_t stream);
int sub_scalars_cuda_{{.CurveNameLowerCase}}(
{{.CurveNameUpperCase}}_scalar_t* d_out, {{.CurveNameUpperCase}}_scalar_t* d_in1, {{.CurveNameUpperCase}}_scalar_t* d_in2, unsigned n, size_t stream);
int to_montgomery_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_inout, unsigned n, size_t stream);
int from_montgomery_scalars_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_scalar_t* d_inout, unsigned n, size_t stream);
// points g1
int to_montgomery_proj_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* d_inout, unsigned n, size_t stream);
int from_montgomery_proj_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_projective_t* d_inout, unsigned n, size_t stream);
int to_montgomery_aff_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_affine_t* d_inout, unsigned n, size_t stream);
int from_montgomery_aff_points_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_affine_t* d_inout, unsigned n, size_t stream);
// points g2
int to_montgomery_proj_points_g2_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* d_inout, unsigned n, size_t stream);
int from_montgomery_proj_points_g2_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_projective_t* d_inout, unsigned n, size_t stream);
int to_montgomery_aff_points_g2_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_affine_t* d_inout, unsigned n, size_t stream);
int from_montgomery_aff_points_g2_cuda_{{.CurveNameLowerCase}}({{.CurveNameUpperCase}}_g2_affine_t* d_inout, unsigned n, size_t stream);
#ifdef __cplusplus
}
#endif
#endif /* _{{.CurveNameUpperCase}}_NTT_H */