mirror of
https://github.com/pseXperiments/icicle.git
synced 2026-01-08 23:17:54 -05:00
ICICLE V2 Release (#492)
This PR introduces major updates for ICICLE Core, Rust and Golang bindings --------- Co-authored-by: Yuval Shekel <yshekel@gmail.com> Co-authored-by: DmytroTym <dmytrotym1@gmail.com> Co-authored-by: Otsar <122266060+Otsar-Raikou@users.noreply.github.com> Co-authored-by: VitaliiH <vhnatyk@gmail.com> Co-authored-by: release-bot <release-bot@ingonyama.com> Co-authored-by: Stas <spolonsky@icloud.com> Co-authored-by: Jeremy Felder <jeremy.felder1@gmail.com> Co-authored-by: ImmanuelSegol <3ditds@gmail.com> Co-authored-by: JimmyHongjichuan <45908291+JimmyHongjichuan@users.noreply.github.com> Co-authored-by: pierre <pierreuu@gmail.com> Co-authored-by: Leon Hibnik <107353745+LeonHibnik@users.noreply.github.com> Co-authored-by: nonam3e <timur@ingonyama.com> Co-authored-by: Vlad <88586482+vladfdp@users.noreply.github.com> Co-authored-by: LeonHibnik <leon@ingonyama.com> Co-authored-by: nonam3e <71525212+nonam3e@users.noreply.github.com> Co-authored-by: vladfdp <vlad.heintz@gmail.com>
This commit is contained in:
@@ -8,6 +8,8 @@ members = [
|
||||
"icicle-curves/icicle-bls12-381",
|
||||
"icicle-curves/icicle-bn254",
|
||||
"icicle-curves/icicle-grumpkin",
|
||||
"icicle-fields/icicle-babybear",
|
||||
"icicle-hash",
|
||||
]
|
||||
exclude = [
|
||||
"icicle-curves/icicle-curve-template",
|
||||
|
||||
@@ -19,6 +19,13 @@ ark-std = { version = "0.4.0", optional = true }
|
||||
|
||||
rayon = "1.8.1"
|
||||
|
||||
criterion = "0.3"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
serial_test = "3.0.0"
|
||||
|
||||
|
||||
[features]
|
||||
default = []
|
||||
arkworks = ["ark-ff", "ark-ec", "ark-poly", "ark-std"]
|
||||
|
||||
@@ -7,8 +7,10 @@ use ark_ec::models::CurveConfig as ArkCurveConfig;
|
||||
use ark_ec::short_weierstrass::{Affine as ArkAffine, Projective as ArkProjective, SWCurveConfig};
|
||||
#[cfg(feature = "arkworks")]
|
||||
use ark_ec::AffineRepr;
|
||||
use icicle_cuda_runtime::device::check_device;
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
|
||||
use std::fmt::Debug;
|
||||
|
||||
pub trait Curve: Debug + PartialEq + Copy + Clone {
|
||||
@@ -24,9 +26,19 @@ pub trait Curve: Debug + PartialEq + Copy + Clone {
|
||||
#[doc(hidden)]
|
||||
fn generate_random_affine_points(size: usize) -> Vec<Affine<Self>>;
|
||||
#[doc(hidden)]
|
||||
fn convert_affine_montgomery(points: &mut HostOrDeviceSlice<Affine<Self>>, is_into: bool) -> CudaError;
|
||||
fn convert_affine_montgomery(
|
||||
points: *mut Affine<Self>,
|
||||
len: usize,
|
||||
is_into: bool,
|
||||
ctx: &DeviceContext,
|
||||
) -> CudaError;
|
||||
#[doc(hidden)]
|
||||
fn convert_projective_montgomery(points: &mut HostOrDeviceSlice<Projective<Self>>, is_into: bool) -> CudaError;
|
||||
fn convert_projective_montgomery(
|
||||
points: *mut Projective<Self>,
|
||||
len: usize,
|
||||
is_into: bool,
|
||||
ctx: &DeviceContext,
|
||||
) -> CudaError;
|
||||
|
||||
#[cfg(feature = "arkworks")]
|
||||
type ArkSWConfig: SWCurveConfig;
|
||||
@@ -77,6 +89,9 @@ impl<C: Curve> Affine<C> {
|
||||
|
||||
impl<C: Curve> From<Affine<C>> for Projective<C> {
|
||||
fn from(item: Affine<C>) -> Self {
|
||||
if item == (Affine::<C>::zero()) {
|
||||
return Self::zero();
|
||||
}
|
||||
Self {
|
||||
x: item.x,
|
||||
y: item.y,
|
||||
@@ -109,35 +124,67 @@ impl<C: Curve> Projective<C> {
|
||||
|
||||
impl<C: Curve> PartialEq for Projective<C> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
C::eq_proj(self as *const _, other as *const _)
|
||||
C::eq_proj(self as *const Self, other as *const Self)
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Curve> From<Projective<C>> for Affine<C> {
|
||||
fn from(proj: Projective<C>) -> Self {
|
||||
let mut aff = Self::zero();
|
||||
C::to_affine(&proj as *const _, &mut aff as *mut _);
|
||||
C::to_affine(&proj as *const Projective<C>, &mut aff as *mut Self);
|
||||
aff
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Curve> MontgomeryConvertible for Affine<C> {
|
||||
fn to_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError {
|
||||
C::convert_affine_montgomery(values, true)
|
||||
impl<'a, C: Curve> MontgomeryConvertible<'a> for Affine<C> {
|
||||
fn to_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError {
|
||||
check_device(ctx.device_id);
|
||||
assert_eq!(
|
||||
values
|
||||
.device_id()
|
||||
.unwrap(),
|
||||
ctx.device_id,
|
||||
"Device ids are different in slice and context"
|
||||
);
|
||||
C::convert_affine_montgomery(unsafe { values.as_mut_ptr() }, values.len(), true, ctx)
|
||||
}
|
||||
|
||||
fn from_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError {
|
||||
C::convert_affine_montgomery(values, false)
|
||||
fn from_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError {
|
||||
check_device(ctx.device_id);
|
||||
assert_eq!(
|
||||
values
|
||||
.device_id()
|
||||
.unwrap(),
|
||||
ctx.device_id,
|
||||
"Device ids are different in slice and context"
|
||||
);
|
||||
C::convert_affine_montgomery(unsafe { values.as_mut_ptr() }, values.len(), false, ctx)
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Curve> MontgomeryConvertible for Projective<C> {
|
||||
fn to_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError {
|
||||
C::convert_projective_montgomery(values, true)
|
||||
impl<'a, C: Curve> MontgomeryConvertible<'a> for Projective<C> {
|
||||
fn to_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError {
|
||||
check_device(ctx.device_id);
|
||||
assert_eq!(
|
||||
values
|
||||
.device_id()
|
||||
.unwrap(),
|
||||
ctx.device_id,
|
||||
"Device ids are different in slice and context"
|
||||
);
|
||||
C::convert_projective_montgomery(unsafe { values.as_mut_ptr() }, values.len(), true, ctx)
|
||||
}
|
||||
|
||||
fn from_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError {
|
||||
C::convert_projective_montgomery(values, false)
|
||||
fn from_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError {
|
||||
check_device(ctx.device_id);
|
||||
assert_eq!(
|
||||
values
|
||||
.device_id()
|
||||
.unwrap(),
|
||||
ctx.device_id,
|
||||
"Device ids are different in slice and context"
|
||||
);
|
||||
C::convert_projective_montgomery(unsafe { values.as_mut_ptr() }, values.len(), false, ctx)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -228,22 +275,22 @@ macro_rules! impl_curve {
|
||||
use super::{$affine_type, $projective_type, CudaError, DeviceContext};
|
||||
|
||||
extern "C" {
|
||||
#[link_name = concat!($curve_prefix, "Eq")]
|
||||
#[link_name = concat!($curve_prefix, "_eq")]
|
||||
pub(crate) fn eq(point1: *const $projective_type, point2: *const $projective_type) -> bool;
|
||||
#[link_name = concat!($curve_prefix, "ToAffine")]
|
||||
#[link_name = concat!($curve_prefix, "_to_affine")]
|
||||
pub(crate) fn proj_to_affine(point: *const $projective_type, point_out: *mut $affine_type);
|
||||
#[link_name = concat!($curve_prefix, "GenerateProjectivePoints")]
|
||||
#[link_name = concat!($curve_prefix, "_generate_projective_points")]
|
||||
pub(crate) fn generate_projective_points(points: *mut $projective_type, size: usize);
|
||||
#[link_name = concat!($curve_prefix, "GenerateAffinePoints")]
|
||||
#[link_name = concat!($curve_prefix, "_generate_affine_points")]
|
||||
pub(crate) fn generate_affine_points(points: *mut $affine_type, size: usize);
|
||||
#[link_name = concat!($curve_prefix, "AffineConvertMontgomery")]
|
||||
#[link_name = concat!($curve_prefix, "_affine_convert_montgomery")]
|
||||
pub(crate) fn _convert_affine_montgomery(
|
||||
points: *mut $affine_type,
|
||||
size: usize,
|
||||
is_into: bool,
|
||||
ctx: *const DeviceContext,
|
||||
) -> CudaError;
|
||||
#[link_name = concat!($curve_prefix, "ProjectiveConvertMontgomery")]
|
||||
#[link_name = concat!($curve_prefix, "_projective_convert_montgomery")]
|
||||
pub(crate) fn _convert_projective_montgomery(
|
||||
points: *mut $projective_type,
|
||||
size: usize,
|
||||
@@ -284,27 +331,29 @@ macro_rules! impl_curve {
|
||||
res
|
||||
}
|
||||
|
||||
fn convert_affine_montgomery(points: &mut HostOrDeviceSlice<$affine_type>, is_into: bool) -> CudaError {
|
||||
fn convert_affine_montgomery(
|
||||
points: *mut $affine_type,
|
||||
len: usize,
|
||||
is_into: bool,
|
||||
ctx: &DeviceContext,
|
||||
) -> CudaError {
|
||||
unsafe {
|
||||
$curve_prefix_ident::_convert_affine_montgomery(
|
||||
points.as_mut_ptr(),
|
||||
points.len(),
|
||||
is_into,
|
||||
&DeviceContext::default() as *const _ as *const DeviceContext,
|
||||
)
|
||||
$curve_prefix_ident::_convert_affine_montgomery(points, len, is_into, ctx as *const DeviceContext)
|
||||
}
|
||||
}
|
||||
|
||||
fn convert_projective_montgomery(
|
||||
points: &mut HostOrDeviceSlice<$projective_type>,
|
||||
points: *mut $projective_type,
|
||||
len: usize,
|
||||
is_into: bool,
|
||||
ctx: &DeviceContext,
|
||||
) -> CudaError {
|
||||
unsafe {
|
||||
$curve_prefix_ident::_convert_projective_montgomery(
|
||||
points.as_mut_ptr(),
|
||||
points.len(),
|
||||
points,
|
||||
len,
|
||||
is_into,
|
||||
&DeviceContext::default() as *const _ as *const DeviceContext,
|
||||
ctx as *const DeviceContext,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -321,11 +370,6 @@ macro_rules! impl_curve_tests {
|
||||
$base_limbs:ident,
|
||||
$curve:ident
|
||||
) => {
|
||||
#[test]
|
||||
fn test_scalar_equality() {
|
||||
check_scalar_equality::<<$curve as Curve>::ScalarField>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_affine_projective_convert() {
|
||||
check_affine_projective_convert::<$curve>()
|
||||
|
||||
271
wrappers/rust/icicle-core/src/ecntt/mod.rs
Normal file
271
wrappers/rust/icicle-core/src/ecntt/mod.rs
Normal file
@@ -0,0 +1,271 @@
|
||||
#![cfg(feature = "ec_ntt")]
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
use crate::{
|
||||
curve::Curve,
|
||||
ntt::{FieldImpl, IcicleResult, NTTConfig, NTTDir},
|
||||
};
|
||||
|
||||
pub use crate::curve::Projective;
|
||||
|
||||
// #[cfg(feature = "arkworks")] //TODO: uncomment on correctness test
|
||||
#[doc(hidden)]
|
||||
pub mod tests;
|
||||
|
||||
#[doc(hidden)]
|
||||
pub trait ECNTT<C: Curve>: ECNTTUnchecked<Projective<C>, C::ScalarField> {}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub trait ECNTTUnchecked<T, F: FieldImpl> {
|
||||
fn ntt_unchecked(
|
||||
input: &(impl HostOrDeviceSlice<T> + ?Sized),
|
||||
dir: NTTDir,
|
||||
cfg: &NTTConfig<F>,
|
||||
output: &mut (impl HostOrDeviceSlice<T> + ?Sized),
|
||||
) -> IcicleResult<()>;
|
||||
|
||||
fn ntt_inplace_unchecked(
|
||||
inout: &mut (impl HostOrDeviceSlice<T> + ?Sized),
|
||||
dir: NTTDir,
|
||||
cfg: &NTTConfig<F>,
|
||||
) -> IcicleResult<()>;
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_ecntt {
|
||||
(
|
||||
$field_prefix:literal,
|
||||
$field_prefix_ident:ident,
|
||||
$field:ident,
|
||||
$field_config:ident,
|
||||
$curve:ident
|
||||
) => {
|
||||
mod $field_prefix_ident {
|
||||
use crate::curve;
|
||||
use crate::curve::BaseCfg;
|
||||
use crate::ecntt::IcicleResult;
|
||||
use crate::ecntt::Projective;
|
||||
use crate::ecntt::{
|
||||
$curve, $field, $field_config, CudaError, DeviceContext, NTTConfig, NTTDir, DEFAULT_DEVICE_ID,
|
||||
};
|
||||
use icicle_core::ecntt::ECNTTUnchecked;
|
||||
use icicle_core::ecntt::ECNTT;
|
||||
use icicle_core::impl_ntt_without_domain;
|
||||
use icicle_core::ntt::NTT;
|
||||
use icicle_core::traits::IcicleResultWrap;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
pub type ProjectiveC = Projective<$curve>;
|
||||
impl_ntt_without_domain!(
|
||||
$field_prefix,
|
||||
$field,
|
||||
$field_config,
|
||||
ECNTTUnchecked,
|
||||
"_ecntt_",
|
||||
ProjectiveC
|
||||
);
|
||||
|
||||
impl ECNTT<$curve> for $field_config {}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Computes the ECNTT, or a batch of several ECNTTs.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `input` - inputs of the ECNTT.
|
||||
///
|
||||
/// * `dir` - whether to compute forward of inverse ECNTT.
|
||||
///
|
||||
/// * `cfg` - config used to specify extra arguments of the ECNTT.
|
||||
///
|
||||
/// * `output` - buffer to write the ECNTT outputs into. Must be of the same size as `input`.
|
||||
pub fn ecntt<C: Curve>(
|
||||
input: &(impl HostOrDeviceSlice<Projective<C>> + ?Sized),
|
||||
dir: NTTDir,
|
||||
cfg: &NTTConfig<C::ScalarField>,
|
||||
output: &mut (impl HostOrDeviceSlice<Projective<C>> + ?Sized),
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
C::ScalarField: FieldImpl,
|
||||
<C::ScalarField as FieldImpl>::Config: ECNTT<C>,
|
||||
{
|
||||
<<C::ScalarField as FieldImpl>::Config as ECNTTUnchecked<Projective<C>, C::ScalarField>>::ntt_unchecked(
|
||||
input, dir, &cfg, output,
|
||||
)
|
||||
}
|
||||
|
||||
/// Computes the ECNTT, or a batch of several ECNTTs inplace.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `inout` - buffer with inputs to also write the ECNTT outputs into.
|
||||
///
|
||||
/// * `dir` - whether to compute forward of inverse ECNTT.
|
||||
///
|
||||
/// * `cfg` - config used to specify extra arguments of the ECNTT.
|
||||
pub fn ecntt_inplace<C: Curve>(
|
||||
inout: &mut (impl HostOrDeviceSlice<Projective<C>> + ?Sized),
|
||||
dir: NTTDir,
|
||||
cfg: &NTTConfig<C::ScalarField>,
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
C::ScalarField: FieldImpl,
|
||||
<C::ScalarField as FieldImpl>::Config: ECNTT<C>,
|
||||
{
|
||||
<<C::ScalarField as FieldImpl>::Config as ECNTTUnchecked<Projective<C>, C::ScalarField>>::ntt_inplace_unchecked(
|
||||
inout, dir, &cfg,
|
||||
)
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_ecntt_tests {
|
||||
(
|
||||
$field:ident,
|
||||
$curve:ident
|
||||
) => {
|
||||
use icicle_core::ntt::tests::init_domain;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
const MAX_SIZE: u64 = 1 << 18;
|
||||
static INIT: OnceLock<()> = OnceLock::new();
|
||||
const FAST_TWIDDLES_MODE: bool = false;
|
||||
|
||||
#[test]
|
||||
fn test_ecntt() {
|
||||
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
|
||||
check_ecntt::<$curve>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ecntt_batch() {
|
||||
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
|
||||
check_ecntt_batch::<$curve>()
|
||||
}
|
||||
|
||||
// #[test] //TODO: multi-device test
|
||||
// fn test_ntt_device_async() {
|
||||
// // init_domain is in this test is performed per-device
|
||||
// check_ecntt_device_async::<$field>()
|
||||
// }
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_ecntt_bench {
|
||||
(
|
||||
$field_prefix:literal,
|
||||
$field:ident,
|
||||
$curve:ident
|
||||
) => {
|
||||
use icicle_core::ntt::ntt;
|
||||
use icicle_core::ntt::NTTDomain;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use icicle_core::{
|
||||
curve::Curve,
|
||||
ecntt::{ecntt, Projective},
|
||||
ntt::{FieldImpl, NTTConfig, NTTDir, NttAlgorithm, Ordering},
|
||||
traits::ArkConvertible,
|
||||
};
|
||||
|
||||
use icicle_core::ecntt::ECNTT;
|
||||
use icicle_core::ntt::NTT;
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
|
||||
fn ecntt_for_bench<C: Curve>(
|
||||
points: &(impl HostOrDeviceSlice<Projective<C>> + ?Sized),
|
||||
mut batch_ntt_result: &mut (impl HostOrDeviceSlice<Projective<C>> + ?Sized),
|
||||
test_sizes: usize,
|
||||
batch_size: usize,
|
||||
is_inverse: NTTDir,
|
||||
ordering: Ordering,
|
||||
config: &mut NTTConfig<C::ScalarField>,
|
||||
_seed: u32,
|
||||
) where
|
||||
C::ScalarField: ArkConvertible,
|
||||
<C::ScalarField as FieldImpl>::Config: ECNTT<C>,
|
||||
<C::ScalarField as FieldImpl>::Config: NTTDomain<C::ScalarField>,
|
||||
{
|
||||
ecntt(points, is_inverse, config, batch_ntt_result).unwrap();
|
||||
}
|
||||
|
||||
static INIT: OnceLock<()> = OnceLock::new();
|
||||
|
||||
fn benchmark_ecntt<C: Curve>(c: &mut Criterion)
|
||||
where
|
||||
C::ScalarField: ArkConvertible,
|
||||
<C::ScalarField as FieldImpl>::Config: ECNTT<C>,
|
||||
<C::ScalarField as FieldImpl>::Config: NTTDomain<C::ScalarField>,
|
||||
{
|
||||
use criterion::SamplingMode;
|
||||
use icicle_core::ntt::ntt;
|
||||
use icicle_core::ntt::tests::init_domain;
|
||||
use icicle_core::ntt::NTTDomain;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
|
||||
let group_id = format!("{} EC NTT", $field_prefix);
|
||||
let mut group = c.benchmark_group(&group_id);
|
||||
group.sampling_mode(SamplingMode::Flat);
|
||||
group.sample_size(10);
|
||||
|
||||
const MAX_SIZE: u64 = 1 << 18;
|
||||
const FAST_TWIDDLES_MODE: bool = false;
|
||||
|
||||
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
|
||||
|
||||
let test_sizes = [1 << 4, 1 << 8];
|
||||
let batch_sizes = [1, 1 << 4, 128];
|
||||
for test_size in test_sizes {
|
||||
for batch_size in batch_sizes {
|
||||
let points = C::generate_random_projective_points(test_size);
|
||||
let points = HostSlice::from_slice(&points);
|
||||
let mut batch_ntt_result = vec![Projective::<C>::zero(); batch_size * test_size];
|
||||
let batch_ntt_result = HostSlice::from_mut_slice(&mut batch_ntt_result);
|
||||
let mut config = NTTConfig::default();
|
||||
for is_inverse in [NTTDir::kInverse, NTTDir::kForward] {
|
||||
for ordering in [
|
||||
Ordering::kNN,
|
||||
// Ordering::kNR, // times are ~ same as kNN
|
||||
// Ordering::kRN,
|
||||
// Ordering::kRR,
|
||||
// Ordering::kNM, // no mixed radix ecntt
|
||||
// Ordering::kMN,
|
||||
] {
|
||||
config.ordering = ordering;
|
||||
for alg in [NttAlgorithm::Radix2] {
|
||||
config.batch_size = batch_size as i32;
|
||||
config.ntt_algorithm = alg;
|
||||
let bench_descr = format!(
|
||||
"{:?} {:?} {:?} {} x {}",
|
||||
alg, ordering, is_inverse, test_size, batch_size
|
||||
);
|
||||
group.bench_function(&bench_descr, |b| {
|
||||
b.iter(|| {
|
||||
ecntt_for_bench::<C>(
|
||||
points,
|
||||
batch_ntt_result,
|
||||
test_size,
|
||||
batch_size,
|
||||
is_inverse,
|
||||
ordering,
|
||||
&mut config,
|
||||
black_box(1),
|
||||
)
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, benchmark_ecntt<$curve>);
|
||||
criterion_main!(benches);
|
||||
};
|
||||
}
|
||||
92
wrappers/rust/icicle-core/src/ecntt/tests.rs
Normal file
92
wrappers/rust/icicle-core/src/ecntt/tests.rs
Normal file
@@ -0,0 +1,92 @@
|
||||
#![cfg(feature = "ec_ntt")]
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
|
||||
use crate::curve::Curve;
|
||||
use crate::curve::*;
|
||||
use crate::{
|
||||
ecntt::*,
|
||||
ntt::{NTTDir, NttAlgorithm, Ordering},
|
||||
traits::FieldImpl,
|
||||
};
|
||||
|
||||
use crate::ntt::NTTConfig;
|
||||
|
||||
pub fn check_ecntt<C: Curve>()
|
||||
where
|
||||
<C::ScalarField as FieldImpl>::Config: ECNTT<C>,
|
||||
{
|
||||
let test_sizes = [1 << 4, 1 << 9];
|
||||
for test_size in test_sizes {
|
||||
let points = C::generate_random_projective_points(test_size);
|
||||
|
||||
let slice = &points.clone();
|
||||
let config: NTTConfig<'_, C::ScalarField> = NTTConfig::default();
|
||||
let mut out_p = vec![Projective::<C>::zero(); test_size];
|
||||
let ecntt_result = HostSlice::from_mut_slice(&mut out_p);
|
||||
let input = HostSlice::from_slice(slice);
|
||||
ecntt(input, NTTDir::kForward, &config, ecntt_result).unwrap();
|
||||
assert_ne!(ecntt_result.as_slice(), points);
|
||||
|
||||
let mut slice = vec![Projective::<C>::zero(); test_size];
|
||||
let iecntt_result = HostSlice::from_mut_slice(&mut slice);
|
||||
ecntt(ecntt_result, NTTDir::kInverse, &config, iecntt_result).unwrap();
|
||||
|
||||
assert_eq!(iecntt_result.as_slice(), points);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_ecntt_batch<C: Curve>()
|
||||
where
|
||||
<C::ScalarField as FieldImpl>::Config: ECNTT<C>,
|
||||
{
|
||||
let test_sizes = [1 << 4, 1 << 9];
|
||||
let batch_sizes = [1, 1 << 4, 21];
|
||||
for test_size in test_sizes {
|
||||
// let coset_generators = [F::one(), F::Config::generate_random(1)[0]];
|
||||
let mut config: NTTConfig<'_, C::ScalarField> = NTTConfig::default();
|
||||
for batch_size in batch_sizes {
|
||||
let slice = &C::generate_random_projective_points(test_size * batch_size);
|
||||
let points = HostSlice::from_slice(slice);
|
||||
|
||||
for is_inverse in [NTTDir::kInverse, NTTDir::kForward] {
|
||||
for ordering in [
|
||||
Ordering::kNN, // ~same performance
|
||||
// Ordering::kNR,
|
||||
// Ordering::kRN,
|
||||
// Ordering::kRR,
|
||||
// Ordering::kNM, // no mixed radix ecntt
|
||||
// Ordering::kMN,
|
||||
] {
|
||||
config.ordering = ordering;
|
||||
let mut slice = vec![Projective::zero(); batch_size * test_size];
|
||||
let batch_ntt_result = HostSlice::from_mut_slice(&mut slice);
|
||||
for alg in [NttAlgorithm::Radix2] {
|
||||
config.batch_size = batch_size as i32;
|
||||
config.ntt_algorithm = alg;
|
||||
ecntt(points, is_inverse, &config, batch_ntt_result).unwrap();
|
||||
config.batch_size = 1;
|
||||
let mut slice = vec![Projective::zero(); test_size];
|
||||
let one_ntt_result = HostSlice::from_mut_slice(&mut slice);
|
||||
for i in 0..batch_size {
|
||||
ecntt(
|
||||
HostSlice::from_slice(
|
||||
&points[i * test_size..(i + 1) * test_size]
|
||||
.as_slice()
|
||||
.to_vec(),
|
||||
),
|
||||
is_inverse,
|
||||
&config,
|
||||
one_ntt_result,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
batch_ntt_result[i * test_size..(i + 1) * test_size].as_slice(),
|
||||
one_ntt_result.as_slice()
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -3,15 +3,16 @@ use crate::traits::ArkConvertible;
|
||||
use crate::traits::{FieldConfig, FieldImpl, MontgomeryConvertible};
|
||||
#[cfg(feature = "arkworks")]
|
||||
use ark_ff::{BigInteger, Field as ArkField, PrimeField};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use icicle_cuda_runtime::memory::DeviceSlice;
|
||||
use std::fmt::{Debug, Display};
|
||||
use std::marker::PhantomData;
|
||||
|
||||
#[derive(PartialEq, Copy, Clone)]
|
||||
#[repr(C)]
|
||||
pub struct Field<const NUM_LIMBS: usize, F: FieldConfig> {
|
||||
limbs: [u64; NUM_LIMBS],
|
||||
limbs: [u32; NUM_LIMBS],
|
||||
p: PhantomData<F>,
|
||||
}
|
||||
|
||||
@@ -26,7 +27,7 @@ impl<const NUM_LIMBS: usize, F: FieldConfig> Display for Field<NUM_LIMBS, F> {
|
||||
.iter()
|
||||
.rev()
|
||||
{
|
||||
write!(f, "{:016x}", b)?;
|
||||
write!(f, "{:08x}", b)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -38,21 +39,21 @@ impl<const NUM_LIMBS: usize, F: FieldConfig> Debug for Field<NUM_LIMBS, F> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<const NUM_LIMBS: usize, F: FieldConfig> Into<[u64; NUM_LIMBS]> for Field<NUM_LIMBS, F> {
|
||||
fn into(self) -> [u64; NUM_LIMBS] {
|
||||
impl<const NUM_LIMBS: usize, F: FieldConfig> Into<[u32; NUM_LIMBS]> for Field<NUM_LIMBS, F> {
|
||||
fn into(self) -> [u32; NUM_LIMBS] {
|
||||
self.limbs
|
||||
}
|
||||
}
|
||||
|
||||
impl<const NUM_LIMBS: usize, F: FieldConfig> From<[u64; NUM_LIMBS]> for Field<NUM_LIMBS, F> {
|
||||
fn from(limbs: [u64; NUM_LIMBS]) -> Self {
|
||||
impl<const NUM_LIMBS: usize, F: FieldConfig> From<[u32; NUM_LIMBS]> for Field<NUM_LIMBS, F> {
|
||||
fn from(limbs: [u32; NUM_LIMBS]) -> Self {
|
||||
Self { limbs, p: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
impl<const NUM_LIMBS: usize, F: FieldConfig> FieldImpl for Field<NUM_LIMBS, F> {
|
||||
type Config = F;
|
||||
type Repr = [u64; NUM_LIMBS];
|
||||
type Repr = [u32; NUM_LIMBS];
|
||||
|
||||
fn to_bytes_le(&self) -> Vec<u8> {
|
||||
self.limbs
|
||||
@@ -68,49 +69,50 @@ impl<const NUM_LIMBS: usize, F: FieldConfig> FieldImpl for Field<NUM_LIMBS, F> {
|
||||
// please note that this function zero-pads if there are not enough bytes
|
||||
// and only takes the first bytes in there are too many of them
|
||||
fn from_bytes_le(bytes: &[u8]) -> Self {
|
||||
let mut limbs: [u64; NUM_LIMBS] = [0; NUM_LIMBS];
|
||||
let mut limbs: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
|
||||
for (i, chunk) in bytes
|
||||
.chunks(8)
|
||||
.chunks(4)
|
||||
.take(NUM_LIMBS)
|
||||
.enumerate()
|
||||
{
|
||||
let mut chunk_array: [u8; 8] = [0; 8];
|
||||
let mut chunk_array: [u8; 4] = [0; 4];
|
||||
chunk_array[..chunk.len()].clone_from_slice(chunk);
|
||||
limbs[i] = u64::from_le_bytes(chunk_array);
|
||||
limbs[i] = u32::from_le_bytes(chunk_array);
|
||||
}
|
||||
Self::from(limbs)
|
||||
}
|
||||
|
||||
fn zero() -> Self {
|
||||
Field {
|
||||
limbs: [0u64; NUM_LIMBS],
|
||||
p: PhantomData,
|
||||
}
|
||||
FieldImpl::from_u32(0)
|
||||
}
|
||||
|
||||
fn one() -> Self {
|
||||
let mut limbs = [0u64; NUM_LIMBS];
|
||||
limbs[0] = 1;
|
||||
FieldImpl::from_u32(1)
|
||||
}
|
||||
|
||||
fn from_u32(val: u32) -> Self {
|
||||
let mut limbs = [0u32; NUM_LIMBS];
|
||||
limbs[0] = val;
|
||||
Field { limbs, p: PhantomData }
|
||||
}
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub trait MontgomeryConvertibleField<F: FieldImpl> {
|
||||
fn to_mont(values: &mut HostOrDeviceSlice<F>) -> CudaError;
|
||||
fn from_mont(values: &mut HostOrDeviceSlice<F>) -> CudaError;
|
||||
pub trait MontgomeryConvertibleField<'a, F: FieldImpl> {
|
||||
fn to_mont(values: &mut DeviceSlice<F>, ctx: &DeviceContext<'a>) -> CudaError;
|
||||
fn from_mont(values: &mut DeviceSlice<F>, ctx: &DeviceContext<'a>) -> CudaError;
|
||||
}
|
||||
|
||||
impl<const NUM_LIMBS: usize, F: FieldConfig> MontgomeryConvertible for Field<NUM_LIMBS, F>
|
||||
impl<'a, const NUM_LIMBS: usize, F: FieldConfig> MontgomeryConvertible<'a> for Field<NUM_LIMBS, F>
|
||||
where
|
||||
F: MontgomeryConvertibleField<Self>,
|
||||
F: MontgomeryConvertibleField<'a, Self>,
|
||||
{
|
||||
fn to_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError {
|
||||
F::to_mont(values)
|
||||
fn to_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError {
|
||||
F::to_mont(values, ctx)
|
||||
}
|
||||
|
||||
fn from_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError {
|
||||
F::from_mont(values)
|
||||
fn from_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError {
|
||||
F::from_mont(values, ctx)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -168,13 +170,13 @@ macro_rules! impl_scalar_field {
|
||||
impl_field!($num_limbs, $field_name, $field_cfg, $ark_equiv);
|
||||
|
||||
mod $field_prefix_ident {
|
||||
use crate::curve::{$field_name, CudaError, DeviceContext, HostOrDeviceSlice};
|
||||
use super::{$field_name, CudaError, DeviceContext, HostOrDeviceSlice};
|
||||
|
||||
extern "C" {
|
||||
#[link_name = concat!($field_prefix, "GenerateScalars")]
|
||||
#[link_name = concat!($field_prefix, "_generate_scalars")]
|
||||
pub(crate) fn generate_scalars(scalars: *mut $field_name, size: usize);
|
||||
|
||||
#[link_name = concat!($field_prefix, "ScalarConvertMontgomery")]
|
||||
#[link_name = concat!($field_prefix, "_scalar_convert_montgomery")]
|
||||
fn _convert_scalars_montgomery(
|
||||
scalars: *mut $field_name,
|
||||
size: usize,
|
||||
@@ -184,17 +186,12 @@ macro_rules! impl_scalar_field {
|
||||
}
|
||||
|
||||
pub(crate) fn convert_scalars_montgomery(
|
||||
scalars: &mut HostOrDeviceSlice<$field_name>,
|
||||
scalars: *mut $field_name,
|
||||
len: usize,
|
||||
is_into: bool,
|
||||
ctx: &DeviceContext,
|
||||
) -> CudaError {
|
||||
unsafe {
|
||||
_convert_scalars_montgomery(
|
||||
scalars.as_mut_ptr(),
|
||||
scalars.len(),
|
||||
is_into,
|
||||
&DeviceContext::default() as *const _ as *const DeviceContext,
|
||||
)
|
||||
}
|
||||
unsafe { _convert_scalars_montgomery(scalars, len, is_into, ctx as *const DeviceContext) }
|
||||
}
|
||||
}
|
||||
|
||||
@@ -206,13 +203,34 @@ macro_rules! impl_scalar_field {
|
||||
}
|
||||
}
|
||||
|
||||
impl MontgomeryConvertibleField<$field_name> for $field_cfg {
|
||||
fn to_mont(values: &mut HostOrDeviceSlice<$field_name>) -> CudaError {
|
||||
$field_prefix_ident::convert_scalars_montgomery(values, true)
|
||||
impl<'a> MontgomeryConvertibleField<'a, $field_name> for $field_cfg {
|
||||
fn to_mont(values: &mut DeviceSlice<$field_name>, ctx: &DeviceContext<'a>) -> CudaError {
|
||||
check_device(ctx.device_id);
|
||||
assert_eq!(
|
||||
values
|
||||
.device_id()
|
||||
.unwrap(),
|
||||
ctx.device_id,
|
||||
"Device ids are different in slice and context"
|
||||
);
|
||||
$field_prefix_ident::convert_scalars_montgomery(unsafe { values.as_mut_ptr() }, values.len(), true, ctx)
|
||||
}
|
||||
|
||||
fn from_mont(values: &mut HostOrDeviceSlice<$field_name>) -> CudaError {
|
||||
$field_prefix_ident::convert_scalars_montgomery(values, false)
|
||||
fn from_mont(values: &mut DeviceSlice<$field_name>, ctx: &DeviceContext<'a>) -> CudaError {
|
||||
check_device(ctx.device_id);
|
||||
assert_eq!(
|
||||
values
|
||||
.device_id()
|
||||
.unwrap(),
|
||||
ctx.device_id,
|
||||
"Device ids are different in slice and context"
|
||||
);
|
||||
$field_prefix_ident::convert_scalars_montgomery(
|
||||
unsafe { values.as_mut_ptr() },
|
||||
values.len(),
|
||||
false,
|
||||
ctx,
|
||||
)
|
||||
}
|
||||
}
|
||||
};
|
||||
@@ -227,5 +245,10 @@ macro_rules! impl_field_tests {
|
||||
fn test_field_convert_montgomery() {
|
||||
check_field_convert_montgomery::<$field_name>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_field_equality() {
|
||||
check_field_equality::<$field_name>()
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
pub mod curve;
|
||||
pub mod ecntt;
|
||||
pub mod error;
|
||||
pub mod field;
|
||||
pub mod msm;
|
||||
pub mod ntt;
|
||||
pub mod polynomials;
|
||||
pub mod poseidon;
|
||||
#[cfg(feature = "arkworks")]
|
||||
#[doc(hidden)]
|
||||
pub mod tests;
|
||||
pub mod traits;
|
||||
@@ -13,6 +14,6 @@ pub mod vec_ops;
|
||||
|
||||
pub trait SNARKCurve: curve::Curve + msm::MSM<Self>
|
||||
where
|
||||
<Self::ScalarField as traits::FieldImpl>::Config: ntt::NTT<Self::ScalarField>,
|
||||
<Self::ScalarField as traits::FieldImpl>::Config: ntt::NTT<Self::ScalarField, Self::ScalarField>,
|
||||
{
|
||||
}
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use crate::curve::{Affine, Curve, Projective};
|
||||
use crate::error::IcicleResult;
|
||||
use icicle_cuda_runtime::device::check_device;
|
||||
use icicle_cuda_runtime::device_context::{DeviceContext, DEFAULT_DEVICE_ID};
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
|
||||
|
||||
#[cfg(feature = "arkworks")]
|
||||
#[doc(hidden)]
|
||||
@@ -91,18 +92,18 @@ impl<'a> MSMConfig<'a> {
|
||||
#[doc(hidden)]
|
||||
pub trait MSM<C: Curve> {
|
||||
fn msm_unchecked(
|
||||
scalars: &HostOrDeviceSlice<C::ScalarField>,
|
||||
points: &HostOrDeviceSlice<Affine<C>>,
|
||||
scalars: &(impl HostOrDeviceSlice<C::ScalarField> + ?Sized),
|
||||
points: &(impl HostOrDeviceSlice<Affine<C>> + ?Sized),
|
||||
cfg: &MSMConfig,
|
||||
results: &mut HostOrDeviceSlice<Projective<C>>,
|
||||
results: &mut (impl HostOrDeviceSlice<Projective<C>> + ?Sized),
|
||||
) -> IcicleResult<()>;
|
||||
|
||||
fn precompute_bases_unchecked(
|
||||
points: &HostOrDeviceSlice<Affine<C>>,
|
||||
points: &(impl HostOrDeviceSlice<Affine<C>> + ?Sized),
|
||||
precompute_factor: i32,
|
||||
_c: i32,
|
||||
ctx: &DeviceContext,
|
||||
output_bases: &mut HostOrDeviceSlice<Affine<C>>,
|
||||
output_bases: &mut DeviceSlice<Affine<C>>,
|
||||
) -> IcicleResult<()>;
|
||||
}
|
||||
|
||||
@@ -122,10 +123,10 @@ pub trait MSM<C: Curve> {
|
||||
///
|
||||
/// Returns `Ok(())` if no errors occurred or a `CudaError` otherwise.
|
||||
pub fn msm<C: Curve + MSM<C>>(
|
||||
scalars: &HostOrDeviceSlice<C::ScalarField>,
|
||||
points: &HostOrDeviceSlice<Affine<C>>,
|
||||
scalars: &(impl HostOrDeviceSlice<C::ScalarField> + ?Sized),
|
||||
points: &(impl HostOrDeviceSlice<Affine<C>> + ?Sized),
|
||||
cfg: &MSMConfig,
|
||||
results: &mut HostOrDeviceSlice<Projective<C>>,
|
||||
results: &mut (impl HostOrDeviceSlice<Projective<C>> + ?Sized),
|
||||
) -> IcicleResult<()> {
|
||||
if points.len() % (cfg.precompute_factor as usize) != 0 {
|
||||
panic!(
|
||||
@@ -149,6 +150,28 @@ pub fn msm<C: Curve + MSM<C>>(
|
||||
scalars.len()
|
||||
);
|
||||
}
|
||||
let ctx_device_id = cfg
|
||||
.ctx
|
||||
.device_id;
|
||||
if let Some(device_id) = scalars.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in scalars and context are different"
|
||||
);
|
||||
}
|
||||
if let Some(device_id) = points.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in points and context are different"
|
||||
);
|
||||
}
|
||||
if let Some(device_id) = results.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in results and context are different"
|
||||
);
|
||||
}
|
||||
check_device(ctx_device_id);
|
||||
let mut local_cfg = cfg.clone();
|
||||
local_cfg.points_size = points_size as i32;
|
||||
local_cfg.batch_size = results.len() as i32;
|
||||
@@ -179,11 +202,11 @@ pub fn msm<C: Curve + MSM<C>>(
|
||||
///
|
||||
/// Returns `Ok(())` if no errors occurred or a `CudaError` otherwise.
|
||||
pub fn precompute_bases<C: Curve + MSM<C>>(
|
||||
points: &HostOrDeviceSlice<Affine<C>>,
|
||||
points: &(impl HostOrDeviceSlice<Affine<C>> + ?Sized),
|
||||
precompute_factor: i32,
|
||||
_c: i32,
|
||||
ctx: &DeviceContext,
|
||||
output_bases: &mut HostOrDeviceSlice<Affine<C>>,
|
||||
output_bases: &mut DeviceSlice<Affine<C>>,
|
||||
) -> IcicleResult<()> {
|
||||
assert_eq!(
|
||||
output_bases.len(),
|
||||
@@ -208,7 +231,7 @@ macro_rules! impl_msm {
|
||||
use super::{$curve, Affine, CudaError, Curve, DeviceContext, MSMConfig, Projective};
|
||||
|
||||
extern "C" {
|
||||
#[link_name = concat!($curve_prefix, "MSMCuda")]
|
||||
#[link_name = concat!($curve_prefix, "_msm_cuda")]
|
||||
pub(crate) fn msm_cuda(
|
||||
scalars: *const <$curve as Curve>::ScalarField,
|
||||
points: *const Affine<$curve>,
|
||||
@@ -217,7 +240,7 @@ macro_rules! impl_msm {
|
||||
out: *mut Projective<$curve>,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($curve_prefix, "PrecomputeMSMBases")]
|
||||
#[link_name = concat!($curve_prefix, "_precompute_msm_bases_cuda")]
|
||||
pub(crate) fn precompute_bases_cuda(
|
||||
points: *const Affine<$curve>,
|
||||
bases_size: i32,
|
||||
@@ -232,10 +255,10 @@ macro_rules! impl_msm {
|
||||
|
||||
impl MSM<$curve> for $curve {
|
||||
fn msm_unchecked(
|
||||
scalars: &HostOrDeviceSlice<<$curve as Curve>::ScalarField>,
|
||||
points: &HostOrDeviceSlice<Affine<$curve>>,
|
||||
scalars: &(impl HostOrDeviceSlice<<$curve as Curve>::ScalarField> + ?Sized),
|
||||
points: &(impl HostOrDeviceSlice<Affine<$curve>> + ?Sized),
|
||||
cfg: &MSMConfig,
|
||||
results: &mut HostOrDeviceSlice<Projective<$curve>>,
|
||||
results: &mut (impl HostOrDeviceSlice<Projective<$curve>> + ?Sized),
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
$curve_prefix_indent::msm_cuda(
|
||||
@@ -250,11 +273,11 @@ macro_rules! impl_msm {
|
||||
}
|
||||
|
||||
fn precompute_bases_unchecked(
|
||||
points: &HostOrDeviceSlice<Affine<$curve>>,
|
||||
points: &(impl HostOrDeviceSlice<Affine<$curve>> + ?Sized),
|
||||
precompute_factor: i32,
|
||||
_c: i32,
|
||||
ctx: &DeviceContext,
|
||||
output_bases: &mut HostOrDeviceSlice<Affine<$curve>>,
|
||||
output_bases: &mut DeviceSlice<Affine<$curve>>,
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
$curve_prefix_indent::precompute_bases_cuda(
|
||||
|
||||
@@ -2,7 +2,7 @@ use crate::curve::{Affine, Curve, Projective};
|
||||
use crate::msm::{msm, precompute_bases, MSMConfig, MSM};
|
||||
use crate::traits::{FieldImpl, GenerateRandom};
|
||||
use icicle_cuda_runtime::device::{get_device_count, set_device, warmup};
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};
|
||||
use icicle_cuda_runtime::stream::CudaStream;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
|
||||
@@ -42,7 +42,7 @@ where
|
||||
|
||||
set_device(device_id).unwrap();
|
||||
let test_sizes = [4, 8, 16, 32, 64, 128, 256, 1000, 1 << 18];
|
||||
let mut msm_results = HostOrDeviceSlice::cuda_malloc(1).unwrap();
|
||||
let mut msm_results = DeviceVec::<Projective<C>>::cuda_malloc_for_device(1, device_id).unwrap();
|
||||
for test_size in test_sizes {
|
||||
let points = generate_random_affine_points_with_zeroes(test_size, 2);
|
||||
let scalars = <C::ScalarField as FieldImpl>::Config::generate_random(test_size);
|
||||
@@ -58,10 +58,10 @@ where
|
||||
// (just beware the possible extra flag in affine point types, can't transmute ark Affine because of that)
|
||||
let scalars_mont = unsafe { &*(&scalars_ark[..] as *const _ as *const [C::ScalarField]) };
|
||||
|
||||
let mut scalars_d = HostOrDeviceSlice::cuda_malloc(test_size).unwrap();
|
||||
let mut scalars_d = DeviceVec::<C::ScalarField>::cuda_malloc(test_size).unwrap();
|
||||
let stream = CudaStream::create().unwrap();
|
||||
scalars_d
|
||||
.copy_from_host_async(&scalars_mont, &stream)
|
||||
.copy_from_host_async(HostSlice::from_slice(&scalars_mont), &stream)
|
||||
.unwrap();
|
||||
|
||||
let mut cfg = MSMConfig::default_for_device(device_id);
|
||||
@@ -69,17 +69,23 @@ where
|
||||
.stream = &stream;
|
||||
cfg.is_async = true;
|
||||
cfg.are_scalars_montgomery_form = true;
|
||||
msm(&scalars_d, &HostOrDeviceSlice::on_host(points), &cfg, &mut msm_results).unwrap();
|
||||
msm(
|
||||
&scalars_d[..],
|
||||
HostSlice::from_slice(&points),
|
||||
&cfg,
|
||||
&mut msm_results[..],
|
||||
)
|
||||
.unwrap();
|
||||
// need to make sure that scalars_d weren't mutated by the previous call
|
||||
let mut scalars_mont_after = vec![C::ScalarField::zero(); test_size];
|
||||
scalars_d
|
||||
.copy_to_host_async(&mut scalars_mont_after, &stream)
|
||||
.copy_to_host_async(HostSlice::from_mut_slice(&mut scalars_mont_after), &stream)
|
||||
.unwrap();
|
||||
assert_eq!(scalars_mont, scalars_mont_after);
|
||||
|
||||
let mut msm_host_result = vec![Projective::<C>::zero(); 1];
|
||||
msm_results
|
||||
.copy_to_host(&mut msm_host_result[..])
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut msm_host_result[..]))
|
||||
.unwrap();
|
||||
stream
|
||||
.synchronize()
|
||||
@@ -118,10 +124,9 @@ where
|
||||
for test_size in test_sizes {
|
||||
let precompute_factor = 8;
|
||||
let points = generate_random_affine_points_with_zeroes(test_size, 10);
|
||||
let points_h = HostOrDeviceSlice::on_host(points.clone());
|
||||
let mut precomputed_points_d = HostOrDeviceSlice::cuda_malloc(precompute_factor * test_size).unwrap();
|
||||
let mut precomputed_points_d = DeviceVec::cuda_malloc(precompute_factor * test_size).unwrap();
|
||||
precompute_bases(
|
||||
&points_h,
|
||||
HostSlice::from_slice(&points),
|
||||
precompute_factor as i32,
|
||||
0,
|
||||
&cfg.ctx,
|
||||
@@ -135,27 +140,27 @@ where
|
||||
.take(batch_size)
|
||||
.flatten()
|
||||
.collect();
|
||||
let scalars_h = HostOrDeviceSlice::on_host(scalars);
|
||||
let scalars_h = HostSlice::from_slice(&scalars);
|
||||
|
||||
let mut msm_results_1 = HostOrDeviceSlice::cuda_malloc(batch_size).unwrap();
|
||||
let mut msm_results_2 = HostOrDeviceSlice::cuda_malloc(batch_size).unwrap();
|
||||
let mut points_d = HostOrDeviceSlice::cuda_malloc(test_size * batch_size).unwrap();
|
||||
let mut msm_results_1 = DeviceVec::<Projective<C>>::cuda_malloc(batch_size).unwrap();
|
||||
let mut msm_results_2 = DeviceVec::<Projective<C>>::cuda_malloc(batch_size).unwrap();
|
||||
let mut points_d = DeviceVec::<Affine<C>>::cuda_malloc(test_size * batch_size).unwrap();
|
||||
points_d
|
||||
.copy_from_host_async(&points_cloned, &stream)
|
||||
.copy_from_host_async(HostSlice::from_slice(&points_cloned), &stream)
|
||||
.unwrap();
|
||||
|
||||
cfg.precompute_factor = precompute_factor as i32;
|
||||
msm(&scalars_h, &precomputed_points_d, &cfg, &mut msm_results_1).unwrap();
|
||||
msm(scalars_h, &precomputed_points_d[..], &cfg, &mut msm_results_1[..]).unwrap();
|
||||
cfg.precompute_factor = 1;
|
||||
msm(&scalars_h, &points_d, &cfg, &mut msm_results_2).unwrap();
|
||||
msm(scalars_h, &points_d[..], &cfg, &mut msm_results_2[..]).unwrap();
|
||||
|
||||
let mut msm_host_result_1 = vec![Projective::<C>::zero(); batch_size];
|
||||
let mut msm_host_result_2 = vec![Projective::<C>::zero(); batch_size];
|
||||
msm_results_1
|
||||
.copy_to_host_async(&mut msm_host_result_1[..], &stream)
|
||||
.copy_to_host_async(HostSlice::from_mut_slice(&mut msm_host_result_1), &stream)
|
||||
.unwrap();
|
||||
msm_results_2
|
||||
.copy_to_host_async(&mut msm_host_result_2[..], &stream)
|
||||
.copy_to_host_async(HostSlice::from_mut_slice(&mut msm_host_result_2), &stream)
|
||||
.unwrap();
|
||||
stream
|
||||
.synchronize()
|
||||
@@ -166,7 +171,6 @@ where
|
||||
.map(|x| x.to_ark())
|
||||
.collect();
|
||||
let scalars_ark: Vec<_> = scalars_h
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|x| x.to_ark())
|
||||
.collect();
|
||||
@@ -217,17 +221,17 @@ where
|
||||
.map(|x| x.to_ark())
|
||||
.collect();
|
||||
|
||||
let mut msm_results = HostOrDeviceSlice::on_host(vec![Projective::<C>::zero(); batch_size]);
|
||||
let mut msm_results = vec![Projective::<C>::zero(); batch_size];
|
||||
|
||||
let mut cfg = MSMConfig::default();
|
||||
if test_size < test_threshold {
|
||||
cfg.bitsize = 1;
|
||||
}
|
||||
msm(
|
||||
&HostOrDeviceSlice::on_host(scalars),
|
||||
&HostOrDeviceSlice::on_host(points),
|
||||
HostSlice::from_slice(&scalars),
|
||||
HostSlice::from_slice(&points),
|
||||
&cfg,
|
||||
&mut msm_results,
|
||||
HostSlice::from_mut_slice(&mut msm_results),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
@@ -238,7 +242,7 @@ where
|
||||
{
|
||||
let msm_result_ark: ark_ec::models::short_weierstrass::Projective<C::ArkSWConfig> =
|
||||
VariableBaseMSM::msm(&points_chunk, &scalars_chunk).unwrap();
|
||||
assert_eq!(msm_results.as_slice()[i].to_ark(), msm_result_ark);
|
||||
assert_eq!(msm_results[i].to_ark(), msm_result_ark);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
use icicle_cuda_runtime::device::check_device;
|
||||
use icicle_cuda_runtime::device_context::{DeviceContext, DEFAULT_DEVICE_ID};
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
use crate::{error::IcicleResult, traits::FieldImpl};
|
||||
pub use crate::{error::IcicleResult, traits::FieldImpl};
|
||||
|
||||
#[cfg(feature = "arkworks")]
|
||||
#[doc(hidden)]
|
||||
@@ -71,7 +72,7 @@ pub enum NttAlgorithm {
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct NTTConfig<'a, S> {
|
||||
/// Details related to the device such as its id and stream id. See [DeviceContext](@ref device_context::DeviceContext).
|
||||
/// Details related to the device such as its id and stream id. See [DeviceContext](DeviceContext).
|
||||
pub ctx: DeviceContext<'a>,
|
||||
/// Coset generator. Used to perform coset (i)NTTs. Default value: `S::one()` (corresponding to no coset being used).
|
||||
pub coset_gen: S,
|
||||
@@ -79,15 +80,15 @@ pub struct NTTConfig<'a, S> {
|
||||
pub batch_size: i32,
|
||||
/// If true the function will compute the NTTs over the columns of the input matrix and not over the rows.
|
||||
pub columns_batch: bool,
|
||||
/// Ordering of inputs and outputs. See [Ordering](@ref Ordering). Default value: `Ordering::kNN`.
|
||||
/// Ordering of inputs and outputs. See [Ordering](Ordering). Default value: `Ordering::kNN`.
|
||||
pub ordering: Ordering,
|
||||
are_inputs_on_device: bool,
|
||||
are_outputs_on_device: bool,
|
||||
pub are_inputs_on_device: bool,
|
||||
pub are_outputs_on_device: bool,
|
||||
/// Whether to run the NTT asynchronously. If set to `true`, the NTT function will be non-blocking and you'd need to synchronize
|
||||
/// it explicitly by running `stream.synchronize()`. If set to false, the NTT function will block the current CPU thread.
|
||||
pub is_async: bool,
|
||||
/// Explicitly select the NTT algorithm. Default value: Auto (the implementation selects radix-2 or mixed-radix algorithm based
|
||||
/// on heuristics
|
||||
/// on heuristics).
|
||||
pub ntt_algorithm: NttAlgorithm,
|
||||
}
|
||||
|
||||
@@ -114,17 +115,25 @@ impl<'a, S: FieldImpl> NTTConfig<'a, S> {
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub trait NTT<F: FieldImpl> {
|
||||
pub trait NTTDomain<F: FieldImpl> {
|
||||
fn get_root_of_unity(max_size: u64) -> F;
|
||||
fn initialize_domain(primitive_root: F, ctx: &DeviceContext, fast_twiddles: bool) -> IcicleResult<()>;
|
||||
fn release_domain(ctx: &DeviceContext) -> IcicleResult<()>;
|
||||
}
|
||||
|
||||
#[doc(hidden)]
|
||||
pub trait NTT<T, F: FieldImpl>: NTTDomain<F> {
|
||||
fn ntt_unchecked(
|
||||
input: &HostOrDeviceSlice<F>,
|
||||
input: &(impl HostOrDeviceSlice<T> + ?Sized),
|
||||
dir: NTTDir,
|
||||
cfg: &NTTConfig<F>,
|
||||
output: &mut (impl HostOrDeviceSlice<T> + ?Sized),
|
||||
) -> IcicleResult<()>;
|
||||
fn ntt_inplace_unchecked(
|
||||
inout: &mut (impl HostOrDeviceSlice<T> + ?Sized),
|
||||
dir: NTTDir,
|
||||
cfg: &NTTConfig<F>,
|
||||
output: &mut HostOrDeviceSlice<F>,
|
||||
) -> IcicleResult<()>;
|
||||
fn ntt_inplace_unchecked(inout: &mut HostOrDeviceSlice<F>, dir: NTTDir, cfg: &NTTConfig<F>) -> IcicleResult<()>;
|
||||
fn initialize_domain(primitive_root: F, ctx: &DeviceContext) -> IcicleResult<()>;
|
||||
fn initialize_domain_fast_twiddles_mode(primitive_root: F, ctx: &DeviceContext) -> IcicleResult<()>;
|
||||
fn release_domain(ctx: &DeviceContext) -> IcicleResult<()>;
|
||||
}
|
||||
|
||||
/// Computes the NTT, or a batch of several NTTs.
|
||||
@@ -138,15 +147,15 @@ pub trait NTT<F: FieldImpl> {
|
||||
/// * `cfg` - config used to specify extra arguments of the NTT.
|
||||
///
|
||||
/// * `output` - buffer to write the NTT outputs into. Must be of the same size as `input`.
|
||||
pub fn ntt<F>(
|
||||
input: &HostOrDeviceSlice<F>,
|
||||
pub fn ntt<T, F>(
|
||||
input: &(impl HostOrDeviceSlice<T> + ?Sized),
|
||||
dir: NTTDir,
|
||||
cfg: &NTTConfig<F>,
|
||||
output: &mut HostOrDeviceSlice<F>,
|
||||
output: &mut (impl HostOrDeviceSlice<T> + ?Sized),
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: NTT<F>,
|
||||
<F as FieldImpl>::Config: NTT<T, F>,
|
||||
{
|
||||
if input.len() != output.len() {
|
||||
panic!(
|
||||
@@ -155,11 +164,27 @@ where
|
||||
output.len()
|
||||
);
|
||||
}
|
||||
let ctx_device_id = cfg
|
||||
.ctx
|
||||
.device_id;
|
||||
if let Some(device_id) = input.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in input and context are different"
|
||||
);
|
||||
}
|
||||
if let Some(device_id) = output.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in output and context are different"
|
||||
);
|
||||
}
|
||||
check_device(ctx_device_id);
|
||||
let mut local_cfg = cfg.clone();
|
||||
local_cfg.are_inputs_on_device = input.is_on_device();
|
||||
local_cfg.are_outputs_on_device = output.is_on_device();
|
||||
|
||||
<<F as FieldImpl>::Config as NTT<F>>::ntt_unchecked(input, dir, &local_cfg, output)
|
||||
<<F as FieldImpl>::Config as NTT<T, F>>::ntt_unchecked(input, dir, &local_cfg, output)
|
||||
}
|
||||
|
||||
/// Computes the NTT, or a batch of several NTTs inplace.
|
||||
@@ -171,16 +196,20 @@ where
|
||||
/// * `dir` - whether to compute forward of inverse NTT.
|
||||
///
|
||||
/// * `cfg` - config used to specify extra arguments of the NTT.
|
||||
pub fn ntt_inplace<F>(inout: &mut HostOrDeviceSlice<F>, dir: NTTDir, cfg: &NTTConfig<F>) -> IcicleResult<()>
|
||||
pub fn ntt_inplace<T, F>(
|
||||
inout: &mut (impl HostOrDeviceSlice<T> + ?Sized),
|
||||
dir: NTTDir,
|
||||
cfg: &NTTConfig<F>,
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: NTT<F>,
|
||||
<F as FieldImpl>::Config: NTT<T, F>,
|
||||
{
|
||||
let mut local_cfg = cfg.clone();
|
||||
local_cfg.are_inputs_on_device = inout.is_on_device();
|
||||
local_cfg.are_outputs_on_device = inout.is_on_device();
|
||||
|
||||
<<F as FieldImpl>::Config as NTT<F>>::ntt_inplace_unchecked(inout, dir, &local_cfg)
|
||||
<<F as FieldImpl>::Config as NTT<T, F>>::ntt_inplace_unchecked(inout, dir, &local_cfg)
|
||||
}
|
||||
|
||||
/// Generates twiddle factors which will be used to compute NTTs.
|
||||
@@ -192,71 +221,60 @@ where
|
||||
/// This function will panic if the order of `primitive_root` is not a power of two.
|
||||
///
|
||||
/// * `ctx` - GPU index and stream to perform the computation.
|
||||
pub fn initialize_domain<F>(primitive_root: F, ctx: &DeviceContext) -> IcicleResult<()>
|
||||
pub fn initialize_domain<F>(primitive_root: F, ctx: &DeviceContext, fast_twiddles: bool) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: NTT<F>,
|
||||
<F as FieldImpl>::Config: NTTDomain<F>,
|
||||
{
|
||||
<<F as FieldImpl>::Config as NTT<F>>::initialize_domain(primitive_root, ctx)
|
||||
}
|
||||
pub fn initialize_domain_fast_twiddles_mode<F>(primitive_root: F, ctx: &DeviceContext) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: NTT<F>,
|
||||
{
|
||||
<<F as FieldImpl>::Config as NTT<F>>::initialize_domain_fast_twiddles_mode(primitive_root, ctx)
|
||||
<<F as FieldImpl>::Config as NTTDomain<F>>::initialize_domain(primitive_root, ctx, fast_twiddles)
|
||||
}
|
||||
|
||||
pub fn release_domain<F>(ctx: &DeviceContext) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: NTT<F>,
|
||||
<F as FieldImpl>::Config: NTTDomain<F>,
|
||||
{
|
||||
<<F as FieldImpl>::Config as NTT<F>>::release_domain(ctx)
|
||||
<<F as FieldImpl>::Config as NTTDomain<F>>::release_domain(ctx)
|
||||
}
|
||||
|
||||
pub fn get_root_of_unity<F>(max_size: u64) -> F
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: NTTDomain<F>,
|
||||
{
|
||||
<<F as FieldImpl>::Config as NTTDomain<F>>::get_root_of_unity(max_size)
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_ntt {
|
||||
macro_rules! impl_ntt_without_domain {
|
||||
(
|
||||
$field_prefix:literal,
|
||||
$field_prefix_ident:ident,
|
||||
$field:ident,
|
||||
$field_config:ident
|
||||
$domain_field:ident,
|
||||
$domain_config:ident,
|
||||
$ntt_type:ident,
|
||||
$ntt_type_lit:literal,
|
||||
$inout:ident
|
||||
) => {
|
||||
mod $field_prefix_ident {
|
||||
use crate::ntt::{$field, $field_config, CudaError, DeviceContext, NTTConfig, NTTDir, DEFAULT_DEVICE_ID};
|
||||
|
||||
extern "C" {
|
||||
#[link_name = concat!($field_prefix, "NTTCuda")]
|
||||
pub(crate) fn ntt_cuda(
|
||||
input: *const $field,
|
||||
size: i32,
|
||||
dir: NTTDir,
|
||||
config: &NTTConfig<$field>,
|
||||
output: *mut $field,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "InitializeDomain")]
|
||||
pub(crate) fn initialize_ntt_domain(
|
||||
primitive_root: &$field,
|
||||
ctx: &DeviceContext,
|
||||
fast_twiddles_mode: bool,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "ReleaseDomain")]
|
||||
pub(crate) fn release_ntt_domain(ctx: &DeviceContext) -> CudaError;
|
||||
}
|
||||
extern "C" {
|
||||
#[link_name = concat!($field_prefix, concat!($ntt_type_lit, "_cuda"))]
|
||||
fn ntt_cuda(
|
||||
input: *const $inout,
|
||||
size: i32,
|
||||
dir: NTTDir,
|
||||
config: &NTTConfig<$domain_field>,
|
||||
output: *mut $inout,
|
||||
) -> CudaError;
|
||||
}
|
||||
|
||||
impl NTT<$field> for $field_config {
|
||||
impl $ntt_type<$inout, $domain_field> for $domain_config {
|
||||
fn ntt_unchecked(
|
||||
input: &HostOrDeviceSlice<$field>,
|
||||
input: &(impl HostOrDeviceSlice<$inout> + ?Sized),
|
||||
dir: NTTDir,
|
||||
cfg: &NTTConfig<$field>,
|
||||
output: &mut HostOrDeviceSlice<$field>,
|
||||
cfg: &NTTConfig<$domain_field>,
|
||||
output: &mut (impl HostOrDeviceSlice<$inout> + ?Sized),
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
$field_prefix_ident::ntt_cuda(
|
||||
ntt_cuda(
|
||||
input.as_ptr(),
|
||||
(input.len() / (cfg.batch_size as usize)) as i32,
|
||||
dir,
|
||||
@@ -268,13 +286,13 @@ macro_rules! impl_ntt {
|
||||
}
|
||||
|
||||
fn ntt_inplace_unchecked(
|
||||
inout: &mut HostOrDeviceSlice<$field>,
|
||||
inout: &mut (impl HostOrDeviceSlice<$inout> + ?Sized),
|
||||
dir: NTTDir,
|
||||
cfg: &NTTConfig<$field>,
|
||||
cfg: &NTTConfig<$domain_field>,
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
$field_prefix_ident::ntt_cuda(
|
||||
inout.as_ptr(),
|
||||
ntt_cuda(
|
||||
inout.as_mut_ptr(),
|
||||
(inout.len() / (cfg.batch_size as usize)) as i32,
|
||||
dir,
|
||||
cfg,
|
||||
@@ -283,16 +301,55 @@ macro_rules! impl_ntt {
|
||||
.wrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
fn initialize_domain(primitive_root: $field, ctx: &DeviceContext) -> IcicleResult<()> {
|
||||
unsafe { $field_prefix_ident::initialize_ntt_domain(&primitive_root, ctx, false).wrap() }
|
||||
#[macro_export]
|
||||
macro_rules! impl_ntt {
|
||||
(
|
||||
$field_prefix:literal,
|
||||
$field_prefix_ident:ident,
|
||||
$field:ident,
|
||||
$field_config:ident
|
||||
) => {
|
||||
mod $field_prefix_ident {
|
||||
use crate::ntt::*;
|
||||
|
||||
extern "C" {
|
||||
#[link_name = concat!($field_prefix, "_initialize_domain")]
|
||||
fn initialize_ntt_domain(
|
||||
primitive_root: &$field,
|
||||
ctx: &DeviceContext,
|
||||
fast_twiddles_mode: bool,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_release_domain")]
|
||||
fn release_ntt_domain(ctx: &DeviceContext) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_get_root_of_unity")]
|
||||
fn get_root_of_unity(max_size: u64) -> $field;
|
||||
}
|
||||
fn initialize_domain_fast_twiddles_mode(primitive_root: $field, ctx: &DeviceContext) -> IcicleResult<()> {
|
||||
unsafe { $field_prefix_ident::initialize_ntt_domain(&primitive_root, ctx, true).wrap() }
|
||||
}
|
||||
fn release_domain(ctx: &DeviceContext) -> IcicleResult<()> {
|
||||
unsafe { $field_prefix_ident::release_ntt_domain(ctx).wrap() }
|
||||
|
||||
impl NTTDomain<$field> for $field_config {
|
||||
fn initialize_domain(
|
||||
primitive_root: $field,
|
||||
ctx: &DeviceContext,
|
||||
fast_twiddles: bool,
|
||||
) -> IcicleResult<()> {
|
||||
unsafe { initialize_ntt_domain(&primitive_root, ctx, fast_twiddles).wrap() }
|
||||
}
|
||||
|
||||
fn release_domain(ctx: &DeviceContext) -> IcicleResult<()> {
|
||||
unsafe { release_ntt_domain(ctx).wrap() }
|
||||
}
|
||||
|
||||
fn get_root_of_unity(max_size: u64) -> $field {
|
||||
unsafe { get_root_of_unity(max_size) }
|
||||
}
|
||||
}
|
||||
|
||||
impl_ntt_without_domain!($field_prefix, $field, $field_config, NTT, "_ntt", $field);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -308,40 +365,45 @@ macro_rules! impl_ntt_tests {
|
||||
const FAST_TWIDDLES_MODE: bool = false;
|
||||
|
||||
#[test]
|
||||
#[parallel]
|
||||
fn test_ntt() {
|
||||
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
|
||||
check_ntt::<$field>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[parallel]
|
||||
fn test_ntt_coset_from_subgroup() {
|
||||
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
|
||||
check_ntt_coset_from_subgroup::<$field>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[parallel]
|
||||
fn test_ntt_arbitrary_coset() {
|
||||
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
|
||||
check_ntt_arbitrary_coset::<$field>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[parallel]
|
||||
fn test_ntt_batch() {
|
||||
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
|
||||
check_ntt_batch::<$field>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[parallel]
|
||||
fn test_ntt_device_async() {
|
||||
// init_domain is in this test is performed per-device
|
||||
check_ntt_device_async::<$field>()
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial]
|
||||
fn test_ntt_release_domain() {
|
||||
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
|
||||
check_release_domain::<$field>();
|
||||
*RELEASE.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE))
|
||||
check_release_domain::<$field>()
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@@ -3,39 +3,33 @@ use ark_poly::{EvaluationDomain, GeneralEvaluationDomain};
|
||||
use ark_std::{ops::Neg, test_rng, UniformRand};
|
||||
use icicle_cuda_runtime::device::{get_device_count, set_device};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
|
||||
use crate::error::IcicleResult;
|
||||
use crate::{
|
||||
ntt::{
|
||||
initialize_domain, initialize_domain_fast_twiddles_mode, ntt, ntt_inplace, release_domain, NTTDir,
|
||||
NttAlgorithm, Ordering,
|
||||
initialize_domain, ntt, ntt_inplace, release_domain, NTTConfig, NTTDir, NTTDomain, NttAlgorithm, Ordering, NTT,
|
||||
},
|
||||
traits::{ArkConvertible, FieldImpl, GenerateRandom},
|
||||
vec_ops::{transpose_matrix, VecOps},
|
||||
};
|
||||
|
||||
use super::{NTTConfig, NTT};
|
||||
|
||||
pub fn init_domain<F: FieldImpl + ArkConvertible>(max_size: u64, device_id: usize, fast_twiddles_mode: bool)
|
||||
where
|
||||
F::ArkEquivalent: FftField,
|
||||
<F as FieldImpl>::Config: NTT<F>,
|
||||
<F as FieldImpl>::Config: NTTDomain<F>,
|
||||
{
|
||||
let ctx = DeviceContext::default_for_device(device_id);
|
||||
let ark_rou = F::ArkEquivalent::get_root_of_unity(max_size).unwrap();
|
||||
if fast_twiddles_mode {
|
||||
initialize_domain_fast_twiddles_mode(F::from_ark(ark_rou), &ctx).unwrap();
|
||||
} else {
|
||||
initialize_domain(F::from_ark(ark_rou), &ctx).unwrap();
|
||||
}
|
||||
initialize_domain(F::from_ark(ark_rou), &ctx, fast_twiddles_mode).unwrap();
|
||||
}
|
||||
|
||||
pub fn rel_domain<F: FieldImpl>(ctx: &DeviceContext)
|
||||
pub fn rel_domain<F: FieldImpl>(ctx: &DeviceContext) -> IcicleResult<()>
|
||||
where
|
||||
<F as FieldImpl>::Config: NTT<F>,
|
||||
<F as FieldImpl>::Config: NTTDomain<F>,
|
||||
{
|
||||
release_domain::<F>(&ctx).unwrap();
|
||||
release_domain::<F>(&ctx)
|
||||
}
|
||||
|
||||
pub fn reverse_bit_order(n: u32, order: u32) -> u32 {
|
||||
@@ -52,14 +46,6 @@ pub fn reverse_bit_order(n: u32, order: u32) -> u32 {
|
||||
u32::from_str_radix(&reversed, 2).unwrap()
|
||||
}
|
||||
|
||||
pub fn transpose_flattened_matrix<T: Copy>(m: &[T], nrows: usize) -> Vec<T> {
|
||||
let ncols = m.len() / nrows;
|
||||
assert!(nrows * ncols == m.len());
|
||||
(0..m.len())
|
||||
.map(|i| m[(i % nrows) * ncols + i / nrows])
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn list_to_reverse_bit_order<T: Copy>(l: &[T]) -> Vec<T> {
|
||||
l.iter()
|
||||
.enumerate()
|
||||
@@ -70,7 +56,7 @@ pub fn list_to_reverse_bit_order<T: Copy>(l: &[T]) -> Vec<T> {
|
||||
pub fn check_ntt<F: FieldImpl + ArkConvertible>()
|
||||
where
|
||||
F::ArkEquivalent: FftField,
|
||||
<F as FieldImpl>::Config: NTT<F> + GenerateRandom<F>,
|
||||
<F as FieldImpl>::Config: NTT<F, F> + GenerateRandom<F>,
|
||||
{
|
||||
let test_sizes = [1 << 4, 1 << 17];
|
||||
for test_size in test_sizes {
|
||||
@@ -83,13 +69,14 @@ where
|
||||
.collect::<Vec<F::ArkEquivalent>>();
|
||||
// if we simply transmute arkworks types, we'll get scalars in Montgomery format
|
||||
let scalars_mont = unsafe { &*(&ark_scalars[..] as *const _ as *const [F]) };
|
||||
let scalars_mont_h = HostOrDeviceSlice::on_host(scalars_mont.to_vec());
|
||||
let scalars_mont_h = HostSlice::from_slice(&scalars_mont);
|
||||
|
||||
let mut config = NTTConfig::default();
|
||||
let mut config: NTTConfig<'_, F> = NTTConfig::default();
|
||||
for alg in [NttAlgorithm::Radix2, NttAlgorithm::MixedRadix] {
|
||||
config.ntt_algorithm = alg;
|
||||
let mut ntt_result = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
|
||||
ntt(&scalars_mont_h, NTTDir::kForward, &config, &mut ntt_result).unwrap();
|
||||
let mut ntt_result = vec![F::zero(); test_size];
|
||||
let ntt_result = HostSlice::from_mut_slice(&mut ntt_result);
|
||||
ntt(scalars_mont_h, NTTDir::kForward, &config, ntt_result).unwrap();
|
||||
assert_ne!(ntt_result.as_slice(), scalars_mont);
|
||||
|
||||
let mut ark_ntt_result = ark_scalars.clone();
|
||||
@@ -100,8 +87,14 @@ where
|
||||
unsafe { &*(ntt_result.as_slice() as *const _ as *const [<F as ArkConvertible>::ArkEquivalent]) };
|
||||
assert_eq!(ark_ntt_result, ntt_result_as_ark);
|
||||
|
||||
let mut intt_result = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
|
||||
ntt(&ntt_result, NTTDir::kInverse, &config, &mut intt_result).unwrap();
|
||||
let mut intt_result = vec![F::zero(); test_size];
|
||||
ntt(
|
||||
ntt_result,
|
||||
NTTDir::kInverse,
|
||||
&config,
|
||||
HostSlice::from_mut_slice(&mut intt_result),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(intt_result.as_slice(), scalars_mont);
|
||||
}
|
||||
@@ -111,7 +104,7 @@ where
|
||||
pub fn check_ntt_coset_from_subgroup<F: FieldImpl + ArkConvertible>()
|
||||
where
|
||||
F::ArkEquivalent: FftField,
|
||||
<F as FieldImpl>::Config: NTT<F> + GenerateRandom<F>,
|
||||
<F as FieldImpl>::Config: NTT<F, F> + GenerateRandom<F>,
|
||||
{
|
||||
let test_sizes = [1 << 4, 1 << 16];
|
||||
for test_size in test_sizes {
|
||||
@@ -124,7 +117,6 @@ where
|
||||
let ark_large_domain = GeneralEvaluationDomain::<F::ArkEquivalent>::new(test_size).unwrap();
|
||||
|
||||
let mut scalars: Vec<F> = F::Config::generate_random(small_size);
|
||||
let scalars_h = HostOrDeviceSlice::on_host(scalars.clone());
|
||||
let mut ark_scalars = scalars
|
||||
.iter()
|
||||
.map(|v| v.to_ark())
|
||||
@@ -134,21 +126,29 @@ where
|
||||
let mut config = NTTConfig::default();
|
||||
config.ordering = Ordering::kNR;
|
||||
config.ntt_algorithm = alg;
|
||||
let mut ntt_result_1 = HostOrDeviceSlice::on_host(vec![F::zero(); small_size]);
|
||||
let mut ntt_result_2 = HostOrDeviceSlice::on_host(vec![F::zero(); small_size]);
|
||||
ntt(&scalars_h, NTTDir::kForward, &config, &mut ntt_result_1).unwrap();
|
||||
let mut ntt_result_1 = vec![F::zero(); small_size];
|
||||
let mut ntt_result_2 = vec![F::zero(); small_size];
|
||||
let ntt_result_2 = HostSlice::from_mut_slice(&mut ntt_result_2);
|
||||
let scalars_h = HostSlice::from_slice(&scalars[..small_size]);
|
||||
ntt(
|
||||
scalars_h,
|
||||
NTTDir::kForward,
|
||||
&config,
|
||||
HostSlice::from_mut_slice(&mut ntt_result_1),
|
||||
)
|
||||
.unwrap();
|
||||
assert_ne!(*ntt_result_1.as_slice(), scalars);
|
||||
config.coset_gen = F::from_ark(test_size_rou);
|
||||
ntt(&scalars_h, NTTDir::kForward, &config, &mut ntt_result_2).unwrap();
|
||||
let mut ntt_large_result = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
|
||||
ntt(scalars_h, NTTDir::kForward, &config, ntt_result_2).unwrap();
|
||||
let mut ntt_large_result = vec![F::zero(); test_size];
|
||||
// back to non-coset NTT
|
||||
config.coset_gen = F::one();
|
||||
scalars.resize(test_size, F::zero());
|
||||
ntt(
|
||||
&HostOrDeviceSlice::on_host(scalars.clone()),
|
||||
HostSlice::from_slice(&scalars),
|
||||
NTTDir::kForward,
|
||||
&config,
|
||||
&mut ntt_large_result,
|
||||
HostSlice::from_mut_slice(&mut ntt_large_result),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(*ntt_result_1.as_slice(), ntt_large_result.as_slice()[..small_size]);
|
||||
@@ -170,13 +170,18 @@ where
|
||||
|
||||
config.coset_gen = F::from_ark(test_size_rou);
|
||||
config.ordering = Ordering::kRN;
|
||||
let mut intt_result = HostOrDeviceSlice::on_host(vec![F::zero(); small_size]);
|
||||
ntt(&ntt_result_2, NTTDir::kInverse, &config, &mut intt_result).unwrap();
|
||||
let mut intt_result = vec![F::zero(); small_size];
|
||||
ntt(
|
||||
ntt_result_2,
|
||||
NTTDir::kInverse,
|
||||
&config,
|
||||
HostSlice::from_mut_slice(&mut intt_result),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(*intt_result.as_slice(), scalars[..small_size]);
|
||||
|
||||
ark_small_domain.ifft_in_place(&mut ark_scalars);
|
||||
let intt_result_as_ark = intt_result
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|p| p.to_ark())
|
||||
.collect::<Vec<F::ArkEquivalent>>();
|
||||
@@ -188,7 +193,7 @@ where
|
||||
pub fn check_ntt_arbitrary_coset<F: FieldImpl + ArkConvertible>()
|
||||
where
|
||||
F::ArkEquivalent: FftField + ArkField,
|
||||
<F as FieldImpl>::Config: NTT<F> + GenerateRandom<F>,
|
||||
<F as FieldImpl>::Config: NTT<F, F> + GenerateRandom<F>,
|
||||
{
|
||||
let mut seed = test_rng();
|
||||
let test_sizes = [1 << 4, 1 << 17];
|
||||
@@ -204,10 +209,10 @@ where
|
||||
.get_coset(coset_gen)
|
||||
.unwrap();
|
||||
|
||||
let mut scalars = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size));
|
||||
let mut scalars = F::Config::generate_random(test_size);
|
||||
let scalars = HostSlice::from_mut_slice(&mut scalars);
|
||||
// here you can see how arkworks type can be easily created without any purpose-built conversions
|
||||
let mut ark_scalars = scalars
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|v| F::ArkEquivalent::from_random_bytes(&v.to_bytes_le()).unwrap())
|
||||
.collect::<Vec<F::ArkEquivalent>>();
|
||||
@@ -217,7 +222,7 @@ where
|
||||
for alg in [NttAlgorithm::Radix2, NttAlgorithm::MixedRadix] {
|
||||
config.ordering = Ordering::kNR;
|
||||
config.ntt_algorithm = alg;
|
||||
ntt_inplace(&mut scalars, NTTDir::kForward, &config).unwrap();
|
||||
ntt_inplace(scalars, NTTDir::kForward, &config).unwrap();
|
||||
|
||||
let ark_scalars_copy = ark_scalars.clone();
|
||||
ark_domain.fft_in_place(&mut ark_scalars);
|
||||
@@ -231,9 +236,8 @@ where
|
||||
assert_eq!(ark_scalars, ark_scalars_copy);
|
||||
|
||||
config.ordering = Ordering::kRN;
|
||||
ntt_inplace(&mut scalars, NTTDir::kInverse, &config).unwrap();
|
||||
ntt_inplace(scalars, NTTDir::kInverse, &config).unwrap();
|
||||
let ntt_result_as_ark = scalars
|
||||
.as_slice()
|
||||
.iter()
|
||||
.map(|p| p.to_ark())
|
||||
.collect::<Vec<F::ArkEquivalent>>();
|
||||
@@ -245,7 +249,7 @@ where
|
||||
|
||||
pub fn check_ntt_batch<F: FieldImpl>()
|
||||
where
|
||||
<F as FieldImpl>::Config: NTT<F> + GenerateRandom<F>,
|
||||
<F as FieldImpl>::Config: NTT<F, F> + GenerateRandom<F>,
|
||||
<F as FieldImpl>::Config: VecOps<F>,
|
||||
{
|
||||
let test_sizes = [1 << 4, 1 << 12];
|
||||
@@ -254,7 +258,8 @@ where
|
||||
let coset_generators = [F::one(), F::Config::generate_random(1)[0]];
|
||||
let mut config = NTTConfig::default();
|
||||
for batch_size in batch_sizes {
|
||||
let scalars = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size * batch_size));
|
||||
let scalars = F::Config::generate_random(test_size * batch_size);
|
||||
let scalars = HostSlice::from_slice(&scalars);
|
||||
|
||||
for coset_gen in coset_generators {
|
||||
for is_inverse in [NTTDir::kInverse, NTTDir::kForward] {
|
||||
@@ -268,19 +273,25 @@ where
|
||||
] {
|
||||
config.coset_gen = coset_gen;
|
||||
config.ordering = ordering;
|
||||
let mut batch_ntt_result = HostOrDeviceSlice::on_host(vec![F::zero(); batch_size * test_size]);
|
||||
let mut batch_ntt_result = vec![F::zero(); batch_size * test_size];
|
||||
for alg in [NttAlgorithm::Radix2, NttAlgorithm::MixedRadix] {
|
||||
config.batch_size = batch_size as i32;
|
||||
config.ntt_algorithm = alg;
|
||||
ntt(&scalars, is_inverse, &config, &mut batch_ntt_result).unwrap();
|
||||
ntt(
|
||||
scalars,
|
||||
is_inverse,
|
||||
&config,
|
||||
HostSlice::from_mut_slice(&mut batch_ntt_result),
|
||||
)
|
||||
.unwrap();
|
||||
config.batch_size = 1;
|
||||
let mut one_ntt_result = HostOrDeviceSlice::on_host(vec![F::one(); test_size]);
|
||||
let mut one_ntt_result = vec![F::one(); test_size];
|
||||
for i in 0..batch_size {
|
||||
ntt(
|
||||
&HostOrDeviceSlice::on_host(scalars[i * test_size..(i + 1) * test_size].to_vec()),
|
||||
&scalars[i * test_size..(i + 1) * test_size],
|
||||
is_inverse,
|
||||
&config,
|
||||
&mut one_ntt_result,
|
||||
HostSlice::from_mut_slice(&mut one_ntt_result),
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
@@ -297,25 +308,30 @@ where
|
||||
// for now, columns batching only works with MixedRadix NTT
|
||||
config.batch_size = batch_size as i32;
|
||||
config.columns_batch = true;
|
||||
let mut transposed_input = HostOrDeviceSlice::on_host(vec![F::zero(); batch_size * test_size]);
|
||||
let mut transposed_input = vec![F::zero(); batch_size * test_size];
|
||||
transpose_matrix(
|
||||
&scalars,
|
||||
scalars,
|
||||
row_size,
|
||||
column_size,
|
||||
&mut transposed_input,
|
||||
HostSlice::from_mut_slice(&mut transposed_input),
|
||||
&config.ctx,
|
||||
on_device,
|
||||
is_async,
|
||||
)
|
||||
.unwrap();
|
||||
let mut col_batch_ntt_result =
|
||||
HostOrDeviceSlice::on_host(vec![F::zero(); batch_size * test_size]);
|
||||
ntt(&transposed_input, is_inverse, &config, &mut col_batch_ntt_result).unwrap();
|
||||
let mut col_batch_ntt_result = vec![F::zero(); batch_size * test_size];
|
||||
ntt(
|
||||
HostSlice::from_slice(&transposed_input),
|
||||
is_inverse,
|
||||
&config,
|
||||
HostSlice::from_mut_slice(&mut col_batch_ntt_result),
|
||||
)
|
||||
.unwrap();
|
||||
transpose_matrix(
|
||||
&col_batch_ntt_result,
|
||||
HostSlice::from_slice(&col_batch_ntt_result),
|
||||
column_size,
|
||||
row_size,
|
||||
&mut transposed_input,
|
||||
HostSlice::from_mut_slice(&mut transposed_input),
|
||||
&config.ctx,
|
||||
on_device,
|
||||
is_async,
|
||||
@@ -333,7 +349,7 @@ where
|
||||
pub fn check_ntt_device_async<F: FieldImpl + ArkConvertible>()
|
||||
where
|
||||
F::ArkEquivalent: FftField,
|
||||
<F as FieldImpl>::Config: NTT<F> + GenerateRandom<F>,
|
||||
<F as FieldImpl>::Config: NTT<F, F> + GenerateRandom<F>,
|
||||
{
|
||||
let device_count = get_device_count().unwrap();
|
||||
|
||||
@@ -352,14 +368,14 @@ where
|
||||
.ctx
|
||||
.stream;
|
||||
for batch_size in batch_sizes {
|
||||
let scalars_h: Vec<F> = F::Config::generate_random(test_size * batch_size);
|
||||
let sum_of_coeffs: F::ArkEquivalent = scalars_h[..test_size]
|
||||
let scalars: Vec<F> = F::Config::generate_random(test_size * batch_size);
|
||||
let sum_of_coeffs: F::ArkEquivalent = scalars[..test_size]
|
||||
.iter()
|
||||
.map(|x| x.to_ark())
|
||||
.sum();
|
||||
let mut scalars_d = HostOrDeviceSlice::cuda_malloc(test_size * batch_size).unwrap();
|
||||
let mut scalars_d = DeviceVec::<F>::cuda_malloc(test_size * batch_size).unwrap();
|
||||
scalars_d
|
||||
.copy_from_host(&scalars_h)
|
||||
.copy_from_host(HostSlice::from_slice(&scalars))
|
||||
.unwrap();
|
||||
|
||||
for coset_gen in coset_generators {
|
||||
@@ -374,21 +390,22 @@ where
|
||||
for alg in [NttAlgorithm::Radix2, NttAlgorithm::MixedRadix] {
|
||||
config.ntt_algorithm = alg;
|
||||
let mut ntt_result_h = vec![F::zero(); test_size * batch_size];
|
||||
ntt_inplace(&mut scalars_d, NTTDir::kForward, &config).unwrap();
|
||||
let mut ntt_result_slice = HostSlice::from_mut_slice(&mut ntt_result_h);
|
||||
ntt_inplace(&mut *scalars_d, NTTDir::kForward, &config).unwrap();
|
||||
if coset_gen == F::one() {
|
||||
scalars_d
|
||||
.copy_to_host(&mut ntt_result_h)
|
||||
.copy_to_host(ntt_result_slice)
|
||||
.unwrap();
|
||||
assert_eq!(sum_of_coeffs, ntt_result_h[0].to_ark());
|
||||
assert_eq!(sum_of_coeffs, ntt_result_slice[0].to_ark());
|
||||
}
|
||||
ntt_inplace(&mut scalars_d, NTTDir::kInverse, &config).unwrap();
|
||||
ntt_inplace(&mut *scalars_d, NTTDir::kInverse, &config).unwrap();
|
||||
scalars_d
|
||||
.copy_to_host_async(&mut ntt_result_h, &stream)
|
||||
.copy_to_host_async(&mut ntt_result_slice, &stream)
|
||||
.unwrap();
|
||||
stream
|
||||
.synchronize()
|
||||
.unwrap();
|
||||
assert_eq!(scalars_h, ntt_result_h);
|
||||
assert_eq!(scalars, *ntt_result_h.as_slice());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -397,11 +414,11 @@ where
|
||||
});
|
||||
}
|
||||
|
||||
pub fn check_release_domain<F: FieldImpl + ArkConvertible>()
|
||||
pub fn check_release_domain<F: FieldImpl>()
|
||||
where
|
||||
F::ArkEquivalent: FftField,
|
||||
<F as FieldImpl>::Config: NTT<F> + GenerateRandom<F>,
|
||||
<F as FieldImpl>::Config: NTTDomain<F>,
|
||||
{
|
||||
let config: NTTConfig<'static, F> = NTTConfig::default();
|
||||
rel_domain::<F>(&config.ctx);
|
||||
let err = rel_domain::<F>(&config.ctx);
|
||||
assert!(err.is_ok())
|
||||
}
|
||||
|
||||
814
wrappers/rust/icicle-core/src/polynomials/mod.rs
Normal file
814
wrappers/rust/icicle-core/src/polynomials/mod.rs
Normal file
@@ -0,0 +1,814 @@
|
||||
use crate::traits::{FieldConfig, FieldImpl};
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
pub trait UnivariatePolynomial
|
||||
where
|
||||
Self::Field: FieldImpl,
|
||||
Self::FieldConfig: FieldConfig,
|
||||
{
|
||||
type Field;
|
||||
type FieldConfig;
|
||||
|
||||
fn from_coeffs<S: HostOrDeviceSlice<Self::Field> + ?Sized>(coeffs: &S, size: usize) -> Self;
|
||||
fn from_rou_evals<S: HostOrDeviceSlice<Self::Field> + ?Sized>(evals: &S, size: usize) -> Self;
|
||||
fn divide(&self, denominator: &Self) -> (Self, Self)
|
||||
where
|
||||
Self: Sized;
|
||||
fn div_by_vanishing(&self, degree: u64) -> Self;
|
||||
fn add_monomial_inplace(&mut self, monomial_coeff: &Self::Field, monomial: u64);
|
||||
fn sub_monomial_inplace(&mut self, monomial_coeff: &Self::Field, monomial: u64);
|
||||
fn slice(&self, offset: u64, stride: u64, size: u64) -> Self;
|
||||
fn even(&self) -> Self;
|
||||
fn odd(&self) -> Self;
|
||||
fn eval(&self, x: &Self::Field) -> Self::Field;
|
||||
fn degree(&self) -> i64;
|
||||
fn eval_on_domain<D: HostOrDeviceSlice<Self::Field> + ?Sized, E: HostOrDeviceSlice<Self::Field> + ?Sized>(
|
||||
&self,
|
||||
domain: &D,
|
||||
evals: &mut E,
|
||||
);
|
||||
fn get_nof_coeffs(&self) -> u64;
|
||||
fn get_coeff(&self, idx: u64) -> Self::Field;
|
||||
fn copy_coeffs<S: HostOrDeviceSlice<Self::Field> + ?Sized>(&self, start_idx: u64, coeffs: &mut S);
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_univariate_polynomial_api {
|
||||
(
|
||||
$field_prefix:literal,
|
||||
$field_prefix_ident:ident,
|
||||
$field:ident,
|
||||
$field_cfg:ident
|
||||
) => {
|
||||
use icicle_core::{polynomials::UnivariatePolynomial, traits::FieldImpl};
|
||||
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
|
||||
use std::{
|
||||
clone, cmp,
|
||||
ffi::c_void,
|
||||
ops::{Add, AddAssign, Div, Mul, Rem, Sub},
|
||||
ptr, slice,
|
||||
};
|
||||
|
||||
type PolynomialHandle = *const c_void;
|
||||
|
||||
extern "C" {
|
||||
#[link_name = concat!($field_prefix, "_polynomial_init_cuda_backend")]
|
||||
fn init_cuda_backend() -> bool;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_create_from_coefficients")]
|
||||
fn create_from_coeffs(coeffs: *const $field, size: usize) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_create_from_rou_evaluations")]
|
||||
fn create_from_rou_evals(coeffs: *const $field, size: usize) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_clone")]
|
||||
fn clone(p: PolynomialHandle) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_delete")]
|
||||
fn delete(ptr: PolynomialHandle);
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_print")]
|
||||
fn print(ptr: PolynomialHandle);
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_add")]
|
||||
fn add(a: PolynomialHandle, b: PolynomialHandle) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_add_inplace")]
|
||||
fn add_inplace(a: PolynomialHandle, b: PolynomialHandle) -> c_void;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_subtract")]
|
||||
fn subtract(a: PolynomialHandle, b: PolynomialHandle) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_multiply")]
|
||||
fn multiply(a: PolynomialHandle, b: PolynomialHandle) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_multiply_by_scalar")]
|
||||
fn multiply_by_scalar(a: PolynomialHandle, b: &$field) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_quotient")]
|
||||
fn quotient(a: PolynomialHandle, b: PolynomialHandle) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_remainder")]
|
||||
fn remainder(a: PolynomialHandle, b: PolynomialHandle) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_division")]
|
||||
fn divide(a: PolynomialHandle, b: PolynomialHandle, q: *mut PolynomialHandle, r: *mut PolynomialHandle);
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_divide_by_vanishing")]
|
||||
fn div_by_vanishing(a: PolynomialHandle, deg: u64) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_add_monomial_inplace")]
|
||||
fn add_monomial_inplace(a: PolynomialHandle, monomial_coeff: &$field, monomial: u64) -> c_void;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_sub_monomial_inplace")]
|
||||
fn sub_monomial_inplace(a: PolynomialHandle, monomial_coeff: &$field, monomial: u64) -> c_void;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_slice")]
|
||||
fn slice(a: PolynomialHandle, offset: u64, stride: u64, size: u64) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_even")]
|
||||
fn even(a: PolynomialHandle) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_odd")]
|
||||
fn odd(a: PolynomialHandle) -> PolynomialHandle;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_evaluate_on_domain")]
|
||||
fn eval_on_domain(a: PolynomialHandle, domain: *const $field, domain_size: u64, evals: *mut $field);
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_degree")]
|
||||
fn degree(a: PolynomialHandle) -> i64;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_copy_coeffs_range")]
|
||||
fn copy_coeffs(a: PolynomialHandle, host_coeffs: *mut $field, start_idx: u64, end_idx: u64) -> u64;
|
||||
|
||||
#[link_name = concat!($field_prefix, "_polynomial_get_coeffs_raw_ptr")]
|
||||
fn get_coeffs_ptr(a: PolynomialHandle, len: *mut u64, device_id: *mut u64) -> *mut $field;
|
||||
}
|
||||
|
||||
pub struct DensePolynomial {
|
||||
handle: PolynomialHandle,
|
||||
}
|
||||
|
||||
impl DensePolynomial {
|
||||
pub fn init_cuda_backend() -> bool {
|
||||
unsafe { init_cuda_backend() }
|
||||
}
|
||||
|
||||
// TODO Yuval: implement Display trait
|
||||
pub fn print(&self) {
|
||||
unsafe {
|
||||
print(self.handle);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn coeffs_mut_slice(&mut self) -> &mut DeviceSlice<$field> {
|
||||
unsafe {
|
||||
let mut len: u64 = 0;
|
||||
let mut device_id: u64 = 0;
|
||||
let mut coeffs_mut = get_coeffs_ptr(self.handle, &mut len, &mut device_id);
|
||||
let s = slice::from_raw_parts_mut(coeffs_mut, len as usize);
|
||||
DeviceSlice::from_mut_slice(s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl UnivariatePolynomial for DensePolynomial {
|
||||
type Field = $field;
|
||||
type FieldConfig = $field_cfg;
|
||||
|
||||
fn from_coeffs<S: HostOrDeviceSlice<Self::Field> + ?Sized>(coeffs: &S, size: usize) -> Self {
|
||||
unsafe {
|
||||
DensePolynomial {
|
||||
handle: create_from_coeffs(coeffs.as_ptr(), size),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn from_rou_evals<S: HostOrDeviceSlice<Self::Field> + ?Sized>(evals: &S, size: usize) -> Self {
|
||||
unsafe {
|
||||
Self {
|
||||
handle: create_from_rou_evals(evals.as_ptr(), size),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn divide(&self, denominator: &Self) -> (Self, Self) {
|
||||
let mut q_handle: PolynomialHandle = std::ptr::null_mut();
|
||||
let mut r_handle: PolynomialHandle = std::ptr::null_mut();
|
||||
unsafe {
|
||||
divide(self.handle, denominator.handle, &mut q_handle, &mut r_handle);
|
||||
}
|
||||
(Self { handle: q_handle }, Self { handle: r_handle })
|
||||
}
|
||||
|
||||
fn div_by_vanishing(&self, degree: u64) -> Self {
|
||||
unsafe {
|
||||
Self {
|
||||
handle: div_by_vanishing(self.handle, degree),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn add_monomial_inplace(&mut self, monomial_coeff: &Self::Field, monomial: u64) {
|
||||
unsafe {
|
||||
add_monomial_inplace(self.handle, monomial_coeff, monomial);
|
||||
}
|
||||
}
|
||||
|
||||
fn sub_monomial_inplace(&mut self, monomial_coeff: &Self::Field, monomial: u64) {
|
||||
unsafe {
|
||||
sub_monomial_inplace(self.handle, monomial_coeff, monomial);
|
||||
}
|
||||
}
|
||||
|
||||
fn slice(&self, offset: u64, stride: u64, size: u64) -> Self {
|
||||
unsafe {
|
||||
Self {
|
||||
handle: slice(self.handle, offset, stride, size),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn even(&self) -> Self {
|
||||
unsafe {
|
||||
Self {
|
||||
handle: even(self.handle),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn odd(&self) -> Self {
|
||||
unsafe {
|
||||
Self {
|
||||
handle: odd(self.handle),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn eval(&self, x: &Self::Field) -> Self::Field {
|
||||
let mut eval = Self::Field::zero();
|
||||
unsafe {
|
||||
eval_on_domain(self.handle, x, 1, &mut eval);
|
||||
}
|
||||
eval
|
||||
}
|
||||
|
||||
fn eval_on_domain<
|
||||
D: HostOrDeviceSlice<Self::Field> + ?Sized,
|
||||
E: HostOrDeviceSlice<Self::Field> + ?Sized,
|
||||
>(
|
||||
&self,
|
||||
domain: &D,
|
||||
evals: &mut E,
|
||||
) {
|
||||
assert!(
|
||||
domain.len() <= evals.len(),
|
||||
"eval_on_domain(): eval size must not be smaller then domain"
|
||||
);
|
||||
unsafe {
|
||||
eval_on_domain(
|
||||
self.handle,
|
||||
domain.as_ptr(),
|
||||
domain.len() as u64,
|
||||
evals.as_mut_ptr(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn get_nof_coeffs(&self) -> u64 {
|
||||
unsafe {
|
||||
// returns total #coeffs. Not copying when null
|
||||
let nof_coeffs = copy_coeffs(self.handle, std::ptr::null_mut(), 0, 0);
|
||||
nof_coeffs
|
||||
}
|
||||
}
|
||||
|
||||
fn get_coeff(&self, idx: u64) -> Self::Field {
|
||||
let mut coeff: Self::Field = Self::Field::zero();
|
||||
unsafe { copy_coeffs(self.handle, &mut coeff, idx, idx) };
|
||||
coeff
|
||||
}
|
||||
|
||||
fn copy_coeffs<S: HostOrDeviceSlice<Self::Field> + ?Sized>(&self, start_idx: u64, coeffs: &mut S) {
|
||||
let coeffs_len = coeffs.len() as u64;
|
||||
let nof_coeffs = self.get_nof_coeffs();
|
||||
let end_idx = cmp::min(nof_coeffs, start_idx + coeffs_len - 1);
|
||||
|
||||
unsafe {
|
||||
copy_coeffs(self.handle, coeffs.as_mut_ptr(), start_idx, end_idx);
|
||||
}
|
||||
}
|
||||
|
||||
fn degree(&self) -> i64 {
|
||||
unsafe { degree(self.handle) }
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for DensePolynomial {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
delete(self.handle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for DensePolynomial {
|
||||
fn clone(&self) -> Self {
|
||||
unsafe {
|
||||
DensePolynomial {
|
||||
handle: clone(self.handle),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Add for &DensePolynomial {
|
||||
type Output = DensePolynomial;
|
||||
|
||||
fn add(self: Self, rhs: Self) -> Self::Output {
|
||||
unsafe {
|
||||
DensePolynomial {
|
||||
handle: add(self.handle, rhs.handle),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AddAssign<&DensePolynomial> for DensePolynomial {
|
||||
fn add_assign(&mut self, other: &DensePolynomial) {
|
||||
unsafe { add_inplace(self.handle, other.handle) };
|
||||
}
|
||||
}
|
||||
|
||||
impl Sub for &DensePolynomial {
|
||||
type Output = DensePolynomial;
|
||||
|
||||
fn sub(self: Self, rhs: Self) -> Self::Output {
|
||||
unsafe {
|
||||
DensePolynomial {
|
||||
handle: subtract(self.handle, rhs.handle),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Mul for &DensePolynomial {
|
||||
type Output = DensePolynomial;
|
||||
|
||||
fn mul(self: Self, rhs: Self) -> Self::Output {
|
||||
unsafe {
|
||||
DensePolynomial {
|
||||
handle: multiply(self.handle, rhs.handle),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// poly * scalar
|
||||
impl Mul<&$field> for &DensePolynomial {
|
||||
type Output = DensePolynomial;
|
||||
|
||||
fn mul(self: Self, rhs: &$field) -> Self::Output {
|
||||
unsafe {
|
||||
DensePolynomial {
|
||||
handle: multiply_by_scalar(self.handle, rhs),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// scalar * poly
|
||||
impl Mul<&DensePolynomial> for &$field {
|
||||
type Output = DensePolynomial;
|
||||
|
||||
fn mul(self, rhs: &DensePolynomial) -> Self::Output {
|
||||
unsafe {
|
||||
DensePolynomial {
|
||||
handle: multiply_by_scalar(rhs.handle, self),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Div for &DensePolynomial {
|
||||
type Output = DensePolynomial;
|
||||
|
||||
fn div(self: Self, rhs: Self) -> Self::Output {
|
||||
unsafe {
|
||||
DensePolynomial {
|
||||
handle: quotient(self.handle, rhs.handle),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Rem for &DensePolynomial {
|
||||
type Output = DensePolynomial;
|
||||
|
||||
fn rem(self: Self, rhs: Self) -> Self::Output {
|
||||
unsafe {
|
||||
DensePolynomial {
|
||||
handle: remainder(self.handle, rhs.handle),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! impl_polynomial_tests {
|
||||
(
|
||||
$field_prefix_ident:ident,
|
||||
$field:ident
|
||||
) => {
|
||||
use super::*;
|
||||
use icicle_core::ntt::{get_root_of_unity, initialize_domain, release_domain, NTTDomain};
|
||||
use icicle_core::vec_ops::{add_scalars, mul_scalars, sub_scalars, VecOps, VecOpsConfig};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};
|
||||
use std::sync::Once;
|
||||
|
||||
use icicle_core::traits::{FieldImpl, GenerateRandom};
|
||||
|
||||
type Poly = DensePolynomial;
|
||||
|
||||
fn init_domain<F: FieldImpl>(max_size: u64, ctx: &DeviceContext, fast_twiddles_mode: bool)
|
||||
where
|
||||
<F as FieldImpl>::Config: NTTDomain<F>,
|
||||
{
|
||||
let rou: F = get_root_of_unity(max_size);
|
||||
initialize_domain(rou, &ctx, fast_twiddles_mode).unwrap();
|
||||
}
|
||||
|
||||
fn rel_domain<F: FieldImpl>(ctx: &DeviceContext)
|
||||
where
|
||||
<F as FieldImpl>::Config: NTTDomain<F>,
|
||||
{
|
||||
release_domain::<F>(&ctx).unwrap()
|
||||
}
|
||||
|
||||
fn randomize_coeffs<F: FieldImpl>(size: usize) -> Vec<F>
|
||||
where
|
||||
<F as FieldImpl>::Config: GenerateRandom<F>,
|
||||
{
|
||||
let coeffs = F::Config::generate_random(size);
|
||||
coeffs
|
||||
}
|
||||
|
||||
fn rand() -> $field {
|
||||
let r = randomize_coeffs::<$field>(1);
|
||||
// let coeffs = $field::Config::generate_random(1);
|
||||
r[0]
|
||||
}
|
||||
|
||||
// Note: implementing field arithmetic (+,-,*) for fields via vec_ops since they are not implemented on host
|
||||
fn add(a: &$field, b: &$field) -> $field {
|
||||
let a = [a.clone()];
|
||||
let b = [b.clone()];
|
||||
let mut result = [$field::zero()];
|
||||
|
||||
let cfg = VecOpsConfig::default();
|
||||
add_scalars(
|
||||
HostSlice::from_slice(&a),
|
||||
HostSlice::from_slice(&b),
|
||||
HostSlice::from_mut_slice(&mut result),
|
||||
&cfg,
|
||||
)
|
||||
.unwrap();
|
||||
result[0]
|
||||
}
|
||||
|
||||
fn sub(a: &$field, b: &$field) -> $field {
|
||||
let a = [a.clone()];
|
||||
let b = [b.clone()];
|
||||
let mut result = [$field::zero()];
|
||||
|
||||
let cfg = VecOpsConfig::default();
|
||||
sub_scalars(
|
||||
HostSlice::from_slice(&a),
|
||||
HostSlice::from_slice(&b),
|
||||
HostSlice::from_mut_slice(&mut result),
|
||||
&cfg,
|
||||
)
|
||||
.unwrap();
|
||||
result[0]
|
||||
}
|
||||
|
||||
fn mul(a: &$field, b: &$field) -> $field {
|
||||
let a = [a.clone()];
|
||||
let b = [b.clone()];
|
||||
let mut result = [$field::zero()];
|
||||
|
||||
let cfg = VecOpsConfig::default();
|
||||
mul_scalars(
|
||||
HostSlice::from_slice(&a),
|
||||
HostSlice::from_slice(&b),
|
||||
HostSlice::from_mut_slice(&mut result),
|
||||
&cfg,
|
||||
)
|
||||
.unwrap();
|
||||
result[0]
|
||||
}
|
||||
|
||||
fn randomize_poly(size: usize) -> Poly {
|
||||
let coeffs = randomize_coeffs::<$field>(size);
|
||||
let p = Poly::from_coeffs(HostSlice::from_slice(&coeffs), size);
|
||||
p
|
||||
}
|
||||
|
||||
static INIT: Once = Once::new();
|
||||
pub fn setup() -> () {
|
||||
INIT.call_once(|| {
|
||||
let device_id: usize = 0;
|
||||
// using logn=20 since babybear NTT tests is using it and I don't want order of tests to be a problem
|
||||
let domain_max_size: u64 = 1 << 20; //
|
||||
// TODO Yuval: how to consolidate this with NTT tests and avoid releaseDomain being called too early???
|
||||
let ctx = DeviceContext::default_for_device(device_id);
|
||||
init_domain::<ScalarField>(domain_max_size, &ctx, false /*=fast twiddle */);
|
||||
|
||||
Poly::init_cuda_backend();
|
||||
});
|
||||
}
|
||||
|
||||
// Note: tests are marked with #[ignore] since they conflict with NTT tests domain. This is a (hopefully temporary) workaround.
|
||||
// The poly tests are executed via 'cargo test -- --ignored' as an additional step
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_poly_eval() {
|
||||
setup();
|
||||
|
||||
// testing correct evaluation of f(8) for f(x)=4x^2+2x+5
|
||||
let coeffs = [$field::from_u32(5), $field::from_u32(2), $field::from_u32(4)];
|
||||
let f = Poly::from_coeffs(HostSlice::from_slice(&coeffs), coeffs.len());
|
||||
let x = $field::from_u32(8);
|
||||
let f_x = f.eval(&x);
|
||||
assert_eq!(f_x, $field::from_u32(277));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_poly_clone() {
|
||||
setup();
|
||||
|
||||
// testing that the clone g(x) is independent of f(x) and cloned correctly
|
||||
let mut f = randomize_poly(8);
|
||||
let x = rand();
|
||||
let fx = f.eval(&x);
|
||||
|
||||
let g = f.clone();
|
||||
f += &g;
|
||||
|
||||
let gx = g.eval(&x);
|
||||
let new_fx = f.eval(&x);
|
||||
|
||||
assert_eq!(fx, gx); // cloned correctly
|
||||
assert_eq!(add(&fx, &gx), new_fx);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_poly_add_sub_mul() {
|
||||
setup();
|
||||
|
||||
// testing add/sub operations
|
||||
let size = 1 << 10;
|
||||
let mut f = randomize_poly(size);
|
||||
let mut g = randomize_poly(size);
|
||||
|
||||
let x = rand();
|
||||
let fx = f.eval(&x);
|
||||
let gx = g.eval(&x);
|
||||
|
||||
let poly_add = &f + &g;
|
||||
let poly_sub = &f - &g;
|
||||
let poly_mul = &f * &g;
|
||||
|
||||
assert_eq!(poly_add.eval(&x), add(&fx, &gx));
|
||||
assert_eq!(poly_sub.eval(&x), sub(&fx, &gx));
|
||||
assert_eq!(poly_mul.eval(&x), mul(&fx, &gx));
|
||||
|
||||
// test scalar multiplication
|
||||
let s1 = rand();
|
||||
let s2 = rand();
|
||||
let poly_mul_s1 = &f * &s1;
|
||||
let poly_mul_s2 = &s2 * &f;
|
||||
assert_eq!(poly_mul_s1.eval(&x), mul(&fx, &s1));
|
||||
assert_eq!(poly_mul_s2.eval(&x), mul(&fx, &s2));
|
||||
|
||||
// test inplace add
|
||||
f += &g;
|
||||
assert_eq!(f.eval(&x), add(&fx, &gx));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_poly_monomials() {
|
||||
setup();
|
||||
|
||||
// testing add/sub monomials inplace
|
||||
let zero = $field::from_u32(0);
|
||||
let one = $field::from_u32(1);
|
||||
let two = $field::from_u32(2);
|
||||
let three = $field::from_u32(3);
|
||||
|
||||
// f(x) = 1+2x^2
|
||||
let coeffs = [one, zero, two];
|
||||
let mut f = Poly::from_coeffs(HostSlice::from_slice(&coeffs), coeffs.len());
|
||||
let x = rand();
|
||||
let fx = f.eval(&x);
|
||||
|
||||
f.add_monomial_inplace(&three, 1); // +3x
|
||||
let fx_add = f.eval(&x);
|
||||
assert_eq!(fx_add, add(&fx, &mul(&three, &x)));
|
||||
|
||||
f.sub_monomial_inplace(&one, 0); // -1
|
||||
let fx_sub = f.eval(&x);
|
||||
assert_eq!(fx_sub, sub(&fx_add, &one));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_poly_read_coeffs() {
|
||||
setup();
|
||||
|
||||
let zero = $field::from_u32(0);
|
||||
let one = $field::from_u32(1);
|
||||
let two = $field::from_u32(2);
|
||||
let three = $field::from_u32(3);
|
||||
let four = $field::from_u32(4);
|
||||
|
||||
let coeffs = [one, two, three, four];
|
||||
let mut f = Poly::from_coeffs(HostSlice::from_slice(&coeffs), coeffs.len());
|
||||
|
||||
// read coeffs to host memory
|
||||
let mut host_mem = vec![$field::zero(); coeffs.len()];
|
||||
f.copy_coeffs(0, HostSlice::from_mut_slice(&mut host_mem));
|
||||
assert_eq!(host_mem, coeffs);
|
||||
|
||||
// read coeffs to device memory
|
||||
let mut device_mem = DeviceVec::<$field>::cuda_malloc(coeffs.len()).unwrap();
|
||||
f.copy_coeffs(0, &mut device_mem[..]);
|
||||
let mut host_coeffs_from_dev = vec![ScalarField::zero(); coeffs.len() as usize];
|
||||
device_mem
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut host_coeffs_from_dev))
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(host_mem, host_coeffs_from_dev);
|
||||
|
||||
// multiply by two and read single coeff
|
||||
f = &f * &two;
|
||||
// read single coeff
|
||||
let x_squared_coeff = f.get_coeff(2);
|
||||
assert_eq!(x_squared_coeff, mul(&two, &three));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_poly_division() {
|
||||
setup();
|
||||
|
||||
// divide f(x)/g(x), compute q(x), r(x) and check f(x)=q(x)*g(x)+r(x)
|
||||
|
||||
let f = randomize_poly(1 << 12);
|
||||
let g = randomize_poly(1 << 4);
|
||||
|
||||
let (q, r) = f.divide(&g);
|
||||
|
||||
let f_reconstructed = &(&q * &g) + &r;
|
||||
let x = rand();
|
||||
|
||||
assert_eq!(f.eval(&x), f_reconstructed.eval(&x));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_poly_divide_by_vanishing() {
|
||||
setup();
|
||||
|
||||
let zero = $field::from_u32(0);
|
||||
let one = $field::from_u32(1);
|
||||
let minus_one = sub(&zero, &one);
|
||||
// compute random f(x) and compute f(x)*v(x) for v(x) vanishing poly
|
||||
// divide by vanishing and check that f(x) is reconstructed
|
||||
|
||||
let f = randomize_poly(1 << 12);
|
||||
let v_coeffs = [minus_one, zero, zero, zero, one]; // x^4-1
|
||||
let v = Poly::from_coeffs(HostSlice::from_slice(&v_coeffs), v_coeffs.len());
|
||||
|
||||
let fv = &f * &v;
|
||||
let deg_f = f.degree();
|
||||
let deg_fv = fv.degree();
|
||||
assert_eq!(deg_f + 4, deg_fv);
|
||||
|
||||
let f_reconstructed = fv.div_by_vanishing(4);
|
||||
assert_eq!(deg_f, f_reconstructed.degree());
|
||||
|
||||
let x = rand();
|
||||
assert_eq!(f.eval(&x), f_reconstructed.eval(&x));
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_poly_eval_on_domain() {
|
||||
setup();
|
||||
|
||||
let one = $field::from_u32(1);
|
||||
let two = $field::from_u32(2);
|
||||
let three = $field::from_u32(3);
|
||||
|
||||
let f = randomize_poly(1 << 12);
|
||||
let domain = [one, two, three];
|
||||
|
||||
// evaluate to host memory
|
||||
let mut host_evals = vec![ScalarField::zero(); domain.len()];
|
||||
f.eval_on_domain(
|
||||
HostSlice::from_slice(&domain),
|
||||
HostSlice::from_mut_slice(&mut host_evals),
|
||||
);
|
||||
|
||||
// check eval on domain agrees with eval() method
|
||||
assert_eq!(f.eval(&one), host_evals[0]);
|
||||
assert_eq!(f.eval(&two), host_evals[1]);
|
||||
assert_eq!(f.eval(&three), host_evals[2]);
|
||||
|
||||
// evaluate to device memory
|
||||
let mut device_evals = DeviceVec::<ScalarField>::cuda_malloc(domain.len()).unwrap();
|
||||
f.eval_on_domain(HostSlice::from_slice(&domain), &mut device_evals[..]);
|
||||
let mut host_evals_from_device = vec![ScalarField::zero(); domain.len()];
|
||||
device_evals
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut host_evals_from_device))
|
||||
.unwrap();
|
||||
|
||||
// check that evaluation to device memory is equivalent
|
||||
assert_eq!(host_evals, host_evals_from_device);
|
||||
|
||||
// use evals as domain (on device) and evaluate from device to host
|
||||
f.eval_on_domain(&mut device_evals[..], HostSlice::from_mut_slice(&mut host_evals));
|
||||
// check that the evaluations are correct
|
||||
assert_eq!(f.eval(&host_evals_from_device[0]), host_evals[0]);
|
||||
assert_eq!(f.eval(&host_evals_from_device[1]), host_evals[1]);
|
||||
assert_eq!(f.eval(&host_evals_from_device[2]), host_evals[2]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_odd_even_slicing() {
|
||||
setup();
|
||||
let size = (1 << 10) - 3;
|
||||
// slicing even and odd parts and checking
|
||||
let f = randomize_poly(size);
|
||||
let x = rand();
|
||||
|
||||
let even = f.even();
|
||||
let odd = f.odd();
|
||||
assert_eq!(f.degree(), even.degree() + odd.degree() + 1);
|
||||
|
||||
// computing even(x) and odd(x) directly
|
||||
let expected_even = (0..=f.degree())
|
||||
.filter(|&i| i % 2 == 0)
|
||||
.rev()
|
||||
.fold($field::zero(), |acc, i| {
|
||||
add(&mul(&acc, &x), &f.get_coeff(i as u64))
|
||||
});
|
||||
|
||||
let expected_odd = (0..=f.degree())
|
||||
.filter(|&i| i % 2 != 0)
|
||||
.rev()
|
||||
.fold($field::zero(), |acc, i| {
|
||||
add(&mul(&acc, &x), &f.get_coeff(i as u64))
|
||||
});
|
||||
|
||||
// check that even(x) and odd(x) compute correctly
|
||||
|
||||
let evenx = even.eval(&x);
|
||||
let oddx = odd.eval(&x);
|
||||
assert_eq!(expected_even, evenx);
|
||||
assert_eq!(expected_odd, oddx);
|
||||
}
|
||||
|
||||
use icicle_core::ntt::{ntt, ntt_inplace, NTTConfig, NTTDir, Ordering};
|
||||
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn test_coeffs_slice() {
|
||||
setup();
|
||||
|
||||
let size = 4;
|
||||
let coeffs = randomize_coeffs::<$field>(size);
|
||||
let mut f = Poly::from_coeffs(HostSlice::from_slice(&coeffs), size);
|
||||
|
||||
// take a mutable coeffs slice as a DeviceSlice
|
||||
let coeffs_slice_dev = f.coeffs_mut_slice();
|
||||
assert_eq!(coeffs_slice_dev.len(), size);
|
||||
assert!(coeffs_slice_dev.is_on_device());
|
||||
|
||||
// let g = &f + &f; // cannot borrow here since s is a mutable slice of f
|
||||
|
||||
// copy to host and check equality
|
||||
let mut coeffs_copied_from_slice = vec![ScalarField::zero(); coeffs_slice_dev.len()];
|
||||
coeffs_slice_dev
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut coeffs_copied_from_slice))
|
||||
.unwrap();
|
||||
assert_eq!(coeffs_copied_from_slice, coeffs);
|
||||
|
||||
// or can use the memory directly
|
||||
let mut config: NTTConfig<'_, $field> = NTTConfig::default();
|
||||
let mut ntt_result = vec![$field::zero(); coeffs_slice_dev.len()];
|
||||
ntt(
|
||||
coeffs_slice_dev,
|
||||
NTTDir::kForward,
|
||||
&config,
|
||||
HostSlice::from_mut_slice(&mut ntt_result),
|
||||
)
|
||||
.unwrap();
|
||||
// ntt[0] is f(one) because it's the sum of coeffs
|
||||
assert_eq!(ntt_result[0], f.eval(&$field::one()));
|
||||
|
||||
// after last use of coeffs_slice_dev, can borrow f again
|
||||
let g = &f * &f;
|
||||
assert_eq!(mul(&ntt_result[0], &ntt_result[0]), g.eval(&$field::one()));
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -2,14 +2,14 @@
|
||||
pub mod tests;
|
||||
|
||||
use icicle_cuda_runtime::{
|
||||
device::check_device,
|
||||
device_context::{DeviceContext, DEFAULT_DEVICE_ID},
|
||||
memory::HostOrDeviceSlice,
|
||||
memory::{DeviceSlice, HostOrDeviceSlice},
|
||||
};
|
||||
|
||||
use crate::{error::IcicleResult, traits::FieldImpl};
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PoseidonConstants<'a, F: FieldImpl> {
|
||||
arity: u32,
|
||||
|
||||
@@ -18,10 +18,10 @@ pub struct PoseidonConstants<'a, F: FieldImpl> {
|
||||
full_rounds_half: u32,
|
||||
|
||||
/// These should be pointers to data allocated on device
|
||||
round_constants: &'a [F],
|
||||
mds_matrix: &'a [F],
|
||||
non_sparse_matrix: &'a [F],
|
||||
sparse_matrices: &'a [F],
|
||||
round_constants: &'a DeviceSlice<F>,
|
||||
mds_matrix: &'a DeviceSlice<F>,
|
||||
non_sparse_matrix: &'a DeviceSlice<F>,
|
||||
sparse_matrices: &'a DeviceSlice<F>,
|
||||
|
||||
/// Domain tag is the first element in the Poseidon state.
|
||||
/// For the Merkle tree mode it should equal 2^arity - 1
|
||||
@@ -88,8 +88,8 @@ pub trait Poseidon<F: FieldImpl> {
|
||||
) -> IcicleResult<PoseidonConstants<'a, F>>;
|
||||
fn load_optimized_constants<'a>(arity: u32, ctx: &DeviceContext) -> IcicleResult<PoseidonConstants<'a, F>>;
|
||||
fn poseidon_unchecked(
|
||||
input: &mut HostOrDeviceSlice<F>,
|
||||
output: &mut HostOrDeviceSlice<F>,
|
||||
input: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
arity: u32,
|
||||
constants: &PoseidonConstants<F>,
|
||||
@@ -146,8 +146,8 @@ where
|
||||
///
|
||||
/// * `config` - config used to specify extra arguments of the Poseidon.
|
||||
pub fn poseidon_hash_many<F>(
|
||||
input: &mut HostOrDeviceSlice<F>,
|
||||
output: &mut HostOrDeviceSlice<F>,
|
||||
input: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
number_of_states: u32,
|
||||
arity: u32,
|
||||
constants: &PoseidonConstants<F>,
|
||||
@@ -179,6 +179,22 @@ where
|
||||
);
|
||||
}
|
||||
|
||||
let ctx_device_id = config
|
||||
.ctx
|
||||
.device_id;
|
||||
if let Some(device_id) = input.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in input and context are different"
|
||||
);
|
||||
}
|
||||
if let Some(device_id) = output.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in output and context are different"
|
||||
);
|
||||
}
|
||||
check_device(ctx_device_id);
|
||||
let mut local_cfg = config.clone();
|
||||
local_cfg.are_inputs_on_device = input.is_on_device();
|
||||
local_cfg.are_outputs_on_device = output.is_on_device();
|
||||
@@ -204,7 +220,7 @@ macro_rules! impl_poseidon {
|
||||
mod $field_prefix_ident {
|
||||
use crate::poseidon::{$field, $field_config, CudaError, DeviceContext, PoseidonConfig, PoseidonConstants};
|
||||
extern "C" {
|
||||
#[link_name = concat!($field_prefix, "CreateOptimizedPoseidonConstants")]
|
||||
#[link_name = concat!($field_prefix, "_create_optimized_poseidon_constants_cuda")]
|
||||
pub(crate) fn _create_optimized_constants(
|
||||
arity: u32,
|
||||
full_rounds_half: u32,
|
||||
@@ -214,14 +230,14 @@ macro_rules! impl_poseidon {
|
||||
poseidon_constants: *mut PoseidonConstants<$field>,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "InitOptimizedPoseidonConstants")]
|
||||
#[link_name = concat!($field_prefix, "_init_optimized_poseidon_constants_cuda")]
|
||||
pub(crate) fn _load_optimized_constants(
|
||||
arity: u32,
|
||||
ctx: &DeviceContext,
|
||||
constants: *mut PoseidonConstants<$field>,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "PoseidonHash")]
|
||||
#[link_name = concat!($field_prefix, "_poseidon_hash_cuda")]
|
||||
pub(crate) fn hash_many(
|
||||
input: *mut $field,
|
||||
output: *mut $field,
|
||||
@@ -268,8 +284,8 @@ macro_rules! impl_poseidon {
|
||||
}
|
||||
|
||||
fn poseidon_unchecked(
|
||||
input: &mut HostOrDeviceSlice<$field>,
|
||||
output: &mut HostOrDeviceSlice<$field>,
|
||||
input: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
output: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
number_of_states: u32,
|
||||
arity: u32,
|
||||
constants: &PoseidonConstants<$field>,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::traits::FieldImpl;
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use icicle_cuda_runtime::memory::{HostOrDeviceSlice, HostSlice};
|
||||
|
||||
use std::io::Read;
|
||||
use std::path::PathBuf;
|
||||
@@ -26,16 +26,16 @@ where
|
||||
{
|
||||
let test_size = 1 << 10;
|
||||
let arity = 2u32;
|
||||
let inputs = vec![F::one(); test_size * arity as usize];
|
||||
let outputs = vec![F::zero(); test_size];
|
||||
let mut inputs = vec![F::one(); test_size * arity as usize];
|
||||
let mut outputs = vec![F::zero(); test_size];
|
||||
|
||||
let mut input_slice = HostOrDeviceSlice::on_host(inputs);
|
||||
let mut output_slice = HostOrDeviceSlice::on_host(outputs);
|
||||
let input_slice = HostSlice::from_mut_slice(&mut inputs);
|
||||
let output_slice = HostSlice::from_mut_slice(&mut outputs);
|
||||
|
||||
let config = PoseidonConfig::default();
|
||||
poseidon_hash_many::<F>(
|
||||
&mut input_slice,
|
||||
&mut output_slice,
|
||||
input_slice,
|
||||
output_slice,
|
||||
test_size as u32,
|
||||
arity as u32,
|
||||
&constants,
|
||||
@@ -43,8 +43,8 @@ where
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let a1 = output_slice[0..1][0];
|
||||
let a2 = output_slice[output_slice.len() - 2..output_slice.len() - 1][0];
|
||||
let a1 = output_slice[0];
|
||||
let a2 = output_slice[output_slice.len() - 2];
|
||||
|
||||
println!("first: {:?}, last: {:?}", a1, a2);
|
||||
assert_eq!(a1, a2);
|
||||
|
||||
@@ -7,9 +7,13 @@ use crate::{
|
||||
};
|
||||
#[cfg(feature = "arkworks")]
|
||||
use ark_ec::short_weierstrass::{Affine as ArkAffine, Projective as ArkProjective};
|
||||
use icicle_cuda_runtime::{error::CudaResultWrap, memory::HostOrDeviceSlice};
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::DeviceContext,
|
||||
error::CudaResultWrap,
|
||||
memory::{DeviceVec, HostSlice},
|
||||
};
|
||||
|
||||
pub fn check_scalar_equality<F: FieldImpl>() {
|
||||
pub fn check_field_equality<F: FieldImpl>() {
|
||||
let left = F::zero();
|
||||
let right = F::one();
|
||||
assert_ne!(left, right);
|
||||
@@ -48,6 +52,7 @@ where
|
||||
assert_eq!(left, right);
|
||||
}
|
||||
|
||||
#[cfg(feature = "arkworks")]
|
||||
pub fn check_ark_scalar_convert<F: FieldImpl + ArkConvertible>()
|
||||
where
|
||||
F::Config: GenerateRandom<F>,
|
||||
@@ -60,6 +65,7 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "arkworks")]
|
||||
pub fn check_ark_point_convert<C: Curve>()
|
||||
where
|
||||
Affine<C>: ArkConvertible<ArkEquivalent = ArkAffine<C::ArkSWConfig>>,
|
||||
@@ -79,27 +85,28 @@ where
|
||||
|
||||
pub fn check_field_convert_montgomery<F>()
|
||||
where
|
||||
F: FieldImpl + MontgomeryConvertible,
|
||||
F: FieldImpl + MontgomeryConvertible<'static>,
|
||||
F::Config: GenerateRandom<F>,
|
||||
{
|
||||
let size = 1 << 10;
|
||||
let scalars = F::Config::generate_random(size);
|
||||
let device_ctx = DeviceContext::default();
|
||||
|
||||
let mut d_scalars = HostOrDeviceSlice::cuda_malloc(size).unwrap();
|
||||
let mut d_scalars = DeviceVec::cuda_malloc(size).unwrap();
|
||||
d_scalars
|
||||
.copy_from_host(&scalars)
|
||||
.copy_from_host(HostSlice::from_slice(&scalars))
|
||||
.unwrap();
|
||||
|
||||
F::to_mont(&mut d_scalars)
|
||||
F::to_mont(&mut d_scalars, &device_ctx)
|
||||
.wrap()
|
||||
.unwrap();
|
||||
F::from_mont(&mut d_scalars)
|
||||
F::from_mont(&mut d_scalars, &device_ctx)
|
||||
.wrap()
|
||||
.unwrap();
|
||||
|
||||
let mut scalars_copy = vec![F::zero(); size];
|
||||
d_scalars
|
||||
.copy_to_host(&mut scalars_copy)
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut scalars_copy))
|
||||
.unwrap();
|
||||
|
||||
for (s1, s2) in scalars
|
||||
@@ -112,27 +119,28 @@ where
|
||||
|
||||
pub fn check_points_convert_montgomery<C: Curve>()
|
||||
where
|
||||
Affine<C>: MontgomeryConvertible,
|
||||
Projective<C>: MontgomeryConvertible,
|
||||
Affine<C>: MontgomeryConvertible<'static>,
|
||||
Projective<C>: MontgomeryConvertible<'static>,
|
||||
{
|
||||
let size = 1 << 10;
|
||||
let device_ctx = DeviceContext::default();
|
||||
|
||||
let affine_points = C::generate_random_affine_points(size);
|
||||
let mut d_affine = HostOrDeviceSlice::cuda_malloc(size).unwrap();
|
||||
let mut d_affine = DeviceVec::cuda_malloc(size).unwrap();
|
||||
d_affine
|
||||
.copy_from_host(&affine_points)
|
||||
.copy_from_host(HostSlice::from_slice(&affine_points))
|
||||
.unwrap();
|
||||
|
||||
Affine::<C>::to_mont(&mut d_affine)
|
||||
Affine::<C>::to_mont(&mut d_affine, &device_ctx)
|
||||
.wrap()
|
||||
.unwrap();
|
||||
Affine::<C>::from_mont(&mut d_affine)
|
||||
Affine::<C>::from_mont(&mut d_affine, &device_ctx)
|
||||
.wrap()
|
||||
.unwrap();
|
||||
|
||||
let mut affine_copy = vec![Affine::<C>::zero(); size];
|
||||
d_affine
|
||||
.copy_to_host(&mut affine_copy)
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut affine_copy))
|
||||
.unwrap();
|
||||
|
||||
for (p1, p2) in affine_points
|
||||
@@ -143,21 +151,21 @@ where
|
||||
}
|
||||
|
||||
let proj_points = C::generate_random_projective_points(size);
|
||||
let mut d_proj = HostOrDeviceSlice::cuda_malloc(size).unwrap();
|
||||
let mut d_proj = DeviceVec::cuda_malloc(size).unwrap();
|
||||
d_proj
|
||||
.copy_from_host(&proj_points)
|
||||
.copy_from_host(HostSlice::from_slice(&proj_points))
|
||||
.unwrap();
|
||||
|
||||
Projective::<C>::to_mont(&mut d_proj)
|
||||
Projective::<C>::to_mont(&mut d_proj, &device_ctx)
|
||||
.wrap()
|
||||
.unwrap();
|
||||
Projective::<C>::from_mont(&mut d_proj)
|
||||
Projective::<C>::from_mont(&mut d_proj, &device_ctx)
|
||||
.wrap()
|
||||
.unwrap();
|
||||
|
||||
let mut projective_copy = vec![Projective::<C>::zero(); size];
|
||||
d_proj
|
||||
.copy_to_host(&mut projective_copy)
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut projective_copy))
|
||||
.unwrap();
|
||||
|
||||
for (p1, p2) in proj_points
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
use crate::error::IcicleResult;
|
||||
#[cfg(feature = "arkworks")]
|
||||
use ark_ff::Field as ArkField;
|
||||
use icicle_cuda_runtime::{error::CudaError, memory::HostOrDeviceSlice};
|
||||
use icicle_cuda_runtime::{device_context::DeviceContext, error::CudaError, memory::DeviceSlice};
|
||||
use std::{
|
||||
fmt::{Debug, Display},
|
||||
mem::MaybeUninit,
|
||||
@@ -29,6 +29,7 @@ pub trait FieldImpl:
|
||||
fn from_bytes_le(bytes: &[u8]) -> Self;
|
||||
fn zero() -> Self;
|
||||
fn one() -> Self;
|
||||
fn from_u32(val: u32) -> Self;
|
||||
}
|
||||
|
||||
#[cfg(feature = "arkworks")]
|
||||
@@ -39,9 +40,9 @@ pub trait ArkConvertible {
|
||||
fn from_ark(ark: Self::ArkEquivalent) -> Self;
|
||||
}
|
||||
|
||||
pub trait MontgomeryConvertible: Sized {
|
||||
fn to_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError;
|
||||
fn from_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError;
|
||||
pub trait MontgomeryConvertible<'a>: Sized {
|
||||
fn to_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError;
|
||||
fn from_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError;
|
||||
}
|
||||
|
||||
pub trait IcicleResultWrap {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use icicle_cuda_runtime::device::check_device;
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::{DeviceContext, DEFAULT_DEVICE_ID},
|
||||
memory::HostOrDeviceSlice,
|
||||
@@ -54,7 +55,7 @@ pub fn merkle_tree_digests_len(height: u32, arity: u32) -> usize {
|
||||
|
||||
pub trait TreeBuilder<F: FieldImpl> {
|
||||
fn build_poseidon_tree_unchecked(
|
||||
leaves: &mut HostOrDeviceSlice<F>,
|
||||
leaves: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
digests: &mut [F],
|
||||
height: u32,
|
||||
arity: u32,
|
||||
@@ -75,7 +76,7 @@ pub trait TreeBuilder<F: FieldImpl> {
|
||||
///
|
||||
/// * `config` - config used to specify extra arguments of the Tree builder.
|
||||
pub fn build_poseidon_merkle_tree<F>(
|
||||
leaves: &mut HostOrDeviceSlice<F>,
|
||||
leaves: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
digests: &mut [F],
|
||||
height: u32,
|
||||
arity: u32,
|
||||
@@ -100,6 +101,16 @@ where
|
||||
);
|
||||
}
|
||||
|
||||
let ctx_device_id = config
|
||||
.ctx
|
||||
.device_id;
|
||||
if let Some(device_id) = leaves.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in leaves and context are different"
|
||||
);
|
||||
}
|
||||
check_device(ctx_device_id);
|
||||
let mut local_cfg = config.clone();
|
||||
local_cfg.are_inputs_on_device = leaves.is_on_device();
|
||||
|
||||
@@ -121,7 +132,7 @@ macro_rules! impl_tree_builder {
|
||||
use icicle_core::poseidon::PoseidonConstants;
|
||||
|
||||
extern "C" {
|
||||
#[link_name = concat!($field_prefix, "BuildPoseidonMerkleTree")]
|
||||
#[link_name = concat!($field_prefix, "_build_poseidon_merkle_tree")]
|
||||
pub(crate) fn _build_poseidon_merkle_tree(
|
||||
leaves: *mut $field,
|
||||
digests: *mut $field,
|
||||
@@ -135,7 +146,7 @@ macro_rules! impl_tree_builder {
|
||||
|
||||
impl TreeBuilder<$field> for $field_config {
|
||||
fn build_poseidon_tree_unchecked(
|
||||
leaves: &mut HostOrDeviceSlice<$field>,
|
||||
leaves: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
digests: &mut [$field],
|
||||
height: u32,
|
||||
arity: u32,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
|
||||
use crate::{
|
||||
poseidon::{tests::init_poseidon, Poseidon},
|
||||
@@ -15,16 +15,16 @@ where
|
||||
let height = 20;
|
||||
let arity = 2;
|
||||
let keep_rows = 1;
|
||||
let leaves = vec![F::one(); 1 << (height - 1)];
|
||||
let mut leaves = vec![F::one(); 1 << (height - 1)];
|
||||
let mut digests = vec![F::zero(); merkle_tree_digests_len(height, arity)];
|
||||
|
||||
let mut leaves_slice = HostOrDeviceSlice::on_host(leaves);
|
||||
let leaves_slice = HostSlice::from_mut_slice(&mut leaves);
|
||||
|
||||
let constants = init_poseidon(arity as u32);
|
||||
|
||||
let mut config = TreeBuilderConfig::default();
|
||||
config.keep_rows = keep_rows;
|
||||
build_poseidon_merkle_tree::<F>(&mut leaves_slice, &mut digests, height, arity, &constants, &config).unwrap();
|
||||
build_poseidon_merkle_tree::<F>(leaves_slice, &mut digests, height, arity, &constants, &config).unwrap();
|
||||
|
||||
println!("Root: {:?}", digests[0..1][0]);
|
||||
println!("Root: {:?}", digests[0]);
|
||||
}
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use icicle_cuda_runtime::device::check_device;
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::{DeviceContext, DEFAULT_DEVICE_ID},
|
||||
memory::HostOrDeviceSlice,
|
||||
@@ -16,7 +17,6 @@ pub struct VecOpsConfig<'a> {
|
||||
is_a_on_device: bool,
|
||||
is_b_on_device: bool,
|
||||
is_result_on_device: bool,
|
||||
is_result_montgomery_form: bool,
|
||||
/// Whether to run the vector operations asynchronously. If set to `true`, the functions will be non-blocking and you'd need to synchronize
|
||||
/// it explicitly by running `stream.synchronize()`. If set to false, the functions will block the current CPU thread.
|
||||
pub is_async: bool,
|
||||
@@ -35,7 +35,6 @@ impl<'a> VecOpsConfig<'a> {
|
||||
is_a_on_device: false,
|
||||
is_b_on_device: false,
|
||||
is_result_on_device: false,
|
||||
is_result_montgomery_form: false,
|
||||
is_async: false,
|
||||
}
|
||||
}
|
||||
@@ -44,38 +43,43 @@ impl<'a> VecOpsConfig<'a> {
|
||||
#[doc(hidden)]
|
||||
pub trait VecOps<F> {
|
||||
fn add(
|
||||
a: &HostOrDeviceSlice<F>,
|
||||
b: &HostOrDeviceSlice<F>,
|
||||
result: &mut HostOrDeviceSlice<F>,
|
||||
a: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
b: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
cfg: &VecOpsConfig,
|
||||
) -> IcicleResult<()>;
|
||||
|
||||
fn sub(
|
||||
a: &HostOrDeviceSlice<F>,
|
||||
b: &HostOrDeviceSlice<F>,
|
||||
result: &mut HostOrDeviceSlice<F>,
|
||||
a: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
b: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
cfg: &VecOpsConfig,
|
||||
) -> IcicleResult<()>;
|
||||
|
||||
fn mul(
|
||||
a: &HostOrDeviceSlice<F>,
|
||||
b: &HostOrDeviceSlice<F>,
|
||||
result: &mut HostOrDeviceSlice<F>,
|
||||
a: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
b: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
cfg: &VecOpsConfig,
|
||||
) -> IcicleResult<()>;
|
||||
|
||||
fn transpose(
|
||||
input: &HostOrDeviceSlice<F>,
|
||||
input: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
row_size: u32,
|
||||
column_size: u32,
|
||||
output: &mut HostOrDeviceSlice<F>,
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
ctx: &DeviceContext,
|
||||
on_device: bool,
|
||||
is_async: bool,
|
||||
) -> IcicleResult<()>;
|
||||
}
|
||||
|
||||
fn check_vec_ops_args<F>(a: &HostOrDeviceSlice<F>, b: &HostOrDeviceSlice<F>, result: &mut HostOrDeviceSlice<F>) {
|
||||
fn check_vec_ops_args<'a, F>(
|
||||
a: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
b: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
result: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
cfg: &VecOpsConfig<'a>,
|
||||
) -> VecOpsConfig<'a> {
|
||||
if a.len() != b.len() || a.len() != result.len() {
|
||||
panic!(
|
||||
"left, right and output lengths {}; {}; {} do not match",
|
||||
@@ -84,55 +88,77 @@ fn check_vec_ops_args<F>(a: &HostOrDeviceSlice<F>, b: &HostOrDeviceSlice<F>, res
|
||||
result.len()
|
||||
);
|
||||
}
|
||||
let ctx_device_id = cfg
|
||||
.ctx
|
||||
.device_id;
|
||||
if let Some(device_id) = a.device_id() {
|
||||
assert_eq!(device_id, ctx_device_id, "Device ids in a and context are different");
|
||||
}
|
||||
if let Some(device_id) = b.device_id() {
|
||||
assert_eq!(device_id, ctx_device_id, "Device ids in b and context are different");
|
||||
}
|
||||
if let Some(device_id) = result.device_id() {
|
||||
assert_eq!(
|
||||
device_id, ctx_device_id,
|
||||
"Device ids in result and context are different"
|
||||
);
|
||||
}
|
||||
check_device(ctx_device_id);
|
||||
|
||||
let mut res_cfg = cfg.clone();
|
||||
res_cfg.is_a_on_device = a.is_on_device();
|
||||
res_cfg.is_b_on_device = b.is_on_device();
|
||||
res_cfg.is_result_on_device = result.is_on_device();
|
||||
res_cfg
|
||||
}
|
||||
|
||||
pub fn add_scalars<F>(
|
||||
a: &HostOrDeviceSlice<F>,
|
||||
b: &HostOrDeviceSlice<F>,
|
||||
result: &mut HostOrDeviceSlice<F>,
|
||||
a: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
b: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
cfg: &VecOpsConfig,
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: VecOps<F>,
|
||||
{
|
||||
check_vec_ops_args(a, b, result);
|
||||
<<F as FieldImpl>::Config as VecOps<F>>::add(a, b, result, cfg)
|
||||
let cfg = check_vec_ops_args(a, b, result, cfg);
|
||||
<<F as FieldImpl>::Config as VecOps<F>>::add(a, b, result, &cfg)
|
||||
}
|
||||
|
||||
pub fn sub_scalars<F>(
|
||||
a: &HostOrDeviceSlice<F>,
|
||||
b: &HostOrDeviceSlice<F>,
|
||||
result: &mut HostOrDeviceSlice<F>,
|
||||
a: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
b: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
cfg: &VecOpsConfig,
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: VecOps<F>,
|
||||
{
|
||||
check_vec_ops_args(a, b, result);
|
||||
<<F as FieldImpl>::Config as VecOps<F>>::sub(a, b, result, cfg)
|
||||
let cfg = check_vec_ops_args(a, b, result, cfg);
|
||||
<<F as FieldImpl>::Config as VecOps<F>>::sub(a, b, result, &cfg)
|
||||
}
|
||||
|
||||
pub fn mul_scalars<F>(
|
||||
a: &HostOrDeviceSlice<F>,
|
||||
b: &HostOrDeviceSlice<F>,
|
||||
result: &mut HostOrDeviceSlice<F>,
|
||||
a: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
b: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
cfg: &VecOpsConfig,
|
||||
) -> IcicleResult<()>
|
||||
where
|
||||
F: FieldImpl,
|
||||
<F as FieldImpl>::Config: VecOps<F>,
|
||||
{
|
||||
check_vec_ops_args(a, b, result);
|
||||
<<F as FieldImpl>::Config as VecOps<F>>::mul(a, b, result, cfg)
|
||||
let cfg = check_vec_ops_args(a, b, result, cfg);
|
||||
<<F as FieldImpl>::Config as VecOps<F>>::mul(a, b, result, &cfg)
|
||||
}
|
||||
|
||||
pub fn transpose_matrix<F>(
|
||||
input: &HostOrDeviceSlice<F>,
|
||||
input: &(impl HostOrDeviceSlice<F> + ?Sized),
|
||||
row_size: u32,
|
||||
column_size: u32,
|
||||
output: &mut HostOrDeviceSlice<F>,
|
||||
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
|
||||
ctx: &DeviceContext,
|
||||
on_device: bool,
|
||||
is_async: bool,
|
||||
@@ -157,7 +183,7 @@ macro_rules! impl_vec_ops_field {
|
||||
use icicle_core::vec_ops::VecOpsConfig;
|
||||
|
||||
extern "C" {
|
||||
#[link_name = concat!($field_prefix, "AddCuda")]
|
||||
#[link_name = concat!($field_prefix, "_add_cuda")]
|
||||
pub(crate) fn add_scalars_cuda(
|
||||
a: *const $field,
|
||||
b: *const $field,
|
||||
@@ -166,7 +192,7 @@ macro_rules! impl_vec_ops_field {
|
||||
result: *mut $field,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "SubCuda")]
|
||||
#[link_name = concat!($field_prefix, "_sub_cuda")]
|
||||
pub(crate) fn sub_scalars_cuda(
|
||||
a: *const $field,
|
||||
b: *const $field,
|
||||
@@ -175,7 +201,7 @@ macro_rules! impl_vec_ops_field {
|
||||
result: *mut $field,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "MulCuda")]
|
||||
#[link_name = concat!($field_prefix, "_mul_cuda")]
|
||||
pub(crate) fn mul_scalars_cuda(
|
||||
a: *const $field,
|
||||
b: *const $field,
|
||||
@@ -184,7 +210,7 @@ macro_rules! impl_vec_ops_field {
|
||||
result: *mut $field,
|
||||
) -> CudaError;
|
||||
|
||||
#[link_name = concat!($field_prefix, "TransposeMatrix")]
|
||||
#[link_name = concat!($field_prefix, "_transpose_matrix_cuda")]
|
||||
pub(crate) fn transpose_cuda(
|
||||
input: *const $field,
|
||||
row_size: u32,
|
||||
@@ -199,9 +225,9 @@ macro_rules! impl_vec_ops_field {
|
||||
|
||||
impl VecOps<$field> for $field_config {
|
||||
fn add(
|
||||
a: &HostOrDeviceSlice<$field>,
|
||||
b: &HostOrDeviceSlice<$field>,
|
||||
result: &mut HostOrDeviceSlice<$field>,
|
||||
a: &(impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
b: &(impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
result: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
cfg: &VecOpsConfig,
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
@@ -209,7 +235,7 @@ macro_rules! impl_vec_ops_field {
|
||||
a.as_ptr(),
|
||||
b.as_ptr(),
|
||||
a.len() as u32,
|
||||
cfg as *const _ as *const VecOpsConfig,
|
||||
cfg as *const VecOpsConfig,
|
||||
result.as_mut_ptr(),
|
||||
)
|
||||
.wrap()
|
||||
@@ -217,9 +243,9 @@ macro_rules! impl_vec_ops_field {
|
||||
}
|
||||
|
||||
fn sub(
|
||||
a: &HostOrDeviceSlice<$field>,
|
||||
b: &HostOrDeviceSlice<$field>,
|
||||
result: &mut HostOrDeviceSlice<$field>,
|
||||
a: &(impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
b: &(impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
result: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
cfg: &VecOpsConfig,
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
@@ -227,7 +253,7 @@ macro_rules! impl_vec_ops_field {
|
||||
a.as_ptr(),
|
||||
b.as_ptr(),
|
||||
a.len() as u32,
|
||||
cfg as *const _ as *const VecOpsConfig,
|
||||
cfg as *const VecOpsConfig,
|
||||
result.as_mut_ptr(),
|
||||
)
|
||||
.wrap()
|
||||
@@ -235,9 +261,9 @@ macro_rules! impl_vec_ops_field {
|
||||
}
|
||||
|
||||
fn mul(
|
||||
a: &HostOrDeviceSlice<$field>,
|
||||
b: &HostOrDeviceSlice<$field>,
|
||||
result: &mut HostOrDeviceSlice<$field>,
|
||||
a: &(impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
b: &(impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
result: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
cfg: &VecOpsConfig,
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
@@ -245,7 +271,7 @@ macro_rules! impl_vec_ops_field {
|
||||
a.as_ptr(),
|
||||
b.as_ptr(),
|
||||
a.len() as u32,
|
||||
cfg as *const _ as *const VecOpsConfig,
|
||||
cfg as *const VecOpsConfig,
|
||||
result.as_mut_ptr(),
|
||||
)
|
||||
.wrap()
|
||||
@@ -253,10 +279,10 @@ macro_rules! impl_vec_ops_field {
|
||||
}
|
||||
|
||||
fn transpose(
|
||||
input: &HostOrDeviceSlice<$field>,
|
||||
input: &(impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
row_size: u32,
|
||||
column_size: u32,
|
||||
output: &mut HostOrDeviceSlice<$field>,
|
||||
output: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
|
||||
ctx: &DeviceContext,
|
||||
on_device: bool,
|
||||
is_async: bool,
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::traits::GenerateRandom;
|
||||
use crate::vec_ops::{add_scalars, mul_scalars, sub_scalars, FieldImpl, HostOrDeviceSlice, VecOps, VecOpsConfig};
|
||||
use crate::vec_ops::{add_scalars, mul_scalars, sub_scalars, FieldImpl, VecOps, VecOpsConfig};
|
||||
use icicle_cuda_runtime::memory::HostSlice;
|
||||
|
||||
pub fn check_vec_ops_scalars<F: FieldImpl>()
|
||||
where
|
||||
@@ -7,21 +8,27 @@ where
|
||||
{
|
||||
let test_size = 1 << 14;
|
||||
|
||||
let a = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size));
|
||||
let b = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size));
|
||||
let ones = HostOrDeviceSlice::on_host(vec![F::one(); test_size]);
|
||||
let mut result = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
|
||||
let mut result2 = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
|
||||
let mut result3 = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
|
||||
let a = F::Config::generate_random(test_size);
|
||||
let b = F::Config::generate_random(test_size);
|
||||
let ones = vec![F::one(); test_size];
|
||||
let mut result = vec![F::zero(); test_size];
|
||||
let mut result2 = vec![F::zero(); test_size];
|
||||
let mut result3 = vec![F::zero(); test_size];
|
||||
let a = HostSlice::from_slice(&a);
|
||||
let b = HostSlice::from_slice(&b);
|
||||
let ones = HostSlice::from_slice(&ones);
|
||||
let result = HostSlice::from_mut_slice(&mut result);
|
||||
let result2 = HostSlice::from_mut_slice(&mut result2);
|
||||
let result3 = HostSlice::from_mut_slice(&mut result3);
|
||||
|
||||
let cfg = VecOpsConfig::default();
|
||||
add_scalars(&a, &b, &mut result, &cfg).unwrap();
|
||||
add_scalars(a, b, result, &cfg).unwrap();
|
||||
|
||||
sub_scalars(&result, &b, &mut result2, &cfg).unwrap();
|
||||
sub_scalars(result, b, result2, &cfg).unwrap();
|
||||
|
||||
assert_eq!(a[0..1][0], result2[0..1][0]);
|
||||
assert_eq!(a[0], result2[0]);
|
||||
|
||||
mul_scalars(&a, &ones, &mut result3, &cfg).unwrap();
|
||||
mul_scalars(a, ones, result3, &cfg).unwrap();
|
||||
|
||||
assert_eq!(a[0..1][0], result3[0..1][0]);
|
||||
assert_eq!(a[0], result3[0]);
|
||||
}
|
||||
|
||||
@@ -83,6 +83,11 @@ fn main() {
|
||||
// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html
|
||||
.allowlist_function("cudaFreeAsync")
|
||||
.allowlist_function("cudaMallocAsync")
|
||||
// Unified Addressing
|
||||
// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__UNIFIED.html
|
||||
.rustified_enum("cudaMemoryType")
|
||||
.allowlist_type("cudaPointerAttributes")
|
||||
.allowlist_function("cudaPointerGetAttributes")
|
||||
//
|
||||
.generate()
|
||||
.expect("Unable to generate bindings");
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
use crate::{
|
||||
bindings::{cudaFreeAsync, cudaGetDevice, cudaGetDeviceCount, cudaMallocAsync, cudaMemGetInfo, cudaSetDevice},
|
||||
bindings::{
|
||||
cudaFreeAsync, cudaGetDevice, cudaGetDeviceCount, cudaMallocAsync, cudaMemGetInfo, cudaPointerAttributes,
|
||||
cudaPointerGetAttributes, cudaSetDevice,
|
||||
},
|
||||
error::{CudaResult, CudaResultWrap},
|
||||
stream::CudaStream,
|
||||
};
|
||||
@@ -19,6 +22,23 @@ pub fn get_device() -> CudaResult<usize> {
|
||||
unsafe { cudaGetDevice(&mut device_id) }.wrap_value(device_id as usize)
|
||||
}
|
||||
|
||||
pub fn get_device_from_pointer(ptr: *const ::std::os::raw::c_void) -> CudaResult<usize> {
|
||||
let mut ptr_attributes = MaybeUninit::<cudaPointerAttributes>::uninit();
|
||||
unsafe {
|
||||
cudaPointerGetAttributes(ptr_attributes.as_mut_ptr(), ptr).wrap()?;
|
||||
Ok(ptr_attributes
|
||||
.assume_init()
|
||||
.device as usize)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_device(device_id: usize) {
|
||||
match device_id == get_device().unwrap() {
|
||||
true => (),
|
||||
false => panic!("Attempt to use on a different device"),
|
||||
}
|
||||
}
|
||||
|
||||
// This function pre-allocates default memory pool and warms the GPU up
|
||||
// so that subsequent memory allocations and other calls are not slowed down
|
||||
pub fn warmup(stream: &CudaStream) -> CudaResult<()> {
|
||||
|
||||
@@ -3,8 +3,6 @@ use crate::stream::CudaStream;
|
||||
|
||||
pub const DEFAULT_DEVICE_ID: usize = 0;
|
||||
|
||||
use crate::device::get_device;
|
||||
|
||||
/// Properties of the device used in Icicle functions.
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Clone)]
|
||||
@@ -38,10 +36,3 @@ impl DeviceContext<'_> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn check_device(device_id: i32) {
|
||||
match device_id == get_device().unwrap() as i32 {
|
||||
true => (),
|
||||
false => panic!("Attempt to use on a different device"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#![allow(non_upper_case_globals)]
|
||||
#![allow(non_camel_case_types)]
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[allow(dead_code, non_snake_case)]
|
||||
mod bindings;
|
||||
pub mod device;
|
||||
pub mod device_context;
|
||||
|
||||
@@ -1,87 +1,233 @@
|
||||
use crate::bindings::{
|
||||
cudaFree, cudaMalloc, cudaMallocAsync, cudaMemPool_t, cudaMemcpy, cudaMemcpyAsync, cudaMemcpyKind,
|
||||
};
|
||||
use crate::device::get_device;
|
||||
use crate::device_context::check_device;
|
||||
use crate::device::{check_device, get_device_from_pointer};
|
||||
use crate::error::{CudaError, CudaResult, CudaResultWrap};
|
||||
use crate::stream::CudaStream;
|
||||
use std::mem::{size_of, MaybeUninit};
|
||||
use std::ops::{Index, IndexMut, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
|
||||
use std::mem::{size_of, ManuallyDrop, MaybeUninit};
|
||||
use std::ops::{
|
||||
Deref, DerefMut, Index, IndexMut, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive,
|
||||
};
|
||||
use std::os::raw::c_void;
|
||||
use std::slice::from_raw_parts_mut;
|
||||
use std::slice::SliceIndex;
|
||||
|
||||
pub enum HostOrDeviceSlice<'a, T> {
|
||||
Host(Vec<T>),
|
||||
Device(&'a mut [T], i32),
|
||||
#[derive(Debug)]
|
||||
pub struct HostSlice<T>([T]);
|
||||
pub struct DeviceVec<T>(ManuallyDrop<Box<[T]>>);
|
||||
pub struct DeviceSlice<T>([T]);
|
||||
|
||||
pub trait HostOrDeviceSlice<T> {
|
||||
fn is_on_device(&self) -> bool;
|
||||
fn device_id(&self) -> Option<usize>;
|
||||
unsafe fn as_ptr(&self) -> *const T;
|
||||
unsafe fn as_mut_ptr(&mut self) -> *mut T;
|
||||
fn len(&self) -> usize;
|
||||
fn is_empty(&self) -> bool;
|
||||
}
|
||||
|
||||
impl<'a, T> HostOrDeviceSlice<'a, T> {
|
||||
// Function to get the device_id for Device variant
|
||||
pub fn get_device_id(&self) -> Option<i32> {
|
||||
match self {
|
||||
HostOrDeviceSlice::Device(_, device_id) => Some(*device_id),
|
||||
HostOrDeviceSlice::Host(_) => None,
|
||||
}
|
||||
impl<T> HostOrDeviceSlice<T> for HostSlice<T> {
|
||||
fn is_on_device(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
pub fn len(&self) -> usize {
|
||||
match self {
|
||||
Self::Device(s, _) => s.len(),
|
||||
Self::Host(v) => v.len(),
|
||||
}
|
||||
fn device_id(&self) -> Option<usize> {
|
||||
None
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
match self {
|
||||
Self::Device(s, _) => s.is_empty(),
|
||||
Self::Host(v) => v.is_empty(),
|
||||
}
|
||||
unsafe fn as_ptr(&self) -> *const T {
|
||||
self.0
|
||||
.as_ptr()
|
||||
}
|
||||
|
||||
pub fn is_on_device(&self) -> bool {
|
||||
match self {
|
||||
Self::Device(_, _) => true,
|
||||
Self::Host(_) => false,
|
||||
}
|
||||
unsafe fn as_mut_ptr(&mut self) -> *mut T {
|
||||
self.0
|
||||
.as_mut_ptr()
|
||||
}
|
||||
|
||||
pub fn as_mut_slice(&mut self) -> &mut [T] {
|
||||
match self {
|
||||
Self::Device(_, _) => {
|
||||
panic!("Use copy_to_host and copy_to_host_async to move device data to a slice")
|
||||
}
|
||||
Self::Host(v) => v.as_mut_slice(),
|
||||
}
|
||||
fn len(&self) -> usize {
|
||||
self.0
|
||||
.len()
|
||||
}
|
||||
|
||||
fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> HostOrDeviceSlice<T> for DeviceSlice<T> {
|
||||
fn is_on_device(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn device_id(&self) -> Option<usize> {
|
||||
Some(
|
||||
get_device_from_pointer(unsafe { self.as_ptr() as *const ::std::os::raw::c_void })
|
||||
.expect("Invalid pointer. Maybe host pointer was used here?"),
|
||||
)
|
||||
}
|
||||
|
||||
unsafe fn as_ptr(&self) -> *const T {
|
||||
self.0
|
||||
.as_ptr()
|
||||
}
|
||||
|
||||
unsafe fn as_mut_ptr(&mut self) -> *mut T {
|
||||
self.0
|
||||
.as_mut_ptr()
|
||||
}
|
||||
|
||||
fn len(&self) -> usize {
|
||||
self.0
|
||||
.len()
|
||||
}
|
||||
|
||||
fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> HostSlice<T> {
|
||||
// Currently this function just transmutes types. However it is not guaranteed that this function
|
||||
// will always be cheap as it might at some point e.g. pin the memory which takes some time.
|
||||
pub fn from_slice(slice: &[T]) -> &Self {
|
||||
unsafe { &*(slice as *const [T] as *const Self) }
|
||||
}
|
||||
|
||||
// Currently this function just transmutes types. However it is not guaranteed that this function
|
||||
// will always be cheap as it might at some point e.g. pin the memory which takes some time.
|
||||
pub fn from_mut_slice(slice: &mut [T]) -> &mut Self {
|
||||
unsafe { &mut *(slice as *mut [T] as *mut Self) }
|
||||
}
|
||||
|
||||
pub fn as_slice(&self) -> &[T] {
|
||||
match self {
|
||||
Self::Device(_, _) => {
|
||||
panic!("Use copy_to_host and copy_to_host_async to move device data to a slice")
|
||||
&self.0
|
||||
}
|
||||
|
||||
pub fn as_mut_slice(&mut self) -> &mut [T] {
|
||||
&mut self.0
|
||||
}
|
||||
|
||||
pub fn iter(&self) -> impl Iterator<Item = &T> {
|
||||
self.0
|
||||
.iter()
|
||||
}
|
||||
|
||||
pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut T> {
|
||||
self.0
|
||||
.iter_mut()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> DeviceSlice<T> {
|
||||
pub unsafe fn from_slice(slice: &[T]) -> &Self {
|
||||
&*(slice as *const [T] as *const Self)
|
||||
}
|
||||
|
||||
pub unsafe fn from_mut_slice(slice: &mut [T]) -> &mut Self {
|
||||
&mut *(slice as *mut [T] as *mut Self)
|
||||
}
|
||||
|
||||
pub fn copy_from_host(&mut self, val: &HostSlice<T>) -> CudaResult<()> {
|
||||
assert!(
|
||||
self.len() == val.len(),
|
||||
"In copy from host, destination and source slices have different lengths"
|
||||
);
|
||||
check_device(
|
||||
self.device_id()
|
||||
.unwrap(),
|
||||
);
|
||||
let size = size_of::<T>() * self.len();
|
||||
if size != 0 {
|
||||
unsafe {
|
||||
cudaMemcpy(
|
||||
self.as_mut_ptr() as *mut c_void,
|
||||
val.as_ptr() as *const c_void,
|
||||
size,
|
||||
cudaMemcpyKind::cudaMemcpyHostToDevice,
|
||||
)
|
||||
.wrap()?
|
||||
}
|
||||
Self::Host(v) => v.as_slice(),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn as_ptr(&self) -> *const T {
|
||||
match self {
|
||||
Self::Device(s, _) => s.as_ptr(),
|
||||
Self::Host(v) => v.as_ptr(),
|
||||
pub fn copy_to_host(&self, val: &mut HostSlice<T>) -> CudaResult<()> {
|
||||
assert!(
|
||||
self.len() == val.len(),
|
||||
"In copy to host, destination and source slices have different lengths"
|
||||
);
|
||||
check_device(
|
||||
self.device_id()
|
||||
.unwrap(),
|
||||
);
|
||||
let size = size_of::<T>() * self.len();
|
||||
if size != 0 {
|
||||
unsafe {
|
||||
cudaMemcpy(
|
||||
val.as_mut_ptr() as *mut c_void,
|
||||
self.as_ptr() as *const c_void,
|
||||
size,
|
||||
cudaMemcpyKind::cudaMemcpyDeviceToHost,
|
||||
)
|
||||
.wrap()?
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn as_mut_ptr(&mut self) -> *mut T {
|
||||
match self {
|
||||
Self::Device(s, _) => s.as_mut_ptr(),
|
||||
Self::Host(v) => v.as_mut_ptr(),
|
||||
pub fn copy_from_host_async(&mut self, val: &HostSlice<T>, stream: &CudaStream) -> CudaResult<()> {
|
||||
assert!(
|
||||
self.len() == val.len(),
|
||||
"In copy from host async, destination and source slices have different lengths"
|
||||
);
|
||||
check_device(
|
||||
self.device_id()
|
||||
.unwrap(),
|
||||
);
|
||||
let size = size_of::<T>() * self.len();
|
||||
if size != 0 {
|
||||
unsafe {
|
||||
cudaMemcpyAsync(
|
||||
self.as_mut_ptr() as *mut c_void,
|
||||
val.as_ptr() as *const c_void,
|
||||
size,
|
||||
cudaMemcpyKind::cudaMemcpyHostToDevice,
|
||||
stream.handle,
|
||||
)
|
||||
.wrap()?
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn on_host(src: Vec<T>) -> Self {
|
||||
//TODO: HostOrDeviceSlice on_host() with slice input without actually copying the data
|
||||
Self::Host(src)
|
||||
pub fn copy_to_host_async(&self, val: &mut HostSlice<T>, stream: &CudaStream) -> CudaResult<()> {
|
||||
assert!(
|
||||
self.len() == val.len(),
|
||||
"In copy to host async, destination and source slices have different lengths"
|
||||
);
|
||||
check_device(
|
||||
self.device_id()
|
||||
.unwrap(),
|
||||
);
|
||||
let size = size_of::<T>() * self.len();
|
||||
if size != 0 {
|
||||
unsafe {
|
||||
cudaMemcpyAsync(
|
||||
val.as_mut_ptr() as *mut c_void,
|
||||
self.as_ptr() as *const c_void,
|
||||
size,
|
||||
cudaMemcpyKind::cudaMemcpyDeviceToHost,
|
||||
stream.handle,
|
||||
)
|
||||
.wrap()?
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> DeviceVec<T> {
|
||||
pub fn cuda_malloc(count: usize) -> CudaResult<Self> {
|
||||
let size = count
|
||||
.checked_mul(size_of::<T>())
|
||||
@@ -93,10 +239,11 @@ impl<'a, T> HostOrDeviceSlice<'a, T> {
|
||||
let mut device_ptr = MaybeUninit::<*mut c_void>::uninit();
|
||||
unsafe {
|
||||
cudaMalloc(device_ptr.as_mut_ptr(), size).wrap()?;
|
||||
Ok(Self::Device(
|
||||
from_raw_parts_mut(device_ptr.assume_init() as *mut T, count),
|
||||
get_device().unwrap() as i32,
|
||||
))
|
||||
let res = Self(ManuallyDrop::new(Box::from_raw(from_raw_parts_mut(
|
||||
device_ptr.assume_init() as *mut T,
|
||||
count,
|
||||
))));
|
||||
Ok(res)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -105,146 +252,58 @@ impl<'a, T> HostOrDeviceSlice<'a, T> {
|
||||
.checked_mul(size_of::<T>())
|
||||
.unwrap_or(0);
|
||||
if size == 0 {
|
||||
return Err(CudaError::cudaErrorMemoryAllocation);
|
||||
return Err(CudaError::cudaErrorMemoryAllocation); //TODO: only CUDA backend should return CudaError
|
||||
}
|
||||
|
||||
let mut device_ptr = MaybeUninit::<*mut c_void>::uninit();
|
||||
unsafe {
|
||||
cudaMallocAsync(device_ptr.as_mut_ptr(), size, stream.handle as *mut _ as *mut _).wrap()?;
|
||||
Ok(Self::Device(
|
||||
from_raw_parts_mut(device_ptr.assume_init() as *mut T, count),
|
||||
get_device().unwrap() as i32,
|
||||
))
|
||||
cudaMallocAsync(device_ptr.as_mut_ptr(), size, stream.handle).wrap()?;
|
||||
Ok(Self(ManuallyDrop::new(Box::from_raw(from_raw_parts_mut(
|
||||
device_ptr.assume_init() as *mut T,
|
||||
count,
|
||||
)))))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn copy_from_host(&mut self, val: &[T]) -> CudaResult<()> {
|
||||
match self {
|
||||
Self::Device(_, device_id) => check_device(*device_id),
|
||||
Self::Host(_) => panic!("Need device memory to copy into, and not host"),
|
||||
};
|
||||
assert!(
|
||||
self.len() == val.len(),
|
||||
"destination and source slices have different lengths"
|
||||
);
|
||||
let size = size_of::<T>() * self.len();
|
||||
if size != 0 {
|
||||
unsafe {
|
||||
cudaMemcpy(
|
||||
self.as_mut_ptr() as *mut c_void,
|
||||
val.as_ptr() as *const c_void,
|
||||
size,
|
||||
cudaMemcpyKind::cudaMemcpyHostToDevice,
|
||||
)
|
||||
.wrap()?
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
pub fn cuda_malloc_for_device(count: usize, device_id: usize) -> CudaResult<Self> {
|
||||
check_device(device_id);
|
||||
Self::cuda_malloc(count)
|
||||
}
|
||||
|
||||
pub fn copy_to_host(&self, val: &mut [T]) -> CudaResult<()> {
|
||||
match self {
|
||||
Self::Device(_, device_id) => check_device(*device_id),
|
||||
Self::Host(_) => panic!("Need device memory to copy from, and not host"),
|
||||
};
|
||||
assert!(
|
||||
self.len() == val.len(),
|
||||
"destination and source slices have different lengths"
|
||||
);
|
||||
let size = size_of::<T>() * self.len();
|
||||
if size != 0 {
|
||||
unsafe {
|
||||
cudaMemcpy(
|
||||
val.as_mut_ptr() as *mut c_void,
|
||||
self.as_ptr() as *const c_void,
|
||||
size,
|
||||
cudaMemcpyKind::cudaMemcpyDeviceToHost,
|
||||
)
|
||||
.wrap()?
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn copy_from_host_async(&mut self, val: &[T], stream: &CudaStream) -> CudaResult<()> {
|
||||
match self {
|
||||
Self::Device(_, device_id) => check_device(*device_id),
|
||||
Self::Host(_) => panic!("Need device memory to copy into, and not host"),
|
||||
};
|
||||
assert!(
|
||||
self.len() == val.len(),
|
||||
"destination and source slices have different lengths"
|
||||
);
|
||||
let size = size_of::<T>() * self.len();
|
||||
if size != 0 {
|
||||
unsafe {
|
||||
cudaMemcpyAsync(
|
||||
self.as_mut_ptr() as *mut c_void,
|
||||
val.as_ptr() as *const c_void,
|
||||
size,
|
||||
cudaMemcpyKind::cudaMemcpyHostToDevice,
|
||||
stream.handle as *mut _ as *mut _,
|
||||
)
|
||||
.wrap()?
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn copy_to_host_async(&self, val: &mut [T], stream: &CudaStream) -> CudaResult<()> {
|
||||
match self {
|
||||
Self::Device(_, device_id) => check_device(*device_id),
|
||||
Self::Host(_) => panic!("Need device memory to copy from, and not host"),
|
||||
};
|
||||
assert!(
|
||||
self.len() == val.len(),
|
||||
"destination and source slices have different lengths"
|
||||
);
|
||||
let size = size_of::<T>() * self.len();
|
||||
if size != 0 {
|
||||
unsafe {
|
||||
cudaMemcpyAsync(
|
||||
val.as_mut_ptr() as *mut c_void,
|
||||
self.as_ptr() as *const c_void,
|
||||
size,
|
||||
cudaMemcpyKind::cudaMemcpyDeviceToHost,
|
||||
stream.handle as *mut _ as *mut _,
|
||||
)
|
||||
.wrap()?
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
pub fn cuda_malloc_async_for_device(count: usize, stream: &CudaStream, device_id: usize) -> CudaResult<Self> {
|
||||
check_device(device_id);
|
||||
Self::cuda_malloc_async(count, stream)
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! impl_index {
|
||||
macro_rules! impl_host_index {
|
||||
($($t:ty)*) => {
|
||||
$(
|
||||
impl<'a, T> Index<$t> for HostOrDeviceSlice<'a, T>
|
||||
impl<T> Index<$t> for HostSlice<T>
|
||||
{
|
||||
type Output = [T];
|
||||
type Output = Self;
|
||||
|
||||
fn index(&self, index: $t) -> &Self::Output {
|
||||
match self {
|
||||
Self::Device(s, _) => s.index(index),
|
||||
Self::Host(v) => v.index(index),
|
||||
}
|
||||
Self::from_slice(
|
||||
self.0
|
||||
.index(index),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> IndexMut<$t> for HostOrDeviceSlice<'a, T>
|
||||
impl<T> IndexMut<$t> for HostSlice<T>
|
||||
{
|
||||
fn index_mut(&mut self, index: $t) -> &mut Self::Output {
|
||||
match self {
|
||||
Self::Device(s,_) => s.index_mut(index),
|
||||
Self::Host(v) => v.index_mut(index),
|
||||
}
|
||||
Self::from_mut_slice(
|
||||
self.0
|
||||
.index_mut(index),
|
||||
)
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
impl_index! {
|
||||
impl_host_index! {
|
||||
Range<usize>
|
||||
RangeFull
|
||||
RangeFrom<usize>
|
||||
@@ -253,22 +312,112 @@ impl_index! {
|
||||
RangeToInclusive<usize>
|
||||
}
|
||||
|
||||
impl<'a, T> Drop for HostOrDeviceSlice<'a, T> {
|
||||
fn drop(&mut self) {
|
||||
match self {
|
||||
Self::Device(s, device_id) => {
|
||||
check_device(*device_id);
|
||||
if s.is_empty() {
|
||||
return;
|
||||
}
|
||||
impl<T> Index<usize> for HostSlice<T> {
|
||||
type Output = T;
|
||||
|
||||
unsafe {
|
||||
cudaFree(s.as_mut_ptr() as *mut c_void)
|
||||
.wrap()
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
Self::Host(_) => {}
|
||||
fn index(&self, index: usize) -> &Self::Output {
|
||||
self.0
|
||||
.index(index)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> IndexMut<usize> for HostSlice<T> {
|
||||
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
|
||||
self.0
|
||||
.index_mut(index)
|
||||
}
|
||||
}
|
||||
|
||||
impl<Idx, T> Index<Idx> for DeviceVec<T>
|
||||
where
|
||||
Idx: SliceIndex<[T], Output = [T]>,
|
||||
{
|
||||
type Output = DeviceSlice<T>;
|
||||
|
||||
fn index(&self, index: Idx) -> &Self::Output {
|
||||
unsafe {
|
||||
Self::Output::from_slice(
|
||||
self.0
|
||||
.index(index),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Idx, T> IndexMut<Idx> for DeviceVec<T>
|
||||
where
|
||||
Idx: SliceIndex<[T], Output = [T]>,
|
||||
{
|
||||
fn index_mut(&mut self, index: Idx) -> &mut Self::Output {
|
||||
unsafe {
|
||||
Self::Output::from_mut_slice(
|
||||
self.0
|
||||
.index_mut(index),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Idx, T> Index<Idx> for DeviceSlice<T>
|
||||
where
|
||||
Idx: SliceIndex<[T], Output = [T]>,
|
||||
{
|
||||
type Output = Self;
|
||||
|
||||
fn index(&self, index: Idx) -> &Self::Output {
|
||||
unsafe {
|
||||
Self::from_slice(
|
||||
self.0
|
||||
.index(index),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<Idx, T> IndexMut<Idx> for DeviceSlice<T>
|
||||
where
|
||||
Idx: SliceIndex<[T], Output = [T]>,
|
||||
{
|
||||
fn index_mut(&mut self, index: Idx) -> &mut Self::Output {
|
||||
unsafe {
|
||||
Self::from_mut_slice(
|
||||
self.0
|
||||
.index_mut(index),
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Deref for DeviceVec<T> {
|
||||
type Target = DeviceSlice<T>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self[..]
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> DerefMut for DeviceVec<T> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self[..]
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Drop for DeviceVec<T> {
|
||||
fn drop(&mut self) {
|
||||
if self
|
||||
.0
|
||||
.is_empty()
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
unsafe {
|
||||
let ptr = self
|
||||
.0
|
||||
.as_mut_ptr() as *mut c_void;
|
||||
cudaFree(ptr)
|
||||
.wrap()
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,11 +11,13 @@ repository.workspace = true
|
||||
icicle-core = { workspace = true }
|
||||
icicle-cuda-runtime = { workspace = true }
|
||||
ark-bls12-377 = { version = "0.4.0", optional = true }
|
||||
criterion = "0.3"
|
||||
|
||||
[build-dependencies]
|
||||
cmake = "0.1.50"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
ark-bls12-377 = "0.4.0"
|
||||
ark-std = "0.4.0"
|
||||
ark-ff = "0.4.0"
|
||||
@@ -23,6 +25,7 @@ ark-ec = "0.4.0"
|
||||
ark-poly = "0.4.0"
|
||||
icicle-core = { path = "../../icicle-core", features = ["arkworks"] }
|
||||
icicle-bls12-377 = { path = ".", features = ["arkworks"] }
|
||||
serial_test = "3.0.0"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
@@ -32,3 +35,7 @@ g2 = ["icicle-core/g2"]
|
||||
ec_ntt = ["icicle-core/ec_ntt"]
|
||||
devmode = ["icicle-core/devmode"]
|
||||
arkworks = ["ark-bls12-377", "icicle-core/arkworks"]
|
||||
|
||||
[[bench]]
|
||||
name = "ecntt"
|
||||
harness = false # Criterion provides own harness
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
use icicle_bls12_377::curve::{CurveCfg, ScalarField};
|
||||
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
use icicle_core::impl_ecntt_bench;
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
impl_ecntt_bench!("BLS12_377", ScalarField, CurveCfg);
|
||||
|
||||
#[cfg(not(feature = "ec_ntt"))]
|
||||
fn main() {}
|
||||
@@ -13,47 +13,50 @@ fn main() {
|
||||
|
||||
// Optional Features
|
||||
#[cfg(feature = "g2")]
|
||||
config.define("G2_DEFINED", "ON");
|
||||
config.define("G2", "ON");
|
||||
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
config.define("ECNTT_DEFINED", "ON");
|
||||
config.define("ECNTT", "ON");
|
||||
|
||||
#[cfg(feature = "devmode")]
|
||||
config.define("DEVMODE", "ON");
|
||||
|
||||
// Build
|
||||
let out_dir = config
|
||||
.build_target("icicle")
|
||||
.build_target("icicle_curve")
|
||||
.build();
|
||||
|
||||
println!("cargo:rustc-link-search={}/build", out_dir.display());
|
||||
println!("cargo:rustc-link-lib=ingo_bls12_377");
|
||||
println!("cargo:rustc-link-search={}/build/lib", out_dir.display());
|
||||
|
||||
println!("cargo:rustc-link-lib=ingo_field_bls12_377");
|
||||
println!("cargo:rustc-link-lib=ingo_curve_bls12_377");
|
||||
|
||||
if cfg!(feature = "bw6-761") {
|
||||
// Base config
|
||||
let mut config = Config::new("../../../../icicle");
|
||||
config
|
||||
.define("BUILD_TESTS", "OFF")
|
||||
.define("CURVE", "bw6_761")
|
||||
.define("CMAKE_BUILD_TYPE", "Release");
|
||||
|
||||
// Optional Features
|
||||
#[cfg(feature = "bw6-761-g2")]
|
||||
config.define("G2_DEFINED", "ON");
|
||||
config.define("G2", "ON");
|
||||
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
config.define("ECNTT_DEFINED", "OFF");
|
||||
config.define("ECNTT", "OFF");
|
||||
|
||||
#[cfg(feature = "devmode")]
|
||||
config.define("DEVMODE", "ON");
|
||||
|
||||
// Build
|
||||
let out_dir = config
|
||||
.build_target("icicle")
|
||||
.build_target("icicle_curve")
|
||||
.build();
|
||||
|
||||
println!("cargo:rustc-link-search={}/build", out_dir.display());
|
||||
println!("cargo:rustc-link-lib=ingo_bw6_761");
|
||||
println!("cargo:rustc-link-search={}/build/lib/", out_dir.display());
|
||||
|
||||
println!("cargo:rustc-link-lib=ingo_field_bw6_761");
|
||||
println!("cargo:rustc-link-lib=ingo_curve_bw6_761");
|
||||
}
|
||||
|
||||
println!("cargo:rustc-link-lib=stdc++");
|
||||
|
||||
@@ -6,14 +6,15 @@ use icicle_core::curve::{Affine, Curve, Projective};
|
||||
use icicle_core::field::{Field, MontgomeryConvertibleField};
|
||||
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
|
||||
use icicle_core::{impl_curve, impl_field, impl_scalar_field};
|
||||
use icicle_cuda_runtime::device::check_device;
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
|
||||
|
||||
pub(crate) const SCALAR_LIMBS: usize = 4;
|
||||
pub(crate) const BASE_LIMBS: usize = 6;
|
||||
pub(crate) const SCALAR_LIMBS: usize = 8;
|
||||
pub(crate) const BASE_LIMBS: usize = 12;
|
||||
#[cfg(feature = "g2")]
|
||||
pub(crate) const G2_BASE_LIMBS: usize = 12;
|
||||
pub(crate) const G2_BASE_LIMBS: usize = 24;
|
||||
|
||||
impl_scalar_field!("bls12_377", bls12_377_sf, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
|
||||
#[cfg(feature = "bw6-761")]
|
||||
@@ -34,7 +35,7 @@ impl_curve!(
|
||||
);
|
||||
#[cfg(feature = "g2")]
|
||||
impl_curve!(
|
||||
"bls12_377G2",
|
||||
"bls12_377_g2",
|
||||
bls12_377_g2,
|
||||
G2CurveCfg,
|
||||
ScalarField,
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
#![cfg(feature = "ec_ntt")]
|
||||
use icicle_core::error::IcicleResult;
|
||||
use icicle_core::impl_ecntt;
|
||||
use icicle_core::ntt::{NTTConfig, NTTDir};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
|
||||
use crate::curve::{CurveCfg, ScalarCfg, ScalarField};
|
||||
use icicle_core::ecntt::Projective;
|
||||
|
||||
impl_ecntt!("bls12_377", bls12_377, ScalarField, ScalarCfg, CurveCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use crate::curve::{CurveCfg, ScalarField};
|
||||
|
||||
use icicle_core::ecntt::tests::*;
|
||||
use icicle_core::impl_ecntt_tests;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
impl_ecntt_tests!(ScalarField, CurveCfg);
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
pub mod curve;
|
||||
pub mod ecntt;
|
||||
pub mod msm;
|
||||
pub mod ntt;
|
||||
pub mod polynomials;
|
||||
pub mod poseidon;
|
||||
pub mod tree;
|
||||
pub mod vec_ops;
|
||||
|
||||
@@ -8,11 +8,15 @@ use icicle_core::{
|
||||
msm::{MSMConfig, MSM},
|
||||
traits::IcicleResultWrap,
|
||||
};
|
||||
use icicle_cuda_runtime::{device_context::DeviceContext, error::CudaError, memory::HostOrDeviceSlice};
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::DeviceContext,
|
||||
error::CudaError,
|
||||
memory::{DeviceSlice, HostOrDeviceSlice},
|
||||
};
|
||||
|
||||
impl_msm!("bls12_377", bls12_377, CurveCfg);
|
||||
#[cfg(feature = "g2")]
|
||||
impl_msm!("bls12_377G2", bls12_377_g2, G2CurveCfg);
|
||||
impl_msm!("bls12_377_g2", bls12_377_g2, G2CurveCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
|
||||
@@ -3,11 +3,10 @@ use crate::curve::{BaseCfg, BaseField};
|
||||
use crate::curve::{ScalarCfg, ScalarField};
|
||||
|
||||
use icicle_core::error::IcicleResult;
|
||||
use icicle_core::impl_ntt;
|
||||
use icicle_core::ntt::{NTTConfig, NTTDir, NTT};
|
||||
use icicle_core::ntt::{NTTConfig, NTTDir, NTTDomain, NTT};
|
||||
use icicle_core::traits::IcicleResultWrap;
|
||||
use icicle_core::{impl_ntt, impl_ntt_without_domain};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
@@ -18,9 +17,10 @@ impl_ntt!("bw6_761", bw6_761, BaseField, BaseCfg);
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use crate::curve::ScalarField;
|
||||
use crate::ntt::DEFAULT_DEVICE_ID;
|
||||
use icicle_core::impl_ntt_tests;
|
||||
use icicle_core::ntt::tests::*;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
use serial_test::{parallel, serial};
|
||||
use std::sync::OnceLock;
|
||||
|
||||
impl_ntt_tests!(ScalarField);
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
use crate::curve::{ScalarCfg, ScalarField};
|
||||
use icicle_core::impl_univariate_polynomial_api;
|
||||
|
||||
impl_univariate_polynomial_api!("bls12_377", bls12_377, ScalarField, ScalarCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use icicle_core::impl_polynomial_tests;
|
||||
impl_polynomial_tests!(bls12_377, ScalarField);
|
||||
}
|
||||
@@ -11,11 +11,13 @@ repository.workspace = true
|
||||
icicle-core = { workspace = true }
|
||||
icicle-cuda-runtime = { workspace = true }
|
||||
ark-bls12-381 = { version = "0.4.0", optional = true }
|
||||
criterion = "0.3"
|
||||
|
||||
[build-dependencies]
|
||||
cmake = "0.1.50"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
ark-bls12-381 = "0.4.0"
|
||||
ark-std = "0.4.0"
|
||||
ark-ff = "0.4.0"
|
||||
@@ -23,6 +25,7 @@ ark-ec = "0.4.0"
|
||||
ark-poly = "0.4.0"
|
||||
icicle-core = { path = "../../icicle-core", features = ["arkworks"] }
|
||||
icicle-bls12-381 = { path = ".", features = ["arkworks"] }
|
||||
serial_test = "3.0.0"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
@@ -30,3 +33,7 @@ g2 = ["icicle-core/g2"]
|
||||
ec_ntt = ["icicle-core/ec_ntt"]
|
||||
devmode = ["icicle-core/devmode"]
|
||||
arkworks = ["ark-bls12-381", "icicle-core/arkworks"]
|
||||
|
||||
[[bench]]
|
||||
name = "ecntt"
|
||||
harness = false # Criterion provides own harness
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
use icicle_bls12_381::curve::{CurveCfg, ScalarField};
|
||||
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
use icicle_core::impl_ecntt_bench;
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
impl_ecntt_bench!("BLS12_381", ScalarField, CurveCfg);
|
||||
|
||||
#[cfg(not(feature = "ec_ntt"))]
|
||||
fn main() {}
|
||||
@@ -7,28 +7,29 @@ fn main() {
|
||||
// Base config
|
||||
let mut config = Config::new("../../../../icicle");
|
||||
config
|
||||
.define("BUILD_TESTS", "OFF")
|
||||
.define("CURVE", "bls12_381")
|
||||
.define("CMAKE_BUILD_TYPE", "Release");
|
||||
|
||||
// Optional Features
|
||||
#[cfg(feature = "g2")]
|
||||
config.define("G2_DEFINED", "ON");
|
||||
config.define("G2", "ON");
|
||||
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
config.define("ECNTT_DEFINED", "ON");
|
||||
config.define("ECNTT", "ON");
|
||||
|
||||
#[cfg(feature = "devmode")]
|
||||
config.define("DEVMODE", "ON");
|
||||
|
||||
// Build
|
||||
let out_dir = config
|
||||
.build_target("icicle")
|
||||
.build_target("icicle_curve")
|
||||
.build();
|
||||
|
||||
println!("cargo:rustc-link-search={}/build", out_dir.display());
|
||||
println!("cargo:rustc-link-search={}/build/lib", out_dir.display());
|
||||
|
||||
println!("cargo:rustc-link-lib=ingo_field_bls12_381");
|
||||
println!("cargo:rustc-link-lib=ingo_curve_bls12_381");
|
||||
|
||||
println!("cargo:rustc-link-lib=ingo_bls12_381");
|
||||
println!("cargo:rustc-link-lib=stdc++");
|
||||
println!("cargo:rustc-link-lib=cudart");
|
||||
}
|
||||
|
||||
@@ -6,14 +6,15 @@ use icicle_core::curve::{Affine, Curve, Projective};
|
||||
use icicle_core::field::{Field, MontgomeryConvertibleField};
|
||||
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
|
||||
use icicle_core::{impl_curve, impl_field, impl_scalar_field};
|
||||
use icicle_cuda_runtime::device::check_device;
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
|
||||
|
||||
pub(crate) const SCALAR_LIMBS: usize = 4;
|
||||
pub(crate) const BASE_LIMBS: usize = 6;
|
||||
pub(crate) const SCALAR_LIMBS: usize = 8;
|
||||
pub(crate) const BASE_LIMBS: usize = 12;
|
||||
#[cfg(feature = "g2")]
|
||||
pub(crate) const G2_BASE_LIMBS: usize = 12;
|
||||
pub(crate) const G2_BASE_LIMBS: usize = 24;
|
||||
|
||||
impl_scalar_field!("bls12_381", bls12_381_sf, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
|
||||
impl_field!(BASE_LIMBS, BaseField, BaseCfg, Fq);
|
||||
@@ -31,7 +32,7 @@ impl_curve!(
|
||||
);
|
||||
#[cfg(feature = "g2")]
|
||||
impl_curve!(
|
||||
"bls12_381G2",
|
||||
"bls12_381_g2",
|
||||
bls12_381_g2,
|
||||
G2CurveCfg,
|
||||
ScalarField,
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
#![cfg(feature = "ec_ntt")]
|
||||
use icicle_core::error::IcicleResult;
|
||||
use icicle_core::impl_ecntt;
|
||||
use icicle_core::ntt::{NTTConfig, NTTDir};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
|
||||
use crate::curve::{CurveCfg, ScalarCfg, ScalarField};
|
||||
use icicle_core::ecntt::Projective;
|
||||
|
||||
impl_ecntt!("bls12_381", bls12_381, ScalarField, ScalarCfg, CurveCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use crate::curve::{CurveCfg, ScalarField};
|
||||
|
||||
use icicle_core::ecntt::tests::*;
|
||||
use icicle_core::impl_ecntt_tests;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
impl_ecntt_tests!(ScalarField, CurveCfg);
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
pub mod curve;
|
||||
pub mod ecntt;
|
||||
pub mod msm;
|
||||
pub mod ntt;
|
||||
pub mod polynomials;
|
||||
pub mod poseidon;
|
||||
pub mod tree;
|
||||
pub mod vec_ops;
|
||||
|
||||
@@ -8,11 +8,15 @@ use icicle_core::{
|
||||
msm::{MSMConfig, MSM},
|
||||
traits::IcicleResultWrap,
|
||||
};
|
||||
use icicle_cuda_runtime::{device_context::DeviceContext, error::CudaError, memory::HostOrDeviceSlice};
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::DeviceContext,
|
||||
error::CudaError,
|
||||
memory::{DeviceSlice, HostOrDeviceSlice},
|
||||
};
|
||||
|
||||
impl_msm!("bls12_381", bls12_381, CurveCfg);
|
||||
#[cfg(feature = "g2")]
|
||||
impl_msm!("bls12_381G2", bls12_381_g2, G2CurveCfg);
|
||||
impl_msm!("bls12_381_g2", bls12_381_g2, G2CurveCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
use crate::curve::{ScalarCfg, ScalarField};
|
||||
|
||||
use icicle_core::error::IcicleResult;
|
||||
use icicle_core::impl_ntt;
|
||||
use icicle_core::ntt::{NTTConfig, NTTDir, NTT};
|
||||
use icicle_core::ntt::{NTTConfig, NTTDir, NTTDomain, NTT};
|
||||
use icicle_core::traits::IcicleResultWrap;
|
||||
use icicle_core::{impl_ntt, impl_ntt_without_domain};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
@@ -14,9 +13,10 @@ impl_ntt!("bls12_381", bls12_381, ScalarField, ScalarCfg);
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use crate::curve::ScalarField;
|
||||
use crate::ntt::DEFAULT_DEVICE_ID;
|
||||
use icicle_core::impl_ntt_tests;
|
||||
use icicle_core::ntt::tests::*;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
use serial_test::{parallel, serial};
|
||||
use std::sync::OnceLock;
|
||||
|
||||
impl_ntt_tests!(ScalarField);
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
use crate::curve::{ScalarCfg, ScalarField};
|
||||
use icicle_core::impl_univariate_polynomial_api;
|
||||
|
||||
impl_univariate_polynomial_api!("bls12_381", bls12_381, ScalarField, ScalarCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use icicle_core::impl_polynomial_tests;
|
||||
impl_polynomial_tests!(bls12_381, ScalarField);
|
||||
}
|
||||
@@ -11,11 +11,13 @@ repository.workspace = true
|
||||
icicle-core = { workspace = true }
|
||||
icicle-cuda-runtime = { workspace = true }
|
||||
ark-bn254 = { version = "0.4.0", optional = true }
|
||||
criterion = "0.3"
|
||||
|
||||
[build-dependencies]
|
||||
cmake = "0.1.50"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
ark-bn254 = "0.4.0"
|
||||
ark-std = "0.4.0"
|
||||
ark-ff = "0.4.0"
|
||||
@@ -23,6 +25,7 @@ ark-ec = "0.4.0"
|
||||
ark-poly = "0.4.0"
|
||||
icicle-core = { path = "../../icicle-core", features = ["arkworks"] }
|
||||
icicle-bn254 = { path = ".", features = ["arkworks"] }
|
||||
serial_test = "3.0.0"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
@@ -30,3 +33,7 @@ g2 = ["icicle-core/g2"]
|
||||
ec_ntt = ["icicle-core/ec_ntt"]
|
||||
devmode = ["icicle-core/devmode"]
|
||||
arkworks = ["ark-bn254", "icicle-core/arkworks"]
|
||||
|
||||
[[bench]]
|
||||
name = "ecntt"
|
||||
harness = false # Criterion provides own harness
|
||||
10
wrappers/rust/icicle-curves/icicle-bn254/benches/ecntt.rs
Normal file
10
wrappers/rust/icicle-curves/icicle-bn254/benches/ecntt.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
use icicle_bn254::curve::{CurveCfg, ScalarField};
|
||||
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
use icicle_core::impl_ecntt_bench;
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
impl_ecntt_bench!("bn254", ScalarField, CurveCfg);
|
||||
|
||||
#[cfg(not(feature = "ec_ntt"))]
|
||||
fn main() {}
|
||||
@@ -5,30 +5,30 @@ fn main() {
|
||||
println!("cargo:rerun-if-changed=../../../../icicle");
|
||||
|
||||
// Base config
|
||||
let mut config = Config::new("../../../../icicle");
|
||||
let mut config = Config::new("../../../../icicle/");
|
||||
config
|
||||
.define("BUILD_TESTS", "OFF")
|
||||
.define("CURVE", "bn254")
|
||||
.define("CMAKE_BUILD_TYPE", "Release");
|
||||
|
||||
// Optional Features
|
||||
#[cfg(feature = "g2")]
|
||||
config.define("G2_DEFINED", "ON");
|
||||
config.define("G2", "ON");
|
||||
|
||||
#[cfg(feature = "ec_ntt")]
|
||||
config.define("ECNTT_DEFINED", "ON");
|
||||
config.define("ECNTT", "ON");
|
||||
|
||||
#[cfg(feature = "devmode")]
|
||||
config.define("DEVMODE", "ON");
|
||||
|
||||
// Build
|
||||
let out_dir = config
|
||||
.build_target("icicle")
|
||||
.build_target("icicle_curve")
|
||||
.build();
|
||||
|
||||
println!("cargo:rustc-link-search={}/build", out_dir.display());
|
||||
println!("cargo:rustc-link-search={}/build/lib/", out_dir.display());
|
||||
|
||||
println!("cargo:rustc-link-lib=ingo_bn254");
|
||||
println!("cargo:rustc-link-lib=ingo_field_bn254");
|
||||
println!("cargo:rustc-link-lib=ingo_curve_bn254");
|
||||
println!("cargo:rustc-link-lib=stdc++");
|
||||
println!("cargo:rustc-link-lib=cudart");
|
||||
}
|
||||
|
||||
@@ -6,14 +6,15 @@ use icicle_core::curve::{Affine, Curve, Projective};
|
||||
use icicle_core::field::{Field, MontgomeryConvertibleField};
|
||||
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
|
||||
use icicle_core::{impl_curve, impl_field, impl_scalar_field};
|
||||
use icicle_cuda_runtime::device::check_device;
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
|
||||
|
||||
pub(crate) const SCALAR_LIMBS: usize = 4;
|
||||
pub(crate) const BASE_LIMBS: usize = 4;
|
||||
pub(crate) const SCALAR_LIMBS: usize = 8;
|
||||
pub(crate) const BASE_LIMBS: usize = 8;
|
||||
#[cfg(feature = "g2")]
|
||||
pub(crate) const G2_BASE_LIMBS: usize = 8;
|
||||
pub(crate) const G2_BASE_LIMBS: usize = 16;
|
||||
|
||||
impl_scalar_field!("bn254", bn254_sf, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
|
||||
impl_field!(BASE_LIMBS, BaseField, BaseCfg, Fq);
|
||||
@@ -31,7 +32,7 @@ impl_curve!(
|
||||
);
|
||||
#[cfg(feature = "g2")]
|
||||
impl_curve!(
|
||||
"bn254G2",
|
||||
"bn254_g2",
|
||||
bn254_g2,
|
||||
G2CurveCfg,
|
||||
ScalarField,
|
||||
|
||||
23
wrappers/rust/icicle-curves/icicle-bn254/src/ecntt/mod.rs
Normal file
23
wrappers/rust/icicle-curves/icicle-bn254/src/ecntt/mod.rs
Normal file
@@ -0,0 +1,23 @@
|
||||
#![cfg(feature = "ec_ntt")]
|
||||
use icicle_core::error::IcicleResult;
|
||||
use icicle_core::impl_ecntt;
|
||||
use icicle_core::ntt::{NTTConfig, NTTDir};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
|
||||
use crate::curve::{CurveCfg, ScalarCfg, ScalarField};
|
||||
use icicle_core::ecntt::Projective;
|
||||
|
||||
impl_ecntt!("bn254", bn254, ScalarField, ScalarCfg, CurveCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use crate::curve::{CurveCfg, ScalarField};
|
||||
|
||||
use icicle_core::ecntt::tests::*;
|
||||
use icicle_core::impl_ecntt_tests;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
impl_ecntt_tests!(ScalarField, CurveCfg);
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
pub mod curve;
|
||||
pub mod ecntt;
|
||||
pub mod msm;
|
||||
pub mod ntt;
|
||||
pub mod polynomials;
|
||||
pub mod poseidon;
|
||||
pub mod tree;
|
||||
pub mod vec_ops;
|
||||
|
||||
@@ -8,11 +8,15 @@ use icicle_core::{
|
||||
msm::{MSMConfig, MSM},
|
||||
traits::IcicleResultWrap,
|
||||
};
|
||||
use icicle_cuda_runtime::{device_context::DeviceContext, error::CudaError, memory::HostOrDeviceSlice};
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::DeviceContext,
|
||||
error::CudaError,
|
||||
memory::{DeviceSlice, HostOrDeviceSlice},
|
||||
};
|
||||
|
||||
impl_msm!("bn254", bn254, CurveCfg);
|
||||
#[cfg(feature = "g2")]
|
||||
impl_msm!("bn254G2", bn254_g2, G2CurveCfg);
|
||||
impl_msm!("bn254_g2", bn254_g2, G2CurveCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
use crate::curve::{ScalarCfg, ScalarField};
|
||||
|
||||
use icicle_core::error::IcicleResult;
|
||||
use icicle_core::impl_ntt;
|
||||
use icicle_core::ntt::{NTTConfig, NTTDir, NTT};
|
||||
use icicle_core::ntt::{NTTConfig, NTTDir, NTTDomain, NTT};
|
||||
use icicle_core::traits::IcicleResultWrap;
|
||||
use icicle_core::{impl_ntt, impl_ntt_without_domain};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
@@ -14,9 +13,10 @@ impl_ntt!("bn254", bn254, ScalarField, ScalarCfg);
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use crate::curve::ScalarField;
|
||||
use crate::ntt::DEFAULT_DEVICE_ID;
|
||||
use icicle_core::impl_ntt_tests;
|
||||
use icicle_core::ntt::tests::*;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
use serial_test::{parallel, serial};
|
||||
use std::sync::OnceLock;
|
||||
|
||||
impl_ntt_tests!(ScalarField);
|
||||
|
||||
@@ -0,0 +1,10 @@
|
||||
use crate::curve::{ScalarCfg, ScalarField};
|
||||
use icicle_core::impl_univariate_polynomial_api;
|
||||
|
||||
impl_univariate_polynomial_api!("bn254", bn254, ScalarField, ScalarCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use icicle_core::impl_polynomial_tests;
|
||||
impl_polynomial_tests!(bn254, ScalarField);
|
||||
}
|
||||
@@ -12,11 +12,13 @@ icicle-core = { workspace = true }
|
||||
icicle-cuda-runtime = { workspace = true }
|
||||
icicle-bls12-377 = { path = "../../icicle-curves/icicle-bls12-377", features = ["bw6-761"] }
|
||||
ark-bw6-761 = { version = "0.4.0", optional = true }
|
||||
criterion = "0.3"
|
||||
|
||||
[build-dependencies]
|
||||
cmake = "0.1.50"
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = "0.3"
|
||||
ark-bw6-761 = "0.4.0"
|
||||
ark-std = "0.4.0"
|
||||
ark-ff = "0.4.0"
|
||||
@@ -24,9 +26,10 @@ ark-ec = "0.4.0"
|
||||
ark-poly = "0.4.0"
|
||||
icicle-core = { path = "../../icicle-core", features = ["arkworks"] }
|
||||
icicle-bw6-761 = { path = ".", features = ["arkworks"] }
|
||||
serial_test = "3.0.0"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
g2 = ["icicle-bls12-377/bw6-761-g2"]
|
||||
devmode = ["icicle-core/devmode"]
|
||||
arkworks = ["ark-bw6-761", "icicle-core/arkworks", "icicle-bls12-377/arkworks"]
|
||||
arkworks = ["ark-bw6-761", "icicle-core/arkworks", "icicle-bls12-377/arkworks"]
|
||||
@@ -9,9 +9,8 @@ use icicle_core::traits::FieldConfig;
|
||||
use icicle_core::{impl_curve, impl_field};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
pub(crate) const BASE_LIMBS: usize = 12;
|
||||
pub(crate) const BASE_LIMBS: usize = 24;
|
||||
|
||||
impl_field!(BASE_LIMBS, BaseField, BaseCfg, Fq);
|
||||
pub type ScalarField = bls12_377BaseField;
|
||||
@@ -27,7 +26,7 @@ impl_curve!(
|
||||
);
|
||||
#[cfg(feature = "g2")]
|
||||
impl_curve!(
|
||||
"bw6_761G2",
|
||||
"bw6_761_g2",
|
||||
bw6_761_g2,
|
||||
G2CurveCfg,
|
||||
ScalarField,
|
||||
|
||||
@@ -8,11 +8,15 @@ use icicle_core::{
|
||||
msm::{MSMConfig, MSM},
|
||||
traits::IcicleResultWrap,
|
||||
};
|
||||
use icicle_cuda_runtime::{device_context::DeviceContext, error::CudaError, memory::HostOrDeviceSlice};
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::DeviceContext,
|
||||
error::CudaError,
|
||||
memory::{DeviceSlice, HostOrDeviceSlice},
|
||||
};
|
||||
|
||||
impl_msm!("bw6_761", bw6_761, CurveCfg);
|
||||
#[cfg(feature = "g2")]
|
||||
impl_msm!("bw6_761G2", bw6_761_g2, G2CurveCfg);
|
||||
impl_msm!("bw6_761_g2", bw6_761_g2, G2CurveCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
|
||||
@@ -4,6 +4,7 @@ pub(crate) mod tests {
|
||||
use icicle_core::impl_ntt_tests;
|
||||
use icicle_core::ntt::tests::*;
|
||||
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
|
||||
use serial_test::{parallel, serial};
|
||||
use std::sync::OnceLock;
|
||||
|
||||
impl_ntt_tests!(ScalarField);
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
[package]
|
||||
name = "icicle-<CURVE>"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
description = "Rust wrapper for the CUDA implementation of <CURVE> elliptic curve by Ingonyama"
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
icicle-core = { workspace = true }
|
||||
icicle-cuda-runtime = { workspace = true }
|
||||
ark-<CURVE> = { version = "0.4.0", optional = true }
|
||||
|
||||
[build-dependencies]
|
||||
cmake = "0.1.50"
|
||||
|
||||
[dev-dependencies]
|
||||
ark-<CURVE> = "0.4.0"
|
||||
ark-std = "0.4.0"
|
||||
ark-ff = "0.4.0"
|
||||
ark-ec = "0.4.0"
|
||||
ark-poly = "0.4.0"
|
||||
icicle-core = { path = "../../icicle-core", features = ["arkworks"] }
|
||||
icicle-<CURVE> = { path = ".", features = ["arkworks"] }
|
||||
|
||||
[features]
|
||||
default = []
|
||||
g2 = ["icicle-core/g2"]
|
||||
arkworks = ["ark-<CURVE>", "icicle-core/arkworks"]
|
||||
@@ -1,28 +0,0 @@
|
||||
use cmake::Config;
|
||||
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-env-changed=CXXFLAGS");
|
||||
println!("cargo:rerun-if-changed=../../../../icicle");
|
||||
|
||||
// Base config
|
||||
let mut config = Config::new("../../../../icicle");
|
||||
config
|
||||
.define("BUILD_TESTS", "OFF")
|
||||
.define("CURVE", "<CURVE>")
|
||||
.define("CMAKE_BUILD_TYPE", "Release");
|
||||
|
||||
// Optional Features
|
||||
#[cfg(feature = "g2")]
|
||||
config.define("G2_DEFINED", "ON");
|
||||
|
||||
// Build
|
||||
let out_dir = config
|
||||
.build_target("icicle")
|
||||
.build();
|
||||
|
||||
println!("cargo:rustc-link-search={}/build", out_dir.display());
|
||||
|
||||
println!("cargo:rustc-link-lib=ingo_<CURVE>");
|
||||
println!("cargo:rustc-link-lib=stdc++");
|
||||
println!("cargo:rustc-link-lib=cudart");
|
||||
}
|
||||
@@ -1,61 +0,0 @@
|
||||
#[cfg(feature = "arkworks")]
|
||||
use ark_<CURVE>::{g1::Config as ArkG1Config, Fq, Fr};
|
||||
#[cfg(all(feature = "arkworks", feature = "g2"))]
|
||||
use ark_<CURVE>::{g2::Config as ArkG2Config, Fq2};
|
||||
use icicle_core::curve::{Affine, Curve, Projective};
|
||||
use icicle_core::field::{Field, MontgomeryConvertibleField};
|
||||
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
|
||||
use icicle_core::{impl_curve, impl_field, impl_scalar_field};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
pub(crate) const SCALAR_LIMBS: usize = ;
|
||||
pub(crate) const BASE_LIMBS: usize = ;
|
||||
#[cfg(feature = "g2")]
|
||||
pub(crate) const G2_BASE_LIMBS: usize = ;
|
||||
|
||||
impl_scalar_field!("<CURVE>", <CURVE>_sf, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
|
||||
impl_field!(BASE_LIMBS, BaseField, BaseCfg, Fq);
|
||||
#[cfg(feature = "g2")]
|
||||
impl_field!(G2_BASE_LIMBS, G2BaseField, G2BaseCfg, Fq2);
|
||||
impl_curve!(
|
||||
"<CURVE>",
|
||||
<CURVE>,
|
||||
CurveCfg,
|
||||
ScalarField,
|
||||
BaseField,
|
||||
ArkG1Config,
|
||||
G1Affine,
|
||||
G1Projective
|
||||
);
|
||||
#[cfg(feature = "g2")]
|
||||
impl_curve!(
|
||||
"<CURVE>G2",
|
||||
<CURVE>_g2,
|
||||
G2CurveCfg,
|
||||
ScalarField,
|
||||
G2BaseField,
|
||||
ArkG2Config,
|
||||
G2Affine,
|
||||
G2Projective
|
||||
);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{CurveCfg, ScalarField, BASE_LIMBS};
|
||||
#[cfg(feature = "g2")]
|
||||
use super::{G2CurveCfg, G2_BASE_LIMBS};
|
||||
use icicle_core::curve::Curve;
|
||||
use icicle_core::tests::*;
|
||||
use icicle_core::traits::FieldImpl;
|
||||
use icicle_core::{impl_curve_tests, impl_field_tests};
|
||||
|
||||
impl_field_tests!(ScalarField);
|
||||
impl_curve_tests!(BASE_LIMBS, CurveCfg);
|
||||
#[cfg(feature = "g2")]
|
||||
mod g2 {
|
||||
use super::*;
|
||||
impl_curve_tests!(G2_BASE_LIMBS, G2CurveCfg);
|
||||
}
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
pub mod curve;
|
||||
pub mod msm;
|
||||
pub mod ntt;
|
||||
pub mod poseidon;
|
||||
pub mod tree;
|
||||
pub mod vec_ops;
|
||||
|
||||
impl icicle_core::SNARKCurve for curve::CurveCfg {}
|
||||
@@ -1,31 +0,0 @@
|
||||
use crate::curve::CurveCfg;
|
||||
#[cfg(feature = "g2")]
|
||||
use crate::curve::G2CurveCfg;
|
||||
use icicle_core::{
|
||||
curve::{Affine, Curve, Projective},
|
||||
error::IcicleResult,
|
||||
impl_msm,
|
||||
msm::{MSMConfig, MSM},
|
||||
traits::IcicleResultWrap,
|
||||
};
|
||||
use icicle_cuda_runtime::{error::CudaError, memory::HostOrDeviceSlice};
|
||||
|
||||
impl_msm!("<CURVE>", <CURVE>, CurveCfg);
|
||||
#[cfg(feature = "g2")]
|
||||
impl_msm!("<CURVE>G2", <CURVE>_g2, G2CurveCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use crate::curve::CurveCfg;
|
||||
#[cfg(feature = "g2")]
|
||||
use crate::curve::G2CurveCfg;
|
||||
use icicle_core::impl_msm_tests;
|
||||
use icicle_core::msm::tests::*;
|
||||
|
||||
impl_msm_tests!(CurveCfg);
|
||||
#[cfg(feature = "g2")]
|
||||
mod g2 {
|
||||
use super::*;
|
||||
impl_msm_tests!(G2CurveCfg);
|
||||
}
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
use crate::curve::{ScalarCfg, ScalarField};
|
||||
|
||||
use icicle_core::error::IcicleResult;
|
||||
use icicle_core::impl_ntt;
|
||||
use icicle_core::ntt::{NTTConfig, NTTDir, NTT};
|
||||
use icicle_core::traits::IcicleResultWrap;
|
||||
use icicle_cuda_runtime::device_context::{DeviceContext, DEFAULT_DEVICE_ID};
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
impl_ntt!("<CURVE>", <CURVE>, ScalarField, ScalarCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use crate::curve::ScalarField;
|
||||
use crate::ntt::DEFAULT_DEVICE_ID;
|
||||
use icicle_core::impl_ntt_tests;
|
||||
use icicle_core::ntt::tests::*;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
impl_ntt_tests!(ScalarField);
|
||||
}
|
||||
@@ -1,22 +0,0 @@
|
||||
use crate::curve::{ScalarCfg, ScalarField};
|
||||
|
||||
use icicle_core::error::IcicleResult;
|
||||
use icicle_core::impl_poseidon;
|
||||
use icicle_core::poseidon::{Poseidon, PoseidonConfig, PoseidonConstants};
|
||||
use icicle_core::traits::IcicleResultWrap;
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
use core::mem::MaybeUninit;
|
||||
|
||||
impl_poseidon!("<CURVE>", <CURVE>, ScalarField, ScalarCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use crate::curve::ScalarField;
|
||||
use icicle_core::impl_poseidon_tests;
|
||||
use icicle_core::poseidon::tests::*;
|
||||
|
||||
impl_poseidon_tests!(ScalarField);
|
||||
}
|
||||
@@ -1,21 +0,0 @@
|
||||
use crate::curve::{ScalarCfg, ScalarField};
|
||||
|
||||
use icicle_core::error::IcicleResult;
|
||||
use icicle_core::impl_tree_builder;
|
||||
use icicle_core::poseidon::PoseidonConstants;
|
||||
use icicle_core::traits::IcicleResultWrap;
|
||||
use icicle_core::tree::{TreeBuilder, TreeBuilderConfig};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
impl_tree_builder!("<CURVE>", <CURVE>, ScalarField, ScalarCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use crate::curve::ScalarField;
|
||||
use icicle_core::impl_tree_builder_tests;
|
||||
use icicle_core::tree::tests::*;
|
||||
|
||||
impl_tree_builder_tests!(ScalarField);
|
||||
}
|
||||
@@ -4,24 +4,22 @@ fn main() {
|
||||
println!("cargo:rerun-if-env-changed=CXXFLAGS");
|
||||
println!("cargo:rerun-if-changed=../../../../icicle");
|
||||
|
||||
// Base config
|
||||
let mut config = Config::new("../../../../icicle");
|
||||
config
|
||||
.define("BUILD_TESTS", "OFF")
|
||||
.define("CURVE", "grumpkin")
|
||||
.define("CMAKE_BUILD_TYPE", "Release");
|
||||
|
||||
#[cfg(feature = "devmode")]
|
||||
config.define("DEVMODE", "ON");
|
||||
|
||||
// Build
|
||||
let out_dir = config
|
||||
.build_target("icicle")
|
||||
.build_target("icicle_curve")
|
||||
.build();
|
||||
|
||||
println!("cargo:rustc-link-search={}/build", out_dir.display());
|
||||
println!("cargo:rustc-link-search={}/build/lib", out_dir.display());
|
||||
|
||||
println!("cargo:rustc-link-lib=ingo_grumpkin");
|
||||
println!("cargo:rustc-link-lib=ingo_field_grumpkin");
|
||||
println!("cargo:rustc-link-lib=ingo_curve_grumpkin");
|
||||
println!("cargo:rustc-link-lib=stdc++");
|
||||
println!("cargo:rustc-link-lib=cudart");
|
||||
}
|
||||
|
||||
@@ -4,12 +4,13 @@ use icicle_core::curve::{Affine, Curve, Projective};
|
||||
use icicle_core::field::{Field, MontgomeryConvertibleField};
|
||||
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
|
||||
use icicle_core::{impl_curve, impl_field, impl_scalar_field};
|
||||
use icicle_cuda_runtime::device::check_device;
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
|
||||
|
||||
pub(crate) const SCALAR_LIMBS: usize = 4;
|
||||
pub(crate) const BASE_LIMBS: usize = 4;
|
||||
pub(crate) const SCALAR_LIMBS: usize = 8;
|
||||
pub(crate) const BASE_LIMBS: usize = 8;
|
||||
|
||||
impl_scalar_field!("grumpkin", grumpkin_sf, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
|
||||
impl_field!(BASE_LIMBS, BaseField, BaseCfg, Fq);
|
||||
|
||||
@@ -6,7 +6,11 @@ use icicle_core::{
|
||||
msm::{MSMConfig, MSM},
|
||||
traits::IcicleResultWrap,
|
||||
};
|
||||
use icicle_cuda_runtime::{device_context::DeviceContext, error::CudaError, memory::HostOrDeviceSlice};
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::DeviceContext,
|
||||
error::CudaError,
|
||||
memory::{DeviceSlice, HostOrDeviceSlice},
|
||||
};
|
||||
|
||||
impl_msm!("grumpkin", grumpkin, CurveCfg);
|
||||
|
||||
|
||||
28
wrappers/rust/icicle-fields/icicle-babybear/Cargo.toml
Normal file
28
wrappers/rust/icicle-fields/icicle-babybear/Cargo.toml
Normal file
@@ -0,0 +1,28 @@
|
||||
[package]
|
||||
name = "icicle-babybear"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
description = "Rust wrapper for the CUDA implementation of baby bear prime field by Ingonyama"
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
icicle-core = { workspace = true }
|
||||
icicle-cuda-runtime = { workspace = true }
|
||||
|
||||
[build-dependencies]
|
||||
cmake = "0.1.50"
|
||||
|
||||
[dev-dependencies]
|
||||
risc0-core = "0.21.0"
|
||||
risc0-zkp = "0.21.0"
|
||||
p3-baby-bear = { git = "https://github.com/Plonky3/Plonky3", rev = "83590121c8c28011cffa7e73cb71cf9bf94b8477" }
|
||||
p3-field = { git = "https://github.com/Plonky3/Plonky3", rev = "83590121c8c28011cffa7e73cb71cf9bf94b8477" }
|
||||
p3-dft = { git = "https://github.com/Plonky3/Plonky3", rev = "83590121c8c28011cffa7e73cb71cf9bf94b8477" }
|
||||
p3-matrix = { git = "https://github.com/Plonky3/Plonky3", rev = "83590121c8c28011cffa7e73cb71cf9bf94b8477" }
|
||||
serial_test = "3.0.0"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
devmode = ["icicle-core/devmode"]
|
||||
24
wrappers/rust/icicle-fields/icicle-babybear/build.rs
Normal file
24
wrappers/rust/icicle-fields/icicle-babybear/build.rs
Normal file
@@ -0,0 +1,24 @@
|
||||
use cmake::Config;
|
||||
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-env-changed=CXXFLAGS");
|
||||
println!("cargo:rerun-if-changed=../../../../icicle");
|
||||
|
||||
// Base config
|
||||
let mut config = Config::new("../../../../icicle/");
|
||||
config
|
||||
.define("FIELD", "babybear")
|
||||
.define("CMAKE_BUILD_TYPE", "Release")
|
||||
.define("EXT_FIELD", "ON");
|
||||
|
||||
// Build
|
||||
let out_dir = config
|
||||
.build_target("icicle_field")
|
||||
.build();
|
||||
|
||||
println!("cargo:rustc-link-search={}/build/lib", out_dir.display());
|
||||
|
||||
println!("cargo:rustc-link-lib=ingo_field_babybear");
|
||||
println!("cargo:rustc-link-lib=stdc++");
|
||||
println!("cargo:rustc-link-lib=cudart");
|
||||
}
|
||||
34
wrappers/rust/icicle-fields/icicle-babybear/src/field.rs
Normal file
34
wrappers/rust/icicle-fields/icicle-babybear/src/field.rs
Normal file
@@ -0,0 +1,34 @@
|
||||
use icicle_core::field::{Field, MontgomeryConvertibleField};
|
||||
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
|
||||
use icicle_core::{impl_field, impl_scalar_field};
|
||||
use icicle_cuda_runtime::device::check_device;
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
|
||||
|
||||
pub(crate) const SCALAR_LIMBS: usize = 1;
|
||||
pub(crate) const EXTENSION_LIMBS: usize = 4;
|
||||
|
||||
impl_scalar_field!("babybear", babybear, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
|
||||
impl_scalar_field!(
|
||||
"babybear_extension",
|
||||
babybear_extension,
|
||||
EXTENSION_LIMBS,
|
||||
ExtensionField,
|
||||
ExtensionCfg,
|
||||
Fr
|
||||
);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{ExtensionField, ScalarField};
|
||||
use icicle_core::impl_field_tests;
|
||||
use icicle_core::tests::*;
|
||||
|
||||
impl_field_tests!(ScalarField);
|
||||
mod extension {
|
||||
use super::*;
|
||||
|
||||
impl_field_tests!(ExtensionField);
|
||||
}
|
||||
}
|
||||
4
wrappers/rust/icicle-fields/icicle-babybear/src/lib.rs
Normal file
4
wrappers/rust/icicle-fields/icicle-babybear/src/lib.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
pub mod field;
|
||||
pub mod ntt;
|
||||
pub mod polynomials;
|
||||
pub mod vec_ops;
|
||||
174
wrappers/rust/icicle-fields/icicle-babybear/src/ntt/mod.rs
Normal file
174
wrappers/rust/icicle-fields/icicle-babybear/src/ntt/mod.rs
Normal file
@@ -0,0 +1,174 @@
|
||||
use crate::field::{ExtensionField, ScalarCfg, ScalarField};
|
||||
|
||||
use icicle_core::error::IcicleResult;
|
||||
use icicle_core::ntt::{NTTConfig, NTTDir, NTTDomain, NTT};
|
||||
use icicle_core::traits::IcicleResultWrap;
|
||||
use icicle_core::{impl_ntt, impl_ntt_without_domain};
|
||||
use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
impl_ntt!("babybear", babybear, ScalarField, ScalarCfg);
|
||||
impl_ntt_without_domain!(
|
||||
"babybear_extension",
|
||||
ScalarField,
|
||||
ScalarCfg,
|
||||
NTT,
|
||||
"_ntt",
|
||||
ExtensionField
|
||||
);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use super::{ExtensionField, ScalarField};
|
||||
use icicle_core::{
|
||||
ntt::{initialize_domain, ntt_inplace, release_domain, NTTConfig, NTTDir},
|
||||
traits::{FieldImpl, GenerateRandom},
|
||||
};
|
||||
use icicle_cuda_runtime::{device_context::DeviceContext, memory::HostSlice};
|
||||
use p3_baby_bear::BabyBear;
|
||||
use p3_dft::{Radix2Dit, TwoAdicSubgroupDft};
|
||||
use p3_field::{
|
||||
extension::BinomialExtensionField, AbstractExtensionField, AbstractField, PrimeField32, TwoAdicField,
|
||||
};
|
||||
use p3_matrix::dense::RowMajorMatrix;
|
||||
use risc0_core::field::{
|
||||
baby_bear::{Elem, ExtElem},
|
||||
Elem as FieldElem, RootsOfUnity,
|
||||
};
|
||||
use serial_test::serial;
|
||||
|
||||
// Note that risc0 and plonky3 tests shouldn't be ran simultaneously in parallel as they use different roots of unity
|
||||
#[test]
|
||||
#[serial]
|
||||
fn test_against_risc0() {
|
||||
let log_sizes = [15, 20];
|
||||
let ctx = DeviceContext::default();
|
||||
let risc0_rou = Elem::ROU_FWD[log_sizes[1]];
|
||||
initialize_domain(ScalarField::from([risc0_rou.as_u32()]), &ctx, false).unwrap();
|
||||
for log_size in log_sizes {
|
||||
let ntt_size = 1 << log_size;
|
||||
|
||||
let mut scalars: Vec<ScalarField> = <ScalarField as FieldImpl>::Config::generate_random(ntt_size);
|
||||
let mut scalars_risc0: Vec<Elem> = scalars
|
||||
.iter()
|
||||
.map(|x| Elem::new(Into::<[u32; 1]>::into(*x)[0]))
|
||||
.collect();
|
||||
|
||||
let ntt_cfg: NTTConfig<'_, ScalarField> = NTTConfig::default();
|
||||
ntt_inplace(HostSlice::from_mut_slice(&mut scalars[..]), NTTDir::kForward, &ntt_cfg).unwrap();
|
||||
|
||||
risc0_zkp::core::ntt::bit_reverse(&mut scalars_risc0[..]);
|
||||
risc0_zkp::core::ntt::evaluate_ntt::<Elem, Elem>(&mut scalars_risc0[..], ntt_size);
|
||||
|
||||
for (s1, s2) in scalars
|
||||
.iter()
|
||||
.zip(scalars_risc0)
|
||||
{
|
||||
assert_eq!(Into::<[u32; 1]>::into(*s1)[0], s2.as_u32());
|
||||
}
|
||||
|
||||
let mut ext_scalars: Vec<ExtensionField> = <ExtensionField as FieldImpl>::Config::generate_random(ntt_size);
|
||||
let mut ext_scalars_risc0: Vec<ExtElem> = ext_scalars
|
||||
.iter()
|
||||
.map(|x| ExtElem::from_u32_words(&Into::<[u32; 4]>::into(*x)[..]))
|
||||
.collect();
|
||||
|
||||
ntt_inplace(
|
||||
HostSlice::from_mut_slice(&mut ext_scalars[..]),
|
||||
NTTDir::kForward,
|
||||
&ntt_cfg,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
risc0_zkp::core::ntt::bit_reverse(&mut ext_scalars_risc0[..]);
|
||||
risc0_zkp::core::ntt::evaluate_ntt::<Elem, ExtElem>(&mut ext_scalars_risc0[..], ntt_size);
|
||||
|
||||
for (s1, s2) in ext_scalars
|
||||
.iter()
|
||||
.zip(ext_scalars_risc0)
|
||||
{
|
||||
assert_eq!(Into::<[u32; 4]>::into(*s1)[..], s2.to_u32_words()[..]);
|
||||
}
|
||||
}
|
||||
|
||||
release_domain::<ScalarField>(&ctx).unwrap();
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[serial]
|
||||
fn test_against_plonky3() {
|
||||
let log_ncols = [15, 18];
|
||||
let nrows = 4;
|
||||
let ctx = DeviceContext::default();
|
||||
let plonky3_rou = BabyBear::two_adic_generator(log_ncols[1]);
|
||||
// To compute FFTs using icicle, we first need to initialize it using plonky3's "two adic generator"
|
||||
initialize_domain(ScalarField::from([plonky3_rou.as_canonical_u32()]), &ctx, false).unwrap();
|
||||
for log_ncol in log_ncols {
|
||||
let ntt_size = 1 << log_ncol;
|
||||
|
||||
let mut scalars: Vec<ScalarField> = <ScalarField as FieldImpl>::Config::generate_random(nrows * ntt_size);
|
||||
let scalars_p3: Vec<BabyBear> = scalars
|
||||
.iter()
|
||||
.map(|x| BabyBear::from_wrapped_u32(Into::<[u32; 1]>::into(*x)[0]))
|
||||
.collect();
|
||||
let matrix_p3 = RowMajorMatrix::new(scalars_p3, nrows);
|
||||
|
||||
let mut ntt_cfg: NTTConfig<'_, ScalarField> = NTTConfig::default();
|
||||
// Next two lines signalize that we want to compute `nrows` FFTs in column-ordered fashion
|
||||
ntt_cfg.batch_size = nrows as i32;
|
||||
ntt_cfg.columns_batch = true;
|
||||
ntt_inplace(HostSlice::from_mut_slice(&mut scalars[..]), NTTDir::kForward, &ntt_cfg).unwrap();
|
||||
|
||||
let result_p3 = Radix2Dit.dft_batch(matrix_p3);
|
||||
|
||||
for i in 0..nrows {
|
||||
for j in 0..ntt_size {
|
||||
assert_eq!(
|
||||
Into::<[u32; 1]>::into(scalars[i + j * nrows])[0],
|
||||
result_p3.values[i + j * nrows].as_canonical_u32()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
type Plonky3Extension = BinomialExtensionField<BabyBear, 4>;
|
||||
|
||||
let mut ext_scalars: Vec<ExtensionField> =
|
||||
<ExtensionField as FieldImpl>::Config::generate_random(nrows * ntt_size);
|
||||
let ext_scalars_p3: Vec<Plonky3Extension> = ext_scalars
|
||||
.iter()
|
||||
.map(|x| {
|
||||
let arr: [u32; 4] = (*x).into();
|
||||
Plonky3Extension::from_base_slice(
|
||||
&(arr
|
||||
.iter()
|
||||
.map(|y| BabyBear::from_wrapped_u32(*y))
|
||||
.collect::<Vec<BabyBear>>())[..],
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
let ext_matrix_p3 = RowMajorMatrix::new(ext_scalars_p3, nrows);
|
||||
|
||||
ntt_inplace(
|
||||
HostSlice::from_mut_slice(&mut ext_scalars[..]),
|
||||
NTTDir::kForward,
|
||||
&ntt_cfg,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let ext_result_p3 = Radix2Dit.dft_batch(ext_matrix_p3);
|
||||
|
||||
for i in 0..nrows {
|
||||
for j in 0..ntt_size {
|
||||
let arr: [u32; 4] = ext_scalars[i + j * nrows].into();
|
||||
let base_slice: &[BabyBear] = ext_result_p3.values[i + j * nrows].as_base_slice();
|
||||
for k in 0..4 {
|
||||
assert_eq!(arr[k], base_slice[k].as_canonical_u32());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
release_domain::<ScalarField>(&ctx).unwrap();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
use crate::field::{ScalarCfg, ScalarField};
|
||||
use icicle_core::impl_univariate_polynomial_api;
|
||||
|
||||
impl_univariate_polynomial_api!("babybear", babybear, ScalarField, ScalarCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use icicle_core::impl_polynomial_tests;
|
||||
impl_polynomial_tests!(babybear, ScalarField);
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
use crate::curve::{ScalarCfg, ScalarField};
|
||||
use crate::field::{ExtensionCfg, ExtensionField, ScalarCfg, ScalarField};
|
||||
|
||||
use icicle_core::error::IcicleResult;
|
||||
use icicle_core::impl_vec_ops_field;
|
||||
@@ -8,13 +8,19 @@ use icicle_cuda_runtime::device_context::DeviceContext;
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
|
||||
|
||||
impl_vec_ops_field!("<CURVE>", <CURVE>, ScalarField, ScalarCfg);
|
||||
impl_vec_ops_field!("babybear", babybear, ScalarField, ScalarCfg);
|
||||
impl_vec_ops_field!("babybear_extension", babybear_extension, ExtensionField, ExtensionCfg);
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use crate::curve::ScalarField;
|
||||
use crate::field::{ExtensionField, ScalarField};
|
||||
use icicle_core::impl_vec_add_tests;
|
||||
use icicle_core::vec_ops::tests::*;
|
||||
|
||||
impl_vec_add_tests!(ScalarField);
|
||||
mod extension {
|
||||
use super::*;
|
||||
|
||||
impl_vec_add_tests!(ExtensionField);
|
||||
}
|
||||
}
|
||||
18
wrappers/rust/icicle-hash/Cargo.toml
Normal file
18
wrappers/rust/icicle-hash/Cargo.toml
Normal file
@@ -0,0 +1,18 @@
|
||||
[package]
|
||||
name = "icicle-hash"
|
||||
version.workspace = true
|
||||
edition.workspace = true
|
||||
authors.workspace = true
|
||||
description = "Rust wrapper for the CUDA implementation of hash functions by Ingonyama"
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
|
||||
[dependencies]
|
||||
icicle-core = { workspace = true }
|
||||
icicle-cuda-runtime = { workspace = true }
|
||||
|
||||
[build-dependencies]
|
||||
cmake = "0.1.50"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
21
wrappers/rust/icicle-hash/build.rs
Normal file
21
wrappers/rust/icicle-hash/build.rs
Normal file
@@ -0,0 +1,21 @@
|
||||
use cmake::Config;
|
||||
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-env-changed=CXXFLAGS");
|
||||
println!("cargo:rerun-if-changed=../../../icicle");
|
||||
|
||||
// Base config
|
||||
let mut config = Config::new("../../../icicle/");
|
||||
config.define("CMAKE_BUILD_TYPE", "Release");
|
||||
config.define("BUILD_HASH", "ON");
|
||||
|
||||
// Build
|
||||
let out_dir = config
|
||||
.build_target("icicle_hash")
|
||||
.build();
|
||||
|
||||
println!("cargo:rustc-link-search={}/build/lib", out_dir.display());
|
||||
println!("cargo:rustc-link-lib=ingo_hash");
|
||||
println!("cargo:rustc-link-lib=stdc++");
|
||||
println!("cargo:rustc-link-lib=cudart");
|
||||
}
|
||||
102
wrappers/rust/icicle-hash/src/keccak/mod.rs
Normal file
102
wrappers/rust/icicle-hash/src/keccak/mod.rs
Normal file
@@ -0,0 +1,102 @@
|
||||
use icicle_cuda_runtime::error::CudaError;
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::{DeviceContext, DEFAULT_DEVICE_ID},
|
||||
memory::HostOrDeviceSlice,
|
||||
};
|
||||
|
||||
use icicle_core::error::IcicleResult;
|
||||
use icicle_core::traits::IcicleResultWrap;
|
||||
|
||||
pub mod tests;
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct KeccakConfig<'a> {
|
||||
/// Details related to the device such as its id and stream id. See [DeviceContext](@ref device_context::DeviceContext).
|
||||
pub ctx: DeviceContext<'a>,
|
||||
|
||||
/// True if inputs are on device and false if they're on host. Default value: false.
|
||||
are_inputs_on_device: bool,
|
||||
|
||||
/// If true, output is preserved on device, otherwise on host. Default value: false.
|
||||
are_outputs_on_device: bool,
|
||||
|
||||
/// Whether to run the Keccak asynchronously. If set to `true`, the keccak_hash function will be
|
||||
/// non-blocking and you'd need to synchronize it explicitly by running
|
||||
/// `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to false, keccak_hash
|
||||
/// function will block the current CPU thread.
|
||||
is_async: bool,
|
||||
}
|
||||
|
||||
impl<'a> Default for KeccakConfig<'a> {
|
||||
fn default() -> Self {
|
||||
Self::default_for_device(DEFAULT_DEVICE_ID)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> KeccakConfig<'a> {
|
||||
pub fn default_for_device(device_id: usize) -> Self {
|
||||
KeccakConfig {
|
||||
ctx: DeviceContext::default_for_device(device_id),
|
||||
are_inputs_on_device: false,
|
||||
are_outputs_on_device: false,
|
||||
is_async: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
pub(crate) fn keccak256_cuda(
|
||||
input: *const u8,
|
||||
input_block_size: i32,
|
||||
number_of_blocks: i32,
|
||||
output: *mut u8,
|
||||
config: KeccakConfig,
|
||||
) -> CudaError;
|
||||
|
||||
pub(crate) fn keccak512_cuda(
|
||||
input: *const u8,
|
||||
input_block_size: i32,
|
||||
number_of_blocks: i32,
|
||||
output: *mut u8,
|
||||
config: KeccakConfig,
|
||||
) -> CudaError;
|
||||
}
|
||||
|
||||
pub fn keccak256(
|
||||
input: &(impl HostOrDeviceSlice<u8> + ?Sized),
|
||||
input_block_size: i32,
|
||||
number_of_blocks: i32,
|
||||
output: &mut (impl HostOrDeviceSlice<u8> + ?Sized),
|
||||
config: KeccakConfig,
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
keccak256_cuda(
|
||||
input.as_ptr(),
|
||||
input_block_size,
|
||||
number_of_blocks,
|
||||
output.as_mut_ptr(),
|
||||
config,
|
||||
)
|
||||
.wrap()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn keccak512(
|
||||
input: &(impl HostOrDeviceSlice<u8> + ?Sized),
|
||||
input_block_size: i32,
|
||||
number_of_blocks: i32,
|
||||
output: &mut (impl HostOrDeviceSlice<u8> + ?Sized),
|
||||
config: KeccakConfig,
|
||||
) -> IcicleResult<()> {
|
||||
unsafe {
|
||||
keccak512_cuda(
|
||||
input.as_ptr(),
|
||||
input_block_size,
|
||||
number_of_blocks,
|
||||
output.as_mut_ptr(),
|
||||
config,
|
||||
)
|
||||
.wrap()
|
||||
}
|
||||
}
|
||||
1
wrappers/rust/icicle-hash/src/keccak/tests.rs
Normal file
1
wrappers/rust/icicle-hash/src/keccak/tests.rs
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
1
wrappers/rust/icicle-hash/src/lib.rs
Normal file
1
wrappers/rust/icicle-hash/src/lib.rs
Normal file
@@ -0,0 +1 @@
|
||||
pub mod keccak;
|
||||
Reference in New Issue
Block a user