ICICLE V2 Release (#492)

This PR introduces major updates to ICICLE Core and to the Rust and Golang bindings.

---------

Co-authored-by: Yuval Shekel <yshekel@gmail.com>
Co-authored-by: DmytroTym <dmytrotym1@gmail.com>
Co-authored-by: Otsar <122266060+Otsar-Raikou@users.noreply.github.com>
Co-authored-by: VitaliiH <vhnatyk@gmail.com>
Co-authored-by: release-bot <release-bot@ingonyama.com>
Co-authored-by: Stas <spolonsky@icloud.com>
Co-authored-by: Jeremy Felder <jeremy.felder1@gmail.com>
Co-authored-by: ImmanuelSegol <3ditds@gmail.com>
Co-authored-by: JimmyHongjichuan <45908291+JimmyHongjichuan@users.noreply.github.com>
Co-authored-by: pierre <pierreuu@gmail.com>
Co-authored-by: Leon Hibnik <107353745+LeonHibnik@users.noreply.github.com>
Co-authored-by: nonam3e <timur@ingonyama.com>
Co-authored-by: Vlad <88586482+vladfdp@users.noreply.github.com>
Co-authored-by: LeonHibnik <leon@ingonyama.com>
Co-authored-by: nonam3e <71525212+nonam3e@users.noreply.github.com>
Co-authored-by: vladfdp <vlad.heintz@gmail.com>
Authored by ChickenLover on 2024-04-23 09:26:40 +07:00, committed by GitHub
parent a1dc0539ce, commit 7265d18d48
584 changed files with 33460 additions and 8856 deletions

View File

@@ -8,6 +8,8 @@ members = [
"icicle-curves/icicle-bls12-381",
"icicle-curves/icicle-bn254",
"icicle-curves/icicle-grumpkin",
"icicle-fields/icicle-babybear",
"icicle-hash",
]
exclude = [
"icicle-curves/icicle-curve-template",

View File

@@ -19,6 +19,13 @@ ark-std = { version = "0.4.0", optional = true }
rayon = "1.8.1"
criterion = "0.3"
[dev-dependencies]
criterion = "0.3"
serial_test = "3.0.0"
[features]
default = []
arkworks = ["ark-ff", "ark-ec", "ark-poly", "ark-std"]

View File

@@ -7,8 +7,10 @@ use ark_ec::models::CurveConfig as ArkCurveConfig;
use ark_ec::short_weierstrass::{Affine as ArkAffine, Projective as ArkProjective, SWCurveConfig};
#[cfg(feature = "arkworks")]
use ark_ec::AffineRepr;
use icicle_cuda_runtime::device::check_device;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
use std::fmt::Debug;
pub trait Curve: Debug + PartialEq + Copy + Clone {
@@ -24,9 +26,19 @@ pub trait Curve: Debug + PartialEq + Copy + Clone {
#[doc(hidden)]
fn generate_random_affine_points(size: usize) -> Vec<Affine<Self>>;
#[doc(hidden)]
fn convert_affine_montgomery(points: &mut HostOrDeviceSlice<Affine<Self>>, is_into: bool) -> CudaError;
fn convert_affine_montgomery(
points: *mut Affine<Self>,
len: usize,
is_into: bool,
ctx: &DeviceContext,
) -> CudaError;
#[doc(hidden)]
fn convert_projective_montgomery(points: &mut HostOrDeviceSlice<Projective<Self>>, is_into: bool) -> CudaError;
fn convert_projective_montgomery(
points: *mut Projective<Self>,
len: usize,
is_into: bool,
ctx: &DeviceContext,
) -> CudaError;
#[cfg(feature = "arkworks")]
type ArkSWConfig: SWCurveConfig;
@@ -77,6 +89,9 @@ impl<C: Curve> Affine<C> {
impl<C: Curve> From<Affine<C>> for Projective<C> {
fn from(item: Affine<C>) -> Self {
if item == (Affine::<C>::zero()) {
return Self::zero();
}
Self {
x: item.x,
y: item.y,
@@ -109,35 +124,67 @@ impl<C: Curve> Projective<C> {
impl<C: Curve> PartialEq for Projective<C> {
fn eq(&self, other: &Self) -> bool {
C::eq_proj(self as *const _, other as *const _)
C::eq_proj(self as *const Self, other as *const Self)
}
}
impl<C: Curve> From<Projective<C>> for Affine<C> {
fn from(proj: Projective<C>) -> Self {
let mut aff = Self::zero();
C::to_affine(&proj as *const _, &mut aff as *mut _);
C::to_affine(&proj as *const Projective<C>, &mut aff as *mut Self);
aff
}
}
impl<C: Curve> MontgomeryConvertible for Affine<C> {
fn to_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError {
C::convert_affine_montgomery(values, true)
impl<'a, C: Curve> MontgomeryConvertible<'a> for Affine<C> {
fn to_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError {
check_device(ctx.device_id);
assert_eq!(
values
.device_id()
.unwrap(),
ctx.device_id,
"Device ids are different in slice and context"
);
C::convert_affine_montgomery(unsafe { values.as_mut_ptr() }, values.len(), true, ctx)
}
fn from_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError {
C::convert_affine_montgomery(values, false)
fn from_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError {
check_device(ctx.device_id);
assert_eq!(
values
.device_id()
.unwrap(),
ctx.device_id,
"Device ids are different in slice and context"
);
C::convert_affine_montgomery(unsafe { values.as_mut_ptr() }, values.len(), false, ctx)
}
}
impl<C: Curve> MontgomeryConvertible for Projective<C> {
fn to_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError {
C::convert_projective_montgomery(values, true)
impl<'a, C: Curve> MontgomeryConvertible<'a> for Projective<C> {
fn to_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError {
check_device(ctx.device_id);
assert_eq!(
values
.device_id()
.unwrap(),
ctx.device_id,
"Device ids are different in slice and context"
);
C::convert_projective_montgomery(unsafe { values.as_mut_ptr() }, values.len(), true, ctx)
}
fn from_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError {
C::convert_projective_montgomery(values, false)
fn from_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError {
check_device(ctx.device_id);
assert_eq!(
values
.device_id()
.unwrap(),
ctx.device_id,
"Device ids are different in slice and context"
);
C::convert_projective_montgomery(unsafe { values.as_mut_ptr() }, values.len(), false, ctx)
}
}
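Montgomery conversion now runs on device memory only, with an explicit DeviceContext, instead of accepting a HostOrDeviceSlice. A minimal usage sketch, not part of this diff, assuming the icicle-bn254 bindings where G1Affine is Affine<CurveCfg>:

// Hypothetical helper: copy affine points to the GPU and convert them to Montgomery form.
use icicle_bn254::curve::G1Affine; // assumed re-export from the bn254 bindings
use icicle_core::traits::MontgomeryConvertible;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};

fn affine_to_mont_on_device(points: &[G1Affine]) -> DeviceVec<G1Affine> {
    let ctx = DeviceContext::default(); // device 0, default stream
    let mut points_d = DeviceVec::<G1Affine>::cuda_malloc(points.len()).unwrap();
    points_d
        .copy_from_host(HostSlice::from_slice(points))
        .unwrap();
    // The slice and the context must refer to the same device id,
    // otherwise the assert inside to_mont fires.
    let _status = G1Affine::to_mont(&mut points_d[..], &ctx); // returns a CudaError
    points_d
}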
@@ -228,22 +275,22 @@ macro_rules! impl_curve {
use super::{$affine_type, $projective_type, CudaError, DeviceContext};
extern "C" {
#[link_name = concat!($curve_prefix, "Eq")]
#[link_name = concat!($curve_prefix, "_eq")]
pub(crate) fn eq(point1: *const $projective_type, point2: *const $projective_type) -> bool;
#[link_name = concat!($curve_prefix, "ToAffine")]
#[link_name = concat!($curve_prefix, "_to_affine")]
pub(crate) fn proj_to_affine(point: *const $projective_type, point_out: *mut $affine_type);
#[link_name = concat!($curve_prefix, "GenerateProjectivePoints")]
#[link_name = concat!($curve_prefix, "_generate_projective_points")]
pub(crate) fn generate_projective_points(points: *mut $projective_type, size: usize);
#[link_name = concat!($curve_prefix, "GenerateAffinePoints")]
#[link_name = concat!($curve_prefix, "_generate_affine_points")]
pub(crate) fn generate_affine_points(points: *mut $affine_type, size: usize);
#[link_name = concat!($curve_prefix, "AffineConvertMontgomery")]
#[link_name = concat!($curve_prefix, "_affine_convert_montgomery")]
pub(crate) fn _convert_affine_montgomery(
points: *mut $affine_type,
size: usize,
is_into: bool,
ctx: *const DeviceContext,
) -> CudaError;
#[link_name = concat!($curve_prefix, "ProjectiveConvertMontgomery")]
#[link_name = concat!($curve_prefix, "_projective_convert_montgomery")]
pub(crate) fn _convert_projective_montgomery(
points: *mut $projective_type,
size: usize,
@@ -284,27 +331,29 @@ macro_rules! impl_curve {
res
}
fn convert_affine_montgomery(points: &mut HostOrDeviceSlice<$affine_type>, is_into: bool) -> CudaError {
fn convert_affine_montgomery(
points: *mut $affine_type,
len: usize,
is_into: bool,
ctx: &DeviceContext,
) -> CudaError {
unsafe {
$curve_prefix_ident::_convert_affine_montgomery(
points.as_mut_ptr(),
points.len(),
is_into,
&DeviceContext::default() as *const _ as *const DeviceContext,
)
$curve_prefix_ident::_convert_affine_montgomery(points, len, is_into, ctx as *const DeviceContext)
}
}
fn convert_projective_montgomery(
points: &mut HostOrDeviceSlice<$projective_type>,
points: *mut $projective_type,
len: usize,
is_into: bool,
ctx: &DeviceContext,
) -> CudaError {
unsafe {
$curve_prefix_ident::_convert_projective_montgomery(
points.as_mut_ptr(),
points.len(),
points,
len,
is_into,
&DeviceContext::default() as *const _ as *const DeviceContext,
ctx as *const DeviceContext,
)
}
}
@@ -321,11 +370,6 @@ macro_rules! impl_curve_tests {
$base_limbs:ident,
$curve:ident
) => {
#[test]
fn test_scalar_equality() {
check_scalar_equality::<<$curve as Curve>::ScalarField>()
}
#[test]
fn test_affine_projective_convert() {
check_affine_projective_convert::<$curve>()

View File

@@ -0,0 +1,271 @@
#![cfg(feature = "ec_ntt")]
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use crate::{
curve::Curve,
ntt::{FieldImpl, IcicleResult, NTTConfig, NTTDir},
};
pub use crate::curve::Projective;
// #[cfg(feature = "arkworks")] //TODO: uncomment on correctness test
#[doc(hidden)]
pub mod tests;
#[doc(hidden)]
pub trait ECNTT<C: Curve>: ECNTTUnchecked<Projective<C>, C::ScalarField> {}
#[doc(hidden)]
pub trait ECNTTUnchecked<T, F: FieldImpl> {
fn ntt_unchecked(
input: &(impl HostOrDeviceSlice<T> + ?Sized),
dir: NTTDir,
cfg: &NTTConfig<F>,
output: &mut (impl HostOrDeviceSlice<T> + ?Sized),
) -> IcicleResult<()>;
fn ntt_inplace_unchecked(
inout: &mut (impl HostOrDeviceSlice<T> + ?Sized),
dir: NTTDir,
cfg: &NTTConfig<F>,
) -> IcicleResult<()>;
}
#[macro_export]
macro_rules! impl_ecntt {
(
$field_prefix:literal,
$field_prefix_ident:ident,
$field:ident,
$field_config:ident,
$curve:ident
) => {
mod $field_prefix_ident {
use crate::curve;
use crate::curve::BaseCfg;
use crate::ecntt::IcicleResult;
use crate::ecntt::Projective;
use crate::ecntt::{
$curve, $field, $field_config, CudaError, DeviceContext, NTTConfig, NTTDir, DEFAULT_DEVICE_ID,
};
use icicle_core::ecntt::ECNTTUnchecked;
use icicle_core::ecntt::ECNTT;
use icicle_core::impl_ntt_without_domain;
use icicle_core::ntt::NTT;
use icicle_core::traits::IcicleResultWrap;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
pub type ProjectiveC = Projective<$curve>;
impl_ntt_without_domain!(
$field_prefix,
$field,
$field_config,
ECNTTUnchecked,
"_ecntt_",
ProjectiveC
);
impl ECNTT<$curve> for $field_config {}
}
};
}
/// Computes the ECNTT, or a batch of several ECNTTs.
///
/// # Arguments
///
/// * `input` - inputs of the ECNTT.
///
/// * `dir` - whether to compute forward or inverse ECNTT.
///
/// * `cfg` - config used to specify extra arguments of the ECNTT.
///
/// * `output` - buffer to write the ECNTT outputs into. Must be of the same size as `input`.
pub fn ecntt<C: Curve>(
input: &(impl HostOrDeviceSlice<Projective<C>> + ?Sized),
dir: NTTDir,
cfg: &NTTConfig<C::ScalarField>,
output: &mut (impl HostOrDeviceSlice<Projective<C>> + ?Sized),
) -> IcicleResult<()>
where
C::ScalarField: FieldImpl,
<C::ScalarField as FieldImpl>::Config: ECNTT<C>,
{
<<C::ScalarField as FieldImpl>::Config as ECNTTUnchecked<Projective<C>, C::ScalarField>>::ntt_unchecked(
input, dir, &cfg, output,
)
}
/// Computes the ECNTT, or a batch of several ECNTTs inplace.
///
/// # Arguments
///
/// * `inout` - buffer with inputs to also write the ECNTT outputs into.
///
/// * `dir` - whether to compute forward or inverse ECNTT.
///
/// * `cfg` - config used to specify extra arguments of the ECNTT.
pub fn ecntt_inplace<C: Curve>(
inout: &mut (impl HostOrDeviceSlice<Projective<C>> + ?Sized),
dir: NTTDir,
cfg: &NTTConfig<C::ScalarField>,
) -> IcicleResult<()>
where
C::ScalarField: FieldImpl,
<C::ScalarField as FieldImpl>::Config: ECNTT<C>,
{
<<C::ScalarField as FieldImpl>::Config as ECNTTUnchecked<Projective<C>, C::ScalarField>>::ntt_inplace_unchecked(
inout, dir, &cfg,
)
}
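For reference, a hedged sketch of driving the new ecntt / ecntt_inplace wrappers with host buffers. This is not part of the diff; it assumes the icicle-bn254 bindings built with the ec_ntt feature and an NTT domain that was already initialized for the scalar field:

use icicle_bn254::curve::CurveCfg; // assumed curve binding
use icicle_core::curve::{Curve, Projective};
use icicle_core::ecntt::{ecntt, ecntt_inplace};
use icicle_core::ntt::{NTTConfig, NTTDir};
use icicle_cuda_runtime::memory::HostSlice;

// Forward ECNTT into a separate buffer, then invert it in place and check the round trip.
fn ecntt_roundtrip(size: usize) {
    let points = CurveCfg::generate_random_projective_points(size);
    let mut out = vec![Projective::<CurveCfg>::zero(); size];
    let cfg = NTTConfig::default();
    ecntt(
        HostSlice::from_slice(&points),
        NTTDir::kForward,
        &cfg,
        HostSlice::from_mut_slice(&mut out),
    )
    .unwrap();
    ecntt_inplace(HostSlice::from_mut_slice(&mut out), NTTDir::kInverse, &cfg).unwrap();
    assert_eq!(out, points);
}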
#[macro_export]
macro_rules! impl_ecntt_tests {
(
$field:ident,
$curve:ident
) => {
use icicle_core::ntt::tests::init_domain;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
const MAX_SIZE: u64 = 1 << 18;
static INIT: OnceLock<()> = OnceLock::new();
const FAST_TWIDDLES_MODE: bool = false;
#[test]
fn test_ecntt() {
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
check_ecntt::<$curve>()
}
#[test]
fn test_ecntt_batch() {
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
check_ecntt_batch::<$curve>()
}
// #[test] //TODO: multi-device test
// fn test_ntt_device_async() {
// // init_domain in this test is performed per-device
// check_ecntt_device_async::<$field>()
// }
};
}
#[macro_export]
macro_rules! impl_ecntt_bench {
(
$field_prefix:literal,
$field:ident,
$curve:ident
) => {
use icicle_core::ntt::ntt;
use icicle_core::ntt::NTTDomain;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use std::sync::OnceLock;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use icicle_core::{
curve::Curve,
ecntt::{ecntt, Projective},
ntt::{FieldImpl, NTTConfig, NTTDir, NttAlgorithm, Ordering},
traits::ArkConvertible,
};
use icicle_core::ecntt::ECNTT;
use icicle_core::ntt::NTT;
use icicle_cuda_runtime::memory::HostSlice;
fn ecntt_for_bench<C: Curve>(
points: &(impl HostOrDeviceSlice<Projective<C>> + ?Sized),
mut batch_ntt_result: &mut (impl HostOrDeviceSlice<Projective<C>> + ?Sized),
test_sizes: usize,
batch_size: usize,
is_inverse: NTTDir,
ordering: Ordering,
config: &mut NTTConfig<C::ScalarField>,
_seed: u32,
) where
C::ScalarField: ArkConvertible,
<C::ScalarField as FieldImpl>::Config: ECNTT<C>,
<C::ScalarField as FieldImpl>::Config: NTTDomain<C::ScalarField>,
{
ecntt(points, is_inverse, config, batch_ntt_result).unwrap();
}
static INIT: OnceLock<()> = OnceLock::new();
fn benchmark_ecntt<C: Curve>(c: &mut Criterion)
where
C::ScalarField: ArkConvertible,
<C::ScalarField as FieldImpl>::Config: ECNTT<C>,
<C::ScalarField as FieldImpl>::Config: NTTDomain<C::ScalarField>,
{
use criterion::SamplingMode;
use icicle_core::ntt::ntt;
use icicle_core::ntt::tests::init_domain;
use icicle_core::ntt::NTTDomain;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
let group_id = format!("{} EC NTT", $field_prefix);
let mut group = c.benchmark_group(&group_id);
group.sampling_mode(SamplingMode::Flat);
group.sample_size(10);
const MAX_SIZE: u64 = 1 << 18;
const FAST_TWIDDLES_MODE: bool = false;
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
let test_sizes = [1 << 4, 1 << 8];
let batch_sizes = [1, 1 << 4, 128];
for test_size in test_sizes {
for batch_size in batch_sizes {
let points = C::generate_random_projective_points(test_size);
let points = HostSlice::from_slice(&points);
let mut batch_ntt_result = vec![Projective::<C>::zero(); batch_size * test_size];
let batch_ntt_result = HostSlice::from_mut_slice(&mut batch_ntt_result);
let mut config = NTTConfig::default();
for is_inverse in [NTTDir::kInverse, NTTDir::kForward] {
for ordering in [
Ordering::kNN,
// Ordering::kNR, // times are ~ same as kNN
// Ordering::kRN,
// Ordering::kRR,
// Ordering::kNM, // no mixed radix ecntt
// Ordering::kMN,
] {
config.ordering = ordering;
for alg in [NttAlgorithm::Radix2] {
config.batch_size = batch_size as i32;
config.ntt_algorithm = alg;
let bench_descr = format!(
"{:?} {:?} {:?} {} x {}",
alg, ordering, is_inverse, test_size, batch_size
);
group.bench_function(&bench_descr, |b| {
b.iter(|| {
ecntt_for_bench::<C>(
points,
batch_ntt_result,
test_size,
batch_size,
is_inverse,
ordering,
&mut config,
black_box(1),
)
})
});
}
}
}
}
}
group.finish();
}
criterion_group!(benches, benchmark_ecntt<$curve>);
criterion_main!(benches);
};
}

View File

@@ -0,0 +1,92 @@
#![cfg(feature = "ec_ntt")]
use icicle_cuda_runtime::memory::HostSlice;
use crate::curve::Curve;
use crate::curve::*;
use crate::{
ecntt::*,
ntt::{NTTDir, NttAlgorithm, Ordering},
traits::FieldImpl,
};
use crate::ntt::NTTConfig;
pub fn check_ecntt<C: Curve>()
where
<C::ScalarField as FieldImpl>::Config: ECNTT<C>,
{
let test_sizes = [1 << 4, 1 << 9];
for test_size in test_sizes {
let points = C::generate_random_projective_points(test_size);
let slice = &points.clone();
let config: NTTConfig<'_, C::ScalarField> = NTTConfig::default();
let mut out_p = vec![Projective::<C>::zero(); test_size];
let ecntt_result = HostSlice::from_mut_slice(&mut out_p);
let input = HostSlice::from_slice(slice);
ecntt(input, NTTDir::kForward, &config, ecntt_result).unwrap();
assert_ne!(ecntt_result.as_slice(), points);
let mut slice = vec![Projective::<C>::zero(); test_size];
let iecntt_result = HostSlice::from_mut_slice(&mut slice);
ecntt(ecntt_result, NTTDir::kInverse, &config, iecntt_result).unwrap();
assert_eq!(iecntt_result.as_slice(), points);
}
}
pub fn check_ecntt_batch<C: Curve>()
where
<C::ScalarField as FieldImpl>::Config: ECNTT<C>,
{
let test_sizes = [1 << 4, 1 << 9];
let batch_sizes = [1, 1 << 4, 21];
for test_size in test_sizes {
// let coset_generators = [F::one(), F::Config::generate_random(1)[0]];
let mut config: NTTConfig<'_, C::ScalarField> = NTTConfig::default();
for batch_size in batch_sizes {
let slice = &C::generate_random_projective_points(test_size * batch_size);
let points = HostSlice::from_slice(slice);
for is_inverse in [NTTDir::kInverse, NTTDir::kForward] {
for ordering in [
Ordering::kNN, // ~same performance
// Ordering::kNR,
// Ordering::kRN,
// Ordering::kRR,
// Ordering::kNM, // no mixed radix ecntt
// Ordering::kMN,
] {
config.ordering = ordering;
let mut slice = vec![Projective::zero(); batch_size * test_size];
let batch_ntt_result = HostSlice::from_mut_slice(&mut slice);
for alg in [NttAlgorithm::Radix2] {
config.batch_size = batch_size as i32;
config.ntt_algorithm = alg;
ecntt(points, is_inverse, &config, batch_ntt_result).unwrap();
config.batch_size = 1;
let mut slice = vec![Projective::zero(); test_size];
let one_ntt_result = HostSlice::from_mut_slice(&mut slice);
for i in 0..batch_size {
ecntt(
HostSlice::from_slice(
&points[i * test_size..(i + 1) * test_size]
.as_slice()
.to_vec(),
),
is_inverse,
&config,
one_ntt_result,
)
.unwrap();
assert_eq!(
batch_ntt_result[i * test_size..(i + 1) * test_size].as_slice(),
one_ntt_result.as_slice()
);
}
}
}
}
}
}
}

View File

@@ -3,15 +3,16 @@ use crate::traits::ArkConvertible;
use crate::traits::{FieldConfig, FieldImpl, MontgomeryConvertible};
#[cfg(feature = "arkworks")]
use ark_ff::{BigInteger, Field as ArkField, PrimeField};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use icicle_cuda_runtime::memory::DeviceSlice;
use std::fmt::{Debug, Display};
use std::marker::PhantomData;
#[derive(PartialEq, Copy, Clone)]
#[repr(C)]
pub struct Field<const NUM_LIMBS: usize, F: FieldConfig> {
limbs: [u64; NUM_LIMBS],
limbs: [u32; NUM_LIMBS],
p: PhantomData<F>,
}
@@ -26,7 +27,7 @@ impl<const NUM_LIMBS: usize, F: FieldConfig> Display for Field<NUM_LIMBS, F> {
.iter()
.rev()
{
write!(f, "{:016x}", b)?;
write!(f, "{:08x}", b)?;
}
Ok(())
}
@@ -38,21 +39,21 @@ impl<const NUM_LIMBS: usize, F: FieldConfig> Debug for Field<NUM_LIMBS, F> {
}
}
impl<const NUM_LIMBS: usize, F: FieldConfig> Into<[u64; NUM_LIMBS]> for Field<NUM_LIMBS, F> {
fn into(self) -> [u64; NUM_LIMBS] {
impl<const NUM_LIMBS: usize, F: FieldConfig> Into<[u32; NUM_LIMBS]> for Field<NUM_LIMBS, F> {
fn into(self) -> [u32; NUM_LIMBS] {
self.limbs
}
}
impl<const NUM_LIMBS: usize, F: FieldConfig> From<[u64; NUM_LIMBS]> for Field<NUM_LIMBS, F> {
fn from(limbs: [u64; NUM_LIMBS]) -> Self {
impl<const NUM_LIMBS: usize, F: FieldConfig> From<[u32; NUM_LIMBS]> for Field<NUM_LIMBS, F> {
fn from(limbs: [u32; NUM_LIMBS]) -> Self {
Self { limbs, p: PhantomData }
}
}
impl<const NUM_LIMBS: usize, F: FieldConfig> FieldImpl for Field<NUM_LIMBS, F> {
type Config = F;
type Repr = [u64; NUM_LIMBS];
type Repr = [u32; NUM_LIMBS];
fn to_bytes_le(&self) -> Vec<u8> {
self.limbs
@@ -68,49 +69,50 @@ impl<const NUM_LIMBS: usize, F: FieldConfig> FieldImpl for Field<NUM_LIMBS, F> {
// please note that this function zero-pads if there are not enough bytes
// and only takes the first bytes if there are too many of them
fn from_bytes_le(bytes: &[u8]) -> Self {
let mut limbs: [u64; NUM_LIMBS] = [0; NUM_LIMBS];
let mut limbs: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
for (i, chunk) in bytes
.chunks(8)
.chunks(4)
.take(NUM_LIMBS)
.enumerate()
{
let mut chunk_array: [u8; 8] = [0; 8];
let mut chunk_array: [u8; 4] = [0; 4];
chunk_array[..chunk.len()].clone_from_slice(chunk);
limbs[i] = u64::from_le_bytes(chunk_array);
limbs[i] = u32::from_le_bytes(chunk_array);
}
Self::from(limbs)
}
fn zero() -> Self {
Field {
limbs: [0u64; NUM_LIMBS],
p: PhantomData,
}
FieldImpl::from_u32(0)
}
fn one() -> Self {
let mut limbs = [0u64; NUM_LIMBS];
limbs[0] = 1;
FieldImpl::from_u32(1)
}
fn from_u32(val: u32) -> Self {
let mut limbs = [0u32; NUM_LIMBS];
limbs[0] = val;
Field { limbs, p: PhantomData }
}
}
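Because limbs are now u32, Repr changed and from_bytes_le consumes bytes four at a time. A small illustrative sketch, not part of this diff, with ScalarField assumed to be a field exported by the bn254 bindings:

use icicle_bn254::curve::ScalarField; // assumed field export (8 x u32 limbs)
use icicle_core::traits::FieldImpl;

// from_u32 writes the value into the least-significant limb.
fn small_scalar(val: u32) -> ScalarField {
    ScalarField::from_u32(val)
}

// Bytes are taken 4 at a time (one u32 limb each); short input is zero-padded
// and anything beyond NUM_LIMBS * 4 bytes is ignored.
fn scalar_from_bytes(bytes: &[u8]) -> ScalarField {
    ScalarField::from_bytes_le(bytes)
}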
#[doc(hidden)]
pub trait MontgomeryConvertibleField<F: FieldImpl> {
fn to_mont(values: &mut HostOrDeviceSlice<F>) -> CudaError;
fn from_mont(values: &mut HostOrDeviceSlice<F>) -> CudaError;
pub trait MontgomeryConvertibleField<'a, F: FieldImpl> {
fn to_mont(values: &mut DeviceSlice<F>, ctx: &DeviceContext<'a>) -> CudaError;
fn from_mont(values: &mut DeviceSlice<F>, ctx: &DeviceContext<'a>) -> CudaError;
}
impl<const NUM_LIMBS: usize, F: FieldConfig> MontgomeryConvertible for Field<NUM_LIMBS, F>
impl<'a, const NUM_LIMBS: usize, F: FieldConfig> MontgomeryConvertible<'a> for Field<NUM_LIMBS, F>
where
F: MontgomeryConvertibleField<Self>,
F: MontgomeryConvertibleField<'a, Self>,
{
fn to_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError {
F::to_mont(values)
fn to_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError {
F::to_mont(values, ctx)
}
fn from_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError {
F::from_mont(values)
fn from_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError {
F::from_mont(values, ctx)
}
}
@@ -168,13 +170,13 @@ macro_rules! impl_scalar_field {
impl_field!($num_limbs, $field_name, $field_cfg, $ark_equiv);
mod $field_prefix_ident {
use crate::curve::{$field_name, CudaError, DeviceContext, HostOrDeviceSlice};
use super::{$field_name, CudaError, DeviceContext, HostOrDeviceSlice};
extern "C" {
#[link_name = concat!($field_prefix, "GenerateScalars")]
#[link_name = concat!($field_prefix, "_generate_scalars")]
pub(crate) fn generate_scalars(scalars: *mut $field_name, size: usize);
#[link_name = concat!($field_prefix, "ScalarConvertMontgomery")]
#[link_name = concat!($field_prefix, "_scalar_convert_montgomery")]
fn _convert_scalars_montgomery(
scalars: *mut $field_name,
size: usize,
@@ -184,17 +186,12 @@ macro_rules! impl_scalar_field {
}
pub(crate) fn convert_scalars_montgomery(
scalars: &mut HostOrDeviceSlice<$field_name>,
scalars: *mut $field_name,
len: usize,
is_into: bool,
ctx: &DeviceContext,
) -> CudaError {
unsafe {
_convert_scalars_montgomery(
scalars.as_mut_ptr(),
scalars.len(),
is_into,
&DeviceContext::default() as *const _ as *const DeviceContext,
)
}
unsafe { _convert_scalars_montgomery(scalars, len, is_into, ctx as *const DeviceContext) }
}
}
@@ -206,13 +203,34 @@ macro_rules! impl_scalar_field {
}
}
impl MontgomeryConvertibleField<$field_name> for $field_cfg {
fn to_mont(values: &mut HostOrDeviceSlice<$field_name>) -> CudaError {
$field_prefix_ident::convert_scalars_montgomery(values, true)
impl<'a> MontgomeryConvertibleField<'a, $field_name> for $field_cfg {
fn to_mont(values: &mut DeviceSlice<$field_name>, ctx: &DeviceContext<'a>) -> CudaError {
check_device(ctx.device_id);
assert_eq!(
values
.device_id()
.unwrap(),
ctx.device_id,
"Device ids are different in slice and context"
);
$field_prefix_ident::convert_scalars_montgomery(unsafe { values.as_mut_ptr() }, values.len(), true, ctx)
}
fn from_mont(values: &mut HostOrDeviceSlice<$field_name>) -> CudaError {
$field_prefix_ident::convert_scalars_montgomery(values, false)
fn from_mont(values: &mut DeviceSlice<$field_name>, ctx: &DeviceContext<'a>) -> CudaError {
check_device(ctx.device_id);
assert_eq!(
values
.device_id()
.unwrap(),
ctx.device_id,
"Device ids are different in slice and context"
);
$field_prefix_ident::convert_scalars_montgomery(
unsafe { values.as_mut_ptr() },
values.len(),
false,
ctx,
)
}
}
};
@@ -227,5 +245,10 @@ macro_rules! impl_field_tests {
fn test_field_convert_montgomery() {
check_field_convert_montgomery::<$field_name>()
}
#[test]
fn test_field_equality() {
check_field_equality::<$field_name>()
}
};
}

View File

@@ -1,10 +1,11 @@
pub mod curve;
pub mod ecntt;
pub mod error;
pub mod field;
pub mod msm;
pub mod ntt;
pub mod polynomials;
pub mod poseidon;
#[cfg(feature = "arkworks")]
#[doc(hidden)]
pub mod tests;
pub mod traits;
@@ -13,6 +14,6 @@ pub mod vec_ops;
pub trait SNARKCurve: curve::Curve + msm::MSM<Self>
where
<Self::ScalarField as traits::FieldImpl>::Config: ntt::NTT<Self::ScalarField>,
<Self::ScalarField as traits::FieldImpl>::Config: ntt::NTT<Self::ScalarField, Self::ScalarField>,
{
}

View File

@@ -1,7 +1,8 @@
use crate::curve::{Affine, Curve, Projective};
use crate::error::IcicleResult;
use icicle_cuda_runtime::device::check_device;
use icicle_cuda_runtime::device_context::{DeviceContext, DEFAULT_DEVICE_ID};
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
#[cfg(feature = "arkworks")]
#[doc(hidden)]
@@ -91,18 +92,18 @@ impl<'a> MSMConfig<'a> {
#[doc(hidden)]
pub trait MSM<C: Curve> {
fn msm_unchecked(
scalars: &HostOrDeviceSlice<C::ScalarField>,
points: &HostOrDeviceSlice<Affine<C>>,
scalars: &(impl HostOrDeviceSlice<C::ScalarField> + ?Sized),
points: &(impl HostOrDeviceSlice<Affine<C>> + ?Sized),
cfg: &MSMConfig,
results: &mut HostOrDeviceSlice<Projective<C>>,
results: &mut (impl HostOrDeviceSlice<Projective<C>> + ?Sized),
) -> IcicleResult<()>;
fn precompute_bases_unchecked(
points: &HostOrDeviceSlice<Affine<C>>,
points: &(impl HostOrDeviceSlice<Affine<C>> + ?Sized),
precompute_factor: i32,
_c: i32,
ctx: &DeviceContext,
output_bases: &mut HostOrDeviceSlice<Affine<C>>,
output_bases: &mut DeviceSlice<Affine<C>>,
) -> IcicleResult<()>;
}
@@ -122,10 +123,10 @@ pub trait MSM<C: Curve> {
///
/// Returns `Ok(())` if no errors occurred or a `CudaError` otherwise.
pub fn msm<C: Curve + MSM<C>>(
scalars: &HostOrDeviceSlice<C::ScalarField>,
points: &HostOrDeviceSlice<Affine<C>>,
scalars: &(impl HostOrDeviceSlice<C::ScalarField> + ?Sized),
points: &(impl HostOrDeviceSlice<Affine<C>> + ?Sized),
cfg: &MSMConfig,
results: &mut HostOrDeviceSlice<Projective<C>>,
results: &mut (impl HostOrDeviceSlice<Projective<C>> + ?Sized),
) -> IcicleResult<()> {
if points.len() % (cfg.precompute_factor as usize) != 0 {
panic!(
@@ -149,6 +150,28 @@ pub fn msm<C: Curve + MSM<C>>(
scalars.len()
);
}
let ctx_device_id = cfg
.ctx
.device_id;
if let Some(device_id) = scalars.device_id() {
assert_eq!(
device_id, ctx_device_id,
"Device ids in scalars and context are different"
);
}
if let Some(device_id) = points.device_id() {
assert_eq!(
device_id, ctx_device_id,
"Device ids in points and context are different"
);
}
if let Some(device_id) = results.device_id() {
assert_eq!(
device_id, ctx_device_id,
"Device ids in results and context are different"
);
}
check_device(ctx_device_id);
let mut local_cfg = cfg.clone();
local_cfg.points_size = points_size as i32;
local_cfg.batch_size = results.len() as i32;
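The msm wrapper now takes any HostOrDeviceSlice implementor and asserts that every device-resident buffer matches the device id of cfg.ctx before dispatching. A hedged usage sketch, not part of this diff, with the bn254 type names assumed:

use icicle_bn254::curve::{G1Affine, G1Projective, ScalarField}; // assumed re-exports
use icicle_core::msm::{msm, MSMConfig};
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};

// Single (non-batched) MSM: host scalars and points, one-element device output.
fn single_msm(scalars: &[ScalarField], points: &[G1Affine]) -> G1Projective {
    assert_eq!(scalars.len(), points.len());
    let mut result_d = DeviceVec::<G1Projective>::cuda_malloc(1).unwrap();
    let cfg = MSMConfig::default();
    msm(
        HostSlice::from_slice(scalars),
        HostSlice::from_slice(points),
        &cfg,
        &mut result_d[..],
    )
    .unwrap();
    let mut result_h = vec![G1Projective::zero(); 1];
    result_d
        .copy_to_host(HostSlice::from_mut_slice(&mut result_h))
        .unwrap();
    result_h.pop().unwrap()
}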
@@ -179,11 +202,11 @@ pub fn msm<C: Curve + MSM<C>>(
///
/// Returns `Ok(())` if no errors occurred or a `CudaError` otherwise.
pub fn precompute_bases<C: Curve + MSM<C>>(
points: &HostOrDeviceSlice<Affine<C>>,
points: &(impl HostOrDeviceSlice<Affine<C>> + ?Sized),
precompute_factor: i32,
_c: i32,
ctx: &DeviceContext,
output_bases: &mut HostOrDeviceSlice<Affine<C>>,
output_bases: &mut DeviceSlice<Affine<C>>,
) -> IcicleResult<()> {
assert_eq!(
output_bases.len(),
@@ -208,7 +231,7 @@ macro_rules! impl_msm {
use super::{$curve, Affine, CudaError, Curve, DeviceContext, MSMConfig, Projective};
extern "C" {
#[link_name = concat!($curve_prefix, "MSMCuda")]
#[link_name = concat!($curve_prefix, "_msm_cuda")]
pub(crate) fn msm_cuda(
scalars: *const <$curve as Curve>::ScalarField,
points: *const Affine<$curve>,
@@ -217,7 +240,7 @@ macro_rules! impl_msm {
out: *mut Projective<$curve>,
) -> CudaError;
#[link_name = concat!($curve_prefix, "PrecomputeMSMBases")]
#[link_name = concat!($curve_prefix, "_precompute_msm_bases_cuda")]
pub(crate) fn precompute_bases_cuda(
points: *const Affine<$curve>,
bases_size: i32,
@@ -232,10 +255,10 @@ macro_rules! impl_msm {
impl MSM<$curve> for $curve {
fn msm_unchecked(
scalars: &HostOrDeviceSlice<<$curve as Curve>::ScalarField>,
points: &HostOrDeviceSlice<Affine<$curve>>,
scalars: &(impl HostOrDeviceSlice<<$curve as Curve>::ScalarField> + ?Sized),
points: &(impl HostOrDeviceSlice<Affine<$curve>> + ?Sized),
cfg: &MSMConfig,
results: &mut HostOrDeviceSlice<Projective<$curve>>,
results: &mut (impl HostOrDeviceSlice<Projective<$curve>> + ?Sized),
) -> IcicleResult<()> {
unsafe {
$curve_prefix_indent::msm_cuda(
@@ -250,11 +273,11 @@ macro_rules! impl_msm {
}
fn precompute_bases_unchecked(
points: &HostOrDeviceSlice<Affine<$curve>>,
points: &(impl HostOrDeviceSlice<Affine<$curve>> + ?Sized),
precompute_factor: i32,
_c: i32,
ctx: &DeviceContext,
output_bases: &mut HostOrDeviceSlice<Affine<$curve>>,
output_bases: &mut DeviceSlice<Affine<$curve>>,
) -> IcicleResult<()> {
unsafe {
$curve_prefix_indent::precompute_bases_cuda(

View File

@@ -2,7 +2,7 @@ use crate::curve::{Affine, Curve, Projective};
use crate::msm::{msm, precompute_bases, MSMConfig, MSM};
use crate::traits::{FieldImpl, GenerateRandom};
use icicle_cuda_runtime::device::{get_device_count, set_device, warmup};
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};
use icicle_cuda_runtime::stream::CudaStream;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
@@ -42,7 +42,7 @@ where
set_device(device_id).unwrap();
let test_sizes = [4, 8, 16, 32, 64, 128, 256, 1000, 1 << 18];
let mut msm_results = HostOrDeviceSlice::cuda_malloc(1).unwrap();
let mut msm_results = DeviceVec::<Projective<C>>::cuda_malloc_for_device(1, device_id).unwrap();
for test_size in test_sizes {
let points = generate_random_affine_points_with_zeroes(test_size, 2);
let scalars = <C::ScalarField as FieldImpl>::Config::generate_random(test_size);
@@ -58,10 +58,10 @@ where
// (just beware the possible extra flag in affine point types, can't transmute ark Affine because of that)
let scalars_mont = unsafe { &*(&scalars_ark[..] as *const _ as *const [C::ScalarField]) };
let mut scalars_d = HostOrDeviceSlice::cuda_malloc(test_size).unwrap();
let mut scalars_d = DeviceVec::<C::ScalarField>::cuda_malloc(test_size).unwrap();
let stream = CudaStream::create().unwrap();
scalars_d
.copy_from_host_async(&scalars_mont, &stream)
.copy_from_host_async(HostSlice::from_slice(&scalars_mont), &stream)
.unwrap();
let mut cfg = MSMConfig::default_for_device(device_id);
@@ -69,17 +69,23 @@ where
.stream = &stream;
cfg.is_async = true;
cfg.are_scalars_montgomery_form = true;
msm(&scalars_d, &HostOrDeviceSlice::on_host(points), &cfg, &mut msm_results).unwrap();
msm(
&scalars_d[..],
HostSlice::from_slice(&points),
&cfg,
&mut msm_results[..],
)
.unwrap();
// need to make sure that scalars_d weren't mutated by the previous call
let mut scalars_mont_after = vec![C::ScalarField::zero(); test_size];
scalars_d
.copy_to_host_async(&mut scalars_mont_after, &stream)
.copy_to_host_async(HostSlice::from_mut_slice(&mut scalars_mont_after), &stream)
.unwrap();
assert_eq!(scalars_mont, scalars_mont_after);
let mut msm_host_result = vec![Projective::<C>::zero(); 1];
msm_results
.copy_to_host(&mut msm_host_result[..])
.copy_to_host(HostSlice::from_mut_slice(&mut msm_host_result[..]))
.unwrap();
stream
.synchronize()
@@ -118,10 +124,9 @@ where
for test_size in test_sizes {
let precompute_factor = 8;
let points = generate_random_affine_points_with_zeroes(test_size, 10);
let points_h = HostOrDeviceSlice::on_host(points.clone());
let mut precomputed_points_d = HostOrDeviceSlice::cuda_malloc(precompute_factor * test_size).unwrap();
let mut precomputed_points_d = DeviceVec::cuda_malloc(precompute_factor * test_size).unwrap();
precompute_bases(
&points_h,
HostSlice::from_slice(&points),
precompute_factor as i32,
0,
&cfg.ctx,
@@ -135,27 +140,27 @@ where
.take(batch_size)
.flatten()
.collect();
let scalars_h = HostOrDeviceSlice::on_host(scalars);
let scalars_h = HostSlice::from_slice(&scalars);
let mut msm_results_1 = HostOrDeviceSlice::cuda_malloc(batch_size).unwrap();
let mut msm_results_2 = HostOrDeviceSlice::cuda_malloc(batch_size).unwrap();
let mut points_d = HostOrDeviceSlice::cuda_malloc(test_size * batch_size).unwrap();
let mut msm_results_1 = DeviceVec::<Projective<C>>::cuda_malloc(batch_size).unwrap();
let mut msm_results_2 = DeviceVec::<Projective<C>>::cuda_malloc(batch_size).unwrap();
let mut points_d = DeviceVec::<Affine<C>>::cuda_malloc(test_size * batch_size).unwrap();
points_d
.copy_from_host_async(&points_cloned, &stream)
.copy_from_host_async(HostSlice::from_slice(&points_cloned), &stream)
.unwrap();
cfg.precompute_factor = precompute_factor as i32;
msm(&scalars_h, &precomputed_points_d, &cfg, &mut msm_results_1).unwrap();
msm(scalars_h, &precomputed_points_d[..], &cfg, &mut msm_results_1[..]).unwrap();
cfg.precompute_factor = 1;
msm(&scalars_h, &points_d, &cfg, &mut msm_results_2).unwrap();
msm(scalars_h, &points_d[..], &cfg, &mut msm_results_2[..]).unwrap();
let mut msm_host_result_1 = vec![Projective::<C>::zero(); batch_size];
let mut msm_host_result_2 = vec![Projective::<C>::zero(); batch_size];
msm_results_1
.copy_to_host_async(&mut msm_host_result_1[..], &stream)
.copy_to_host_async(HostSlice::from_mut_slice(&mut msm_host_result_1), &stream)
.unwrap();
msm_results_2
.copy_to_host_async(&mut msm_host_result_2[..], &stream)
.copy_to_host_async(HostSlice::from_mut_slice(&mut msm_host_result_2), &stream)
.unwrap();
stream
.synchronize()
@@ -166,7 +171,6 @@ where
.map(|x| x.to_ark())
.collect();
let scalars_ark: Vec<_> = scalars_h
.as_slice()
.iter()
.map(|x| x.to_ark())
.collect();
@@ -217,17 +221,17 @@ where
.map(|x| x.to_ark())
.collect();
let mut msm_results = HostOrDeviceSlice::on_host(vec![Projective::<C>::zero(); batch_size]);
let mut msm_results = vec![Projective::<C>::zero(); batch_size];
let mut cfg = MSMConfig::default();
if test_size < test_threshold {
cfg.bitsize = 1;
}
msm(
&HostOrDeviceSlice::on_host(scalars),
&HostOrDeviceSlice::on_host(points),
HostSlice::from_slice(&scalars),
HostSlice::from_slice(&points),
&cfg,
&mut msm_results,
HostSlice::from_mut_slice(&mut msm_results),
)
.unwrap();
@@ -238,7 +242,7 @@ where
{
let msm_result_ark: ark_ec::models::short_weierstrass::Projective<C::ArkSWConfig> =
VariableBaseMSM::msm(&points_chunk, &scalars_chunk).unwrap();
assert_eq!(msm_results.as_slice()[i].to_ark(), msm_result_ark);
assert_eq!(msm_results[i].to_ark(), msm_result_ark);
}
}
}

View File

@@ -1,7 +1,8 @@
use icicle_cuda_runtime::device::check_device;
use icicle_cuda_runtime::device_context::{DeviceContext, DEFAULT_DEVICE_ID};
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use crate::{error::IcicleResult, traits::FieldImpl};
pub use crate::{error::IcicleResult, traits::FieldImpl};
#[cfg(feature = "arkworks")]
#[doc(hidden)]
@@ -71,7 +72,7 @@ pub enum NttAlgorithm {
#[repr(C)]
#[derive(Debug, Clone)]
pub struct NTTConfig<'a, S> {
/// Details related to the device such as its id and stream id. See [DeviceContext](@ref device_context::DeviceContext).
/// Details related to the device such as its id and stream id. See [DeviceContext](DeviceContext).
pub ctx: DeviceContext<'a>,
/// Coset generator. Used to perform coset (i)NTTs. Default value: `S::one()` (corresponding to no coset being used).
pub coset_gen: S,
@@ -79,15 +80,15 @@ pub struct NTTConfig<'a, S> {
pub batch_size: i32,
/// If true the function will compute the NTTs over the columns of the input matrix and not over the rows.
pub columns_batch: bool,
/// Ordering of inputs and outputs. See [Ordering](@ref Ordering). Default value: `Ordering::kNN`.
/// Ordering of inputs and outputs. See [Ordering](Ordering). Default value: `Ordering::kNN`.
pub ordering: Ordering,
are_inputs_on_device: bool,
are_outputs_on_device: bool,
pub are_inputs_on_device: bool,
pub are_outputs_on_device: bool,
/// Whether to run the NTT asynchronously. If set to `true`, the NTT function will be non-blocking and you'd need to synchronize
/// it explicitly by running `stream.synchronize()`. If set to false, the NTT function will block the current CPU thread.
pub is_async: bool,
/// Explicitly select the NTT algorithm. Default value: Auto (the implementation selects radix-2 or mixed-radix algorithm based
/// on heuristics
/// on heuristics).
pub ntt_algorithm: NttAlgorithm,
}
@@ -114,17 +115,25 @@ impl<'a, S: FieldImpl> NTTConfig<'a, S> {
}
#[doc(hidden)]
pub trait NTT<F: FieldImpl> {
pub trait NTTDomain<F: FieldImpl> {
fn get_root_of_unity(max_size: u64) -> F;
fn initialize_domain(primitive_root: F, ctx: &DeviceContext, fast_twiddles: bool) -> IcicleResult<()>;
fn release_domain(ctx: &DeviceContext) -> IcicleResult<()>;
}
#[doc(hidden)]
pub trait NTT<T, F: FieldImpl>: NTTDomain<F> {
fn ntt_unchecked(
input: &HostOrDeviceSlice<F>,
input: &(impl HostOrDeviceSlice<T> + ?Sized),
dir: NTTDir,
cfg: &NTTConfig<F>,
output: &mut (impl HostOrDeviceSlice<T> + ?Sized),
) -> IcicleResult<()>;
fn ntt_inplace_unchecked(
inout: &mut (impl HostOrDeviceSlice<T> + ?Sized),
dir: NTTDir,
cfg: &NTTConfig<F>,
output: &mut HostOrDeviceSlice<F>,
) -> IcicleResult<()>;
fn ntt_inplace_unchecked(inout: &mut HostOrDeviceSlice<F>, dir: NTTDir, cfg: &NTTConfig<F>) -> IcicleResult<()>;
fn initialize_domain(primitive_root: F, ctx: &DeviceContext) -> IcicleResult<()>;
fn initialize_domain_fast_twiddles_mode(primitive_root: F, ctx: &DeviceContext) -> IcicleResult<()>;
fn release_domain(ctx: &DeviceContext) -> IcicleResult<()>;
}
/// Computes the NTT, or a batch of several NTTs.
@@ -138,15 +147,15 @@ pub trait NTT<F: FieldImpl> {
/// * `cfg` - config used to specify extra arguments of the NTT.
///
/// * `output` - buffer to write the NTT outputs into. Must be of the same size as `input`.
pub fn ntt<F>(
input: &HostOrDeviceSlice<F>,
pub fn ntt<T, F>(
input: &(impl HostOrDeviceSlice<T> + ?Sized),
dir: NTTDir,
cfg: &NTTConfig<F>,
output: &mut HostOrDeviceSlice<F>,
output: &mut (impl HostOrDeviceSlice<T> + ?Sized),
) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: NTT<F>,
<F as FieldImpl>::Config: NTT<T, F>,
{
if input.len() != output.len() {
panic!(
@@ -155,11 +164,27 @@ where
output.len()
);
}
let ctx_device_id = cfg
.ctx
.device_id;
if let Some(device_id) = input.device_id() {
assert_eq!(
device_id, ctx_device_id,
"Device ids in input and context are different"
);
}
if let Some(device_id) = output.device_id() {
assert_eq!(
device_id, ctx_device_id,
"Device ids in output and context are different"
);
}
check_device(ctx_device_id);
let mut local_cfg = cfg.clone();
local_cfg.are_inputs_on_device = input.is_on_device();
local_cfg.are_outputs_on_device = output.is_on_device();
<<F as FieldImpl>::Config as NTT<F>>::ntt_unchecked(input, dir, &local_cfg, output)
<<F as FieldImpl>::Config as NTT<T, F>>::ntt_unchecked(input, dir, &local_cfg, output)
}
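With ntt generic over the element type T and the domain handling split into NTTDomain, an end-to-end call now looks roughly like the sketch below. Illustrative only, not part of this diff; ScalarField from the bn254 bindings is assumed, and initialize_domain takes the fast_twiddles flag directly:

use icicle_bn254::curve::ScalarField; // assumed field export
use icicle_core::ntt::{get_root_of_unity, initialize_domain, ntt, NTTConfig, NTTDir};
use icicle_core::traits::{FieldImpl, GenerateRandom};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::memory::HostSlice;

fn forward_ntt(log_size: u32) -> Vec<ScalarField> {
    let size = 1usize << log_size;
    // One-time domain setup: a root of unity of large enough order, fast-twiddles mode off.
    let ctx = DeviceContext::default();
    let rou = get_root_of_unity::<ScalarField>(size as u64);
    initialize_domain(rou, &ctx, false).unwrap();

    let input = <ScalarField as FieldImpl>::Config::generate_random(size);
    let mut output = vec![ScalarField::zero(); size];
    let cfg = NTTConfig::default();
    ntt(
        HostSlice::from_slice(&input),
        NTTDir::kForward,
        &cfg,
        HostSlice::from_mut_slice(&mut output),
    )
    .unwrap();
    output
}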
/// Computes the NTT, or a batch of several NTTs inplace.
@@ -171,16 +196,20 @@ where
/// * `dir` - whether to compute forward or inverse NTT.
///
/// * `cfg` - config used to specify extra arguments of the NTT.
pub fn ntt_inplace<F>(inout: &mut HostOrDeviceSlice<F>, dir: NTTDir, cfg: &NTTConfig<F>) -> IcicleResult<()>
pub fn ntt_inplace<T, F>(
inout: &mut (impl HostOrDeviceSlice<T> + ?Sized),
dir: NTTDir,
cfg: &NTTConfig<F>,
) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: NTT<F>,
<F as FieldImpl>::Config: NTT<T, F>,
{
let mut local_cfg = cfg.clone();
local_cfg.are_inputs_on_device = inout.is_on_device();
local_cfg.are_outputs_on_device = inout.is_on_device();
<<F as FieldImpl>::Config as NTT<F>>::ntt_inplace_unchecked(inout, dir, &local_cfg)
<<F as FieldImpl>::Config as NTT<T, F>>::ntt_inplace_unchecked(inout, dir, &local_cfg)
}
/// Generates twiddle factors which will be used to compute NTTs.
@@ -192,71 +221,60 @@ where
/// This function will panic if the order of `primitive_root` is not a power of two.
///
/// * `ctx` - GPU index and stream to perform the computation.
pub fn initialize_domain<F>(primitive_root: F, ctx: &DeviceContext) -> IcicleResult<()>
pub fn initialize_domain<F>(primitive_root: F, ctx: &DeviceContext, fast_twiddles: bool) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: NTT<F>,
<F as FieldImpl>::Config: NTTDomain<F>,
{
<<F as FieldImpl>::Config as NTT<F>>::initialize_domain(primitive_root, ctx)
}
pub fn initialize_domain_fast_twiddles_mode<F>(primitive_root: F, ctx: &DeviceContext) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: NTT<F>,
{
<<F as FieldImpl>::Config as NTT<F>>::initialize_domain_fast_twiddles_mode(primitive_root, ctx)
<<F as FieldImpl>::Config as NTTDomain<F>>::initialize_domain(primitive_root, ctx, fast_twiddles)
}
pub fn release_domain<F>(ctx: &DeviceContext) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: NTT<F>,
<F as FieldImpl>::Config: NTTDomain<F>,
{
<<F as FieldImpl>::Config as NTT<F>>::release_domain(ctx)
<<F as FieldImpl>::Config as NTTDomain<F>>::release_domain(ctx)
}
pub fn get_root_of_unity<F>(max_size: u64) -> F
where
F: FieldImpl,
<F as FieldImpl>::Config: NTTDomain<F>,
{
<<F as FieldImpl>::Config as NTTDomain<F>>::get_root_of_unity(max_size)
}
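Releasing the domain mirrors initialization; a short hedged sketch under the same assumed bn254 names:

use icicle_bn254::curve::ScalarField; // assumed field export
use icicle_core::ntt::release_domain;
use icicle_cuda_runtime::device_context::DeviceContext;

// Frees the twiddle-factor tables for this field on the context's device.
fn teardown_ntt_domain() {
    let ctx = DeviceContext::default();
    release_domain::<ScalarField>(&ctx).unwrap();
}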
#[macro_export]
macro_rules! impl_ntt {
macro_rules! impl_ntt_without_domain {
(
$field_prefix:literal,
$field_prefix_ident:ident,
$field:ident,
$field_config:ident
$domain_field:ident,
$domain_config:ident,
$ntt_type:ident,
$ntt_type_lit:literal,
$inout:ident
) => {
mod $field_prefix_ident {
use crate::ntt::{$field, $field_config, CudaError, DeviceContext, NTTConfig, NTTDir, DEFAULT_DEVICE_ID};
extern "C" {
#[link_name = concat!($field_prefix, "NTTCuda")]
pub(crate) fn ntt_cuda(
input: *const $field,
size: i32,
dir: NTTDir,
config: &NTTConfig<$field>,
output: *mut $field,
) -> CudaError;
#[link_name = concat!($field_prefix, "InitializeDomain")]
pub(crate) fn initialize_ntt_domain(
primitive_root: &$field,
ctx: &DeviceContext,
fast_twiddles_mode: bool,
) -> CudaError;
#[link_name = concat!($field_prefix, "ReleaseDomain")]
pub(crate) fn release_ntt_domain(ctx: &DeviceContext) -> CudaError;
}
extern "C" {
#[link_name = concat!($field_prefix, concat!($ntt_type_lit, "_cuda"))]
fn ntt_cuda(
input: *const $inout,
size: i32,
dir: NTTDir,
config: &NTTConfig<$domain_field>,
output: *mut $inout,
) -> CudaError;
}
impl NTT<$field> for $field_config {
impl $ntt_type<$inout, $domain_field> for $domain_config {
fn ntt_unchecked(
input: &HostOrDeviceSlice<$field>,
input: &(impl HostOrDeviceSlice<$inout> + ?Sized),
dir: NTTDir,
cfg: &NTTConfig<$field>,
output: &mut HostOrDeviceSlice<$field>,
cfg: &NTTConfig<$domain_field>,
output: &mut (impl HostOrDeviceSlice<$inout> + ?Sized),
) -> IcicleResult<()> {
unsafe {
$field_prefix_ident::ntt_cuda(
ntt_cuda(
input.as_ptr(),
(input.len() / (cfg.batch_size as usize)) as i32,
dir,
@@ -268,13 +286,13 @@ macro_rules! impl_ntt {
}
fn ntt_inplace_unchecked(
inout: &mut HostOrDeviceSlice<$field>,
inout: &mut (impl HostOrDeviceSlice<$inout> + ?Sized),
dir: NTTDir,
cfg: &NTTConfig<$field>,
cfg: &NTTConfig<$domain_field>,
) -> IcicleResult<()> {
unsafe {
$field_prefix_ident::ntt_cuda(
inout.as_ptr(),
ntt_cuda(
inout.as_mut_ptr(),
(inout.len() / (cfg.batch_size as usize)) as i32,
dir,
cfg,
@@ -283,16 +301,55 @@ macro_rules! impl_ntt {
.wrap()
}
}
}
};
}
fn initialize_domain(primitive_root: $field, ctx: &DeviceContext) -> IcicleResult<()> {
unsafe { $field_prefix_ident::initialize_ntt_domain(&primitive_root, ctx, false).wrap() }
#[macro_export]
macro_rules! impl_ntt {
(
$field_prefix:literal,
$field_prefix_ident:ident,
$field:ident,
$field_config:ident
) => {
mod $field_prefix_ident {
use crate::ntt::*;
extern "C" {
#[link_name = concat!($field_prefix, "_initialize_domain")]
fn initialize_ntt_domain(
primitive_root: &$field,
ctx: &DeviceContext,
fast_twiddles_mode: bool,
) -> CudaError;
#[link_name = concat!($field_prefix, "_release_domain")]
fn release_ntt_domain(ctx: &DeviceContext) -> CudaError;
#[link_name = concat!($field_prefix, "_get_root_of_unity")]
fn get_root_of_unity(max_size: u64) -> $field;
}
fn initialize_domain_fast_twiddles_mode(primitive_root: $field, ctx: &DeviceContext) -> IcicleResult<()> {
unsafe { $field_prefix_ident::initialize_ntt_domain(&primitive_root, ctx, true).wrap() }
}
fn release_domain(ctx: &DeviceContext) -> IcicleResult<()> {
unsafe { $field_prefix_ident::release_ntt_domain(ctx).wrap() }
impl NTTDomain<$field> for $field_config {
fn initialize_domain(
primitive_root: $field,
ctx: &DeviceContext,
fast_twiddles: bool,
) -> IcicleResult<()> {
unsafe { initialize_ntt_domain(&primitive_root, ctx, fast_twiddles).wrap() }
}
fn release_domain(ctx: &DeviceContext) -> IcicleResult<()> {
unsafe { release_ntt_domain(ctx).wrap() }
}
fn get_root_of_unity(max_size: u64) -> $field {
unsafe { get_root_of_unity(max_size) }
}
}
impl_ntt_without_domain!($field_prefix, $field, $field_config, NTT, "_ntt", $field);
}
};
}
@@ -308,40 +365,45 @@ macro_rules! impl_ntt_tests {
const FAST_TWIDDLES_MODE: bool = false;
#[test]
#[parallel]
fn test_ntt() {
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
check_ntt::<$field>()
}
#[test]
#[parallel]
fn test_ntt_coset_from_subgroup() {
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
check_ntt_coset_from_subgroup::<$field>()
}
#[test]
#[parallel]
fn test_ntt_arbitrary_coset() {
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
check_ntt_arbitrary_coset::<$field>()
}
#[test]
#[parallel]
fn test_ntt_batch() {
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
check_ntt_batch::<$field>()
}
#[test]
#[parallel]
fn test_ntt_device_async() {
// init_domain in this test is performed per-device
check_ntt_device_async::<$field>()
}
#[test]
#[serial]
fn test_ntt_release_domain() {
INIT.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE));
check_release_domain::<$field>();
*RELEASE.get_or_init(move || init_domain::<$field>(MAX_SIZE, DEFAULT_DEVICE_ID, FAST_TWIDDLES_MODE))
check_release_domain::<$field>()
}
};
}

View File

@@ -3,39 +3,33 @@ use ark_poly::{EvaluationDomain, GeneralEvaluationDomain};
use ark_std::{ops::Neg, test_rng, UniformRand};
use icicle_cuda_runtime::device::{get_device_count, set_device};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use crate::error::IcicleResult;
use crate::{
ntt::{
initialize_domain, initialize_domain_fast_twiddles_mode, ntt, ntt_inplace, release_domain, NTTDir,
NttAlgorithm, Ordering,
initialize_domain, ntt, ntt_inplace, release_domain, NTTConfig, NTTDir, NTTDomain, NttAlgorithm, Ordering, NTT,
},
traits::{ArkConvertible, FieldImpl, GenerateRandom},
vec_ops::{transpose_matrix, VecOps},
};
use super::{NTTConfig, NTT};
pub fn init_domain<F: FieldImpl + ArkConvertible>(max_size: u64, device_id: usize, fast_twiddles_mode: bool)
where
F::ArkEquivalent: FftField,
<F as FieldImpl>::Config: NTT<F>,
<F as FieldImpl>::Config: NTTDomain<F>,
{
let ctx = DeviceContext::default_for_device(device_id);
let ark_rou = F::ArkEquivalent::get_root_of_unity(max_size).unwrap();
if fast_twiddles_mode {
initialize_domain_fast_twiddles_mode(F::from_ark(ark_rou), &ctx).unwrap();
} else {
initialize_domain(F::from_ark(ark_rou), &ctx).unwrap();
}
initialize_domain(F::from_ark(ark_rou), &ctx, fast_twiddles_mode).unwrap();
}
pub fn rel_domain<F: FieldImpl>(ctx: &DeviceContext)
pub fn rel_domain<F: FieldImpl>(ctx: &DeviceContext) -> IcicleResult<()>
where
<F as FieldImpl>::Config: NTT<F>,
<F as FieldImpl>::Config: NTTDomain<F>,
{
release_domain::<F>(&ctx).unwrap();
release_domain::<F>(&ctx)
}
pub fn reverse_bit_order(n: u32, order: u32) -> u32 {
@@ -52,14 +46,6 @@ pub fn reverse_bit_order(n: u32, order: u32) -> u32 {
u32::from_str_radix(&reversed, 2).unwrap()
}
pub fn transpose_flattened_matrix<T: Copy>(m: &[T], nrows: usize) -> Vec<T> {
let ncols = m.len() / nrows;
assert!(nrows * ncols == m.len());
(0..m.len())
.map(|i| m[(i % nrows) * ncols + i / nrows])
.collect()
}
pub fn list_to_reverse_bit_order<T: Copy>(l: &[T]) -> Vec<T> {
l.iter()
.enumerate()
@@ -70,7 +56,7 @@ pub fn list_to_reverse_bit_order<T: Copy>(l: &[T]) -> Vec<T> {
pub fn check_ntt<F: FieldImpl + ArkConvertible>()
where
F::ArkEquivalent: FftField,
<F as FieldImpl>::Config: NTT<F> + GenerateRandom<F>,
<F as FieldImpl>::Config: NTT<F, F> + GenerateRandom<F>,
{
let test_sizes = [1 << 4, 1 << 17];
for test_size in test_sizes {
@@ -83,13 +69,14 @@ where
.collect::<Vec<F::ArkEquivalent>>();
// if we simply transmute arkworks types, we'll get scalars in Montgomery format
let scalars_mont = unsafe { &*(&ark_scalars[..] as *const _ as *const [F]) };
let scalars_mont_h = HostOrDeviceSlice::on_host(scalars_mont.to_vec());
let scalars_mont_h = HostSlice::from_slice(&scalars_mont);
let mut config = NTTConfig::default();
let mut config: NTTConfig<'_, F> = NTTConfig::default();
for alg in [NttAlgorithm::Radix2, NttAlgorithm::MixedRadix] {
config.ntt_algorithm = alg;
let mut ntt_result = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
ntt(&scalars_mont_h, NTTDir::kForward, &config, &mut ntt_result).unwrap();
let mut ntt_result = vec![F::zero(); test_size];
let ntt_result = HostSlice::from_mut_slice(&mut ntt_result);
ntt(scalars_mont_h, NTTDir::kForward, &config, ntt_result).unwrap();
assert_ne!(ntt_result.as_slice(), scalars_mont);
let mut ark_ntt_result = ark_scalars.clone();
@@ -100,8 +87,14 @@ where
unsafe { &*(ntt_result.as_slice() as *const _ as *const [<F as ArkConvertible>::ArkEquivalent]) };
assert_eq!(ark_ntt_result, ntt_result_as_ark);
let mut intt_result = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
ntt(&ntt_result, NTTDir::kInverse, &config, &mut intt_result).unwrap();
let mut intt_result = vec![F::zero(); test_size];
ntt(
ntt_result,
NTTDir::kInverse,
&config,
HostSlice::from_mut_slice(&mut intt_result),
)
.unwrap();
assert_eq!(intt_result.as_slice(), scalars_mont);
}
@@ -111,7 +104,7 @@ where
pub fn check_ntt_coset_from_subgroup<F: FieldImpl + ArkConvertible>()
where
F::ArkEquivalent: FftField,
<F as FieldImpl>::Config: NTT<F> + GenerateRandom<F>,
<F as FieldImpl>::Config: NTT<F, F> + GenerateRandom<F>,
{
let test_sizes = [1 << 4, 1 << 16];
for test_size in test_sizes {
@@ -124,7 +117,6 @@ where
let ark_large_domain = GeneralEvaluationDomain::<F::ArkEquivalent>::new(test_size).unwrap();
let mut scalars: Vec<F> = F::Config::generate_random(small_size);
let scalars_h = HostOrDeviceSlice::on_host(scalars.clone());
let mut ark_scalars = scalars
.iter()
.map(|v| v.to_ark())
@@ -134,21 +126,29 @@ where
let mut config = NTTConfig::default();
config.ordering = Ordering::kNR;
config.ntt_algorithm = alg;
let mut ntt_result_1 = HostOrDeviceSlice::on_host(vec![F::zero(); small_size]);
let mut ntt_result_2 = HostOrDeviceSlice::on_host(vec![F::zero(); small_size]);
ntt(&scalars_h, NTTDir::kForward, &config, &mut ntt_result_1).unwrap();
let mut ntt_result_1 = vec![F::zero(); small_size];
let mut ntt_result_2 = vec![F::zero(); small_size];
let ntt_result_2 = HostSlice::from_mut_slice(&mut ntt_result_2);
let scalars_h = HostSlice::from_slice(&scalars[..small_size]);
ntt(
scalars_h,
NTTDir::kForward,
&config,
HostSlice::from_mut_slice(&mut ntt_result_1),
)
.unwrap();
assert_ne!(*ntt_result_1.as_slice(), scalars);
config.coset_gen = F::from_ark(test_size_rou);
ntt(&scalars_h, NTTDir::kForward, &config, &mut ntt_result_2).unwrap();
let mut ntt_large_result = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
ntt(scalars_h, NTTDir::kForward, &config, ntt_result_2).unwrap();
let mut ntt_large_result = vec![F::zero(); test_size];
// back to non-coset NTT
config.coset_gen = F::one();
scalars.resize(test_size, F::zero());
ntt(
&HostOrDeviceSlice::on_host(scalars.clone()),
HostSlice::from_slice(&scalars),
NTTDir::kForward,
&config,
&mut ntt_large_result,
HostSlice::from_mut_slice(&mut ntt_large_result),
)
.unwrap();
assert_eq!(*ntt_result_1.as_slice(), ntt_large_result.as_slice()[..small_size]);
@@ -170,13 +170,18 @@ where
config.coset_gen = F::from_ark(test_size_rou);
config.ordering = Ordering::kRN;
let mut intt_result = HostOrDeviceSlice::on_host(vec![F::zero(); small_size]);
ntt(&ntt_result_2, NTTDir::kInverse, &config, &mut intt_result).unwrap();
let mut intt_result = vec![F::zero(); small_size];
ntt(
ntt_result_2,
NTTDir::kInverse,
&config,
HostSlice::from_mut_slice(&mut intt_result),
)
.unwrap();
assert_eq!(*intt_result.as_slice(), scalars[..small_size]);
ark_small_domain.ifft_in_place(&mut ark_scalars);
let intt_result_as_ark = intt_result
.as_slice()
.iter()
.map(|p| p.to_ark())
.collect::<Vec<F::ArkEquivalent>>();
@@ -188,7 +193,7 @@ where
pub fn check_ntt_arbitrary_coset<F: FieldImpl + ArkConvertible>()
where
F::ArkEquivalent: FftField + ArkField,
<F as FieldImpl>::Config: NTT<F> + GenerateRandom<F>,
<F as FieldImpl>::Config: NTT<F, F> + GenerateRandom<F>,
{
let mut seed = test_rng();
let test_sizes = [1 << 4, 1 << 17];
@@ -204,10 +209,10 @@ where
.get_coset(coset_gen)
.unwrap();
let mut scalars = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size));
let mut scalars = F::Config::generate_random(test_size);
let scalars = HostSlice::from_mut_slice(&mut scalars);
// here you can see how an arkworks type can be easily created without any purpose-built conversions
let mut ark_scalars = scalars
.as_slice()
.iter()
.map(|v| F::ArkEquivalent::from_random_bytes(&v.to_bytes_le()).unwrap())
.collect::<Vec<F::ArkEquivalent>>();
@@ -217,7 +222,7 @@ where
for alg in [NttAlgorithm::Radix2, NttAlgorithm::MixedRadix] {
config.ordering = Ordering::kNR;
config.ntt_algorithm = alg;
ntt_inplace(&mut scalars, NTTDir::kForward, &config).unwrap();
ntt_inplace(scalars, NTTDir::kForward, &config).unwrap();
let ark_scalars_copy = ark_scalars.clone();
ark_domain.fft_in_place(&mut ark_scalars);
@@ -231,9 +236,8 @@ where
assert_eq!(ark_scalars, ark_scalars_copy);
config.ordering = Ordering::kRN;
ntt_inplace(&mut scalars, NTTDir::kInverse, &config).unwrap();
ntt_inplace(scalars, NTTDir::kInverse, &config).unwrap();
let ntt_result_as_ark = scalars
.as_slice()
.iter()
.map(|p| p.to_ark())
.collect::<Vec<F::ArkEquivalent>>();
@@ -245,7 +249,7 @@ where
pub fn check_ntt_batch<F: FieldImpl>()
where
<F as FieldImpl>::Config: NTT<F> + GenerateRandom<F>,
<F as FieldImpl>::Config: NTT<F, F> + GenerateRandom<F>,
<F as FieldImpl>::Config: VecOps<F>,
{
let test_sizes = [1 << 4, 1 << 12];
@@ -254,7 +258,8 @@ where
let coset_generators = [F::one(), F::Config::generate_random(1)[0]];
let mut config = NTTConfig::default();
for batch_size in batch_sizes {
let scalars = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size * batch_size));
let scalars = F::Config::generate_random(test_size * batch_size);
let scalars = HostSlice::from_slice(&scalars);
for coset_gen in coset_generators {
for is_inverse in [NTTDir::kInverse, NTTDir::kForward] {
@@ -268,19 +273,25 @@ where
] {
config.coset_gen = coset_gen;
config.ordering = ordering;
let mut batch_ntt_result = HostOrDeviceSlice::on_host(vec![F::zero(); batch_size * test_size]);
let mut batch_ntt_result = vec![F::zero(); batch_size * test_size];
for alg in [NttAlgorithm::Radix2, NttAlgorithm::MixedRadix] {
config.batch_size = batch_size as i32;
config.ntt_algorithm = alg;
ntt(&scalars, is_inverse, &config, &mut batch_ntt_result).unwrap();
ntt(
scalars,
is_inverse,
&config,
HostSlice::from_mut_slice(&mut batch_ntt_result),
)
.unwrap();
config.batch_size = 1;
let mut one_ntt_result = HostOrDeviceSlice::on_host(vec![F::one(); test_size]);
let mut one_ntt_result = vec![F::one(); test_size];
for i in 0..batch_size {
ntt(
&HostOrDeviceSlice::on_host(scalars[i * test_size..(i + 1) * test_size].to_vec()),
&scalars[i * test_size..(i + 1) * test_size],
is_inverse,
&config,
&mut one_ntt_result,
HostSlice::from_mut_slice(&mut one_ntt_result),
)
.unwrap();
assert_eq!(
@@ -297,25 +308,30 @@ where
// for now, columns batching only works with MixedRadix NTT
config.batch_size = batch_size as i32;
config.columns_batch = true;
let mut transposed_input = HostOrDeviceSlice::on_host(vec![F::zero(); batch_size * test_size]);
let mut transposed_input = vec![F::zero(); batch_size * test_size];
transpose_matrix(
&scalars,
scalars,
row_size,
column_size,
&mut transposed_input,
HostSlice::from_mut_slice(&mut transposed_input),
&config.ctx,
on_device,
is_async,
)
.unwrap();
let mut col_batch_ntt_result =
HostOrDeviceSlice::on_host(vec![F::zero(); batch_size * test_size]);
ntt(&transposed_input, is_inverse, &config, &mut col_batch_ntt_result).unwrap();
let mut col_batch_ntt_result = vec![F::zero(); batch_size * test_size];
ntt(
HostSlice::from_slice(&transposed_input),
is_inverse,
&config,
HostSlice::from_mut_slice(&mut col_batch_ntt_result),
)
.unwrap();
transpose_matrix(
&col_batch_ntt_result,
HostSlice::from_slice(&col_batch_ntt_result),
column_size,
row_size,
&mut transposed_input,
HostSlice::from_mut_slice(&mut transposed_input),
&config.ctx,
on_device,
is_async,
@@ -333,7 +349,7 @@ where
pub fn check_ntt_device_async<F: FieldImpl + ArkConvertible>()
where
F::ArkEquivalent: FftField,
<F as FieldImpl>::Config: NTT<F> + GenerateRandom<F>,
<F as FieldImpl>::Config: NTT<F, F> + GenerateRandom<F>,
{
let device_count = get_device_count().unwrap();
@@ -352,14 +368,14 @@ where
.ctx
.stream;
for batch_size in batch_sizes {
let scalars_h: Vec<F> = F::Config::generate_random(test_size * batch_size);
let sum_of_coeffs: F::ArkEquivalent = scalars_h[..test_size]
let scalars: Vec<F> = F::Config::generate_random(test_size * batch_size);
let sum_of_coeffs: F::ArkEquivalent = scalars[..test_size]
.iter()
.map(|x| x.to_ark())
.sum();
let mut scalars_d = HostOrDeviceSlice::cuda_malloc(test_size * batch_size).unwrap();
let mut scalars_d = DeviceVec::<F>::cuda_malloc(test_size * batch_size).unwrap();
scalars_d
.copy_from_host(&scalars_h)
.copy_from_host(HostSlice::from_slice(&scalars))
.unwrap();
for coset_gen in coset_generators {
@@ -374,21 +390,22 @@ where
for alg in [NttAlgorithm::Radix2, NttAlgorithm::MixedRadix] {
config.ntt_algorithm = alg;
let mut ntt_result_h = vec![F::zero(); test_size * batch_size];
ntt_inplace(&mut scalars_d, NTTDir::kForward, &config).unwrap();
let mut ntt_result_slice = HostSlice::from_mut_slice(&mut ntt_result_h);
ntt_inplace(&mut *scalars_d, NTTDir::kForward, &config).unwrap();
if coset_gen == F::one() {
scalars_d
.copy_to_host(&mut ntt_result_h)
.copy_to_host(ntt_result_slice)
.unwrap();
assert_eq!(sum_of_coeffs, ntt_result_h[0].to_ark());
assert_eq!(sum_of_coeffs, ntt_result_slice[0].to_ark());
}
ntt_inplace(&mut scalars_d, NTTDir::kInverse, &config).unwrap();
ntt_inplace(&mut *scalars_d, NTTDir::kInverse, &config).unwrap();
scalars_d
.copy_to_host_async(&mut ntt_result_h, &stream)
.copy_to_host_async(&mut ntt_result_slice, &stream)
.unwrap();
stream
.synchronize()
.unwrap();
assert_eq!(scalars_h, ntt_result_h);
assert_eq!(scalars, *ntt_result_h.as_slice());
}
}
}
@@ -397,11 +414,11 @@ where
});
}
pub fn check_release_domain<F: FieldImpl + ArkConvertible>()
pub fn check_release_domain<F: FieldImpl>()
where
F::ArkEquivalent: FftField,
<F as FieldImpl>::Config: NTT<F> + GenerateRandom<F>,
<F as FieldImpl>::Config: NTTDomain<F>,
{
let config: NTTConfig<'static, F> = NTTConfig::default();
rel_domain::<F>(&config.ctx);
let err = rel_domain::<F>(&config.ctx);
assert!(err.is_ok())
}
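Note: as a reference for the call sites migrated above, here is a minimal sketch of the slice-based NTT API. It assumes the bn254 ScalarField crate and an NTT domain already initialized for `size`; the import paths are illustrative, not prescriptive.

use icicle_bn254::curve::ScalarField; // assumed field crate
use icicle_core::ntt::{ntt, ntt_inplace, NTTConfig, NTTDir};
use icicle_core::traits::{FieldImpl, GenerateRandom};
use icicle_cuda_runtime::memory::HostSlice;

fn ntt_roundtrip(size: usize) {
    // random coefficients come from the field's config (GenerateRandom impl)
    let mut scalars = <ScalarField as FieldImpl>::Config::generate_random(size);
    let original = scalars.clone();
    let cfg: NTTConfig<'_, ScalarField> = NTTConfig::default();

    // forward NTT in place on a host slice
    ntt_inplace(HostSlice::from_mut_slice(&mut scalars), NTTDir::kForward, &cfg).unwrap();

    // inverse NTT into a separate host buffer; with the default ordering this
    // round-trips back to the original coefficients
    let mut output = vec![ScalarField::zero(); size];
    ntt(
        HostSlice::from_slice(&scalars),
        NTTDir::kInverse,
        &cfg,
        HostSlice::from_mut_slice(&mut output),
    )
    .unwrap();
    assert_eq!(original, output);
}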

View File

@@ -0,0 +1,814 @@
use crate::traits::{FieldConfig, FieldImpl};
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
pub trait UnivariatePolynomial
where
Self::Field: FieldImpl,
Self::FieldConfig: FieldConfig,
{
type Field;
type FieldConfig;
fn from_coeffs<S: HostOrDeviceSlice<Self::Field> + ?Sized>(coeffs: &S, size: usize) -> Self;
fn from_rou_evals<S: HostOrDeviceSlice<Self::Field> + ?Sized>(evals: &S, size: usize) -> Self;
fn divide(&self, denominator: &Self) -> (Self, Self)
where
Self: Sized;
fn div_by_vanishing(&self, degree: u64) -> Self;
fn add_monomial_inplace(&mut self, monomial_coeff: &Self::Field, monomial: u64);
fn sub_monomial_inplace(&mut self, monomial_coeff: &Self::Field, monomial: u64);
fn slice(&self, offset: u64, stride: u64, size: u64) -> Self;
fn even(&self) -> Self;
fn odd(&self) -> Self;
fn eval(&self, x: &Self::Field) -> Self::Field;
fn degree(&self) -> i64;
fn eval_on_domain<D: HostOrDeviceSlice<Self::Field> + ?Sized, E: HostOrDeviceSlice<Self::Field> + ?Sized>(
&self,
domain: &D,
evals: &mut E,
);
fn get_nof_coeffs(&self) -> u64;
fn get_coeff(&self, idx: u64) -> Self::Field;
fn copy_coeffs<S: HostOrDeviceSlice<Self::Field> + ?Sized>(&self, start_idx: u64, coeffs: &mut S);
}
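Note: the trait above gets a concrete implementation per field from the macro below. A hedged usage sketch, assuming the babybear crate paths and that the CUDA backend and NTT domain were already initialized (see setup() in the tests further down):

use icicle_babybear::field::ScalarField; // assumed path
use icicle_babybear::polynomials::DensePolynomial; // assumed path
use icicle_core::polynomials::UnivariatePolynomial;
use icicle_core::traits::FieldImpl;
use icicle_cuda_runtime::memory::HostSlice;

fn poly_demo() {
    // f(x) = 1 + 2x + 3x^2
    let coeffs = [
        ScalarField::from_u32(1),
        ScalarField::from_u32(2),
        ScalarField::from_u32(3),
    ];
    let mut f = DensePolynomial::from_coeffs(HostSlice::from_slice(&coeffs), coeffs.len());
    f.add_monomial_inplace(&ScalarField::from_u32(5), 3); // f(x) += 5x^3
    assert_eq!(f.degree(), 3);

    // evaluate at x = 2: 1 + 4 + 12 + 40 = 57
    let y = f.eval(&ScalarField::from_u32(2));
    assert_eq!(y, ScalarField::from_u32(57));
}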
#[macro_export]
macro_rules! impl_univariate_polynomial_api {
(
$field_prefix:literal,
$field_prefix_ident:ident,
$field:ident,
$field_cfg:ident
) => {
use icicle_core::{polynomials::UnivariatePolynomial, traits::FieldImpl};
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
use std::{
clone, cmp,
ffi::c_void,
ops::{Add, AddAssign, Div, Mul, Rem, Sub},
ptr, slice,
};
type PolynomialHandle = *const c_void;
extern "C" {
#[link_name = concat!($field_prefix, "_polynomial_init_cuda_backend")]
fn init_cuda_backend() -> bool;
#[link_name = concat!($field_prefix, "_polynomial_create_from_coefficients")]
fn create_from_coeffs(coeffs: *const $field, size: usize) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_create_from_rou_evaluations")]
fn create_from_rou_evals(coeffs: *const $field, size: usize) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_clone")]
fn clone(p: PolynomialHandle) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_delete")]
fn delete(ptr: PolynomialHandle);
#[link_name = concat!($field_prefix, "_polynomial_print")]
fn print(ptr: PolynomialHandle);
#[link_name = concat!($field_prefix, "_polynomial_add")]
fn add(a: PolynomialHandle, b: PolynomialHandle) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_add_inplace")]
fn add_inplace(a: PolynomialHandle, b: PolynomialHandle) -> c_void;
#[link_name = concat!($field_prefix, "_polynomial_subtract")]
fn subtract(a: PolynomialHandle, b: PolynomialHandle) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_multiply")]
fn multiply(a: PolynomialHandle, b: PolynomialHandle) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_multiply_by_scalar")]
fn multiply_by_scalar(a: PolynomialHandle, b: &$field) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_quotient")]
fn quotient(a: PolynomialHandle, b: PolynomialHandle) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_remainder")]
fn remainder(a: PolynomialHandle, b: PolynomialHandle) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_division")]
fn divide(a: PolynomialHandle, b: PolynomialHandle, q: *mut PolynomialHandle, r: *mut PolynomialHandle);
#[link_name = concat!($field_prefix, "_polynomial_divide_by_vanishing")]
fn div_by_vanishing(a: PolynomialHandle, deg: u64) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_add_monomial_inplace")]
fn add_monomial_inplace(a: PolynomialHandle, monomial_coeff: &$field, monomial: u64) -> c_void;
#[link_name = concat!($field_prefix, "_polynomial_sub_monomial_inplace")]
fn sub_monomial_inplace(a: PolynomialHandle, monomial_coeff: &$field, monomial: u64) -> c_void;
#[link_name = concat!($field_prefix, "_polynomial_slice")]
fn slice(a: PolynomialHandle, offset: u64, stride: u64, size: u64) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_even")]
fn even(a: PolynomialHandle) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_odd")]
fn odd(a: PolynomialHandle) -> PolynomialHandle;
#[link_name = concat!($field_prefix, "_polynomial_evaluate_on_domain")]
fn eval_on_domain(a: PolynomialHandle, domain: *const $field, domain_size: u64, evals: *mut $field);
#[link_name = concat!($field_prefix, "_polynomial_degree")]
fn degree(a: PolynomialHandle) -> i64;
#[link_name = concat!($field_prefix, "_polynomial_copy_coeffs_range")]
fn copy_coeffs(a: PolynomialHandle, host_coeffs: *mut $field, start_idx: u64, end_idx: u64) -> u64;
#[link_name = concat!($field_prefix, "_polynomial_get_coeffs_raw_ptr")]
fn get_coeffs_ptr(a: PolynomialHandle, len: *mut u64, device_id: *mut u64) -> *mut $field;
}
pub struct DensePolynomial {
handle: PolynomialHandle,
}
impl DensePolynomial {
pub fn init_cuda_backend() -> bool {
unsafe { init_cuda_backend() }
}
// TODO Yuval: implement Display trait
pub fn print(&self) {
unsafe {
print(self.handle);
}
}
pub fn coeffs_mut_slice(&mut self) -> &mut DeviceSlice<$field> {
unsafe {
let mut len: u64 = 0;
let mut device_id: u64 = 0;
let mut coeffs_mut = get_coeffs_ptr(self.handle, &mut len, &mut device_id);
let s = slice::from_raw_parts_mut(coeffs_mut, len as usize);
DeviceSlice::from_mut_slice(s)
}
}
}
impl UnivariatePolynomial for DensePolynomial {
type Field = $field;
type FieldConfig = $field_cfg;
fn from_coeffs<S: HostOrDeviceSlice<Self::Field> + ?Sized>(coeffs: &S, size: usize) -> Self {
unsafe {
DensePolynomial {
handle: create_from_coeffs(coeffs.as_ptr(), size),
}
}
}
fn from_rou_evals<S: HostOrDeviceSlice<Self::Field> + ?Sized>(evals: &S, size: usize) -> Self {
unsafe {
Self {
handle: create_from_rou_evals(evals.as_ptr(), size),
}
}
}
fn divide(&self, denominator: &Self) -> (Self, Self) {
let mut q_handle: PolynomialHandle = std::ptr::null_mut();
let mut r_handle: PolynomialHandle = std::ptr::null_mut();
unsafe {
divide(self.handle, denominator.handle, &mut q_handle, &mut r_handle);
}
(Self { handle: q_handle }, Self { handle: r_handle })
}
fn div_by_vanishing(&self, degree: u64) -> Self {
unsafe {
Self {
handle: div_by_vanishing(self.handle, degree),
}
}
}
fn add_monomial_inplace(&mut self, monomial_coeff: &Self::Field, monomial: u64) {
unsafe {
add_monomial_inplace(self.handle, monomial_coeff, monomial);
}
}
fn sub_monomial_inplace(&mut self, monomial_coeff: &Self::Field, monomial: u64) {
unsafe {
sub_monomial_inplace(self.handle, monomial_coeff, monomial);
}
}
fn slice(&self, offset: u64, stride: u64, size: u64) -> Self {
unsafe {
Self {
handle: slice(self.handle, offset, stride, size),
}
}
}
fn even(&self) -> Self {
unsafe {
Self {
handle: even(self.handle),
}
}
}
fn odd(&self) -> Self {
unsafe {
Self {
handle: odd(self.handle),
}
}
}
fn eval(&self, x: &Self::Field) -> Self::Field {
let mut eval = Self::Field::zero();
unsafe {
eval_on_domain(self.handle, x, 1, &mut eval);
}
eval
}
fn eval_on_domain<
D: HostOrDeviceSlice<Self::Field> + ?Sized,
E: HostOrDeviceSlice<Self::Field> + ?Sized,
>(
&self,
domain: &D,
evals: &mut E,
) {
assert!(
domain.len() <= evals.len(),
"eval_on_domain(): eval size must not be smaller then domain"
);
unsafe {
eval_on_domain(
self.handle,
domain.as_ptr(),
domain.len() as u64,
evals.as_mut_ptr(),
);
}
}
fn get_nof_coeffs(&self) -> u64 {
unsafe {
// returns the total number of coefficients; does not copy when the destination is null
let nof_coeffs = copy_coeffs(self.handle, std::ptr::null_mut(), 0, 0);
nof_coeffs
}
}
fn get_coeff(&self, idx: u64) -> Self::Field {
let mut coeff: Self::Field = Self::Field::zero();
unsafe { copy_coeffs(self.handle, &mut coeff, idx, idx) };
coeff
}
fn copy_coeffs<S: HostOrDeviceSlice<Self::Field> + ?Sized>(&self, start_idx: u64, coeffs: &mut S) {
let coeffs_len = coeffs.len() as u64;
let nof_coeffs = self.get_nof_coeffs();
let end_idx = cmp::min(nof_coeffs, start_idx + coeffs_len - 1);
unsafe {
copy_coeffs(self.handle, coeffs.as_mut_ptr(), start_idx, end_idx);
}
}
fn degree(&self) -> i64 {
unsafe { degree(self.handle) }
}
}
impl Drop for DensePolynomial {
fn drop(&mut self) {
unsafe {
delete(self.handle);
}
}
}
impl Clone for DensePolynomial {
fn clone(&self) -> Self {
unsafe {
DensePolynomial {
handle: clone(self.handle),
}
}
}
}
impl Add for &DensePolynomial {
type Output = DensePolynomial;
fn add(self: Self, rhs: Self) -> Self::Output {
unsafe {
DensePolynomial {
handle: add(self.handle, rhs.handle),
}
}
}
}
impl AddAssign<&DensePolynomial> for DensePolynomial {
fn add_assign(&mut self, other: &DensePolynomial) {
unsafe { add_inplace(self.handle, other.handle) };
}
}
impl Sub for &DensePolynomial {
type Output = DensePolynomial;
fn sub(self: Self, rhs: Self) -> Self::Output {
unsafe {
DensePolynomial {
handle: subtract(self.handle, rhs.handle),
}
}
}
}
impl Mul for &DensePolynomial {
type Output = DensePolynomial;
fn mul(self: Self, rhs: Self) -> Self::Output {
unsafe {
DensePolynomial {
handle: multiply(self.handle, rhs.handle),
}
}
}
}
// poly * scalar
impl Mul<&$field> for &DensePolynomial {
type Output = DensePolynomial;
fn mul(self: Self, rhs: &$field) -> Self::Output {
unsafe {
DensePolynomial {
handle: multiply_by_scalar(self.handle, rhs),
}
}
}
}
// scalar * poly
impl Mul<&DensePolynomial> for &$field {
type Output = DensePolynomial;
fn mul(self, rhs: &DensePolynomial) -> Self::Output {
unsafe {
DensePolynomial {
handle: multiply_by_scalar(rhs.handle, self),
}
}
}
}
impl Div for &DensePolynomial {
type Output = DensePolynomial;
fn div(self: Self, rhs: Self) -> Self::Output {
unsafe {
DensePolynomial {
handle: quotient(self.handle, rhs.handle),
}
}
}
}
impl Rem for &DensePolynomial {
type Output = DensePolynomial;
fn rem(self: Self, rhs: Self) -> Self::Output {
unsafe {
DensePolynomial {
handle: remainder(self.handle, rhs.handle),
}
}
}
}
};
}
#[macro_export]
macro_rules! impl_polynomial_tests {
(
$field_prefix_ident:ident,
$field:ident
) => {
use super::*;
use icicle_core::ntt::{get_root_of_unity, initialize_domain, release_domain, NTTDomain};
use icicle_core::vec_ops::{add_scalars, mul_scalars, sub_scalars, VecOps, VecOpsConfig};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};
use std::sync::Once;
use icicle_core::traits::{FieldImpl, GenerateRandom};
type Poly = DensePolynomial;
fn init_domain<F: FieldImpl>(max_size: u64, ctx: &DeviceContext, fast_twiddles_mode: bool)
where
<F as FieldImpl>::Config: NTTDomain<F>,
{
let rou: F = get_root_of_unity(max_size);
initialize_domain(rou, &ctx, fast_twiddles_mode).unwrap();
}
fn rel_domain<F: FieldImpl>(ctx: &DeviceContext)
where
<F as FieldImpl>::Config: NTTDomain<F>,
{
release_domain::<F>(&ctx).unwrap()
}
fn randomize_coeffs<F: FieldImpl>(size: usize) -> Vec<F>
where
<F as FieldImpl>::Config: GenerateRandom<F>,
{
let coeffs = F::Config::generate_random(size);
coeffs
}
fn rand() -> $field {
let r = randomize_coeffs::<$field>(1);
// let coeffs = $field::Config::generate_random(1);
r[0]
}
// Note: field arithmetic (+, -, *) is implemented via vec_ops since it is not available on the host
fn add(a: &$field, b: &$field) -> $field {
let a = [a.clone()];
let b = [b.clone()];
let mut result = [$field::zero()];
let cfg = VecOpsConfig::default();
add_scalars(
HostSlice::from_slice(&a),
HostSlice::from_slice(&b),
HostSlice::from_mut_slice(&mut result),
&cfg,
)
.unwrap();
result[0]
}
fn sub(a: &$field, b: &$field) -> $field {
let a = [a.clone()];
let b = [b.clone()];
let mut result = [$field::zero()];
let cfg = VecOpsConfig::default();
sub_scalars(
HostSlice::from_slice(&a),
HostSlice::from_slice(&b),
HostSlice::from_mut_slice(&mut result),
&cfg,
)
.unwrap();
result[0]
}
fn mul(a: &$field, b: &$field) -> $field {
let a = [a.clone()];
let b = [b.clone()];
let mut result = [$field::zero()];
let cfg = VecOpsConfig::default();
mul_scalars(
HostSlice::from_slice(&a),
HostSlice::from_slice(&b),
HostSlice::from_mut_slice(&mut result),
&cfg,
)
.unwrap();
result[0]
}
fn randomize_poly(size: usize) -> Poly {
let coeffs = randomize_coeffs::<$field>(size);
let p = Poly::from_coeffs(HostSlice::from_slice(&coeffs), size);
p
}
static INIT: Once = Once::new();
pub fn setup() -> () {
INIT.call_once(|| {
let device_id: usize = 0;
// using logn=20 since the babybear NTT tests use it and the order of tests should not matter
let domain_max_size: u64 = 1 << 20;
// TODO Yuval: how to consolidate this with the NTT tests and avoid release_domain being called too early?
let ctx = DeviceContext::default_for_device(device_id);
init_domain::<ScalarField>(domain_max_size, &ctx, false /*=fast twiddle */);
Poly::init_cuda_backend();
});
}
// Note: tests are marked with #[ignore] since they conflict with the NTT tests' domain. This is a (hopefully temporary) workaround.
// The poly tests are executed via 'cargo test -- --ignored' as an additional step
#[test]
#[ignore]
fn test_poly_eval() {
setup();
// testing correct evaluation of f(8) for f(x)=4x^2+2x+5
let coeffs = [$field::from_u32(5), $field::from_u32(2), $field::from_u32(4)];
let f = Poly::from_coeffs(HostSlice::from_slice(&coeffs), coeffs.len());
let x = $field::from_u32(8);
let f_x = f.eval(&x);
assert_eq!(f_x, $field::from_u32(277));
}
#[test]
#[ignore]
fn test_poly_clone() {
setup();
// testing that the clone g(x) is independent of f(x) and cloned correctly
let mut f = randomize_poly(8);
let x = rand();
let fx = f.eval(&x);
let g = f.clone();
f += &g;
let gx = g.eval(&x);
let new_fx = f.eval(&x);
assert_eq!(fx, gx); // cloned correctly
assert_eq!(add(&fx, &gx), new_fx);
}
#[test]
#[ignore]
fn test_poly_add_sub_mul() {
setup();
// testing add/sub/mul operations
let size = 1 << 10;
let mut f = randomize_poly(size);
let mut g = randomize_poly(size);
let x = rand();
let fx = f.eval(&x);
let gx = g.eval(&x);
let poly_add = &f + &g;
let poly_sub = &f - &g;
let poly_mul = &f * &g;
assert_eq!(poly_add.eval(&x), add(&fx, &gx));
assert_eq!(poly_sub.eval(&x), sub(&fx, &gx));
assert_eq!(poly_mul.eval(&x), mul(&fx, &gx));
// test scalar multiplication
let s1 = rand();
let s2 = rand();
let poly_mul_s1 = &f * &s1;
let poly_mul_s2 = &s2 * &f;
assert_eq!(poly_mul_s1.eval(&x), mul(&fx, &s1));
assert_eq!(poly_mul_s2.eval(&x), mul(&fx, &s2));
// test inplace add
f += &g;
assert_eq!(f.eval(&x), add(&fx, &gx));
}
#[test]
#[ignore]
fn test_poly_monomials() {
setup();
// testing add/sub monomials inplace
let zero = $field::from_u32(0);
let one = $field::from_u32(1);
let two = $field::from_u32(2);
let three = $field::from_u32(3);
// f(x) = 1+2x^2
let coeffs = [one, zero, two];
let mut f = Poly::from_coeffs(HostSlice::from_slice(&coeffs), coeffs.len());
let x = rand();
let fx = f.eval(&x);
f.add_monomial_inplace(&three, 1); // +3x
let fx_add = f.eval(&x);
assert_eq!(fx_add, add(&fx, &mul(&three, &x)));
f.sub_monomial_inplace(&one, 0); // -1
let fx_sub = f.eval(&x);
assert_eq!(fx_sub, sub(&fx_add, &one));
}
#[test]
#[ignore]
fn test_poly_read_coeffs() {
setup();
let zero = $field::from_u32(0);
let one = $field::from_u32(1);
let two = $field::from_u32(2);
let three = $field::from_u32(3);
let four = $field::from_u32(4);
let coeffs = [one, two, three, four];
let mut f = Poly::from_coeffs(HostSlice::from_slice(&coeffs), coeffs.len());
// read coeffs to host memory
let mut host_mem = vec![$field::zero(); coeffs.len()];
f.copy_coeffs(0, HostSlice::from_mut_slice(&mut host_mem));
assert_eq!(host_mem, coeffs);
// read coeffs to device memory
let mut device_mem = DeviceVec::<$field>::cuda_malloc(coeffs.len()).unwrap();
f.copy_coeffs(0, &mut device_mem[..]);
let mut host_coeffs_from_dev = vec![ScalarField::zero(); coeffs.len() as usize];
device_mem
.copy_to_host(HostSlice::from_mut_slice(&mut host_coeffs_from_dev))
.unwrap();
assert_eq!(host_mem, host_coeffs_from_dev);
// multiply by two and read single coeff
f = &f * &two;
// read single coeff
let x_squared_coeff = f.get_coeff(2);
assert_eq!(x_squared_coeff, mul(&two, &three));
}
#[test]
#[ignore]
fn test_poly_division() {
setup();
// divide f(x)/g(x), compute q(x), r(x) and check f(x)=q(x)*g(x)+r(x)
let f = randomize_poly(1 << 12);
let g = randomize_poly(1 << 4);
let (q, r) = f.divide(&g);
let f_reconstructed = &(&q * &g) + &r;
let x = rand();
assert_eq!(f.eval(&x), f_reconstructed.eval(&x));
}
#[test]
#[ignore]
fn test_poly_divide_by_vanishing() {
setup();
let zero = $field::from_u32(0);
let one = $field::from_u32(1);
let minus_one = sub(&zero, &one);
// compute a random f(x) and the product f(x)*v(x) for a vanishing poly v(x)
// divide by vanishing and check that f(x) is reconstructed
let f = randomize_poly(1 << 12);
let v_coeffs = [minus_one, zero, zero, zero, one]; // x^4-1
let v = Poly::from_coeffs(HostSlice::from_slice(&v_coeffs), v_coeffs.len());
let fv = &f * &v;
let deg_f = f.degree();
let deg_fv = fv.degree();
assert_eq!(deg_f + 4, deg_fv);
let f_reconstructed = fv.div_by_vanishing(4);
assert_eq!(deg_f, f_reconstructed.degree());
let x = rand();
assert_eq!(f.eval(&x), f_reconstructed.eval(&x));
}
#[test]
#[ignore]
fn test_poly_eval_on_domain() {
setup();
let one = $field::from_u32(1);
let two = $field::from_u32(2);
let three = $field::from_u32(3);
let f = randomize_poly(1 << 12);
let domain = [one, two, three];
// evaluate to host memory
let mut host_evals = vec![ScalarField::zero(); domain.len()];
f.eval_on_domain(
HostSlice::from_slice(&domain),
HostSlice::from_mut_slice(&mut host_evals),
);
// check eval on domain agrees with eval() method
assert_eq!(f.eval(&one), host_evals[0]);
assert_eq!(f.eval(&two), host_evals[1]);
assert_eq!(f.eval(&three), host_evals[2]);
// evaluate to device memory
let mut device_evals = DeviceVec::<ScalarField>::cuda_malloc(domain.len()).unwrap();
f.eval_on_domain(HostSlice::from_slice(&domain), &mut device_evals[..]);
let mut host_evals_from_device = vec![ScalarField::zero(); domain.len()];
device_evals
.copy_to_host(HostSlice::from_mut_slice(&mut host_evals_from_device))
.unwrap();
// check that evaluation to device memory is equivalent
assert_eq!(host_evals, host_evals_from_device);
// use evals as domain (on device) and evaluate from device to host
f.eval_on_domain(&mut device_evals[..], HostSlice::from_mut_slice(&mut host_evals));
// check that the evaluations are correct
assert_eq!(f.eval(&host_evals_from_device[0]), host_evals[0]);
assert_eq!(f.eval(&host_evals_from_device[1]), host_evals[1]);
assert_eq!(f.eval(&host_evals_from_device[2]), host_evals[2]);
}
#[test]
#[ignore]
fn test_odd_even_slicing() {
setup();
let size = (1 << 10) - 3;
// slicing even and odd parts and checking
let f = randomize_poly(size);
let x = rand();
let even = f.even();
let odd = f.odd();
assert_eq!(f.degree(), even.degree() + odd.degree() + 1);
// computing even(x) and odd(x) directly
let expected_even = (0..=f.degree())
.filter(|&i| i % 2 == 0)
.rev()
.fold($field::zero(), |acc, i| {
add(&mul(&acc, &x), &f.get_coeff(i as u64))
});
let expected_odd = (0..=f.degree())
.filter(|&i| i % 2 != 0)
.rev()
.fold($field::zero(), |acc, i| {
add(&mul(&acc, &x), &f.get_coeff(i as u64))
});
// check that even(x) and odd(x) compute correctly
let evenx = even.eval(&x);
let oddx = odd.eval(&x);
assert_eq!(expected_even, evenx);
assert_eq!(expected_odd, oddx);
}
use icicle_core::ntt::{ntt, ntt_inplace, NTTConfig, NTTDir, Ordering};
#[test]
#[ignore]
fn test_coeffs_slice() {
setup();
let size = 4;
let coeffs = randomize_coeffs::<$field>(size);
let mut f = Poly::from_coeffs(HostSlice::from_slice(&coeffs), size);
// take a mutable coeffs slice as a DeviceSlice
let coeffs_slice_dev = f.coeffs_mut_slice();
assert_eq!(coeffs_slice_dev.len(), size);
assert!(coeffs_slice_dev.is_on_device());
// let g = &f + &f; // cannot borrow f here since coeffs_slice_dev is a mutable borrow of f
// copy to host and check equality
let mut coeffs_copied_from_slice = vec![ScalarField::zero(); coeffs_slice_dev.len()];
coeffs_slice_dev
.copy_to_host(HostSlice::from_mut_slice(&mut coeffs_copied_from_slice))
.unwrap();
assert_eq!(coeffs_copied_from_slice, coeffs);
// or can use the memory directly
let mut config: NTTConfig<'_, $field> = NTTConfig::default();
let mut ntt_result = vec![$field::zero(); coeffs_slice_dev.len()];
ntt(
coeffs_slice_dev,
NTTDir::kForward,
&config,
HostSlice::from_mut_slice(&mut ntt_result),
)
.unwrap();
// ntt_result[0] equals f(one) because it is the sum of the coefficients
assert_eq!(ntt_result[0], f.eval(&$field::one()));
// after last use of coeffs_slice_dev, can borrow f again
let g = &f * &f;
assert_eq!(mul(&ntt_result[0], &ntt_result[0]), g.eval(&$field::one()));
}
};
}
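Note: a field crate would wire these macros up roughly as follows; the prefix string and config identifier are illustrative only and may not match any crate's actual values.

// e.g. in a field crate's polynomials module (illustrative)
impl_univariate_polynomial_api!("babybear", babybear, ScalarField, ScalarCfg);

#[cfg(test)]
mod tests {
    impl_polynomial_tests!(babybear, ScalarField);
}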

View File

@@ -2,14 +2,14 @@
pub mod tests;
use icicle_cuda_runtime::{
device::check_device,
device_context::{DeviceContext, DEFAULT_DEVICE_ID},
memory::HostOrDeviceSlice,
memory::{DeviceSlice, HostOrDeviceSlice},
};
use crate::{error::IcicleResult, traits::FieldImpl};
#[repr(C)]
#[derive(Debug, Clone)]
pub struct PoseidonConstants<'a, F: FieldImpl> {
arity: u32,
@@ -18,10 +18,10 @@ pub struct PoseidonConstants<'a, F: FieldImpl> {
full_rounds_half: u32,
/// These should be pointers to data allocated on device
round_constants: &'a [F],
mds_matrix: &'a [F],
non_sparse_matrix: &'a [F],
sparse_matrices: &'a [F],
round_constants: &'a DeviceSlice<F>,
mds_matrix: &'a DeviceSlice<F>,
non_sparse_matrix: &'a DeviceSlice<F>,
sparse_matrices: &'a DeviceSlice<F>,
/// Domain tag is the first element in the Poseidon state.
/// For the Merkle tree mode it should equal 2^arity - 1
@@ -88,8 +88,8 @@ pub trait Poseidon<F: FieldImpl> {
) -> IcicleResult<PoseidonConstants<'a, F>>;
fn load_optimized_constants<'a>(arity: u32, ctx: &DeviceContext) -> IcicleResult<PoseidonConstants<'a, F>>;
fn poseidon_unchecked(
input: &mut HostOrDeviceSlice<F>,
output: &mut HostOrDeviceSlice<F>,
input: &mut (impl HostOrDeviceSlice<F> + ?Sized),
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
arity: u32,
constants: &PoseidonConstants<F>,
@@ -146,8 +146,8 @@ where
///
/// * `config` - config used to specify extra arguments of the Poseidon.
pub fn poseidon_hash_many<F>(
input: &mut HostOrDeviceSlice<F>,
output: &mut HostOrDeviceSlice<F>,
input: &mut (impl HostOrDeviceSlice<F> + ?Sized),
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
number_of_states: u32,
arity: u32,
constants: &PoseidonConstants<F>,
@@ -179,6 +179,22 @@ where
);
}
let ctx_device_id = config
.ctx
.device_id;
if let Some(device_id) = input.device_id() {
assert_eq!(
device_id, ctx_device_id,
"Device ids in input and context are different"
);
}
if let Some(device_id) = output.device_id() {
assert_eq!(
device_id, ctx_device_id,
"Device ids in output and context are different"
);
}
check_device(ctx_device_id);
let mut local_cfg = config.clone();
local_cfg.are_inputs_on_device = input.is_on_device();
local_cfg.are_outputs_on_device = output.is_on_device();
@@ -204,7 +220,7 @@ macro_rules! impl_poseidon {
mod $field_prefix_ident {
use crate::poseidon::{$field, $field_config, CudaError, DeviceContext, PoseidonConfig, PoseidonConstants};
extern "C" {
#[link_name = concat!($field_prefix, "CreateOptimizedPoseidonConstants")]
#[link_name = concat!($field_prefix, "_create_optimized_poseidon_constants_cuda")]
pub(crate) fn _create_optimized_constants(
arity: u32,
full_rounds_half: u32,
@@ -214,14 +230,14 @@ macro_rules! impl_poseidon {
poseidon_constants: *mut PoseidonConstants<$field>,
) -> CudaError;
#[link_name = concat!($field_prefix, "InitOptimizedPoseidonConstants")]
#[link_name = concat!($field_prefix, "_init_optimized_poseidon_constants_cuda")]
pub(crate) fn _load_optimized_constants(
arity: u32,
ctx: &DeviceContext,
constants: *mut PoseidonConstants<$field>,
) -> CudaError;
#[link_name = concat!($field_prefix, "PoseidonHash")]
#[link_name = concat!($field_prefix, "_poseidon_hash_cuda")]
pub(crate) fn hash_many(
input: *mut $field,
output: *mut $field,
@@ -268,8 +284,8 @@ macro_rules! impl_poseidon {
}
fn poseidon_unchecked(
input: &mut HostOrDeviceSlice<$field>,
output: &mut HostOrDeviceSlice<$field>,
input: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
output: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
number_of_states: u32,
arity: u32,
constants: &PoseidonConstants<$field>,

View File

@@ -1,6 +1,6 @@
use crate::traits::FieldImpl;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use icicle_cuda_runtime::memory::{HostOrDeviceSlice, HostSlice};
use std::io::Read;
use std::path::PathBuf;
@@ -26,16 +26,16 @@ where
{
let test_size = 1 << 10;
let arity = 2u32;
let inputs = vec![F::one(); test_size * arity as usize];
let outputs = vec![F::zero(); test_size];
let mut inputs = vec![F::one(); test_size * arity as usize];
let mut outputs = vec![F::zero(); test_size];
let mut input_slice = HostOrDeviceSlice::on_host(inputs);
let mut output_slice = HostOrDeviceSlice::on_host(outputs);
let input_slice = HostSlice::from_mut_slice(&mut inputs);
let output_slice = HostSlice::from_mut_slice(&mut outputs);
let config = PoseidonConfig::default();
poseidon_hash_many::<F>(
&mut input_slice,
&mut output_slice,
input_slice,
output_slice,
test_size as u32,
arity as u32,
&constants,
@@ -43,8 +43,8 @@ where
)
.unwrap();
let a1 = output_slice[0..1][0];
let a2 = output_slice[output_slice.len() - 2..output_slice.len() - 1][0];
let a1 = output_slice[0];
let a2 = output_slice[output_slice.len() - 2];
println!("first: {:?}, last: {:?}", a1, a2);
assert_eq!(a1, a2);
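Note: since the inputs and outputs are now generic over HostOrDeviceSlice, the same entry point also accepts device buffers. A hedged sketch (the bn254 paths are assumptions; constants are obtained through the Poseidon trait shown in the previous file):

use icicle_bn254::curve::ScalarField; // assumed field crate
use icicle_core::poseidon::{poseidon_hash_many, Poseidon, PoseidonConfig};
use icicle_core::traits::FieldImpl;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};

fn hash_on_device(n_states: usize) {
    let arity = 2u32;
    let ctx = DeviceContext::default();
    let constants =
        <<ScalarField as FieldImpl>::Config as Poseidon<ScalarField>>::load_optimized_constants(arity, &ctx)
            .unwrap();

    let inputs = vec![ScalarField::one(); n_states * arity as usize];
    let mut d_inputs = DeviceVec::<ScalarField>::cuda_malloc(inputs.len()).unwrap();
    d_inputs
        .copy_from_host(HostSlice::from_slice(&inputs))
        .unwrap();
    let mut d_outputs = DeviceVec::<ScalarField>::cuda_malloc(n_states).unwrap();

    let config = PoseidonConfig::default();
    poseidon_hash_many::<ScalarField>(
        &mut d_inputs[..], // DeviceSlice satisfies the HostOrDeviceSlice bound
        &mut d_outputs[..],
        n_states as u32,
        arity,
        &constants,
        &config,
    )
    .unwrap();

    let mut outputs = vec![ScalarField::zero(); n_states];
    d_outputs
        .copy_to_host(HostSlice::from_mut_slice(&mut outputs))
        .unwrap();
}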

View File

@@ -7,9 +7,13 @@ use crate::{
};
#[cfg(feature = "arkworks")]
use ark_ec::short_weierstrass::{Affine as ArkAffine, Projective as ArkProjective};
use icicle_cuda_runtime::{error::CudaResultWrap, memory::HostOrDeviceSlice};
use icicle_cuda_runtime::{
device_context::DeviceContext,
error::CudaResultWrap,
memory::{DeviceVec, HostSlice},
};
pub fn check_scalar_equality<F: FieldImpl>() {
pub fn check_field_equality<F: FieldImpl>() {
let left = F::zero();
let right = F::one();
assert_ne!(left, right);
@@ -48,6 +52,7 @@ where
assert_eq!(left, right);
}
#[cfg(feature = "arkworks")]
pub fn check_ark_scalar_convert<F: FieldImpl + ArkConvertible>()
where
F::Config: GenerateRandom<F>,
@@ -60,6 +65,7 @@ where
}
}
#[cfg(feature = "arkworks")]
pub fn check_ark_point_convert<C: Curve>()
where
Affine<C>: ArkConvertible<ArkEquivalent = ArkAffine<C::ArkSWConfig>>,
@@ -79,27 +85,28 @@ where
pub fn check_field_convert_montgomery<F>()
where
F: FieldImpl + MontgomeryConvertible,
F: FieldImpl + MontgomeryConvertible<'static>,
F::Config: GenerateRandom<F>,
{
let size = 1 << 10;
let scalars = F::Config::generate_random(size);
let device_ctx = DeviceContext::default();
let mut d_scalars = HostOrDeviceSlice::cuda_malloc(size).unwrap();
let mut d_scalars = DeviceVec::cuda_malloc(size).unwrap();
d_scalars
.copy_from_host(&scalars)
.copy_from_host(HostSlice::from_slice(&scalars))
.unwrap();
F::to_mont(&mut d_scalars)
F::to_mont(&mut d_scalars, &device_ctx)
.wrap()
.unwrap();
F::from_mont(&mut d_scalars)
F::from_mont(&mut d_scalars, &device_ctx)
.wrap()
.unwrap();
let mut scalars_copy = vec![F::zero(); size];
d_scalars
.copy_to_host(&mut scalars_copy)
.copy_to_host(HostSlice::from_mut_slice(&mut scalars_copy))
.unwrap();
for (s1, s2) in scalars
@@ -112,27 +119,28 @@ where
pub fn check_points_convert_montgomery<C: Curve>()
where
Affine<C>: MontgomeryConvertible,
Projective<C>: MontgomeryConvertible,
Affine<C>: MontgomeryConvertible<'static>,
Projective<C>: MontgomeryConvertible<'static>,
{
let size = 1 << 10;
let device_ctx = DeviceContext::default();
let affine_points = C::generate_random_affine_points(size);
let mut d_affine = HostOrDeviceSlice::cuda_malloc(size).unwrap();
let mut d_affine = DeviceVec::cuda_malloc(size).unwrap();
d_affine
.copy_from_host(&affine_points)
.copy_from_host(HostSlice::from_slice(&affine_points))
.unwrap();
Affine::<C>::to_mont(&mut d_affine)
Affine::<C>::to_mont(&mut d_affine, &device_ctx)
.wrap()
.unwrap();
Affine::<C>::from_mont(&mut d_affine)
Affine::<C>::from_mont(&mut d_affine, &device_ctx)
.wrap()
.unwrap();
let mut affine_copy = vec![Affine::<C>::zero(); size];
d_affine
.copy_to_host(&mut affine_copy)
.copy_to_host(HostSlice::from_mut_slice(&mut affine_copy))
.unwrap();
for (p1, p2) in affine_points
@@ -143,21 +151,21 @@ where
}
let proj_points = C::generate_random_projective_points(size);
let mut d_proj = HostOrDeviceSlice::cuda_malloc(size).unwrap();
let mut d_proj = DeviceVec::cuda_malloc(size).unwrap();
d_proj
.copy_from_host(&proj_points)
.copy_from_host(HostSlice::from_slice(&proj_points))
.unwrap();
Projective::<C>::to_mont(&mut d_proj)
Projective::<C>::to_mont(&mut d_proj, &device_ctx)
.wrap()
.unwrap();
Projective::<C>::from_mont(&mut d_proj)
Projective::<C>::from_mont(&mut d_proj, &device_ctx)
.wrap()
.unwrap();
let mut projective_copy = vec![Projective::<C>::zero(); size];
d_proj
.copy_to_host(&mut projective_copy)
.copy_to_host(HostSlice::from_mut_slice(&mut projective_copy))
.unwrap();
for (p1, p2) in proj_points

View File

@@ -1,7 +1,7 @@
use crate::error::IcicleResult;
#[cfg(feature = "arkworks")]
use ark_ff::Field as ArkField;
use icicle_cuda_runtime::{error::CudaError, memory::HostOrDeviceSlice};
use icicle_cuda_runtime::{device_context::DeviceContext, error::CudaError, memory::DeviceSlice};
use std::{
fmt::{Debug, Display},
mem::MaybeUninit,
@@ -29,6 +29,7 @@ pub trait FieldImpl:
fn from_bytes_le(bytes: &[u8]) -> Self;
fn zero() -> Self;
fn one() -> Self;
fn from_u32(val: u32) -> Self;
}
#[cfg(feature = "arkworks")]
@@ -39,9 +40,9 @@ pub trait ArkConvertible {
fn from_ark(ark: Self::ArkEquivalent) -> Self;
}
pub trait MontgomeryConvertible: Sized {
fn to_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError;
fn from_mont(values: &mut HostOrDeviceSlice<Self>) -> CudaError;
pub trait MontgomeryConvertible<'a>: Sized {
fn to_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError;
fn from_mont(values: &mut DeviceSlice<Self>, ctx: &DeviceContext<'a>) -> CudaError;
}
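Note: a hedged sketch of the context-aware Montgomery conversion, mirroring check_field_convert_montgomery above; the concrete bn254 ScalarField is an assumption.

use icicle_bn254::curve::ScalarField; // assumed field crate
use icicle_core::traits::{FieldImpl, GenerateRandom, MontgomeryConvertible};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaResultWrap;
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};

fn montgomery_roundtrip(size: usize) {
    let scalars = <ScalarField as FieldImpl>::Config::generate_random(size);
    let ctx = DeviceContext::default();

    let mut d_scalars = DeviceVec::<ScalarField>::cuda_malloc(size).unwrap();
    d_scalars
        .copy_from_host(HostSlice::from_slice(&scalars))
        .unwrap();

    // both conversions now take the device context explicitly
    ScalarField::to_mont(&mut d_scalars, &ctx).wrap().unwrap();
    ScalarField::from_mont(&mut d_scalars, &ctx).wrap().unwrap();

    let mut back = vec![ScalarField::zero(); size];
    d_scalars
        .copy_to_host(HostSlice::from_mut_slice(&mut back))
        .unwrap();
    assert_eq!(scalars, back);
}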
pub trait IcicleResultWrap {

View File

@@ -1,3 +1,4 @@
use icicle_cuda_runtime::device::check_device;
use icicle_cuda_runtime::{
device_context::{DeviceContext, DEFAULT_DEVICE_ID},
memory::HostOrDeviceSlice,
@@ -54,7 +55,7 @@ pub fn merkle_tree_digests_len(height: u32, arity: u32) -> usize {
pub trait TreeBuilder<F: FieldImpl> {
fn build_poseidon_tree_unchecked(
leaves: &mut HostOrDeviceSlice<F>,
leaves: &mut (impl HostOrDeviceSlice<F> + ?Sized),
digests: &mut [F],
height: u32,
arity: u32,
@@ -75,7 +76,7 @@ pub trait TreeBuilder<F: FieldImpl> {
///
/// * `config` - config used to specify extra arguments of the Tree builder.
pub fn build_poseidon_merkle_tree<F>(
leaves: &mut HostOrDeviceSlice<F>,
leaves: &mut (impl HostOrDeviceSlice<F> + ?Sized),
digests: &mut [F],
height: u32,
arity: u32,
@@ -100,6 +101,16 @@ where
);
}
let ctx_device_id = config
.ctx
.device_id;
if let Some(device_id) = leaves.device_id() {
assert_eq!(
device_id, ctx_device_id,
"Device ids in leaves and context are different"
);
}
check_device(ctx_device_id);
let mut local_cfg = config.clone();
local_cfg.are_inputs_on_device = leaves.is_on_device();
@@ -121,7 +132,7 @@ macro_rules! impl_tree_builder {
use icicle_core::poseidon::PoseidonConstants;
extern "C" {
#[link_name = concat!($field_prefix, "BuildPoseidonMerkleTree")]
#[link_name = concat!($field_prefix, "_build_poseidon_merkle_tree")]
pub(crate) fn _build_poseidon_merkle_tree(
leaves: *mut $field,
digests: *mut $field,
@@ -135,7 +146,7 @@ macro_rules! impl_tree_builder {
impl TreeBuilder<$field> for $field_config {
fn build_poseidon_tree_unchecked(
leaves: &mut HostOrDeviceSlice<$field>,
leaves: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
digests: &mut [$field],
height: u32,
arity: u32,

View File

@@ -1,4 +1,4 @@
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use icicle_cuda_runtime::memory::HostSlice;
use crate::{
poseidon::{tests::init_poseidon, Poseidon},
@@ -15,16 +15,16 @@ where
let height = 20;
let arity = 2;
let keep_rows = 1;
let leaves = vec![F::one(); 1 << (height - 1)];
let mut leaves = vec![F::one(); 1 << (height - 1)];
let mut digests = vec![F::zero(); merkle_tree_digests_len(height, arity)];
let mut leaves_slice = HostOrDeviceSlice::on_host(leaves);
let leaves_slice = HostSlice::from_mut_slice(&mut leaves);
let constants = init_poseidon(arity as u32);
let mut config = TreeBuilderConfig::default();
config.keep_rows = keep_rows;
build_poseidon_merkle_tree::<F>(&mut leaves_slice, &mut digests, height, arity, &constants, &config).unwrap();
build_poseidon_merkle_tree::<F>(leaves_slice, &mut digests, height, arity, &constants, &config).unwrap();
println!("Root: {:?}", digests[0..1][0]);
println!("Root: {:?}", digests[0]);
}

View File

@@ -1,3 +1,4 @@
use icicle_cuda_runtime::device::check_device;
use icicle_cuda_runtime::{
device_context::{DeviceContext, DEFAULT_DEVICE_ID},
memory::HostOrDeviceSlice,
@@ -16,7 +17,6 @@ pub struct VecOpsConfig<'a> {
is_a_on_device: bool,
is_b_on_device: bool,
is_result_on_device: bool,
is_result_montgomery_form: bool,
/// Whether to run the vector operations asynchronously. If set to `true`, the functions will be non-blocking and you'd need to synchronize
/// it explicitly by running `stream.synchronize()`. If set to `false`, the functions will block the current CPU thread.
pub is_async: bool,
@@ -35,7 +35,6 @@ impl<'a> VecOpsConfig<'a> {
is_a_on_device: false,
is_b_on_device: false,
is_result_on_device: false,
is_result_montgomery_form: false,
is_async: false,
}
}
@@ -44,38 +43,43 @@ impl<'a> VecOpsConfig<'a> {
#[doc(hidden)]
pub trait VecOps<F> {
fn add(
a: &HostOrDeviceSlice<F>,
b: &HostOrDeviceSlice<F>,
result: &mut HostOrDeviceSlice<F>,
a: &(impl HostOrDeviceSlice<F> + ?Sized),
b: &(impl HostOrDeviceSlice<F> + ?Sized),
result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
cfg: &VecOpsConfig,
) -> IcicleResult<()>;
fn sub(
a: &HostOrDeviceSlice<F>,
b: &HostOrDeviceSlice<F>,
result: &mut HostOrDeviceSlice<F>,
a: &(impl HostOrDeviceSlice<F> + ?Sized),
b: &(impl HostOrDeviceSlice<F> + ?Sized),
result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
cfg: &VecOpsConfig,
) -> IcicleResult<()>;
fn mul(
a: &HostOrDeviceSlice<F>,
b: &HostOrDeviceSlice<F>,
result: &mut HostOrDeviceSlice<F>,
a: &(impl HostOrDeviceSlice<F> + ?Sized),
b: &(impl HostOrDeviceSlice<F> + ?Sized),
result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
cfg: &VecOpsConfig,
) -> IcicleResult<()>;
fn transpose(
input: &HostOrDeviceSlice<F>,
input: &(impl HostOrDeviceSlice<F> + ?Sized),
row_size: u32,
column_size: u32,
output: &mut HostOrDeviceSlice<F>,
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
ctx: &DeviceContext,
on_device: bool,
is_async: bool,
) -> IcicleResult<()>;
}
fn check_vec_ops_args<F>(a: &HostOrDeviceSlice<F>, b: &HostOrDeviceSlice<F>, result: &mut HostOrDeviceSlice<F>) {
fn check_vec_ops_args<'a, F>(
a: &(impl HostOrDeviceSlice<F> + ?Sized),
b: &(impl HostOrDeviceSlice<F> + ?Sized),
result: &(impl HostOrDeviceSlice<F> + ?Sized),
cfg: &VecOpsConfig<'a>,
) -> VecOpsConfig<'a> {
if a.len() != b.len() || a.len() != result.len() {
panic!(
"left, right and output lengths {}; {}; {} do not match",
@@ -84,55 +88,77 @@ fn check_vec_ops_args<F>(a: &HostOrDeviceSlice<F>, b: &HostOrDeviceSlice<F>, res
result.len()
);
}
let ctx_device_id = cfg
.ctx
.device_id;
if let Some(device_id) = a.device_id() {
assert_eq!(device_id, ctx_device_id, "Device ids in a and context are different");
}
if let Some(device_id) = b.device_id() {
assert_eq!(device_id, ctx_device_id, "Device ids in b and context are different");
}
if let Some(device_id) = result.device_id() {
assert_eq!(
device_id, ctx_device_id,
"Device ids in result and context are different"
);
}
check_device(ctx_device_id);
let mut res_cfg = cfg.clone();
res_cfg.is_a_on_device = a.is_on_device();
res_cfg.is_b_on_device = b.is_on_device();
res_cfg.is_result_on_device = result.is_on_device();
res_cfg
}
pub fn add_scalars<F>(
a: &HostOrDeviceSlice<F>,
b: &HostOrDeviceSlice<F>,
result: &mut HostOrDeviceSlice<F>,
a: &(impl HostOrDeviceSlice<F> + ?Sized),
b: &(impl HostOrDeviceSlice<F> + ?Sized),
result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
cfg: &VecOpsConfig,
) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: VecOps<F>,
{
check_vec_ops_args(a, b, result);
<<F as FieldImpl>::Config as VecOps<F>>::add(a, b, result, cfg)
let cfg = check_vec_ops_args(a, b, result, cfg);
<<F as FieldImpl>::Config as VecOps<F>>::add(a, b, result, &cfg)
}
pub fn sub_scalars<F>(
a: &HostOrDeviceSlice<F>,
b: &HostOrDeviceSlice<F>,
result: &mut HostOrDeviceSlice<F>,
a: &(impl HostOrDeviceSlice<F> + ?Sized),
b: &(impl HostOrDeviceSlice<F> + ?Sized),
result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
cfg: &VecOpsConfig,
) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: VecOps<F>,
{
check_vec_ops_args(a, b, result);
<<F as FieldImpl>::Config as VecOps<F>>::sub(a, b, result, cfg)
let cfg = check_vec_ops_args(a, b, result, cfg);
<<F as FieldImpl>::Config as VecOps<F>>::sub(a, b, result, &cfg)
}
pub fn mul_scalars<F>(
a: &HostOrDeviceSlice<F>,
b: &HostOrDeviceSlice<F>,
result: &mut HostOrDeviceSlice<F>,
a: &(impl HostOrDeviceSlice<F> + ?Sized),
b: &(impl HostOrDeviceSlice<F> + ?Sized),
result: &mut (impl HostOrDeviceSlice<F> + ?Sized),
cfg: &VecOpsConfig,
) -> IcicleResult<()>
where
F: FieldImpl,
<F as FieldImpl>::Config: VecOps<F>,
{
check_vec_ops_args(a, b, result);
<<F as FieldImpl>::Config as VecOps<F>>::mul(a, b, result, cfg)
let cfg = check_vec_ops_args(a, b, result, cfg);
<<F as FieldImpl>::Config as VecOps<F>>::mul(a, b, result, &cfg)
}
pub fn transpose_matrix<F>(
input: &HostOrDeviceSlice<F>,
input: &(impl HostOrDeviceSlice<F> + ?Sized),
row_size: u32,
column_size: u32,
output: &mut HostOrDeviceSlice<F>,
output: &mut (impl HostOrDeviceSlice<F> + ?Sized),
ctx: &DeviceContext,
on_device: bool,
is_async: bool,
@@ -157,7 +183,7 @@ macro_rules! impl_vec_ops_field {
use icicle_core::vec_ops::VecOpsConfig;
extern "C" {
#[link_name = concat!($field_prefix, "AddCuda")]
#[link_name = concat!($field_prefix, "_add_cuda")]
pub(crate) fn add_scalars_cuda(
a: *const $field,
b: *const $field,
@@ -166,7 +192,7 @@ macro_rules! impl_vec_ops_field {
result: *mut $field,
) -> CudaError;
#[link_name = concat!($field_prefix, "SubCuda")]
#[link_name = concat!($field_prefix, "_sub_cuda")]
pub(crate) fn sub_scalars_cuda(
a: *const $field,
b: *const $field,
@@ -175,7 +201,7 @@ macro_rules! impl_vec_ops_field {
result: *mut $field,
) -> CudaError;
#[link_name = concat!($field_prefix, "MulCuda")]
#[link_name = concat!($field_prefix, "_mul_cuda")]
pub(crate) fn mul_scalars_cuda(
a: *const $field,
b: *const $field,
@@ -184,7 +210,7 @@ macro_rules! impl_vec_ops_field {
result: *mut $field,
) -> CudaError;
#[link_name = concat!($field_prefix, "TransposeMatrix")]
#[link_name = concat!($field_prefix, "_transpose_matrix_cuda")]
pub(crate) fn transpose_cuda(
input: *const $field,
row_size: u32,
@@ -199,9 +225,9 @@ macro_rules! impl_vec_ops_field {
impl VecOps<$field> for $field_config {
fn add(
a: &HostOrDeviceSlice<$field>,
b: &HostOrDeviceSlice<$field>,
result: &mut HostOrDeviceSlice<$field>,
a: &(impl HostOrDeviceSlice<$field> + ?Sized),
b: &(impl HostOrDeviceSlice<$field> + ?Sized),
result: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
cfg: &VecOpsConfig,
) -> IcicleResult<()> {
unsafe {
@@ -209,7 +235,7 @@ macro_rules! impl_vec_ops_field {
a.as_ptr(),
b.as_ptr(),
a.len() as u32,
cfg as *const _ as *const VecOpsConfig,
cfg as *const VecOpsConfig,
result.as_mut_ptr(),
)
.wrap()
@@ -217,9 +243,9 @@ macro_rules! impl_vec_ops_field {
}
fn sub(
a: &HostOrDeviceSlice<$field>,
b: &HostOrDeviceSlice<$field>,
result: &mut HostOrDeviceSlice<$field>,
a: &(impl HostOrDeviceSlice<$field> + ?Sized),
b: &(impl HostOrDeviceSlice<$field> + ?Sized),
result: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
cfg: &VecOpsConfig,
) -> IcicleResult<()> {
unsafe {
@@ -227,7 +253,7 @@ macro_rules! impl_vec_ops_field {
a.as_ptr(),
b.as_ptr(),
a.len() as u32,
cfg as *const _ as *const VecOpsConfig,
cfg as *const VecOpsConfig,
result.as_mut_ptr(),
)
.wrap()
@@ -235,9 +261,9 @@ macro_rules! impl_vec_ops_field {
}
fn mul(
a: &HostOrDeviceSlice<$field>,
b: &HostOrDeviceSlice<$field>,
result: &mut HostOrDeviceSlice<$field>,
a: &(impl HostOrDeviceSlice<$field> + ?Sized),
b: &(impl HostOrDeviceSlice<$field> + ?Sized),
result: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
cfg: &VecOpsConfig,
) -> IcicleResult<()> {
unsafe {
@@ -245,7 +271,7 @@ macro_rules! impl_vec_ops_field {
a.as_ptr(),
b.as_ptr(),
a.len() as u32,
cfg as *const _ as *const VecOpsConfig,
cfg as *const VecOpsConfig,
result.as_mut_ptr(),
)
.wrap()
@@ -253,10 +279,10 @@ macro_rules! impl_vec_ops_field {
}
fn transpose(
input: &HostOrDeviceSlice<$field>,
input: &(impl HostOrDeviceSlice<$field> + ?Sized),
row_size: u32,
column_size: u32,
output: &mut HostOrDeviceSlice<$field>,
output: &mut (impl HostOrDeviceSlice<$field> + ?Sized),
ctx: &DeviceContext,
on_device: bool,
is_async: bool,

View File

@@ -1,5 +1,6 @@
use crate::traits::GenerateRandom;
use crate::vec_ops::{add_scalars, mul_scalars, sub_scalars, FieldImpl, HostOrDeviceSlice, VecOps, VecOpsConfig};
use crate::vec_ops::{add_scalars, mul_scalars, sub_scalars, FieldImpl, VecOps, VecOpsConfig};
use icicle_cuda_runtime::memory::HostSlice;
pub fn check_vec_ops_scalars<F: FieldImpl>()
where
@@ -7,21 +8,27 @@ where
{
let test_size = 1 << 14;
let a = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size));
let b = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size));
let ones = HostOrDeviceSlice::on_host(vec![F::one(); test_size]);
let mut result = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
let mut result2 = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
let mut result3 = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
let a = F::Config::generate_random(test_size);
let b = F::Config::generate_random(test_size);
let ones = vec![F::one(); test_size];
let mut result = vec![F::zero(); test_size];
let mut result2 = vec![F::zero(); test_size];
let mut result3 = vec![F::zero(); test_size];
let a = HostSlice::from_slice(&a);
let b = HostSlice::from_slice(&b);
let ones = HostSlice::from_slice(&ones);
let result = HostSlice::from_mut_slice(&mut result);
let result2 = HostSlice::from_mut_slice(&mut result2);
let result3 = HostSlice::from_mut_slice(&mut result3);
let cfg = VecOpsConfig::default();
add_scalars(&a, &b, &mut result, &cfg).unwrap();
add_scalars(a, b, result, &cfg).unwrap();
sub_scalars(&result, &b, &mut result2, &cfg).unwrap();
sub_scalars(result, b, result2, &cfg).unwrap();
assert_eq!(a[0..1][0], result2[0..1][0]);
assert_eq!(a[0], result2[0]);
mul_scalars(&a, &ones, &mut result3, &cfg).unwrap();
mul_scalars(a, ones, result3, &cfg).unwrap();
assert_eq!(a[0..1][0], result3[0..1][0]);
assert_eq!(a[0], result3[0]);
}
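Note: a hedged sketch showing that the trait-based arguments also allow mixing device inputs with a host output in a single call; the bn254 paths are assumptions.

use icicle_bn254::curve::ScalarField; // assumed field crate
use icicle_core::traits::{FieldImpl, GenerateRandom};
use icicle_core::vec_ops::{add_scalars, VecOpsConfig};
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};

fn add_on_device(n: usize) {
    let a = <ScalarField as FieldImpl>::Config::generate_random(n);
    let b = <ScalarField as FieldImpl>::Config::generate_random(n);

    let mut d_a = DeviceVec::<ScalarField>::cuda_malloc(n).unwrap();
    let mut d_b = DeviceVec::<ScalarField>::cuda_malloc(n).unwrap();
    d_a.copy_from_host(HostSlice::from_slice(&a)).unwrap();
    d_b.copy_from_host(HostSlice::from_slice(&b)).unwrap();

    // check_vec_ops_args derives is_a_on_device / is_b_on_device / is_result_on_device
    // from the slices themselves, so the default config is enough here
    let mut result = vec![ScalarField::zero(); n];
    let cfg = VecOpsConfig::default();
    add_scalars(&d_a[..], &d_b[..], HostSlice::from_mut_slice(&mut result), &cfg).unwrap();
}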

View File

@@ -83,6 +83,11 @@ fn main() {
// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html
.allowlist_function("cudaFreeAsync")
.allowlist_function("cudaMallocAsync")
// Unified Addressing
// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__UNIFIED.html
.rustified_enum("cudaMemoryType")
.allowlist_type("cudaPointerAttributes")
.allowlist_function("cudaPointerGetAttributes")
//
.generate()
.expect("Unable to generate bindings");

View File

@@ -1,5 +1,8 @@
use crate::{
bindings::{cudaFreeAsync, cudaGetDevice, cudaGetDeviceCount, cudaMallocAsync, cudaMemGetInfo, cudaSetDevice},
bindings::{
cudaFreeAsync, cudaGetDevice, cudaGetDeviceCount, cudaMallocAsync, cudaMemGetInfo, cudaPointerAttributes,
cudaPointerGetAttributes, cudaSetDevice,
},
error::{CudaResult, CudaResultWrap},
stream::CudaStream,
};
@@ -19,6 +22,23 @@ pub fn get_device() -> CudaResult<usize> {
unsafe { cudaGetDevice(&mut device_id) }.wrap_value(device_id as usize)
}
pub fn get_device_from_pointer(ptr: *const ::std::os::raw::c_void) -> CudaResult<usize> {
let mut ptr_attributes = MaybeUninit::<cudaPointerAttributes>::uninit();
unsafe {
cudaPointerGetAttributes(ptr_attributes.as_mut_ptr(), ptr).wrap()?;
Ok(ptr_attributes
.assume_init()
.device as usize)
}
}
pub fn check_device(device_id: usize) {
match device_id == get_device().unwrap() {
true => (),
false => panic!("Attempt to use on a different device"),
}
}
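Note: a minimal sketch of these pointer helpers in use, assuming the DeviceVec type from this crate's memory module.

use icicle_cuda_runtime::device::{check_device, get_device, get_device_from_pointer};
use icicle_cuda_runtime::memory::{DeviceVec, HostOrDeviceSlice};

fn device_of_allocation() {
    let d_buf = DeviceVec::<u32>::cuda_malloc(16).unwrap();
    let dev =
        get_device_from_pointer(unsafe { d_buf.as_ptr() as *const std::os::raw::c_void }).unwrap();
    assert_eq!(dev, get_device().unwrap());
    check_device(dev); // panics if the current device differs
}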
// This function pre-allocates default memory pool and warms the GPU up
// so that subsequent memory allocations and other calls are not slowed down
pub fn warmup(stream: &CudaStream) -> CudaResult<()> {

View File

@@ -3,8 +3,6 @@ use crate::stream::CudaStream;
pub const DEFAULT_DEVICE_ID: usize = 0;
use crate::device::get_device;
/// Properties of the device used in Icicle functions.
#[repr(C)]
#[derive(Debug, Clone)]
@@ -38,10 +36,3 @@ impl DeviceContext<'_> {
}
}
}
pub fn check_device(device_id: i32) {
match device_id == get_device().unwrap() as i32 {
true => (),
false => panic!("Attempt to use on a different device"),
}
}

View File

@@ -1,7 +1,7 @@
#![allow(non_upper_case_globals)]
#![allow(non_camel_case_types)]
#[allow(dead_code)]
#[allow(dead_code, non_snake_case)]
mod bindings;
pub mod device;
pub mod device_context;

View File

@@ -1,87 +1,233 @@
use crate::bindings::{
cudaFree, cudaMalloc, cudaMallocAsync, cudaMemPool_t, cudaMemcpy, cudaMemcpyAsync, cudaMemcpyKind,
};
use crate::device::get_device;
use crate::device_context::check_device;
use crate::device::{check_device, get_device_from_pointer};
use crate::error::{CudaError, CudaResult, CudaResultWrap};
use crate::stream::CudaStream;
use std::mem::{size_of, MaybeUninit};
use std::ops::{Index, IndexMut, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive};
use std::mem::{size_of, ManuallyDrop, MaybeUninit};
use std::ops::{
Deref, DerefMut, Index, IndexMut, Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive,
};
use std::os::raw::c_void;
use std::slice::from_raw_parts_mut;
use std::slice::SliceIndex;
pub enum HostOrDeviceSlice<'a, T> {
Host(Vec<T>),
Device(&'a mut [T], i32),
#[derive(Debug)]
pub struct HostSlice<T>([T]);
pub struct DeviceVec<T>(ManuallyDrop<Box<[T]>>);
pub struct DeviceSlice<T>([T]);
pub trait HostOrDeviceSlice<T> {
fn is_on_device(&self) -> bool;
fn device_id(&self) -> Option<usize>;
unsafe fn as_ptr(&self) -> *const T;
unsafe fn as_mut_ptr(&mut self) -> *mut T;
fn len(&self) -> usize;
fn is_empty(&self) -> bool;
}
impl<'a, T> HostOrDeviceSlice<'a, T> {
// Function to get the device_id for Device variant
pub fn get_device_id(&self) -> Option<i32> {
match self {
HostOrDeviceSlice::Device(_, device_id) => Some(*device_id),
HostOrDeviceSlice::Host(_) => None,
}
impl<T> HostOrDeviceSlice<T> for HostSlice<T> {
fn is_on_device(&self) -> bool {
false
}
pub fn len(&self) -> usize {
match self {
Self::Device(s, _) => s.len(),
Self::Host(v) => v.len(),
}
fn device_id(&self) -> Option<usize> {
None
}
pub fn is_empty(&self) -> bool {
match self {
Self::Device(s, _) => s.is_empty(),
Self::Host(v) => v.is_empty(),
}
unsafe fn as_ptr(&self) -> *const T {
self.0
.as_ptr()
}
pub fn is_on_device(&self) -> bool {
match self {
Self::Device(_, _) => true,
Self::Host(_) => false,
}
unsafe fn as_mut_ptr(&mut self) -> *mut T {
self.0
.as_mut_ptr()
}
pub fn as_mut_slice(&mut self) -> &mut [T] {
match self {
Self::Device(_, _) => {
panic!("Use copy_to_host and copy_to_host_async to move device data to a slice")
}
Self::Host(v) => v.as_mut_slice(),
}
fn len(&self) -> usize {
self.0
.len()
}
fn is_empty(&self) -> bool {
self.len() == 0
}
}
impl<T> HostOrDeviceSlice<T> for DeviceSlice<T> {
fn is_on_device(&self) -> bool {
true
}
fn device_id(&self) -> Option<usize> {
Some(
get_device_from_pointer(unsafe { self.as_ptr() as *const ::std::os::raw::c_void })
.expect("Invalid pointer. Maybe host pointer was used here?"),
)
}
unsafe fn as_ptr(&self) -> *const T {
self.0
.as_ptr()
}
unsafe fn as_mut_ptr(&mut self) -> *mut T {
self.0
.as_mut_ptr()
}
fn len(&self) -> usize {
self.0
.len()
}
fn is_empty(&self) -> bool {
self.len() == 0
}
}
impl<T> HostSlice<T> {
// Currently this function just transmutes the type. However, it is not guaranteed that it
// will always be cheap, as it might at some point e.g. pin the memory, which takes some time.
pub fn from_slice(slice: &[T]) -> &Self {
unsafe { &*(slice as *const [T] as *const Self) }
}
// Currently this function just transmutes the type. However, it is not guaranteed that it
// will always be cheap, as it might at some point e.g. pin the memory, which takes some time.
pub fn from_mut_slice(slice: &mut [T]) -> &mut Self {
unsafe { &mut *(slice as *mut [T] as *mut Self) }
}
pub fn as_slice(&self) -> &[T] {
match self {
Self::Device(_, _) => {
panic!("Use copy_to_host and copy_to_host_async to move device data to a slice")
&self.0
}
pub fn as_mut_slice(&mut self) -> &mut [T] {
&mut self.0
}
pub fn iter(&self) -> impl Iterator<Item = &T> {
self.0
.iter()
}
pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut T> {
self.0
.iter_mut()
}
}
impl<T> DeviceSlice<T> {
pub unsafe fn from_slice(slice: &[T]) -> &Self {
&*(slice as *const [T] as *const Self)
}
pub unsafe fn from_mut_slice(slice: &mut [T]) -> &mut Self {
&mut *(slice as *mut [T] as *mut Self)
}
pub fn copy_from_host(&mut self, val: &HostSlice<T>) -> CudaResult<()> {
assert!(
self.len() == val.len(),
"In copy from host, destination and source slices have different lengths"
);
check_device(
self.device_id()
.unwrap(),
);
let size = size_of::<T>() * self.len();
if size != 0 {
unsafe {
cudaMemcpy(
self.as_mut_ptr() as *mut c_void,
val.as_ptr() as *const c_void,
size,
cudaMemcpyKind::cudaMemcpyHostToDevice,
)
.wrap()?
}
Self::Host(v) => v.as_slice(),
}
Ok(())
}
pub fn as_ptr(&self) -> *const T {
match self {
Self::Device(s, _) => s.as_ptr(),
Self::Host(v) => v.as_ptr(),
pub fn copy_to_host(&self, val: &mut HostSlice<T>) -> CudaResult<()> {
assert!(
self.len() == val.len(),
"In copy to host, destination and source slices have different lengths"
);
check_device(
self.device_id()
.unwrap(),
);
let size = size_of::<T>() * self.len();
if size != 0 {
unsafe {
cudaMemcpy(
val.as_mut_ptr() as *mut c_void,
self.as_ptr() as *const c_void,
size,
cudaMemcpyKind::cudaMemcpyDeviceToHost,
)
.wrap()?
}
}
Ok(())
}
pub fn as_mut_ptr(&mut self) -> *mut T {
match self {
Self::Device(s, _) => s.as_mut_ptr(),
Self::Host(v) => v.as_mut_ptr(),
pub fn copy_from_host_async(&mut self, val: &HostSlice<T>, stream: &CudaStream) -> CudaResult<()> {
assert!(
self.len() == val.len(),
"In copy from host async, destination and source slices have different lengths"
);
check_device(
self.device_id()
.unwrap(),
);
let size = size_of::<T>() * self.len();
if size != 0 {
unsafe {
cudaMemcpyAsync(
self.as_mut_ptr() as *mut c_void,
val.as_ptr() as *const c_void,
size,
cudaMemcpyKind::cudaMemcpyHostToDevice,
stream.handle,
)
.wrap()?
}
}
Ok(())
}
pub fn on_host(src: Vec<T>) -> Self {
//TODO: HostOrDeviceSlice on_host() with slice input without actually copying the data
Self::Host(src)
pub fn copy_to_host_async(&self, val: &mut HostSlice<T>, stream: &CudaStream) -> CudaResult<()> {
assert!(
self.len() == val.len(),
"In copy to host async, destination and source slices have different lengths"
);
check_device(
self.device_id()
.unwrap(),
);
let size = size_of::<T>() * self.len();
if size != 0 {
unsafe {
cudaMemcpyAsync(
val.as_mut_ptr() as *mut c_void,
self.as_ptr() as *const c_void,
size,
cudaMemcpyKind::cudaMemcpyDeviceToHost,
stream.handle,
)
.wrap()?
}
}
Ok(())
}
}
impl<T> DeviceVec<T> {
pub fn cuda_malloc(count: usize) -> CudaResult<Self> {
let size = count
.checked_mul(size_of::<T>())
@@ -93,10 +239,11 @@ impl<'a, T> HostOrDeviceSlice<'a, T> {
let mut device_ptr = MaybeUninit::<*mut c_void>::uninit();
unsafe {
cudaMalloc(device_ptr.as_mut_ptr(), size).wrap()?;
Ok(Self::Device(
from_raw_parts_mut(device_ptr.assume_init() as *mut T, count),
get_device().unwrap() as i32,
))
let res = Self(ManuallyDrop::new(Box::from_raw(from_raw_parts_mut(
device_ptr.assume_init() as *mut T,
count,
))));
Ok(res)
}
}
@@ -105,146 +252,58 @@ impl<'a, T> HostOrDeviceSlice<'a, T> {
.checked_mul(size_of::<T>())
.unwrap_or(0);
if size == 0 {
return Err(CudaError::cudaErrorMemoryAllocation);
return Err(CudaError::cudaErrorMemoryAllocation); //TODO: only CUDA backend should return CudaError
}
let mut device_ptr = MaybeUninit::<*mut c_void>::uninit();
unsafe {
cudaMallocAsync(device_ptr.as_mut_ptr(), size, stream.handle as *mut _ as *mut _).wrap()?;
Ok(Self::Device(
from_raw_parts_mut(device_ptr.assume_init() as *mut T, count),
get_device().unwrap() as i32,
))
cudaMallocAsync(device_ptr.as_mut_ptr(), size, stream.handle).wrap()?;
Ok(Self(ManuallyDrop::new(Box::from_raw(from_raw_parts_mut(
device_ptr.assume_init() as *mut T,
count,
)))))
}
}
pub fn copy_from_host(&mut self, val: &[T]) -> CudaResult<()> {
match self {
Self::Device(_, device_id) => check_device(*device_id),
Self::Host(_) => panic!("Need device memory to copy into, and not host"),
};
assert!(
self.len() == val.len(),
"destination and source slices have different lengths"
);
let size = size_of::<T>() * self.len();
if size != 0 {
unsafe {
cudaMemcpy(
self.as_mut_ptr() as *mut c_void,
val.as_ptr() as *const c_void,
size,
cudaMemcpyKind::cudaMemcpyHostToDevice,
)
.wrap()?
}
}
Ok(())
pub fn cuda_malloc_for_device(count: usize, device_id: usize) -> CudaResult<Self> {
check_device(device_id);
Self::cuda_malloc(count)
}
pub fn copy_to_host(&self, val: &mut [T]) -> CudaResult<()> {
match self {
Self::Device(_, device_id) => check_device(*device_id),
Self::Host(_) => panic!("Need device memory to copy from, and not host"),
};
assert!(
self.len() == val.len(),
"destination and source slices have different lengths"
);
let size = size_of::<T>() * self.len();
if size != 0 {
unsafe {
cudaMemcpy(
val.as_mut_ptr() as *mut c_void,
self.as_ptr() as *const c_void,
size,
cudaMemcpyKind::cudaMemcpyDeviceToHost,
)
.wrap()?
}
}
Ok(())
}
pub fn copy_from_host_async(&mut self, val: &[T], stream: &CudaStream) -> CudaResult<()> {
match self {
Self::Device(_, device_id) => check_device(*device_id),
Self::Host(_) => panic!("Need device memory to copy into, and not host"),
};
assert!(
self.len() == val.len(),
"destination and source slices have different lengths"
);
let size = size_of::<T>() * self.len();
if size != 0 {
unsafe {
cudaMemcpyAsync(
self.as_mut_ptr() as *mut c_void,
val.as_ptr() as *const c_void,
size,
cudaMemcpyKind::cudaMemcpyHostToDevice,
stream.handle as *mut _ as *mut _,
)
.wrap()?
}
}
Ok(())
}
pub fn copy_to_host_async(&self, val: &mut [T], stream: &CudaStream) -> CudaResult<()> {
match self {
Self::Device(_, device_id) => check_device(*device_id),
Self::Host(_) => panic!("Need device memory to copy from, and not host"),
};
assert!(
self.len() == val.len(),
"destination and source slices have different lengths"
);
let size = size_of::<T>() * self.len();
if size != 0 {
unsafe {
cudaMemcpyAsync(
val.as_mut_ptr() as *mut c_void,
self.as_ptr() as *const c_void,
size,
cudaMemcpyKind::cudaMemcpyDeviceToHost,
stream.handle as *mut _ as *mut _,
)
.wrap()?
}
}
Ok(())
pub fn cuda_malloc_async_for_device(count: usize, stream: &CudaStream, device_id: usize) -> CudaResult<Self> {
check_device(device_id);
Self::cuda_malloc_async(count, stream)
}
}
macro_rules! impl_index {
macro_rules! impl_host_index {
($($t:ty)*) => {
$(
impl<'a, T> Index<$t> for HostOrDeviceSlice<'a, T>
impl<T> Index<$t> for HostSlice<T>
{
type Output = [T];
type Output = Self;
fn index(&self, index: $t) -> &Self::Output {
match self {
Self::Device(s, _) => s.index(index),
Self::Host(v) => v.index(index),
}
Self::from_slice(
self.0
.index(index),
)
}
}
impl<'a, T> IndexMut<$t> for HostOrDeviceSlice<'a, T>
impl<T> IndexMut<$t> for HostSlice<T>
{
fn index_mut(&mut self, index: $t) -> &mut Self::Output {
match self {
Self::Device(s,_) => s.index_mut(index),
Self::Host(v) => v.index_mut(index),
}
Self::from_mut_slice(
self.0
.index_mut(index),
)
}
}
)*
}
}
impl_index! {
impl_host_index! {
Range<usize>
RangeFull
RangeFrom<usize>
@@ -253,22 +312,112 @@ impl_index! {
RangeToInclusive<usize>
}
impl<'a, T> Drop for HostOrDeviceSlice<'a, T> {
fn drop(&mut self) {
match self {
Self::Device(s, device_id) => {
check_device(*device_id);
if s.is_empty() {
return;
}
impl<T> Index<usize> for HostSlice<T> {
type Output = T;
unsafe {
cudaFree(s.as_mut_ptr() as *mut c_void)
.wrap()
.unwrap();
}
}
Self::Host(_) => {}
fn index(&self, index: usize) -> &Self::Output {
self.0
.index(index)
}
}
impl<T> IndexMut<usize> for HostSlice<T> {
fn index_mut(&mut self, index: usize) -> &mut Self::Output {
self.0
.index_mut(index)
}
}
impl<Idx, T> Index<Idx> for DeviceVec<T>
where
Idx: SliceIndex<[T], Output = [T]>,
{
type Output = DeviceSlice<T>;
fn index(&self, index: Idx) -> &Self::Output {
unsafe {
Self::Output::from_slice(
self.0
.index(index),
)
}
}
}
impl<Idx, T> IndexMut<Idx> for DeviceVec<T>
where
Idx: SliceIndex<[T], Output = [T]>,
{
fn index_mut(&mut self, index: Idx) -> &mut Self::Output {
unsafe {
Self::Output::from_mut_slice(
self.0
.index_mut(index),
)
}
}
}
impl<Idx, T> Index<Idx> for DeviceSlice<T>
where
Idx: SliceIndex<[T], Output = [T]>,
{
type Output = Self;
fn index(&self, index: Idx) -> &Self::Output {
unsafe {
Self::from_slice(
self.0
.index(index),
)
}
}
}
impl<Idx, T> IndexMut<Idx> for DeviceSlice<T>
where
Idx: SliceIndex<[T], Output = [T]>,
{
fn index_mut(&mut self, index: Idx) -> &mut Self::Output {
unsafe {
Self::from_mut_slice(
self.0
.index_mut(index),
)
}
}
}
impl<T> Deref for DeviceVec<T> {
type Target = DeviceSlice<T>;
fn deref(&self) -> &Self::Target {
&self[..]
}
}
impl<T> DerefMut for DeviceVec<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self[..]
}
}
impl<T> Drop for DeviceVec<T> {
fn drop(&mut self) {
if self
.0
.is_empty()
{
return;
}
unsafe {
let ptr = self
.0
.as_mut_ptr() as *mut c_void;
cudaFree(ptr)
.wrap()
.unwrap();
}
}
}
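// A minimal host<->device round-trip sketch against the new DeviceVec / HostSlice API.
// It assumes the synchronous copy_from_host / copy_to_host counterparts of the async
// methods above exist with HostSlice-based signatures; any name not shown in this hunk
// is an assumption, not the definitive API.
use icicle_cuda_runtime::memory::{DeviceVec, HostSlice};
fn device_round_trip_sketch() {
    let input = vec![1u32, 2, 3, 4];
    let mut output = vec![0u32; input.len()];
    // Allocate on the current CUDA device; DeviceVec derefs to DeviceSlice,
    // so the copy methods are available directly on the vector.
    let mut d_buf = DeviceVec::<u32>::cuda_malloc(input.len()).unwrap();
    d_buf
        .copy_from_host(HostSlice::from_slice(&input))
        .unwrap();
    d_buf
        .copy_to_host(HostSlice::from_mut_slice(&mut output))
        .unwrap();
    assert_eq!(input, output);
}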

View File

@@ -11,11 +11,13 @@ repository.workspace = true
icicle-core = { workspace = true }
icicle-cuda-runtime = { workspace = true }
ark-bls12-377 = { version = "0.4.0", optional = true }
criterion = "0.3"
[build-dependencies]
cmake = "0.1.50"
[dev-dependencies]
criterion = "0.3"
ark-bls12-377 = "0.4.0"
ark-std = "0.4.0"
ark-ff = "0.4.0"
@@ -23,6 +25,7 @@ ark-ec = "0.4.0"
ark-poly = "0.4.0"
icicle-core = { path = "../../icicle-core", features = ["arkworks"] }
icicle-bls12-377 = { path = ".", features = ["arkworks"] }
serial_test = "3.0.0"
[features]
default = []
@@ -32,3 +35,7 @@ g2 = ["icicle-core/g2"]
ec_ntt = ["icicle-core/ec_ntt"]
devmode = ["icicle-core/devmode"]
arkworks = ["ark-bls12-377", "icicle-core/arkworks"]
[[bench]]
name = "ecntt"
harness = false # Criterion provides its own harness

View File

@@ -0,0 +1,10 @@
#[cfg(feature = "ec_ntt")]
use icicle_bls12_377::curve::{CurveCfg, ScalarField};
#[cfg(feature = "ec_ntt")]
use icicle_core::impl_ecntt_bench;
#[cfg(feature = "ec_ntt")]
impl_ecntt_bench!("BLS12_377", ScalarField, CurveCfg);
#[cfg(not(feature = "ec_ntt"))]
fn main() {}

View File

@@ -13,47 +13,50 @@ fn main() {
// Optional Features
#[cfg(feature = "g2")]
config.define("G2_DEFINED", "ON");
config.define("G2", "ON");
#[cfg(feature = "ec_ntt")]
config.define("ECNTT_DEFINED", "ON");
config.define("ECNTT", "ON");
#[cfg(feature = "devmode")]
config.define("DEVMODE", "ON");
// Build
let out_dir = config
.build_target("icicle")
.build_target("icicle_curve")
.build();
println!("cargo:rustc-link-search={}/build", out_dir.display());
println!("cargo:rustc-link-lib=ingo_bls12_377");
println!("cargo:rustc-link-search={}/build/lib", out_dir.display());
println!("cargo:rustc-link-lib=ingo_field_bls12_377");
println!("cargo:rustc-link-lib=ingo_curve_bls12_377");
if cfg!(feature = "bw6-761") {
// Base config
let mut config = Config::new("../../../../icicle");
config
.define("BUILD_TESTS", "OFF")
.define("CURVE", "bw6_761")
.define("CMAKE_BUILD_TYPE", "Release");
// Optional Features
#[cfg(feature = "bw6-761-g2")]
config.define("G2_DEFINED", "ON");
config.define("G2", "ON");
#[cfg(feature = "ec_ntt")]
config.define("ECNTT_DEFINED", "OFF");
config.define("ECNTT", "OFF");
#[cfg(feature = "devmode")]
config.define("DEVMODE", "ON");
// Build
let out_dir = config
.build_target("icicle")
.build_target("icicle_curve")
.build();
println!("cargo:rustc-link-search={}/build", out_dir.display());
println!("cargo:rustc-link-lib=ingo_bw6_761");
println!("cargo:rustc-link-search={}/build/lib/", out_dir.display());
println!("cargo:rustc-link-lib=ingo_field_bw6_761");
println!("cargo:rustc-link-lib=ingo_curve_bw6_761");
}
println!("cargo:rustc-link-lib=stdc++");

View File

@@ -6,14 +6,15 @@ use icicle_core::curve::{Affine, Curve, Projective};
use icicle_core::field::{Field, MontgomeryConvertibleField};
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
use icicle_core::{impl_curve, impl_field, impl_scalar_field};
use icicle_cuda_runtime::device::check_device;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
pub(crate) const SCALAR_LIMBS: usize = 4;
pub(crate) const BASE_LIMBS: usize = 6;
pub(crate) const SCALAR_LIMBS: usize = 8;
pub(crate) const BASE_LIMBS: usize = 12;
#[cfg(feature = "g2")]
pub(crate) const G2_BASE_LIMBS: usize = 12;
pub(crate) const G2_BASE_LIMBS: usize = 24;
impl_scalar_field!("bls12_377", bls12_377_sf, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
#[cfg(feature = "bw6-761")]
@@ -34,7 +35,7 @@ impl_curve!(
);
#[cfg(feature = "g2")]
impl_curve!(
"bls12_377G2",
"bls12_377_g2",
bls12_377_g2,
G2CurveCfg,
ScalarField,

View File

@@ -0,0 +1,23 @@
#![cfg(feature = "ec_ntt")]
use icicle_core::error::IcicleResult;
use icicle_core::impl_ecntt;
use icicle_core::ntt::{NTTConfig, NTTDir};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
use icicle_cuda_runtime::error::CudaError;
use crate::curve::{CurveCfg, ScalarCfg, ScalarField};
use icicle_core::ecntt::Projective;
impl_ecntt!("bls12_377", bls12_377, ScalarField, ScalarCfg, CurveCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::curve::{CurveCfg, ScalarField};
use icicle_core::ecntt::tests::*;
use icicle_core::impl_ecntt_tests;
use std::sync::OnceLock;
impl_ecntt_tests!(ScalarField, CurveCfg);
}

View File

@@ -1,6 +1,8 @@
pub mod curve;
pub mod ecntt;
pub mod msm;
pub mod ntt;
pub mod polynomials;
pub mod poseidon;
pub mod tree;
pub mod vec_ops;

View File

@@ -8,11 +8,15 @@ use icicle_core::{
msm::{MSMConfig, MSM},
traits::IcicleResultWrap,
};
use icicle_cuda_runtime::{device_context::DeviceContext, error::CudaError, memory::HostOrDeviceSlice};
use icicle_cuda_runtime::{
device_context::DeviceContext,
error::CudaError,
memory::{DeviceSlice, HostOrDeviceSlice},
};
impl_msm!("bls12_377", bls12_377, CurveCfg);
#[cfg(feature = "g2")]
impl_msm!("bls12_377G2", bls12_377_g2, G2CurveCfg);
impl_msm!("bls12_377_g2", bls12_377_g2, G2CurveCfg);
#[cfg(test)]
pub(crate) mod tests {

View File

@@ -3,11 +3,10 @@ use crate::curve::{BaseCfg, BaseField};
use crate::curve::{ScalarCfg, ScalarField};
use icicle_core::error::IcicleResult;
use icicle_core::impl_ntt;
use icicle_core::ntt::{NTTConfig, NTTDir, NTT};
use icicle_core::ntt::{NTTConfig, NTTDir, NTTDomain, NTT};
use icicle_core::traits::IcicleResultWrap;
use icicle_core::{impl_ntt, impl_ntt_without_domain};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
@@ -18,9 +17,10 @@ impl_ntt!("bw6_761", bw6_761, BaseField, BaseCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::curve::ScalarField;
use crate::ntt::DEFAULT_DEVICE_ID;
use icicle_core::impl_ntt_tests;
use icicle_core::ntt::tests::*;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
use serial_test::{parallel, serial};
use std::sync::OnceLock;
impl_ntt_tests!(ScalarField);

View File

@@ -0,0 +1,10 @@
use crate::curve::{ScalarCfg, ScalarField};
use icicle_core::impl_univariate_polynomial_api;
impl_univariate_polynomial_api!("bls12_377", bls12_377, ScalarField, ScalarCfg);
#[cfg(test)]
mod tests {
use icicle_core::impl_polynomial_tests;
impl_polynomial_tests!(bls12_377, ScalarField);
}

View File

@@ -11,11 +11,13 @@ repository.workspace = true
icicle-core = { workspace = true }
icicle-cuda-runtime = { workspace = true }
ark-bls12-381 = { version = "0.4.0", optional = true }
criterion = "0.3"
[build-dependencies]
cmake = "0.1.50"
[dev-dependencies]
criterion = "0.3"
ark-bls12-381 = "0.4.0"
ark-std = "0.4.0"
ark-ff = "0.4.0"
@@ -23,6 +25,7 @@ ark-ec = "0.4.0"
ark-poly = "0.4.0"
icicle-core = { path = "../../icicle-core", features = ["arkworks"] }
icicle-bls12-381 = { path = ".", features = ["arkworks"] }
serial_test = "3.0.0"
[features]
default = []
@@ -30,3 +33,7 @@ g2 = ["icicle-core/g2"]
ec_ntt = ["icicle-core/ec_ntt"]
devmode = ["icicle-core/devmode"]
arkworks = ["ark-bls12-381", "icicle-core/arkworks"]
[[bench]]
name = "ecntt"
harness = false # Criterion provides its own harness

View File

@@ -0,0 +1,10 @@
#[cfg(feature = "ec_ntt")]
use icicle_bls12_381::curve::{CurveCfg, ScalarField};
#[cfg(feature = "ec_ntt")]
use icicle_core::impl_ecntt_bench;
#[cfg(feature = "ec_ntt")]
impl_ecntt_bench!("BLS12_381", ScalarField, CurveCfg);
#[cfg(not(feature = "ec_ntt"))]
fn main() {}

View File

@@ -7,28 +7,29 @@ fn main() {
// Base config
let mut config = Config::new("../../../../icicle");
config
.define("BUILD_TESTS", "OFF")
.define("CURVE", "bls12_381")
.define("CMAKE_BUILD_TYPE", "Release");
// Optional Features
#[cfg(feature = "g2")]
config.define("G2_DEFINED", "ON");
config.define("G2", "ON");
#[cfg(feature = "ec_ntt")]
config.define("ECNTT_DEFINED", "ON");
config.define("ECNTT", "ON");
#[cfg(feature = "devmode")]
config.define("DEVMODE", "ON");
// Build
let out_dir = config
.build_target("icicle")
.build_target("icicle_curve")
.build();
println!("cargo:rustc-link-search={}/build", out_dir.display());
println!("cargo:rustc-link-search={}/build/lib", out_dir.display());
println!("cargo:rustc-link-lib=ingo_field_bls12_381");
println!("cargo:rustc-link-lib=ingo_curve_bls12_381");
println!("cargo:rustc-link-lib=ingo_bls12_381");
println!("cargo:rustc-link-lib=stdc++");
println!("cargo:rustc-link-lib=cudart");
}

View File

@@ -6,14 +6,15 @@ use icicle_core::curve::{Affine, Curve, Projective};
use icicle_core::field::{Field, MontgomeryConvertibleField};
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
use icicle_core::{impl_curve, impl_field, impl_scalar_field};
use icicle_cuda_runtime::device::check_device;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
pub(crate) const SCALAR_LIMBS: usize = 4;
pub(crate) const BASE_LIMBS: usize = 6;
pub(crate) const SCALAR_LIMBS: usize = 8;
pub(crate) const BASE_LIMBS: usize = 12;
#[cfg(feature = "g2")]
pub(crate) const G2_BASE_LIMBS: usize = 12;
pub(crate) const G2_BASE_LIMBS: usize = 24;
impl_scalar_field!("bls12_381", bls12_381_sf, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
impl_field!(BASE_LIMBS, BaseField, BaseCfg, Fq);
@@ -31,7 +32,7 @@ impl_curve!(
);
#[cfg(feature = "g2")]
impl_curve!(
"bls12_381G2",
"bls12_381_g2",
bls12_381_g2,
G2CurveCfg,
ScalarField,

View File

@@ -0,0 +1,23 @@
#![cfg(feature = "ec_ntt")]
use icicle_core::error::IcicleResult;
use icicle_core::impl_ecntt;
use icicle_core::ntt::{NTTConfig, NTTDir};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
use icicle_cuda_runtime::error::CudaError;
use crate::curve::{CurveCfg, ScalarCfg, ScalarField};
use icicle_core::ecntt::Projective;
impl_ecntt!("bls12_381", bls12_381, ScalarField, ScalarCfg, CurveCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::curve::{CurveCfg, ScalarField};
use icicle_core::ecntt::tests::*;
use icicle_core::impl_ecntt_tests;
use std::sync::OnceLock;
impl_ecntt_tests!(ScalarField, CurveCfg);
}

View File

@@ -1,6 +1,8 @@
pub mod curve;
pub mod ecntt;
pub mod msm;
pub mod ntt;
pub mod polynomials;
pub mod poseidon;
pub mod tree;
pub mod vec_ops;

View File

@@ -8,11 +8,15 @@ use icicle_core::{
msm::{MSMConfig, MSM},
traits::IcicleResultWrap,
};
use icicle_cuda_runtime::{device_context::DeviceContext, error::CudaError, memory::HostOrDeviceSlice};
use icicle_cuda_runtime::{
device_context::DeviceContext,
error::CudaError,
memory::{DeviceSlice, HostOrDeviceSlice},
};
impl_msm!("bls12_381", bls12_381, CurveCfg);
#[cfg(feature = "g2")]
impl_msm!("bls12_381G2", bls12_381_g2, G2CurveCfg);
impl_msm!("bls12_381_g2", bls12_381_g2, G2CurveCfg);
#[cfg(test)]
pub(crate) mod tests {

View File

@@ -1,11 +1,10 @@
use crate::curve::{ScalarCfg, ScalarField};
use icicle_core::error::IcicleResult;
use icicle_core::impl_ntt;
use icicle_core::ntt::{NTTConfig, NTTDir, NTT};
use icicle_core::ntt::{NTTConfig, NTTDir, NTTDomain, NTT};
use icicle_core::traits::IcicleResultWrap;
use icicle_core::{impl_ntt, impl_ntt_without_domain};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
@@ -14,9 +13,10 @@ impl_ntt!("bls12_381", bls12_381, ScalarField, ScalarCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::curve::ScalarField;
use crate::ntt::DEFAULT_DEVICE_ID;
use icicle_core::impl_ntt_tests;
use icicle_core::ntt::tests::*;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
use serial_test::{parallel, serial};
use std::sync::OnceLock;
impl_ntt_tests!(ScalarField);

View File

@@ -0,0 +1,10 @@
use crate::curve::{ScalarCfg, ScalarField};
use icicle_core::impl_univariate_polynomial_api;
impl_univariate_polynomial_api!("bls12_381", bls12_381, ScalarField, ScalarCfg);
#[cfg(test)]
mod tests {
use icicle_core::impl_polynomial_tests;
impl_polynomial_tests!(bls12_381, ScalarField);
}

View File

@@ -11,11 +11,13 @@ repository.workspace = true
icicle-core = { workspace = true }
icicle-cuda-runtime = { workspace = true }
ark-bn254 = { version = "0.4.0", optional = true }
criterion = "0.3"
[build-dependencies]
cmake = "0.1.50"
[dev-dependencies]
criterion = "0.3"
ark-bn254 = "0.4.0"
ark-std = "0.4.0"
ark-ff = "0.4.0"
@@ -23,6 +25,7 @@ ark-ec = "0.4.0"
ark-poly = "0.4.0"
icicle-core = { path = "../../icicle-core", features = ["arkworks"] }
icicle-bn254 = { path = ".", features = ["arkworks"] }
serial_test = "3.0.0"
[features]
default = []
@@ -30,3 +33,7 @@ g2 = ["icicle-core/g2"]
ec_ntt = ["icicle-core/ec_ntt"]
devmode = ["icicle-core/devmode"]
arkworks = ["ark-bn254", "icicle-core/arkworks"]
[[bench]]
name = "ecntt"
harness = false # Criterion provides its own harness

View File

@@ -0,0 +1,10 @@
#[cfg(feature = "ec_ntt")]
use icicle_bn254::curve::{CurveCfg, ScalarField};
#[cfg(feature = "ec_ntt")]
use icicle_core::impl_ecntt_bench;
#[cfg(feature = "ec_ntt")]
impl_ecntt_bench!("bn254", ScalarField, CurveCfg);
#[cfg(not(feature = "ec_ntt"))]
fn main() {}

View File

@@ -5,30 +5,30 @@ fn main() {
println!("cargo:rerun-if-changed=../../../../icicle");
// Base config
let mut config = Config::new("../../../../icicle");
let mut config = Config::new("../../../../icicle/");
config
.define("BUILD_TESTS", "OFF")
.define("CURVE", "bn254")
.define("CMAKE_BUILD_TYPE", "Release");
// Optional Features
#[cfg(feature = "g2")]
config.define("G2_DEFINED", "ON");
config.define("G2", "ON");
#[cfg(feature = "ec_ntt")]
config.define("ECNTT_DEFINED", "ON");
config.define("ECNTT", "ON");
#[cfg(feature = "devmode")]
config.define("DEVMODE", "ON");
// Build
let out_dir = config
.build_target("icicle")
.build_target("icicle_curve")
.build();
println!("cargo:rustc-link-search={}/build", out_dir.display());
println!("cargo:rustc-link-search={}/build/lib/", out_dir.display());
println!("cargo:rustc-link-lib=ingo_bn254");
println!("cargo:rustc-link-lib=ingo_field_bn254");
println!("cargo:rustc-link-lib=ingo_curve_bn254");
println!("cargo:rustc-link-lib=stdc++");
println!("cargo:rustc-link-lib=cudart");
}

View File

@@ -6,14 +6,15 @@ use icicle_core::curve::{Affine, Curve, Projective};
use icicle_core::field::{Field, MontgomeryConvertibleField};
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
use icicle_core::{impl_curve, impl_field, impl_scalar_field};
use icicle_cuda_runtime::device::check_device;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
pub(crate) const SCALAR_LIMBS: usize = 4;
pub(crate) const BASE_LIMBS: usize = 4;
pub(crate) const SCALAR_LIMBS: usize = 8;
pub(crate) const BASE_LIMBS: usize = 8;
#[cfg(feature = "g2")]
pub(crate) const G2_BASE_LIMBS: usize = 8;
pub(crate) const G2_BASE_LIMBS: usize = 16;
impl_scalar_field!("bn254", bn254_sf, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
impl_field!(BASE_LIMBS, BaseField, BaseCfg, Fq);
@@ -31,7 +32,7 @@ impl_curve!(
);
#[cfg(feature = "g2")]
impl_curve!(
"bn254G2",
"bn254_g2",
bn254_g2,
G2CurveCfg,
ScalarField,

View File

@@ -0,0 +1,23 @@
#![cfg(feature = "ec_ntt")]
use icicle_core::error::IcicleResult;
use icicle_core::impl_ecntt;
use icicle_core::ntt::{NTTConfig, NTTDir};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
use icicle_cuda_runtime::error::CudaError;
use crate::curve::{CurveCfg, ScalarCfg, ScalarField};
use icicle_core::ecntt::Projective;
impl_ecntt!("bn254", bn254, ScalarField, ScalarCfg, CurveCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::curve::{CurveCfg, ScalarField};
use icicle_core::ecntt::tests::*;
use icicle_core::impl_ecntt_tests;
use std::sync::OnceLock;
impl_ecntt_tests!(ScalarField, CurveCfg);
}

View File

@@ -1,6 +1,8 @@
pub mod curve;
pub mod ecntt;
pub mod msm;
pub mod ntt;
pub mod polynomials;
pub mod poseidon;
pub mod tree;
pub mod vec_ops;

View File

@@ -8,11 +8,15 @@ use icicle_core::{
msm::{MSMConfig, MSM},
traits::IcicleResultWrap,
};
use icicle_cuda_runtime::{device_context::DeviceContext, error::CudaError, memory::HostOrDeviceSlice};
use icicle_cuda_runtime::{
device_context::DeviceContext,
error::CudaError,
memory::{DeviceSlice, HostOrDeviceSlice},
};
impl_msm!("bn254", bn254, CurveCfg);
#[cfg(feature = "g2")]
impl_msm!("bn254G2", bn254_g2, G2CurveCfg);
impl_msm!("bn254_g2", bn254_g2, G2CurveCfg);
#[cfg(test)]
pub(crate) mod tests {

View File

@@ -1,11 +1,10 @@
use crate::curve::{ScalarCfg, ScalarField};
use icicle_core::error::IcicleResult;
use icicle_core::impl_ntt;
use icicle_core::ntt::{NTTConfig, NTTDir, NTT};
use icicle_core::ntt::{NTTConfig, NTTDir, NTTDomain, NTT};
use icicle_core::traits::IcicleResultWrap;
use icicle_core::{impl_ntt, impl_ntt_without_domain};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
@@ -14,9 +13,10 @@ impl_ntt!("bn254", bn254, ScalarField, ScalarCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::curve::ScalarField;
use crate::ntt::DEFAULT_DEVICE_ID;
use icicle_core::impl_ntt_tests;
use icicle_core::ntt::tests::*;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
use serial_test::{parallel, serial};
use std::sync::OnceLock;
impl_ntt_tests!(ScalarField);

View File

@@ -0,0 +1,10 @@
use crate::curve::{ScalarCfg, ScalarField};
use icicle_core::impl_univariate_polynomial_api;
impl_univariate_polynomial_api!("bn254", bn254, ScalarField, ScalarCfg);
#[cfg(test)]
mod tests {
use icicle_core::impl_polynomial_tests;
impl_polynomial_tests!(bn254, ScalarField);
}

View File

@@ -12,11 +12,13 @@ icicle-core = { workspace = true }
icicle-cuda-runtime = { workspace = true }
icicle-bls12-377 = { path = "../../icicle-curves/icicle-bls12-377", features = ["bw6-761"] }
ark-bw6-761 = { version = "0.4.0", optional = true }
criterion = "0.3"
[build-dependencies]
cmake = "0.1.50"
[dev-dependencies]
criterion = "0.3"
ark-bw6-761 = "0.4.0"
ark-std = "0.4.0"
ark-ff = "0.4.0"
@@ -24,9 +26,10 @@ ark-ec = "0.4.0"
ark-poly = "0.4.0"
icicle-core = { path = "../../icicle-core", features = ["arkworks"] }
icicle-bw6-761 = { path = ".", features = ["arkworks"] }
serial_test = "3.0.0"
[features]
default = []
g2 = ["icicle-bls12-377/bw6-761-g2"]
devmode = ["icicle-core/devmode"]
arkworks = ["ark-bw6-761", "icicle-core/arkworks", "icicle-bls12-377/arkworks"]
arkworks = ["ark-bw6-761", "icicle-core/arkworks", "icicle-bls12-377/arkworks"]

View File

@@ -9,9 +9,8 @@ use icicle_core::traits::FieldConfig;
use icicle_core::{impl_curve, impl_field};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
pub(crate) const BASE_LIMBS: usize = 12;
pub(crate) const BASE_LIMBS: usize = 24;
impl_field!(BASE_LIMBS, BaseField, BaseCfg, Fq);
pub type ScalarField = bls12_377BaseField;
@@ -27,7 +26,7 @@ impl_curve!(
);
#[cfg(feature = "g2")]
impl_curve!(
"bw6_761G2",
"bw6_761_g2",
bw6_761_g2,
G2CurveCfg,
ScalarField,

View File

@@ -8,11 +8,15 @@ use icicle_core::{
msm::{MSMConfig, MSM},
traits::IcicleResultWrap,
};
use icicle_cuda_runtime::{device_context::DeviceContext, error::CudaError, memory::HostOrDeviceSlice};
use icicle_cuda_runtime::{
device_context::DeviceContext,
error::CudaError,
memory::{DeviceSlice, HostOrDeviceSlice},
};
impl_msm!("bw6_761", bw6_761, CurveCfg);
#[cfg(feature = "g2")]
impl_msm!("bw6_761G2", bw6_761_g2, G2CurveCfg);
impl_msm!("bw6_761_g2", bw6_761_g2, G2CurveCfg);
#[cfg(test)]
pub(crate) mod tests {

View File

@@ -4,6 +4,7 @@ pub(crate) mod tests {
use icicle_core::impl_ntt_tests;
use icicle_core::ntt::tests::*;
use icicle_cuda_runtime::device_context::DEFAULT_DEVICE_ID;
use serial_test::{parallel, serial};
use std::sync::OnceLock;
impl_ntt_tests!(ScalarField);

View File

@@ -1,30 +0,0 @@
[package]
name = "icicle-<CURVE>"
version.workspace = true
edition.workspace = true
authors.workspace = true
description = "Rust wrapper for the CUDA implementation of <CURVE> elliptic curve by Ingonyama"
homepage.workspace = true
repository.workspace = true
[dependencies]
icicle-core = { workspace = true }
icicle-cuda-runtime = { workspace = true }
ark-<CURVE> = { version = "0.4.0", optional = true }
[build-dependencies]
cmake = "0.1.50"
[dev-dependencies]
ark-<CURVE> = "0.4.0"
ark-std = "0.4.0"
ark-ff = "0.4.0"
ark-ec = "0.4.0"
ark-poly = "0.4.0"
icicle-core = { path = "../../icicle-core", features = ["arkworks"] }
icicle-<CURVE> = { path = ".", features = ["arkworks"] }
[features]
default = []
g2 = ["icicle-core/g2"]
arkworks = ["ark-<CURVE>", "icicle-core/arkworks"]

View File

@@ -1,28 +0,0 @@
use cmake::Config;
fn main() {
println!("cargo:rerun-if-env-changed=CXXFLAGS");
println!("cargo:rerun-if-changed=../../../../icicle");
// Base config
let mut config = Config::new("../../../../icicle");
config
.define("BUILD_TESTS", "OFF")
.define("CURVE", "<CURVE>")
.define("CMAKE_BUILD_TYPE", "Release");
// Optional Features
#[cfg(feature = "g2")]
config.define("G2_DEFINED", "ON");
// Build
let out_dir = config
.build_target("icicle")
.build();
println!("cargo:rustc-link-search={}/build", out_dir.display());
println!("cargo:rustc-link-lib=ingo_<CURVE>");
println!("cargo:rustc-link-lib=stdc++");
println!("cargo:rustc-link-lib=cudart");
}

View File

@@ -1,61 +0,0 @@
#[cfg(feature = "arkworks")]
use ark_<CURVE>::{g1::Config as ArkG1Config, Fq, Fr};
#[cfg(all(feature = "arkworks", feature = "g2"))]
use ark_<CURVE>::{g2::Config as ArkG2Config, Fq2};
use icicle_core::curve::{Affine, Curve, Projective};
use icicle_core::field::{Field, MontgomeryConvertibleField};
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
use icicle_core::{impl_curve, impl_field, impl_scalar_field};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
pub(crate) const SCALAR_LIMBS: usize = ;
pub(crate) const BASE_LIMBS: usize = ;
#[cfg(feature = "g2")]
pub(crate) const G2_BASE_LIMBS: usize = ;
impl_scalar_field!("<CURVE>", <CURVE>_sf, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
impl_field!(BASE_LIMBS, BaseField, BaseCfg, Fq);
#[cfg(feature = "g2")]
impl_field!(G2_BASE_LIMBS, G2BaseField, G2BaseCfg, Fq2);
impl_curve!(
"<CURVE>",
<CURVE>,
CurveCfg,
ScalarField,
BaseField,
ArkG1Config,
G1Affine,
G1Projective
);
#[cfg(feature = "g2")]
impl_curve!(
"<CURVE>G2",
<CURVE>_g2,
G2CurveCfg,
ScalarField,
G2BaseField,
ArkG2Config,
G2Affine,
G2Projective
);
#[cfg(test)]
mod tests {
use super::{CurveCfg, ScalarField, BASE_LIMBS};
#[cfg(feature = "g2")]
use super::{G2CurveCfg, G2_BASE_LIMBS};
use icicle_core::curve::Curve;
use icicle_core::tests::*;
use icicle_core::traits::FieldImpl;
use icicle_core::{impl_curve_tests, impl_field_tests};
impl_field_tests!(ScalarField);
impl_curve_tests!(BASE_LIMBS, CurveCfg);
#[cfg(feature = "g2")]
mod g2 {
use super::*;
impl_curve_tests!(G2_BASE_LIMBS, G2CurveCfg);
}
}

View File

@@ -1,8 +0,0 @@
pub mod curve;
pub mod msm;
pub mod ntt;
pub mod poseidon;
pub mod tree;
pub mod vec_ops;
impl icicle_core::SNARKCurve for curve::CurveCfg {}

View File

@@ -1,31 +0,0 @@
use crate::curve::CurveCfg;
#[cfg(feature = "g2")]
use crate::curve::G2CurveCfg;
use icicle_core::{
curve::{Affine, Curve, Projective},
error::IcicleResult,
impl_msm,
msm::{MSMConfig, MSM},
traits::IcicleResultWrap,
};
use icicle_cuda_runtime::{error::CudaError, memory::HostOrDeviceSlice};
impl_msm!("<CURVE>", <CURVE>, CurveCfg);
#[cfg(feature = "g2")]
impl_msm!("<CURVE>G2", <CURVE>_g2, G2CurveCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::curve::CurveCfg;
#[cfg(feature = "g2")]
use crate::curve::G2CurveCfg;
use icicle_core::impl_msm_tests;
use icicle_core::msm::tests::*;
impl_msm_tests!(CurveCfg);
#[cfg(feature = "g2")]
mod g2 {
use super::*;
impl_msm_tests!(G2CurveCfg);
}
}

View File

@@ -1,22 +0,0 @@
use crate::curve::{ScalarCfg, ScalarField};
use icicle_core::error::IcicleResult;
use icicle_core::impl_ntt;
use icicle_core::ntt::{NTTConfig, NTTDir, NTT};
use icicle_core::traits::IcicleResultWrap;
use icicle_cuda_runtime::device_context::{DeviceContext, DEFAULT_DEVICE_ID};
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
impl_ntt!("<CURVE>", <CURVE>, ScalarField, ScalarCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::curve::ScalarField;
use crate::ntt::DEFAULT_DEVICE_ID;
use icicle_core::impl_ntt_tests;
use icicle_core::ntt::tests::*;
use std::sync::OnceLock;
impl_ntt_tests!(ScalarField);
}

View File

@@ -1,22 +0,0 @@
use crate::curve::{ScalarCfg, ScalarField};
use icicle_core::error::IcicleResult;
use icicle_core::impl_poseidon;
use icicle_core::poseidon::{Poseidon, PoseidonConfig, PoseidonConstants};
use icicle_core::traits::IcicleResultWrap;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use core::mem::MaybeUninit;
impl_poseidon!("<CURVE>", <CURVE>, ScalarField, ScalarCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::curve::ScalarField;
use icicle_core::impl_poseidon_tests;
use icicle_core::poseidon::tests::*;
impl_poseidon_tests!(ScalarField);
}

View File

@@ -1,21 +0,0 @@
use crate::curve::{ScalarCfg, ScalarField};
use icicle_core::error::IcicleResult;
use icicle_core::impl_tree_builder;
use icicle_core::poseidon::PoseidonConstants;
use icicle_core::traits::IcicleResultWrap;
use icicle_core::tree::{TreeBuilder, TreeBuilderConfig};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
impl_tree_builder!("<CURVE>", <CURVE>, ScalarField, ScalarCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::curve::ScalarField;
use icicle_core::impl_tree_builder_tests;
use icicle_core::tree::tests::*;
impl_tree_builder_tests!(ScalarField);
}

View File

@@ -4,24 +4,22 @@ fn main() {
println!("cargo:rerun-if-env-changed=CXXFLAGS");
println!("cargo:rerun-if-changed=../../../../icicle");
// Base config
let mut config = Config::new("../../../../icicle");
config
.define("BUILD_TESTS", "OFF")
.define("CURVE", "grumpkin")
.define("CMAKE_BUILD_TYPE", "Release");
#[cfg(feature = "devmode")]
config.define("DEVMODE", "ON");
// Build
let out_dir = config
.build_target("icicle")
.build_target("icicle_curve")
.build();
println!("cargo:rustc-link-search={}/build", out_dir.display());
println!("cargo:rustc-link-search={}/build/lib", out_dir.display());
println!("cargo:rustc-link-lib=ingo_grumpkin");
println!("cargo:rustc-link-lib=ingo_field_grumpkin");
println!("cargo:rustc-link-lib=ingo_curve_grumpkin");
println!("cargo:rustc-link-lib=stdc++");
println!("cargo:rustc-link-lib=cudart");
}

View File

@@ -4,12 +4,13 @@ use icicle_core::curve::{Affine, Curve, Projective};
use icicle_core::field::{Field, MontgomeryConvertibleField};
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
use icicle_core::{impl_curve, impl_field, impl_scalar_field};
use icicle_cuda_runtime::device::check_device;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
pub(crate) const SCALAR_LIMBS: usize = 4;
pub(crate) const BASE_LIMBS: usize = 4;
pub(crate) const SCALAR_LIMBS: usize = 8;
pub(crate) const BASE_LIMBS: usize = 8;
impl_scalar_field!("grumpkin", grumpkin_sf, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
impl_field!(BASE_LIMBS, BaseField, BaseCfg, Fq);

View File

@@ -6,7 +6,11 @@ use icicle_core::{
msm::{MSMConfig, MSM},
traits::IcicleResultWrap,
};
use icicle_cuda_runtime::{device_context::DeviceContext, error::CudaError, memory::HostOrDeviceSlice};
use icicle_cuda_runtime::{
device_context::DeviceContext,
error::CudaError,
memory::{DeviceSlice, HostOrDeviceSlice},
};
impl_msm!("grumpkin", grumpkin, CurveCfg);

View File

@@ -0,0 +1,28 @@
[package]
name = "icicle-babybear"
version.workspace = true
edition.workspace = true
authors.workspace = true
description = "Rust wrapper for the CUDA implementation of baby bear prime field by Ingonyama"
homepage.workspace = true
repository.workspace = true
[dependencies]
icicle-core = { workspace = true }
icicle-cuda-runtime = { workspace = true }
[build-dependencies]
cmake = "0.1.50"
[dev-dependencies]
risc0-core = "0.21.0"
risc0-zkp = "0.21.0"
p3-baby-bear = { git = "https://github.com/Plonky3/Plonky3", rev = "83590121c8c28011cffa7e73cb71cf9bf94b8477" }
p3-field = { git = "https://github.com/Plonky3/Plonky3", rev = "83590121c8c28011cffa7e73cb71cf9bf94b8477" }
p3-dft = { git = "https://github.com/Plonky3/Plonky3", rev = "83590121c8c28011cffa7e73cb71cf9bf94b8477" }
p3-matrix = { git = "https://github.com/Plonky3/Plonky3", rev = "83590121c8c28011cffa7e73cb71cf9bf94b8477" }
serial_test = "3.0.0"
[features]
default = []
devmode = ["icicle-core/devmode"]

View File

@@ -0,0 +1,24 @@
use cmake::Config;
fn main() {
println!("cargo:rerun-if-env-changed=CXXFLAGS");
println!("cargo:rerun-if-changed=../../../../icicle");
// Base config
let mut config = Config::new("../../../../icicle/");
config
.define("FIELD", "babybear")
.define("CMAKE_BUILD_TYPE", "Release")
.define("EXT_FIELD", "ON");
// Build
let out_dir = config
.build_target("icicle_field")
.build();
println!("cargo:rustc-link-search={}/build/lib", out_dir.display());
println!("cargo:rustc-link-lib=ingo_field_babybear");
println!("cargo:rustc-link-lib=stdc++");
println!("cargo:rustc-link-lib=cudart");
}

View File

@@ -0,0 +1,34 @@
use icicle_core::field::{Field, MontgomeryConvertibleField};
use icicle_core::traits::{FieldConfig, FieldImpl, GenerateRandom};
use icicle_core::{impl_field, impl_scalar_field};
use icicle_cuda_runtime::device::check_device;
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::{DeviceSlice, HostOrDeviceSlice};
pub(crate) const SCALAR_LIMBS: usize = 1;
pub(crate) const EXTENSION_LIMBS: usize = 4;
impl_scalar_field!("babybear", babybear, SCALAR_LIMBS, ScalarField, ScalarCfg, Fr);
impl_scalar_field!(
"babybear_extension",
babybear_extension,
EXTENSION_LIMBS,
ExtensionField,
ExtensionCfg,
Fr
);
#[cfg(test)]
mod tests {
use super::{ExtensionField, ScalarField};
use icicle_core::impl_field_tests;
use icicle_core::tests::*;
impl_field_tests!(ScalarField);
mod extension {
use super::*;
impl_field_tests!(ExtensionField);
}
}

View File

@@ -0,0 +1,4 @@
pub mod field;
pub mod ntt;
pub mod polynomials;
pub mod vec_ops;

View File

@@ -0,0 +1,174 @@
use crate::field::{ExtensionField, ScalarCfg, ScalarField};
use icicle_core::error::IcicleResult;
use icicle_core::ntt::{NTTConfig, NTTDir, NTTDomain, NTT};
use icicle_core::traits::IcicleResultWrap;
use icicle_core::{impl_ntt, impl_ntt_without_domain};
use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
impl_ntt!("babybear", babybear, ScalarField, ScalarCfg);
impl_ntt_without_domain!(
"babybear_extension",
ScalarField,
ScalarCfg,
NTT,
"_ntt",
ExtensionField
);
#[cfg(test)]
pub(crate) mod tests {
use super::{ExtensionField, ScalarField};
use icicle_core::{
ntt::{initialize_domain, ntt_inplace, release_domain, NTTConfig, NTTDir},
traits::{FieldImpl, GenerateRandom},
};
use icicle_cuda_runtime::{device_context::DeviceContext, memory::HostSlice};
use p3_baby_bear::BabyBear;
use p3_dft::{Radix2Dit, TwoAdicSubgroupDft};
use p3_field::{
extension::BinomialExtensionField, AbstractExtensionField, AbstractField, PrimeField32, TwoAdicField,
};
use p3_matrix::dense::RowMajorMatrix;
use risc0_core::field::{
baby_bear::{Elem, ExtElem},
Elem as FieldElem, RootsOfUnity,
};
use serial_test::serial;
// Note that the risc0 and plonky3 tests shouldn't be run in parallel, as they initialize the domain with different roots of unity
#[test]
#[serial]
fn test_against_risc0() {
let log_sizes = [15, 20];
let ctx = DeviceContext::default();
let risc0_rou = Elem::ROU_FWD[log_sizes[1]];
initialize_domain(ScalarField::from([risc0_rou.as_u32()]), &ctx, false).unwrap();
for log_size in log_sizes {
let ntt_size = 1 << log_size;
let mut scalars: Vec<ScalarField> = <ScalarField as FieldImpl>::Config::generate_random(ntt_size);
let mut scalars_risc0: Vec<Elem> = scalars
.iter()
.map(|x| Elem::new(Into::<[u32; 1]>::into(*x)[0]))
.collect();
let ntt_cfg: NTTConfig<'_, ScalarField> = NTTConfig::default();
ntt_inplace(HostSlice::from_mut_slice(&mut scalars[..]), NTTDir::kForward, &ntt_cfg).unwrap();
risc0_zkp::core::ntt::bit_reverse(&mut scalars_risc0[..]);
risc0_zkp::core::ntt::evaluate_ntt::<Elem, Elem>(&mut scalars_risc0[..], ntt_size);
for (s1, s2) in scalars
.iter()
.zip(scalars_risc0)
{
assert_eq!(Into::<[u32; 1]>::into(*s1)[0], s2.as_u32());
}
let mut ext_scalars: Vec<ExtensionField> = <ExtensionField as FieldImpl>::Config::generate_random(ntt_size);
let mut ext_scalars_risc0: Vec<ExtElem> = ext_scalars
.iter()
.map(|x| ExtElem::from_u32_words(&Into::<[u32; 4]>::into(*x)[..]))
.collect();
ntt_inplace(
HostSlice::from_mut_slice(&mut ext_scalars[..]),
NTTDir::kForward,
&ntt_cfg,
)
.unwrap();
risc0_zkp::core::ntt::bit_reverse(&mut ext_scalars_risc0[..]);
risc0_zkp::core::ntt::evaluate_ntt::<Elem, ExtElem>(&mut ext_scalars_risc0[..], ntt_size);
for (s1, s2) in ext_scalars
.iter()
.zip(ext_scalars_risc0)
{
assert_eq!(Into::<[u32; 4]>::into(*s1)[..], s2.to_u32_words()[..]);
}
}
release_domain::<ScalarField>(&ctx).unwrap();
}
#[test]
#[serial]
fn test_against_plonky3() {
let log_ncols = [15, 18];
let nrows = 4;
let ctx = DeviceContext::default();
let plonky3_rou = BabyBear::two_adic_generator(log_ncols[1]);
// To compute FFTs with icicle, we first need to initialize the NTT domain using plonky3's "two-adic generator"
initialize_domain(ScalarField::from([plonky3_rou.as_canonical_u32()]), &ctx, false).unwrap();
for log_ncol in log_ncols {
let ntt_size = 1 << log_ncol;
let mut scalars: Vec<ScalarField> = <ScalarField as FieldImpl>::Config::generate_random(nrows * ntt_size);
let scalars_p3: Vec<BabyBear> = scalars
.iter()
.map(|x| BabyBear::from_wrapped_u32(Into::<[u32; 1]>::into(*x)[0]))
.collect();
let matrix_p3 = RowMajorMatrix::new(scalars_p3, nrows);
let mut ntt_cfg: NTTConfig<'_, ScalarField> = NTTConfig::default();
// The next two lines signal that we want to compute `nrows` FFTs over column-ordered (column-batch) data
ntt_cfg.batch_size = nrows as i32;
ntt_cfg.columns_batch = true;
ntt_inplace(HostSlice::from_mut_slice(&mut scalars[..]), NTTDir::kForward, &ntt_cfg).unwrap();
let result_p3 = Radix2Dit.dft_batch(matrix_p3);
for i in 0..nrows {
for j in 0..ntt_size {
assert_eq!(
Into::<[u32; 1]>::into(scalars[i + j * nrows])[0],
result_p3.values[i + j * nrows].as_canonical_u32()
);
}
}
type Plonky3Extension = BinomialExtensionField<BabyBear, 4>;
let mut ext_scalars: Vec<ExtensionField> =
<ExtensionField as FieldImpl>::Config::generate_random(nrows * ntt_size);
let ext_scalars_p3: Vec<Plonky3Extension> = ext_scalars
.iter()
.map(|x| {
let arr: [u32; 4] = (*x).into();
Plonky3Extension::from_base_slice(
&(arr
.iter()
.map(|y| BabyBear::from_wrapped_u32(*y))
.collect::<Vec<BabyBear>>())[..],
)
})
.collect();
let ext_matrix_p3 = RowMajorMatrix::new(ext_scalars_p3, nrows);
ntt_inplace(
HostSlice::from_mut_slice(&mut ext_scalars[..]),
NTTDir::kForward,
&ntt_cfg,
)
.unwrap();
let ext_result_p3 = Radix2Dit.dft_batch(ext_matrix_p3);
for i in 0..nrows {
for j in 0..ntt_size {
let arr: [u32; 4] = ext_scalars[i + j * nrows].into();
let base_slice: &[BabyBear] = ext_result_p3.values[i + j * nrows].as_base_slice();
for k in 0..4 {
assert_eq!(arr[k], base_slice[k].as_canonical_u32());
}
}
}
}
release_domain::<ScalarField>(&ctx).unwrap();
}
}
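// A condensed sketch of the column-batched NTT configuration exercised in the tests
// above, without the risc0/plonky3 cross-checks. The icicle calls mirror the test
// code; the helper module, its name, and its parameters are illustrative only.
mod batched_ntt_sketch {
    use crate::field::ScalarField;
    use icicle_core::ntt::{initialize_domain, ntt_inplace, release_domain, NTTConfig, NTTDir};
    use icicle_core::traits::{FieldImpl, GenerateRandom};
    use icicle_cuda_runtime::{device_context::DeviceContext, memory::HostSlice};
    pub fn run(rou: ScalarField, log_size: usize, nrows: usize) {
        let ctx = DeviceContext::default();
        // The domain's root of unity must have order at least 1 << log_size.
        initialize_domain(rou, &ctx, false).unwrap();
        let size = 1 << log_size;
        let mut data: Vec<ScalarField> = <ScalarField as FieldImpl>::Config::generate_random(nrows * size);
        // Compute `nrows` forward NTTs over column-major data in a single call.
        let mut cfg: NTTConfig<'_, ScalarField> = NTTConfig::default();
        cfg.batch_size = nrows as i32;
        cfg.columns_batch = true;
        ntt_inplace(HostSlice::from_mut_slice(&mut data[..]), NTTDir::kForward, &cfg).unwrap();
        release_domain::<ScalarField>(&ctx).unwrap();
    }
}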

View File

@@ -0,0 +1,10 @@
use crate::field::{ScalarCfg, ScalarField};
use icicle_core::impl_univariate_polynomial_api;
impl_univariate_polynomial_api!("babybear", babybear, ScalarField, ScalarCfg);
#[cfg(test)]
mod tests {
use icicle_core::impl_polynomial_tests;
impl_polynomial_tests!(babybear, ScalarField);
}

View File

@@ -1,4 +1,4 @@
use crate::curve::{ScalarCfg, ScalarField};
use crate::field::{ExtensionCfg, ExtensionField, ScalarCfg, ScalarField};
use icicle_core::error::IcicleResult;
use icicle_core::impl_vec_ops_field;
@@ -8,13 +8,19 @@ use icicle_cuda_runtime::device_context::DeviceContext;
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::memory::HostOrDeviceSlice;
impl_vec_ops_field!("<CURVE>", <CURVE>, ScalarField, ScalarCfg);
impl_vec_ops_field!("babybear", babybear, ScalarField, ScalarCfg);
impl_vec_ops_field!("babybear_extension", babybear_extension, ExtensionField, ExtensionCfg);
#[cfg(test)]
pub(crate) mod tests {
use crate::curve::ScalarField;
use crate::field::{ExtensionField, ScalarField};
use icicle_core::impl_vec_add_tests;
use icicle_core::vec_ops::tests::*;
impl_vec_add_tests!(ScalarField);
mod extension {
use super::*;
impl_vec_add_tests!(ExtensionField);
}
}

View File

@@ -0,0 +1,18 @@
[package]
name = "icicle-hash"
version.workspace = true
edition.workspace = true
authors.workspace = true
description = "Rust wrapper for the CUDA implementation of hash functions by Ingonyama"
homepage.workspace = true
repository.workspace = true
[dependencies]
icicle-core = { workspace = true }
icicle-cuda-runtime = { workspace = true }
[build-dependencies]
cmake = "0.1.50"
[features]
default = []

View File

@@ -0,0 +1,21 @@
use cmake::Config;
fn main() {
println!("cargo:rerun-if-env-changed=CXXFLAGS");
println!("cargo:rerun-if-changed=../../../icicle");
// Base config
let mut config = Config::new("../../../icicle/");
config.define("CMAKE_BUILD_TYPE", "Release");
config.define("BUILD_HASH", "ON");
// Build
let out_dir = config
.build_target("icicle_hash")
.build();
println!("cargo:rustc-link-search={}/build/lib", out_dir.display());
println!("cargo:rustc-link-lib=ingo_hash");
println!("cargo:rustc-link-lib=stdc++");
println!("cargo:rustc-link-lib=cudart");
}

View File

@@ -0,0 +1,102 @@
use icicle_cuda_runtime::error::CudaError;
use icicle_cuda_runtime::{
device_context::{DeviceContext, DEFAULT_DEVICE_ID},
memory::HostOrDeviceSlice,
};
use icicle_core::error::IcicleResult;
use icicle_core::traits::IcicleResultWrap;
pub mod tests;
#[repr(C)]
#[derive(Debug, Clone)]
pub struct KeccakConfig<'a> {
/// Details related to the device such as its id and stream id. See [DeviceContext](@ref device_context::DeviceContext).
pub ctx: DeviceContext<'a>,
/// True if inputs are on device and false if they're on host. Default value: false.
are_inputs_on_device: bool,
/// If true, output is preserved on device, otherwise on host. Default value: false.
are_outputs_on_device: bool,
/// Whether to run the Keccak asynchronously. If set to `true`, the keccak_hash function will be
/// non-blocking and you will need to synchronize explicitly by calling
/// `cudaStreamSynchronize` or `cudaDeviceSynchronize`. If set to `false`, the keccak_hash
/// function will block the current CPU thread.
is_async: bool,
}
impl<'a> Default for KeccakConfig<'a> {
fn default() -> Self {
Self::default_for_device(DEFAULT_DEVICE_ID)
}
}
impl<'a> KeccakConfig<'a> {
pub fn default_for_device(device_id: usize) -> Self {
KeccakConfig {
ctx: DeviceContext::default_for_device(device_id),
are_inputs_on_device: false,
are_outputs_on_device: false,
is_async: false,
}
}
}
extern "C" {
pub(crate) fn keccak256_cuda(
input: *const u8,
input_block_size: i32,
number_of_blocks: i32,
output: *mut u8,
config: KeccakConfig,
) -> CudaError;
pub(crate) fn keccak512_cuda(
input: *const u8,
input_block_size: i32,
number_of_blocks: i32,
output: *mut u8,
config: KeccakConfig,
) -> CudaError;
}
pub fn keccak256(
input: &(impl HostOrDeviceSlice<u8> + ?Sized),
input_block_size: i32,
number_of_blocks: i32,
output: &mut (impl HostOrDeviceSlice<u8> + ?Sized),
config: KeccakConfig,
) -> IcicleResult<()> {
unsafe {
keccak256_cuda(
input.as_ptr(),
input_block_size,
number_of_blocks,
output.as_mut_ptr(),
config,
)
.wrap()
}
}
pub fn keccak512(
input: &(impl HostOrDeviceSlice<u8> + ?Sized),
input_block_size: i32,
number_of_blocks: i32,
output: &mut (impl HostOrDeviceSlice<u8> + ?Sized),
config: KeccakConfig,
) -> IcicleResult<()> {
unsafe {
keccak512_cuda(
input.as_ptr(),
input_block_size,
number_of_blocks,
output.as_mut_ptr(),
config,
)
.wrap()
}
}

View File

@@ -0,0 +1 @@

View File

@@ -0,0 +1 @@
pub mod keccak;