Compare commits


23 Commits

Author SHA1 Message Date
hadaringonyama
eced32b28b clean the code 2023-05-28 14:10:29 +03:00
hadaringonyama
b316e4d4f7 small fix 2023-05-28 10:31:05 +03:00
hadaringonyama
b9ab19826a bug fix 2023-05-24 16:46:03 +03:00
hadaringonyama
e3f9237ceb tests work 2023-05-24 14:56:04 +03:00
hadaringonyama
0aef5f2f70 Merge remote-tracking branch 'origin/g2_extension_field' into msm-debug 2023-05-24 14:26:19 +03:00
DmytroTym
396c5f3c7b Fixed after merge errors 2023-05-24 04:12:19 +03:00
ImmanuelSegol
f183dacfd6 Merge remote-tracking branch 'origin/dev-v2' into g2_extension_field 2023-05-23 08:42:03 +03:00
DmytroTym
10a638fba5 Fixed warnings 2023-05-18 18:47:31 +00:00
DmytroTym
9e8f0ec8f2 Zero point equality issue fixed 2023-05-18 10:35:38 +00:00
DmytroTym
53a63bb5ad G2 2023-05-17 23:06:05 +00:00
hadaringonyama
c108f5cc90 testing batch 2023-05-17 11:57:53 +03:00
DmytroTym
af90ab0961 fix 2023-05-16 22:03:56 +00:00
DmytroTym
845a529423 Rust part of G2 2023-05-16 22:01:59 +00:00
HadarIngonyama
071c24ce5a supporting new curves (#74)
* Fix for local machines GoogleTest and CMake (#70)

GoogleTest fix, updated readme

* Supporting Additional Curves (#72)

* init commit - changes for supporting new curves

* refactor + additional curve (bls12-377 works, bn254 - not yet)

* general refactor + curves script + fixing bn245

* revert unnecessary changes + refactor new curve script

* add README and fix limbs_p=limbs_q case in python script

---------

Co-authored-by: Vitalii Hnatyk <vhnatyk@gmail.com>
Co-authored-by: guy-ingo <106763145+guy-ingo@users.noreply.github.com>
2023-05-15 15:51:48 +03:00
DmytroTym
08c34a5183 Fix for local machines GoogleTest and CMake (#70) (#73)
GoogleTest fix, updated readme

Co-authored-by: Vitalii Hnatyk <vhnatyk@gmail.com>
2023-05-15 15:23:06 +03:00
hadaringonyama
c13b003720 tidy test file 2023-05-15 11:56:19 +03:00
hadaringonyama
25a4eebc0a msm working, tested up to 20 2023-05-14 16:41:14 +03:00
DmytroTym
e41de7dec7 Still WIP 2023-05-11 22:51:17 +00:00
DmytroTym
472a9f5107 Tests and benches WIP 2023-05-10 21:14:11 +00:00
ingo_deploy
689b4814e1 setup testing, dummy passes 2023-05-10 14:38:22 +03:00
ingo_deploy
7ace91528a msm cuda test 2023-05-10 13:20:39 +03:00
DmytroTym
e0f5eac3a8 Some tests and Rust functionality WIP 2023-05-09 21:34:28 +00:00
DmytroTym
55b0faa0f3 G2 arithmetic WIP 2023-05-08 19:43:58 +00:00
145 changed files with 13108 additions and 18248 deletions


@@ -4,4 +4,4 @@ This PR...
## Linked Issues
Resolves #
Closes #


@@ -2,9 +2,7 @@ name: Build
 on:
   pull_request:
-    branches:
-      - "main"
-      - "dev"
+    branches: [ "main" ]
     paths:
       - "icicle/**"
       - "src/**"
@@ -14,7 +12,6 @@ on:
 env:
   CARGO_TERM_COLOR: always
   ARCH_TYPE: sm_70
-  DEFAULT_STREAM: per-thread
 jobs:
   build-linux:


@@ -1,9 +1,45 @@
-[workspace]
-name = "icicle"
+[package]
+name = "icicle-utils"
+version = "0.1.0"
+edition = "2021"
+authors = [ "Ingonyama" ]
+description = "An implementation of the Ingonyama CUDA Library"
+homepage = "https://www.ingonyama.com"
+repository = "https://github.com/ingonyama-zk/icicle"
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+[[bench]]
+name = "ntt"
+path = "benches/ntt.rs"
+harness = false
-members = ["icicle-core", "bls12-381", "bls12-377", "bn254"]
+[[bench]]
+name = "msm"
+path = "benches/msm.rs"
+harness = false
+[dependencies]
+hex = "*"
+ark-std = "0.3.0"
+ark-ff = "0.3.0"
+ark-poly = "0.3.0"
+ark-ec = { version = "0.3.0", features = [ "parallel" ] }
+ark-bls12-381 = "0.3.0"
+ark-bls12-377 = "0.3.0"
+ark-bn254 = "0.3.0"
+rustacuda = "0.1"
+rustacuda_core = "0.1"
+rustacuda_derive = "0.1"
+rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
+[build-dependencies]
+cc = { version = "1.0", features = ["parallel"] }
+[dev-dependencies]
+"criterion" = "0.4.0"
+[features]
+default = ["bls12_381"]
+bls12_381 = ["ark-bls12-381/curve"]
+g2 = []
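The `g2` feature above gates the extension-field (G2) code paths at compile time. A minimal sketch of such a gate, mirroring how benches/msm.rs below uses it:

// Compiled only when built with `cargo build --features g2`;
// without the feature, the item simply does not exist.
#[cfg(feature = "g2")]
use icicle_utils::{commit_batch_g2, field::ExtensionField};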

benches/msm.rs (new file, 50 lines)

@@ -0,0 +1,50 @@
extern crate criterion;
use criterion::{criterion_group, criterion_main, Criterion};
use icicle_utils::{set_up_scalars, generate_random_points, commit_batch, get_rng, field::BaseField};
#[cfg(feature = "g2")]
use icicle_utils::{commit_batch_g2, field::ExtensionField};
use rustacuda::prelude::*;
const LOG_MSM_SIZES: [usize; 1] = [12];
const BATCH_SIZES: [usize; 2] = [128, 256];
fn bench_msm(c: &mut Criterion) {
let mut group = c.benchmark_group("MSM");
for log_msm_size in LOG_MSM_SIZES {
for batch_size in BATCH_SIZES {
let msm_size = 1 << log_msm_size;
let (scalars, _, _) = set_up_scalars(msm_size, 0, false);
let batch_scalars = vec![scalars; batch_size].concat();
let mut d_scalars = DeviceBuffer::from_slice(&batch_scalars[..]).unwrap();
let points = generate_random_points::<BaseField>(msm_size, get_rng(None));
let batch_points = vec![points; batch_size].concat();
let mut d_points = DeviceBuffer::from_slice(&batch_points[..]).unwrap();
#[cfg(feature = "g2")]
let g2_points = generate_random_points::<ExtensionField>(msm_size, get_rng(None));
#[cfg(feature = "g2")]
let g2_batch_points = vec![g2_points; batch_size].concat();
#[cfg(feature = "g2")]
let mut d_g2_points = DeviceBuffer::from_slice(&g2_batch_points[..]).unwrap();
group.sample_size(30).bench_function(
&format!("MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|b| b.iter(|| commit_batch(&mut d_points, &mut d_scalars, batch_size))
);
#[cfg(feature = "g2")]
group.sample_size(10).bench_function(
&format!("G2 MSM of size 2^{} in batch {}", log_msm_size, batch_size),
|b| b.iter(|| commit_batch_g2(&mut d_g2_points, &mut d_scalars, batch_size))
);
}
}
}
criterion_group!(msm_benches, bench_msm);
criterion_main!(msm_benches);
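Because the manifest above sets `harness = false` for this bench target, `criterion_main!` supplies the binary's entry point; the suite runs with `cargo bench`, and adding `--features g2` includes the G2 cases.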

benches/ntt.rs (new file, 33 lines)

@@ -0,0 +1,33 @@
extern crate criterion;
use criterion::{criterion_group, criterion_main, Criterion};
use icicle_utils::{interpolate_scalars_batch, interpolate_points_batch, set_up_scalars, set_up_points};
const LOG_NTT_SIZES: [usize; 1] = [15];
const BATCH_SIZES: [usize; 2] = [8, 16];
fn bench_ntt(c: &mut Criterion) {
let mut group = c.benchmark_group("NTT");
for log_ntt_size in LOG_NTT_SIZES {
for batch_size in BATCH_SIZES {
let ntt_size = 1 << log_ntt_size;
let (_, mut d_evals, mut d_domain) = set_up_scalars(ntt_size * batch_size, log_ntt_size, true);
let (_, mut d_points_evals, _) = set_up_points(ntt_size * batch_size, log_ntt_size, true);
group.sample_size(100).bench_function(
&format!("Scalar NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size))
);
group.sample_size(10).bench_function(
&format!("EC NTT of size 2^{} in batch {}", log_ntt_size, batch_size),
|b| b.iter(|| interpolate_points_batch(&mut d_points_evals, &mut d_domain, batch_size))
);
}
}
}
criterion_group!(ntt_benches, bench_ntt);
criterion_main!(ntt_benches);


@@ -1,34 +0,0 @@
[package]
name = "bls12-377"
version = "0.1.0"
edition = "2021"
authors = [ "Ingonyama" ]
[dependencies]
icicle-core = { path = "../icicle-core" }
hex = "*"
ark-std = "0.3.0"
ark-ff = "0.3.0"
ark-poly = "0.3.0"
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
ark-bls12-377 = "0.3.0"
serde = { version = "1.0", features = ["derive"] }
serde_derive = "1.0"
serde_cbor = "0.11.2"
rustacuda = "0.1"
rustacuda_core = "0.1"
rustacuda_derive = "0.1"
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
[build-dependencies]
cc = { version = "1.0", features = ["parallel"] }
[dev-dependencies]
"criterion" = "0.4.0"
[features]
g2 = []


@@ -1,4 +0,0 @@
pub trait Field<const NUM_LIMBS: usize> {
const MODOLUS: [u32;NUM_LIMBS];
const LIMBS: usize = NUM_LIMBS;
}
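A minimal sketch of implementing this trait for a hypothetical 4-limb field (illustration only; the real implementations are the ScalarField/BaseField impls in curve_structs.rs below, and the trait spells the constant MODOLUS):

struct Toy;
impl Field<4> for Toy {
    // Hypothetical modulus limbs, least-significant word first.
    const MODOLUS: [u32; 4] = [0xFFFFFFFB, 0xFFFFFFFF, 0xFFFFFFFF, 0x7FFFFFFF];
}
// LIMBS defaults to NUM_LIMBS, so Toy::LIMBS == 4.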


@@ -1,3 +0,0 @@
pub mod field;
pub mod scalar;
pub mod point;


@@ -1,106 +0,0 @@
use std::ffi::c_uint;
use ark_ec::AffineCurve;
use ark_ff::{BigInteger256, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
use super::scalar::{get_fixed_limbs, self};
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointT<BF: scalar::ScalarTrait> {
pub x: BF,
pub y: BF,
pub z: BF,
}
impl<BF: DeviceCopy + scalar::ScalarTrait> Default for PointT<BF> {
fn default() -> Self {
PointT::zero()
}
}
impl<BF: DeviceCopy + scalar::ScalarTrait> PointT<BF> {
pub fn zero() -> Self {
PointT {
x: BF::zero(),
y: BF::one(),
z: BF::zero(),
}
}
pub fn infinity() -> Self {
Self::zero()
}
}
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinityT<BF> {
pub x: BF,
pub y: BF,
}
impl<BF: scalar::ScalarTrait> Default for PointAffineNoInfinityT<BF> {
fn default() -> Self {
PointAffineNoInfinityT {
x: BF::zero(),
y: BF::zero(),
}
}
}
impl<BF: Copy + scalar::ScalarTrait> PointAffineNoInfinityT<BF> {
/// From u32 limbs x, y
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
PointAffineNoInfinityT {
x: BF::from_limbs(x),
y: BF::from_limbs(y)
}
}
pub fn limbs(&self) -> Vec<u32> {
[self.x.limbs(), self.y.limbs()].concat()
}
pub fn to_projective(&self) -> PointT<BF> {
PointT {
x: self.x,
y: self.y,
z: BF::one(),
}
}
}
impl<BF: Copy + scalar::ScalarTrait> PointT<BF> {
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
PointT {
x: BF::from_limbs(x),
y: BF::from_limbs(y),
z: BF::from_limbs(z)
}
}
pub fn from_xy_limbs(value: &[u32]) -> PointT<BF> {
let l = value.len();
assert_eq!(l, 3 * BF::base_limbs(), "length must be 3 * {}", BF::base_limbs());
PointT {
x: BF::from_limbs(value[..BF::base_limbs()].try_into().unwrap()),
y: BF::from_limbs(value[BF::base_limbs()..BF::base_limbs() * 2].try_into().unwrap()),
z: BF::from_limbs(value[BF::base_limbs() * 2..].try_into().unwrap())
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinityT<BF> {
PointAffineNoInfinityT {
x: self.x,
y: self.y,
}
}
}


@@ -1,102 +0,0 @@
use std::ffi::{c_int, c_uint};
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
use std::mem::transmute;
use rustacuda::prelude::*;
use rustacuda_core::DevicePointer;
use rustacuda::memory::{DeviceBox, CopyDestination};
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use std::marker::PhantomData;
use std::convert::TryInto;
use super::field::{Field, self};
pub fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
match val.len() {
n if n < NUM_LIMBS => {
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
padded[..val.len()].copy_from_slice(&val);
padded
}
n if n == NUM_LIMBS => val.try_into().unwrap(),
_ => panic!("slice has too many elements"),
}
}
pub trait ScalarTrait{
fn base_limbs() -> usize;
fn zero() -> Self;
fn from_limbs(value: &[u32]) -> Self;
fn one() -> Self;
fn to_bytes_le(&self) -> Vec<u8>;
fn limbs(&self) -> &[u32];
}
#[derive(Debug, PartialEq, Clone, Copy)]
#[repr(C)]
pub struct ScalarT<M, const NUM_LIMBS: usize> {
pub(crate) phantom: PhantomData<M>,
pub(crate) value : [u32; NUM_LIMBS]
}
impl<M, const NUM_LIMBS: usize> ScalarTrait for ScalarT<M, NUM_LIMBS>
where
M: Field<NUM_LIMBS>,
{
fn base_limbs() -> usize {
return NUM_LIMBS;
}
fn zero() -> Self {
ScalarT {
value: [0u32; NUM_LIMBS],
phantom: PhantomData,
}
}
fn from_limbs(value: &[u32]) -> Self {
Self {
value: get_fixed_limbs(value),
phantom: PhantomData,
}
}
fn one() -> Self {
let mut s = [0u32; NUM_LIMBS];
s[0] = 1;
ScalarT { value: s, phantom: PhantomData }
}
fn to_bytes_le(&self) -> Vec<u8> {
self.value
.iter()
.map(|s| s.to_le_bytes().to_vec())
.flatten()
.collect::<Vec<_>>()
}
fn limbs(&self) -> &[u32] {
&self.value
}
}
impl<M, const NUM_LIMBS: usize> ScalarT<M, NUM_LIMBS> where M: field::Field<NUM_LIMBS>{
pub fn from_limbs_le(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
Self::from_limbs(value)
}
pub fn from_limbs_be(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
let mut value = value.to_vec();
value.reverse();
Self::from_limbs_le(&value)
}
// Additional Functions
pub fn add(&self, other:ScalarT<M, NUM_LIMBS>) -> ScalarT<M,NUM_LIMBS>{ // overload +
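        // NB: stub arithmetic: this adds only the least-significant limbs and
        // repeats that sum in every limb, with no carry propagation; the field
        // arithmetic proper is presumably done by the CUDA kernels.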
return ScalarT{value: [self.value[0] + other.value[0];NUM_LIMBS], phantom: PhantomData };
}
}
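A brief sketch of the limb-order helpers above, using the 8-limb Scalar alias defined in curve_structs.rs below (ScalarTrait must be in scope for limbs()):

let le = Scalar::from_limbs_le(&[1, 0, 0, 0, 0, 0, 0, 0]);
let be = Scalar::from_limbs_be(&[0, 0, 0, 0, 0, 0, 0, 1]);
// from_limbs_be reverses whole u32 words, not bytes, so these agree:
assert_eq!(le.limbs(), be.limbs());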


@@ -1,62 +0,0 @@
use std::ffi::{c_int, c_uint};
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda_derive::DeviceCopy;
use std::mem::transmute;
use rustacuda::prelude::*;
use rustacuda_core::DevicePointer;
use rustacuda::memory::{DeviceBox, CopyDestination, DeviceCopy};
use std::marker::PhantomData;
use std::convert::TryInto;
use crate::basic_structs::point::{PointT, PointAffineNoInfinityT};
use crate::basic_structs::scalar::ScalarT;
use crate::basic_structs::field::Field;
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
#[repr(C)]
pub struct ScalarField;
impl Field<8> for ScalarField {
const MODOLUS: [u32; 8] = [0x0;8];
}
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
#[repr(C)]
pub struct BaseField;
impl Field<12> for BaseField {
const MODOLUS: [u32; 12] = [0x0;12];
}
pub type Scalar = ScalarT<ScalarField,8>;
impl Default for Scalar {
fn default() -> Self {
Self{value: [0x0;ScalarField::LIMBS], phantom: PhantomData }
}
}
unsafe impl DeviceCopy for Scalar{}
pub type Base = ScalarT<BaseField,12>;
impl Default for Base {
fn default() -> Self {
Self{value: [0x0;BaseField::LIMBS], phantom: PhantomData }
}
}
unsafe impl DeviceCopy for Base{}
pub type Point = PointT<Base>;
pub type PointAffineNoInfinity = PointAffineNoInfinityT<Base>;
extern "C" {
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
}
impl PartialEq for Point {
fn eq(&self, other: &Self) -> bool {
unsafe { eq(self, other) != 0 }
}
}
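Note that PartialEq for Point dispatches to the native eq symbol, so even a host-side comparison such as assert_eq!(p, Point::zero()) calls into the CUDA library (cf. the "Zero point equality issue fixed" commit in the log above, which presumably concerns this path).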


@@ -1,798 +0,0 @@
use std::ffi::{c_int, c_uint};
use ark_std::UniformRand;
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda::CudaFlags;
use rustacuda::memory::DeviceBox;
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
use rustacuda_core::DevicePointer;
use std::mem::transmute;
use crate::basic_structs::scalar::ScalarTrait;
use crate::curve_structs::*;
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use std::marker::PhantomData;
use std::convert::TryInto;
use ark_bls12_377::{Fq as Fq_BLS12_377, Fr as Fr_BLS12_377, G1Affine as G1Affine_BLS12_377, G1Projective as G1Projective_BLS12_377};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
use rustacuda::memory::{CopyDestination, DeviceCopy};
extern "C" {
fn msm_cuda(
out: *mut Point,
points: *const PointAffineNoInfinity,
scalars: *const Scalar,
count: usize,
device_id: usize,
) -> c_uint;
fn msm_batch_cuda(
out: *mut Point,
points: *const PointAffineNoInfinity,
scalars: *const Scalar,
batch_size: usize,
msm_size: usize,
device_id: usize,
) -> c_uint;
fn commit_cuda(
d_out: DevicePointer<Point>,
d_scalars: DevicePointer<Scalar>,
d_points: DevicePointer<PointAffineNoInfinity>,
count: usize,
device_id: usize,
) -> c_uint;
fn commit_batch_cuda(
d_out: DevicePointer<Point>,
d_scalars: DevicePointer<Scalar>,
d_points: DevicePointer<PointAffineNoInfinity>,
count: usize,
batch_size: usize,
device_id: usize,
) -> c_uint;
fn build_domain_cuda(domain_size: usize, logn: usize, inverse: bool, device_id: usize) -> DevicePointer<Scalar>;
fn ntt_cuda(inout: *mut Scalar, n: usize, inverse: bool, device_id: usize) -> c_int;
fn ecntt_cuda(inout: *mut Point, n: usize, inverse: bool, device_id: usize) -> c_int;
fn ntt_batch_cuda(
inout: *mut Scalar,
arr_size: usize,
n: usize,
inverse: bool,
) -> c_int;
fn ecntt_batch_cuda(inout: *mut Point, arr_size: usize, n: usize, inverse: bool) -> c_int;
fn interpolate_scalars_cuda(
d_out: DevicePointer<Scalar>,
d_evaluations: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
n: usize,
device_id: usize
) -> c_int;
fn interpolate_scalars_batch_cuda(
d_out: DevicePointer<Scalar>,
d_evaluations: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn interpolate_points_cuda(
d_out: DevicePointer<Point>,
d_evaluations: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
n: usize,
device_id: usize
) -> c_int;
fn interpolate_points_batch_cuda(
d_out: DevicePointer<Point>,
d_evaluations: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn evaluate_scalars_cuda(
d_out: DevicePointer<Scalar>,
d_coefficients: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
device_id: usize
) -> c_int;
fn evaluate_scalars_batch_cuda(
d_out: DevicePointer<Scalar>,
d_coefficients: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn evaluate_points_cuda(
d_out: DevicePointer<Point>,
d_coefficients: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
device_id: usize
) -> c_int;
fn evaluate_points_batch_cuda(
d_out: DevicePointer<Point>,
d_coefficients: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn evaluate_scalars_on_coset_cuda(
d_out: DevicePointer<Scalar>,
d_coefficients: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
coset_powers: DevicePointer<Scalar>,
device_id: usize
) -> c_int;
fn evaluate_scalars_on_coset_batch_cuda(
d_out: DevicePointer<Scalar>,
d_coefficients: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
batch_size: usize,
coset_powers: DevicePointer<Scalar>,
device_id: usize
) -> c_int;
fn evaluate_points_on_coset_cuda(
d_out: DevicePointer<Point>,
d_coefficients: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
coset_powers: DevicePointer<Scalar>,
device_id: usize
) -> c_int;
fn evaluate_points_on_coset_batch_cuda(
d_out: DevicePointer<Point>,
d_coefficients: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
batch_size: usize,
coset_powers: DevicePointer<Scalar>,
device_id: usize
) -> c_int;
fn reverse_order_scalars_cuda(
d_arr: DevicePointer<Scalar>,
n: usize,
device_id: usize
) -> c_int;
fn reverse_order_scalars_batch_cuda(
d_arr: DevicePointer<Scalar>,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn reverse_order_points_cuda(
d_arr: DevicePointer<Point>,
n: usize,
device_id: usize
) -> c_int;
fn reverse_order_points_batch_cuda(
d_arr: DevicePointer<Point>,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn vec_mod_mult_point(
inout: *mut Point,
scalars: *const Scalar,
n_elements: usize,
device_id: usize,
) -> c_int;
fn vec_mod_mult_scalar(
inout: *mut Scalar,
scalars: *const Scalar,
n_elements: usize,
device_id: usize,
) -> c_int;
fn matrix_vec_mod_mult(
matrix_flattened: *const Scalar,
input: *const Scalar,
output: *mut Scalar,
n_elements: usize,
device_id: usize,
) -> c_int;
}
pub fn msm(points: &[PointAffineNoInfinity], scalars: &[Scalar], device_id: usize) -> Point {
let count = points.len();
if count != scalars.len() {
todo!("variable length")
}
let mut ret = Point::zero();
unsafe {
msm_cuda(
&mut ret as *mut _ as *mut Point,
points as *const _ as *const PointAffineNoInfinity,
scalars as *const _ as *const Scalar,
scalars.len(),
device_id,
)
};
ret
}
pub fn msm_batch(
points: &[PointAffineNoInfinity],
scalars: &[Scalar],
batch_size: usize,
device_id: usize,
) -> Vec<Point> {
let count = points.len();
if count != scalars.len() {
todo!("variable length")
}
let mut ret = vec![Point::zero(); batch_size];
unsafe {
msm_batch_cuda(
&mut ret[0] as *mut _ as *mut Point,
points as *const _ as *const PointAffineNoInfinity,
scalars as *const _ as *const Scalar,
batch_size,
count / batch_size,
device_id,
)
};
ret
}
pub fn commit(
points: &mut DeviceBuffer<PointAffineNoInfinity>,
scalars: &mut DeviceBuffer<Scalar>,
) -> DeviceBox<Point> {
let mut res = DeviceBox::new(&Point::zero()).unwrap();
unsafe {
commit_cuda(
res.as_device_ptr(),
scalars.as_device_ptr(),
points.as_device_ptr(),
scalars.len(),
0,
);
}
return res;
}
pub fn commit_batch(
points: &mut DeviceBuffer<PointAffineNoInfinity>,
scalars: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(batch_size).unwrap() };
unsafe {
commit_batch_cuda(
res.as_device_ptr(),
scalars.as_device_ptr(),
points.as_device_ptr(),
scalars.len() / batch_size,
batch_size,
0,
);
}
return res;
}
/// Compute an in-place NTT on the input data.
fn ntt_internal(values: &mut [Scalar], device_id: usize, inverse: bool) -> i32 {
let ret_code = unsafe {
ntt_cuda(
values as *mut _ as *mut Scalar,
values.len(),
inverse,
device_id,
)
};
ret_code
}
pub fn ntt(values: &mut [Scalar], device_id: usize) {
ntt_internal(values, device_id, false);
}
pub fn intt(values: &mut [Scalar], device_id: usize) {
ntt_internal(values, device_id, true);
}
/// Compute an in-place NTT on the input data.
fn ntt_internal_batch(
values: &mut [Scalar],
device_id: usize,
batch_size: usize,
inverse: bool,
) -> i32 {
unsafe {
ntt_batch_cuda(
values as *mut _ as *mut Scalar,
values.len(),
batch_size,
inverse,
)
}
}
pub fn ntt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
ntt_internal_batch(values, 0, batch_size, false);
}
pub fn intt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
ntt_internal_batch(values, 0, batch_size, true);
}
/// Compute an in-place ECNTT on the input data.
fn ecntt_internal(values: &mut [Point], inverse: bool, device_id: usize) -> i32 {
unsafe {
ecntt_cuda(
values as *mut _ as *mut Point,
values.len(),
inverse,
device_id,
)
}
}
pub fn ecntt(values: &mut [Point], device_id: usize) {
ecntt_internal(values, false, device_id);
}
/// Compute an in-place iECNTT on the input data.
pub fn iecntt(values: &mut [Point], device_id: usize) {
ecntt_internal(values, true, device_id);
}
/// Compute an in-place ECNTT on the input data.
fn ecntt_internal_batch(
values: &mut [Point],
device_id: usize,
batch_size: usize,
inverse: bool,
) -> i32 {
unsafe {
ecntt_batch_cuda(
values as *mut _ as *mut Point,
values.len(),
batch_size,
inverse,
)
}
}
pub fn ecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
ecntt_internal_batch(values, 0, batch_size, false);
}
/// Compute an in-place iECNTT on the input data.
pub fn iecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
ecntt_internal_batch(values, 0, batch_size, true);
}
pub fn build_domain(domain_size: usize, logn: usize, inverse: bool) -> DeviceBuffer<Scalar> {
unsafe {
DeviceBuffer::from_raw_parts(build_domain_cuda(
domain_size,
logn,
inverse,
0
), domain_size)
}
}
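// NB: from_raw_parts transfers ownership of the device allocation returned by
// build_domain_cuda to the DeviceBuffer, which frees it when dropped.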
pub fn reverse_order_scalars(
d_scalars: &mut DeviceBuffer<Scalar>,
) {
unsafe { reverse_order_scalars_cuda(
d_scalars.as_device_ptr(),
d_scalars.len(),
0
); }
}
pub fn reverse_order_scalars_batch(
d_scalars: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) {
unsafe { reverse_order_scalars_batch_cuda(
d_scalars.as_device_ptr(),
d_scalars.len() / batch_size,
batch_size,
0
); }
}
pub fn reverse_order_points(
d_points: &mut DeviceBuffer<Point>,
) {
unsafe { reverse_order_points_cuda(
d_points.as_device_ptr(),
d_points.len(),
0
); }
}
pub fn reverse_order_points_batch(
d_points: &mut DeviceBuffer<Point>,
batch_size: usize,
) {
unsafe { reverse_order_points_batch_cuda(
d_points.as_device_ptr(),
d_points.len() / batch_size,
batch_size,
0
); }
}
pub fn interpolate_scalars(
d_evaluations: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe { interpolate_scalars_cuda(
res.as_device_ptr(),
d_evaluations.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
0
) };
return res;
}
pub fn interpolate_scalars_batch(
d_evaluations: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe { interpolate_scalars_batch_cuda(
res.as_device_ptr(),
d_evaluations.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
batch_size,
0
) };
return res;
}
pub fn interpolate_points(
d_evaluations: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe { interpolate_points_cuda(
res.as_device_ptr(),
d_evaluations.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
0
) };
return res;
}
pub fn interpolate_points_batch(
d_evaluations: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe { interpolate_points_batch_cuda(
res.as_device_ptr(),
d_evaluations.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
batch_size,
0
) };
return res;
}
pub fn evaluate_scalars(
d_coefficients: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe {
evaluate_scalars_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len(),
0
);
}
return res;
}
pub fn evaluate_scalars_batch(
d_coefficients: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe {
evaluate_scalars_batch_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len() / batch_size,
batch_size,
0
);
}
return res;
}
pub fn evaluate_points(
d_coefficients: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe {
evaluate_points_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len(),
0
);
}
return res;
}
pub fn evaluate_points_batch(
d_coefficients: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe {
evaluate_points_batch_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len() / batch_size,
batch_size,
0
);
}
return res;
}
pub fn evaluate_scalars_on_coset(
d_coefficients: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
coset_powers: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe {
evaluate_scalars_on_coset_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len(),
coset_powers.as_device_ptr(),
0
);
}
return res;
}
pub fn evaluate_scalars_on_coset_batch(
d_coefficients: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
coset_powers: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe {
evaluate_scalars_on_coset_batch_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len() / batch_size,
batch_size,
coset_powers.as_device_ptr(),
0
);
}
return res;
}
pub fn evaluate_points_on_coset(
d_coefficients: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
coset_powers: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe {
evaluate_points_on_coset_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len(),
coset_powers.as_device_ptr(),
0
);
}
return res;
}
pub fn evaluate_points_on_coset_batch(
d_coefficients: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
coset_powers: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe {
evaluate_points_on_coset_batch_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len() / batch_size,
batch_size,
coset_powers.as_device_ptr(),
0
);
}
return res;
}
pub fn multp_vec(a: &mut [Point], b: &[Scalar], device_id: usize) {
assert_eq!(a.len(), b.len());
unsafe {
vec_mod_mult_point(
a as *mut _ as *mut Point,
b as *const _ as *const Scalar,
a.len(),
device_id,
);
}
}
pub fn mult_sc_vec(a: &mut [Scalar], b: &[Scalar], device_id: usize) {
assert_eq!(a.len(), b.len());
unsafe {
vec_mod_mult_scalar(
a as *mut _ as *mut Scalar,
b as *const _ as *const Scalar,
a.len(),
device_id,
);
}
}
// Multiply a matrix by a vector:
// `a` - flattened matrix;
// `b` - vector to multiply `a` by;
pub fn mult_matrix_by_vec(a: &[Scalar], b: &[Scalar], device_id: usize) -> Vec<Scalar> {
let mut c = Vec::with_capacity(b.len());
for i in 0..b.len() {
c.push(Scalar::zero());
}
unsafe {
matrix_vec_mod_mult(
a as *const _ as *const Scalar,
b as *const _ as *const Scalar,
c.as_mut_slice() as *mut _ as *mut Scalar,
b.len(),
device_id,
);
}
c
}
pub fn clone_buffer<T: DeviceCopy>(buf: &mut DeviceBuffer<T>) -> DeviceBuffer<T> {
let mut buf_cpy = unsafe { DeviceBuffer::uninitialized(buf.len()).unwrap() };
unsafe { buf_cpy.copy_from(buf) };
return buf_cpy;
}
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> {
let rng: Box<dyn RngCore> = match seed {
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
None => Box::new(rand::thread_rng()),
};
rng
}
fn set_up_device() {
// Initialize the CUDA driver and create a context on device 0.
rustacuda::init(CudaFlags::empty()).unwrap();
let device = Device::get_device(0).unwrap();
let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device).unwrap();
}
pub fn generate_random_points(
count: usize,
mut rng: Box<dyn RngCore>,
) -> Vec<PointAffineNoInfinity> {
(0..count)
.map(|_| Point::from_ark(G1Projective_BLS12_377::rand(&mut rng)).to_xy_strip_z())
.collect()
}
pub fn generate_random_points_proj(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Point> {
(0..count)
.map(|_| Point::from_ark(G1Projective_BLS12_377::rand(&mut rng)))
.collect()
}
pub fn generate_random_scalars(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Scalar> {
(0..count)
.map(|_| Scalar::from_ark(Fr_BLS12_377::rand(&mut rng).into_repr()))
.collect()
}
pub fn set_up_points(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Point>, DeviceBuffer<Point>, DeviceBuffer<Scalar>) {
set_up_device();
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
let seed = Some(0); // fix the rng so repeated calls generate the same scalars
let vector = generate_random_points_proj(test_size, get_rng(seed));
let mut vector_mut = vector.clone();
let mut d_vector = DeviceBuffer::from_slice(&vector[..]).unwrap();
(vector_mut, d_vector, d_domain)
}
pub fn set_up_scalars(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Scalar>, DeviceBuffer<Scalar>, DeviceBuffer<Scalar>) {
set_up_device();
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
let seed = Some(0); // fix the rng so repeated calls generate the same scalars
let mut vector_mut = generate_random_scalars(test_size, get_rng(seed));
let mut d_vector = DeviceBuffer::from_slice(&vector_mut[..]).unwrap();
(vector_mut, d_vector, d_domain)
}
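A hedged end-to-end sketch of the host-side API above (assumes a CUDA-capable GPU and the compiled native library; it mirrors test_msm in the test module below):

let seed = Some(0); // fixed seed for reproducible points and scalars
let count = 1 << 10;
let points = generate_random_points(count, get_rng(seed));
let scalars = generate_random_scalars(count, get_rng(seed));
let result = msm(&points, &scalars, 0); // device_id 0
assert_ne!(result, Point::zero());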


@@ -1,4 +0,0 @@
pub mod test_bls12_377;
pub mod basic_structs;
pub mod from_cuda;
pub mod curve_structs;


@@ -1,816 +0,0 @@
use std::ffi::{c_int, c_uint};
use ark_std::UniformRand;
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda::CudaFlags;
use rustacuda::memory::DeviceBox;
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
use rustacuda_core::DevicePointer;
use std::mem::transmute;
pub use crate::basic_structs::scalar::ScalarTrait;
pub use crate::curve_structs::*;
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use std::marker::PhantomData;
use std::convert::TryInto;
use ark_bls12_377::{Fq as Fq_BLS12_377, Fr as Fr_BLS12_377, G1Affine as G1Affine_BLS12_377, G1Projective as G1Projective_BLS12_377};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
use rustacuda::memory::{CopyDestination, DeviceCopy};
impl Scalar {
pub fn to_biginteger254(&self) -> BigInteger256 {
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn to_ark(&self) -> BigInteger256 {
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_biginteger256(ark: BigInteger256) -> Self {
Self{ value: u64_vec_to_u32_vec(&ark.0).try_into().unwrap(), phantom : PhantomData}
}
pub fn to_biginteger256_transmute(&self) -> BigInteger256 {
unsafe { transmute(*self) }
}
pub fn from_biginteger_transmute(v: BigInteger256) -> Scalar {
Scalar{ value: unsafe{ transmute(v)}, phantom : PhantomData }
}
pub fn to_ark_transmute(&self) -> Fr_BLS12_377 {
unsafe { std::mem::transmute(*self) }
}
pub fn from_ark_transmute(v: &Fr_BLS12_377) -> Scalar {
unsafe { std::mem::transmute_copy(v) }
}
pub fn to_ark_mod_p(&self) -> Fr_BLS12_377 {
Fr_BLS12_377::new(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap()))
}
pub fn to_ark_repr(&self) -> Fr_BLS12_377 {
Fr_BLS12_377::from_repr(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())).unwrap()
}
pub fn from_ark(v: BigInteger256) -> Scalar {
Self { value : u64_vec_to_u32_vec(&v.0).try_into().unwrap(), phantom: PhantomData}
}
}
impl Base {
pub fn to_ark(&self) -> BigInteger384 {
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger384) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
}
impl Point {
pub fn to_ark(&self) -> G1Projective_BLS12_377 {
self.to_ark_affine().into_projective()
}
pub fn to_ark_affine(&self) -> G1Affine_BLS12_377 {
//TODO: generic conversion
use ark_ff::Field;
use std::ops::Mul;
let proj_x_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.x.to_bytes_le());
let proj_y_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.y.to_bytes_le());
let proj_z_field = Fq_BLS12_377::from_le_bytes_mod_order(&self.z.to_bytes_le());
let inverse_z = proj_z_field.inverse().unwrap();
let aff_x = proj_x_field.mul(inverse_z);
let aff_y = proj_y_field.mul(inverse_z);
G1Affine_BLS12_377::new(aff_x, aff_y, false)
}
pub fn from_ark(ark: G1Projective_BLS12_377) -> Point {
use ark_ff::Field;
let z_inv = ark.z.inverse().unwrap();
let z_invsq = z_inv * z_inv;
let z_invq3 = z_invsq * z_inv;
Point {
x: Base::from_ark((ark.x * z_invsq).into_repr()),
y: Base::from_ark((ark.y * z_invq3).into_repr()),
z: Base::one(),
}
}
}
impl PointAffineNoInfinity {
pub fn to_ark(&self) -> G1Affine_BLS12_377 {
G1Affine_BLS12_377::new(Fq_BLS12_377::new(self.x.to_ark()), Fq_BLS12_377::new(self.y.to_ark()), false)
}
pub fn to_ark_repr(&self) -> G1Affine_BLS12_377 {
G1Affine_BLS12_377::new(
Fq_BLS12_377::from_repr(self.x.to_ark()).unwrap(),
Fq_BLS12_377::from_repr(self.y.to_ark()).unwrap(),
false,
)
}
pub fn from_ark(p: &G1Affine_BLS12_377) -> Self {
PointAffineNoInfinity {
x: Base::from_ark(p.x.into_repr()),
y: Base::from_ark(p.y.into_repr()),
}
}
}
impl Point {
pub fn to_affine(&self) -> PointAffineNoInfinity {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity {
x: Base::from_ark(ark_affine.x.into_repr()),
y: Base::from_ark(ark_affine.y.into_repr()),
}
}
}
#[cfg(test)]
pub(crate) mod tests_bls12_377 {
use std::ops::Add;
use ark_bls12_377::{Fr, G1Affine, G1Projective};
use ark_ec::{msm::VariableBaseMSM, AffineCurve, ProjectiveCurve};
use ark_ff::{FftField, Field, Zero, PrimeField};
use ark_std::UniformRand;
use rustacuda::prelude::{DeviceBuffer, CopyDestination};
use crate::curve_structs::{Point, Scalar, Base};
use crate::basic_structs::scalar::ScalarTrait;
use crate::from_cuda::{generate_random_points, get_rng, generate_random_scalars, msm, msm_batch, set_up_scalars, commit, commit_batch, ntt, intt, generate_random_points_proj, ecntt, iecntt, ntt_batch, ecntt_batch, iecntt_batch, intt_batch, reverse_order_scalars_batch, interpolate_scalars_batch, set_up_points, reverse_order_points, interpolate_points, reverse_order_points_batch, interpolate_points_batch, evaluate_scalars, interpolate_scalars, reverse_order_scalars, evaluate_points, build_domain, evaluate_scalars_on_coset, evaluate_points_on_coset, mult_matrix_by_vec, mult_sc_vec, multp_vec,evaluate_scalars_batch, evaluate_points_batch, evaluate_scalars_on_coset_batch, evaluate_points_on_coset_batch};
fn random_points_ark_proj(nof_elements: usize) -> Vec<G1Projective> {
let mut rng = ark_std::rand::thread_rng();
let mut points_ga: Vec<G1Projective> = Vec::new();
for _ in 0..nof_elements {
let aff = G1Projective::rand(&mut rng);
points_ga.push(aff);
}
points_ga
}
fn ecntt_arc_naive(
points: &Vec<G1Projective>,
size: usize,
inverse: bool,
) -> Vec<G1Projective> {
let mut result: Vec<G1Projective> = Vec::new();
for _ in 0..size {
result.push(G1Projective::zero());
}
let rou: Fr;
if !inverse {
rou = Fr::get_root_of_unity(size).unwrap();
} else {
rou = Fr::inverse(&Fr::get_root_of_unity(size).unwrap()).unwrap();
}
for k in 0..size {
for l in 0..size {
let pow: [u64; 1] = [(l * k).try_into().unwrap()];
let mul_rou = Fr::pow(&rou, &pow);
result[k] = result[k].add(points[l].into_affine().mul(mul_rou));
}
}
if inverse {
let size2 = size as u64;
for k in 0..size {
let multfactor = Fr::inverse(&Fr::from(size2)).unwrap();
result[k] = result[k].into_affine().mul(multfactor);
}
}
return result;
}
fn check_eq(points: &Vec<G1Projective>, points2: &Vec<G1Projective>) -> bool {
let mut eq = true;
for i in 0..points.len() {
if points2[i].ne(&points[i]) {
eq = false;
break;
}
}
return eq;
}
fn test_naive_ark_ecntt(size: usize) {
let points = random_points_ark_proj(size);
let result1: Vec<G1Projective> = ecntt_arc_naive(&points, size, false);
let result2: Vec<G1Projective> = ecntt_arc_naive(&result1, size, true);
assert!(!check_eq(&result2, &result1));
assert!(check_eq(&result2, &points));
}
#[test]
fn test_msm() {
let test_sizes = [6, 9];
for pow2 in test_sizes {
let count = 1 << pow2;
let seed = None; // set Some to provide seed
let points = generate_random_points(count, get_rng(seed));
let scalars = generate_random_scalars(count, get_rng(seed));
let msm_result = msm(&points, &scalars, 0);
let point_r_ark: Vec<_> = points.iter().map(|x| x.to_ark_repr()).collect();
let scalars_r_ark: Vec<_> = scalars.iter().map(|x| x.to_ark()).collect();
let msm_result_ark = VariableBaseMSM::multi_scalar_mul(&point_r_ark, &scalars_r_ark);
assert_eq!(msm_result.to_ark_affine(), msm_result_ark);
assert_eq!(msm_result.to_ark(), msm_result_ark);
assert_eq!(
msm_result.to_ark_affine(),
Point::from_ark(msm_result_ark).to_ark_affine()
);
}
}
#[test]
fn test_batch_msm() {
for batch_pow2 in [2, 4] {
for pow2 in [4, 6] {
let msm_size = 1 << pow2;
let batch_size = 1 << batch_pow2;
let seed = None; // set Some to provide seed
let points_batch = generate_random_points(msm_size * batch_size, get_rng(seed));
let scalars_batch = generate_random_scalars(msm_size * batch_size, get_rng(seed));
let point_r_ark: Vec<_> = points_batch.iter().map(|x| x.to_ark_repr()).collect();
let scalars_r_ark: Vec<_> = scalars_batch.iter().map(|x| x.to_ark()).collect();
let expected: Vec<_> = point_r_ark
.chunks(msm_size)
.zip(scalars_r_ark.chunks(msm_size))
.map(|p| Point::from_ark(VariableBaseMSM::multi_scalar_mul(p.0, p.1)))
.collect();
let result = msm_batch(&points_batch, &scalars_batch, batch_size, 0);
assert_eq!(result, expected);
}
}
}
#[test]
fn test_commit() {
let test_size = 1 << 8;
let seed = Some(0);
let (mut scalars, mut d_scalars, _) = set_up_scalars(test_size, 0, false);
let mut points = generate_random_points(test_size, get_rng(seed));
let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();
let msm_result = msm(&points, &scalars, 0);
let mut d_commit_result = commit(&mut d_points, &mut d_scalars);
let mut h_commit_result = Point::zero();
d_commit_result.copy_to(&mut h_commit_result).unwrap();
assert_eq!(msm_result, h_commit_result);
assert_ne!(msm_result, Point::zero());
assert_ne!(h_commit_result, Point::zero());
}
#[test]
fn test_batch_commit() {
let batch_size = 4;
let test_size = 1 << 12;
let seed = Some(0);
let (scalars, mut d_scalars, _) = set_up_scalars(test_size * batch_size, 0, false);
let points = generate_random_points(test_size * batch_size, get_rng(seed));
let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();
let msm_result = msm_batch(&points, &scalars, batch_size, 0);
let mut d_commit_result = commit_batch(&mut d_points, &mut d_scalars, batch_size);
let mut h_commit_result: Vec<Point> = (0..batch_size).map(|_| Point::zero()).collect();
d_commit_result.copy_to(&mut h_commit_result[..]).unwrap();
assert_eq!(msm_result, h_commit_result);
for h in h_commit_result {
assert_ne!(h, Point::zero());
}
}
#[test]
fn test_ntt() {
//NTT
let seed = None; // use Some(value) to fix the rng
let test_size = 1 << 3;
let scalars = generate_random_scalars(test_size, get_rng(seed));
let mut ntt_result = scalars.clone();
ntt(&mut ntt_result, 0);
assert_ne!(ntt_result, scalars);
let mut intt_result = ntt_result.clone();
intt(&mut intt_result, 0);
assert_eq!(intt_result, scalars);
//ECNTT
let points_proj = generate_random_points_proj(test_size, get_rng(seed));
test_naive_ark_ecntt(test_size);
assert!(points_proj[0].to_ark().into_affine().is_on_curve());
//naive ark
let points_proj_ark = points_proj
.iter()
.map(|p| p.to_ark())
.collect::<Vec<G1Projective>>();
let ecntt_result_naive = ecntt_arc_naive(&points_proj_ark, points_proj_ark.len(), false);
let iecntt_result_naive = ecntt_arc_naive(&ecntt_result_naive, points_proj_ark.len(), true);
assert_eq!(points_proj_ark, iecntt_result_naive);
//ingo gpu
let mut ecntt_result = points_proj.to_vec();
ecntt(&mut ecntt_result, 0);
assert_ne!(ecntt_result, points_proj);
let mut iecntt_result = ecntt_result.clone();
iecntt(&mut iecntt_result, 0);
assert_eq!(
iecntt_result_naive,
points_proj
.iter()
.map(|p| p.to_ark_affine())
.collect::<Vec<G1Affine>>()
);
assert_eq!(
iecntt_result
.iter()
.map(|p| p.to_ark_affine())
.collect::<Vec<G1Affine>>(),
points_proj
.iter()
.map(|p| p.to_ark_affine())
.collect::<Vec<G1Affine>>()
);
}
#[test]
fn test_ntt_batch() {
//NTT
let seed = None; // use Some(value) to fix the rng
let test_size = 1 << 5;
let batches = 4;
let scalars_batch: Vec<Scalar> =
generate_random_scalars(test_size * batches, get_rng(seed));
let mut scalar_vec_of_vec: Vec<Vec<Scalar>> = Vec::new();
for i in 0..batches {
scalar_vec_of_vec.push(scalars_batch[i * test_size..(i + 1) * test_size].to_vec());
}
let mut ntt_result = scalars_batch.clone();
// do batch ntt
ntt_batch(&mut ntt_result, test_size, 0);
let mut ntt_result_vec_of_vec = Vec::new();
// do ntt for every chunk
for i in 0..batches {
ntt_result_vec_of_vec.push(scalar_vec_of_vec[i].clone());
ntt(&mut ntt_result_vec_of_vec[i], 0);
}
// check that the ntt of each vec of scalars is equal to the intt of the specific batch
for i in 0..batches {
assert_eq!(
ntt_result_vec_of_vec[i],
ntt_result[i * test_size..(i + 1) * test_size]
);
}
// check that ntt output is different from input
assert_ne!(ntt_result, scalars_batch);
let mut intt_result = ntt_result.clone();
// do batch intt
intt_batch(&mut intt_result, test_size, 0);
let mut intt_result_vec_of_vec = Vec::new();
// do intt for every chunk
for i in 0..batches {
intt_result_vec_of_vec.push(ntt_result_vec_of_vec[i].clone());
intt(&mut intt_result_vec_of_vec[i], 0);
}
// check that the intt of each vec of scalars is equal to the intt of the specific batch
for i in 0..batches {
assert_eq!(
intt_result_vec_of_vec[i],
intt_result[i * test_size..(i + 1) * test_size]
);
}
assert_eq!(intt_result, scalars_batch);
//ECNTT
let points_proj = generate_random_points_proj(test_size * batches, get_rng(seed));
let mut points_vec_of_vec: Vec<Vec<Point>> = Vec::new();
for i in 0..batches {
points_vec_of_vec.push(points_proj[i * test_size..(i + 1) * test_size].to_vec());
}
let mut ntt_result_points = points_proj.clone();
// do batch ecntt
ecntt_batch(&mut ntt_result_points, test_size, 0);
let mut ntt_result_points_vec_of_vec = Vec::new();
for i in 0..batches {
ntt_result_points_vec_of_vec.push(points_vec_of_vec[i].clone());
ecntt(&mut ntt_result_points_vec_of_vec[i], 0);
}
for i in 0..batches {
assert_eq!(
ntt_result_points_vec_of_vec[i],
ntt_result_points[i * test_size..(i + 1) * test_size]
);
}
assert_ne!(ntt_result_points, points_proj);
let mut intt_result_points = ntt_result_points.clone();
// do batch ecintt
iecntt_batch(&mut intt_result_points, test_size, 0);
let mut intt_result_points_vec_of_vec = Vec::new();
// do ecintt for every chunk
for i in 0..batches {
intt_result_points_vec_of_vec.push(ntt_result_points_vec_of_vec[i].clone());
iecntt(&mut intt_result_points_vec_of_vec[i], 0);
}
// check that the ecintt of each vec of scalars is equal to the intt of the specific batch
for i in 0..batches {
assert_eq!(
intt_result_points_vec_of_vec[i],
intt_result_points[i * test_size..(i + 1) * test_size]
);
}
assert_eq!(intt_result_points, points_proj);
}
#[test]
fn test_scalar_interpolation() {
let log_test_size = 7;
let test_size = 1 << log_test_size;
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_scalars(test_size, log_test_size, true);
reverse_order_scalars(&mut d_evals);
let mut d_coeffs = interpolate_scalars(&mut d_evals, &mut d_domain);
intt(&mut evals_mut, 0);
let mut h_coeffs: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
assert_eq!(h_coeffs, evals_mut);
}
#[test]
fn test_scalar_batch_interpolation() {
let batch_size = 4;
let log_test_size = 10;
let test_size = 1 << log_test_size;
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_scalars(test_size * batch_size, log_test_size, true);
reverse_order_scalars_batch(&mut d_evals, batch_size);
let mut d_coeffs = interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size);
intt_batch(&mut evals_mut, test_size, 0);
let mut h_coeffs: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
assert_eq!(h_coeffs, evals_mut);
}
#[test]
fn test_point_interpolation() {
let log_test_size = 6;
let test_size = 1 << log_test_size;
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_points(test_size, log_test_size, true);
reverse_order_points(&mut d_evals);
let mut d_coeffs = interpolate_points(&mut d_evals, &mut d_domain);
iecntt(&mut evals_mut[..], 0);
let mut h_coeffs: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
assert_eq!(h_coeffs, *evals_mut);
for h in h_coeffs.iter() {
assert_ne!(*h, Point::zero());
}
}
#[test]
fn test_point_batch_interpolation() {
let batch_size = 4;
let log_test_size = 6;
let test_size = 1 << log_test_size;
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_points(test_size * batch_size, log_test_size, true);
reverse_order_points_batch(&mut d_evals, batch_size);
let mut d_coeffs = interpolate_points_batch(&mut d_evals, &mut d_domain, batch_size);
iecntt_batch(&mut evals_mut[..], test_size, 0);
let mut h_coeffs: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
assert_eq!(h_coeffs, *evals_mut);
for h in h_coeffs.iter() {
assert_ne!(*h, Point::zero());
}
}
#[test]
fn test_scalar_evaluation() {
let log_test_domain_size = 8;
let coeff_size = 1 << 6;
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
let mut d_coeffs_domain = interpolate_scalars(&mut d_evals, &mut d_domain_inv);
let mut h_coeffs_domain: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
assert_eq!(h_coeffs, h_coeffs_domain[..coeff_size]);
for i in coeff_size.. (1 << log_test_domain_size) {
assert_eq!(Scalar::zero(), h_coeffs_domain[i]);
}
}
#[test]
fn test_scalar_batch_evaluation() {
let batch_size = 6;
let log_test_domain_size = 8;
let domain_size = 1 << log_test_domain_size;
let coeff_size = 1 << 6;
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size * batch_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);
let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
let mut d_coeffs_domain = interpolate_scalars_batch(&mut d_evals, &mut d_domain_inv, batch_size);
let mut h_coeffs_domain: Vec<Scalar> = (0..domain_size * batch_size).map(|_| Scalar::zero()).collect();
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
for j in 0..batch_size {
assert_eq!(h_coeffs[j * coeff_size..(j + 1) * coeff_size], h_coeffs_domain[j * domain_size..j * domain_size + coeff_size]);
for i in coeff_size..domain_size {
assert_eq!(Scalar::zero(), h_coeffs_domain[j * domain_size + i]);
}
}
}
#[test]
fn test_point_evaluation() {
let log_test_domain_size = 7;
let coeff_size = 1 << 7;
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_points(coeff_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);
let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
let mut d_coeffs_domain = interpolate_points(&mut d_evals, &mut d_domain_inv);
let mut h_coeffs_domain: Vec<Point> = (0..1 << log_test_domain_size).map(|_| Point::zero()).collect();
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
assert_eq!(h_coeffs[..], h_coeffs_domain[..coeff_size]);
for i in coeff_size..(1 << log_test_domain_size) {
assert_eq!(Point::zero(), h_coeffs_domain[i]);
}
for i in 0..coeff_size {
assert_ne!(h_coeffs_domain[i], Point::zero());
}
}
#[test]
fn test_point_batch_evaluation() {
let batch_size = 4;
let log_test_domain_size = 6;
let domain_size = 1 << log_test_domain_size;
let coeff_size = 1 << 5;
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_points(coeff_size * batch_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);
let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
let mut d_coeffs_domain = interpolate_points_batch(&mut d_evals, &mut d_domain_inv, batch_size);
let mut h_coeffs_domain: Vec<Point> = (0..domain_size * batch_size).map(|_| Point::zero()).collect();
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
for j in 0..batch_size {
assert_eq!(h_coeffs[j * coeff_size..(j + 1) * coeff_size], h_coeffs_domain[j * domain_size..(j * domain_size + coeff_size)]);
for i in coeff_size..domain_size {
assert_eq!(Point::zero(), h_coeffs_domain[j * domain_size + i]);
}
for i in j * domain_size..(j * domain_size + coeff_size) {
assert_ne!(h_coeffs_domain[i], Point::zero());
}
}
}
#[test]
fn test_scalar_evaluation_on_trivial_coset() {
// checks that the evaluations on the subgroup are the same as on the coset generated by 1
let log_test_domain_size = 8;
let coeff_size = 1 << 6;
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_scalars(coeff_size, log_test_domain_size, true);
let mut d_trivial_coset_powers = build_domain(1 << log_test_domain_size, 0, false);
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
let mut h_coeffs: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
d_evals.copy_to(&mut h_coeffs[..]).unwrap();
let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_trivial_coset_powers);
let mut h_evals_coset: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
assert_eq!(h_coeffs, h_evals_coset);
}
#[test]
fn test_scalar_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let log_test_size = 8;
let test_size = 1 << log_test_size;
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(test_size, log_test_size, false);
let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
let mut d_evals_large = evaluate_scalars(&mut d_coeffs, &mut d_large_domain);
let mut h_evals_large: Vec<Scalar> = (0..2 * test_size).map(|_| Scalar::zero()).collect();
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
let mut h_evals: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
let mut h_evals_coset: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
assert_eq!(h_evals[..], h_evals_large[..test_size]);
assert_eq!(h_evals_coset[..], h_evals_large[test_size..2 * test_size]);
}
#[test]
fn test_scalar_batch_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let batch_size = 4;
let log_test_size = 6;
let test_size = 1 << log_test_size;
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(test_size * batch_size, log_test_size, false);
let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
let mut d_evals_large = evaluate_scalars_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
let mut h_evals_large: Vec<Scalar> = (0..2 * test_size * batch_size).map(|_| Scalar::zero()).collect();
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
let mut h_evals: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_scalars_on_coset_batch(&mut d_coeffs, &mut d_domain, batch_size, &mut d_coset_powers);
let mut h_evals_coset: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
for i in 0..batch_size {
assert_eq!(h_evals_large[2 * i * test_size..(2 * i + 1) * test_size], h_evals[i * test_size..(i + 1) * test_size]);
assert_eq!(h_evals_large[(2 * i + 1) * test_size..(2 * i + 2) * test_size], h_evals_coset[i * test_size..(i + 1) * test_size]);
}
}
#[test]
fn test_point_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let log_test_size = 8;
let test_size = 1 << log_test_size;
let (_, mut d_coeffs, mut d_domain) = set_up_points(test_size, log_test_size, false);
let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
let mut d_evals_large = evaluate_points(&mut d_coeffs, &mut d_large_domain);
let mut h_evals_large: Vec<Point> = (0..2 * test_size).map(|_| Point::zero()).collect();
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
let mut h_evals: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_points_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
let mut h_evals_coset: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
assert_eq!(h_evals[..], h_evals_large[..test_size]);
assert_eq!(h_evals_coset[..], h_evals_large[test_size..2 * test_size]);
for i in 0..test_size {
assert_ne!(h_evals[i], Point::zero());
assert_ne!(h_evals_coset[i], Point::zero());
assert_ne!(h_evals_large[2 * i], Point::zero());
assert_ne!(h_evals_large[2 * i + 1], Point::zero());
}
}
#[test]
fn test_point_batch_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let batch_size = 2;
let log_test_size = 6;
let test_size = 1 << log_test_size;
let (_, mut d_coeffs, mut d_domain) = set_up_points(test_size * batch_size, log_test_size, false);
let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
let mut d_evals_large = evaluate_points_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
let mut h_evals_large: Vec<Point> = (0..2 * test_size * batch_size).map(|_| Point::zero()).collect();
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
let mut h_evals: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_points_on_coset_batch(&mut d_coeffs, &mut d_domain, batch_size, &mut d_coset_powers);
let mut h_evals_coset: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
for i in 0..batch_size {
assert_eq!(h_evals_large[2 * i * test_size..(2 * i + 1) * test_size], h_evals[i * test_size..(i + 1) * test_size]);
assert_eq!(h_evals_large[(2 * i + 1) * test_size..(2 * i + 2) * test_size], h_evals_coset[i * test_size..(i + 1) * test_size]);
}
for i in 0..test_size * batch_size {
assert_ne!(h_evals[i], Point::zero());
assert_ne!(h_evals_coset[i], Point::zero());
assert_ne!(h_evals_large[2 * i], Point::zero());
assert_ne!(h_evals_large[2 * i + 1], Point::zero());
}
}
// testing matrix multiplication by comparing the result of FFT with the naive multiplication by the DFT matrix
#[test]
fn test_matrix_multiplication() {
let seed = None; // set to Some(value) to fix the rng
let test_size = 1 << 5;
let rou = Fr::get_root_of_unity(test_size).unwrap();
let matrix_flattened: Vec<Scalar> = (0..test_size).map(
|row_num| { (0..test_size).map(
|col_num| {
let pow: [u64; 1] = [(row_num * col_num).try_into().unwrap()];
Scalar::from_ark(Fr::pow(&rou, &pow).into_repr())
}).collect::<Vec<Scalar>>()
}).flatten().collect::<Vec<_>>();
let vector: Vec<Scalar> = generate_random_scalars(test_size, get_rng(seed));
let result = mult_matrix_by_vec(&matrix_flattened, &vector, 0);
let mut ntt_result = vector.clone();
ntt(&mut ntt_result, 0);
// we don't use the same roots of unity as arkworks, so the results are permutations
// of one another and the only guaranteed fixed scalars are the following ones:
assert_eq!(result[0], ntt_result[0]);
assert_eq!(result[test_size >> 1], ntt_result[test_size >> 1]);
}
#[test]
#[allow(non_snake_case)]
fn test_vec_scalar_mul() {
let mut inout = [Scalar::one(), Scalar::one(), Scalar::zero()];
let expected = [Scalar::one(), Scalar::zero(), Scalar::zero()];
mult_sc_vec(&mut inout, &expected, 0);
assert_eq!(inout, expected);
}
#[test]
#[allow(non_snake_case)]
fn test_vec_point_mul() {
let dummy_one = Point {
x: Base::one(),
y: Base::one(),
z: Base::one(),
};
let mut inout = [dummy_one, dummy_one, Point::zero()];
let scalars = [Scalar::one(), Scalar::zero(), Scalar::zero()];
let expected = [dummy_one, Point::zero(), Point::zero()];
multp_vec(&mut inout, &scalars, 0);
assert_eq!(inout, expected);
}
}


@@ -1,34 +0,0 @@
[package]
name = "bls12-381"
version = "0.1.0"
edition = "2021"
authors = [ "Ingonyama" ]
[dependencies]
icicle-core = { path = "../icicle-core" }
hex = "*"
ark-std = "0.3.0"
ark-ff = "0.3.0"
ark-poly = "0.3.0"
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
ark-bls12-381 = "0.3.0"
serde = { version = "1.0", features = ["derive"] }
serde_derive = "1.0"
serde_cbor = "0.11.2"
rustacuda = "0.1"
rustacuda_core = "0.1"
rustacuda_derive = "0.1"
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
[build-dependencies]
cc = { version = "1.0", features = ["parallel"] }
[dev-dependencies]
"criterion" = "0.4.0"
[features]
g2 = []


@@ -1,36 +0,0 @@
use std::env;
fn main() {
//TODO: check which cargo features are selected
//TODO: this may conflict with or duplicate the Makefile build
println!("cargo:rerun-if-env-changed=CXXFLAGS");
println!("cargo:rerun-if-changed=./icicle");
let arch_type = env::var("ARCH_TYPE").unwrap_or(String::from("native"));
let stream_type = env::var("DEFAULT_STREAM").unwrap_or(String::from("legacy"));
let mut arch = String::from("-arch=");
arch.push_str(&arch_type);
let mut stream = String::from("-default-stream=");
stream.push_str(&stream_type);
let mut nvcc = cc::Build::new();
println!("Compiling icicle library using arch: {}", &arch);
if cfg!(feature = "g2") {
nvcc.define("G2_DEFINED", None);
}
nvcc.cuda(true);
nvcc.define("FEATURE_BLS12_381", None);
nvcc.debug(false);
nvcc.flag(&arch);
nvcc.flag(&stream);
nvcc.shared_flag(false);
// nvcc.static_flag(true);
nvcc.files([
"../icicle-cuda/curves/index.cu",
]);
nvcc.compile("ingo_icicle"); //TODO: extension??
}


@@ -1,4 +0,0 @@
pub trait Field<const NUM_LIMBS: usize> {
const MODOLUS: [u32;NUM_LIMBS];
const LIMBS: usize = NUM_LIMBS;
}


@@ -1,3 +0,0 @@
pub mod field;
pub mod scalar;
pub mod point;


@@ -1,106 +0,0 @@
use std::ffi::c_uint;
use ark_ec::AffineCurve;
use ark_ff::{BigInteger256, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
use super::scalar::{get_fixed_limbs, self};
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointT<BF: scalar::ScalarTrait> {
pub x: BF,
pub y: BF,
pub z: BF,
}
impl<BF: DeviceCopy + scalar::ScalarTrait> Default for PointT<BF> {
fn default() -> Self {
PointT::zero()
}
}
impl<BF: DeviceCopy + scalar::ScalarTrait> PointT<BF> {
pub fn zero() -> Self {
PointT {
x: BF::zero(),
y: BF::one(),
z: BF::zero(),
}
}
pub fn infinity() -> Self {
Self::zero()
}
}
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinityT<BF> {
pub x: BF,
pub y: BF,
}
impl<BF: scalar::ScalarTrait> Default for PointAffineNoInfinityT<BF> {
fn default() -> Self {
PointAffineNoInfinityT {
x: BF::zero(),
y: BF::zero(),
}
}
}
impl<BF: Copy + scalar::ScalarTrait> PointAffineNoInfinityT<BF> {
/// From u32 limbs x, y
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
PointAffineNoInfinityT {
x: BF::from_limbs(x),
y: BF::from_limbs(y)
}
}
pub fn limbs(&self) -> Vec<u32> {
[self.x.limbs(), self.y.limbs()].concat()
}
pub fn to_projective(&self) -> PointT<BF> {
PointT {
x: self.x,
y: self.y,
z: BF::one(),
}
}
}
impl<BF: Copy + scalar::ScalarTrait> PointT<BF> {
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
PointT {
x: BF::from_limbs(x),
y: BF::from_limbs(y),
z: BF::from_limbs(z)
}
}
pub fn from_xy_limbs(value: &[u32]) -> PointT<BF> {
let l = value.len();
assert_eq!(l, 3 * BF::base_limbs(), "length must be 3 * {}", BF::base_limbs());
PointT {
x: BF::from_limbs(value[..BF::base_limbs()].try_into().unwrap()),
y: BF::from_limbs(value[BF::base_limbs()..BF::base_limbs() * 2].try_into().unwrap()),
z: BF::from_limbs(value[BF::base_limbs() * 2..].try_into().unwrap())
}
}
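// Layout note for from_xy_limbs above: the x, y and z limbs are expected
// concatenated in that order, so for a 12-limb base field the input slice
// must hold 36 u32 values.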
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinityT<BF> {
PointAffineNoInfinityT {
x: self.x,
y: self.y,
}
}
}


@@ -1,102 +0,0 @@
use std::ffi::{c_int, c_uint};
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
use std::mem::transmute;
use rustacuda::prelude::*;
use rustacuda_core::DevicePointer;
use rustacuda::memory::{DeviceBox, CopyDestination};
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use std::marker::PhantomData;
use std::convert::TryInto;
use super::field::{Field, self};
pub fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
match val.len() {
n if n < NUM_LIMBS => {
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
padded[..val.len()].copy_from_slice(&val);
padded
}
n if n == NUM_LIMBS => val.try_into().unwrap(),
_ => panic!("slice has too many elements"),
}
}
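// Illustration of get_fixed_limbs above (hypothetical call, not part of the
// original tests): shorter input is zero-padded, e.g. get_fixed_limbs::<4>(&[1, 2])
// yields [1, 2, 0, 0], while a slice longer than NUM_LIMBS panics.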
pub trait ScalarTrait{
fn base_limbs() -> usize;
fn zero() -> Self;
fn from_limbs(value: &[u32]) -> Self;
fn one() -> Self;
fn to_bytes_le(&self) -> Vec<u8>;
fn limbs(&self) -> &[u32];
}
#[derive(Debug, PartialEq, Clone, Copy)]
#[repr(C)]
pub struct ScalarT<M, const NUM_LIMBS: usize> {
pub(crate) phantom: PhantomData<M>,
pub(crate) value : [u32; NUM_LIMBS]
}
impl<M, const NUM_LIMBS: usize> ScalarTrait for ScalarT<M, NUM_LIMBS>
where
M: Field<NUM_LIMBS>,
{
fn base_limbs() -> usize {
return NUM_LIMBS;
}
fn zero() -> Self {
ScalarT {
value: [0u32; NUM_LIMBS],
phantom: PhantomData,
}
}
fn from_limbs(value: &[u32]) -> Self {
Self {
value: get_fixed_limbs(value),
phantom: PhantomData,
}
}
fn one() -> Self {
let mut s = [0u32; NUM_LIMBS];
s[0] = 1;
ScalarT { value: s, phantom: PhantomData }
}
fn to_bytes_le(&self) -> Vec<u8> {
self.value
.iter()
.map(|s| s.to_le_bytes().to_vec())
.flatten()
.collect::<Vec<_>>()
}
fn limbs(&self) -> &[u32] {
&self.value
}
}
impl<M, const NUM_LIMBS: usize> ScalarT<M, NUM_LIMBS> where M: field::Field<NUM_LIMBS>{
pub fn from_limbs_le(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
Self::from_limbs(value)
}
pub fn from_limbs_be(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
let mut value = value.to_vec();
value.reverse();
Self::from_limbs_le(&value)
}
// Additional Functions
// Limb-wise addition with carry propagation; note that no modular reduction is performed.
pub fn add(&self, other: ScalarT<M, NUM_LIMBS>) -> ScalarT<M, NUM_LIMBS> { // overload +
let mut value = [0u32; NUM_LIMBS];
let mut carry = 0u64;
for i in 0..NUM_LIMBS {
let sum = self.value[i] as u64 + other.value[i] as u64 + carry;
value[i] = sum as u32;
carry = sum >> 32;
}
ScalarT { value, phantom: PhantomData }
}
}


@@ -1,62 +0,0 @@
use std::ffi::{c_int, c_uint};
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda_derive::DeviceCopy;
use std::mem::transmute;
use rustacuda::prelude::*;
use rustacuda_core::DevicePointer;
use rustacuda::memory::{DeviceBox, CopyDestination, DeviceCopy};
use std::marker::PhantomData;
use std::convert::TryInto;
use crate::basic_structs::point::{PointT, PointAffineNoInfinityT};
use crate::basic_structs::scalar::ScalarT;
use crate::basic_structs::field::Field;
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
#[repr(C)]
pub struct ScalarField;
impl Field<8> for ScalarField {
const MODOLUS: [u32; 8] = [0x0;8];
}
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
#[repr(C)]
pub struct BaseField;
impl Field<12> for BaseField {
const MODOLUS: [u32; 12] = [0x0;12];
}
pub type Scalar = ScalarT<ScalarField,8>;
impl Default for Scalar {
fn default() -> Self {
Self{value: [0x0;ScalarField::LIMBS], phantom: PhantomData }
}
}
unsafe impl DeviceCopy for Scalar{}
pub type Base = ScalarT<BaseField,12>;
impl Default for Base {
fn default() -> Self {
Self{value: [0x0;BaseField::LIMBS], phantom: PhantomData }
}
}
unsafe impl DeviceCopy for Base{}
pub type Point = PointT<Base>;
pub type PointAffineNoInfinity = PointAffineNoInfinityT<Base>;
extern "C" {
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
}
impl PartialEq for Point {
fn eq(&self, other: &Self) -> bool {
unsafe { eq(self, other) != 0 }
}
}
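// Note: equality is delegated to the CUDA `eq` symbol above, presumably so that
// different projective representatives of the same point (including the zero
// point) compare equal without a host-side conversion to affine.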


@@ -1,798 +0,0 @@
use std::ffi::{c_int, c_uint};
use ark_std::UniformRand;
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda::CudaFlags;
use rustacuda::memory::DeviceBox;
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
use rustacuda_core::DevicePointer;
use std::mem::transmute;
use crate::basic_structs::scalar::ScalarTrait;
use crate::curve_structs::*;
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use std::marker::PhantomData;
use std::convert::TryInto;
use ark_bls12_381::{Fq as Fq_BLS12_381, Fr as Fr_BLS12_381, G1Affine as G1Affine_BLS12_381, G1Projective as G1Projective_BLS12_381};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
use rustacuda::memory::{CopyDestination, DeviceCopy};
extern "C" {
fn msm_cuda(
out: *mut Point,
points: *const PointAffineNoInfinity,
scalars: *const Scalar,
count: usize,
device_id: usize,
) -> c_uint;
fn msm_batch_cuda(
out: *mut Point,
points: *const PointAffineNoInfinity,
scalars: *const Scalar,
batch_size: usize,
msm_size: usize,
device_id: usize,
) -> c_uint;
fn commit_cuda(
d_out: DevicePointer<Point>,
d_scalars: DevicePointer<Scalar>,
d_points: DevicePointer<PointAffineNoInfinity>,
count: usize,
device_id: usize,
) -> c_uint;
fn commit_batch_cuda(
d_out: DevicePointer<Point>,
d_scalars: DevicePointer<Scalar>,
d_points: DevicePointer<PointAffineNoInfinity>,
count: usize,
batch_size: usize,
device_id: usize,
) -> c_uint;
fn build_domain_cuda(domain_size: usize, logn: usize, inverse: bool, device_id: usize) -> DevicePointer<Scalar>;
fn ntt_cuda(inout: *mut Scalar, n: usize, inverse: bool, device_id: usize) -> c_int;
fn ecntt_cuda(inout: *mut Point, n: usize, inverse: bool, device_id: usize) -> c_int;
fn ntt_batch_cuda(
inout: *mut Scalar,
arr_size: usize,
n: usize,
inverse: bool,
) -> c_int;
fn ecntt_batch_cuda(inout: *mut Point, arr_size: usize, n: usize, inverse: bool) -> c_int;
fn interpolate_scalars_cuda(
d_out: DevicePointer<Scalar>,
d_evaluations: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
n: usize,
device_id: usize
) -> c_int;
fn interpolate_scalars_batch_cuda(
d_out: DevicePointer<Scalar>,
d_evaluations: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn interpolate_points_cuda(
d_out: DevicePointer<Point>,
d_evaluations: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
n: usize,
device_id: usize
) -> c_int;
fn interpolate_points_batch_cuda(
d_out: DevicePointer<Point>,
d_evaluations: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn evaluate_scalars_cuda(
d_out: DevicePointer<Scalar>,
d_coefficients: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
device_id: usize
) -> c_int;
fn evaluate_scalars_batch_cuda(
d_out: DevicePointer<Scalar>,
d_coefficients: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn evaluate_points_cuda(
d_out: DevicePointer<Point>,
d_coefficients: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
device_id: usize
) -> c_int;
fn evaluate_points_batch_cuda(
d_out: DevicePointer<Point>,
d_coefficients: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn evaluate_scalars_on_coset_cuda(
d_out: DevicePointer<Scalar>,
d_coefficients: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
coset_powers: DevicePointer<Scalar>,
device_id: usize
) -> c_int;
fn evaluate_scalars_on_coset_batch_cuda(
d_out: DevicePointer<Scalar>,
d_coefficients: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
batch_size: usize,
coset_powers: DevicePointer<Scalar>,
device_id: usize
) -> c_int;
fn evaluate_points_on_coset_cuda(
d_out: DevicePointer<Point>,
d_coefficients: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
coset_powers: DevicePointer<Scalar>,
device_id: usize
) -> c_int;
fn evaluate_points_on_coset_batch_cuda(
d_out: DevicePointer<Point>,
d_coefficients: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
batch_size: usize,
coset_powers: DevicePointer<Scalar>,
device_id: usize
) -> c_int;
fn reverse_order_scalars_cuda(
d_arr: DevicePointer<Scalar>,
n: usize,
device_id: usize
) -> c_int;
fn reverse_order_scalars_batch_cuda(
d_arr: DevicePointer<Scalar>,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn reverse_order_points_cuda(
d_arr: DevicePointer<Point>,
n: usize,
device_id: usize
) -> c_int;
fn reverse_order_points_batch_cuda(
d_arr: DevicePointer<Point>,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn vec_mod_mult_point(
inout: *mut Point,
scalars: *const Scalar,
n_elements: usize,
device_id: usize,
) -> c_int;
fn vec_mod_mult_scalar(
inout: *mut Scalar,
scalars: *const Scalar,
n_elements: usize,
device_id: usize,
) -> c_int;
fn matrix_vec_mod_mult(
matrix_flattened: *const Scalar,
input: *const Scalar,
output: *mut Scalar,
n_elements: usize,
device_id: usize,
) -> c_int;
}
pub fn msm(points: &[PointAffineNoInfinity], scalars: &[Scalar], device_id: usize) -> Point {
let count = points.len();
if count != scalars.len() {
todo!("variable length")
}
let mut ret = Point::zero();
unsafe {
msm_cuda(
&mut ret as *mut _ as *mut Point,
points as *const _ as *const PointAffineNoInfinity,
scalars as *const _ as *const Scalar,
scalars.len(),
device_id,
)
};
ret
}
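// Usage sketch for `msm` above, assuming host-side inputs built with the helpers
// defined later in this file:
// let points = generate_random_points(1 << 10, get_rng(None));
// let scalars = generate_random_scalars(1 << 10, get_rng(None));
// let result = msm(&points, &scalars, 0); // runs on device 0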
pub fn msm_batch(
points: &[PointAffineNoInfinity],
scalars: &[Scalar],
batch_size: usize,
device_id: usize,
) -> Vec<Point> {
let count = points.len();
if count != scalars.len() {
todo!("variable length")
}
let mut ret = vec![Point::zero(); batch_size];
unsafe {
msm_batch_cuda(
&mut ret[0] as *mut _ as *mut Point,
points as *const _ as *const PointAffineNoInfinity,
scalars as *const _ as *const Scalar,
batch_size,
count / batch_size,
device_id,
)
};
ret
}
pub fn commit(
points: &mut DeviceBuffer<PointAffineNoInfinity>,
scalars: &mut DeviceBuffer<Scalar>,
) -> DeviceBox<Point> {
let mut res = DeviceBox::new(&Point::zero()).unwrap();
unsafe {
commit_cuda(
res.as_device_ptr(),
scalars.as_device_ptr(),
points.as_device_ptr(),
scalars.len(),
0,
);
}
return res;
}
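// Note: `commit` appears to be the device-resident counterpart of `msm` above:
// scalars and points already live in GPU memory and the single resulting point
// stays in a DeviceBox (the tests assert it matches `msm` on the same inputs).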
pub fn commit_batch(
points: &mut DeviceBuffer<PointAffineNoInfinity>,
scalars: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(batch_size).unwrap() };
unsafe {
commit_batch_cuda(
res.as_device_ptr(),
scalars.as_device_ptr(),
points.as_device_ptr(),
scalars.len() / batch_size,
batch_size,
0,
);
}
return res;
}
/// Compute an in-place NTT on the input data.
fn ntt_internal(values: &mut [Scalar], device_id: usize, inverse: bool) -> i32 {
let ret_code = unsafe {
ntt_cuda(
values as *mut _ as *mut Scalar,
values.len(),
inverse,
device_id,
)
};
ret_code
}
pub fn ntt(values: &mut [Scalar], device_id: usize) {
ntt_internal(values, device_id, false);
}
pub fn intt(values: &mut [Scalar], device_id: usize) {
ntt_internal(values, device_id, true);
}
/// Compute an in-place NTT on the input data.
fn ntt_internal_batch(
values: &mut [Scalar],
device_id: usize,
batch_size: usize,
inverse: bool,
) -> i32 {
unsafe {
ntt_batch_cuda(
values as *mut _ as *mut Scalar,
values.len(),
batch_size,
inverse,
)
}
}
pub fn ntt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
ntt_internal_batch(values, 0, batch_size, false);
}
pub fn intt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
ntt_internal_batch(values, 0, batch_size, true);
}
/// Compute an in-place ECNTT on the input data.
fn ecntt_internal(values: &mut [Point], inverse: bool, device_id: usize) -> i32 {
unsafe {
ecntt_cuda(
values as *mut _ as *mut Point,
values.len(),
inverse,
device_id,
)
}
}
pub fn ecntt(values: &mut [Point], device_id: usize) {
ecntt_internal(values, false, device_id);
}
/// Compute an in-place iECNTT on the input data.
pub fn iecntt(values: &mut [Point], device_id: usize) {
ecntt_internal(values, true, device_id);
}
/// Compute an in-place ECNTT on the input data.
fn ecntt_internal_batch(
values: &mut [Point],
device_id: usize,
batch_size: usize,
inverse: bool,
) -> i32 {
unsafe {
ecntt_batch_cuda(
values as *mut _ as *mut Point,
values.len(),
batch_size,
inverse,
)
}
}
pub fn ecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
ecntt_internal_batch(values, 0, batch_size, false);
}
/// Compute an in-place iECNTT on the input data.
pub fn iecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
ecntt_internal_batch(values, 0, batch_size, true);
}
pub fn build_domain(domain_size: usize, logn: usize, inverse: bool) -> DeviceBuffer<Scalar> {
unsafe {
DeviceBuffer::from_raw_parts(build_domain_cuda(
domain_size,
logn,
inverse,
0
), domain_size)
}
}
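// Note: the returned buffer is assumed to hold `domain_size` twiddle factors
// (powers of a 2^logn root of unity, inverted when `inverse` is set) allocated on
// the CUDA side; `from_raw_parts` takes ownership of that device allocation.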
pub fn reverse_order_scalars(
d_scalars: &mut DeviceBuffer<Scalar>,
) {
unsafe { reverse_order_scalars_cuda(
d_scalars.as_device_ptr(),
d_scalars.len(),
0
); }
}
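// Note: the interpolation tests call reverse_order_scalars before
// `interpolate_scalars`, so it is assumed to apply the bit-reversal reordering
// that the inverse NTT expects.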
pub fn reverse_order_scalars_batch(
d_scalars: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) {
unsafe { reverse_order_scalars_batch_cuda(
d_scalars.as_device_ptr(),
d_scalars.len() / batch_size,
batch_size,
0
); }
}
pub fn reverse_order_points(
d_points: &mut DeviceBuffer<Point>,
) {
unsafe { reverse_order_points_cuda(
d_points.as_device_ptr(),
d_points.len(),
0
); }
}
pub fn reverse_order_points_batch(
d_points: &mut DeviceBuffer<Point>,
batch_size: usize,
) {
unsafe { reverse_order_points_batch_cuda(
d_points.as_device_ptr(),
d_points.len() / batch_size,
batch_size,
0
); }
}
pub fn interpolate_scalars(
d_evaluations: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe { interpolate_scalars_cuda(
res.as_device_ptr(),
d_evaluations.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
0
) };
return res;
}
pub fn interpolate_scalars_batch(
d_evaluations: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe { interpolate_scalars_batch_cuda(
res.as_device_ptr(),
d_evaluations.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
batch_size,
0
) };
return res;
}
pub fn interpolate_points(
d_evaluations: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe { interpolate_points_cuda(
res.as_device_ptr(),
d_evaluations.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
0
) };
return res;
}
pub fn interpolate_points_batch(
d_evaluations: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe { interpolate_points_batch_cuda(
res.as_device_ptr(),
d_evaluations.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
batch_size,
0
) };
return res;
}
pub fn evaluate_scalars(
d_coefficients: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe {
evaluate_scalars_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len(),
0
);
}
return res;
}
pub fn evaluate_scalars_batch(
d_coefficients: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe {
evaluate_scalars_batch_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len() / batch_size,
batch_size,
0
);
}
return res;
}
pub fn evaluate_points(
d_coefficients: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe {
evaluate_points_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len(),
0
);
}
return res;
}
pub fn evaluate_points_batch(
d_coefficients: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe {
evaluate_points_batch_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len() / batch_size,
batch_size,
0
);
}
return res;
}
pub fn evaluate_scalars_on_coset(
d_coefficients: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
coset_powers: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe {
evaluate_scalars_on_coset_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len(),
coset_powers.as_device_ptr(),
0
);
}
return res;
}
pub fn evaluate_scalars_on_coset_batch(
d_coefficients: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
coset_powers: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe {
evaluate_scalars_on_coset_batch_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len() / batch_size,
batch_size,
coset_powers.as_device_ptr(),
0
);
}
return res;
}
pub fn evaluate_points_on_coset(
d_coefficients: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
coset_powers: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe {
evaluate_points_on_coset_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len(),
coset_powers.as_device_ptr(),
0
);
}
return res;
}
pub fn evaluate_points_on_coset_batch(
d_coefficients: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
coset_powers: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe {
evaluate_points_on_coset_batch_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len() / batch_size,
batch_size,
coset_powers.as_device_ptr(),
0
);
}
return res;
}
pub fn multp_vec(a: &mut [Point], b: &[Scalar], device_id: usize) {
assert_eq!(a.len(), b.len());
unsafe {
vec_mod_mult_point(
a as *mut _ as *mut Point,
b as *const _ as *const Scalar,
a.len(),
device_id,
);
}
}
pub fn mult_sc_vec(a: &mut [Scalar], b: &[Scalar], device_id: usize) {
assert_eq!(a.len(), b.len());
unsafe {
vec_mod_mult_scalar(
a as *mut _ as *mut Scalar,
b as *const _ as *const Scalar,
a.len(),
device_id,
);
}
}
// Multiply a matrix by a vector:
// `a` - flattened matrix;
// `b` - vector to multiply `a` by;
pub fn mult_matrix_by_vec(a: &[Scalar], b: &[Scalar], device_id: usize) -> Vec<Scalar> {
let mut c = vec![Scalar::zero(); b.len()];
unsafe {
matrix_vec_mod_mult(
a as *const _ as *const Scalar,
b as *const _ as *const Scalar,
c.as_mut_slice() as *mut _ as *mut Scalar,
b.len(),
device_id,
);
}
c
}
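// Shape sketch (assuming a row-major n x n flattening): output[i] = sum over j of
// a[i * n + j] * b[j]; the matrix-multiplication test in this workspace relies on
// this to reproduce an NTT via the DFT matrix.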
pub fn clone_buffer<T: DeviceCopy>(buf: &mut DeviceBuffer<T>) -> DeviceBuffer<T> {
let mut buf_cpy = unsafe { DeviceBuffer::uninitialized(buf.len()).unwrap() };
buf_cpy.copy_from(buf).unwrap();
return buf_cpy;
}
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> {
let rng: Box<dyn RngCore> = match seed {
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
None => Box::new(rand::thread_rng()),
};
rng
}
fn set_up_device() {
// Initialize the CUDA driver API and create a context on the default device.
rustacuda::init(CudaFlags::empty()).unwrap();
let device = Device::get_device(0).unwrap();
let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device).unwrap();
}
pub fn generate_random_points(
count: usize,
mut rng: Box<dyn RngCore>,
) -> Vec<PointAffineNoInfinity> {
(0..count)
.map(|_| Point::from_ark(G1Projective_BLS12_381::rand(&mut rng)).to_xy_strip_z())
.collect()
}
pub fn generate_random_points_proj(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Point> {
(0..count)
.map(|_| Point::from_ark(G1Projective_BLS12_381::rand(&mut rng)))
.collect()
}
pub fn generate_random_scalars(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Scalar> {
(0..count)
.map(|_| Scalar::from_ark(Fr_BLS12_381::rand(&mut rng).into_repr()))
.collect()
}
pub fn set_up_points(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Point>, DeviceBuffer<Point>, DeviceBuffer<Scalar>) {
set_up_device();
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
let seed = Some(0); // fix the rng so repeated calls generate the same points
let vector = generate_random_points_proj(test_size, get_rng(seed));
let mut vector_mut = vector.clone();
let mut d_vector = DeviceBuffer::from_slice(&vector[..]).unwrap();
(vector_mut, d_vector, d_domain)
}
pub fn set_up_scalars(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Scalar>, DeviceBuffer<Scalar>, DeviceBuffer<Scalar>) {
set_up_device();
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
let seed = Some(0); // fix the rng so repeated calls generate the same scalars
let mut vector_mut = generate_random_scalars(test_size, get_rng(seed));
let mut d_vector = DeviceBuffer::from_slice(&vector_mut[..]).unwrap();
(vector_mut, d_vector, d_domain)
}


@@ -1,4 +0,0 @@
pub mod test_bls12_381;
pub mod basic_structs;
pub mod from_cuda;
pub mod curve_structs;


@@ -1,816 +0,0 @@
use std::ffi::{c_int, c_uint};
use ark_std::UniformRand;
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda::CudaFlags;
use rustacuda::memory::DeviceBox;
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
use rustacuda_core::DevicePointer;
use std::mem::transmute;
pub use crate::basic_structs::scalar::ScalarTrait;
pub use crate::curve_structs::*;
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use std::marker::PhantomData;
use std::convert::TryInto;
use ark_bls12_381::{Fq as Fq_BLS12_381, Fr as Fr_BLS12_381, G1Affine as G1Affine_BLS12_381, G1Projective as G1Projective_BLS12_381};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
use rustacuda::memory::{CopyDestination, DeviceCopy};
impl Scalar {
pub fn to_biginteger254(&self) -> BigInteger256 {
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn to_ark(&self) -> BigInteger256 {
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_biginteger256(ark: BigInteger256) -> Self {
Self{ value: u64_vec_to_u32_vec(&ark.0).try_into().unwrap(), phantom : PhantomData}
}
pub fn to_biginteger256_transmute(&self) -> BigInteger256 {
unsafe { transmute(*self) }
}
pub fn from_biginteger_transmute(v: BigInteger256) -> Scalar {
Scalar{ value: unsafe{ transmute(v)}, phantom : PhantomData }
}
pub fn to_ark_transmute(&self) -> Fr_BLS12_381 {
unsafe { std::mem::transmute(*self) }
}
pub fn from_ark_transmute(v: &Fr_BLS12_381) -> Scalar {
unsafe { std::mem::transmute_copy(v) }
}
pub fn to_ark_mod_p(&self) -> Fr_BLS12_381 {
Fr_BLS12_381::new(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap()))
}
pub fn to_ark_repr(&self) -> Fr_BLS12_381 {
Fr_BLS12_381::from_repr(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())).unwrap()
}
pub fn from_ark(v: BigInteger256) -> Scalar {
Self { value : u64_vec_to_u32_vec(&v.0).try_into().unwrap(), phantom: PhantomData}
}
}
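// Round-trip sketch (assuming the limbs encode a canonical representative):
// Scalar::from_ark(fr.into_repr()).to_ark() == fr.into_repr().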
impl Base {
pub fn to_ark(&self) -> BigInteger384 {
BigInteger384::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger384) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
}
impl Point {
pub fn to_ark(&self) -> G1Projective_BLS12_381 {
self.to_ark_affine().into_projective()
}
pub fn to_ark_affine(&self) -> G1Affine_BLS12_381 {
//TODO: generic conversion
use ark_ff::Field;
use std::ops::Mul;
let proj_x_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.x.to_bytes_le());
let proj_y_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.y.to_bytes_le());
let proj_z_field = Fq_BLS12_381::from_le_bytes_mod_order(&self.z.to_bytes_le());
let inverse_z = proj_z_field.inverse().unwrap();
let aff_x = proj_x_field.mul(inverse_z);
let aff_y = proj_y_field.mul(inverse_z);
G1Affine_BLS12_381::new(aff_x, aff_y, false)
}
pub fn from_ark(ark: G1Projective_BLS12_381) -> Point {
use ark_ff::Field;
let z_inv = ark.z.inverse().unwrap();
let z_invsq = z_inv * z_inv;
let z_invq3 = z_invsq * z_inv;
Point {
x: Base::from_ark((ark.x * z_invsq).into_repr()),
y: Base::from_ark((ark.y * z_invq3).into_repr()),
z: Base::one(),
}
}
}
impl PointAffineNoInfinity {
pub fn to_ark(&self) -> G1Affine_BLS12_381 {
G1Affine_BLS12_381::new(Fq_BLS12_381::new(self.x.to_ark()), Fq_BLS12_381::new(self.y.to_ark()), false)
}
pub fn to_ark_repr(&self) -> G1Affine_BLS12_381 {
G1Affine_BLS12_381::new(
Fq_BLS12_381::from_repr(self.x.to_ark()).unwrap(),
Fq_BLS12_381::from_repr(self.y.to_ark()).unwrap(),
false,
)
}
pub fn from_ark(p: &G1Affine_BLS12_381) -> Self {
PointAffineNoInfinity {
x: Base::from_ark(p.x.into_repr()),
y: Base::from_ark(p.y.into_repr()),
}
}
}
impl Point {
pub fn to_affine(&self) -> PointAffineNoInfinity {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity {
x: Base::from_ark(ark_affine.x.into_repr()),
y: Base::from_ark(ark_affine.y.into_repr()),
}
}
}
#[cfg(test)]
pub(crate) mod tests_bls12_381 {
use std::ops::Add;
use ark_bls12_381::{Fr, G1Affine, G1Projective};
use ark_ec::{msm::VariableBaseMSM, AffineCurve, ProjectiveCurve};
use ark_ff::{FftField, Field, Zero, PrimeField};
use ark_std::UniformRand;
use rustacuda::prelude::{DeviceBuffer, CopyDestination};
use crate::curve_structs::{Point, Scalar, Base};
use crate::basic_structs::scalar::ScalarTrait;
use crate::from_cuda::{
generate_random_points, get_rng, generate_random_scalars, msm, msm_batch, set_up_scalars,
commit, commit_batch, ntt, intt, generate_random_points_proj, ecntt, iecntt, ntt_batch,
ecntt_batch, iecntt_batch, intt_batch, reverse_order_scalars_batch, interpolate_scalars_batch,
set_up_points, reverse_order_points, interpolate_points, reverse_order_points_batch,
interpolate_points_batch, evaluate_scalars, interpolate_scalars, reverse_order_scalars,
evaluate_points, build_domain, evaluate_scalars_on_coset, evaluate_points_on_coset,
mult_matrix_by_vec, mult_sc_vec, multp_vec, evaluate_scalars_batch, evaluate_points_batch,
evaluate_scalars_on_coset_batch, evaluate_points_on_coset_batch,
};
fn random_points_ark_proj(nof_elements: usize) -> Vec<G1Projective> {
let mut rng = ark_std::rand::thread_rng();
let mut points_ga: Vec<G1Projective> = Vec::new();
for _ in 0..nof_elements {
let aff = G1Projective::rand(&mut rng);
points_ga.push(aff);
}
points_ga
}
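// Naive O(n^2) ECNTT reference: result[k] = sum over l of points[l] * rou^(l * k),
// where rou is an n-th root of unity; the inverse transform uses rou^-1 plus an
// extra 1/n factor, as implemented below.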
fn ecntt_arc_naive(
points: &Vec<G1Projective>,
size: usize,
inverse: bool,
) -> Vec<G1Projective> {
let mut result: Vec<G1Projective> = Vec::new();
for _ in 0..size {
result.push(G1Projective::zero());
}
let rou: Fr;
if !inverse {
rou = Fr::get_root_of_unity(size).unwrap();
} else {
rou = Fr::inverse(&Fr::get_root_of_unity(size).unwrap()).unwrap();
}
for k in 0..size {
for l in 0..size {
let pow: [u64; 1] = [(l * k).try_into().unwrap()];
let mul_rou = Fr::pow(&rou, &pow);
result[k] = result[k].add(points[l].into_affine().mul(mul_rou));
}
}
if inverse {
let size2 = size as u64;
for k in 0..size {
let multfactor = Fr::inverse(&Fr::from(size2)).unwrap();
result[k] = result[k].into_affine().mul(multfactor);
}
}
return result;
}
fn check_eq(points: &Vec<G1Projective>, points2: &Vec<G1Projective>) -> bool {
let mut eq = true;
for i in 0..points.len() {
if points2[i].ne(&points[i]) {
eq = false;
break;
}
}
return eq;
}
fn test_naive_ark_ecntt(size: usize) {
let points = random_points_ark_proj(size);
let result1: Vec<G1Projective> = ecntt_arc_naive(&points, size, false);
let result2: Vec<G1Projective> = ecntt_arc_naive(&result1, size, true);
assert!(!check_eq(&result2, &result1));
assert!(check_eq(&result2, &points));
}
#[test]
fn test_msm() {
let test_sizes = [6, 9];
for pow2 in test_sizes {
let count = 1 << pow2;
let seed = None; // set Some to provide seed
let points = generate_random_points(count, get_rng(seed));
let scalars = generate_random_scalars(count, get_rng(seed));
let msm_result = msm(&points, &scalars, 0);
let point_r_ark: Vec<_> = points.iter().map(|x| x.to_ark_repr()).collect();
let scalars_r_ark: Vec<_> = scalars.iter().map(|x| x.to_ark()).collect();
let msm_result_ark = VariableBaseMSM::multi_scalar_mul(&point_r_ark, &scalars_r_ark);
assert_eq!(msm_result.to_ark_affine(), msm_result_ark);
assert_eq!(msm_result.to_ark(), msm_result_ark);
assert_eq!(
msm_result.to_ark_affine(),
Point::from_ark(msm_result_ark).to_ark_affine()
);
}
}
#[test]
fn test_batch_msm() {
for batch_pow2 in [2, 4] {
for pow2 in [4, 6] {
let msm_size = 1 << pow2;
let batch_size = 1 << batch_pow2;
let seed = None; // set Some to provide seed
let points_batch = generate_random_points(msm_size * batch_size, get_rng(seed));
let scalars_batch = generate_random_scalars(msm_size * batch_size, get_rng(seed));
let point_r_ark: Vec<_> = points_batch.iter().map(|x| x.to_ark_repr()).collect();
let scalars_r_ark: Vec<_> = scalars_batch.iter().map(|x| x.to_ark()).collect();
let expected: Vec<_> = point_r_ark
.chunks(msm_size)
.zip(scalars_r_ark.chunks(msm_size))
.map(|p| Point::from_ark(VariableBaseMSM::multi_scalar_mul(p.0, p.1)))
.collect();
let result = msm_batch(&points_batch, &scalars_batch, batch_size, 0);
assert_eq!(result, expected);
}
}
}
#[test]
fn test_commit() {
let test_size = 1 << 8;
let seed = Some(0);
let (mut scalars, mut d_scalars, _) = set_up_scalars(test_size, 0, false);
let mut points = generate_random_points(test_size, get_rng(seed));
let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();
let msm_result = msm(&points, &scalars, 0);
let mut d_commit_result = commit(&mut d_points, &mut d_scalars);
let mut h_commit_result = Point::zero();
d_commit_result.copy_to(&mut h_commit_result).unwrap();
assert_eq!(msm_result, h_commit_result);
assert_ne!(msm_result, Point::zero());
assert_ne!(h_commit_result, Point::zero());
}
#[test]
fn test_batch_commit() {
let batch_size = 4;
let test_size = 1 << 12;
let seed = Some(0);
let (scalars, mut d_scalars, _) = set_up_scalars(test_size * batch_size, 0, false);
let points = generate_random_points(test_size * batch_size, get_rng(seed));
let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();
let msm_result = msm_batch(&points, &scalars, batch_size, 0);
let mut d_commit_result = commit_batch(&mut d_points, &mut d_scalars, batch_size);
let mut h_commit_result: Vec<Point> = (0..batch_size).map(|_| Point::zero()).collect();
d_commit_result.copy_to(&mut h_commit_result[..]).unwrap();
assert_eq!(msm_result, h_commit_result);
for h in h_commit_result {
assert_ne!(h, Point::zero());
}
}
#[test]
fn test_ntt() {
//NTT
let seed = None; // set to Some(value) to fix the rng
let test_size = 1 << 3;
let scalars = generate_random_scalars(test_size, get_rng(seed));
let mut ntt_result = scalars.clone();
ntt(&mut ntt_result, 0);
assert_ne!(ntt_result, scalars);
let mut intt_result = ntt_result.clone();
intt(&mut intt_result, 0);
assert_eq!(intt_result, scalars);
//ECNTT
let points_proj = generate_random_points_proj(test_size, get_rng(seed));
test_naive_ark_ecntt(test_size);
assert!(points_proj[0].to_ark().into_affine().is_on_curve());
//naive ark
let points_proj_ark = points_proj
.iter()
.map(|p| p.to_ark())
.collect::<Vec<G1Projective>>();
let ecntt_result_naive = ecntt_arc_naive(&points_proj_ark, points_proj_ark.len(), false);
let iecntt_result_naive = ecntt_arc_naive(&ecntt_result_naive, points_proj_ark.len(), true);
assert_eq!(points_proj_ark, iecntt_result_naive);
//ingo gpu
let mut ecntt_result = points_proj.to_vec();
ecntt(&mut ecntt_result, 0);
assert_ne!(ecntt_result, points_proj);
let mut iecntt_result = ecntt_result.clone();
iecntt(&mut iecntt_result, 0);
assert_eq!(
iecntt_result_naive,
points_proj
.iter()
.map(|p| p.to_ark_affine())
.collect::<Vec<G1Affine>>()
);
assert_eq!(
iecntt_result
.iter()
.map(|p| p.to_ark_affine())
.collect::<Vec<G1Affine>>(),
points_proj
.iter()
.map(|p| p.to_ark_affine())
.collect::<Vec<G1Affine>>()
);
}
#[test]
fn test_ntt_batch() {
//NTT
let seed = None; // set to Some(value) to fix the rng
let test_size = 1 << 5;
let batches = 4;
let scalars_batch: Vec<Scalar> =
generate_random_scalars(test_size * batches, get_rng(seed));
let mut scalar_vec_of_vec: Vec<Vec<Scalar>> = Vec::new();
for i in 0..batches {
scalar_vec_of_vec.push(scalars_batch[i * test_size..(i + 1) * test_size].to_vec());
}
let mut ntt_result = scalars_batch.clone();
// do batch ntt
ntt_batch(&mut ntt_result, test_size, 0);
let mut ntt_result_vec_of_vec = Vec::new();
// do ntt for every chunk
for i in 0..batches {
ntt_result_vec_of_vec.push(scalar_vec_of_vec[i].clone());
ntt(&mut ntt_result_vec_of_vec[i], 0);
}
// check that the ntt of each vec of scalars is equal to the corresponding chunk of the batch ntt
for i in 0..batches {
assert_eq!(
ntt_result_vec_of_vec[i],
ntt_result[i * test_size..(i + 1) * test_size]
);
}
// check that ntt output is different from input
assert_ne!(ntt_result, scalars_batch);
let mut intt_result = ntt_result.clone();
// do batch intt
intt_batch(&mut intt_result, test_size, 0);
let mut intt_result_vec_of_vec = Vec::new();
// do intt for every chunk
for i in 0..batches {
intt_result_vec_of_vec.push(ntt_result_vec_of_vec[i].clone());
intt(&mut intt_result_vec_of_vec[i], 0);
}
// check that the intt of each vec of scalars is equal to the intt of the specific batch
for i in 0..batches {
assert_eq!(
intt_result_vec_of_vec[i],
intt_result[i * test_size..(i + 1) * test_size]
);
}
assert_eq!(intt_result, scalars_batch);
//ECNTT
let points_proj = generate_random_points_proj(test_size * batches, get_rng(seed));
let mut points_vec_of_vec: Vec<Vec<Point>> = Vec::new();
for i in 0..batches {
points_vec_of_vec.push(points_proj[i * test_size..(i + 1) * test_size].to_vec());
}
let mut ntt_result_points = points_proj.clone();
// do batch ecntt
ecntt_batch(&mut ntt_result_points, test_size, 0);
let mut ntt_result_points_vec_of_vec = Vec::new();
for i in 0..batches {
ntt_result_points_vec_of_vec.push(points_vec_of_vec[i].clone());
ecntt(&mut ntt_result_points_vec_of_vec[i], 0);
}
for i in 0..batches {
assert_eq!(
ntt_result_points_vec_of_vec[i],
ntt_result_points[i * test_size..(i + 1) * test_size]
);
}
assert_ne!(ntt_result_points, points_proj);
let mut intt_result_points = ntt_result_points.clone();
// do batch ecintt
iecntt_batch(&mut intt_result_points, test_size, 0);
let mut intt_result_points_vec_of_vec = Vec::new();
// do ecintt for every chunk
for i in 0..batches {
intt_result_points_vec_of_vec.push(ntt_result_points_vec_of_vec[i].clone());
iecntt(&mut intt_result_points_vec_of_vec[i], 0);
}
// check that the iecntt of each vec of points is equal to the corresponding chunk of the batch iecntt
for i in 0..batches {
assert_eq!(
intt_result_points_vec_of_vec[i],
intt_result_points[i * test_size..(i + 1) * test_size]
);
}
assert_eq!(intt_result_points, points_proj);
}
#[test]
fn test_scalar_interpolation() {
let log_test_size = 7;
let test_size = 1 << log_test_size;
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_scalars(test_size, log_test_size, true);
reverse_order_scalars(&mut d_evals);
let mut d_coeffs = interpolate_scalars(&mut d_evals, &mut d_domain);
intt(&mut evals_mut, 0);
let mut h_coeffs: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
assert_eq!(h_coeffs, evals_mut);
}
#[test]
fn test_scalar_batch_interpolation() {
let batch_size = 4;
let log_test_size = 10;
let test_size = 1 << log_test_size;
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_scalars(test_size * batch_size, log_test_size, true);
reverse_order_scalars_batch(&mut d_evals, batch_size);
let mut d_coeffs = interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size);
intt_batch(&mut evals_mut, test_size, 0);
let mut h_coeffs: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
assert_eq!(h_coeffs, evals_mut);
}
#[test]
fn test_point_interpolation() {
let log_test_size = 6;
let test_size = 1 << log_test_size;
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_points(test_size, log_test_size, true);
reverse_order_points(&mut d_evals);
let mut d_coeffs = interpolate_points(&mut d_evals, &mut d_domain);
iecntt(&mut evals_mut[..], 0);
let mut h_coeffs: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
assert_eq!(h_coeffs, *evals_mut);
for h in h_coeffs.iter() {
assert_ne!(*h, Point::zero());
}
}
#[test]
fn test_point_batch_interpolation() {
let batch_size = 4;
let log_test_size = 6;
let test_size = 1 << log_test_size;
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_points(test_size * batch_size, log_test_size, true);
reverse_order_points_batch(&mut d_evals, batch_size);
let mut d_coeffs = interpolate_points_batch(&mut d_evals, &mut d_domain, batch_size);
iecntt_batch(&mut evals_mut[..], test_size, 0);
let mut h_coeffs: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
assert_eq!(h_coeffs, *evals_mut);
for h in h_coeffs.iter() {
assert_ne!(*h, Point::zero());
}
}
#[test]
fn test_scalar_evaluation() {
let log_test_domain_size = 8;
let coeff_size = 1 << 6;
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
let mut d_coeffs_domain = interpolate_scalars(&mut d_evals, &mut d_domain_inv);
let mut h_coeffs_domain: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
assert_eq!(h_coeffs, h_coeffs_domain[..coeff_size]);
for i in coeff_size..(1 << log_test_domain_size) {
assert_eq!(Scalar::zero(), h_coeffs_domain[i]);
}
}
#[test]
fn test_scalar_batch_evaluation() {
let batch_size = 6;
let log_test_domain_size = 8;
let domain_size = 1 << log_test_domain_size;
let coeff_size = 1 << 6;
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size * batch_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);
let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
let mut d_coeffs_domain = interpolate_scalars_batch(&mut d_evals, &mut d_domain_inv, batch_size);
let mut h_coeffs_domain: Vec<Scalar> = (0..domain_size * batch_size).map(|_| Scalar::zero()).collect();
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
for j in 0..batch_size {
assert_eq!(h_coeffs[j * coeff_size..(j + 1) * coeff_size], h_coeffs_domain[j * domain_size..j * domain_size + coeff_size]);
for i in coeff_size..domain_size {
assert_eq!(Scalar::zero(), h_coeffs_domain[j * domain_size + i]);
}
}
}
#[test]
fn test_point_evaluation() {
let log_test_domain_size = 7;
let coeff_size = 1 << 7;
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_points(coeff_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);
let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
let mut d_coeffs_domain = interpolate_points(&mut d_evals, &mut d_domain_inv);
let mut h_coeffs_domain: Vec<Point> = (0..1 << log_test_domain_size).map(|_| Point::zero()).collect();
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
assert_eq!(h_coeffs[..], h_coeffs_domain[..coeff_size]);
for i in coeff_size..(1 << log_test_domain_size) {
assert_eq!(Point::zero(), h_coeffs_domain[i]);
}
for i in 0..coeff_size {
assert_ne!(h_coeffs_domain[i], Point::zero());
}
}
#[test]
fn test_point_batch_evaluation() {
let batch_size = 4;
let log_test_domain_size = 6;
let domain_size = 1 << log_test_domain_size;
let coeff_size = 1 << 5;
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_points(coeff_size * batch_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);
let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
let mut d_coeffs_domain = interpolate_points_batch(&mut d_evals, &mut d_domain_inv, batch_size);
let mut h_coeffs_domain: Vec<Point> = (0..domain_size * batch_size).map(|_| Point::zero()).collect();
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
for j in 0..batch_size {
assert_eq!(h_coeffs[j * coeff_size..(j + 1) * coeff_size], h_coeffs_domain[j * domain_size..(j * domain_size + coeff_size)]);
for i in coeff_size..domain_size {
assert_eq!(Point::zero(), h_coeffs_domain[j * domain_size + i]);
}
for i in j * domain_size..(j * domain_size + coeff_size) {
assert_ne!(h_coeffs_domain[i], Point::zero());
}
}
}
#[test]
fn test_scalar_evaluation_on_trivial_coset() {
// checks that the evaluations on the subgroup are the same as on the coset generated by 1
let log_test_domain_size = 8;
let coeff_size = 1 << 6;
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_scalars(coeff_size, log_test_domain_size, true);
let mut d_trivial_coset_powers = build_domain(1 << log_test_domain_size, 0, false);
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
let mut h_evals: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_trivial_coset_powers);
let mut h_evals_coset: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
assert_eq!(h_evals, h_evals_coset);
}
#[test]
fn test_scalar_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let log_test_size = 8;
let test_size = 1 << log_test_size;
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(test_size, log_test_size, false);
let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
let mut d_evals_large = evaluate_scalars(&mut d_coeffs, &mut d_large_domain);
let mut h_evals_large: Vec<Scalar> = (0..2 * test_size).map(|_| Scalar::zero()).collect();
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
let mut h_evals: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
let mut h_evals_coset: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
assert_eq!(h_evals[..], h_evals_large[..test_size]);
assert_eq!(h_evals_coset[..], h_evals_large[test_size..2 * test_size]);
}
#[test]
fn test_scalar_batch_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let batch_size = 4;
let log_test_size = 6;
let test_size = 1 << log_test_size;
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(test_size * batch_size, log_test_size, false);
let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
let mut d_evals_large = evaluate_scalars_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
let mut h_evals_large: Vec<Scalar> = (0..2 * test_size * batch_size).map(|_| Scalar::zero()).collect();
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
let mut h_evals: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_scalars_on_coset_batch(&mut d_coeffs, &mut d_domain, batch_size, &mut d_coset_powers);
let mut h_evals_coset: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
for i in 0..batch_size {
assert_eq!(h_evals_large[2 * i * test_size..(2 * i + 1) * test_size], h_evals[i * test_size..(i + 1) * test_size]);
assert_eq!(h_evals_large[(2 * i + 1) * test_size..(2 * i + 2) * test_size], h_evals_coset[i * test_size..(i + 1) * test_size]);
}
}
#[test]
fn test_point_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let log_test_size = 8;
let test_size = 1 << log_test_size;
let (_, mut d_coeffs, mut d_domain) = set_up_points(test_size, log_test_size, false);
let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
let mut d_evals_large = evaluate_points(&mut d_coeffs, &mut d_large_domain);
let mut h_evals_large: Vec<Point> = (0..2 * test_size).map(|_| Point::zero()).collect();
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
let mut h_evals: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_points_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
let mut h_evals_coset: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
assert_eq!(h_evals[..], h_evals_large[..test_size]);
assert_eq!(h_evals_coset[..], h_evals_large[test_size..2 * test_size]);
for i in 0..test_size {
assert_ne!(h_evals[i], Point::zero());
assert_ne!(h_evals_coset[i], Point::zero());
assert_ne!(h_evals_large[2 * i], Point::zero());
assert_ne!(h_evals_large[2 * i + 1], Point::zero());
}
}
#[test]
fn test_point_batch_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let batch_size = 2;
let log_test_size = 6;
let test_size = 1 << log_test_size;
let (_, mut d_coeffs, mut d_domain) = set_up_points(test_size * batch_size, log_test_size, false);
let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
let mut d_evals_large = evaluate_points_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
let mut h_evals_large: Vec<Point> = (0..2 * test_size * batch_size).map(|_| Point::zero()).collect();
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
let mut h_evals: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_points_on_coset_batch(&mut d_coeffs, &mut d_domain, batch_size, &mut d_coset_powers);
let mut h_evals_coset: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
for i in 0..batch_size {
assert_eq!(h_evals_large[2 * i * test_size..(2 * i + 1) * test_size], h_evals[i * test_size..(i + 1) * test_size]);
assert_eq!(h_evals_large[(2 * i + 1) * test_size..(2 * i + 2) * test_size], h_evals_coset[i * test_size..(i + 1) * test_size]);
}
for i in 0..test_size * batch_size {
assert_ne!(h_evals[i], Point::zero());
assert_ne!(h_evals_coset[i], Point::zero());
assert_ne!(h_evals_large[2 * i], Point::zero());
assert_ne!(h_evals_large[2 * i + 1], Point::zero());
}
}
// testing matrix multiplication by comparing the result of FFT with the naive multiplication by the DFT matrix
#[test]
fn test_matrix_multiplication() {
let seed = None; // set to Some(value) to fix the rng
let test_size = 1 << 5;
let rou = Fr::get_root_of_unity(test_size).unwrap();
let matrix_flattened: Vec<Scalar> = (0..test_size).map(
|row_num| { (0..test_size).map(
|col_num| {
let pow: [u64; 1] = [(row_num * col_num).try_into().unwrap()];
Scalar::from_ark(Fr::pow(&rou, &pow).into_repr())
}).collect::<Vec<Scalar>>()
}).flatten().collect::<Vec<_>>();
let vector: Vec<Scalar> = generate_random_scalars(test_size, get_rng(seed));
let result = mult_matrix_by_vec(&matrix_flattened, &vector, 0);
let mut ntt_result = vector.clone();
ntt(&mut ntt_result, 0);
// we don't use the same roots of unity as arkworks, so the results are permutations
// of one another and the only guaranteed fixed scalars are the following ones:
assert_eq!(result[0], ntt_result[0]);
assert_eq!(result[test_size >> 1], ntt_result[test_size >> 1]);
}
#[test]
#[allow(non_snake_case)]
fn test_vec_scalar_mul() {
let mut inout = [Scalar::one(), Scalar::one(), Scalar::zero()];
let expected = [Scalar::one(), Scalar::zero(), Scalar::zero()];
mult_sc_vec(&mut inout, &expected, 0);
assert_eq!(inout, expected);
}
#[test]
#[allow(non_snake_case)]
fn test_vec_point_mul() {
let dummy_one = Point {
x: Base::one(),
y: Base::one(),
z: Base::one(),
};
let mut inout = [dummy_one, dummy_one, Point::zero()];
let scalars = [Scalar::one(), Scalar::zero(), Scalar::zero()];
let expected = [dummy_one, Point::zero(), Point::zero()];
multp_vec(&mut inout, &scalars, 0);
assert_eq!(inout, expected);
}
}

View File

@@ -1,34 +0,0 @@
[package]
name = "bn254"
version = "0.1.0"
edition = "2021"
authors = [ "Ingonyama" ]
[dependencies]
icicle-core = { path = "../icicle-core" }
hex = "*"
ark-std = "0.3.0"
ark-ff = "0.3.0"
ark-poly = "0.3.0"
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
ark-bn254 = "0.3.0"
serde = { version = "1.0", features = ["derive"] }
serde_derive = "1.0"
serde_cbor = "0.11.2"
rustacuda = "0.1"
rustacuda_core = "0.1"
rustacuda_derive = "0.1"
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
[build-dependencies]
cc = { version = "1.0", features = ["parallel"] }
[dev-dependencies]
"criterion" = "0.4.0"
[features]
g2 = []

View File

@@ -1,36 +0,0 @@
use std::env;
fn main() {
//TODO: check cargo features selected
//TODO: can conflict/duplicate with make ?
println!("cargo:rerun-if-env-changed=CXXFLAGS");
println!("cargo:rerun-if-changed=./icicle");
let arch_type = env::var("ARCH_TYPE").unwrap_or(String::from("native"));
let stream_type = env::var("DEFAULT_STREAM").unwrap_or(String::from("legacy"));
let mut arch = String::from("-arch=");
arch.push_str(&arch_type);
let mut stream = String::from("-default-stream=");
stream.push_str(&stream_type);
let mut nvcc = cc::Build::new();
println!("Compiling icicle library using arch: {}", &arch);
if cfg!(feature = "g2") {
nvcc.define("G2_DEFINED", None);
}
nvcc.cuda(true);
nvcc.define("FEATURE_BN254", None);
nvcc.debug(false);
nvcc.flag(&arch);
nvcc.flag(&stream);
nvcc.shared_flag(false);
// nvcc.static_flag(true);
nvcc.files([
"../icicle-cuda/curves/index.cu",
]);
nvcc.compile("ingo_icicle"); //TODO: extension??
}

View File

@@ -1,4 +0,0 @@
pub trait Field<const NUM_LIMBS: usize> {
const MODULUS: [u32; NUM_LIMBS];
const LIMBS: usize = NUM_LIMBS;
}
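// A concrete field is declared by fixing the limb count and modulus; see curve_structs.rs, e.g.:
// pub struct ScalarField;
// impl Field<8> for ScalarField { const MODULUS: [u32; 8] = [0x0; 8]; }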

View File

@@ -1,3 +0,0 @@
pub mod field;
pub mod scalar;
pub mod point;

View File

@@ -1,108 +0,0 @@
use std::ffi::c_uint;
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger256, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
use super::scalar::{get_fixed_limbs, self};
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointT<BF: scalar::ScalarTrait> {
pub x: BF,
pub y: BF,
pub z: BF,
}
impl<BF: DeviceCopy + scalar::ScalarTrait> Default for PointT<BF> {
fn default() -> Self {
PointT::zero()
}
}
impl<BF: DeviceCopy + scalar::ScalarTrait> PointT<BF> {
pub fn zero() -> Self {
PointT {
x: BF::zero(),
y: BF::one(),
z: BF::zero(),
}
}
pub fn infinity() -> Self {
Self::zero()
}
}
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinityT<BF> {
pub x: BF,
pub y: BF,
}
impl<BF: scalar::ScalarTrait> Default for PointAffineNoInfinityT<BF> {
fn default() -> Self {
PointAffineNoInfinityT {
x: BF::zero(),
y: BF::zero(),
}
}
}
impl<BF: Copy + scalar::ScalarTrait> PointAffineNoInfinityT<BF> {
/// From u32 limbs x, y
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
PointAffineNoInfinityT {
x: BF::from_limbs(x),
y: BF::from_limbs(y)
}
}
pub fn limbs(&self) -> Vec<u32> {
[self.x.limbs(), self.y.limbs()].concat()
}
pub fn to_projective(&self) -> PointT<BF> {
PointT {
x: self.x,
y: self.y,
z: BF::one(),
}
}
}
impl<BF: Copy + scalar::ScalarTrait> PointT<BF> {
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
PointT {
x: BF::from_limbs(x),
y: BF::from_limbs(y),
z: BF::from_limbs(z)
}
}
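/// Builds a projective point from a single limbs slice laid out as x, then y, then z
/// (despite the `xy` in the name, all three coordinates are expected, as asserted below).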
pub fn from_xy_limbs(value: &[u32]) -> PointT<BF> {
let l = value.len();
assert_eq!(l, 3 * BF::base_limbs(), "length must be 3 * {}", BF::base_limbs());
PointT {
x: BF::from_limbs(value[..BF::base_limbs()].try_into().unwrap()),
y: BF::from_limbs(value[BF::base_limbs()..BF::base_limbs() * 2].try_into().unwrap()),
z: BF::from_limbs(value[BF::base_limbs() * 2..].try_into().unwrap())
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinityT<BF> {
PointAffineNoInfinityT {
x: self.x,
y: self.y,
}
}
}

View File

@@ -1,102 +0,0 @@
use std::ffi::{c_int, c_uint};
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
use std::mem::transmute;
use rustacuda::prelude::*;
use rustacuda_core::DevicePointer;
use rustacuda::memory::{DeviceBox, CopyDestination};
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use std::marker::PhantomData;
use std::convert::TryInto;
use super::field::{Field, self};
pub fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
match val.len() {
n if n < NUM_LIMBS => {
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
padded[..val.len()].copy_from_slice(&val);
padded
}
n if n == NUM_LIMBS => val.try_into().unwrap(),
_ => panic!("slice has too many elements"),
}
}
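// For example, with NUM_LIMBS = 4, get_fixed_limbs(&[1, 2]) yields [1, 2, 0, 0],
// while passing more than NUM_LIMBS limbs panics.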
pub trait ScalarTrait{
fn base_limbs() -> usize;
fn zero() -> Self;
fn from_limbs(value: &[u32]) -> Self;
fn one() -> Self;
fn to_bytes_le(&self) -> Vec<u8>;
fn limbs(&self) -> &[u32];
}
#[derive(Debug, PartialEq, Clone, Copy)]
#[repr(C)]
pub struct ScalarT<M, const NUM_LIMBS: usize> {
pub(crate) phantom: PhantomData<M>,
pub(crate) value : [u32; NUM_LIMBS]
}
impl<M, const NUM_LIMBS: usize> ScalarTrait for ScalarT<M, NUM_LIMBS>
where
M: Field<NUM_LIMBS>,
{
fn base_limbs() -> usize {
return NUM_LIMBS;
}
fn zero() -> Self {
ScalarT {
value: [0u32; NUM_LIMBS],
phantom: PhantomData,
}
}
fn from_limbs(value: &[u32]) -> Self {
Self {
value: get_fixed_limbs(value),
phantom: PhantomData,
}
}
fn one() -> Self {
let mut s = [0u32; NUM_LIMBS];
s[0] = 1;
ScalarT { value: s, phantom: PhantomData }
}
fn to_bytes_le(&self) -> Vec<u8> {
self.value
.iter()
.map(|s| s.to_le_bytes().to_vec())
.flatten()
.collect::<Vec<_>>()
}
fn limbs(&self) -> &[u32] {
&self.value
}
}
impl<M, const NUM_LIMBS: usize> ScalarT<M, NUM_LIMBS> where M: field::Field<NUM_LIMBS>{
pub fn from_limbs_le(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
Self::from_limbs(value)
}
pub fn from_limbs_be(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
let mut value = value.to_vec();
value.reverse();
Self::from_limbs_le(&value)
}
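// For example, with NUM_LIMBS = 2: from_limbs_be(&[0, 1]) == from_limbs_le(&[1, 0]).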
// Additional functions
// Limb-wise addition with carry; note that no modular reduction is performed here.
pub fn add(&self, other: ScalarT<M, NUM_LIMBS>) -> ScalarT<M, NUM_LIMBS> {
let mut value = [0u32; NUM_LIMBS];
let mut carry = 0u64;
for i in 0..NUM_LIMBS {
let sum = self.value[i] as u64 + other.value[i] as u64 + carry;
value[i] = sum as u32;
carry = sum >> 32;
}
ScalarT { value, phantom: PhantomData }
}
}

View File

@@ -1,62 +0,0 @@
use std::ffi::{c_int, c_uint};
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda_derive::DeviceCopy;
use std::mem::transmute;
use rustacuda::prelude::*;
use rustacuda_core::DevicePointer;
use rustacuda::memory::{DeviceBox, CopyDestination, DeviceCopy};
use std::marker::PhantomData;
use std::convert::TryInto;
use crate::basic_structs::point::{PointT, PointAffineNoInfinityT};
use crate::basic_structs::scalar::ScalarT;
use crate::basic_structs::field::Field;
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
#[repr(C)]
pub struct ScalarField;
impl Field<8> for ScalarField {
const MODULUS: [u32; 8] = [0x0; 8];
}
#[derive(Debug, PartialEq, Clone, Copy,DeviceCopy)]
#[repr(C)]
pub struct BaseField;
impl Field<8> for BaseField {
const MODULUS: [u32; 8] = [0x0; 8];
}
pub type Scalar = ScalarT<ScalarField,8>;
impl Default for Scalar {
fn default() -> Self {
Self{value: [0x0;ScalarField::LIMBS], phantom: PhantomData }
}
}
unsafe impl DeviceCopy for Scalar{}
pub type Base = ScalarT<BaseField,8>;
impl Default for Base {
fn default() -> Self {
Self{value: [0x0;BaseField::LIMBS], phantom: PhantomData }
}
}
unsafe impl DeviceCopy for Base{}
pub type Point = PointT<Base>;
pub type PointAffineNoInfinity = PointAffineNoInfinityT<Base>;
extern "C" {
fn eq(point1: *const Point, point2: *const Point) -> c_uint;
}
impl PartialEq for Point {
fn eq(&self, other: &Self) -> bool {
unsafe { eq(self, other) != 0 }
}
}
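// Note that point equality is delegated to the `eq` function exported by the compiled CUDA
// library rather than being derived in Rust, presumably so that projectively-equivalent
// coordinate representations of the same point compare equal.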

View File

@@ -1,797 +0,0 @@
use std::ffi::{c_int, c_uint};
use ark_std::UniformRand;
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda::CudaFlags;
use rustacuda::memory::DeviceBox;
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
use rustacuda_core::DevicePointer;
use std::mem::transmute;
use crate::basic_structs::scalar::ScalarTrait;
use crate::curve_structs::*;
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use std::marker::PhantomData;
use std::convert::TryInto;
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
use rustacuda::memory::{CopyDestination, DeviceCopy};
extern "C" {
fn msm_cuda(
out: *mut Point,
points: *const PointAffineNoInfinity,
scalars: *const Scalar,
count: usize,
device_id: usize,
) -> c_uint;
fn msm_batch_cuda(
out: *mut Point,
points: *const PointAffineNoInfinity,
scalars: *const Scalar,
batch_size: usize,
msm_size: usize,
device_id: usize,
) -> c_uint;
fn commit_cuda(
d_out: DevicePointer<Point>,
d_scalars: DevicePointer<Scalar>,
d_points: DevicePointer<PointAffineNoInfinity>,
count: usize,
device_id: usize,
) -> c_uint;
fn commit_batch_cuda(
d_out: DevicePointer<Point>,
d_scalars: DevicePointer<Scalar>,
d_points: DevicePointer<PointAffineNoInfinity>,
count: usize,
batch_size: usize,
device_id: usize,
) -> c_uint;
fn build_domain_cuda(domain_size: usize, logn: usize, inverse: bool, device_id: usize) -> DevicePointer<Scalar>;
fn ntt_cuda(inout: *mut Scalar, n: usize, inverse: bool, device_id: usize) -> c_int;
fn ecntt_cuda(inout: *mut Point, n: usize, inverse: bool, device_id: usize) -> c_int;
fn ntt_batch_cuda(
inout: *mut Scalar,
arr_size: usize,
n: usize,
inverse: bool,
) -> c_int;
fn ecntt_batch_cuda(inout: *mut Point, arr_size: usize, n: usize, inverse: bool) -> c_int;
fn interpolate_scalars_cuda(
d_out: DevicePointer<Scalar>,
d_evaluations: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
n: usize,
device_id: usize
) -> c_int;
fn interpolate_scalars_batch_cuda(
d_out: DevicePointer<Scalar>,
d_evaluations: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn interpolate_points_cuda(
d_out: DevicePointer<Point>,
d_evaluations: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
n: usize,
device_id: usize
) -> c_int;
fn interpolate_points_batch_cuda(
d_out: DevicePointer<Point>,
d_evaluations: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn evaluate_scalars_cuda(
d_out: DevicePointer<Scalar>,
d_coefficients: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
device_id: usize
) -> c_int;
fn evaluate_scalars_batch_cuda(
d_out: DevicePointer<Scalar>,
d_coefficients: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn evaluate_points_cuda(
d_out: DevicePointer<Point>,
d_coefficients: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
device_id: usize
) -> c_int;
fn evaluate_points_batch_cuda(
d_out: DevicePointer<Point>,
d_coefficients: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn evaluate_scalars_on_coset_cuda(
d_out: DevicePointer<Scalar>,
d_coefficients: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
coset_powers: DevicePointer<Scalar>,
device_id: usize
) -> c_int;
fn evaluate_scalars_on_coset_batch_cuda(
d_out: DevicePointer<Scalar>,
d_coefficients: DevicePointer<Scalar>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
batch_size: usize,
coset_powers: DevicePointer<Scalar>,
device_id: usize
) -> c_int;
fn evaluate_points_on_coset_cuda(
d_out: DevicePointer<Point>,
d_coefficients: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
coset_powers: DevicePointer<Scalar>,
device_id: usize
) -> c_int;
fn evaluate_points_on_coset_batch_cuda(
d_out: DevicePointer<Point>,
d_coefficients: DevicePointer<Point>,
d_domain: DevicePointer<Scalar>,
domain_size: usize,
n: usize,
batch_size: usize,
coset_powers: DevicePointer<Scalar>,
device_id: usize
) -> c_int;
fn reverse_order_scalars_cuda(
d_arr: DevicePointer<Scalar>,
n: usize,
device_id: usize
) -> c_int;
fn reverse_order_scalars_batch_cuda(
d_arr: DevicePointer<Scalar>,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn reverse_order_points_cuda(
d_arr: DevicePointer<Point>,
n: usize,
device_id: usize
) -> c_int;
fn reverse_order_points_batch_cuda(
d_arr: DevicePointer<Point>,
n: usize,
batch_size: usize,
device_id: usize
) -> c_int;
fn vec_mod_mult_point(
inout: *mut Point,
scalars: *const Scalar,
n_elements: usize,
device_id: usize,
) -> c_int;
fn vec_mod_mult_scalar(
inout: *mut Scalar,
scalars: *const Scalar,
n_elements: usize,
device_id: usize,
) -> c_int;
fn matrix_vec_mod_mult(
matrix_flattened: *const Scalar,
input: *const Scalar,
output: *mut Scalar,
n_elements: usize,
device_id: usize,
) -> c_int;
}
pub fn msm(points: &[PointAffineNoInfinity], scalars: &[Scalar], device_id: usize) -> Point {
let count = points.len();
if count != scalars.len() {
todo!("variable length")
}
let mut ret = Point::zero();
unsafe {
msm_cuda(
&mut ret as *mut _ as *mut Point,
points as *const _ as *const PointAffineNoInfinity,
scalars as *const _ as *const Scalar,
scalars.len(),
device_id,
)
};
ret
}
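// A minimal usage sketch (sizes are illustrative):
// let points = generate_random_points(1 << 10, get_rng(None));
// let scalars = generate_random_scalars(1 << 10, get_rng(None));
// let result: Point = msm(&points, &scalars, 0); // runs on device 0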
pub fn msm_batch(
points: &[PointAffineNoInfinity],
scalars: &[Scalar],
batch_size: usize,
device_id: usize,
) -> Vec<Point> {
let count = points.len();
if count != scalars.len() {
todo!("variable length")
}
let mut ret = vec![Point::zero(); batch_size];
unsafe {
msm_batch_cuda(
&mut ret[0] as *mut _ as *mut Point,
points as *const _ as *const PointAffineNoInfinity,
scalars as *const _ as *const Scalar,
batch_size,
count / batch_size,
device_id,
)
};
ret
}
pub fn commit(
points: &mut DeviceBuffer<PointAffineNoInfinity>,
scalars: &mut DeviceBuffer<Scalar>,
) -> DeviceBox<Point> {
let mut res = DeviceBox::new(&Point::zero()).unwrap();
unsafe {
commit_cuda(
res.as_device_ptr(),
scalars.as_device_ptr(),
points.as_device_ptr(),
scalars.len(),
0,
);
}
return res;
}
pub fn commit_batch(
points: &mut DeviceBuffer<PointAffineNoInfinity>,
scalars: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(batch_size).unwrap() };
unsafe {
commit_batch_cuda(
res.as_device_ptr(),
scalars.as_device_ptr(),
points.as_device_ptr(),
scalars.len() / batch_size,
batch_size,
0,
);
}
return res;
}
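// `commit` and `commit_batch` are the device-resident counterparts of `msm` and `msm_batch`:
// the scalars and points are already in GPU buffers and the result stays on the device.
// In the batched variant, `scalars` holds `batch_size` concatenated MSM inputs of
// `scalars.len() / batch_size` elements each.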
/// Compute an in-place NTT on the input data.
fn ntt_internal(values: &mut [Scalar], device_id: usize, inverse: bool) -> i32 {
let ret_code = unsafe {
ntt_cuda(
values as *mut _ as *mut Scalar,
values.len(),
inverse,
device_id,
)
};
ret_code
}
pub fn ntt(values: &mut [Scalar], device_id: usize) {
ntt_internal(values, device_id, false);
}
pub fn intt(values: &mut [Scalar], device_id: usize) {
ntt_internal(values, device_id, true);
}
/// Compute an in-place NTT on the input data.
fn ntt_internal_batch(
values: &mut [Scalar],
device_id: usize,
batch_size: usize,
inverse: bool,
) -> i32 {
unsafe {
ntt_batch_cuda(
values as *mut _ as *mut Scalar,
values.len(),
batch_size,
inverse,
)
}
}
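// Note: for the batched transforms the `device_id` argument is effectively unused: the
// `*_batch_cuda` entry points take no device parameter, and the public wrappers below
// pass 0 to the internal helpers anyway.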
pub fn ntt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
ntt_internal_batch(values, 0, batch_size, false);
}
pub fn intt_batch(values: &mut [Scalar], batch_size: usize, device_id: usize) {
ntt_internal_batch(values, 0, batch_size, true);
}
/// Compute an in-place ECNTT on the input data.
fn ecntt_internal(values: &mut [Point], inverse: bool, device_id: usize) -> i32 {
unsafe {
ecntt_cuda(
values as *mut _ as *mut Point,
values.len(),
inverse,
device_id,
)
}
}
pub fn ecntt(values: &mut [Point], device_id: usize) {
ecntt_internal(values, false, device_id);
}
/// Compute an in-place iECNTT on the input data.
pub fn iecntt(values: &mut [Point], device_id: usize) {
ecntt_internal(values, true, device_id);
}
/// Compute an in-place ECNTT on the input data.
fn ecntt_internal_batch(
values: &mut [Point],
device_id: usize,
batch_size: usize,
inverse: bool,
) -> i32 {
unsafe {
ecntt_batch_cuda(
values as *mut _ as *mut Point,
values.len(),
batch_size,
inverse,
)
}
}
pub fn ecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
ecntt_internal_batch(values, 0, batch_size, false);
}
/// Compute an in-place iECNTT on the input data.
pub fn iecntt_batch(values: &mut [Point], batch_size: usize, device_id: usize) {
ecntt_internal_batch(values, 0, batch_size, true);
}
pub fn build_domain(domain_size: usize, logn: usize, inverse: bool) -> DeviceBuffer<Scalar> {
unsafe {
DeviceBuffer::from_raw_parts(build_domain_cuda(
domain_size,
logn,
inverse,
0
), domain_size)
}
}
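// Builds an on-device evaluation domain of `domain_size` elements for transforms of size
// 2^logn, in forward or inverse direction. The coset tests also call it with
// domain_size < 2^logn to generate the `coset_powers` input.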
pub fn reverse_order_scalars(
d_scalars: &mut DeviceBuffer<Scalar>,
) {
unsafe { reverse_order_scalars_cuda(
d_scalars.as_device_ptr(),
d_scalars.len(),
0
); }
}
pub fn reverse_order_scalars_batch(
d_scalars: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) {
unsafe { reverse_order_scalars_batch_cuda(
d_scalars.as_device_ptr(),
d_scalars.len() / batch_size,
batch_size,
0
); }
}
pub fn reverse_order_points(
d_points: &mut DeviceBuffer<Point>,
) {
unsafe { reverse_order_points_cuda(
d_points.as_device_ptr(),
d_points.len(),
0
); }
}
pub fn reverse_order_points_batch(
d_points: &mut DeviceBuffer<Point>,
batch_size: usize,
) {
unsafe { reverse_order_points_batch_cuda(
d_points.as_device_ptr(),
d_points.len() / batch_size,
batch_size,
0
); }
}
pub fn interpolate_scalars(
d_evaluations: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe { interpolate_scalars_cuda(
res.as_device_ptr(),
d_evaluations.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
0
) };
return res;
}
pub fn interpolate_scalars_batch(
d_evaluations: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe { interpolate_scalars_batch_cuda(
res.as_device_ptr(),
d_evaluations.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
batch_size,
0
) };
return res;
}
pub fn interpolate_points(
d_evaluations: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe { interpolate_points_cuda(
res.as_device_ptr(),
d_evaluations.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
0
) };
return res;
}
pub fn interpolate_points_batch(
d_evaluations: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe { interpolate_points_batch_cuda(
res.as_device_ptr(),
d_evaluations.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
batch_size,
0
) };
return res;
}
pub fn evaluate_scalars(
d_coefficients: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe {
evaluate_scalars_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len(),
0
);
}
return res;
}
pub fn evaluate_scalars_batch(
d_coefficients: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe {
evaluate_scalars_batch_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len() / batch_size,
batch_size,
0
);
}
return res;
}
pub fn evaluate_points(
d_coefficients: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe {
evaluate_points_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len(),
0
);
}
return res;
}
pub fn evaluate_points_batch(
d_coefficients: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe {
evaluate_points_batch_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len() / batch_size,
batch_size,
0
);
}
return res;
}
pub fn evaluate_scalars_on_coset(
d_coefficients: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
coset_powers: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe {
evaluate_scalars_on_coset_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len(),
coset_powers.as_device_ptr(),
0
);
}
return res;
}
pub fn evaluate_scalars_on_coset_batch(
d_coefficients: &mut DeviceBuffer<Scalar>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
coset_powers: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Scalar> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe {
evaluate_scalars_on_coset_batch_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len() / batch_size,
batch_size,
coset_powers.as_device_ptr(),
0
);
}
return res;
}
pub fn evaluate_points_on_coset(
d_coefficients: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
coset_powers: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len()).unwrap() };
unsafe {
evaluate_points_on_coset_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len(),
coset_powers.as_device_ptr(),
0
);
}
return res;
}
pub fn evaluate_points_on_coset_batch(
d_coefficients: &mut DeviceBuffer<Point>,
d_domain: &mut DeviceBuffer<Scalar>,
batch_size: usize,
coset_powers: &mut DeviceBuffer<Scalar>,
) -> DeviceBuffer<Point> {
let mut res = unsafe { DeviceBuffer::uninitialized(d_domain.len() * batch_size).unwrap() };
unsafe {
evaluate_points_on_coset_batch_cuda(
res.as_device_ptr(),
d_coefficients.as_device_ptr(),
d_domain.as_device_ptr(),
d_domain.len(),
d_coefficients.len() / batch_size,
batch_size,
coset_powers.as_device_ptr(),
0
);
}
return res;
}
pub fn multp_vec(a: &mut [Point], b: &[Scalar], device_id: usize) {
assert_eq!(a.len(), b.len());
unsafe {
vec_mod_mult_point(
a as *mut _ as *mut Point,
b as *const _ as *const Scalar,
a.len(),
device_id,
);
}
}
pub fn mult_sc_vec(a: &mut [Scalar], b: &[Scalar], device_id: usize) {
assert_eq!(a.len(), b.len());
unsafe {
vec_mod_mult_scalar(
a as *mut _ as *mut Scalar,
b as *const _ as *const Scalar,
a.len(),
device_id,
);
}
}
// Multiply a flattened matrix by a vector:
// `a` - row-major flattened square matrix;
// `b` - vector to multiply `a` by;
pub fn mult_matrix_by_vec(a: &[Scalar], b: &[Scalar], device_id: usize) -> Vec<Scalar> {
let mut c = vec![Scalar::zero(); b.len()];
unsafe {
matrix_vec_mod_mult(
a as *const _ as *const Scalar,
b as *const _ as *const Scalar,
c.as_mut_slice() as *mut _ as *mut Scalar,
b.len(),
device_id,
);
}
c
}
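// Used in the tests to cross-check the NTT: multiplying by the flattened DFT matrix
// must agree with `ntt` on the invariant entries (see test_matrix_multiplication).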
pub fn clone_buffer<T: DeviceCopy>(buf: &mut DeviceBuffer<T>) -> DeviceBuffer<T> {
let mut buf_cpy = unsafe { DeviceBuffer::uninitialized(buf.len()).unwrap() };
unsafe { buf_cpy.copy_from(buf) };
return buf_cpy;
}
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> {
let rng: Box<dyn RngCore> = match seed {
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
None => Box::new(rand::thread_rng()),
};
rng
}
fn set_up_device() {
// Set up the context, load the module, and create a stream to run kernels in.
rustacuda::init(CudaFlags::empty()).unwrap();
let device = Device::get_device(0).unwrap();
let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device).unwrap();
}
pub fn generate_random_points(
count: usize,
mut rng: Box<dyn RngCore>,
) -> Vec<PointAffineNoInfinity> {
(0..count)
.map(|_| Point::from_ark(G1Projective_BN254::rand(&mut rng)).to_xy_strip_z())
.collect()
}
pub fn generate_random_points_proj(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Point> {
(0..count)
.map(|_| Point::from_ark(G1Projective_BN254::rand(&mut rng)))
.collect()
}
pub fn generate_random_scalars(count: usize, mut rng: Box<dyn RngCore>) -> Vec<Scalar> {
(0..count)
.map(|_| Scalar::from_ark(Fr_BN254::rand(&mut rng).into_repr()))
.collect()
}
pub fn set_up_points(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Point>, DeviceBuffer<Point>, DeviceBuffer<Scalar>) {
set_up_device();
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
let seed = Some(0); // fix the rng so repeated set-ups produce equal values
let vector = generate_random_points_proj(test_size, get_rng(seed));
let d_vector = DeviceBuffer::from_slice(&vector[..]).unwrap();
(vector, d_vector, d_domain)
}
}
pub fn set_up_scalars(test_size: usize, log_domain_size: usize, inverse: bool) -> (Vec<Scalar>, DeviceBuffer<Scalar>, DeviceBuffer<Scalar>) {
set_up_device();
let d_domain = build_domain(1 << log_domain_size, log_domain_size, inverse);
let seed = Some(0); // fix the rng to get two equal scalars
let vector_mut = generate_random_scalars(test_size, get_rng(seed));
let d_vector = DeviceBuffer::from_slice(&vector_mut[..]).unwrap();
(vector_mut, d_vector, d_domain)
}

View File

@@ -1,4 +0,0 @@
pub mod test_bn254;
pub mod basic_structs;
pub mod from_cuda;
pub mod curve_structs;

View File

@@ -1,816 +0,0 @@
use std::ffi::{c_int, c_uint};
use ark_std::UniformRand;
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda::CudaFlags;
use rustacuda::memory::DeviceBox;
use rustacuda::prelude::{DeviceBuffer, Device, ContextFlags, Context};
use rustacuda_core::DevicePointer;
use std::mem::transmute;
pub use crate::basic_structs::scalar::ScalarTrait;
pub use crate::curve_structs::*;
use icicle_core::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use std::marker::PhantomData;
use std::convert::TryInto;
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger384, BigInteger256, PrimeField};
use rustacuda::memory::{CopyDestination, DeviceCopy};
impl Scalar {
pub fn to_biginteger254(&self) -> BigInteger256 {
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn to_ark(&self) -> BigInteger256 {
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_biginteger256(ark: BigInteger256) -> Self {
Self{ value: u64_vec_to_u32_vec(&ark.0).try_into().unwrap(), phantom : PhantomData}
}
pub fn to_biginteger256_transmute(&self) -> BigInteger256 {
unsafe { transmute(*self) }
}
pub fn from_biginteger_transmute(v: BigInteger256) -> Scalar {
Scalar{ value: unsafe{ transmute(v)}, phantom : PhantomData }
}
pub fn to_ark_transmute(&self) -> Fr_BN254 {
unsafe { std::mem::transmute(*self) }
}
pub fn from_ark_transmute(v: &Fr_BN254) -> Scalar {
unsafe { std::mem::transmute_copy(v) }
}
pub fn to_ark_mod_p(&self) -> Fr_BN254 {
Fr_BN254::new(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap()))
}
pub fn to_ark_repr(&self) -> Fr_BN254 {
Fr_BN254::from_repr(BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())).unwrap()
}
pub fn from_ark(v: BigInteger256) -> Scalar {
Self { value : u64_vec_to_u32_vec(&v.0).try_into().unwrap(), phantom: PhantomData}
}
}
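// Three conversion flavors are provided above: `to_ark`/`from_ark` copy raw limbs between
// the 32-bit and 64-bit big-integer representations, the `*_transmute` variants reinterpret
// the same memory without copying, and `to_ark_mod_p`/`to_ark_repr` additionally lift the
// limbs into an `Fr_BN254` field element.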
impl Base {
pub fn to_ark(&self) -> BigInteger256 {
BigInteger256::new(u32_vec_to_u64_vec(&self.limbs()).try_into().unwrap())
}
pub fn from_ark(ark: BigInteger256) -> Self {
Self::from_limbs(&u64_vec_to_u32_vec(&ark.0))
}
}
impl Point {
pub fn to_ark(&self) -> G1Projective_BN254 {
self.to_ark_affine().into_projective()
}
pub fn to_ark_affine(&self) -> G1Affine_BN254 {
//TODO: generic conversion
use ark_ff::Field;
use std::ops::Mul;
let proj_x_field = Fq_BN254::from_le_bytes_mod_order(&self.x.to_bytes_le());
let proj_y_field = Fq_BN254::from_le_bytes_mod_order(&self.y.to_bytes_le());
let proj_z_field = Fq_BN254::from_le_bytes_mod_order(&self.z.to_bytes_le());
let inverse_z = proj_z_field.inverse().unwrap();
let aff_x = proj_x_field.mul(inverse_z);
let aff_y = proj_y_field.mul(inverse_z);
G1Affine_BN254::new(aff_x, aff_y, false)
}
pub fn from_ark(ark: G1Projective_BN254) -> Point {
use ark_ff::Field;
let z_inv = ark.z.inverse().unwrap();
let z_invsq = z_inv * z_inv;
let z_invq3 = z_invsq * z_inv;
Point {
x: Base::from_ark((ark.x * z_invsq).into_repr()),
y: Base::from_ark((ark.y * z_invq3).into_repr()),
z: Base::one(),
}
}
}
impl PointAffineNoInfinity {
pub fn to_ark(&self) -> G1Affine_BN254 {
G1Affine_BN254::new(Fq_BN254::new(self.x.to_ark()), Fq_BN254::new(self.y.to_ark()), false)
}
pub fn to_ark_repr(&self) -> G1Affine_BN254 {
G1Affine_BN254::new(
Fq_BN254::from_repr(self.x.to_ark()).unwrap(),
Fq_BN254::from_repr(self.y.to_ark()).unwrap(),
false,
)
}
pub fn from_ark(p: &G1Affine_BN254) -> Self {
PointAffineNoInfinity {
x: Base::from_ark(p.x.into_repr()),
y: Base::from_ark(p.y.into_repr()),
}
}
}
impl Point {
pub fn to_affine(&self) -> PointAffineNoInfinity {
let ark_affine = self.to_ark_affine();
PointAffineNoInfinity {
x: Base::from_ark(ark_affine.x.into_repr()),
y: Base::from_ark(ark_affine.y.into_repr()),
}
}
}
#[cfg(test)]
pub(crate) mod tests_bn254 {
use std::ops::Add;
use ark_bn254::{Fr, G1Affine, G1Projective};
use ark_ec::{msm::VariableBaseMSM, AffineCurve, ProjectiveCurve};
use ark_ff::{FftField, Field, Zero, PrimeField};
use ark_std::UniformRand;
use rustacuda::prelude::{DeviceBuffer, CopyDestination};
use crate::curve_structs::{Point, Scalar, Base};
use crate::basic_structs::scalar::ScalarTrait;
use crate::from_cuda::{
generate_random_points, get_rng, generate_random_scalars, msm, msm_batch, set_up_scalars,
commit, commit_batch, ntt, intt, generate_random_points_proj, ecntt, iecntt, ntt_batch,
ecntt_batch, iecntt_batch, intt_batch, reverse_order_scalars_batch, interpolate_scalars_batch,
set_up_points, reverse_order_points, interpolate_points, reverse_order_points_batch,
interpolate_points_batch, evaluate_scalars, interpolate_scalars, reverse_order_scalars,
evaluate_points, build_domain, evaluate_scalars_on_coset, evaluate_points_on_coset,
mult_matrix_by_vec, mult_sc_vec, multp_vec, evaluate_scalars_batch, evaluate_points_batch,
evaluate_scalars_on_coset_batch, evaluate_points_on_coset_batch,
};
fn random_points_ark_proj(nof_elements: usize) -> Vec<G1Projective> {
let mut rng = ark_std::rand::thread_rng();
let mut points_ga: Vec<G1Projective> = Vec::new();
for _ in 0..nof_elements {
let aff = G1Projective::rand(&mut rng);
points_ga.push(aff);
}
points_ga
}
fn ecntt_arc_naive(
points: &Vec<G1Projective>,
size: usize,
inverse: bool,
) -> Vec<G1Projective> {
let mut result: Vec<G1Projective> = Vec::new();
for _ in 0..size {
result.push(G1Projective::zero());
}
let rou: Fr;
if !inverse {
rou = Fr::get_root_of_unity(size).unwrap();
} else {
rou = Fr::inverse(&Fr::get_root_of_unity(size).unwrap()).unwrap();
}
for k in 0..size {
for l in 0..size {
let pow: [u64; 1] = [(l * k).try_into().unwrap()];
let mul_rou = Fr::pow(&rou, &pow);
result[k] = result[k].add(points[l].into_affine().mul(mul_rou));
}
}
if inverse {
let size2 = size as u64;
for k in 0..size {
let multfactor = Fr::inverse(&Fr::from(size2)).unwrap();
result[k] = result[k].into_affine().mul(multfactor);
}
}
return result;
}
fn check_eq(points: &Vec<G1Projective>, points2: &Vec<G1Projective>) -> bool {
let mut eq = true;
for i in 0..points.len() {
if points2[i].ne(&points[i]) {
eq = false;
break;
}
}
return eq;
}
fn test_naive_ark_ecntt(size: usize) {
let points = random_points_ark_proj(size);
let result1: Vec<G1Projective> = ecntt_arc_naive(&points, size, false);
let result2: Vec<G1Projective> = ecntt_arc_naive(&result1, size, true);
assert!(!check_eq(&result2, &result1));
assert!(check_eq(&result2, &points));
}
#[test]
fn test_msm() {
let test_sizes = [6, 9];
for pow2 in test_sizes {
let count = 1 << pow2;
let seed = None; // set Some to provide seed
let points = generate_random_points(count, get_rng(seed));
let scalars = generate_random_scalars(count, get_rng(seed));
let msm_result = msm(&points, &scalars, 0);
let point_r_ark: Vec<_> = points.iter().map(|x| x.to_ark_repr()).collect();
let scalars_r_ark: Vec<_> = scalars.iter().map(|x| x.to_ark()).collect();
let msm_result_ark = VariableBaseMSM::multi_scalar_mul(&point_r_ark, &scalars_r_ark);
assert_eq!(msm_result.to_ark_affine(), msm_result_ark);
assert_eq!(msm_result.to_ark(), msm_result_ark);
assert_eq!(
msm_result.to_ark_affine(),
Point::from_ark(msm_result_ark).to_ark_affine()
);
}
}
#[test]
fn test_batch_msm() {
for batch_pow2 in [2, 4] {
for pow2 in [4, 6] {
let msm_size = 1 << pow2;
let batch_size = 1 << batch_pow2;
let seed = None; // set Some to provide seed
let points_batch = generate_random_points(msm_size * batch_size, get_rng(seed));
let scalars_batch = generate_random_scalars(msm_size * batch_size, get_rng(seed));
let point_r_ark: Vec<_> = points_batch.iter().map(|x| x.to_ark_repr()).collect();
let scalars_r_ark: Vec<_> = scalars_batch.iter().map(|x| x.to_ark()).collect();
let expected: Vec<_> = point_r_ark
.chunks(msm_size)
.zip(scalars_r_ark.chunks(msm_size))
.map(|p| Point::from_ark(VariableBaseMSM::multi_scalar_mul(p.0, p.1)))
.collect();
let result = msm_batch(&points_batch, &scalars_batch, batch_size, 0);
assert_eq!(result, expected);
}
}
}
#[test]
fn test_commit() {
let test_size = 1 << 8;
let seed = Some(0);
let (mut scalars, mut d_scalars, _) = set_up_scalars(test_size, 0, false);
let mut points = generate_random_points(test_size, get_rng(seed));
let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();
let msm_result = msm(&points, &scalars, 0);
let mut d_commit_result = commit(&mut d_points, &mut d_scalars);
let mut h_commit_result = Point::zero();
d_commit_result.copy_to(&mut h_commit_result).unwrap();
assert_eq!(msm_result, h_commit_result);
assert_ne!(msm_result, Point::zero());
assert_ne!(h_commit_result, Point::zero());
}
#[test]
fn test_batch_commit() {
let batch_size = 4;
let test_size = 1 << 12;
let seed = Some(0);
let (scalars, mut d_scalars, _) = set_up_scalars(test_size * batch_size, 0, false);
let points = generate_random_points(test_size * batch_size, get_rng(seed));
let mut d_points = DeviceBuffer::from_slice(&points[..]).unwrap();
let msm_result = msm_batch(&points, &scalars, batch_size, 0);
let mut d_commit_result = commit_batch(&mut d_points, &mut d_scalars, batch_size);
let mut h_commit_result: Vec<Point> = (0..batch_size).map(|_| Point::zero()).collect();
d_commit_result.copy_to(&mut h_commit_result[..]).unwrap();
assert_eq!(msm_result, h_commit_result);
for h in h_commit_result {
assert_ne!(h, Point::zero());
}
}
#[test]
fn test_ntt() {
//NTT
let seed = None; //some value to fix the rng
let test_size = 1 << 3;
let scalars = generate_random_scalars(test_size, get_rng(seed));
let mut ntt_result = scalars.clone();
ntt(&mut ntt_result, 0);
assert_ne!(ntt_result, scalars);
let mut intt_result = ntt_result.clone();
intt(&mut intt_result, 0);
assert_eq!(intt_result, scalars);
//ECNTT
let points_proj = generate_random_points_proj(test_size, get_rng(seed));
test_naive_ark_ecntt(test_size);
assert!(points_proj[0].to_ark().into_affine().is_on_curve());
//naive ark
let points_proj_ark = points_proj
.iter()
.map(|p| p.to_ark())
.collect::<Vec<G1Projective>>();
let ecntt_result_naive = ecntt_arc_naive(&points_proj_ark, points_proj_ark.len(), false);
let iecntt_result_naive = ecntt_arc_naive(&ecntt_result_naive, points_proj_ark.len(), true);
assert_eq!(points_proj_ark, iecntt_result_naive);
//ingo gpu
let mut ecntt_result = points_proj.to_vec();
ecntt(&mut ecntt_result, 0);
assert_ne!(ecntt_result, points_proj);
let mut iecntt_result = ecntt_result.clone();
iecntt(&mut iecntt_result, 0);
assert_eq!(
iecntt_result_naive,
points_proj
.iter()
.map(|p| p.to_ark_affine())
.collect::<Vec<G1Affine>>()
);
assert_eq!(
iecntt_result
.iter()
.map(|p| p.to_ark_affine())
.collect::<Vec<G1Affine>>(),
points_proj
.iter()
.map(|p| p.to_ark_affine())
.collect::<Vec<G1Affine>>()
);
}
#[test]
fn test_ntt_batch() {
//NTT
let seed = None; //some value to fix the rng
let test_size = 1 << 5;
let batches = 4;
let scalars_batch: Vec<Scalar> =
generate_random_scalars(test_size * batches, get_rng(seed));
let mut scalar_vec_of_vec: Vec<Vec<Scalar>> = Vec::new();
for i in 0..batches {
scalar_vec_of_vec.push(scalars_batch[i * test_size..(i + 1) * test_size].to_vec());
}
let mut ntt_result = scalars_batch.clone();
// do batch ntt
ntt_batch(&mut ntt_result, test_size, 0);
let mut ntt_result_vec_of_vec = Vec::new();
// do ntt for every chunk
for i in 0..batches {
ntt_result_vec_of_vec.push(scalar_vec_of_vec[i].clone());
ntt(&mut ntt_result_vec_of_vec[i], 0);
}
// check that the ntt of each vec of scalars equals the corresponding chunk of the batched ntt
for i in 0..batches {
assert_eq!(
ntt_result_vec_of_vec[i],
ntt_result[i * test_size..(i + 1) * test_size]
);
}
// check that ntt output is different from input
assert_ne!(ntt_result, scalars_batch);
let mut intt_result = ntt_result.clone();
// do batch intt
intt_batch(&mut intt_result, test_size, 0);
let mut intt_result_vec_of_vec = Vec::new();
// do intt for every chunk
for i in 0..batches {
intt_result_vec_of_vec.push(ntt_result_vec_of_vec[i].clone());
intt(&mut intt_result_vec_of_vec[i], 0);
}
// check that the intt of each vec of scalars is equal to the intt of the specific batch
for i in 0..batches {
assert_eq!(
intt_result_vec_of_vec[i],
intt_result[i * test_size..(i + 1) * test_size]
);
}
assert_eq!(intt_result, scalars_batch);
//ECNTT
let points_proj = generate_random_points_proj(test_size * batches, get_rng(seed));
let mut points_vec_of_vec: Vec<Vec<Point>> = Vec::new();
for i in 0..batches {
points_vec_of_vec.push(points_proj[i * test_size..(i + 1) * test_size].to_vec());
}
let mut ntt_result_points = points_proj.clone();
// do batch ecntt
ecntt_batch(&mut ntt_result_points, test_size, 0);
let mut ntt_result_points_vec_of_vec = Vec::new();
for i in 0..batches {
ntt_result_points_vec_of_vec.push(points_vec_of_vec[i].clone());
ecntt(&mut ntt_result_points_vec_of_vec[i], 0);
}
for i in 0..batches {
assert_eq!(
ntt_result_points_vec_of_vec[i],
ntt_result_points[i * test_size..(i + 1) * test_size]
);
}
assert_ne!(ntt_result_points, points_proj);
let mut intt_result_points = ntt_result_points.clone();
// do batch ecintt
iecntt_batch(&mut intt_result_points, test_size, 0);
let mut intt_result_points_vec_of_vec = Vec::new();
// do ecintt for every chunk
for i in 0..batches {
intt_result_points_vec_of_vec.push(ntt_result_points_vec_of_vec[i].clone());
iecntt(&mut intt_result_points_vec_of_vec[i], 0);
}
// check that the iecntt of each vec of points equals the corresponding chunk of the batched iecntt
for i in 0..batches {
assert_eq!(
intt_result_points_vec_of_vec[i],
intt_result_points[i * test_size..(i + 1) * test_size]
);
}
assert_eq!(intt_result_points, points_proj);
}
#[test]
fn test_scalar_interpolation() {
let log_test_size = 7;
let test_size = 1 << log_test_size;
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_scalars(test_size, log_test_size, true);
reverse_order_scalars(&mut d_evals);
let mut d_coeffs = interpolate_scalars(&mut d_evals, &mut d_domain);
intt(&mut evals_mut, 0);
let mut h_coeffs: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
assert_eq!(h_coeffs, evals_mut);
}
#[test]
fn test_scalar_batch_interpolation() {
let batch_size = 4;
let log_test_size = 10;
let test_size = 1 << log_test_size;
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_scalars(test_size * batch_size, log_test_size, true);
reverse_order_scalars_batch(&mut d_evals, batch_size);
let mut d_coeffs = interpolate_scalars_batch(&mut d_evals, &mut d_domain, batch_size);
intt_batch(&mut evals_mut, test_size, 0);
let mut h_coeffs: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
assert_eq!(h_coeffs, evals_mut);
}
#[test]
fn test_point_interpolation() {
let log_test_size = 6;
let test_size = 1 << log_test_size;
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_points(test_size, log_test_size, true);
reverse_order_points(&mut d_evals);
let mut d_coeffs = interpolate_points(&mut d_evals, &mut d_domain);
iecntt(&mut evals_mut[..], 0);
let mut h_coeffs: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
assert_eq!(h_coeffs, *evals_mut);
for h in h_coeffs.iter() {
assert_ne!(*h, Point::zero());
}
}
#[test]
fn test_point_batch_interpolation() {
let batch_size = 4;
let log_test_size = 6;
let test_size = 1 << log_test_size;
let (mut evals_mut, mut d_evals, mut d_domain) = set_up_points(test_size * batch_size, log_test_size, true);
reverse_order_points_batch(&mut d_evals, batch_size);
let mut d_coeffs = interpolate_points_batch(&mut d_evals, &mut d_domain, batch_size);
iecntt_batch(&mut evals_mut[..], test_size, 0);
let mut h_coeffs: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
d_coeffs.copy_to(&mut h_coeffs[..]).unwrap();
assert_eq!(h_coeffs, *evals_mut);
for h in h_coeffs.iter() {
assert_ne!(*h, Point::zero());
}
}
#[test]
fn test_scalar_evaluation() {
let log_test_domain_size = 8;
let coeff_size = 1 << 6;
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
let mut d_coeffs_domain = interpolate_scalars(&mut d_evals, &mut d_domain_inv);
let mut h_coeffs_domain: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
assert_eq!(h_coeffs, h_coeffs_domain[..coeff_size]);
for i in coeff_size.. (1 << log_test_domain_size) {
assert_eq!(Scalar::zero(), h_coeffs_domain[i]);
}
}
#[test]
fn test_scalar_batch_evaluation() {
let batch_size = 6;
let log_test_domain_size = 8;
let domain_size = 1 << log_test_domain_size;
let coeff_size = 1 << 6;
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size * batch_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_scalars(0, log_test_domain_size, true);
let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
let mut d_coeffs_domain = interpolate_scalars_batch(&mut d_evals, &mut d_domain_inv, batch_size);
let mut h_coeffs_domain: Vec<Scalar> = (0..domain_size * batch_size).map(|_| Scalar::zero()).collect();
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
for j in 0..batch_size {
assert_eq!(h_coeffs[j * coeff_size..(j + 1) * coeff_size], h_coeffs_domain[j * domain_size..j * domain_size + coeff_size]);
for i in coeff_size..domain_size {
assert_eq!(Scalar::zero(), h_coeffs_domain[j * domain_size + i]);
}
}
}
#[test]
fn test_point_evaluation() {
let log_test_domain_size = 7;
let coeff_size = 1 << 7;
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_points(coeff_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);
let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
let mut d_coeffs_domain = interpolate_points(&mut d_evals, &mut d_domain_inv);
let mut h_coeffs_domain: Vec<Point> = (0..1 << log_test_domain_size).map(|_| Point::zero()).collect();
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
assert_eq!(h_coeffs[..], h_coeffs_domain[..coeff_size]);
for i in coeff_size..(1 << log_test_domain_size) {
assert_eq!(Point::zero(), h_coeffs_domain[i]);
}
for i in 0..coeff_size {
assert_ne!(h_coeffs_domain[i], Point::zero());
}
}
#[test]
fn test_point_batch_evaluation() {
let batch_size = 4;
let log_test_domain_size = 6;
let domain_size = 1 << log_test_domain_size;
let coeff_size = 1 << 5;
let (h_coeffs, mut d_coeffs, mut d_domain) = set_up_points(coeff_size * batch_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_points(0, log_test_domain_size, true);
let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
let mut d_coeffs_domain = interpolate_points_batch(&mut d_evals, &mut d_domain_inv, batch_size);
let mut h_coeffs_domain: Vec<Point> = (0..domain_size * batch_size).map(|_| Point::zero()).collect();
d_coeffs_domain.copy_to(&mut h_coeffs_domain[..]).unwrap();
for j in 0..batch_size {
assert_eq!(h_coeffs[j * coeff_size..(j + 1) * coeff_size], h_coeffs_domain[j * domain_size..(j * domain_size + coeff_size)]);
for i in coeff_size..domain_size {
assert_eq!(Point::zero(), h_coeffs_domain[j * domain_size + i]);
}
for i in j * domain_size..(j * domain_size + coeff_size) {
assert_ne!(h_coeffs_domain[i], Point::zero());
}
}
}
#[test]
fn test_scalar_evaluation_on_trivial_coset() {
// checks that the evaluations on the subgroup are the same as on the trivial coset generated by 1
let log_test_domain_size = 8;
let coeff_size = 1 << 6;
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(coeff_size, log_test_domain_size, false);
let (_, _, mut d_domain_inv) = set_up_scalars(coeff_size, log_test_domain_size, true);
let mut d_trivial_coset_powers = build_domain(1 << log_test_domain_size, 0, false);
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
let mut h_evals: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_trivial_coset_powers);
let mut h_evals_coset: Vec<Scalar> = (0..1 << log_test_domain_size).map(|_| Scalar::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
assert_eq!(h_evals, h_evals_coset);
}
#[test]
fn test_scalar_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let log_test_size = 8;
let test_size = 1 << log_test_size;
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(test_size, log_test_size, false);
let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
let mut d_evals_large = evaluate_scalars(&mut d_coeffs, &mut d_large_domain);
let mut h_evals_large: Vec<Scalar> = (0..2 * test_size).map(|_| Scalar::zero()).collect();
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
let mut d_evals = evaluate_scalars(&mut d_coeffs, &mut d_domain);
let mut h_evals: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_scalars_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
let mut h_evals_coset: Vec<Scalar> = (0..test_size).map(|_| Scalar::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
assert_eq!(h_evals[..], h_evals_large[..test_size]);
assert_eq!(h_evals_coset[..], h_evals_large[test_size..2 * test_size]);
}
#[test]
fn test_scalar_batch_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let batch_size = 4;
let log_test_size = 6;
let test_size = 1 << log_test_size;
let (_, mut d_coeffs, mut d_domain) = set_up_scalars(test_size * batch_size, log_test_size, false);
let (_, _, mut d_large_domain) = set_up_scalars(0, log_test_size + 1, false);
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
let mut d_evals_large = evaluate_scalars_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
let mut h_evals_large: Vec<Scalar> = (0..2 * test_size * batch_size).map(|_| Scalar::zero()).collect();
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
let mut d_evals = evaluate_scalars_batch(&mut d_coeffs, &mut d_domain, batch_size);
let mut h_evals: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_scalars_on_coset_batch(&mut d_coeffs, &mut d_domain, batch_size, &mut d_coset_powers);
let mut h_evals_coset: Vec<Scalar> = (0..test_size * batch_size).map(|_| Scalar::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
for i in 0..batch_size {
assert_eq!(h_evals_large[2 * i * test_size..(2 * i + 1) * test_size], h_evals[i * test_size..(i + 1) * test_size]);
assert_eq!(h_evals_large[(2 * i + 1) * test_size..(2 * i + 2) * test_size], h_evals_coset[i * test_size..(i + 1) * test_size]);
}
}
#[test]
fn test_point_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let log_test_size = 8;
let test_size = 1 << log_test_size;
let (_, mut d_coeffs, mut d_domain) = set_up_points(test_size, log_test_size, false);
let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
let mut d_evals_large = evaluate_points(&mut d_coeffs, &mut d_large_domain);
let mut h_evals_large: Vec<Point> = (0..2 * test_size).map(|_| Point::zero()).collect();
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
let mut d_evals = evaluate_points(&mut d_coeffs, &mut d_domain);
let mut h_evals: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_points_on_coset(&mut d_coeffs, &mut d_domain, &mut d_coset_powers);
let mut h_evals_coset: Vec<Point> = (0..test_size).map(|_| Point::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
assert_eq!(h_evals[..], h_evals_large[..test_size]);
assert_eq!(h_evals_coset[..], h_evals_large[test_size..2 * test_size]);
for i in 0..test_size {
assert_ne!(h_evals[i], Point::zero());
assert_ne!(h_evals_coset[i], Point::zero());
assert_ne!(h_evals_large[2 * i], Point::zero());
assert_ne!(h_evals_large[2 * i + 1], Point::zero());
}
}
#[test]
fn test_point_batch_evaluation_on_coset() {
// checks that evaluating a polynomial on a subgroup and its coset is the same as evaluating on a 2x larger subgroup
let batch_size = 2;
let log_test_size = 6;
let test_size = 1 << log_test_size;
let (_, mut d_coeffs, mut d_domain) = set_up_points(test_size * batch_size, log_test_size, false);
let (_, _, mut d_large_domain) = set_up_points(0, log_test_size + 1, false);
let mut d_coset_powers = build_domain(test_size, log_test_size + 1, false);
let mut d_evals_large = evaluate_points_batch(&mut d_coeffs, &mut d_large_domain, batch_size);
let mut h_evals_large: Vec<Point> = (0..2 * test_size * batch_size).map(|_| Point::zero()).collect();
d_evals_large.copy_to(&mut h_evals_large[..]).unwrap();
let mut d_evals = evaluate_points_batch(&mut d_coeffs, &mut d_domain, batch_size);
let mut h_evals: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
d_evals.copy_to(&mut h_evals[..]).unwrap();
let mut d_evals_coset = evaluate_points_on_coset_batch(&mut d_coeffs, &mut d_domain, batch_size, &mut d_coset_powers);
let mut h_evals_coset: Vec<Point> = (0..test_size * batch_size).map(|_| Point::zero()).collect();
d_evals_coset.copy_to(&mut h_evals_coset[..]).unwrap();
for i in 0..batch_size {
assert_eq!(h_evals_large[2 * i * test_size..(2 * i + 1) * test_size], h_evals[i * test_size..(i + 1) * test_size]);
assert_eq!(h_evals_large[(2 * i + 1) * test_size..(2 * i + 2) * test_size], h_evals_coset[i * test_size..(i + 1) * test_size]);
}
for i in 0..test_size * batch_size {
assert_ne!(h_evals[i], Point::zero());
assert_ne!(h_evals_coset[i], Point::zero());
assert_ne!(h_evals_large[2 * i], Point::zero());
assert_ne!(h_evals_large[2 * i + 1], Point::zero());
}
}
// testing matrix multiplication by comparing the result of FFT with the naive multiplication by the DFT matrix
#[test]
fn test_matrix_multiplication() {
let seed = None; // some value to fix the rng
let test_size = 1 << 5;
let rou = Fr::get_root_of_unity(test_size).unwrap();
let matrix_flattened: Vec<Scalar> = (0..test_size)
.flat_map(|row_num| {
(0..test_size).map(move |col_num| {
let pow: [u64; 1] = [(row_num * col_num).try_into().unwrap()];
Scalar::from_ark(Fr::pow(&rou, &pow).into_repr())
})
})
.collect();
let vector: Vec<Scalar> = generate_random_scalars(test_size, get_rng(seed));
let result = mult_matrix_by_vec(&matrix_flattened, &vector, 0);
let mut ntt_result = vector.clone();
ntt(&mut ntt_result, 0);
// we don't use the same roots of unity as arkworks, so the results are permutations
// of one another and the only guaranteed fixed scalars are the following ones:
assert_eq!(result[0], ntt_result[0]);
assert_eq!(result[test_size >> 1], ntt_result[test_size >> 1]);
}
#[test]
#[allow(non_snake_case)]
fn test_vec_scalar_mul() {
let mut inout = [Scalar::one(), Scalar::one(), Scalar::zero()];
let expected = [Scalar::one(), Scalar::zero(), Scalar::zero()];
mult_sc_vec(&mut inout, &expected, 0);
assert_eq!(inout, expected);
}
#[test]
#[allow(non_snake_case)]
fn test_vec_point_mul() {
let dummy_one = Point {
x: Base::one(),
y: Base::one(),
z: Base::one(),
};
let mut inout = [dummy_one, dummy_one, Point::zero()];
let scalars = [Scalar::one(), Scalar::zero(), Scalar::zero()];
let expected = [dummy_one, Point::zero(), Point::zero()];
multp_vec(&mut inout, &scalars, 0);
assert_eq!(inout, expected);
}
}

View File

@@ -8,12 +8,9 @@ fn main() {
println!("cargo:rerun-if-changed=./icicle");
let arch_type = env::var("ARCH_TYPE").unwrap_or(String::from("native"));
let stream_type = env::var("DEFAULT_STREAM").unwrap_or(String::from("legacy"));
let mut arch = String::from("-arch=");
arch.push_str(&arch_type);
let mut stream = String::from("-default-stream=");
stream.push_str(&stream_type);
let mut nvcc = cc::Build::new();
@@ -23,12 +20,10 @@ fn main() {
nvcc.define("G2_DEFINED", None);
}
nvcc.cuda(true);
nvcc.define("FEATURE_BLS12_377", None);
nvcc.debug(false);
nvcc.flag(&arch);
nvcc.flag(&stream);
nvcc.files([
"../icicle-cuda/curves/index.cu",
"./icicle/curves/index.cu",
]);
nvcc.compile("ingo_icicle"); //TODO: extension??
}

View File

@@ -90,36 +90,36 @@ def get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q,
# Create Cuda interface
newpath = "./icicle-cuda/curves/"+curve_name
newpath = "./icicle/curves/"+curve_name
if not os.path.exists(newpath):
os.makedirs(newpath)
fc = get_config_file_content(modolus_p, bit_count_p, limb_p, ntt_size, modolus_q, bit_count_q, limb_q, weierstrass_b)
text_file = open("./icicle-cuda/curves/"+curve_name+"/params.cuh", "w")
text_file = open("./icicle/curves/"+curve_name+"/params.cuh", "w")
n = text_file.write(fc)
text_file.close()
with open("./icicle-cuda/curves/curve_template/lde.cu", "r") as lde_file:
with open("./icicle/curves/curve_template/lde.cu", "r") as lde_file:
content = lde_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./icicle-cuda/curves/"+curve_name+"/lde.cu", "w")
text_file = open("./icicle/curves/"+curve_name+"/lde.cu", "w")
n = text_file.write(content)
text_file.close()
with open("./icicle-cuda/curves/curve_template/msm.cu", "r") as msm_file:
with open("./icicle/curves/curve_template/msm.cu", "r") as msm_file:
content = msm_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./icicle-cuda/curves/"+curve_name+"/msm.cu", "w")
text_file = open("./icicle/curves/"+curve_name+"/msm.cu", "w")
n = text_file.write(content)
text_file.close()
with open("./icicle-cuda/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_file:
with open("./icicle/curves/curve_template/ve_mod_mult.cu", "r") as ve_mod_mult_file:
content = ve_mod_mult_file.read()
content = content.replace("CURVE_NAME_U",curve_name.upper())
content = content.replace("CURVE_NAME_L",curve_name.lower())
text_file = open("./icicle-cuda/curves/"+curve_name+"/ve_mod_mult.cu", "w")
text_file = open("./icicle/curves/"+curve_name+"/ve_mod_mult.cu", "w")
n = text_file.write(content)
text_file.close()
@@ -132,7 +132,7 @@ namespace = '#include "params.cuh"\n'+'''namespace CURVE_NAME_U {
typedef Affine<point_field_t> affine_t;
}'''
with open('./icicle-cuda/curves/'+curve_name+'/curve_config.cuh', 'w') as f:
with open('./icicle/curves/'+curve_name+'/curve_config.cuh', 'w') as f:
f.write(namespace.replace("CURVE_NAME_U",curve_name.upper()))
@@ -145,7 +145,7 @@ extern "C" bool eq_CURVE_NAME_L(CURVE_NAME_U::projective_t *point1, CURVE_NAME_U
return (*point1 == *point2);
}'''
with open('./icicle-cuda/curves/'+curve_name+'/projective.cu', 'w') as f:
with open('./icicle/curves/'+curve_name+'/projective.cu', 'w') as f:
f.write(eq.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
supported_operations = '''
@@ -155,10 +155,10 @@ supported_operations = '''
#include "ve_mod_mult.cu"
'''
with open('./icicle-cuda/curves/'+curve_name+'/supported_operations.cu', 'w') as f:
with open('./icicle/curves/'+curve_name+'/supported_operations.cu', 'w') as f:
f.write(supported_operations.replace("CURVE_NAME_U",curve_name.upper()).replace("CURVE_NAME_L",curve_name.lower()))
with open('./icicle-cuda/curves/index.cu', 'a') as f:
with open('./icicle/curves/index.cu', 'a') as f:
f.write('\n#include "'+curve_name.lower()+'/supported_operations.cu"')

View File

@@ -1,49 +0,0 @@
[package]
name = "icicle-core"
version = "0.1.0"
edition = "2021"
authors = [ "Ingonyama" ]
description = "An implementation of the Ingonyama CUDA Library"
homepage = "https://www.ingonyama.com"
repository = "https://github.com/ingonyama-zk/icicle"
[[bench]]
name = "ntt"
path = "benches/ntt.rs"
harness = false
[[bench]]
name = "msm"
path = "benches/msm.rs"
harness = false
[dependencies]
hex = "*"
ark-std = "0.3.0"
ark-ff = "0.3.0"
ark-poly = "0.3.0"
ark-ec = { version = "0.3.0", features = [ "parallel" ] }
ark-bls12-381 = "0.3.0"
ark-bls12-377 = "0.3.0"
ark-bn254 = "0.3.0"
serde = { version = "1.0", features = ["derive"] }
serde_derive = "1.0"
serde_cbor = "0.11.2"
rustacuda = "0.1"
rustacuda_core = "0.1"
rustacuda_derive = "0.1"
rand = "*" #TODO: move rand and ark dependencies to dev once random scalar/point generation is done "natively"
[build-dependencies]
cc = { version = "1.0", features = ["parallel"] }
[dev-dependencies]
"criterion" = "0.4.0"
[features]
default = ["bls12-381"]
bls12-381 = ["ark-bls12-381/curve"]
g2 = []

View File

@@ -1,4 +0,0 @@
pub trait Field<const NUM_LIMBS: usize> {
const MODOLUS: [u32;NUM_LIMBS];
const LIMBS: usize = NUM_LIMBS;
}

View File

@@ -1,3 +0,0 @@
pub mod field;
pub mod scalar;
pub mod point;

View File

@@ -1,108 +0,0 @@
use std::ffi::c_uint;
use ark_bn254::{Fq as Fq_BN254, Fr as Fr_BN254, G1Affine as G1Affine_BN254, G1Projective as G1Projective_BN254};
use ark_ec::AffineCurve;
use ark_ff::{BigInteger256, PrimeField};
use std::mem::transmute;
use ark_ff::Field;
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
use super::scalar::{get_fixed_limbs, self};
#[derive(Debug, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointT<BF: scalar::ScalarTrait> {
pub x: BF,
pub y: BF,
pub z: BF,
}
impl<BF: DeviceCopy + scalar::ScalarTrait> Default for PointT<BF> {
fn default() -> Self {
PointT::zero()
}
}
impl<BF: DeviceCopy + scalar::ScalarTrait> PointT<BF> {
pub fn zero() -> Self {
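// (0 : 1 : 0), the conventional projective representation of the point at infinity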
PointT {
x: BF::zero(),
y: BF::one(),
z: BF::zero(),
}
}
pub fn infinity() -> Self {
Self::zero()
}
}
#[derive(Debug, PartialEq, Clone, Copy, DeviceCopy)]
#[repr(C)]
pub struct PointAffineNoInfinityT<BF> {
pub x: BF,
pub y: BF,
}
impl<BF: scalar::ScalarTrait> Default for PointAffineNoInfinityT<BF> {
fn default() -> Self {
PointAffineNoInfinityT {
x: BF::zero(),
y: BF::zero(),
}
}
}
impl<BF: Copy + scalar::ScalarTrait> PointAffineNoInfinityT<BF> {
/// From u32 limbs x, y
pub fn from_limbs(x: &[u32], y: &[u32]) -> Self {
PointAffineNoInfinityT {
x: BF::from_limbs(x),
y: BF::from_limbs(y)
}
}
pub fn limbs(&self) -> Vec<u32> {
[self.x.limbs(), self.y.limbs()].concat()
}
pub fn to_projective(&self) -> PointT<BF> {
PointT {
x: self.x,
y: self.y,
z: BF::one(),
}
}
}
impl<BF: Copy + scalar::ScalarTrait> PointT<BF> {
pub fn from_limbs(x: &[u32], y: &[u32], z: &[u32]) -> Self {
PointT {
x: BF::from_limbs(x),
y: BF::from_limbs(y),
z: BF::from_limbs(z)
}
}
pub fn from_xy_limbs(value: &[u32]) -> PointT<BF> {
let l = value.len();
assert_eq!(l, 3 * BF::base_limbs(), "length must be 3 * {}", BF::base_limbs());
PointT {
x: BF::from_limbs(value[..BF::base_limbs()].try_into().unwrap()),
y: BF::from_limbs(value[BF::base_limbs()..BF::base_limbs() * 2].try_into().unwrap()),
z: BF::from_limbs(value[BF::base_limbs() * 2..].try_into().unwrap())
}
}
pub fn to_xy_strip_z(&self) -> PointAffineNoInfinityT<BF> {
PointAffineNoInfinityT {
x: self.x,
y: self.y,
}
}
}

View File

@@ -1,102 +0,0 @@
use std::ffi::{c_int, c_uint};
use rand::{rngs::StdRng, RngCore, SeedableRng};
use rustacuda_core::DeviceCopy;
use rustacuda_derive::DeviceCopy;
use std::mem::transmute;
use rustacuda::prelude::*;
use rustacuda_core::DevicePointer;
use rustacuda::memory::{DeviceBox, CopyDestination};
use crate::utils::{u32_vec_to_u64_vec, u64_vec_to_u32_vec};
use std::marker::PhantomData;
use std::convert::TryInto;
use super::field::{Field, self};
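/// Pads a little-endian limb slice with zeros up to NUM_LIMBS; panics if it has more limbs.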
pub fn get_fixed_limbs<const NUM_LIMBS: usize>(val: &[u32]) -> [u32; NUM_LIMBS] {
match val.len() {
n if n < NUM_LIMBS => {
let mut padded: [u32; NUM_LIMBS] = [0; NUM_LIMBS];
padded[..val.len()].copy_from_slice(&val);
padded
}
n if n == NUM_LIMBS => val.try_into().unwrap(),
_ => panic!("slice has too many elements"),
}
}
pub trait ScalarTrait{
fn base_limbs() -> usize;
fn zero() -> Self;
fn from_limbs(value: &[u32]) -> Self;
fn one() -> Self;
fn to_bytes_le(&self) -> Vec<u8>;
fn limbs(&self) -> &[u32];
}
#[derive(Debug, PartialEq, Clone, Copy)]
#[repr(C)]
pub struct ScalarT<M, const NUM_LIMBS: usize> {
pub(crate) phantom: PhantomData<M>,
pub(crate) value : [u32; NUM_LIMBS]
}
impl<M, const NUM_LIMBS: usize> ScalarTrait for ScalarT<M, NUM_LIMBS>
where
M: Field<NUM_LIMBS>,
{
fn base_limbs() -> usize {
return NUM_LIMBS;
}
fn zero() -> Self {
ScalarT {
value: [0u32; NUM_LIMBS],
phantom: PhantomData,
}
}
fn from_limbs(value: &[u32]) -> Self {
Self {
value: get_fixed_limbs(value),
phantom: PhantomData,
}
}
fn one() -> Self {
let mut s = [0u32; NUM_LIMBS];
s[0] = 1;
ScalarT { value: s, phantom: PhantomData }
}
fn to_bytes_le(&self) -> Vec<u8> {
self.value
.iter()
.map(|s| s.to_le_bytes().to_vec())
.flatten()
.collect::<Vec<_>>()
}
fn limbs(&self) -> &[u32] {
&self.value
}
}
impl<M, const NUM_LIMBS: usize> ScalarT<M, NUM_LIMBS> where M: field::Field<NUM_LIMBS>{
pub fn from_limbs_le(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
Self::from_limbs(value)
}
pub fn from_limbs_be(value: &[u32]) -> ScalarT<M,NUM_LIMBS> {
let mut value = value.to_vec();
value.reverse();
Self::from_limbs_le(&value)
}
// Additional Functions
pub fn add(&self, other:ScalarT<M, NUM_LIMBS>) -> ScalarT<M,NUM_LIMBS>{ // overload +
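// NOTE: placeholder arithmetic, not field addition: only the lowest limbs are summed,
// with no carry or modular reduction, and the sum is broadcast to every limb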
return ScalarT{value: [self.value[0] + other.value[0];NUM_LIMBS], phantom: PhantomData };
}
}

View File

@@ -1,2 +0,0 @@
pub mod utils;
pub mod basic_structs;

View File

@@ -1,42 +0,0 @@
use rand::RngCore;
use rand::rngs::StdRng;
use rand::SeedableRng;
pub fn from_limbs<T>(limbs: Vec<u32>, chunk_size: usize, f: fn(&[u32]) -> T) -> Vec<T> {
let points = limbs
.chunks(chunk_size)
.map(|lmbs| f(lmbs))
.collect::<Vec<T>>();
points
}
pub fn u32_vec_to_u64_vec(arr_u32: &[u32]) -> Vec<u64> {
let len = (arr_u32.len() / 2) as usize;
let mut arr_u64 = vec![0u64; len];
for i in 0..len {
arr_u64[i] = u64::from(arr_u32[i * 2]) | (u64::from(arr_u32[i * 2 + 1]) << 32);
}
arr_u64
}
pub fn u64_vec_to_u32_vec(arr_u64: &[u64]) -> Vec<u32> {
let len = arr_u64.len() * 2;
let mut arr_u32 = vec![0u32; len];
for i in 0..arr_u64.len() {
arr_u32[i * 2] = arr_u64[i] as u32;
arr_u32[i * 2 + 1] = (arr_u64[i] >> 32) as u32;
}
arr_u32
}
pub fn get_rng(seed: Option<u64>) -> Box<dyn RngCore> { //TODO: this func is universal
let rng: Box<dyn RngCore> = match seed {
Some(seed) => Box::new(StdRng::seed_from_u64(seed)),
None => Box::new(rand::thread_rng()),
};
rng
}
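A minimal round-trip sketch for the limb helpers above (a hypothetical test, not part of this diff), assuming u32_vec_to_u64_vec and u64_vec_to_u32_vec are in scope: each u64 packs two u32 limbs little-endian, so converting an even-length limb vector there and back is lossless.
#[test]
fn u32_u64_limb_roundtrip() {
let limbs: Vec<u32> = vec![0xdeadbeef, 0x1, 0xffffffff, 0x0];
let packed = u32_vec_to_u64_vec(&limbs);
assert_eq!(packed[0], 0x0000_0001_dead_beef_u64); // low limb fills the low 32 bits
assert_eq!(u64_vec_to_u32_vec(&packed), limbs);
}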

View File

@@ -1,51 +0,0 @@
#pragma once
#include <map>
#include <stdexcept>
#include <cassert>
#include "constants/constants_2.h"
#include "constants/constants_4.h"
#include "constants/constants_8.h"
#include "constants/constants_11.h"
uint32_t partial_rounds_number_from_arity(const uint32_t arity) {
switch (arity) {
case 2:
return 55;
case 4:
return 56;
case 8:
return 57;
case 11:
return 57;
default:
throw std::invalid_argument( "unsupported arity" );
}
};
// TO-DO: change to mapping
const uint32_t FULL_ROUNDS_DEFAULT = 4;
// TO-DO: for now, the constants are only generated in bls12_381
template <typename S>
S * load_constants(const uint32_t arity) {
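// selects the raw byte table for this arity and reinterprets it as field elements S;
// assumes the byte layout already matches the in-memory representation of S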
unsigned char * constants;
switch (arity) {
case 2:
constants = constants_2;
break;
case 4:
constants = constants_4;
break;
case 8:
constants = constants_8;
break;
case 11:
constants = constants_11;
break;
default:
throw std::invalid_argument( "unsupported arity" );
}
return reinterpret_cast< S * >(constants);
}

File diff suppressed because it is too large

View File

@@ -1,995 +0,0 @@
unsigned char constants_2[] = {
0xd8, 0xd3, 0x6e, 0x9d, 0x00, 0x0a, 0x32, 0xa7, 0x36, 0x8b, 0x75, 0xa2,
0x92, 0xac, 0x1e, 0x50, 0x24, 0x4a, 0xbb, 0x1d, 0x86, 0x51, 0xbd, 0x23,
0x7a, 0xe1, 0x3a, 0xfa, 0x4b, 0x06, 0x9f, 0x66, 0x15, 0x3f, 0x9d, 0x2b,
0x84, 0xab, 0x72, 0x6e, 0x34, 0x27, 0xac, 0x45, 0x96, 0x7c, 0xe6, 0xee,
0x6c, 0xa6, 0x4f, 0xc8, 0xf2, 0x7f, 0x53, 0xe4, 0x36, 0xca, 0xac, 0xfb,
0xde, 0xa8, 0x61, 0x0a, 0xd5, 0x65, 0x81, 0x12, 0x71, 0x47, 0x23, 0x1e,
0x30, 0x49, 0xaa, 0x1a, 0x4e, 0x2b, 0x29, 0x17, 0x5b, 0x27, 0xdf, 0x45,
0x8a, 0x1e, 0x1b, 0xf9, 0x09, 0x9d, 0xb8, 0x24, 0xfa, 0xce, 0xe9, 0x21,
0xd1, 0xa0, 0x12, 0x2f, 0xca, 0x56, 0x5f, 0x2f, 0x1b, 0x40, 0x6c, 0x31,
0x90, 0x55, 0x2f, 0x1f, 0x2b, 0xd0, 0xd2, 0xd2, 0xc9, 0x24, 0x26, 0x38,
0x05, 0x18, 0x53, 0x38, 0x1d, 0x42, 0xfc, 0x0b, 0xc8, 0xc5, 0x8b, 0x5a,
0xf3, 0x19, 0xca, 0xff, 0xf5, 0x3b, 0xef, 0x15, 0x4e, 0xf4, 0xcc, 0xbe,
0xe8, 0x42, 0x69, 0x68, 0xf4, 0xfc, 0xd3, 0xc3, 0xf0, 0x5d, 0x03, 0x89,
0x4a, 0xae, 0xb0, 0x13, 0x43, 0x39, 0xaa, 0x45, 0xb2, 0x41, 0x38, 0xf8,
0x20, 0x2d, 0xd1, 0x1f, 0x3c, 0xc4, 0xaa, 0xf1, 0x40, 0xd0, 0x26, 0xe4,
0x81, 0x74, 0x41, 0xc1, 0xb4, 0xd0, 0x64, 0x8d, 0xf9, 0xdd, 0x9a, 0x4b,
0x38, 0x45, 0x02, 0xcc, 0x01, 0x65, 0x25, 0x72, 0x24, 0x2b, 0xba, 0x3f,
0x2c, 0x1a, 0xbf, 0xc6, 0x3c, 0xcf, 0xa1, 0xef, 0x9c, 0xda, 0x2e, 0x9b,
0x14, 0xc7, 0x81, 0x65, 0x85, 0xc3, 0x24, 0x2c, 0x65, 0xc6, 0x51, 0x3a,
0xd3, 0xc1, 0xd1, 0xd5, 0x42, 0x8f, 0x2f, 0x3c, 0x0e, 0x61, 0xaf, 0xb8,
0xf6, 0x3c, 0x32, 0x1a, 0x9f, 0x28, 0x91, 0x9d, 0x02, 0x18, 0x97, 0x47,
0x79, 0x21, 0xf9, 0x61, 0x40, 0x5c, 0x16, 0xa9, 0xc5, 0x6e, 0xca, 0x9f,
0x37, 0xf1, 0x2a, 0x13, 0xf1, 0xf0, 0xf0, 0xef, 0xb4, 0x56, 0xf4, 0x08,
0x6a, 0x47, 0x52, 0x4a, 0x32, 0xbf, 0xb3, 0xab, 0xa5, 0xdf, 0x36, 0x12,
0x63, 0x5f, 0x2e, 0xc2, 0xf4, 0x17, 0xa4, 0x0c, 0xfd, 0xeb, 0x3d, 0xe9,
0xc7, 0x1d, 0x97, 0x5e, 0x52, 0x61, 0x75, 0x96, 0xfb, 0x11, 0x60, 0xcd,
0xf8, 0xca, 0xa8, 0x11, 0xdc, 0x6e, 0xcd, 0x59, 0xf3, 0x37, 0x41, 0xd6,
0x61, 0xb3, 0x74, 0xe5, 0xa8, 0xc1, 0x51, 0xf5, 0xa2, 0x57, 0x2e, 0x32,
0xe4, 0x0e, 0xd2, 0xed, 0x73, 0xca, 0x58, 0x7a, 0x81, 0x16, 0x9c, 0xa0,
0xa0, 0xc0, 0xaa, 0x65, 0xe0, 0x3f, 0x43, 0xb7, 0x03, 0xb0, 0x35, 0x84,
0x61, 0xf6, 0x60, 0x0e, 0x18, 0xb3, 0x0a, 0xc0, 0x59, 0x98, 0x57, 0x80,
0x7e, 0x26, 0x8b, 0x26, 0x0f, 0x94, 0x44, 0xbc, 0xc9, 0x71, 0xf8, 0x19,
0x9a, 0x3b, 0x0a, 0xea, 0x9a, 0xc0, 0x41, 0x26, 0x9b, 0x50, 0xe7, 0x5d,
0x1b, 0x59, 0x22, 0x26, 0x79, 0x3a, 0xae, 0x39, 0x61, 0x13, 0x9c, 0x8f,
0x8e, 0xd0, 0xbf, 0x84, 0xb8, 0xca, 0x3f, 0x71, 0x41, 0x70, 0x35, 0x88,
0x03, 0x63, 0x0d, 0xc5, 0x1a, 0xcb, 0x63, 0x11, 0x32, 0x90, 0xb6, 0xaa,
0xfb, 0xdc, 0xd9, 0xc3, 0xa1, 0x93, 0x41, 0xe8, 0xa1, 0xfb, 0x2d, 0x88,
0x9e, 0xe6, 0x37, 0x21, 0xb2, 0xbe, 0xfc, 0x64, 0x18, 0x37, 0x87, 0xbc,
0x36, 0xf2, 0xe4, 0x08, 0x5e, 0x87, 0x5f, 0x78, 0xbc, 0xbd, 0x4c, 0x91,
0x53, 0xb5, 0xf3, 0x3c, 0xe9, 0x8c, 0x1d, 0xa7, 0x0a, 0x95, 0x90, 0x55,
0xfd, 0xfd, 0x61, 0xd1, 0x38, 0x21, 0xca, 0x5c, 0x8f, 0xc0, 0xc9, 0x39,
0x81, 0x8e, 0x2d, 0x7c, 0xa7, 0xab, 0x84, 0xef, 0x09, 0xd3, 0x1f, 0xb2,
0xc8, 0xe7, 0x6a, 0x9c, 0xe5, 0x0d, 0xea, 0x15, 0x0f, 0xdf, 0x55, 0x7e,
0x25, 0x01, 0xad, 0x36, 0xdc, 0xfe, 0x2c, 0xc2, 0xf5, 0xd1, 0x57, 0xef,
0xf2, 0x1d, 0xdd, 0x82, 0xb0, 0x20, 0xbf, 0xfe, 0x8a, 0xa8, 0x4d, 0xb5,
0xd2, 0x03, 0x0d, 0x49, 0x43, 0xaf, 0x4a, 0xac, 0x95, 0x64, 0x6b, 0x62,
0x6e, 0x75, 0x84, 0x85, 0x56, 0x8f, 0x99, 0x6d, 0xfa, 0xb4, 0x37, 0x30,
0xc4, 0x06, 0x82, 0x32, 0xf0, 0x86, 0x6e, 0x5f, 0xde, 0x62, 0xa3, 0x61,
0xdc, 0x17, 0x37, 0x5c, 0xc8, 0x9b, 0x78, 0x6a, 0xf1, 0xa2, 0x77, 0x76,
0x44, 0x93, 0xbe, 0x6b, 0x71, 0x39, 0x0a, 0x35, 0x86, 0xa3, 0x4c, 0x84,
0x0f, 0xb1, 0xbf, 0x51, 0x88, 0x18, 0x88, 0x57, 0x09, 0x97, 0x55, 0xdf,
0x29, 0xe6, 0xff, 0xaa, 0xaf, 0x7b, 0x27, 0x29, 0xca, 0xf5, 0x11, 0x64,
0xa2, 0x2e, 0xb9, 0x99, 0xc5, 0xc4, 0x56, 0x1b, 0x03, 0x0c, 0xf1, 0x7e,
0x9b, 0xf1, 0x8b, 0x57, 0xc7, 0x4c, 0x4a, 0x05, 0x84, 0x78, 0x67, 0x3c,
0x82, 0xee, 0xe4, 0x55, 0xfb, 0xf2, 0x2e, 0xcb, 0x3a, 0x64, 0x88, 0x44,
0x15, 0x1b, 0x23, 0xaa, 0xe9, 0x9a, 0x04, 0xbd, 0xb4, 0xbd, 0x34, 0x28,
0x84, 0x34, 0x55, 0x13, 0x2c, 0xbd, 0x43, 0x1c, 0x3b, 0xaf, 0x1d, 0x95,
0x28, 0x18, 0x5f, 0xe7, 0x33, 0xa2, 0x4c, 0x58, 0xca, 0x42, 0xbe, 0x9e,
0x8e, 0x72, 0xae, 0xf1, 0x08, 0x40, 0x8f, 0x55, 0x61, 0x68, 0xa3, 0x2e,
0xff, 0x75, 0xe7, 0x38, 0x44, 0x68, 0x4a, 0x40, 0x05, 0x3f, 0x64, 0xf2,
0xf3, 0xd7, 0x8d, 0xd4, 0x3d, 0x69, 0x2e, 0xc9, 0x94, 0x3f, 0xc8, 0x75,
0xa1, 0xa1, 0xe5, 0x0b, 0x26, 0xec, 0x36, 0xe9, 0x29, 0x67, 0x4b, 0xc9,
0x2b, 0x0f, 0x4b, 0xa0, 0x56, 0xaf, 0x8b, 0x81, 0xea, 0x11, 0xf5, 0x42,
0xd3, 0xf2, 0x6e, 0x91, 0xf3, 0x35, 0x60, 0xe6, 0xa0, 0x80, 0x09, 0x45,
0xfe, 0x29, 0x4c, 0xde, 0x96, 0x76, 0x6e, 0x27, 0xfc, 0x64, 0xd3, 0xf7,
0xb4, 0xbf, 0xfa, 0x8c, 0x13, 0x68, 0x52, 0xf7, 0x9c, 0x86, 0x74, 0xe1,
0x8a, 0x01, 0x97, 0x73, 0x69, 0x29, 0x21, 0x0d, 0xae, 0xcf, 0xa7, 0x83,
0xf2, 0x8b, 0x93, 0x8d, 0xef, 0xf2, 0x7c, 0xc1, 0xfd, 0x50, 0xca, 0x95,
0x53, 0x77, 0x46, 0xa8, 0xe0, 0xb9, 0x6f, 0x4e, 0xe5, 0x55, 0x35, 0x2b,
0x6b, 0x67, 0x5e, 0x4e, 0x24, 0x51, 0x85, 0xf3, 0x19, 0x3d, 0x4c, 0x5c,
0x94, 0x7c, 0xb4, 0xe5, 0x49, 0xe8, 0xdf, 0xdd, 0x34, 0x4c, 0x64, 0x13,
0x6e, 0x67, 0x6a, 0xc6, 0x5e, 0x82, 0x1f, 0xdc, 0x0e, 0xf6, 0x15, 0x2a,
0x6f, 0xdd, 0x3a, 0x5c, 0x7d, 0x20, 0xbf, 0xd5, 0x89, 0xa1, 0x25, 0x2f,
0x59, 0xe7, 0xca, 0xa2, 0xb4, 0xde, 0x72, 0x2c, 0xe8, 0xe6, 0xc5, 0x3d,
0x93, 0xa5, 0xe0, 0x47, 0x7d, 0xe5, 0x65, 0x58, 0x59, 0xec, 0x62, 0x79,
0xc5, 0x69, 0x21, 0xfb, 0x12, 0x45, 0xe7, 0xb3, 0xa0, 0x5c, 0xba, 0xfb,
0x70, 0x38, 0x8b, 0x80, 0x95, 0x90, 0x72, 0x85, 0xf8, 0x61, 0xb3, 0x6f,
0x5f, 0x9d, 0x2d, 0x36, 0x9f, 0xe0, 0xeb, 0xc2, 0xd2, 0xcd, 0x33, 0x5a,
0x26, 0x78, 0xa7, 0x7f, 0x24, 0x52, 0x52, 0x3a, 0xe6, 0xf6, 0xf4, 0xa0,
0x9c, 0x52, 0x1d, 0xd5, 0x26, 0x5d, 0x9a, 0x7b, 0x9f, 0xba, 0x63, 0x6a,
0xda, 0xb9, 0xed, 0xec, 0x37, 0x8b, 0x24, 0x76, 0xcf, 0x1d, 0xa0, 0x3e,
0x1e, 0xc7, 0x60, 0x73, 0xc5, 0x5b, 0x7f, 0x93, 0x84, 0x62, 0x9b, 0xe8,
0x28, 0x07, 0xac, 0x77, 0xe7, 0xb3, 0x7d, 0x6f, 0x51, 0x91, 0xc7, 0xf3,
0x4d, 0x17, 0xeb, 0xe7, 0xc5, 0x31, 0x1e, 0x2d, 0x75, 0x2e, 0x30, 0xd8,
0xe8, 0x75, 0x4c, 0x37, 0x7a, 0xd6, 0x5c, 0x75, 0x1d, 0xc0, 0xb4, 0x99,
0xa2, 0x49, 0xe0, 0x72, 0xe2, 0xb3, 0x30, 0xed, 0x8b, 0xa7, 0x7e, 0x07,
0x79, 0x36, 0x77, 0xee, 0x15, 0x71, 0x1f, 0xe0, 0x0a, 0x98, 0x0a, 0xee,
0xcf, 0x0c, 0x59, 0xcc, 0xc7, 0x48, 0x50, 0xd3, 0xea, 0x41, 0xe1, 0x66,
0xd4, 0x3b, 0x24, 0xe9, 0x63, 0x4c, 0x16, 0xec, 0x51, 0x8e, 0x06, 0xc2,
0x11, 0x53, 0x58, 0x35, 0xd0, 0xd1, 0x77, 0x43, 0x59, 0x7f, 0xdb, 0x35,
0xe6, 0xea, 0x04, 0x1b, 0x69, 0x2e, 0x03, 0x2e, 0x5e, 0xa9, 0x67, 0xc7,
0x24, 0x52, 0xef, 0x5e, 0x1d, 0x8c, 0xe8, 0xa3, 0xa4, 0x8e, 0xc4, 0xcb,
0x5d, 0x8a, 0x57, 0x31, 0xdf, 0x3c, 0x38, 0xdf, 0xe6, 0xaf, 0x21, 0x77,
0x49, 0x02, 0xbc, 0x32, 0xde, 0x1e, 0x9f, 0x6a, 0x95, 0x9f, 0x94, 0x3b,
0x84, 0xdc, 0xea, 0x0b, 0x09, 0x76, 0x2f, 0x93, 0x70, 0x12, 0x8c, 0xb6,
0xd0, 0x20, 0xc3, 0xe2, 0x94, 0x8a, 0xb6, 0x2f, 0x9a, 0x03, 0xef, 0x5b,
0xc0, 0x47, 0xbf, 0xd0, 0xa7, 0x90, 0xe6, 0x13, 0xac, 0xc9, 0x2e, 0x10,
0xef, 0x10, 0xd1, 0x81, 0x65, 0x5d, 0xfa, 0x50, 0x65, 0xc0, 0xd6, 0x59,
0x3a, 0xe0, 0x5c, 0x94, 0xbd, 0xf8, 0xc6, 0x25, 0x85, 0x61, 0x2f, 0xa5,
0x5c, 0x0d, 0x7e, 0xe1, 0xa8, 0x04, 0x3b, 0x1f, 0x61, 0x34, 0x4b, 0x30,
0xf3, 0x84, 0x8e, 0x89, 0xb1, 0x58, 0xe2, 0x48, 0xf4, 0x79, 0x7f, 0x5f,
0x95, 0x1d, 0xe7, 0x71, 0x47, 0x5d, 0x43, 0x69, 0xd4, 0x7b, 0xe6, 0x87,
0x9e, 0x11, 0x12, 0x2a, 0x4f, 0xf7, 0x0c, 0xfb, 0x3c, 0x0b, 0x1d, 0xe7,
0xa3, 0x0b, 0xdf, 0xc7, 0xd1, 0x35, 0xdb, 0x7d, 0x58, 0x7b, 0x46, 0x40,
0x3e, 0xf6, 0xc1, 0xb6, 0x22, 0x99, 0x13, 0xd0, 0xd9, 0x3f, 0x28, 0xc5,
0xef, 0xeb, 0x6a, 0xda, 0xf5, 0xfb, 0x2d, 0x9d, 0x3c, 0x23, 0x23, 0x7d,
0x1f, 0x81, 0x55, 0xaf, 0xd4, 0xec, 0x7b, 0x09, 0x79, 0xe1, 0x90, 0xde,
0xe3, 0xff, 0x9a, 0x13, 0x2b, 0x4e, 0x70, 0x5c, 0x63, 0x72, 0x88, 0xfa,
0x74, 0x4f, 0xb7, 0xd1, 0x33, 0x3b, 0x8a, 0xec, 0x2e, 0x9b, 0x77, 0x0b,
0x8c, 0x3a, 0x91, 0x2c, 0x63, 0x3c, 0x03, 0x40, 0x1e, 0x78, 0x83, 0x4c,
0xcc, 0x0a, 0x3b, 0x99, 0x8d, 0x10, 0x54, 0x79, 0x3e, 0x85, 0x9d, 0xab,
0x2f, 0xd6, 0x9b, 0xab, 0x63, 0x85, 0x7a, 0x80, 0xe2, 0x43, 0xc0, 0x31,
0xa9, 0x77, 0x9a, 0x12, 0xf6, 0xcb, 0x8d, 0xfb, 0x65, 0xed, 0xb7, 0x11,
0xff, 0x5c, 0xe0, 0x8f, 0x16, 0xc6, 0x9b, 0x36, 0x56, 0x2b, 0x8a, 0xe1,
0x9b, 0xe1, 0xfc, 0x01, 0x3f, 0xa4, 0x49, 0x5d, 0x59, 0x19, 0xbd, 0xbe,
0x17, 0x49, 0xe5, 0xa1, 0xa7, 0xf7, 0x26, 0x19, 0xa4, 0x0f, 0xd3, 0x5b,
0x74, 0xa9, 0xfe, 0x53, 0x88, 0x51, 0xa8, 0x9c, 0x3f, 0xde, 0xbd, 0x19,
0xa0, 0x40, 0x31, 0x50, 0x1f, 0x8b, 0x92, 0x97, 0xb2, 0x1c, 0xc7, 0xb0,
0xdd, 0xd5, 0xae, 0x88, 0x92, 0x00, 0x4a, 0xd7, 0xb7, 0xf8, 0x02, 0xaa,
0x25, 0xbb, 0x05, 0x89, 0x78, 0xda, 0x9c, 0x00, 0xb5, 0x48, 0x2c, 0x0d,
0xf3, 0xfa, 0xfc, 0x4e, 0x6f, 0x3d, 0x96, 0x74, 0x92, 0xb5, 0x16, 0x01,
0x88, 0xb2, 0x4a, 0x9c, 0x43, 0x35, 0x75, 0xef, 0x3d, 0x6e, 0xd0, 0x92,
0xc0, 0x24, 0xf6, 0xd6, 0xc0, 0x01, 0xef, 0x23, 0xb0, 0x6e, 0x27, 0x21,
0x5e, 0xa1, 0x8c, 0x0f, 0x69, 0xbc, 0x09, 0x47, 0x2c, 0x13, 0x5d, 0xba,
0x32, 0x3c, 0x37, 0x62, 0x3a, 0xdf, 0x38, 0x5a, 0x17, 0xe2, 0xfc, 0xe3,
0x8e, 0xe2, 0xd6, 0x6d, 0x50, 0x1b, 0xd1, 0xcc, 0x4b, 0x9d, 0x66, 0x0a,
0x90, 0x85, 0x01, 0x3b, 0xa2, 0x77, 0xd4, 0x95, 0x90, 0x63, 0x49, 0x5e,
0x27, 0xe7, 0xab, 0xc5, 0xf1, 0xf9, 0xa8, 0xf2, 0x40, 0xb1, 0x14, 0x35,
0x4d, 0x69, 0x4c, 0x51, 0x3b, 0x9b, 0x10, 0x50, 0x70, 0x34, 0xf4, 0xbe,
0x14, 0x88, 0xb5, 0x40, 0x1a, 0x68, 0x74, 0x40, 0x4c, 0xa3, 0xa7, 0x0d,
0x32, 0x64, 0xaa, 0xef, 0xf5, 0x7b, 0x1a, 0x60, 0x1d, 0xfc, 0x33, 0xf2,
0x50, 0xc6, 0x39, 0x28, 0x53, 0xe7, 0x98, 0xbf, 0xbd, 0x1e, 0xac, 0x80,
0x35, 0x5d, 0x7a, 0x18, 0x96, 0x8f, 0xb1, 0x41, 0xc2, 0xcb, 0x7d, 0xd0,
0x75, 0xd4, 0xc2, 0x11, 0x78, 0xd8, 0xa1, 0x98, 0x53, 0x1c, 0x59, 0x72,
0xac, 0xc1, 0x37, 0x0f, 0x42, 0x13, 0x0b, 0x98, 0xf9, 0x6e, 0x6f, 0x36,
0x53, 0x8d, 0x66, 0x46, 0x65, 0xf0, 0x27, 0xd3, 0xe3, 0xf0, 0x10, 0x5d,
0x1b, 0xae, 0x8d, 0x49, 0xec, 0xe6, 0x40, 0xfc, 0xfa, 0xbe, 0x55, 0x60,
0x4b, 0xfe, 0xd0, 0xca, 0x6a, 0x45, 0xd0, 0xd5, 0xe1, 0x5f, 0x20, 0x67,
0x09, 0x4e, 0x6d, 0x59, 0xef, 0xba, 0xec, 0x57, 0x41, 0xfa, 0x62, 0x1c,
0x54, 0xa4, 0x74, 0x46, 0xd1, 0x91, 0x48, 0xc9, 0xa6, 0x07, 0x01, 0xd1,
0x43, 0xa0, 0xe7, 0x7f, 0x35, 0xa0, 0x6f, 0xe4, 0x57, 0xb0, 0xb8, 0x99,
0x7c, 0x93, 0x4a, 0x0d, 0x4b, 0x0a, 0xd6, 0x24, 0xb2, 0x27, 0xd1, 0xa8,
0x2e, 0x5b, 0x3c, 0xcc, 0x17, 0xb2, 0x8a, 0x70, 0x93, 0x2b, 0x00, 0x96,
0x2d, 0x90, 0x4d, 0x67, 0x62, 0xb8, 0xc6, 0xd1, 0x46, 0xda, 0x3b, 0x6d,
0xdf, 0xd6, 0x03, 0xf2, 0x01, 0xa2, 0x89, 0x6c, 0x50, 0xd5, 0xf0, 0xb1,
0xd2, 0x24, 0xdd, 0x02, 0x42, 0xde, 0x1d, 0x5b, 0x00, 0xe0, 0x5f, 0x5f,
0x31, 0xf8, 0x59, 0x9d, 0xc8, 0xa4, 0x70, 0x4d, 0x49, 0x54, 0xc3, 0x94,
0xbc, 0x58, 0x2e, 0x03, 0x02, 0xba, 0x43, 0x2b, 0xfd, 0x0f, 0x9c, 0x0f,
0x91, 0x28, 0xf4, 0x3b, 0xe7, 0xb1, 0x3b, 0x69, 0xbd, 0x6a, 0x8f, 0x20,
0xab, 0x8f, 0xd2, 0x5a, 0xf4, 0x00, 0x92, 0xcd, 0x45, 0xd5, 0x96, 0x37,
0x31, 0x0e, 0xfd, 0x75, 0xda, 0xa4, 0x0c, 0x57, 0xcf, 0x7b, 0x1b, 0xf5,
0xa9, 0xcd, 0xff, 0xaf, 0xe8, 0x54, 0x52, 0x8a, 0x9e, 0x03, 0x97, 0x5e,
0x62, 0x3f, 0x09, 0x6d, 0x54, 0x61, 0x7d, 0xfc, 0x7a, 0x33, 0x85, 0x38,
0x9a, 0x67, 0x4d, 0xb2, 0x24, 0xa7, 0x7d, 0x33, 0xff, 0x3d, 0xe5, 0x7f,
0x7d, 0x09, 0x60, 0x87, 0xa6, 0xe4, 0x96, 0x2d, 0x3d, 0x1a, 0xa4, 0x3d,
0x2e, 0x49, 0xcd, 0xb3, 0x62, 0x45, 0xa9, 0x84, 0xb3, 0xd8, 0xa5, 0x94,
0x07, 0xf0, 0x67, 0x39, 0xbc, 0x85, 0x9d, 0x3f, 0x14, 0xd2, 0x53, 0x83,
0x2e, 0x85, 0x89, 0x69, 0xc7, 0xe7, 0x88, 0xbf, 0x3e, 0x1d, 0x40, 0x53,
0x95, 0xc8, 0x78, 0x03, 0x87, 0x80, 0x93, 0x9c, 0x88, 0x32, 0x70, 0x2e,
0x91, 0x7b, 0x8f, 0x2b, 0x83, 0xd7, 0x32, 0x88, 0x5c, 0x94, 0x65, 0x4b,
0x1a, 0x31, 0xe1, 0x16, 0x25, 0x03, 0x6e, 0xfd, 0x91, 0x7c, 0x33, 0x81,
0xcd, 0x36, 0xbb, 0xf5, 0xd2, 0x7a, 0x65, 0x29, 0x34, 0xd7, 0x0e, 0x58,
0x75, 0xef, 0xda, 0x5e, 0xc0, 0x38, 0x16, 0x02, 0xff, 0x42, 0x1a, 0xad,
0xc5, 0x17, 0x61, 0x89, 0x83, 0xe1, 0xc0, 0x2c, 0x5f, 0xae, 0xaa, 0x4e,
0x0f, 0x4c, 0xd6, 0xe6, 0x14, 0xf2, 0xe9, 0x06, 0xc0, 0x16, 0x05, 0x9d,
0xd4, 0xa3, 0x32, 0x69, 0xa8, 0x8f, 0x51, 0x8c, 0x23, 0xfe, 0x66, 0x8b,
0x79, 0x79, 0xc2, 0x6c, 0xe8, 0xff, 0x1f, 0x24, 0xf9, 0x7e, 0xe1, 0x17,
0x23, 0x65, 0xf1, 0x53, 0x8e, 0x74, 0xd5, 0xb8, 0xc8, 0x95, 0x65, 0x00,
0xf3, 0x5f, 0x88, 0x99, 0x77, 0x8f, 0x71, 0xe5, 0xac, 0xee, 0x85, 0x4a,
0x22, 0x8b, 0x3b, 0xb9, 0xa6, 0x71, 0x54, 0x0a, 0x03, 0x60, 0x21, 0x82,
0x2f, 0xd6, 0x20, 0x91, 0x5d, 0xd9, 0x33, 0x5e, 0x54, 0x48, 0xf7, 0xfb,
0x6b, 0xd1, 0xef, 0x89, 0x0e, 0xd6, 0x4a, 0x18, 0x7d, 0x89, 0x19, 0x45,
0xae, 0x60, 0x2c, 0x91, 0x0a, 0x2e, 0x9c, 0xae, 0x8b, 0xd4, 0xd8, 0x03,
0x3d, 0x33, 0xc1, 0x31, 0x68, 0x7e, 0xed, 0xa8, 0xe3, 0xa8, 0x13, 0x65,
0x64, 0xc6, 0x5c, 0x5d, 0x60, 0xef, 0xfa, 0xf1, 0x2d, 0x33, 0x32, 0xcb,
0xc5, 0x4d, 0x0b, 0x48, 0xec, 0x84, 0x39, 0x92, 0x9a, 0xdc, 0x60, 0x0b,
0x0a, 0x66, 0xd9, 0xaa, 0x04, 0x53, 0x84, 0xe4, 0xeb, 0x71, 0x40, 0x82,
0xc6, 0x6c, 0xb8, 0xc0, 0xd1, 0x44, 0x9b, 0xb2, 0xb8, 0xa1, 0x5b, 0x14,
0x3d, 0xc7, 0x13, 0x1b, 0xee, 0x19, 0xb0, 0x60, 0xc5, 0x0a, 0xc6, 0x40,
0x7f, 0x0c, 0x2e, 0x75, 0x3c, 0x6f, 0x49, 0xba, 0x38, 0x0d, 0x03, 0x54,
0x04, 0xa3, 0x51, 0x0e, 0xaf, 0x3e, 0x34, 0xcb, 0x13, 0x22, 0x15, 0xfa,
0xc1, 0x16, 0x9e, 0x58, 0x78, 0x47, 0x9c, 0xad, 0x06, 0xcd, 0xf4, 0x10,
0xee, 0x0d, 0x78, 0x64, 0x8d, 0x70, 0x45, 0x55, 0xc2, 0x76, 0x45, 0x40,
0xf0, 0xe2, 0xfc, 0x0e, 0x8a, 0xc4, 0x78, 0x56, 0x7f, 0x8f, 0x05, 0x16,
0x31, 0xd3, 0x42, 0xd7, 0xfe, 0xbb, 0xaa, 0x89, 0x3c, 0x53, 0xd8, 0xa6,
0x95, 0x43, 0x72, 0xa2, 0x00, 0x15, 0xe7, 0x83, 0xea, 0x9e, 0xad, 0x69,
0x93, 0xc5, 0x9d, 0xfc, 0xc2, 0x8d, 0x3c, 0x58, 0xd8, 0x70, 0xd7, 0xcb,
0x65, 0x53, 0xc0, 0x9e, 0x83, 0x7e, 0x02, 0xbf, 0x6a, 0x90, 0x2f, 0xa2,
0xbe, 0x5d, 0x68, 0xeb, 0xbd, 0xad, 0xaf, 0xee, 0x91, 0x8a, 0xeb, 0x4e,
0x98, 0xc9, 0xed, 0x24, 0x0d, 0x0d, 0x8d, 0xdf, 0x16, 0xf6, 0x8e, 0x10,
0x59, 0x38, 0xa4, 0xd1, 0x0c, 0x09, 0x61, 0x06, 0x63, 0xcc, 0x5a, 0x84,
0x63, 0x1d, 0x90, 0xc8, 0xfb, 0x7b, 0x49, 0xcb, 0xcf, 0x5b, 0x34, 0x47,
0x96, 0x4e, 0xb1, 0x13, 0x66, 0xd5, 0x1f, 0x18, 0x30, 0xdd, 0xd7, 0x9d,
0xb8, 0x98, 0x41, 0xf2, 0x13, 0x1d, 0xc0, 0xbc, 0xf9, 0x03, 0x37, 0x48,
0x06, 0x89, 0xcf, 0xa8, 0x2e, 0x68, 0x3f, 0x16, 0xf3, 0x41, 0x22, 0xfc,
0x11, 0x2d, 0xb2, 0x29, 0x5c, 0x56, 0x0f, 0xf9, 0x84, 0x22, 0xee, 0x59,
0xb3, 0x95, 0x0b, 0xc7, 0xf0, 0x3a, 0xe4, 0x08, 0xce, 0x91, 0xdc, 0xa4,
0x1e, 0xad, 0x5e, 0x30, 0x9a, 0x0e, 0xe3, 0xb2, 0x81, 0x13, 0x97, 0x57,
0x31, 0x8f, 0x9c, 0xe1, 0x92, 0xa3, 0xab, 0x69, 0x94, 0x02, 0x5c, 0x68,
0xe9, 0x41, 0x1a, 0xc7, 0x17, 0xde, 0xaf, 0x77, 0xc5, 0x44, 0x2d, 0x6a,
0x2f, 0xd0, 0x16, 0x41, 0x40, 0x74, 0x57, 0x01, 0xd0, 0xaa, 0xe6, 0x70,
0x13, 0xf5, 0x3f, 0x0a, 0x39, 0x4d, 0xb1, 0x82, 0x87, 0x9b, 0xd6, 0x2b,
0xbd, 0xec, 0xa7, 0xcb, 0xdb, 0x57, 0x4e, 0x49, 0xa5, 0x8a, 0xa4, 0xbe,
0x83, 0xe6, 0x7c, 0x8b, 0x79, 0x4a, 0x7f, 0x39, 0x23, 0x07, 0xa8, 0xa9,
0x4e, 0xbd, 0xc3, 0xbb, 0xcf, 0xbc, 0xe2, 0x48, 0xa3, 0x60, 0xc0, 0x2c,
0x8d, 0x60, 0x26, 0x49, 0x07, 0x92, 0x78, 0xdb, 0x99, 0x94, 0x0e, 0x02,
0x27, 0x08, 0x9c, 0xc8, 0x23, 0x03, 0xfb, 0x6d, 0x18, 0x89, 0xe7, 0x3c,
0x08, 0xe5, 0x93, 0x31, 0xaa, 0x06, 0xa9, 0x86, 0x70, 0x40, 0x9e, 0x08,
0x5d, 0x7d, 0x8d, 0xa8, 0xee, 0xa0, 0x31, 0x49, 0x35, 0x99, 0x78, 0xf3,
0x97, 0x77, 0x05, 0x5f, 0xb2, 0xc8, 0xdc, 0xd8, 0xec, 0x9d, 0x48, 0xb5,
0xa4, 0x13, 0x01, 0x45, 0xe9, 0xe3, 0x84, 0x1c, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x01, 0x00, 0x00, 0x40, 0xff, 0xff, 0xff, 0x3f,
0xff, 0xc4, 0xfe, 0x3f, 0x02, 0x3b, 0xce, 0xfe, 0x03, 0x62, 0x39, 0x07,
0x06, 0x62, 0x6b, 0x26, 0xf6, 0x1d, 0x36, 0x5f, 0x7e, 0x3d, 0xf2, 0x56,
0x34, 0x33, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xcc, 0x65, 0x6a, 0x65, 0x66,
0x9b, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51,
0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45, 0x01, 0x00, 0x00, 0x40,
0xff, 0xff, 0xff, 0x3f, 0xff, 0xc4, 0xfe, 0x3f, 0x02, 0x3b, 0xce, 0xfe,
0x03, 0x62, 0x39, 0x07, 0x06, 0x62, 0x6b, 0x26, 0xf6, 0x1d, 0x36, 0x5f,
0x7e, 0x3d, 0xf2, 0x56, 0x34, 0x33, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xcc,
0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c,
0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45,
0x01, 0x00, 0x00, 0x80, 0x54, 0x55, 0x55, 0xd5, 0xa9, 0x4c, 0xa9, 0x2a,
0xad, 0x08, 0x1e, 0x1b, 0xaf, 0xde, 0x06, 0x08, 0x5c, 0x89, 0x05, 0x80,
0x11, 0x93, 0x58, 0x4d, 0xc5, 0x60, 0x9b, 0x60, 0x34, 0x33, 0x33, 0x33,
0xcc, 0xcc, 0xcc, 0xcc, 0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32,
0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c,
0x98, 0x97, 0x8e, 0x45, 0x01, 0x00, 0x00, 0x80, 0x54, 0x55, 0x55, 0xd5,
0xa9, 0x4c, 0xa9, 0x2a, 0xad, 0x08, 0x1e, 0x1b, 0xaf, 0xde, 0x06, 0x08,
0x5c, 0x89, 0x05, 0x80, 0x11, 0x93, 0x58, 0x4d, 0xc5, 0x60, 0x9b, 0x60,
0x25, 0x49, 0x92, 0x24, 0xdb, 0xb6, 0x6d, 0xdb, 0x48, 0x1a, 0x24, 0x49,
0xdc, 0x2e, 0x36, 0xaa, 0x4a, 0x62, 0x77, 0x70, 0x4b, 0x62, 0xc7, 0x57,
0x82, 0x91, 0x51, 0xe7, 0x60, 0x54, 0x1f, 0x21, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x3e, 0x2c, 0xe4, 0xf5, 0x63, 0xa7, 0xf2, 0x0e,
0x51, 0x85, 0x27, 0x20, 0x86, 0xec, 0x5b, 0x36, 0xe4, 0xea, 0xf3, 0x0c,
0x61, 0xaa, 0xc6, 0x06, 0x74, 0xa3, 0xcc, 0xc1, 0x24, 0x1c, 0x32, 0x69,
0x95, 0x82, 0xd1, 0xb0, 0xf9, 0x5d, 0x48, 0x7e, 0x90, 0xf2, 0x18, 0x4e,
0xad, 0x05, 0x11, 0x3b, 0xc9, 0xfa, 0xfe, 0xf4, 0xe4, 0x3a, 0xfc, 0xd1,
0xb8, 0x4b, 0xee, 0xab, 0x1c, 0x80, 0x90, 0x0d, 0x01, 0x00, 0x00, 0x40,
0xff, 0xff, 0xff, 0x3f, 0xff, 0xc4, 0xfe, 0x3f, 0x02, 0x3b, 0xce, 0xfe,
0x03, 0x62, 0x39, 0x07, 0x06, 0x62, 0x6b, 0x26, 0xf6, 0x1d, 0x36, 0x5f,
0x7e, 0x3d, 0xf2, 0x56, 0xa5, 0xd2, 0xd6, 0x97, 0x8d, 0x8a, 0xf9, 0x2a,
0x81, 0x39, 0xfd, 0xef, 0xe5, 0xd1, 0x41, 0x7d, 0xb2, 0xa6, 0x20, 0x46,
0x9a, 0xc4, 0x02, 0xcc, 0x8e, 0x38, 0xd8, 0xa2, 0x2d, 0x8f, 0xa7, 0x5e,
0xc2, 0x91, 0x1b, 0xf6, 0x2c, 0xb3, 0x21, 0xed, 0x91, 0x8c, 0x3e, 0xb5,
0x8e, 0x10, 0x26, 0x1f, 0x86, 0x4d, 0xae, 0x08, 0x65, 0x72, 0x2b, 0x5d,
0x7c, 0x64, 0x10, 0xfa, 0xc1, 0xa6, 0x20, 0x55, 0x34, 0x33, 0x33, 0x33,
0xcc, 0xcc, 0xcc, 0xcc, 0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32,
0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c,
0x98, 0x97, 0x8e, 0x45, 0xc2, 0x91, 0x1b, 0xf6, 0x2c, 0xb3, 0x21, 0xed,
0x91, 0x8c, 0x3e, 0xb5, 0x8e, 0x10, 0x26, 0x1f, 0x86, 0x4d, 0xae, 0x08,
0x65, 0x72, 0x2b, 0x5d, 0x7c, 0x64, 0x10, 0xfa, 0xc1, 0xa6, 0x20, 0x55,
0x54, 0x24, 0x25, 0x3c, 0xb1, 0x71, 0x3e, 0xfe, 0x4e, 0xcd, 0x04, 0x70,
0xb5, 0xcd, 0x65, 0x35, 0x9b, 0xb9, 0x69, 0x70, 0xfc, 0x9e, 0xc1, 0xa4,
0x2c, 0xe6, 0xb5, 0xd6, 0x38, 0x75, 0x07, 0x52, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xa2, 0x92, 0xa6, 0xd6, 0xd4, 0x73, 0x2f, 0x8d,
0x27, 0x96, 0x8a, 0xc6, 0xc2, 0xbe, 0x74, 0x03, 0x5f, 0x90, 0x8e, 0xac,
0x46, 0x55, 0x68, 0x45, 0x37, 0x1a, 0xb9, 0xf9, 0x97, 0x93, 0xd3, 0x59,
0x64, 0xe7, 0x4f, 0xee, 0x6e, 0x02, 0x20, 0x8d, 0xbb, 0xe5, 0x84, 0x23,
0xf2, 0x41, 0x5f, 0x9f, 0xb2, 0xcf, 0xe4, 0x7d, 0xa9, 0x3a, 0xde, 0xdf,
0xd5, 0xb6, 0x90, 0xd5, 0x24, 0xa0, 0xe0, 0x47, 0xd4, 0xfc, 0x91, 0xd8,
0xff, 0x9b, 0x40, 0xeb, 0x00, 0x2b, 0x35, 0x8c, 0x86, 0x3c, 0x71, 0xa0,
0x84, 0xfc, 0x18, 0xf1, 0x16, 0x08, 0x99, 0xe5, 0x0c, 0x47, 0x83, 0xcb,
0x7e, 0x7f, 0x96, 0x58, 0x72, 0x40, 0x10, 0x59, 0x28, 0x31, 0x2f, 0x0c,
0x1c, 0xa1, 0x19, 0x7f, 0x78, 0x5a, 0x5b, 0x8b, 0x40, 0xe2, 0x52, 0xa3,
0xda, 0xea, 0xec, 0x8f, 0x71, 0xbc, 0x5d, 0x9a, 0xb2, 0xe8, 0x86, 0x6b,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0xf6, 0xd4, 0x61, 0x0e,
0xe7, 0x6c, 0x28, 0x1a, 0x40, 0x1b, 0x06, 0x94, 0x15, 0xb8, 0x36, 0x75,
0xd6, 0x24, 0xc8, 0x9b, 0x0d, 0xde, 0x46, 0xc4, 0x4d, 0x88, 0x8c, 0xb5,
0x17, 0xfd, 0x3a, 0x6b, 0x8b, 0xfc, 0x1a, 0xd8, 0x20, 0x32, 0xfa, 0x3d,
0xe4, 0xac, 0xa3, 0xc4, 0x8c, 0xce, 0xac, 0xbf, 0x42, 0x77, 0x63, 0x24,
0x8d, 0x33, 0x7f, 0xc1, 0x04, 0x61, 0x88, 0x6e, 0xae, 0x24, 0x52, 0x2f,
0xc6, 0xe7, 0x50, 0x37, 0x91, 0x5c, 0xb7, 0x6a, 0x69, 0xeb, 0xfc, 0x51,
0x70, 0x80, 0xa5, 0x77, 0x83, 0xe5, 0x3e, 0xba, 0x99, 0xfa, 0x5b, 0xfa,
0x44, 0xc2, 0x17, 0xe5, 0x7b, 0xab, 0xfe, 0x58, 0x07, 0x0a, 0x3e, 0x04,
0x21, 0x6c, 0x74, 0x03, 0x24, 0x15, 0x15, 0x1a, 0x63, 0xa3, 0xf5, 0xaf,
0x38, 0xf9, 0xf0, 0x3f, 0x1c, 0xf2, 0x2f, 0xba, 0x9e, 0x7d, 0xac, 0x24,
0x16, 0xc3, 0x93, 0x1e, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x7c, 0x21, 0x29, 0xeb, 0xdd, 0xd9, 0xc2, 0xc2, 0x87, 0x30, 0x82, 0xe0,
0xe9, 0xa7, 0x35, 0x64, 0xc9, 0x67, 0xa9, 0xed, 0xd0, 0xc5, 0x91, 0x3b,
0xc9, 0xf2, 0xd4, 0xd9, 0xc5, 0x89, 0x91, 0x4f, 0xc3, 0xd3, 0xbe, 0x3f,
0xb1, 0x31, 0x98, 0x25, 0x25, 0x83, 0x24, 0xcd, 0x54, 0x99, 0xdb, 0x6f,
0xa7, 0x2d, 0x31, 0xc4, 0x53, 0xe1, 0x69, 0xa6, 0x35, 0xd5, 0x8d, 0x11,
0x70, 0xfa, 0x26, 0x1e, 0x28, 0xbd, 0xfe, 0x69, 0x57, 0x63, 0x6c, 0x33,
0xe6, 0xb6, 0x10, 0x41, 0xb8, 0xbe, 0x1f, 0xf6, 0x3a, 0xbe, 0xb5, 0x6a,
0x57, 0x66, 0xd0, 0xe4, 0x2a, 0x6b, 0xc3, 0xaa, 0x4f, 0xf2, 0xba, 0x5b,
0xd7, 0xbe, 0xb3, 0xcc, 0x01, 0x81, 0x30, 0xdc, 0x9a, 0xd6, 0xab, 0x6a,
0x87, 0x56, 0x69, 0x23, 0xef, 0x37, 0xac, 0xbc, 0xaf, 0xec, 0x35, 0xea,
0x74, 0xc9, 0xbf, 0x9d, 0x06, 0x76, 0xbc, 0x1d, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xcb, 0xf6, 0x96, 0x28, 0xa7, 0x00, 0x6b, 0x60,
0x4b, 0xd1, 0x5a, 0x13, 0x03, 0xa3, 0xda, 0x7d, 0xb6, 0x2d, 0xaa, 0x2b,
0x12, 0xf8, 0x5b, 0x0c, 0x81, 0xb4, 0x61, 0x51, 0x55, 0x40, 0x5c, 0x5e,
0xb1, 0x5a, 0x71, 0xf9, 0x06, 0x13, 0xed, 0x2b, 0x89, 0xc0, 0x01, 0x55,
0xa8, 0xaa, 0x45, 0x36, 0xc8, 0x84, 0xfc, 0x78, 0x69, 0x40, 0x55, 0x89,
0x88, 0xbf, 0x02, 0xd3, 0xa7, 0xd9, 0xc5, 0x48, 0x6a, 0x73, 0xaa, 0x9b,
0x54, 0x06, 0x6c, 0x88, 0x2b, 0xef, 0x5a, 0x3f, 0x70, 0xbd, 0xb8, 0x38,
0x75, 0x9d, 0xe1, 0xf2, 0x8a, 0x8a, 0x69, 0x26, 0x6f, 0x07, 0xf1, 0x5b,
0x00, 0x0a, 0x28, 0x41, 0x3a, 0xb1, 0xf8, 0x8d, 0xaa, 0x7f, 0xd5, 0x90,
0x34, 0xdf, 0xc9, 0xa0, 0x59, 0x13, 0x36, 0x53, 0x28, 0x15, 0xed, 0x06,
0x7c, 0x1c, 0x2d, 0x43, 0xab, 0x50, 0x3c, 0xc9, 0x4b, 0x14, 0xda, 0x36,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x7a, 0xc0, 0x5b, 0x5c,
0x19, 0x50, 0x23, 0xa3, 0x8b, 0x3d, 0x80, 0x09, 0x63, 0x38, 0x5e, 0x65,
0x16, 0xd0, 0xa3, 0xa3, 0xe7, 0x28, 0x5a, 0x5f, 0xa7, 0x0a, 0x4f, 0x0c,
0x34, 0x95, 0x26, 0x70, 0x85, 0x11, 0x97, 0x36, 0xa2, 0x76, 0x64, 0xd9,
0x7a, 0x8e, 0xa6, 0xa1, 0x51, 0x9a, 0x89, 0xda, 0x38, 0x46, 0xd0, 0x68,
0x30, 0xde, 0x70, 0xf8, 0x88, 0x7d, 0xa9, 0x19, 0x45, 0x62, 0xb0, 0x6f,
0xf3, 0xc4, 0xd7, 0xfd, 0x95, 0xb9, 0xd1, 0x1c, 0x7e, 0xb5, 0x58, 0xa6,
0x63, 0xaf, 0xcb, 0x4b, 0x52, 0x83, 0x85, 0x0e, 0xed, 0x33, 0xfa, 0xb3,
0x61, 0x90, 0x61, 0x68, 0xc2, 0xba, 0x54, 0x5d, 0x23, 0xc8, 0xfb, 0x0e,
0x7d, 0x7a, 0x8c, 0xa9, 0x09, 0x2a, 0x21, 0x4e, 0x23, 0x02, 0xf2, 0x0d,
0x84, 0xd1, 0xab, 0x8a, 0x13, 0x8c, 0x9a, 0x6a, 0x20, 0xd2, 0x09, 0x82,
0xfb, 0x1e, 0xe6, 0x17, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0xd1, 0x0f, 0xb4, 0xbd, 0x0e, 0x68, 0x31, 0x1a, 0x30, 0xc8, 0x92, 0xd8,
0xaa, 0xcd, 0x04, 0x05, 0x87, 0x5c, 0x49, 0x68, 0xd2, 0xba, 0x3e, 0xb2,
0x09, 0xa3, 0xb0, 0x8a, 0x59, 0xcf, 0xd2, 0x4b, 0xeb, 0xf7, 0xaa, 0x35,
0xf6, 0x48, 0x57, 0x40, 0x6b, 0x0e, 0x26, 0x37, 0x2a, 0x91, 0x0c, 0xea,
0x3f, 0x26, 0x97, 0xc8, 0xc4, 0x4a, 0x18, 0xd3, 0x06, 0x12, 0x93, 0xa5,
0xad, 0x9a, 0x69, 0x6a, 0xc1, 0x4d, 0x2c, 0x31, 0x45, 0x03, 0x3a, 0x2e,
0x24, 0xd5, 0xd0, 0xdf, 0xeb, 0xdb, 0xdf, 0xd0, 0x6f, 0x3d, 0x14, 0xa8,
0x7a, 0x5f, 0x53, 0xe3, 0x9e, 0xb8, 0x68, 0x4d, 0x91, 0xe2, 0x0a, 0x48,
0x94, 0x73, 0xd9, 0x84, 0x9a, 0x55, 0x3e, 0x42, 0x99, 0xe7, 0x1e, 0x73,
0x64, 0xd4, 0x8d, 0x26, 0xf0, 0x16, 0x36, 0x08, 0x7b, 0xde, 0x8c, 0xe9,
0x0a, 0x27, 0x3a, 0x90, 0x6f, 0x6c, 0x90, 0x66, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xd1, 0x81, 0xc0, 0x8c, 0xc5, 0x6d, 0x18, 0x32,
0xd7, 0x82, 0x81, 0xb4, 0x0f, 0x0b, 0x34, 0x91, 0xe1, 0xec, 0x57, 0xf2,
0x17, 0xca, 0x56, 0x15, 0x1f, 0x7d, 0xa7, 0x27, 0x3a, 0xaf, 0xc3, 0x24,
0x0e, 0x26, 0x24, 0x5e, 0x31, 0x77, 0x45, 0xfb, 0x9c, 0x71, 0xaf, 0x19,
0x73, 0xd0, 0x33, 0x0b, 0x22, 0xd8, 0xde, 0xd0, 0x42, 0x79, 0xc0, 0x40,
0x23, 0x44, 0x1e, 0xa3, 0xdf, 0x65, 0x48, 0x50, 0x96, 0xdc, 0xc5, 0x98,
0x9b, 0x13, 0xa8, 0x29, 0x6e, 0x79, 0x02, 0xef, 0x50, 0xd0, 0xdf, 0x71,
0x29, 0xd3, 0xa4, 0x3a, 0xa4, 0x13, 0xc9, 0x0f, 0xa3, 0xff, 0x73, 0x25,
0xb0, 0xb8, 0xe0, 0x07, 0x02, 0xfd, 0xb5, 0x6f, 0xb4, 0x95, 0xc5, 0x49,
0x8b, 0x13, 0xb4, 0xb9, 0x4f, 0xba, 0xd3, 0x83, 0xc2, 0x8c, 0x7a, 0xbe,
0x9a, 0x0b, 0x7d, 0x03, 0x75, 0xf6, 0xbd, 0x84, 0xbd, 0xc0, 0xd4, 0x33,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x60, 0x31, 0xd7, 0xab,
0x48, 0x21, 0x3b, 0x89, 0xc4, 0x3f, 0xe9, 0x30, 0xf6, 0xbc, 0x55, 0xf0,
0xcf, 0x49, 0x4d, 0xf3, 0x3f, 0x66, 0xbe, 0x39, 0x3c, 0xcd, 0x53, 0x36,
0xc6, 0xd6, 0x04, 0x62, 0x37, 0x64, 0x6e, 0x86, 0x83, 0x2f, 0x1a, 0xe3,
0xd8, 0xcb, 0x6b, 0xcc, 0x18, 0x8c, 0xbc, 0x89, 0x97, 0x69, 0xa7, 0xe9,
0x61, 0x1f, 0xe6, 0x92, 0x3e, 0x34, 0x7b, 0xfa, 0xee, 0x9d, 0xcb, 0x03,
0x26, 0x8e, 0xd5, 0xc7, 0x11, 0xfb, 0x18, 0xc6, 0xe0, 0xd0, 0x7e, 0xb7,
0x77, 0x2c, 0x6e, 0xc0, 0x48, 0x33, 0x34, 0x11, 0x1c, 0x7d, 0x55, 0xa5,
0xca, 0xb3, 0x2d, 0xc6, 0x06, 0x59, 0x9b, 0x27, 0x8a, 0x1a, 0xd6, 0xa4,
0x5c, 0x48, 0x9f, 0x72, 0x20, 0x69, 0xdc, 0xbd, 0xf0, 0xba, 0x39, 0x4c,
0x70, 0xa5, 0x78, 0xb5, 0x87, 0x9c, 0x00, 0xe0, 0xc8, 0xf1, 0x8c, 0x03,
0x3a, 0x2c, 0x1c, 0x2e, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x83, 0x09, 0xd7, 0x49, 0xb5, 0x30, 0x90, 0x05, 0x98, 0x2a, 0x2f, 0x01,
0x25, 0x9f, 0x29, 0xf4, 0xa1, 0x30, 0x62, 0x62, 0x05, 0xbb, 0xa6, 0xda,
0x2f, 0x82, 0x41, 0xad, 0x2f, 0x4a, 0x49, 0x2f, 0x06, 0x35, 0xd8, 0x2f,
0x0c, 0xfa, 0xa5, 0x8c, 0x8e, 0xe7, 0x8a, 0x31, 0x83, 0x67, 0xf4, 0x34,
0xa2, 0xa2, 0x88, 0x6c, 0x71, 0xc7, 0xf1, 0x4c, 0xca, 0xba, 0x0d, 0x57,
0xc8, 0xef, 0x8f, 0x42, 0x9b, 0x2d, 0x86, 0x4a, 0x6a, 0x2c, 0xe7, 0x42,
0x56, 0xe5, 0x36, 0x46, 0xf6, 0xa6, 0x25, 0x4c, 0x83, 0xc1, 0x46, 0x19,
0x22, 0xf9, 0xcd, 0x19, 0x31, 0x55, 0xaa, 0x2b, 0xa5, 0x59, 0x78, 0x70,
0x90, 0x84, 0x93, 0x55, 0xb8, 0x46, 0x4d, 0x54, 0xaa, 0x13, 0x1e, 0x5f,
0x72, 0x9a, 0xb5, 0x05, 0x48, 0x28, 0x3d, 0x78, 0xaf, 0xd3, 0x25, 0x46,
0x9b, 0xd1, 0x06, 0x60, 0x74, 0x34, 0x4e, 0x38, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x79, 0xc6, 0x8a, 0xe3, 0xae, 0xf7, 0xaf, 0x3a,
0xb6, 0xe4, 0xd3, 0xdd, 0x69, 0x6a, 0x24, 0x45, 0x6e, 0x16, 0x97, 0x1c,
0x3e, 0xb8, 0x34, 0x46, 0xf3, 0xd2, 0x73, 0x3f, 0x83, 0x89, 0xd2, 0x0c,
0x5a, 0x95, 0x79, 0x68, 0x8d, 0x99, 0x66, 0xad, 0xfc, 0x2d, 0x15, 0xfb,
0x2b, 0xce, 0x6d, 0x15, 0x95, 0x82, 0x9d, 0xae, 0x31, 0x31, 0x3b, 0xd5,
0x7a, 0xe3, 0x66, 0x40, 0x34, 0x8b, 0xc0, 0x2f, 0x94, 0x52, 0x55, 0x33,
0xce, 0x37, 0x27, 0xe3, 0x35, 0x3f, 0x63, 0x58, 0x7f, 0x92, 0x2a, 0x4e,
0xbd, 0x43, 0x10, 0x6e, 0xc6, 0xc3, 0x86, 0x31, 0xd8, 0xb8, 0xe0, 0x39,
0x48, 0xf1, 0xa0, 0x49, 0xec, 0x14, 0x25, 0x1b, 0xf1, 0x2d, 0x6f, 0x1a,
0x96, 0xb2, 0x0c, 0x08, 0x86, 0x9b, 0x9f, 0xfa, 0xe5, 0x1a, 0x00, 0xb6,
0x54, 0x35, 0xcd, 0x4a, 0xb2, 0x93, 0x6e, 0x09, 0xb4, 0xb1, 0x61, 0x4c,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0xdb, 0x04, 0x30, 0x03,
0xcb, 0xf5, 0x5e, 0xe4, 0xbc, 0x0e, 0xcf, 0x6f, 0x40, 0x4d, 0xfa, 0x18,
0x32, 0x71, 0x77, 0x86, 0x99, 0x1d, 0xb7, 0xb6, 0x34, 0x8a, 0x42, 0x0e,
0x08, 0x86, 0x14, 0x50, 0x2e, 0x67, 0x22, 0x47, 0x1e, 0x39, 0xfb, 0x7d,
0x35, 0xc4, 0x12, 0x18, 0x0e, 0x30, 0x86, 0x19, 0xfb, 0x76, 0x16, 0x19,
0x6d, 0x8c, 0x75, 0x95, 0x23, 0x69, 0x48, 0x28, 0xfd, 0x9b, 0x0b, 0x64,
0x91, 0xe6, 0x92, 0xd3, 0x1a, 0x8d, 0x5f, 0x08, 0xa9, 0xee, 0x34, 0x8b,
0xe2, 0x71, 0x86, 0x8c, 0xf8, 0xa8, 0x27, 0x08, 0xd1, 0x00, 0x12, 0x77,
0xce, 0x0b, 0xae, 0x05, 0x38, 0x38, 0x3b, 0x6f, 0xc8, 0xda, 0x82, 0x9c,
0x50, 0x72, 0x45, 0xaf, 0xad, 0x71, 0x4a, 0x6b, 0x19, 0x6b, 0x7c, 0x1e,
0x6e, 0xe8, 0x87, 0xaa, 0x9b, 0xe5, 0x38, 0x9a, 0x22, 0x19, 0xd2, 0x9a,
0x94, 0x15, 0x70, 0x4c, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0xd6, 0xbc, 0xf5, 0x83, 0xf1, 0xfe, 0xde, 0xf5, 0xd8, 0xae, 0xf8, 0xb3,
0x54, 0x9d, 0x69, 0x55, 0x8b, 0x2b, 0xa9, 0x5e, 0x78, 0xaf, 0x24, 0x05,
0x58, 0x70, 0x69, 0xcd, 0x88, 0xc4, 0x0f, 0x4f, 0x68, 0xc6, 0x43, 0x2f,
0xa6, 0x92, 0xea, 0x6e, 0xb9, 0x77, 0x74, 0xae, 0x8c, 0xfd, 0x9f, 0x79,
0xc4, 0xe1, 0x7a, 0x07, 0x6c, 0x38, 0x40, 0xdd, 0xf9, 0x1c, 0x6d, 0x19,
0xc8, 0xf1, 0xe0, 0x18, 0xc2, 0xa5, 0xf2, 0x5f, 0xde, 0x70, 0x37, 0x1c,
0x82, 0x56, 0x5e, 0xde, 0x09, 0x70, 0x48, 0xad, 0xb8, 0x73, 0xe7, 0x90,
0x36, 0x88, 0x4d, 0x68, 0x32, 0x0b, 0x1d, 0x77, 0x71, 0x9a, 0x21, 0x1c,
0x12, 0x3a, 0x4e, 0x82, 0x34, 0xc7, 0xfa, 0xa9, 0x2b, 0x10, 0xa1, 0x6b,
0x9b, 0x11, 0xdb, 0x82, 0x42, 0x91, 0x02, 0x88, 0xe4, 0xba, 0x5f, 0x57,
0xd9, 0xac, 0x30, 0x98, 0x05, 0xa8, 0x2c, 0x4d, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xc5, 0xb5, 0x00, 0xde, 0x7e, 0x5c, 0xfe, 0x18,
0xec, 0xaa, 0x55, 0x85, 0xcb, 0x66, 0x55, 0x52, 0xad, 0xcb, 0x74, 0x28,
0x93, 0x85, 0xd8, 0x94, 0x97, 0x4d, 0x64, 0x6e, 0xd8, 0xfe, 0x99, 0x3a,
0x57, 0x1d, 0x51, 0xc1, 0xea, 0xdd, 0xd2, 0x38, 0xdd, 0x3d, 0xba, 0x2a,
0x71, 0x96, 0xa2, 0x97, 0xa6, 0x00, 0xc9, 0xc6, 0x65, 0xbb, 0xc6, 0x27,
0xd6, 0x04, 0x49, 0x3a, 0x5c, 0xb3, 0xb0, 0x05, 0x05, 0xea, 0x70, 0x92,
0xe7, 0xf2, 0x43, 0x67, 0x49, 0x6b, 0x96, 0x0f, 0x95, 0x7a, 0x15, 0x3a,
0x4f, 0x0f, 0xf5, 0xf6, 0xf1, 0x51, 0xe7, 0x12, 0x83, 0x8d, 0x2a, 0xad,
0xb3, 0x29, 0x0b, 0x66, 0x20, 0x30, 0xf5, 0x8f, 0xb9, 0x81, 0x61, 0x2d,
0xb4, 0x44, 0xe7, 0x89, 0x89, 0x1b, 0x4b, 0xb8, 0x90, 0x2d, 0x54, 0xa7,
0x8e, 0x58, 0x6c, 0x3c, 0xe1, 0x31, 0xba, 0x3e, 0xbf, 0xe2, 0xb4, 0x5c,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x72, 0xdc, 0xc0, 0xee,
0x8c, 0x48, 0x9a, 0x30, 0x4b, 0x5f, 0xfc, 0x54, 0xe7, 0x0b, 0xc7, 0x1d,
0xfc, 0x66, 0x27, 0x13, 0x6d, 0x4d, 0x0c, 0x15, 0x76, 0x71, 0x51, 0xf3,
0x25, 0xfd, 0x2c, 0x37, 0xf7, 0xc0, 0xce, 0xcc, 0xa3, 0x90, 0x0b, 0xbd,
0x5e, 0x6e, 0x09, 0x8a, 0xde, 0x6f, 0x9c, 0x6d, 0xdc, 0xf1, 0xeb, 0x6b,
0xd0, 0x3d, 0x38, 0x8c, 0xd0, 0xbf, 0xaf, 0xbf, 0xe2, 0xb3, 0x95, 0x4f,
0x4e, 0x11, 0x43, 0xe0, 0x90, 0x60, 0xca, 0x61, 0xc5, 0xab, 0xfc, 0xd5,
0x19, 0x02, 0xe6, 0xee, 0x32, 0xd3, 0x93, 0xc1, 0x6c, 0x69, 0x07, 0xba,
0x37, 0x7a, 0x55, 0xed, 0xb3, 0xda, 0xbf, 0x3a, 0x56, 0x67, 0xbb, 0x94,
0xc3, 0x70, 0x3e, 0xf6, 0x35, 0xdf, 0x5d, 0xf6, 0xec, 0x4c, 0x76, 0x88,
0x22, 0xcb, 0xb8, 0x87, 0x6f, 0x73, 0x48, 0xc5, 0xae, 0x2f, 0x89, 0x18,
0xab, 0x89, 0x0c, 0x4e, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x62, 0x6e, 0xaf, 0x1e, 0xc0, 0xc8, 0xa0, 0x0f, 0x41, 0x9c, 0x9a, 0x41,
0x06, 0xeb, 0x2e, 0xe5, 0x1a, 0xf4, 0x0d, 0x48, 0x40, 0x95, 0x0e, 0xb0,
0xbb, 0xc3, 0x0c, 0x66, 0x7a, 0xd9, 0xb4, 0x0c, 0x5b, 0x03, 0x93, 0xa6,
0xa8, 0x9a, 0xea, 0x96, 0x44, 0xcb, 0x12, 0xae, 0x40, 0x60, 0x03, 0xfc,
0xb7, 0x1b, 0x2d, 0xa0, 0x12, 0xd3, 0x30, 0x74, 0x66, 0xcc, 0xa4, 0xfa,
0xca, 0x5b, 0x20, 0x25, 0x7f, 0x66, 0x6d, 0xad, 0x3a, 0x65, 0x13, 0xc3,
0x51, 0x00, 0x54, 0x5c, 0x61, 0x0c, 0x76, 0xb7, 0x8b, 0xe6, 0x97, 0xb1,
0x94, 0x78, 0x4d, 0x2c, 0x33, 0xc7, 0xf0, 0x09, 0xba, 0x2d, 0xf5, 0x60,
0x6d, 0x86, 0x75, 0x71, 0xed, 0xc9, 0x73, 0x2d, 0x73, 0x2d, 0x8a, 0xfb,
0x53, 0xa3, 0x1a, 0xc0, 0xe6, 0x8c, 0xd0, 0x6f, 0x98, 0xfc, 0x00, 0xfe,
0x8e, 0xd9, 0x2a, 0x39, 0x5c, 0xef, 0x79, 0x1b, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x1b, 0x0e, 0xb3, 0x6e, 0xec, 0x41, 0x47, 0x9c,
0x86, 0x65, 0xba, 0x43, 0xfd, 0xef, 0xb4, 0x42, 0xca, 0x96, 0x39, 0xec,
0x62, 0x22, 0x73, 0xf7, 0xed, 0xd3, 0x27, 0xef, 0x57, 0x7f, 0x9b, 0x61,
0x98, 0x00, 0x63, 0xbb, 0x4f, 0x68, 0x42, 0x1e, 0xe3, 0xea, 0x08, 0xf3,
0xa4, 0xe2, 0x9e, 0x71, 0x0a, 0x45, 0x90, 0x7f, 0x93, 0x18, 0x1d, 0x1e,
0xf1, 0x47, 0x79, 0x91, 0xe4, 0x73, 0xbf, 0x5f, 0x7e, 0xcb, 0x83, 0xde,
0xde, 0x88, 0xa7, 0xe0, 0x65, 0x01, 0x5b, 0x94, 0x64, 0xa8, 0x12, 0xb3,
0xde, 0x22, 0x82, 0x62, 0x5d, 0x30, 0x11, 0xc7, 0x7a, 0xa8, 0xa0, 0xd6,
0xab, 0x70, 0x88, 0x59, 0x92, 0x3f, 0xf2, 0x6b, 0x25, 0xbb, 0x11, 0xb8,
0xda, 0x29, 0x5e, 0xde, 0x29, 0xf0, 0x50, 0x8d, 0xf2, 0x76, 0x05, 0xc0,
0xb8, 0x9f, 0xa1, 0xe7, 0x7e, 0x85, 0x07, 0xfa, 0xda, 0xed, 0x97, 0x0a,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x0e, 0xa5, 0x09, 0x60,
0x5e, 0xeb, 0xb2, 0x0a, 0xda, 0x15, 0x84, 0xad, 0xd7, 0xf2, 0x0e, 0x1a,
0xce, 0x93, 0xea, 0xfa, 0x1d, 0xf5, 0xfb, 0x2d, 0x45, 0x02, 0x26, 0x89,
0xf5, 0x4a, 0x1f, 0x4a, 0x4b, 0x39, 0xfb, 0xdd, 0x38, 0xef, 0x39, 0x55,
0x55, 0xdf, 0xc4, 0x5e, 0x23, 0x83, 0x02, 0x48, 0x60, 0xf7, 0x1b, 0xcc,
0xe8, 0x6f, 0x86, 0xdf, 0x0e, 0xcf, 0xa1, 0x3d, 0x6e, 0xd4, 0xb4, 0x20,
0x5d, 0xee, 0xf2, 0xe7, 0x78, 0xb8, 0x40, 0xf7, 0xad, 0xe2, 0x81, 0xe7,
0xfa, 0x26, 0x92, 0x0f, 0x24, 0x87, 0xc9, 0x44, 0x5f, 0x90, 0x9d, 0x25,
0xaa, 0xb7, 0x95, 0x76, 0x89, 0x53, 0xd5, 0x5f, 0xe8, 0xa6, 0x59, 0xba,
0x77, 0xf1, 0x05, 0xe8, 0x2e, 0x5d, 0x24, 0x31, 0x59, 0xf8, 0x13, 0x02,
0xfe, 0x9e, 0x22, 0x7b, 0xa7, 0xb1, 0xa1, 0xba, 0xb0, 0xb7, 0xbd, 0xd1,
0x12, 0x38, 0x99, 0x61, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0xb2, 0xcc, 0xab, 0x2f, 0x02, 0xe8, 0x24, 0xdd, 0x52, 0xf7, 0xe4, 0x98,
0x90, 0x56, 0x66, 0x7c, 0xef, 0x7c, 0x42, 0xf3, 0x4a, 0x71, 0x47, 0x18,
0x17, 0x8c, 0x37, 0xac, 0x03, 0xde, 0xf7, 0x55, 0x75, 0xd4, 0x7a, 0x60,
0x4e, 0xc3, 0xd0, 0xc4, 0xd4, 0x29, 0xd6, 0xc2, 0x5a, 0x10, 0x42, 0x98,
0x1f, 0xcd, 0x91, 0xe2, 0xe5, 0xc3, 0x62, 0x26, 0x01, 0xda, 0x4b, 0xde,
0xf2, 0x37, 0xab, 0x37, 0x30, 0xaf, 0x44, 0x39, 0xb0, 0xa0, 0xa1, 0x84,
0xbb, 0x01, 0x51, 0x77, 0x94, 0x9f, 0x1f, 0x67, 0x92, 0x39, 0x00, 0x69,
0x7a, 0x68, 0x79, 0xf4, 0x8e, 0x28, 0xde, 0xe0, 0x4f, 0x0a, 0x4d, 0x00,
0xf0, 0xc3, 0xf8, 0x2a, 0xd3, 0x5e, 0xc2, 0x92, 0x9d, 0x1f, 0x7e, 0x77,
0x88, 0x25, 0x9e, 0xd7, 0xaa, 0x95, 0xbd, 0x5a, 0x85, 0x61, 0x54, 0x41,
0xad, 0x8f, 0x8f, 0x02, 0x6b, 0xd5, 0x8d, 0x40, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xc0, 0xb2, 0x14, 0x11, 0x7c, 0xd7, 0xd4, 0x99,
0xa2, 0xd1, 0x3f, 0xf2, 0xf9, 0x9c, 0x7e, 0xd9, 0x72, 0x9b, 0x8b, 0x73,
0xeb, 0x6c, 0x2b, 0xc0, 0x16, 0x3b, 0x8d, 0xa3, 0x36, 0x95, 0xfa, 0x44,
0xd9, 0xd8, 0x58, 0xfd, 0x23, 0x67, 0x7f, 0xa2, 0xc4, 0x67, 0x69, 0xbb,
0x18, 0x52, 0xc8, 0x38, 0xef, 0xad, 0x36, 0x79, 0x0e, 0x43, 0x17, 0x87,
0x3d, 0x1e, 0x6e, 0xf7, 0x06, 0xa5, 0xc2, 0x10, 0x55, 0x73, 0x3a, 0x04,
0x3a, 0x32, 0x33, 0xde, 0x21, 0x54, 0xbf, 0xde, 0xd0, 0x5f, 0x2d, 0xe8,
0x3a, 0x6f, 0x9b, 0xcb, 0x59, 0x32, 0x95, 0xb7, 0x63, 0xea, 0x6a, 0x07,
0x64, 0xa7, 0x6f, 0x3d, 0x55, 0x2a, 0x89, 0x52, 0xda, 0x87, 0xbf, 0xaa,
0xd4, 0xbf, 0x97, 0xc0, 0xea, 0xfc, 0xc3, 0x2f, 0x2f, 0xcf, 0x8f, 0xf5,
0x7d, 0xfe, 0x0f, 0xf3, 0x13, 0x23, 0x91, 0x76, 0xa8, 0xc5, 0x61, 0x5a,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x5c, 0xbf, 0x5f, 0xe9,
0x15, 0x89, 0xfb, 0xdb, 0xaf, 0x98, 0x7b, 0x9c, 0x9d, 0x4f, 0x11, 0xba,
0xaf, 0x71, 0x71, 0xc8, 0x09, 0x4e, 0xaa, 0xbe, 0x20, 0x14, 0x24, 0xc8,
0x5d, 0xa1, 0x18, 0x3b, 0xf6, 0x48, 0xd9, 0x1a, 0x75, 0x26, 0x54, 0xcf,
0xe7, 0xbe, 0xab, 0x24, 0x8f, 0x0c, 0x9c, 0xca, 0x23, 0x33, 0xb6, 0xd6,
0x42, 0x65, 0x37, 0x5f, 0x35, 0xe9, 0x06, 0xe3, 0x0f, 0xb6, 0x73, 0x1e,
0x4f, 0x5e, 0x94, 0x44, 0x6e, 0xdf, 0xe1, 0x2a, 0x17, 0x97, 0x9a, 0xa1,
0x19, 0x1b, 0x3f, 0xa3, 0x25, 0x7a, 0xf1, 0x51, 0xfa, 0xdd, 0x5f, 0x80,
0x35, 0xcc, 0x90, 0x2b, 0x8e, 0xa7, 0x5b, 0x4d, 0x9d, 0x0f, 0x13, 0xc7,
0xa6, 0x87, 0x8a, 0xce, 0xe4, 0x45, 0xf9, 0xdc, 0xbf, 0xe8, 0xbc, 0xc1,
0x5b, 0xfa, 0x51, 0x81, 0xa9, 0x7b, 0x26, 0xa9, 0xdd, 0xa8, 0xdf, 0xcf,
0x29, 0xb3, 0xbe, 0x14, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x4d, 0x90, 0x6a, 0xf7, 0xb0, 0xff, 0x6a, 0xac, 0xaf, 0x33, 0xf0, 0x2d,
0x6c, 0x58, 0x3e, 0x8d, 0x45, 0x5f, 0x8b, 0xd9, 0x08, 0x69, 0x0e, 0x8d,
0x4f, 0x07, 0x87, 0x5b, 0xdb, 0x17, 0xb9, 0x4e, 0xcb, 0x48, 0x41, 0x0c,
0x88, 0xa8, 0x23, 0x79, 0x4a, 0x9d, 0x9a, 0x31, 0x72, 0xe3, 0x1b, 0xd4,
0x4d, 0xc5, 0xb1, 0x90, 0xdb, 0x9b, 0xef, 0x03, 0xff, 0x11, 0x42, 0x29,
0xb6, 0xc6, 0xc2, 0x1e, 0x70, 0x80, 0xaf, 0xea, 0x0a, 0xd3, 0x2d, 0xb9,
0xea, 0x6e, 0xbd, 0x5c, 0xac, 0x97, 0xa9, 0x08, 0x7d, 0x51, 0xb3, 0x72,
0x62, 0x70, 0x4f, 0x19, 0xa9, 0xee, 0xe4, 0xc7, 0x06, 0x3f, 0xe3, 0x3d,
0x1e, 0x02, 0x0e, 0xb5, 0x57, 0x3f, 0x14, 0x23, 0xda, 0x5b, 0xf3, 0xf3,
0xf6, 0x62, 0x4e, 0x33, 0x72, 0x41, 0x4d, 0xb1, 0x87, 0xa6, 0xee, 0x68,
0x62, 0xb1, 0x71, 0x7e, 0xb8, 0xa9, 0xa7, 0x6f, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xa9, 0x53, 0x42, 0x6f, 0xe4, 0x79, 0xbd, 0xc7,
0x37, 0x51, 0xc9, 0xe6, 0xfe, 0x9b, 0x63, 0x03, 0x25, 0xdb, 0xb6, 0xf9,
0xdb, 0x80, 0x91, 0x01, 0x3f, 0xdd, 0xee, 0xe9, 0x9e, 0x60, 0xa6, 0x37,
0x43, 0x16, 0x9b, 0x92, 0x8b, 0xf9, 0xd9, 0x21, 0xa7, 0xf9, 0x05, 0x21,
0x00, 0xaa, 0x35, 0x9c, 0x08, 0xa5, 0x66, 0xba, 0xcb, 0xe3, 0x45, 0xfd,
0x8e, 0xbb, 0x5e, 0x97, 0x2a, 0xf9, 0x99, 0x6c, 0x8f, 0x92, 0xe8, 0x7d,
0x4f, 0x6d, 0x9c, 0x6d, 0xae, 0x17, 0xe1, 0x16, 0xde, 0x03, 0x35, 0x01,
0x76, 0xe6, 0x5a, 0x3b, 0x45, 0xb5, 0x43, 0x21, 0x59, 0xa9, 0x87, 0x38,
0x2f, 0x07, 0x94, 0x60, 0x26, 0xb9, 0xd2, 0xb5, 0xe3, 0x3b, 0x57, 0xa0,
0xb0, 0xb6, 0xfe, 0x10, 0x2e, 0xb6, 0xbd, 0xdf, 0x24, 0xe1, 0xc3, 0x8c,
0xbb, 0x08, 0x62, 0x84, 0x7e, 0x37, 0x25, 0x5c, 0xd1, 0x65, 0xf2, 0x45,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x4f, 0x72, 0x7c, 0xfb,
0xb3, 0x7d, 0x0e, 0xa5, 0x2f, 0x39, 0x76, 0x80, 0xff, 0x08, 0xac, 0xe7,
0x21, 0x18, 0x3f, 0x03, 0xab, 0xb2, 0xde, 0x9c, 0x83, 0x47, 0xa7, 0x90,
0x1e, 0x2c, 0xa3, 0x11, 0x14, 0x4e, 0x27, 0x52, 0x70, 0x56, 0xa6, 0x87,
0x1a, 0x45, 0x52, 0xf4, 0x9f, 0xed, 0x73, 0xb2, 0xa2, 0x01, 0x55, 0xbd,
0x6a, 0x32, 0xd4, 0x15, 0x2e, 0x7e, 0x05, 0xbb, 0xb5, 0x27, 0x46, 0x58,
0x49, 0x2b, 0xab, 0x60, 0x30, 0xbb, 0x6d, 0x8a, 0xd8, 0x85, 0x78, 0x25,
0x19, 0x8a, 0x9d, 0xc9, 0x3e, 0xf2, 0xb0, 0x65, 0x1b, 0xe2, 0xfd, 0x30,
0x66, 0x4b, 0xb5, 0x6b, 0xfd, 0x7e, 0xe3, 0x1c, 0x31, 0x08, 0xa6, 0xb2,
0xda, 0x48, 0x8d, 0xeb, 0xb0, 0xac, 0xf0, 0xf5, 0x6e, 0xe5, 0x4b, 0xc7,
0xf9, 0xfb, 0xca, 0xbd, 0x8c, 0x4e, 0x28, 0xa1, 0xe9, 0xf6, 0x7f, 0x12,
0xd3, 0x5d, 0x88, 0x0b, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x8b, 0x4e, 0xfd, 0x3f, 0x25, 0xc1, 0xc0, 0x59, 0x5a, 0xd5, 0x46, 0xa0,
0x79, 0x62, 0x4e, 0x29, 0xc6, 0x1f, 0x75, 0x8a, 0x8c, 0x82, 0x5e, 0x15,
0x72, 0xd5, 0x58, 0x66, 0x81, 0x74, 0x9c, 0x40, 0x1d, 0x98, 0x69, 0x66,
0xf6, 0xcb, 0x81, 0x16, 0xd4, 0xa4, 0xbc, 0x13, 0xd6, 0x85, 0x54, 0x5d,
0xb3, 0x1b, 0x28, 0xa3, 0x56, 0x36, 0x46, 0xf6, 0xc2, 0x98, 0x24, 0xbb,
0x35, 0xfe, 0xa2, 0x6a, 0x0b, 0xb7, 0x27, 0x6c, 0xb4, 0xda, 0x41, 0x1b,
0x37, 0x2f, 0x76, 0x99, 0x63, 0x28, 0x79, 0xc0, 0xc5, 0x38, 0x5d, 0xf2,
0x16, 0x23, 0x40, 0xa5, 0xb7, 0x36, 0x5a, 0xd1, 0x91, 0x4a, 0xbe, 0x6f,
0x76, 0x8e, 0x06, 0x8f, 0x7c, 0x29, 0x10, 0x51, 0x6a, 0x42, 0xde, 0x5b,
0xed, 0x80, 0x38, 0xf4, 0xf3, 0xf2, 0x5c, 0x76, 0xf2, 0x78, 0xbd, 0x0d,
0x0c, 0xe8, 0x78, 0x68, 0xfc, 0x7c, 0x9c, 0x55, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x65, 0xfe, 0xff, 0xbf, 0x05, 0x2f, 0x3c, 0x1e,
0x02, 0xfa, 0x35, 0x81, 0xef, 0x42, 0xb9, 0xa8, 0x5f, 0x6d, 0x50, 0xaf,
0x80, 0x74, 0xb5, 0x76, 0xad, 0xd0, 0x5a, 0xc0, 0x44, 0xb0, 0x49, 0x2a,
0x1c, 0x35, 0x33, 0x93, 0x5f, 0x00, 0x08, 0x49, 0x94, 0xb7, 0xab, 0x66,
0xd3, 0xd7, 0xcc, 0xfe, 0x68, 0x9e, 0xf0, 0xae, 0x7e, 0x26, 0x1d, 0x4a,
0x85, 0xf7, 0x0c, 0xaa, 0xd6, 0x4f, 0x0c, 0x6a, 0xde, 0xd6, 0x8b, 0xb8,
0xc0, 0xbe, 0x0b, 0xab, 0x9a, 0x3f, 0xe4, 0x8a, 0x1c, 0x1b, 0x81, 0x1b,
0x8d, 0x6e, 0xeb, 0xa3, 0xac, 0x44, 0x9a, 0x51, 0x29, 0x50, 0x2d, 0x93,
0xa0, 0x23, 0xb8, 0x30, 0x33, 0x15, 0xe0, 0x11, 0x8b, 0x6e, 0xe3, 0x6f,
0x62, 0xc1, 0xba, 0x8b, 0x25, 0xac, 0xd3, 0x13, 0x47, 0xee, 0xc5, 0x56,
0x62, 0x9e, 0xbd, 0x5e, 0x03, 0xdb, 0x78, 0xde, 0x5e, 0xf9, 0xb9, 0x4e,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x00, 0xd0,
0x87, 0x1a, 0x49, 0x23, 0x54, 0x58, 0x48, 0x00, 0x80, 0x87, 0x16, 0x01,
0xa0, 0xbd, 0x8c, 0x8b, 0x01, 0x1a, 0xdc, 0xfc, 0x0f, 0xe2, 0x98, 0xa5,
0x10, 0x78, 0x25, 0x6f, 0x9b, 0x99, 0x99, 0x59, 0x82, 0xef, 0x3b, 0xc0,
0xa7, 0xd6, 0x7c, 0xb6, 0x5e, 0xe0, 0xd5, 0x9b, 0x31, 0x32, 0xf2, 0xf9,
0xde, 0x89, 0xa6, 0x89, 0x90, 0x6d, 0xa8, 0x10, 0xb0, 0x71, 0x7a, 0x1e,
0x85, 0xb0, 0x81, 0x18, 0x2a, 0x71, 0xb1, 0x7b, 0xbd, 0x29, 0xdc, 0x08,
0x0e, 0xc6, 0xc5, 0xfd, 0x4d, 0xce, 0x87, 0x98, 0xd2, 0xe6, 0x0f, 0x11,
0x2f, 0x7e, 0xf0, 0xe0, 0xd8, 0x47, 0x1d, 0x04, 0xc6, 0x9b, 0xde, 0x35,
0x20, 0x57, 0x3e, 0xd4, 0x57, 0xb8, 0x20, 0x9d, 0x95, 0xae, 0xb6, 0x21,
0x82, 0xac, 0xde, 0xdd, 0x9b, 0x60, 0x94, 0xe9, 0x4b, 0xb4, 0x59, 0xef,
0x06, 0xa7, 0xe4, 0x65, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0x00, 0x00, 0x30, 0xc2, 0x35, 0xb2, 0xc0, 0xe8, 0x0e, 0x8a, 0xaf,
0x28, 0x09, 0x1e, 0x7e, 0x9e, 0x37, 0x96, 0x87, 0x92, 0x36, 0x29, 0xd7,
0x70, 0xd5, 0x84, 0xef, 0xb7, 0x50, 0x42, 0x00, 0xcd, 0xcc, 0xcc, 0x04,
0x5a, 0x14, 0xea, 0x45, 0xb8, 0x82, 0x05, 0x94, 0x10, 0x12, 0xd3, 0x2e,
0x7d, 0x94, 0x1b, 0x04, 0x9a, 0x3c, 0x6d, 0xa5, 0x6c, 0xbb, 0xbd, 0x4e,
0x34, 0xe1, 0xe0, 0x16, 0xcb, 0x55, 0x94, 0x0a, 0x7a, 0x18, 0xa0, 0xdd,
0x7b, 0x74, 0xd0, 0x54, 0x3c, 0xb9, 0xcd, 0x54, 0xd0, 0x97, 0xfd, 0xfa,
0xd5, 0x19, 0xe9, 0x34, 0x4e, 0x6c, 0xa1, 0xea, 0xf3, 0x7f, 0x31, 0x5e,
0xf8, 0x88, 0xbe, 0xec, 0x02, 0xf0, 0xce, 0x75, 0xdb, 0x51, 0xfa, 0x68,
0x41, 0x49, 0x10, 0x03, 0xab, 0xff, 0x7f, 0x3a, 0xf5, 0x29, 0xa9, 0x1c,
0xc3, 0x3d, 0x5f, 0x4d, 0x66, 0x9b, 0x65, 0x04, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x00, 0x24, 0x7a, 0x63, 0x86, 0x07,
0x09, 0xeb, 0x71, 0x66, 0xa4, 0x43, 0x09, 0xeb, 0xbc, 0x2d, 0x8c, 0x2c,
0xae, 0xeb, 0x7e, 0xde, 0xbf, 0x45, 0x0a, 0x22, 0x91, 0x27, 0x00, 0x00,
0x67, 0x66, 0x66, 0x76, 0xa0, 0x3c, 0xc5, 0xd4, 0xd2, 0xcf, 0xd3, 0x0d,
0x97, 0x92, 0x52, 0x7c, 0xf8, 0x5c, 0x73, 0x4d, 0xc5, 0x31, 0x62, 0x40,
0x76, 0xe7, 0xb5, 0xfb, 0xaa, 0x68, 0x8e, 0x45, 0xc2, 0xf6, 0xff, 0x3b,
0xf3, 0x53, 0xd0, 0x30, 0x9b, 0x29, 0x1d, 0x86, 0x25, 0xbd, 0x0f, 0xf9,
0x53, 0x91, 0x10, 0x4c, 0xf5, 0x4a, 0xf3, 0x13, 0x04, 0xf4, 0x79, 0x12,
0xc1, 0x4a, 0x58, 0x21, 0x3c, 0x74, 0xe0, 0x5d, 0x22, 0x2e, 0x60, 0x2a,
0xca, 0x3d, 0x5f, 0xc2, 0xcb, 0xf2, 0x47, 0x71, 0xd0, 0xd6, 0x63, 0xca,
0xf3, 0xd0, 0xe8, 0x39, 0x99, 0x3b, 0xff, 0x06, 0x3e, 0xe5, 0xcd, 0x6b,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x00, 0xac,
0x99, 0x37, 0xb3, 0x60, 0xfa, 0x50, 0xcb, 0x16, 0x8f, 0x95, 0x6b, 0xcc,
0x70, 0x69, 0x74, 0x2e, 0x07, 0xd5, 0x81, 0x98, 0xee, 0x16, 0x88, 0x9b,
0x17, 0x00, 0x00, 0x00, 0x34, 0x33, 0x33, 0xa9, 0x30, 0x56, 0xe3, 0x6e,
0x00, 0xe2, 0x23, 0x99, 0x51, 0x52, 0x76, 0xe2, 0x4c, 0x35, 0xcc, 0xf4,
0xbe, 0x8b, 0x0f, 0xdf, 0xbf, 0xaa, 0x2a, 0xbb, 0x59, 0x1f, 0xbe, 0x5c,
0xc0, 0x27, 0xda, 0x2d, 0x45, 0x2e, 0x95, 0x8f, 0xb7, 0x50, 0x74, 0xd9,
0xec, 0x14, 0xb8, 0x4c, 0x0d, 0x7b, 0xc9, 0xe0, 0x02, 0xd1, 0x9e, 0x56,
0xc9, 0x27, 0xf7, 0xa8, 0xa6, 0xa1, 0xdc, 0x38, 0x3f, 0xff, 0xf9, 0x30,
0xfa, 0x8c, 0xc8, 0xab, 0x60, 0x86, 0xf0, 0x52, 0xc4, 0x01, 0xc2, 0x9c,
0xb3, 0x42, 0x51, 0x52, 0x54, 0x35, 0x83, 0x05, 0xb2, 0xd5, 0xc6, 0x9b,
0x7a, 0x43, 0xa6, 0x6c, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0x00, 0x00, 0xa5, 0xd1, 0xee, 0x7c, 0xd0, 0xba, 0xcb, 0x9d, 0x26,
0x7f, 0xd1, 0xed, 0xba, 0xb3, 0x89, 0xf0, 0x52, 0xc2, 0x4f, 0x0c, 0xf0,
0x23, 0xdd, 0x15, 0x0e, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x99, 0x99, 0xe5,
0x87, 0xd2, 0xdc, 0x99, 0xaf, 0xaf, 0x6f, 0xca, 0x7f, 0x10, 0x1c, 0x8b,
0x6e, 0xb8, 0xd8, 0xc5, 0x9e, 0xd0, 0x03, 0xce, 0x57, 0x95, 0xbd, 0xcc,
0xba, 0x0f, 0x5f, 0x2e, 0xbf, 0x91, 0x50, 0x10, 0xfb, 0xe2, 0x60, 0x0a,
0x57, 0xb1, 0xc7, 0xdb, 0xc3, 0x18, 0x08, 0x02, 0x48, 0xc8, 0xea, 0xa0,
0x46, 0x32, 0xaa, 0x31, 0x46, 0x05, 0xbb, 0xbc, 0x03, 0x8d, 0xee, 0x36,
0xff, 0xa5, 0x22, 0x99, 0x0d, 0xd2, 0x0a, 0x51, 0x1a, 0x45, 0x34, 0x44,
0x68, 0x5d, 0x72, 0xde, 0x20, 0x7d, 0xa1, 0xa1, 0x77, 0xce, 0xd3, 0xc1,
0x91, 0xa1, 0x29, 0x1e, 0xc6, 0xe3, 0x35, 0x14, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x00, 0x83, 0x1e, 0x97, 0x8a, 0xf6,
0x06, 0xe2, 0xcb, 0xfe, 0xa5, 0x10, 0x1f, 0x59, 0x8b, 0xd6, 0x2f, 0x75,
0xeb, 0x3d, 0xff, 0xdf, 0x6e, 0x67, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00,
0xcd, 0xcc, 0x4c, 0xbc, 0x97, 0xb3, 0x89, 0x2c, 0x3e, 0x8b, 0x8b, 0xaf,
0x9b, 0x36, 0xe5, 0x38, 0x1c, 0x8d, 0x76, 0x2b, 0xf5, 0x49, 0xe6, 0x15,
0x6c, 0x21, 0xaf, 0x6e, 0xdd, 0x87, 0x2f, 0x17, 0xee, 0x4e, 0x3a, 0xda,
0x8f, 0x49, 0x3f, 0xd1, 0xcc, 0xc1, 0xd6, 0x95, 0x9e, 0x09, 0x52, 0x40,
0xab, 0xac, 0x43, 0xc0, 0x04, 0x7a, 0x13, 0x51, 0x40, 0xae, 0xd1, 0x27,
0x84, 0xee, 0xf6, 0x4e, 0x8e, 0xd8, 0x03, 0x5c, 0x37, 0x93, 0x2c, 0x43,
0x49, 0xac, 0x48, 0xd9, 0xa0, 0x82, 0x6b, 0x9f, 0x73, 0x22, 0xc6, 0x35,
0x95, 0x41, 0x89, 0x43, 0xb1, 0x1f, 0x04, 0xfa, 0x25, 0x24, 0x59, 0x31,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x40, 0xe4,
0x16, 0xab, 0x32, 0x30, 0xf2, 0x5a, 0x04, 0x1d, 0x4c, 0x5c, 0xea, 0xfe,
0x39, 0xbb, 0xe4, 0x26, 0x4f, 0x5e, 0x35, 0xa6, 0x03, 0x05, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x67, 0x66, 0x66, 0xdb, 0x44, 0x0e, 0xbd, 0xc1,
0x9b, 0x8c, 0x59, 0xb7, 0xb9, 0xc3, 0xb0, 0x7d, 0x2a, 0x27, 0x26, 0x19,
0x9e, 0xa2, 0x35, 0xe4, 0x38, 0x45, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45,
0xbb, 0xda, 0xd7, 0xf5, 0xdc, 0xf5, 0xee, 0x05, 0x0b, 0x14, 0x99, 0x1d,
0x2c, 0x77, 0x0b, 0xa8, 0xe0, 0x4c, 0x30, 0xd5, 0xe7, 0x25, 0xa5, 0x00,
0x81, 0x5f, 0xbf, 0x8a, 0x0c, 0x7c, 0xdd, 0x18, 0xb0, 0x6c, 0xf4, 0x8f,
0x59, 0x92, 0xf6, 0x79, 0xf6, 0x8d, 0x23, 0xf9, 0xaa, 0xc2, 0x33, 0xf1,
0x15, 0xa6, 0x48, 0x92, 0x1c, 0xd8, 0x44, 0x2d, 0xe3, 0xdf, 0x33, 0x4c,
0xab, 0x74, 0x9b, 0x1a, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0x00, 0xc0, 0x5a, 0x43, 0x4f, 0x54, 0x90, 0x5b, 0x8f, 0x29, 0xf1,
0xbe, 0xb3, 0x8e, 0xbf, 0x2b, 0x63, 0x9b, 0xa4, 0x6d, 0x29, 0xe3, 0xfd,
0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x33, 0x93, 0x51,
0xa5, 0xa4, 0x2f, 0xc2, 0xf4, 0x64, 0x4f, 0xb1, 0x7e, 0xa0, 0x7e, 0xc7,
0x62, 0x51, 0xe8, 0xcd, 0xbb, 0xdf, 0x39, 0x36, 0x36, 0x64, 0xe4, 0xba,
0x75, 0x1f, 0xbe, 0x5c, 0x26, 0xa3, 0x44, 0xc5, 0xda, 0xd1, 0xc0, 0x1f,
0x86, 0x3b, 0x67, 0x70, 0xe2, 0x82, 0x02, 0x91, 0x2b, 0x40, 0xb0, 0x46,
0xe2, 0xcf, 0xe7, 0x51, 0x2b, 0x63, 0x28, 0xac, 0x8c, 0x0d, 0x7e, 0x2f,
0xce, 0x8f, 0x89, 0x09, 0xf4, 0x60, 0xb2, 0x30, 0xa1, 0xfd, 0x80, 0xf7,
0xf8, 0x2d, 0xa8, 0xa8, 0x0f, 0x4d, 0x4e, 0xaa, 0x92, 0x83, 0x95, 0xa1,
0x6b, 0x2c, 0xb2, 0x3e, 0x6f, 0x3e, 0x33, 0x5c, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0xd0, 0xb0, 0xb1, 0x11, 0x5d, 0x5f,
0x5d, 0xc3, 0x95, 0x1a, 0x6e, 0xc2, 0xfe, 0xd5, 0xed, 0x59, 0xd2, 0x9f,
0x2b, 0xf7, 0xc8, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x9a, 0x99, 0x59, 0xbf, 0xae, 0x61, 0x76, 0x5e, 0x8d, 0xfd, 0x9d, 0xc7,
0xb2, 0xa0, 0x04, 0x7f, 0xbb, 0x90, 0x38, 0xa3, 0x7e, 0x5e, 0x2c, 0xdf,
0x1c, 0x32, 0x72, 0xdd, 0xba, 0x0f, 0x5f, 0x2e, 0x1a, 0xc0, 0x6a, 0xe1,
0x44, 0x05, 0x40, 0x61, 0xb6, 0x93, 0x2b, 0x00, 0x16, 0x8a, 0x70, 0x5b,
0xbe, 0x39, 0x1f, 0x89, 0xbd, 0x8f, 0x95, 0xd9, 0xad, 0x4a, 0x11, 0xe6,
0x30, 0xc0, 0xdb, 0x53, 0x2a, 0xb9, 0x1b, 0x91, 0x35, 0x4b, 0x6b, 0xe3,
0x17, 0xdf, 0x05, 0x6f, 0xfd, 0xd9, 0x0a, 0xc7, 0x4d, 0x05, 0x6d, 0x5e,
0x2d, 0xc9, 0x42, 0x0f, 0x43, 0x96, 0x20, 0xf6, 0x13, 0xd5, 0x35, 0x2d,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0x30, 0x32,
0x38, 0x7a, 0x48, 0x1a, 0x77, 0x0f, 0x63, 0xf3, 0x88, 0x03, 0x96, 0xed,
0xc5, 0x2c, 0x20, 0xd4, 0xa5, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xcd, 0xcc, 0x84, 0x26, 0x24, 0x60, 0xdd, 0x3b,
0x8b, 0xb9, 0x0a, 0xa7, 0x81, 0xbc, 0x27, 0xda, 0x6d, 0xbc, 0x60, 0x76,
0xd7, 0xe7, 0xa3, 0x70, 0x0e, 0x19, 0xb9, 0x6e, 0xdd, 0x87, 0x2f, 0x17,
0xa4, 0x2d, 0x51, 0x76, 0xad, 0x02, 0x00, 0xd3, 0xd3, 0x9a, 0x21, 0x0c,
0x03, 0x85, 0xab, 0x50, 0x0b, 0xbf, 0x2d, 0x26, 0x8b, 0x88, 0x87, 0xdd,
0xb0, 0xc1, 0xe5, 0x0f, 0x6a, 0xf8, 0xca, 0x23, 0x3a, 0xb0, 0xb8, 0xed,
0x03, 0xb6, 0x19, 0x17, 0xb2, 0xd7, 0xaf, 0x25, 0x0f, 0xe8, 0x5d, 0x96,
0x88, 0x6b, 0xd7, 0x7f, 0xe0, 0xb0, 0x41, 0x57, 0xa1, 0x14, 0x1d, 0x55,
0x14, 0x63, 0xdb, 0x2a, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0x00, 0x64, 0x7f, 0x44, 0x75, 0xc4, 0x97, 0x37, 0x63, 0x51, 0x19,
0x64, 0xd6, 0x26, 0x62, 0xe4, 0x5c, 0xc4, 0xac, 0xa2, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x66, 0xf6, 0x93,
0xf5, 0xd8, 0x74, 0xee, 0xd7, 0xdd, 0xc1, 0x95, 0x0c, 0x6d, 0x23, 0x58,
0x1b, 0x30, 0xd1, 0x7c, 0xdd, 0x80, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c,
0x98, 0x97, 0x8e, 0x45, 0xd3, 0x87, 0x93, 0x2f, 0xa7, 0x92, 0xea, 0x68,
0xc2, 0x30, 0x3c, 0x9d, 0xfc, 0xe5, 0x9a, 0xe5, 0x63, 0x86, 0xfe, 0xd6,
0x66, 0x52, 0x9b, 0xc9, 0x7e, 0x85, 0xd7, 0x18, 0x5e, 0x6c, 0x19, 0x6d,
0x74, 0x08, 0x36, 0xf3, 0xae, 0xf9, 0x4c, 0xa7, 0xa7, 0x41, 0xb5, 0x25,
0x15, 0x62, 0xff, 0xae, 0x96, 0x82, 0xd3, 0x78, 0xf7, 0x79, 0x28, 0x91,
0xd6, 0xb7, 0xed, 0x30, 0xea, 0x73, 0x77, 0x25, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0xac, 0xba, 0x36, 0xd5, 0x4b, 0x79,
0xf3, 0x04, 0xf5, 0x8c, 0x51, 0xd7, 0x9c, 0xc4, 0x42, 0x49, 0x0f, 0x61,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x34, 0x33, 0x09, 0xd5, 0x70, 0xac, 0x80, 0x04, 0x8f, 0xd9, 0xd8, 0xe8,
0xa6, 0xb4, 0x00, 0x28, 0x46, 0x98, 0xca, 0xc7, 0xd2, 0xac, 0x94, 0xc2,
0x39, 0x64, 0xe4, 0xba, 0x75, 0x1f, 0xbe, 0x5c, 0xcf, 0x90, 0x68, 0xf1,
0x6e, 0x4c, 0xca, 0x87, 0x8f, 0x21, 0x48, 0xa0, 0x16, 0x8b, 0x99, 0xd9,
0x87, 0x24, 0x9b, 0x40, 0x22, 0xa8, 0x2f, 0x78, 0xa6, 0xa1, 0x66, 0xe9,
0x3d, 0x18, 0x20, 0x3d, 0x8f, 0xe7, 0x2a, 0x62, 0x9d, 0x7a, 0x59, 0x9f,
0x19, 0xad, 0x4c, 0x22, 0xf7, 0xd8, 0xff, 0x2f, 0xd1, 0x08, 0x62, 0xfa,
0xea, 0x32, 0xde, 0xeb, 0x34, 0xb7, 0x2f, 0xce, 0x59, 0xae, 0x58, 0x5e,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x00, 0xb5, 0x26,
0x62, 0x47, 0xcf, 0x65, 0x84, 0xa1, 0x99, 0xdc, 0x4d, 0x33, 0x06, 0x56,
0x18, 0xe9, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x9a, 0x99, 0x05, 0xdc, 0x12, 0x36, 0xab, 0x88,
0x30, 0xba, 0x5f, 0xa0, 0xfb, 0x47, 0x07, 0x26, 0x35, 0x41, 0x2f, 0x9d,
0x69, 0x56, 0x4a, 0xe1, 0x1c, 0x32, 0x72, 0xdd, 0xba, 0x0f, 0x5f, 0x2e,
0x8a, 0xde, 0x53, 0x36, 0x9b, 0x28, 0xd7, 0xc2, 0x9a, 0xe9, 0x54, 0x8f,
0x12, 0x82, 0x62, 0x7c, 0x80, 0x95, 0xb6, 0xe4, 0xb6, 0x95, 0x68, 0x3a,
0x12, 0x91, 0xc6, 0x53, 0x8d, 0xb2, 0x5f, 0x00, 0x98, 0x26, 0x75, 0x9a,
0x48, 0x00, 0x22, 0x78, 0xd9, 0x15, 0xe9, 0x48, 0x9c, 0x41, 0x43, 0x00,
0x86, 0x64, 0xcf, 0xef, 0x41, 0xe0, 0x64, 0xc8, 0x06, 0xbd, 0xf3, 0x38,
0x14, 0x4d, 0xb4, 0x29, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0x00, 0x03, 0xc6, 0x05, 0x53, 0x9b, 0xa2, 0x22, 0xa8, 0x0c, 0xfa,
0x63, 0x9c, 0xc2, 0x5e, 0x8d, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xcd, 0x4c, 0x74, 0x4a,
0xab, 0x56, 0x10, 0x49, 0x78, 0xb6, 0xd1, 0x04, 0x5c, 0xb0, 0x7e, 0x50,
0x06, 0xcf, 0xb9, 0xce, 0x34, 0x2b, 0xa5, 0x70, 0x0e, 0x19, 0xb9, 0x6e,
0xdd, 0x87, 0x2f, 0x17, 0xa3, 0xd0, 0xde, 0x7e, 0x48, 0x51, 0x64, 0x73,
0x21, 0x48, 0x05, 0xf8, 0xb9, 0xaf, 0xa6, 0xc6, 0xfa, 0xf1, 0xaa, 0xea,
0xdc, 0x98, 0xbb, 0xb0, 0x67, 0xa5, 0xd0, 0x60, 0x25, 0x56, 0x02, 0x04,
0x13, 0x70, 0x85, 0x7a, 0x7e, 0xf8, 0xf8, 0x6c, 0x77, 0xe5, 0x5b, 0x32,
0x93, 0xdd, 0x77, 0x1c, 0xa8, 0x36, 0x1c, 0x19, 0x15, 0xd6, 0x65, 0x8a,
0x9f, 0x83, 0x4a, 0xdf, 0x0f, 0x3c, 0x5c, 0x71, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x40, 0x58, 0x89, 0xea, 0xfc, 0xd4, 0xce,
0xe7, 0xab, 0x47, 0x2a, 0x12, 0x95, 0x93, 0x9d, 0x14, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x67, 0x66, 0xa3, 0xab, 0x8c, 0xef, 0x89, 0x25, 0x47, 0x52, 0x2b, 0x9e,
0xa5, 0x3b, 0xf4, 0xbe, 0xea, 0xe7, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51,
0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45, 0xce, 0xc3, 0x34, 0xb3,
0x70, 0x4e, 0x4d, 0xbe, 0xc5, 0x5a, 0x0c, 0x13, 0x5b, 0xee, 0x07, 0xbf,
0x95, 0x12, 0x7e, 0xed, 0x4d, 0xa4, 0x8e, 0xc1, 0x70, 0x34, 0x9e, 0x6a,
0x3b, 0x06, 0xa6, 0x65, 0x0f, 0x2c, 0x93, 0x88, 0xf7, 0xef, 0xc9, 0x9c,
0x0d, 0xd3, 0xa8, 0x1f, 0x8b, 0xd7, 0x35, 0xe3, 0xcb, 0xa0, 0x72, 0x39,
0x0b, 0xa8, 0x5a, 0x6f, 0x95, 0xeb, 0x29, 0x88, 0x52, 0x3e, 0x01, 0x12,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0xc0, 0x8a, 0x2f,
0x82, 0xda, 0x05, 0x96, 0x7e, 0x40, 0x9e, 0xef, 0x9e, 0xdb, 0x4c, 0x0c,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x34, 0x93, 0x07, 0x17, 0x22, 0x7d, 0xc9, 0xe4,
0xa3, 0x4b, 0x66, 0x4e, 0x3b, 0x84, 0x67, 0x34, 0x04, 0xe0, 0xe7, 0x3a,
0xd3, 0xac, 0x94, 0xc2, 0x39, 0x64, 0xe4, 0xba, 0x75, 0x1f, 0xbe, 0x5c,
0x0d, 0x88, 0x5f, 0xf8, 0xac, 0x4e, 0xb4, 0x30, 0x5c, 0xc6, 0xff, 0x6c,
0x72, 0xb2, 0xcf, 0x0b, 0x70, 0xaf, 0x96, 0x28, 0xa9, 0x31, 0xaf, 0xbc,
0x71, 0x72, 0xf3, 0xf2, 0x77, 0x8e, 0xac, 0x18, 0x59, 0xc0, 0x96, 0x6f,
0xa6, 0xde, 0x5d, 0x3b, 0x9f, 0x66, 0x1f, 0x89, 0xf0, 0x8b, 0xe8, 0x62,
0xe1, 0x64, 0x16, 0x30, 0xad, 0xff, 0xfd, 0xd9, 0x7f, 0xd8, 0xb0, 0x25,
0x9b, 0x02, 0xa4, 0x00, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0xd0, 0x69, 0x1e, 0xe4, 0x15, 0x78, 0x19, 0x0a, 0xda, 0x1f, 0xdf,
0xc2, 0x56, 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x9a, 0x59, 0xe1, 0xb5,
0xf5, 0x60, 0x05, 0x8f, 0x81, 0xda, 0xfd, 0xe9, 0x29, 0x5a, 0x76, 0x21,
0x02, 0xf0, 0x73, 0x9d, 0x69, 0x56, 0x4a, 0xe1, 0x1c, 0x32, 0x72, 0xdd,
0xba, 0x0f, 0x5f, 0x2e, 0xfd, 0xdf, 0x81, 0x19, 0x3c, 0xad, 0xc6, 0x8b,
0x58, 0xfe, 0x51, 0x28, 0xb5, 0xf8, 0xdd, 0x10, 0x20, 0x61, 0x30, 0xb3,
0xb5, 0xf5, 0x76, 0x71, 0xa8, 0xc3, 0x57, 0xe4, 0x0c, 0xda, 0x70, 0x11,
0x1e, 0xbd, 0x07, 0x1a, 0xc6, 0xa1, 0x7e, 0x7c, 0xf2, 0x1c, 0xd0, 0x90,
0x6d, 0x8f, 0x21, 0x11, 0xde, 0x84, 0x40, 0xb2, 0x19, 0x13, 0x42, 0x9c,
0xf6, 0xc4, 0x72, 0xf5, 0x9e, 0x74, 0xbe, 0x50, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0x30, 0x32, 0x71, 0x23, 0x2e, 0xdd, 0x8b,
0xf5, 0xda, 0xc1, 0xf5, 0x60, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xcd, 0x04, 0x3e, 0x2b, 0xd5, 0x66, 0x7c, 0x0c, 0x14, 0xe9, 0xcd, 0x52,
0x02, 0x82, 0xbf, 0x10, 0x01, 0xf8, 0xb9, 0xce, 0x34, 0x2b, 0xa5, 0x70,
0x0e, 0x19, 0xb9, 0x6e, 0xdd, 0x87, 0x2f, 0x17, 0xc0, 0x5b, 0xf8, 0x94,
0x48, 0x19, 0xd1, 0x27, 0xe0, 0x36, 0x2a, 0x8e, 0xe7, 0x18, 0x70, 0x0f,
0xe1, 0x8e, 0xf4, 0x48, 0x72, 0x5c, 0x8e, 0x9c, 0xfb, 0xb9, 0x62, 0xb7,
0xa6, 0x15, 0x84, 0x5e, 0x6f, 0x6b, 0x14, 0xb3, 0x43, 0x24, 0xad, 0xe3,
0x9e, 0x76, 0x84, 0x7a, 0x92, 0x8d, 0x04, 0x40, 0x51, 0x73, 0x6b, 0x18,
0xc1, 0x91, 0x25, 0x20, 0x40, 0x67, 0x02, 0x0e, 0x72, 0x76, 0x27, 0x6b,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0xa4, 0xd8, 0x1d,
0xa2, 0x5e, 0xa7, 0x37, 0x70, 0xad, 0xd1, 0x9c, 0x02, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x67, 0x76, 0x60, 0xee, 0x6c, 0x59, 0x24, 0x5c,
0x74, 0x80, 0x2a, 0x4d, 0x98, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c,
0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45,
0x22, 0x62, 0xe0, 0xd7, 0x4f, 0x07, 0xfe, 0x3a, 0x15, 0x87, 0xb6, 0x74,
0x37, 0x21, 0xa4, 0x0a, 0x98, 0x5e, 0x28, 0x12, 0x77, 0xf1, 0x1f, 0x50,
0xd9, 0x8f, 0xf9, 0x40, 0x41, 0x6f, 0x0f, 0x4b, 0x87, 0x7d, 0x5e, 0x14,
0x57, 0xf5, 0xe8, 0x74, 0xc4, 0xd4, 0x27, 0xfc, 0x86, 0xd9, 0x56, 0x66,
0x17, 0xd6, 0x97, 0xe5, 0x69, 0x2a, 0x6b, 0x03, 0x85, 0x38, 0xda, 0x70,
0xb2, 0xf6, 0xdd, 0x52, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x00, 0xac, 0x7f, 0x7a, 0x7f, 0xcc, 0x6b, 0x89, 0xc3, 0x0c, 0x8f, 0x01,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x34, 0x69, 0x13, 0xea,
0xec, 0xb4, 0x88, 0x9c, 0xa2, 0x9b, 0x58, 0x31, 0xcf, 0x1c, 0xfe, 0x42,
0x04, 0xe0, 0xe7, 0x3a, 0xd3, 0xac, 0x94, 0xc2, 0x39, 0x64, 0xe4, 0xba,
0x75, 0x1f, 0xbe, 0x5c, 0x4c, 0x07, 0x42, 0x14, 0x93, 0xe2, 0x83, 0x93,
0xa3, 0xa9, 0x72, 0x37, 0xa9, 0xb2, 0x58, 0xa7, 0x6a, 0x94, 0x53, 0xb5,
0xeb, 0xad, 0x3b, 0x56, 0x46, 0x6b, 0x61, 0xa3, 0x8c, 0x14, 0x56, 0x32,
0xd4, 0x10, 0xf3, 0xf6, 0x64, 0x82, 0xef, 0xb0, 0x24, 0xd2, 0xc2, 0x79,
0xde, 0xb0, 0x42, 0x9e, 0x65, 0x05, 0x88, 0x74, 0x2f, 0x8b, 0x58, 0xd5,
0x42, 0x1a, 0x06, 0xa3, 0xd4, 0xd1, 0xfc, 0x12, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x00, 0xc5, 0x1c, 0xd9, 0xfc, 0x35, 0x0a, 0xad,
0x17, 0xee, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x9a, 0x25, 0x9e, 0xe6, 0xbc, 0xaf, 0xb1, 0x88, 0x37, 0xd7, 0x97, 0x99,
0x67, 0x0e, 0x7f, 0x21, 0x02, 0xf0, 0x73, 0x9d, 0x69, 0x56, 0x4a, 0xe1,
0x1c, 0x32, 0x72, 0xdd, 0xba, 0x0f, 0x5f, 0x2e, 0x5d, 0x53, 0x09, 0xab,
0xc2, 0x58, 0x42, 0x94, 0x15, 0x71, 0x15, 0x1b, 0xfa, 0xfb, 0xcd, 0xc6,
0xbc, 0xa4, 0x00, 0x49, 0x74, 0xd3, 0x9c, 0x04, 0x81, 0xd7, 0x09, 0x28,
0x7d, 0x37, 0x37, 0x58, 0x8a, 0x94, 0x1a, 0x79, 0x8a, 0x7e, 0x94, 0x29,
0xbd, 0xab, 0x65, 0x5f, 0xc6, 0x94, 0xed, 0xdd, 0x94, 0xc1, 0xf1, 0xd8,
0xed, 0x88, 0x80, 0x47, 0x31, 0x52, 0xda, 0x94, 0x30, 0x60, 0xac, 0x0a,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0x00, 0x83, 0x86, 0xfc,
0xaf, 0x41, 0xb9, 0x0e, 0x8e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x4d, 0xac, 0x2d, 0x08, 0x88, 0xfd, 0x14, 0x1e,
0x24, 0x78, 0xcc, 0xcc, 0x33, 0x87, 0xbf, 0x10, 0x01, 0xf8, 0xb9, 0xce,
0x34, 0x2b, 0xa5, 0x70, 0x0e, 0x19, 0xb9, 0x6e, 0xdd, 0x87, 0x2f, 0x17,
0xbb, 0xc3, 0xc0, 0xea, 0xbd, 0x9b, 0xd6, 0x22, 0x75, 0x96, 0x6c, 0xca,
0xc1, 0x2b, 0x45, 0x76, 0x2e, 0x4e, 0x2b, 0xf7, 0xfa, 0x9e, 0x64, 0x2d,
0x4b, 0xd8, 0xd9, 0x06, 0x24, 0xa3, 0xce, 0x49, 0x17, 0xd6, 0x83, 0xe8,
0x15, 0xc0, 0xbf, 0x4f, 0xf8, 0x85, 0x9d, 0xaf, 0xb6, 0x85, 0x62, 0x31,
0x89, 0x06, 0x10, 0xd3, 0x0a, 0xf1, 0xd5, 0x03, 0x50, 0x2c, 0x1e, 0xf4,
0xa8, 0xb6, 0x91, 0x2e, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x40, 0x0c, 0xab, 0x80, 0x53, 0x26, 0xc2, 0x54, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x67, 0x6b, 0xbb, 0x49,
0x36, 0x72, 0x46, 0x68, 0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32,
0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c,
0x98, 0x97, 0x8e, 0x45, 0x80, 0x00, 0x64, 0x53, 0xae, 0xef, 0x99, 0x13,
0xc9, 0x1c, 0x53, 0x45, 0x0c, 0xdc, 0x97, 0xd5, 0x80, 0x4b, 0x56, 0x86,
0xa3, 0xbc, 0x78, 0xc4, 0xc2, 0x0c, 0xf7, 0xaf, 0xfd, 0x42, 0xe9, 0x1b,
0x5f, 0x2f, 0xa6, 0x4f, 0xa5, 0x53, 0xb5, 0xec, 0x36, 0x50, 0xc6, 0xd1,
0x3f, 0xdf, 0xaf, 0x63, 0x9f, 0x25, 0x1d, 0x40, 0xd2, 0xb5, 0x83, 0x24,
0x67, 0x57, 0x4d, 0x0c, 0x7b, 0x8c, 0x90, 0x3d, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xc0, 0xfa, 0x49, 0xea, 0x2a, 0x92, 0x32, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x94, 0x9d, 0x8c, 0xc4, 0x08, 0x6c, 0x2a, 0x66, 0x32, 0xe3, 0x31, 0x33,
0xcf, 0x1c, 0xfe, 0x42, 0x04, 0xe0, 0xe7, 0x3a, 0xd3, 0xac, 0x94, 0xc2,
0x39, 0x64, 0xe4, 0xba, 0x75, 0x1f, 0xbe, 0x5c, 0x57, 0x7e, 0xfd, 0x4b,
0xb2, 0xe8, 0x7d, 0x9b, 0x0e, 0x3c, 0x51, 0xa9, 0x15, 0xcb, 0x9e, 0xfa,
0x45, 0x73, 0x63, 0x75, 0xac, 0xb6, 0x70, 0xff, 0xcb, 0x0a, 0xb6, 0xd9,
0xd9, 0xf6, 0x7d, 0x5e, 0x31, 0x38, 0x8e, 0xff, 0x0c, 0xba, 0x37, 0xee,
0x71, 0x85, 0x91, 0xa1, 0xc8, 0x13, 0x11, 0xc4, 0x7c, 0x0f, 0x6e, 0xca,
0x37, 0x32, 0xdb, 0x2a, 0x5a, 0xf3, 0x69, 0x45, 0x0f, 0x15, 0xe4, 0x1e,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0xd0, 0xb2, 0x9d, 0x4f,
0x2c, 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x5a, 0x83, 0xa6, 0xa4, 0x69, 0x0f, 0x33, 0x33,
0x99, 0xf1, 0x98, 0x99, 0x67, 0x0e, 0x7f, 0x21, 0x02, 0xf0, 0x73, 0x9d,
0x69, 0x56, 0x4a, 0xe1, 0x1c, 0x32, 0x72, 0xdd, 0xba, 0x0f, 0x5f, 0x2e,
0xa0, 0xbc, 0x92, 0xcb, 0x0d, 0xff, 0xd0, 0xea, 0x29, 0x6b, 0x52, 0x84,
0x98, 0xe7, 0xf8, 0xf0, 0xfb, 0x66, 0xbe, 0x18, 0xab, 0x4c, 0x1b, 0x71,
0x16, 0xec, 0xb7, 0x81, 0x69, 0x77, 0xeb, 0x57, 0x47, 0xad, 0x0d, 0x3a,
0x20, 0x42, 0x92, 0xf1, 0xc1, 0xd3, 0xef, 0x25, 0xf5, 0x26, 0x55, 0xc3,
0x98, 0x49, 0x24, 0xec, 0xbe, 0x30, 0x09, 0xa8, 0x07, 0x14, 0x8f, 0xce,
0x6f, 0xdd, 0xa9, 0x71, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0x30, 0xc2, 0xb2, 0x00, 0x12, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x85, 0xed, 0x68, 0x0c,
0x84, 0x99, 0x99, 0x99, 0xcc, 0x78, 0xcc, 0xcc, 0x33, 0x87, 0xbf, 0x10,
0x01, 0xf8, 0xb9, 0xce, 0x34, 0x2b, 0xa5, 0x70, 0x0e, 0x19, 0xb9, 0x6e,
0xdd, 0x87, 0x2f, 0x17, 0x88, 0x7c, 0xf4, 0x83, 0xfd, 0x16, 0xf9, 0xf0,
0x1a, 0xcc, 0xc4, 0xea, 0xf5, 0xba, 0x03, 0x19, 0x4a, 0x1b, 0x5d, 0x52,
0xe2, 0xfd, 0x78, 0xb1, 0x3d, 0xd3, 0x03, 0x25, 0x35, 0x11, 0x34, 0x47,
0x4d, 0xd6, 0x17, 0x27, 0xbf, 0xde, 0x87, 0x2c, 0xde, 0x46, 0x07, 0xcd,
0x2f, 0xa1, 0x31, 0x26, 0xe5, 0x8d, 0x87, 0xe1, 0xda, 0xac, 0xfe, 0x37,
0x97, 0x79, 0x72, 0xc7, 0x3f, 0x28, 0x4a, 0x5e, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0xe4, 0xc5, 0xbd, 0x0a, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0xf7, 0xec, 0x75, 0x26, 0xcc, 0xcc, 0xcc, 0xcc, 0x65, 0x6a, 0x65, 0x66,
0x9b, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c, 0x9e, 0x81, 0xef, 0x51,
0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45, 0xb3, 0xc4, 0x59, 0xb6,
0x35, 0x71, 0xaf, 0xe7, 0x73, 0x4c, 0x03, 0xe2, 0x69, 0x50, 0x19, 0x36,
0x65, 0x43, 0xd5, 0x33, 0x7f, 0x31, 0xf1, 0x0e, 0x89, 0xa3, 0x4f, 0x55,
0xdd, 0xf3, 0x67, 0x57, 0xf1, 0xcb, 0xe8, 0x3c, 0x7f, 0x68, 0x6a, 0x29,
0xe5, 0x60, 0x35, 0x3e, 0x72, 0x40, 0x3c, 0x8b, 0x39, 0x01, 0xa5, 0x9e,
0x73, 0x99, 0x7f, 0x87, 0x18, 0xb4, 0x68, 0xc6, 0xd2, 0x7b, 0xa9, 0x71,
0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55,
0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc,
0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d, 0xac, 0x68, 0x06, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0xca, 0xff, 0x91, 0x99, 0x65, 0x66, 0x66, 0x66,
0x32, 0xe3, 0x31, 0x33, 0xcf, 0x1c, 0xfe, 0x42, 0x04, 0xe0, 0xe7, 0x3a,
0xd3, 0xac, 0x94, 0xc2, 0x39, 0x64, 0xe4, 0xba, 0x75, 0x1f, 0xbe, 0x5c,
0x71, 0x02, 0x27, 0x40, 0x1d, 0xe1, 0x11, 0xee, 0x3b, 0xc9, 0x13, 0xe2,
0x90, 0x29, 0xf8, 0x0b, 0x70, 0x4b, 0x7b, 0xfa, 0xd2, 0xd6, 0xf5, 0x8f,
0x45, 0xff, 0xc8, 0xc4, 0xc9, 0x4c, 0xf9, 0x5e, 0x0e, 0x66, 0x85, 0x5f,
0x88, 0xff, 0xd3, 0xb6, 0xd8, 0x0a, 0x67, 0xd6, 0x91, 0x18, 0x5b, 0xe5,
0xc9, 0xa9, 0xd9, 0x55, 0xb4, 0x30, 0xde, 0x83, 0xe5, 0x10, 0x9a, 0x05,
0x55, 0x53, 0xde, 0x55, 0x01, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa,
0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2, 0x58, 0xe5, 0x6b, 0x06,
0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71, 0x37, 0x1a, 0x49, 0x4d,
0xd5, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0xc8, 0xcc, 0xcc,
0x32, 0x33, 0x33, 0x33, 0x99, 0xf1, 0x98, 0x99, 0x67, 0x0e, 0x7f, 0x21,
0x02, 0xf0, 0x73, 0x9d, 0x69, 0x56, 0x4a, 0xe1, 0x1c, 0x32, 0x72, 0xdd,
0xba, 0x0f, 0x5f, 0x2e, 0x01, 0x00, 0x00, 0xc0, 0xa9, 0xaa, 0xaa, 0x6a,
0x54, 0xd4, 0x53, 0x15, 0x58, 0xd6, 0x6d, 0x37, 0x5a, 0x5b, 0xd4, 0x08,
0xb2, 0xb0, 0x9f, 0xd9, 0x2c, 0x08, 0x7b, 0x3b, 0x0c, 0x84, 0x44, 0x6a,
0x08, 0x75, 0x50, 0x67, 0x4d, 0xe0, 0x04, 0xae, 0x38, 0x92, 0x4a, 0x99,
0x8b, 0x1e, 0xbb, 0x5f, 0x89, 0x70, 0x45, 0x82, 0x02, 0xe8, 0xe3, 0xe9,
0xea, 0x60, 0x2a, 0xd0, 0xa1, 0xe3, 0x59, 0x47, 0x01, 0x00, 0x00, 0x00,
0xaa, 0xaa, 0xaa, 0xaa, 0x54, 0x3d, 0x54, 0x55, 0x57, 0x6d, 0x7e, 0xe2,
0x58, 0xe5, 0x6b, 0x06, 0xb0, 0x3a, 0xd1, 0xcc, 0xda, 0xa8, 0x13, 0x71,
0x37, 0x1a, 0x49, 0x4d, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x65, 0x66, 0x66, 0xe6, 0x98, 0x99, 0x99, 0x19, 0xcc, 0xa6, 0xcb, 0x4c,
0x35, 0x59, 0x9e, 0xba, 0x03, 0xe4, 0x8a, 0xd3, 0x38, 0x17, 0x42, 0x8a,
0xb2, 0xd7, 0x87, 0x03, 0x87, 0x5b, 0x26, 0x51, 0x01, 0x00, 0x00, 0x40,
0xff, 0xff, 0xff, 0x3f, 0xff, 0xc4, 0xfe, 0x3f, 0x02, 0x3b, 0xce, 0xfe,
0x03, 0x62, 0x39, 0x07, 0x06, 0x62, 0x6b, 0x26, 0xf6, 0x1d, 0x36, 0x5f,
0x7e, 0x3d, 0xf2, 0x56, 0x34, 0x33, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xcc,
0x65, 0x6a, 0x65, 0x66, 0x9b, 0x95, 0x3e, 0x32, 0x03, 0xe8, 0x2d, 0x6c,
0x9e, 0x81, 0xef, 0x51, 0x2b, 0x4b, 0x2b, 0x4c, 0x98, 0x97, 0x8e, 0x45
};
unsigned int constants_2_len = 11904;
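The blob above is consumed through load_constants (see the Poseidon header further down in this diff), which hands the constants back as field elements. A minimal sketch of that reinterpretation, assuming the bytes were dumped in the scalar type's native limb layout and endianness; load_constants_sketch is a hypothetical name, not the library's loader:

#include <cassert>
#include <type_traits>

// Hypothetical illustration only: expose an embedded byte blob (such as
// constants_2 above) as an array of field elements. Assumes S is trivially
// copyable and that the dump matches the target's in-memory layout;
// strict-aliasing caveats are ignored for brevity.
template <typename S>
const S* load_constants_sketch(const unsigned char* blob, unsigned int blob_len) {
  static_assert(std::is_trivially_copyable<S>::value, "S must be a POD-like field type");
  assert(blob_len % sizeof(S) == 0); // must hold a whole number of elements
  return reinterpret_cast<const S*>(blob);
}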

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,271 +0,0 @@
#include "poseidon.cuh"
template <typename S>
__global__ void prepare_poseidon_states(S * inp, S * states, size_t number_of_states, S domain_tag, const PoseidonConfiguration<S> config) {
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
int state_number = idx / config.t;
if (state_number >= number_of_states) {
return;
}
int element_number = idx % config.t;
S prepared_element;
// Domain separation
if (element_number == 0) {
prepared_element = domain_tag;
} else {
prepared_element = inp[state_number * (config.t - 1) + element_number - 1];
}
// Add pre-round constant
prepared_element = prepared_element + config.round_constants[element_number];
// Store element in state
states[idx] = prepared_element;
}
template <typename S>
__device__ __forceinline__ S sbox_alpha_five(S element) {
S result = S::sqr(element);
result = S::sqr(result);
return result * element;
}
template <typename S>
__device__ S vecs_mul_matrix(S element, S * matrix, int element_number, int vec_number, int size, S * shared_states) {
shared_states[threadIdx.x] = element;
__syncthreads();
element = S::zero();
for (int i = 0; i < size; i++) {
element = element + (shared_states[vec_number * size + i] * matrix[i * size + element_number]);
}
__syncthreads();
return element;
}
template <typename S>
__device__ S full_round(S element,
size_t rc_offset,
int local_state_number,
int element_number,
bool multiply_by_mds,
bool add_round_constant,
S * shared_states,
const PoseidonConfiguration<S> config) {
element = sbox_alpha_five(element);
if (add_round_constant) {
element = element + config.round_constants[rc_offset + element_number];
}
// Multiply all the states by mds matrix
S * matrix = multiply_by_mds ? config.mds_matrix : config.non_sparse_matrix;
return vecs_mul_matrix(element, matrix, element_number, local_state_number, config.t, shared_states);
}
// Execute full rounds
template <typename S>
__global__ void full_rounds(S * states, size_t number_of_states, size_t rc_offset, bool first_half, const PoseidonConfiguration<S> config) {
extern __shared__ S shared_states[];
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
int state_number = idx / config.t;
if (state_number >= number_of_states) {
return;
}
int local_state_number = threadIdx.x / config.t;
int element_number = idx % config.t;
for (int i = 0; i < config.full_rounds_half - 1; i++) {
states[idx] = full_round(states[idx],
rc_offset,
local_state_number,
element_number,
true,
true,
shared_states,
config);
rc_offset += config.t;
}
states[idx] = full_round(states[idx],
rc_offset,
local_state_number,
element_number,
!first_half,
first_half,
shared_states,
config);
}
template <typename S>
__device__ void partial_round(S * state,
size_t rc_offset,
int round_number,
const PoseidonConfiguration<S> config) {
S element = state[0];
element = sbox_alpha_five(element);
element = element + config.round_constants[rc_offset];
S * sparse_matrix = &config.sparse_matrices[(config.t * 2 - 1) * round_number];
state[0] = element * sparse_matrix[0];
for (int i = 1; i < config.t; i++) {
state[0] = state[0] + (state[i] * sparse_matrix[i]);
}
for (int i = 1; i < config.t; i++) {
state[i] = state[i] + (element * sparse_matrix[config.t + i - 1]);
}
}
// Execute partial rounds
template <typename S>
__global__ void partial_rounds(S * states, size_t number_of_states, size_t rc_offset, const PoseidonConfiguration<S> config) {
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
if (idx >= number_of_states) {
return;
}
S * state = &states[idx * config.t];
for (int i = 0; i < config.partial_rounds; i++) {
partial_round(state, rc_offset, i, config);
rc_offset++;
}
}
// This function just copies results from the states to the output
template <typename S>
__global__ void get_hash_results(S * states, size_t number_of_states, S * out, int t) {
int idx = (blockIdx.x * blockDim.x) + threadIdx.x;
if (idx >= number_of_states) {
return;
}
out[idx] = states[idx * t + 1];
}
template <typename S>
__host__ void Poseidon<S>::hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type) {
// Used in matrix multiplication
S * states, * inp_device;
// allocate memory for {blocks} states of {t} scalars each
cudaMalloc(&states, blocks * this->t * sizeof(S));
// Move input to cuda
cudaMalloc(&inp_device, blocks * (this->t - 1) * sizeof(S));
cudaMemcpy(inp_device, inp, blocks * (this->t - 1) * sizeof(S), cudaMemcpyHostToDevice);
size_t rc_offset = 0;
// The logic behind this is that 1 thread only works on 1 element
// We have {t} elements in each state, and {blocks} states total
int number_of_threads = (256 / this->t) * this->t;
int hashes_per_block = number_of_threads / this->t;
int total_number_of_threads = blocks * this->t;
int number_of_blocks = total_number_of_threads / number_of_threads +
static_cast<bool>(total_number_of_threads % number_of_threads);
// The partial rounds operate on the whole state, so we define
// the parallelism params for processing a single hash preimage per thread
int singlehash_block_size = 128;
int number_of_singlehash_blocks = blocks / singlehash_block_size + static_cast<bool>(blocks % singlehash_block_size);
// Pick the domain_tag according to the hash type
S domain_tag;
switch (hash_type) {
case HashType::ConstInputLen:
domain_tag = this->const_input_no_pad_domain_tag;
break;
case HashType::MerkleTree:
domain_tag = this->tree_domain_tag;
}
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
auto start_time = std::chrono::high_resolution_clock::now();
#endif
// Domain separation and adding pre-round constants
prepare_poseidon_states <<< number_of_blocks, number_of_threads >>> (inp_device, states, blocks, domain_tag, this->config);
rc_offset += this->t;
cudaFree(inp_device);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaDeviceSynchronize();
std::cout << "Domain separation: " << rc_offset << std::endl;
print_buffer_from_cuda<S>(states, blocks * this->t);
auto end_time = std::chrono::high_resolution_clock::now();
auto elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
// execute half full rounds
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t >>> (states, blocks, rc_offset, true, this->config);
rc_offset += this->t * this->config.full_rounds_half;
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaDeviceSynchronize();
std::cout << "Full rounds 1. RCOFFSET: " << rc_offset << std::endl;
print_buffer_from_cuda<S>(states, blocks * this->t);
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
// execute partial rounds
partial_rounds <<< number_of_singlehash_blocks, singlehash_block_size >>> (states, blocks, rc_offset, this->config);
rc_offset += this->config.partial_rounds;
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaDeviceSynchronize();
std::cout << "Partial rounds. RCOFFSET: " << rc_offset << std::endl;
print_buffer_from_cuda<S>(states, blocks * this->t);
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
// execute half full rounds
full_rounds <<< number_of_blocks, number_of_threads, sizeof(S) * hashes_per_block * this->t >>> (states, blocks, rc_offset, false, this->config);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaDeviceSynchronize();
std::cout << "Full rounds 2. RCOFFSET: " << rc_offset << std::endl;
print_buffer_from_cuda<S>(states, blocks * this->t);
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
start_time = std::chrono::high_resolution_clock::now();
#endif
// get output
S * out_device;
cudaMalloc(&out_device, blocks * sizeof(S));
get_hash_results <<< number_of_singlehash_blocks, singlehash_block_size >>> (states, blocks, out_device, this->config.t);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaDeviceSynchronize();
std::cout << "Get hash results" << std::endl;
end_time = std::chrono::high_resolution_clock::now();
elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
#endif
cudaMemcpy(out, out_device, blocks * sizeof(S), cudaMemcpyDeviceToHost);
cudaFree(out_device);
cudaFree(states);
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
cudaDeviceReset();
#endif
}
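To make the launch geometry in hash_blocks concrete, here is the arithmetic worked through for arity 2 (t = 3) hashing 1024 preimages; every quantity follows directly from the expressions above:

// number_of_threads        = (256 / 3) * 3      = 255
//   (the largest multiple of t not exceeding 256, so no state
//    ever straddles a thread-block boundary)
// hashes_per_block         = 255 / 3            = 85
// total_number_of_threads  = 1024 * 3           = 3072
// number_of_blocks         = ceil(3072 / 255)   = 13
// shared memory per launch block = sizeof(S) * 85 * 3   (one state per hash)
// partial rounds: number_of_singlehash_blocks = ceil(1024 / 128) = 8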

View File

@@ -1,133 +0,0 @@
#pragma once
#include "constants.cuh"
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#include <chrono>
#define ARITY 3
template <typename S>
__host__ void print_buffer_from_cuda(S * device_ptr, size_t size) {
S * buffer = static_cast< S * >(malloc(size * sizeof(S)));
cudaMemcpy(buffer, device_ptr, size * sizeof(S), cudaMemcpyDeviceToHost);
std::cout << "Start print" << std::endl;
for(int i = 0; i < size / ARITY; i++) {
std::cout << "State #" << i << std::endl;
for (int j = 0; j < ARITY; j++) {
std::cout << buffer[i * ARITY + j] << std::endl;
}
std::cout << std::endl;
}
std::cout << std::endl;
free(buffer);
}
#endif
#ifdef DEBUG
template <typename S>
__device__ void print_scalar(S element, int data) {
printf("D# %d, T# %d: 0x%08x%08x%08x%08x%08x%08x%08x%08x\n",
data,
threadIdx.x,
element.limbs_storage.limbs[0],
element.limbs_storage.limbs[1],
element.limbs_storage.limbs[2],
element.limbs_storage.limbs[3],
element.limbs_storage.limbs[4],
element.limbs_storage.limbs[5],
element.limbs_storage.limbs[6],
element.limbs_storage.limbs[7]
);
}
#endif
template <typename S>
struct PoseidonConfiguration {
uint32_t partial_rounds, full_rounds_half, t;
S * round_constants, * mds_matrix, * non_sparse_matrix, *sparse_matrices;
};
template <typename S>
class Poseidon {
public:
uint32_t t;
PoseidonConfiguration<S> config;
enum HashType {
ConstInputLen,
MerkleTree,
};
Poseidon(const uint32_t arity) {
t = arity + 1;
this->config.t = t;
// Pre-calculate domain tags
// Domain tags will vary for different applications of Poseidon
uint32_t tree_domain_tag_value = 1;
tree_domain_tag_value = (tree_domain_tag_value << arity) - tree_domain_tag_value;
tree_domain_tag = S::from(tree_domain_tag_value);
const_input_no_pad_domain_tag = S::one();
// TO-DO: implement binary shifts for scalar type
// const_input_no_pad_domain_tag = S::one() << 64;
// const_input_no_pad_domain_tag *= S::from(arity);
this->config.full_rounds_half = FULL_ROUNDS_DEFAULT;
this->config.partial_rounds = partial_rounds_number_from_arity(arity);
uint32_t round_constants_len = t * this->config.full_rounds_half * 2 + this->config.partial_rounds;
uint32_t mds_matrix_len = t * t;
uint32_t sparse_matrices_len = (t * 2 - 1) * this->config.partial_rounds;
// All the constants are stored in a single file
S * constants = load_constants<S>(arity);
S * mds_offset = constants + round_constants_len;
S * non_sparse_offset = mds_offset + mds_matrix_len;
S * sparse_matrices_offset = non_sparse_offset + mds_matrix_len;
#if !defined(__CUDA_ARCH__) && defined(DEBUG)
std::cout << "P: " << this->config.partial_rounds << " F: " << this->config.full_rounds_half << std::endl;
#endif
// Allocate the memory for constants
cudaMalloc(&this->config.round_constants, sizeof(S) * round_constants_len);
cudaMalloc(&this->config.mds_matrix, sizeof(S) * mds_matrix_len);
cudaMalloc(&this->config.non_sparse_matrix, sizeof(S) * mds_matrix_len);
cudaMalloc(&this->config.sparse_matrices, sizeof(S) * sparse_matrices_len);
// Copy the constants to device
cudaMemcpy(this->config.round_constants, constants,
sizeof(S) * round_constants_len,
cudaMemcpyHostToDevice);
cudaMemcpy(this->config.mds_matrix, mds_offset,
sizeof(S) * mds_matrix_len,
cudaMemcpyHostToDevice);
cudaMemcpy(this->config.non_sparse_matrix, non_sparse_offset,
sizeof(S) * mds_matrix_len,
cudaMemcpyHostToDevice);
cudaMemcpy(this->config.sparse_matrices, sparse_matrices_offset,
sizeof(S) * sparse_matrices_len,
cudaMemcpyHostToDevice);
}
~Poseidon() {
cudaFree(this->config.round_constants);
cudaFree(this->config.mds_matrix);
cudaFree(this->config.non_sparse_matrix);
cudaFree(this->config.sparse_matrices);
}
// Hash multiple preimages in parallel
void hash_blocks(const S * inp, size_t blocks, S * out, HashType hash_type);
private:
S tree_domain_tag, const_input_no_pad_domain_tag;
};
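The constructor above slices a single flat constants array into four regions. The layout, with offsets in units of S taken directly from the pointer arithmetic:

// [0, rc_len)                        round_constants, where
//                                    rc_len = t * full_rounds_half * 2 + partial_rounds
// [rc_len, rc_len + t*t)             mds_matrix
// [+ t*t)                            non_sparse_matrix (same t*t size)
// [+ (2t - 1) * partial_rounds)      sparse_matrices: one (2t - 1)-element
//                                    sparse matrix per partial round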

View File

@@ -1,48 +0,0 @@
#define DEBUG
#include "../../curves/bls12_381/curve_config.cuh"
#include "../../curves/bls12_381/poseidon.cu"
#ifndef __CUDA_ARCH__
#include <iostream>
#include <chrono>
#include <fstream>
int main(int argc, char* argv[]) {
const int arity = 2;
const int t = arity + 1;
Poseidon<BLS12_381::scalar_t> poseidon(arity);
int number_of_blocks = 4;
BLS12_381::scalar_t input = BLS12_381::scalar_t::zero();
BLS12_381::scalar_t * in_ptr = static_cast< BLS12_381::scalar_t * >(malloc(number_of_blocks * arity * sizeof(BLS12_381::scalar_t)));
for (uint32_t i = 0; i < number_of_blocks * arity; i++) {
// std::cout << input << std::endl;
in_ptr[i] = input;
input = input + BLS12_381::scalar_t::one();
}
std::cout << std::endl;
BLS12_381::scalar_t * out_ptr = static_cast< BLS12_381::scalar_t * >(malloc(number_of_blocks * sizeof(BLS12_381::scalar_t)));
auto start_time = std::chrono::high_resolution_clock::now();
poseidon.hash_blocks(in_ptr, number_of_blocks, out_ptr, Poseidon<BLS12_381::scalar_t>::HashType::MerkleTree);
#ifdef DEBUG
for (int i = 0; i < number_of_blocks; i++) {
std::cout << out_ptr[i] << std::endl;
}
#endif
auto end_time = std::chrono::high_resolution_clock::now();
auto elapsed_time = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
std::cout << "Elapsed time: " << elapsed_time.count() << " ms" << std::endl;
free(in_ptr);
free(out_ptr);
}
#endif

View File

@@ -1,28 +0,0 @@
#pragma once
#include "../primitives/field.cuh"
#include "../primitives/projective.cuh"
#if defined(FEATURE_BLS12_381)
#include "bls12_381/params.cuh"
#elif defined(FEATURE_BLS12_377)
#include "bls12_377/params.cuh"
#elif defined(FEATURE_BN254)
#include "bn254/params.cuh"
#else
# error "no FEATURE"
#endif
typedef Field<PARAMS::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS::fq_config> point_field_t;
static constexpr point_field_t b = point_field_t{ PARAMS::weierstrass_b };
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS::weierstrass_b_g2_re },
point_field_t{ PARAMS::weierstrass_b_g2_im }};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
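Every typedef above resolves at compile time from a single FEATURE_* flag, so each translation unit is built for exactly one curve. A hedged sketch of what that looks like in practice; the nvcc invocation is illustrative, and only the flag and type names come from the header:

// Illustrative build line, one curve per compilation:
//   nvcc -DFEATURE_BLS12_381 -c lde.cu -o lde_bls12_381.o
// After preprocessing, scalar_t, projective_t and affine_t (plus the g2_*
// types when G2_DEFINED is set) all refer to BLS12-381 throughout:
#include "curve_config.cuh"

void curve_config_example(scalar_t* out) {
  *out = scalar_t::one(); // scalar_t::one() is used elsewhere in this diff
}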

View File

@@ -1,327 +0,0 @@
#ifndef _LDE
#define _LDE
#include <cuda.h>
#include "../appUtils/ntt/lde.cu"
#include "../appUtils/ntt/ntt.cuh"
#include "../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" scalar_t* build_domain_cuda(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
if (inverse) {
return fill_twiddle_factors_array(domain_size, scalar_t::omega_inv(logn), stream);
} else {
return fill_twiddle_factors_array(domain_size, scalar_t::omega(logn), stream);
}
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return nullptr;
}
}
extern "C" int ntt_cuda(scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return ntt_end2end_template<scalar_t,scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_cuda(projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return ntt_end2end_template<projective_t,scalar_t>(arr, n, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ntt_batch_cuda(scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<scalar_t,scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_batch_cuda(projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return ntt_end2end_batch_template<projective_t,scalar_t>(arr, arr_size, batch_size, inverse, stream); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_cuda(scalar_t* d_out, scalar_t *d_evaluations, scalar_t *d_domain, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_evaluations, scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_cuda(projective_t* d_out, projective_t *d_evaluations, scalar_t *d_domain, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_batch_cuda(projective_t* d_out, projective_t* d_evaluations, scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_cuda(scalar_t* d_out, scalar_t *d_coefficients, scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_batch_cuda(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_cuda(projective_t* d_out, projective_t *d_coefficients, scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_batch_cuda(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
scalar_t* _null = nullptr;
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_cuda(scalar_t* d_out, scalar_t *d_coefficients, scalar_t *d_domain, unsigned domain_size,
unsigned n, scalar_t *coset_powers, unsigned device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_batch_cuda(scalar_t* d_out, scalar_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_cuda(projective_t* d_out, projective_t *d_coefficients, scalar_t *d_domain, unsigned domain_size,
unsigned n, scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_batch_cuda(projective_t* d_out, projective_t* d_coefficients, scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, scalar_t *coset_powers, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
cudaStreamCreate(&stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_cuda(scalar_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_batch_cuda(scalar_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_cuda(projective_t* arr, int n, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order(arr, n, logn, stream);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_batch_cuda(projective_t* arr, int n, int batch_size, size_t device_id = 0, cudaStream_t stream = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
cudaStreamCreate(&stream);
reverse_order_batch(arr, n, logn, batch_size, stream);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif
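One property of the wrappers above worth noting: most of them call cudaStreamCreate on their local stream argument and never destroy the resulting stream, so each call leaks one stream. A minimal sketch of the same wrapper with the stream's lifetime closed; ntt_cuda_owned_stream is a hypothetical name, not part of this diff:

extern "C" int ntt_cuda_owned_stream(scalar_t *arr, uint32_t n, bool inverse)
{
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  int result = -1;
  try
  {
    result = ntt_end2end_template<scalar_t, scalar_t>(arr, n, inverse, stream);
  }
  catch (const std::runtime_error &ex)
  {
    printf("error %s", ex.what());
  }
  cudaStreamDestroy(stream); // released on both the success and failure paths
  return result;
}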

View File

@@ -1,26 +0,0 @@
#ifndef _POSEIDON
#define _POSEIDON
#include <cuda.h>
#include <stdexcept>
#include "../appUtils/poseidon/poseidon.cu"
#include "curve_config.cuh"
template class Poseidon<scalar_t>;
extern "C" int poseidon_multi_cuda(scalar_t input[], scalar_t* out,
size_t number_of_blocks, int arity, size_t device_id = 0)
{
try
{
Poseidon<scalar_t> poseidon(arity);
poseidon.hash_blocks(input, number_of_blocks, out, Poseidon<scalar_t>::HashType::MerkleTree);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif

View File

@@ -1,19 +0,0 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../primitives/projective.cuh"
extern "C" bool eq(projective_t *point1, projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == point_field_t::zero()) && (point1->y == point_field_t::zero()) && (point1->z == point_field_t::zero())) &&
!((point2->x == point_field_t::zero()) && (point2->y == point_field_t::zero()) && (point2->z == point_field_t::zero()));
}
#if defined(G2_DEFINED)
extern "C" bool eq_g2(g2_projective_t *point1, g2_projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == g2_point_field_t::zero()) && (point1->y == g2_point_field_t::zero()) && (point1->z == g2_point_field_t::zero())) &&
!((point2->x == g2_point_field_t::zero()) && (point2->y == g2_point_field_t::zero()) && (point2->z == g2_point_field_t::zero()));
}
#endif
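The extra zero checks above exist because the all-zero triple (0, 0, 0) is not a valid projective representation, yet a cross-multiplication style operator== would typically report it equal to any point, since every product is zero. The guard makes the predicate total: an all-zero input never compares equal, not even to itself. An illustrative consequence, assuming projective_t value-initializes its limbs to zero:

projective_t p{};        // x = y = z = 0: not a point on the curve
bool same = eq(&p, &p);  // false, by the explicit zero checks above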

View File

@@ -1,75 +0,0 @@
#ifndef _VEC_MULT
#define _VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../primitives/field.cuh"
#include "../utils/storage.cuh"
#include "../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../appUtils/vector_manipulation/ve_mod_mult.cuh"
extern "C" int32_t vec_mod_mult_point(projective_t *inout,
scalar_t *scalar_vec,
size_t n_elments,
size_t device_id,
cudaStream_t stream = 0)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try
{
// TODO: device_id
vector_mod_mult<projective_t, scalar_t>(scalar_vec, inout, inout, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t vec_mod_mult_scalar(scalar_t *inout,
scalar_t *scalar_vec,
size_t n_elments,
size_t device_id,
cudaStream_t stream = 0)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try
{
// TODO: device_id
vector_mod_mult<scalar_t, scalar_t>(scalar_vec, inout, inout, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t matrix_vec_mod_mult(scalar_t *matrix_flattened,
scalar_t *input,
scalar_t *output,
size_t n_elments,
size_t device_id,
cudaStream_t stream = 0)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try
{
// TODO: device_id
matrix_mod_mult<scalar_t>(matrix_flattened, input, output, n_elments, stream);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
#endif
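For orientation, the first two wrappers above are in-place: inout is passed as both the input and the output of vector_mod_mult. A hedged usage sketch; whether the pointers must already live on the device is not visible in this diff, so treat this as illustrative only:

scalar_t data[4];
scalar_t scalars[4];
// ... fill data and scalars with field elements ...
vec_mod_mult_scalar(data, scalars, 4, /*device_id=*/0); // data[i] <- data[i] * scalars[i]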

View File

@@ -88,7 +88,8 @@ template <typename P, typename A>
__global__ void accumulate_buckets_kernel(P *buckets, unsigned *bucket_offsets, unsigned *bucket_sizes, unsigned *single_bucket_indices, unsigned *point_indices, A *points, unsigned nof_buckets, unsigned *nof_buckets_to_compute, unsigned msm_idx_shift){
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid >= *nof_buckets_to_compute){
if (tid ==0) printf("nof buckets to comp: %u", *nof_buckets_to_compute);
if (tid>=*nof_buckets_to_compute){
return;
}
unsigned msm_index = single_bucket_indices[tid]>>msm_idx_shift;
@@ -106,7 +107,8 @@ template <typename P>
__global__ void big_triangle_sum_kernel(P* buckets, P* final_sums, unsigned nof_bms, unsigned c){
unsigned tid = (blockIdx.x * blockDim.x) + threadIdx.x;
if (tid >= nof_bms) return;
if (tid>=nof_bms) return;
// printf("%u",tid);
P line_sum = buckets[(tid+1)*(1<<c)-1];
final_sums[tid] = line_sum;
for (unsigned i = (1<<c)-2; i >0; i--)
@@ -152,16 +154,16 @@ __global__ void final_accumulation_kernel(P* final_sums, P* final_results, unsig
//this function computes msm using the bucket method
template <typename S, typename P, typename A>
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, cudaStream_t stream) {
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device) {
S *d_scalars;
A *d_points;
if (!on_device) {
//copy scalars and point to gpu
cudaMallocAsync(&d_scalars, sizeof(S) * size, stream);
cudaMallocAsync(&d_points, sizeof(A) * size, stream);
cudaMemcpyAsync(d_scalars, scalars, sizeof(S) * size, cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_points, points, sizeof(A) * size, cudaMemcpyHostToDevice, stream);
cudaMalloc(&d_scalars, sizeof(S) * size);
cudaMalloc(&d_points, sizeof(A) * size);
cudaMemcpy(d_scalars, scalars, sizeof(S) * size, cudaMemcpyHostToDevice);
cudaMemcpy(d_points, points, sizeof(A) * size, cudaMemcpyHostToDevice);
}
else {
d_scalars = scalars;
@@ -178,140 +180,135 @@ void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsi
nof_bms++;
}
unsigned nof_buckets = nof_bms<<c;
cudaMallocAsync(&buckets, sizeof(P) * nof_buckets, stream);
cudaMalloc(&buckets, sizeof(P) * nof_buckets);
// launch the bucket initialization kernel with maximum threads
unsigned NUM_THREADS = 1 << 10;
unsigned NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
initialize_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, nof_buckets);
initialize_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, nof_buckets);
unsigned *bucket_indices;
unsigned *point_indices;
cudaMallocAsync(&bucket_indices, sizeof(unsigned) * size * (nof_bms+1), stream);
cudaMallocAsync(&point_indices, sizeof(unsigned) * size * (nof_bms+1), stream);
cudaMalloc(&bucket_indices, sizeof(unsigned) * size * (nof_bms+1));
cudaMalloc(&point_indices, sizeof(unsigned) * size * (nof_bms+1));
//split scalars into digits
NUM_THREADS = 1 << 10;
NUM_BLOCKS = (size * (nof_bms+1) + NUM_THREADS - 1) / NUM_THREADS;
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(bucket_indices + size, point_indices + size, d_scalars, size, msm_log_size,
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(bucket_indices + size, point_indices + size, d_scalars, size, msm_log_size,
nof_bms, bm_bitsize, c); //+size - leaving the first bm free for the out of place sort later
//sort indices - the indices are sorted from smallest to largest in order to group together the points that belong to each bucket
unsigned *sort_indices_temp_storage{};
size_t sort_indices_temp_storage_bytes;
// The second to last parameter is the default value supplied explicitly to allow passing the stream
// See https://nvlabs.github.io/cub/structcub_1_1_device_radix_sort.html#a65e82152de448c6373ed9563aaf8af7e for more info
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + size, bucket_indices,
point_indices + size, point_indices, size, 0, sizeof(unsigned) * 8, stream);
cudaMallocAsync(&sort_indices_temp_storage, sort_indices_temp_storage_bytes, stream);
point_indices + size, point_indices, size);
cudaMalloc(&sort_indices_temp_storage, sort_indices_temp_storage_bytes);
for (unsigned i = 0; i < nof_bms; i++) {
unsigned offset_out = i * size;
unsigned offset_in = offset_out + size;
// The second to last parameter is the default value supplied explicitly to allow passing the stream
// See https://nvlabs.github.io/cub/structcub_1_1_device_radix_sort.html#a65e82152de448c6373ed9563aaf8af7e for more info
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + offset_in, bucket_indices + offset_out,
point_indices + offset_in, point_indices + offset_out, size, 0, sizeof(unsigned) * 8, stream);
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + offset_in,
bucket_indices + offset_out, point_indices + offset_in, point_indices + offset_out, size);
}
cudaFreeAsync(sort_indices_temp_storage, stream);
cudaFree(sort_indices_temp_storage);
//find bucket_sizes
unsigned *single_bucket_indices;
unsigned *bucket_sizes;
unsigned *nof_buckets_to_compute;
cudaMallocAsync(&single_bucket_indices, sizeof(unsigned)*nof_buckets, stream);
cudaMallocAsync(&bucket_sizes, sizeof(unsigned)*nof_buckets, stream);
cudaMallocAsync(&nof_buckets_to_compute, sizeof(unsigned), stream);
cudaMalloc(&single_bucket_indices, sizeof(unsigned)*nof_buckets);
cudaMalloc(&bucket_sizes, sizeof(unsigned)*nof_buckets);
cudaMalloc(&nof_buckets_to_compute, sizeof(unsigned));
unsigned *encode_temp_storage{};
size_t encode_temp_storage_bytes = 0;
cub::DeviceRunLengthEncode::Encode(encode_temp_storage, encode_temp_storage_bytes, bucket_indices, single_bucket_indices, bucket_sizes,
nof_buckets_to_compute, nof_bms*size, stream);
cudaMallocAsync(&encode_temp_storage, encode_temp_storage_bytes, stream);
nof_buckets_to_compute, nof_bms*size);
cudaMalloc(&encode_temp_storage, encode_temp_storage_bytes);
cub::DeviceRunLengthEncode::Encode(encode_temp_storage, encode_temp_storage_bytes, bucket_indices, single_bucket_indices, bucket_sizes,
nof_buckets_to_compute, nof_bms*size, stream);
cudaFreeAsync(encode_temp_storage, stream);
nof_buckets_to_compute, nof_bms*size);
cudaFree(encode_temp_storage);
//get offsets - where does each new bucket begin
unsigned* bucket_offsets;
cudaMallocAsync(&bucket_offsets, sizeof(unsigned)*nof_buckets, stream);
cudaMalloc(&bucket_offsets, sizeof(unsigned)*nof_buckets);
unsigned* offsets_temp_storage{};
size_t offsets_temp_storage_bytes = 0;
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, nof_buckets, stream);
cudaMallocAsync(&offsets_temp_storage, offsets_temp_storage_bytes, stream);
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, nof_buckets, stream);
cudaFreeAsync(offsets_temp_storage, stream);
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, nof_buckets);
cudaMalloc(&offsets_temp_storage, offsets_temp_storage_bytes);
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, nof_buckets);
cudaFree(offsets_temp_storage);
//launch the accumulation kernel with maximum threads
NUM_THREADS = 1 << 8;
NUM_THREADS = 1 << 8;
NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, point_indices,
d_points, nof_buckets, nof_buckets_to_compute, c+bm_bitsize);
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, point_indices,
d_points, nof_buckets, nof_buckets_to_compute, c+bm_bitsize);
#ifdef SSM_SUM
//sum each bucket
NUM_THREADS = 1 << 10;
NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
ssm_buckets_kernel<fake_point, fake_scalar><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, single_bucket_indices, nof_buckets, c);
ssm_buckets_kernel<fake_point, fake_scalar><<<NUM_BLOCKS, NUM_THREADS>>>(buckets, single_bucket_indices, nof_buckets, c);
//sum each bucket module
P* final_results;
cudaMallocAsync(&final_results, sizeof(P) * nof_bms, stream);
cudaMalloc(&final_results, sizeof(P) * nof_bms);
NUM_THREADS = 1<<c;
NUM_BLOCKS = nof_bms;
sum_reduction_kernel<<<NUM_BLOCKS,NUM_THREADS, 0, stream>>>(buckets, final_results);
sum_reduction_kernel<<<NUM_BLOCKS,NUM_THREADS>>>(buckets, final_results);
#endif
#ifdef BIG_TRIANGLE
P* final_results;
cudaMallocAsync(&final_results, sizeof(P) * nof_bms, stream);
cudaMalloc(&final_results, sizeof(P) * nof_bms);
//launch the bucket module sum kernel - a thread for each bucket module
NUM_THREADS = nof_bms;
NUM_BLOCKS = 1;
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, final_results, nof_bms, c);
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, final_results, nof_bms, c);
#endif
P* d_final_result;
if (!on_device)
cudaMallocAsync(&d_final_result, sizeof(P), stream);
cudaMalloc(&d_final_result, sizeof(P));
//launch the double and add kernel, a single thread
final_accumulation_kernel<P, S><<<1,1,0,stream>>>(final_results, on_device ? final_result : d_final_result, 1, nof_bms, c);
final_accumulation_kernel<P, S><<<1,1>>>(final_results, on_device ? final_result : d_final_result, 1, nof_bms, c);
//copy final result to host
cudaStreamSynchronize(stream);
cudaDeviceSynchronize();
if (!on_device)
cudaMemcpyAsync(final_result, d_final_result, sizeof(P), cudaMemcpyDeviceToHost, stream);
cudaMemcpy(final_result, d_final_result, sizeof(P), cudaMemcpyDeviceToHost);
std::cout<<"final res "<<(*final_result)<<std::endl;
//free memory
if (!on_device) {
cudaFreeAsync(d_points, stream);
cudaFreeAsync(d_scalars, stream);
cudaFreeAsync(d_final_result, stream);
cudaFree(d_points);
cudaFree(d_scalars);
cudaFree(d_final_result);
}
cudaFreeAsync(buckets, stream);
cudaFreeAsync(bucket_indices, stream);
cudaFreeAsync(point_indices, stream);
cudaFreeAsync(single_bucket_indices, stream);
cudaFreeAsync(bucket_sizes, stream);
cudaFreeAsync(nof_buckets_to_compute, stream);
cudaFreeAsync(bucket_offsets, stream);
cudaFreeAsync(final_results, stream);
cudaStreamSynchronize(stream);
cudaFree(buckets);
cudaFree(bucket_indices);
cudaFree(point_indices);
cudaFree(single_bucket_indices);
cudaFree(bucket_sizes);
cudaFree(nof_buckets_to_compute);
cudaFree(bucket_offsets);
cudaFree(final_results);
}
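// For intuition, the digit-split step above is easier to see on the CPU. This
// standalone sketch mirrors the indexing scheme implied by the arguments of
// split_scalars_kernel (c-bit digits, with the bucket-module id packed into the
// high bits); the kernel body is not shown in this diff, so treat it as an
// illustration rather than the exact device code.
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const unsigned c = 4;                            // window size in bits
  const unsigned bitsize = 16;                     // toy scalar width
  const unsigned nof_bms = (bitsize + c - 1) / c;  // bucket modules
  uint32_t scalar = 0xBEEF;                        // toy scalar
  std::vector<uint32_t> bucket_indices;
  for (unsigned bm = 0; bm < nof_bms; bm++) {
    uint32_t digit = (scalar >> (bm * c)) & ((1u << c) - 1u);
    bucket_indices.push_back((bm << c) | digit);   // high bits: module id
  }
  for (uint32_t idx : bucket_indices)
    printf("module %u -> bucket %u\n", idx >> c, idx & ((1u << c) - 1u));
  return 0;
}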
//this function computes a batch of msms using the bucket method
template <typename S, typename P, typename A>
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device, cudaStream_t stream){
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device){
unsigned total_size = batch_size * msm_size;
S *d_scalars;
A *d_points;
if (!on_device) {
//copy scalars and point to gpu
cudaMallocAsync(&d_scalars, sizeof(S) * total_size, stream);
cudaMallocAsync(&d_points, sizeof(A) * total_size, stream);
cudaMemcpyAsync(d_scalars, scalars, sizeof(S) * total_size, cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_points, points, sizeof(A) * total_size, cudaMemcpyHostToDevice, stream);
cudaMalloc(&d_scalars, sizeof(S) * total_size);
cudaMalloc(&d_points, sizeof(A) * total_size);
cudaMemcpy(d_scalars, scalars, sizeof(S) * total_size, cudaMemcpyHostToDevice);
cudaMemcpy(d_points, points, sizeof(A) * total_size, cudaMemcpyHostToDevice);
}
else {
d_scalars = scalars;
@@ -328,129 +325,125 @@ void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *poin
unsigned bm_bitsize = ceil(log2(nof_bms));
unsigned nof_buckets = (nof_bms<<c);
unsigned total_nof_buckets = nof_buckets*batch_size;
cudaMallocAsync(&buckets, sizeof(P) * total_nof_buckets, stream);
cudaMalloc(&buckets, sizeof(P) * total_nof_buckets);
//launch the bucket initialization kernel with maximum threads
unsigned NUM_THREADS = 1 << 10;
unsigned NUM_BLOCKS = (total_nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
initialize_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, total_nof_buckets);
initialize_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, total_nof_buckets);
unsigned *bucket_indices;
unsigned *point_indices;
cudaMallocAsync(&bucket_indices, sizeof(unsigned) * (total_size * nof_bms + msm_size), stream);
cudaMallocAsync(&point_indices, sizeof(unsigned) * (total_size * nof_bms + msm_size), stream);
cudaMalloc(&bucket_indices, sizeof(unsigned) * (total_size * nof_bms + msm_size));
cudaMalloc(&point_indices, sizeof(unsigned) * (total_size * nof_bms + msm_size));
//split scalars into digits
NUM_THREADS = 1 << 8;
NUM_BLOCKS = (total_size * nof_bms + msm_size + NUM_THREADS - 1) / NUM_THREADS;
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(bucket_indices + msm_size, point_indices + msm_size, d_scalars, total_size,
split_scalars_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(bucket_indices + msm_size, point_indices + msm_size, d_scalars, total_size,
msm_log_size, nof_bms, bm_bitsize, c); //+msm_size - leaving the first bm free for the out-of-place sort later
//sort indices - the indices are sorted from smallest to largest in order to group together the points that belong to each bucket
unsigned *sorted_bucket_indices;
unsigned *sorted_point_indices;
cudaMallocAsync(&sorted_bucket_indices, sizeof(unsigned) * (total_size * nof_bms), stream);
cudaMallocAsync(&sorted_point_indices, sizeof(unsigned) * (total_size * nof_bms), stream);
cudaMalloc(&sorted_bucket_indices, sizeof(unsigned) * (total_size * nof_bms));
cudaMalloc(&sorted_point_indices, sizeof(unsigned) * (total_size * nof_bms));
unsigned *sort_indices_temp_storage{};
size_t sort_indices_temp_storage_bytes;
// The second to last parameter is the default value supplied explicitly to allow passing the stream
// See https://nvlabs.github.io/cub/structcub_1_1_device_radix_sort.html#a65e82152de448c6373ed9563aaf8af7e for more info
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + msm_size, sorted_bucket_indices,
point_indices + msm_size, sorted_point_indices, total_size * nof_bms, 0, sizeof(unsigned)*8, stream);
cudaMallocAsync(&sort_indices_temp_storage, sort_indices_temp_storage_bytes, stream);
// The second to last parameter is the default value supplied explicitly to allow passing the stream
// See https://nvlabs.github.io/cub/structcub_1_1_device_radix_sort.html#a65e82152de448c6373ed9563aaf8af7e for more info
point_indices + msm_size, sorted_point_indices, total_size * nof_bms);
cudaMalloc(&sort_indices_temp_storage, sort_indices_temp_storage_bytes);
cub::DeviceRadixSort::SortPairs(sort_indices_temp_storage, sort_indices_temp_storage_bytes, bucket_indices + msm_size, sorted_bucket_indices,
point_indices + msm_size, sorted_point_indices, total_size * nof_bms, 0, sizeof(unsigned)*8, stream);
cudaFreeAsync(sort_indices_temp_storage, stream);
point_indices + msm_size, sorted_point_indices, total_size * nof_bms);
cudaFree(sort_indices_temp_storage);
//find bucket_sizes
unsigned *single_bucket_indices;
unsigned *bucket_sizes;
unsigned *total_nof_buckets_to_compute;
cudaMallocAsync(&single_bucket_indices, sizeof(unsigned)*total_nof_buckets, stream);
cudaMallocAsync(&bucket_sizes, sizeof(unsigned)*total_nof_buckets, stream);
cudaMallocAsync(&total_nof_buckets_to_compute, sizeof(unsigned), stream);
cudaMalloc(&single_bucket_indices, sizeof(unsigned)*total_nof_buckets);
cudaMalloc(&bucket_sizes, sizeof(unsigned)*total_nof_buckets);
cudaMalloc(&total_nof_buckets_to_compute, sizeof(unsigned));
unsigned *encode_temp_storage{};
size_t encode_temp_storage_bytes = 0;
cub::DeviceRunLengthEncode::Encode(encode_temp_storage, encode_temp_storage_bytes, sorted_bucket_indices, single_bucket_indices, bucket_sizes,
total_nof_buckets_to_compute, nof_bms*total_size, stream);
cudaMallocAsync(&encode_temp_storage, encode_temp_storage_bytes, stream);
total_nof_buckets_to_compute, nof_bms*total_size);
cudaMalloc(&encode_temp_storage, encode_temp_storage_bytes);
cub::DeviceRunLengthEncode::Encode(encode_temp_storage, encode_temp_storage_bytes, sorted_bucket_indices, single_bucket_indices, bucket_sizes,
total_nof_buckets_to_compute, nof_bms*total_size, stream);
cudaFreeAsync(encode_temp_storage, stream);
total_nof_buckets_to_compute, nof_bms*total_size);
cudaFree(encode_temp_storage);
//get offsets - where does each new bucket begin
unsigned* bucket_offsets;
cudaMallocAsync(&bucket_offsets, sizeof(unsigned)*total_nof_buckets, stream);
cudaMalloc(&bucket_offsets, sizeof(unsigned)*total_nof_buckets);
unsigned* offsets_temp_storage{};
size_t offsets_temp_storage_bytes = 0;
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, total_nof_buckets, stream);
cudaMallocAsync(&offsets_temp_storage, offsets_temp_storage_bytes, stream);
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, total_nof_buckets, stream);
cudaFreeAsync(offsets_temp_storage, stream);
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, total_nof_buckets);
cudaMalloc(&offsets_temp_storage, offsets_temp_storage_bytes);
cub::DeviceScan::ExclusiveSum(offsets_temp_storage, offsets_temp_storage_bytes, bucket_sizes, bucket_offsets, total_nof_buckets);
cudaFree(offsets_temp_storage);
//launch the accumulation kernel with maximum threads
NUM_THREADS = 1 << 8;
NUM_BLOCKS = (total_nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, sorted_point_indices,
accumulate_buckets_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bucket_offsets, bucket_sizes, single_bucket_indices, sorted_point_indices,
d_points, nof_buckets, total_nof_buckets_to_compute, c+bm_bitsize);
#ifdef SSM_SUM
//sum each bucket
NUM_THREADS = 1 << 10;
NUM_BLOCKS = (nof_buckets + NUM_THREADS - 1) / NUM_THREADS;
ssm_buckets_kernel<P, S><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, single_bucket_indices, nof_buckets, c);
ssm_buckets_kernel<P, S><<<NUM_BLOCKS, NUM_THREADS>>>(buckets, single_bucket_indices, nof_buckets, c);
//sum each bucket module
P* final_results;
cudaMallocAsync(&final_results, sizeof(P) * nof_bms, stream);
cudaMalloc(&final_results, sizeof(P) * nof_bms);
NUM_THREADS = 1<<c;
NUM_BLOCKS = nof_bms;
sum_reduction_kernel<<<NUM_BLOCKS,NUM_THREADS, 0, stream>>>(buckets, final_results);
sum_reduction_kernel<<<NUM_BLOCKS,NUM_THREADS>>>(buckets, final_results);
#endif
#ifdef BIG_TRIANGLE
P* bm_sums;
cudaMallocAsync(&bm_sums, sizeof(P) * nof_bms * batch_size, stream);
cudaMalloc(&bm_sums, sizeof(P) * nof_bms * batch_size);
//launch the bucket module sum kernel - a thread for each bucket module
NUM_THREADS = 1<<8;
NUM_BLOCKS = (nof_bms*batch_size + NUM_THREADS - 1) / NUM_THREADS;
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(buckets, bm_sums, nof_bms*batch_size, c);
big_triangle_sum_kernel<<<NUM_BLOCKS, NUM_THREADS>>>(buckets, bm_sums, nof_bms*batch_size, c);
#endif
P* d_final_results;
if (!on_device)
cudaMallocAsync(&d_final_results, sizeof(P)*batch_size, stream);
cudaMalloc(&d_final_results, sizeof(P)*batch_size);
//launch the double and add kernel, a single thread for each msm
NUM_THREADS = 1<<8;
NUM_BLOCKS = (batch_size + NUM_THREADS - 1) / NUM_THREADS;
final_accumulation_kernel<P, S><<<NUM_BLOCKS,NUM_THREADS, 0, stream>>>(bm_sums, on_device ? final_results : d_final_results, batch_size, nof_bms, c);
final_accumulation_kernel<P, S><<<NUM_BLOCKS,NUM_THREADS>>>(bm_sums, on_device ? final_results : d_final_results, batch_size, nof_bms, c);
//copy final result to host
cudaDeviceSynchronize();
if (!on_device)
cudaMemcpyAsync(final_results, d_final_results, sizeof(P)*batch_size, cudaMemcpyDeviceToHost, stream);
cudaMemcpy(final_results, d_final_results, sizeof(P)*batch_size, cudaMemcpyDeviceToHost);
//free memory
if (!on_device) {
cudaFreeAsync(d_points, stream);
cudaFreeAsync(d_scalars, stream);
cudaFreeAsync(d_final_results, stream);
cudaFree(d_points);
cudaFree(d_scalars);
cudaFree(d_final_results);
}
cudaFreeAsync(buckets, stream);
cudaFreeAsync(bucket_indices, stream);
cudaFreeAsync(point_indices, stream);
cudaFreeAsync(sorted_bucket_indices, stream);
cudaFreeAsync(sorted_point_indices, stream);
cudaFreeAsync(single_bucket_indices, stream);
cudaFreeAsync(bucket_sizes, stream);
cudaFreeAsync(total_nof_buckets_to_compute, stream);
cudaFreeAsync(bucket_offsets, stream);
cudaFreeAsync(bm_sums, stream);
cudaFree(buckets);
cudaFree(bucket_indices);
cudaFree(point_indices);
cudaFree(sorted_bucket_indices);
cudaFree(sorted_point_indices);
cudaFree(single_bucket_indices);
cudaFree(bucket_sizes);
cudaFree(total_nof_buckets_to_compute);
cudaFree(bucket_offsets);
cudaFree(bm_sums);
cudaStreamSynchronize(stream);
}
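// For reference, final_accumulation_kernel combines the per-module sums as
// result = sum_i bm_sums[i] * 2^(c*i). A minimal CPU sketch of that
// Horner-style loop, with a plain integer standing in for the group element
// (integer doubling in place of point doubling):
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned c = 4, nof_bms = 4;
  uint64_t bm_sums[nof_bms] = {3, 1, 4, 1};       // toy per-module sums
  uint64_t acc = 0;
  for (int i = nof_bms - 1; i >= 0; i--) {
    for (unsigned j = 0; j < c; j++) acc += acc;  // c doublings
    acc += bm_sums[i];
  }
  printf("%llu\n", (unsigned long long)acc);      // 3*1 + 1*16 + 4*256 + 1*4096 = 5139
  return 0;
}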
@@ -465,44 +458,44 @@ __global__ void to_proj_kernel(A* affine_points, P* proj_points, unsigned N){
//this function computes msm using ssm (single scalar multiplication)
template <typename S, typename P, typename A>
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, cudaStream_t stream){ //works up to 2^8
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result){ //works up to 2^8
S *scalars;
A *a_points;
P *p_points;
P *results;
cudaMallocAsync(&scalars, sizeof(S) * size, stream);
cudaMallocAsync(&a_points, sizeof(A) * size, stream);
cudaMallocAsync(&p_points, sizeof(P) * size, stream);
cudaMallocAsync(&results, sizeof(P) * size, stream);
cudaMalloc(&scalars, sizeof(S) * size);
cudaMalloc(&a_points, sizeof(A) * size);
cudaMalloc(&p_points, sizeof(P) * size);
cudaMalloc(&results, sizeof(P) * size);
//copy inputs to device
cudaMemcpyAsync(scalars, h_scalars, sizeof(S) * size, cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(a_points, h_points, sizeof(A) * size, cudaMemcpyHostToDevice, stream);
cudaMemcpy(scalars, h_scalars, sizeof(S) * size, cudaMemcpyHostToDevice);
cudaMemcpy(a_points, h_points, sizeof(A) * size, cudaMemcpyHostToDevice);
//convert to projective representation and multiply each point by its scalar using single scalar multiplication
unsigned NUM_THREADS = size;
to_proj_kernel<<<1,NUM_THREADS, 0, stream>>>(a_points, p_points, size);
ssm_kernel<<<1,NUM_THREADS, 0, stream>>>(scalars, p_points, results, size);
to_proj_kernel<<<1,NUM_THREADS>>>(a_points, p_points, size);
ssm_kernel<<<1,NUM_THREADS>>>(scalars, p_points, results, size);
P *final_result;
cudaMallocAsync(&final_result, sizeof(P), stream);
cudaMalloc(&final_result, sizeof(P));
//assuming msm size is a power of 2
//sum all the ssm results
NUM_THREADS = size;
sum_reduction_kernel<<<1,NUM_THREADS, 0, stream>>>(results, final_result);
sum_reduction_kernel<<<1,NUM_THREADS>>>(results, final_result);
//copy result to host
cudaStreamSynchronize(stream);
cudaMemcpyAsync(h_final_result, final_result, sizeof(P), cudaMemcpyDeviceToHost, stream);
cudaDeviceSynchronize();
cudaMemcpy(h_final_result, final_result, sizeof(P), cudaMemcpyDeviceToHost);
//free memory
cudaFreeAsync(scalars, stream);
cudaFreeAsync(a_points, stream);
cudaFreeAsync(p_points, stream);
cudaFreeAsync(results, stream);
cudaFreeAsync(final_result, stream);
cudaFree(scalars);
cudaFree(a_points);
cudaFree(p_points);
cudaFree(results);
cudaFree(final_result);
}
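// sum_reduction_kernel itself is not shown in this diff; the standard
// power-of-two tree reduction matching the "msm size is a power of 2"
// assumption looks like this on the CPU (an illustrative sketch, not
// necessarily the kernel's exact shape):
#include <cstdio>
#include <vector>

int main() {
  std::vector<long> v = {1, 2, 3, 4, 5, 6, 7, 8};  // length must be a power of 2
  for (size_t stride = v.size() / 2; stride > 0; stride /= 2)
    for (size_t i = 0; i < stride; i++)
      v[i] = v[i] + v[i + stride];                 // pairwise partial sums
  printf("%ld\n", v[0]);                           // 36
  return 0;
}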
@@ -538,21 +531,21 @@ unsigned get_optimal_c(const unsigned size) {
//this function is used to compute msms of size larger than 256
template <typename S, typename P, typename A>
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, cudaStream_t stream){
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device){
unsigned c = get_optimal_c(size);
// unsigned c = 6;
// unsigned bitsize = 32;
unsigned bitsize = 255;
bucket_method_msm(bitsize, c, scalars, points, size, result, on_device, stream);
bucket_method_msm(bitsize, c, scalars, points, size, result, on_device);
}
// this function is used to compute batches of msms of size larger than 256
template <typename S, typename P, typename A>
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream){
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device){
unsigned c = get_optimal_c(msm_size);
// unsigned c = 6;
// unsigned bitsize = 32;
unsigned bitsize = 255;
batched_bucket_method_msm(bitsize, c, scalars, points, batch_size, msm_size, result, on_device, stream);
batched_bucket_method_msm(bitsize, c, scalars, points, batch_size, msm_size, result, on_device);
}
#endif
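// get_optimal_c is only referenced above. A common Pippenger-style window
// heuristic is sketched below; this is a hypothetical stand-in and may not
// match the repo's actual formula.
unsigned get_optimal_c_sketch(unsigned size) {
  unsigned log_size = 0;
  while ((1u << (log_size + 1)) <= size) log_size++;  // floor(log2(size))
  return log_size > 4 ? log_size - 4 : 1;             // c grows with log2(n)
}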

View File

@@ -3,19 +3,19 @@
#pragma once
template <typename S, typename P, typename A>
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device, cudaStream_t stream);
void bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned size, P* final_result, bool on_device);
template <typename S, typename P, typename A>
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device, cudaStream_t stream);
void batched_bucket_method_msm(unsigned bitsize, unsigned c, S *scalars, A *points, unsigned batch_size, unsigned msm_size, P* final_results, bool on_device);
template <typename S, typename P, typename A>
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device, cudaStream_t stream);
void batched_large_msm(S* scalars, A* points, unsigned batch_size, unsigned msm_size, P* result, bool on_device);
template <typename S, typename P, typename A>
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device, cudaStream_t stream);
void large_msm(S* scalars, A* points, unsigned size, P* result, bool on_device);
template <typename S, typename P, typename A>
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result, cudaStream_t stream);
void short_msm(S *h_scalars, A *h_points, unsigned size, P* h_final_result);
template <typename A, typename S, typename P>
void reference_msm(S* scalars, A* a_points, unsigned size);

View File

@@ -9,6 +9,74 @@
using namespace BLS12_381;
struct fake_point
{
unsigned val = 0;
__host__ __device__ inline fake_point operator+(fake_point fp) {
return {val+fp.val};
}
__host__ __device__ fake_point zero() {
fake_point p;
return p;
}
};
std::ostream& operator<<(std::ostream &strm, const fake_point &a) {
return strm <<a.val;
}
struct fake_scalar
{
unsigned val = 0;
unsigned bitsize = 32;
// __host__ __device__ unsigned get_scalar_digit(unsigned digit_num, unsigned digit_width){
// return (val>>(digit_num*digit_width))&((1<<digit_width)-1);
// }
__host__ __device__ int get_scalar_digit(int digit_num, int digit_width){
return (val>>(digit_num*digit_width))&((1<<digit_width)-1);
}
__host__ __device__ inline fake_point operator*(fake_point fp) {
fake_point p1;
fake_point p2;
unsigned x = val;
if (x == 0) return fake_point().zero();
unsigned i = 1;
unsigned c_bit = (x & (1<<(bitsize-1)))>>(bitsize-1);
while (c_bit==0 && i<bitsize){
i++;
c_bit = (x & (1<<(bitsize-i)))>>(bitsize-i);
}
p1 = fp;
p2 = p1+p1;
while (i<bitsize){
i++;
c_bit = (x & (1<<(bitsize-i)))>>(bitsize-i);
if (c_bit){
p1 = p1 + p2;
p2 = p2 + p2;
}
else {
p2 = p1 + p2;
p1 = p1 + p1;
}
}
return p1;
}
};
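// fake_scalar::operator* above is an MSB-first ladder: after every step
// p2 == p1 + fp, and p1 holds (bits consumed so far) * fp. The standalone
// trace below reproduces the same control flow with plain integers
// (val = 13 = 0b1101, fp = 1) and prints 13.
#include <cstdio>

int main() {
  unsigned val = 13, bitsize = 4, fp = 1;
  unsigned i = 1;
  unsigned c_bit = (val >> (bitsize - 1)) & 1;
  while (c_bit == 0 && i < bitsize) { i++; c_bit = (val >> (bitsize - i)) & 1; }
  unsigned p1 = fp, p2 = p1 + p1;                 // invariant: p2 == p1 + fp
  while (i < bitsize) {
    i++;
    c_bit = (val >> (bitsize - i)) & 1;
    if (c_bit) { p1 = p1 + p2; p2 = p2 + p2; }
    else       { p2 = p1 + p2; p1 = p1 + p1; }
  }
  printf("%u\n", p1);                             // 13
  return 0;
}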
class Dummy_Scalar {
public:
static constexpr unsigned NBITS = 32;

View File

@@ -15,20 +15,19 @@
* @param n Length of `d_domain` array, also equal to the number of evaluations of each polynomial.
* @param batch_size The size of the batch; the length of `d_evaluations` is `n` * `batch_size`.
*/
template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluations, S * d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluations, S * d_domain, unsigned n, unsigned batch_size) {
uint32_t logn = uint32_t(log(n) / log(2));
cudaMemcpyAsync(d_out, d_evaluations, sizeof(E) * n * batch_size, cudaMemcpyDeviceToDevice, stream);
cudaMemcpy(d_out, d_evaluations, sizeof(E) * n * batch_size, cudaMemcpyDeviceToDevice);
int NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
int NUM_BLOCKS = batch_size * max(int((n / 2) / NUM_THREADS), 1);
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
{
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, n, d_domain, n, NUM_BLOCKS, s, false);
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>>(d_out, n, d_domain, n, NUM_BLOCKS, s, false);
}
NUM_BLOCKS = (n * batch_size + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>> (d_out, n * batch_size, S::inv_log_size(logn));
cudaStreamSynchronize(stream);
template_normalize_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>> (d_out, n * batch_size, S::inv_log_size(logn));
return 0;
}
@@ -40,8 +39,8 @@ template <typename E, typename S> int interpolate_batch(E * d_out, E * d_evaluat
* @param d_domain Domain on which the polynomial is evaluated. Must be a subgroup.
* @param n Length of `d_evaluations` and the size `d_domain` arrays (they should have equal length).
*/
template <typename E, typename S> int interpolate(E * d_out, E * d_evaluations, S * d_domain, unsigned n, cudaStream_t stream) {
return interpolate_batch <E, S> (d_out, d_evaluations, d_domain, n, 1, stream);
template <typename E, typename S> int interpolate(E * d_out, E * d_evaluations, S * d_domain, unsigned n) {
return interpolate_batch <E, S> (d_out, d_evaluations, d_domain, n, 1);
}
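// A hypothetical host-side usage of the wrapper above. The include path,
// BLS12_381::scalar_t, and the surrounding setup are illustrative; only the
// calls themselves come from this codebase. The interpolation domain is built
// from the inverse root of unity, matching the `inverse` branch of the
// domain-building code elsewhere in this diff.
#include <cmath>
#include "ntt.cuh"  // assumed include path

int interpolate_example(BLS12_381::scalar_t* d_out,
                        BLS12_381::scalar_t* d_evals, unsigned n) {
  uint32_t logn = uint32_t(log(n) / log(2));
  BLS12_381::scalar_t* d_domain =
      fill_twiddle_factors_array(n, BLS12_381::scalar_t::omega_inv(logn));
  int ret = interpolate(d_out, d_evals, d_domain, n);
  cudaFree(d_domain);
  return ret;
}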
template < typename E > __global__ void fill_array(E * arr, E val, uint32_t n) {
@@ -63,7 +62,7 @@ template < typename E > __global__ void fill_array(E * arr, E val, uint32_t n) {
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
*/
template <typename E, typename S>
int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, unsigned batch_size, bool coset, S * coset_powers, cudaStream_t stream) {
int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, unsigned batch_size, bool coset, S * coset_powers) {
uint32_t logn = uint32_t(log(domain_size) / log(2));
if (domain_size > n) {
// allocate and initialize an array of stream handles to parallelize data copying across batches
@@ -81,19 +80,18 @@ int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_
cudaStreamDestroy(memcpy_streams[i]);
}
} else
cudaMemcpyAsync(d_out, d_coefficients, sizeof(E) * domain_size * batch_size, cudaMemcpyDeviceToDevice, stream);
cudaMemcpy(d_out, d_coefficients, sizeof(E) * domain_size * batch_size, cudaMemcpyDeviceToDevice);
if (coset)
batch_vector_mult(coset_powers, d_out, domain_size, batch_size, stream);
batch_vector_mult(coset_powers, d_out, domain_size, batch_size);
int NUM_THREADS = min(domain_size / 2, MAX_THREADS_BATCH);
int chunks = max(int((domain_size / 2) / NUM_THREADS), 1);
int NUM_BLOCKS = batch_size * chunks;
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
{
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_out, domain_size, d_domain, domain_size, batch_size * chunks, logn - s - 1, true);
ntt_template_kernel <E, S> <<<NUM_BLOCKS, NUM_THREADS>>>(d_out, domain_size, d_domain, domain_size, batch_size * chunks, logn - s - 1, true);
}
cudaStreamSynchronize(stream);
return 0;
}
@@ -109,76 +107,76 @@ int evaluate_batch(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_
* @param coset_powers If `coset` is true, a list of powers `[1, u, u^2, ..., u^{n-1}]` where `u` is the generator of the coset.
*/
template <typename E, typename S>
int evaluate(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, bool coset, S * coset_powers, cudaStream_t stream) {
return evaluate_batch <E, S> (d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers, stream);
int evaluate(E * d_out, E * d_coefficients, S * d_domain, unsigned domain_size, unsigned n, bool coset, S * coset_powers) {
return evaluate_batch <E, S> (d_out, d_coefficients, d_domain, domain_size, n, 1, coset, coset_powers);
}
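// Why coset_powers works: evaluating p at the coset points u*w^i equals
// evaluating the polynomial with scaled coefficients a_k * u^k at w^i, which
// is exactly what the batch_vector_mult call performs before the NTT loop.
// A tiny self-contained CPU check over the integers mod 17 (w = 4 has order
// 4; u = 3 generates a coset); both columns printed below match.
#include <cstdio>

const long P = 17;
long pw(long b, long e) { long r = 1; while (e--) r = r * b % P; return r; }
long eval(const long* a, int n, long x) {  // Horner evaluation mod P
  long r = 0;
  for (int k = n - 1; k >= 0; k--) r = (r * x + a[k]) % P;
  return r;
}

int main() {
  long a[4] = {5, 1, 2, 7}, u = 3, w = 4;  // p(x) = 5 + x + 2x^2 + 7x^3
  long scaled[4];
  for (int k = 0; k < 4; k++) scaled[k] = a[k] * pw(u, k) % P;  // a_k * u^k
  for (int i = 0; i < 4; i++)
    printf("%ld %ld\n", eval(a, 4, u * pw(w, i) % P), eval(scaled, 4, pw(w, i)));
  return 0;
}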
template <typename S>
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream) {
return interpolate(d_out, d_evaluations, d_domain, n, stream);
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n) {
return interpolate(d_out, d_evaluations, d_domain, n);
}
template <typename S>
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
template <typename E, typename S>
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream) {
return interpolate(d_out, d_evaluations, d_domain, n, stream);
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n) {
return interpolate(d_out, d_evaluations, d_domain, n);
}
template <typename E, typename S>
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size, stream);
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size) {
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
template <typename S>
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream) {
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n) {
S* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
template <typename S>
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream) {
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size) {
S* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
template <typename E, typename S>
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream) {
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n) {
S* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null, stream);
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
template <typename E, typename S>
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream) {
unsigned domain_size, unsigned n, unsigned batch_size) {
S* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null, stream);
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
template <typename S>
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream) {
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
unsigned domain_size, unsigned n, S* coset_powers) {
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
template <typename E, typename S>
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream) {
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
unsigned n, unsigned batch_size, S* coset_powers) {
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
template <typename E, typename S>
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream) {
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers, stream);
unsigned domain_size, unsigned n, S* coset_powers) {
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
template <typename E, typename S>
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream) {
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers, stream);
unsigned n, unsigned batch_size, S* coset_powers) {
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
#endif

View File

@@ -3,44 +3,44 @@
#pragma once
template <typename S>
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream);
int interpolate_scalars(S* d_out, S* d_evaluations, S* d_domain, unsigned n);
template <typename S>
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
int interpolate_scalars_batch(S* d_out, S* d_evaluations, S* d_domain, unsigned n, unsigned batch_size);
template <typename E, typename S>
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n, cudaStream_t stream);
int interpolate_points(E* d_out, E* d_evaluations, S* d_domain, unsigned n);
template <typename E, typename S>
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size, cudaStream_t stream);
int interpolate_points_batch(E* d_out, E* d_evaluations, S* d_domain, unsigned n, unsigned batch_size);
template <typename S>
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream);
int evaluate_scalars(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n);
template <typename S>
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
int evaluate_scalars_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, unsigned batch_size);
template <typename E, typename S>
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n, cudaStream_t stream);
int evaluate_points(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size, unsigned n);
template <typename E, typename S>
int evaluate_points_batch(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, unsigned batch_size, cudaStream_t stream);
unsigned domain_size, unsigned n, unsigned batch_size);
template <typename S>
int evaluate_scalars_on_coset(S* d_out, S* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
unsigned domain_size, unsigned n, S* coset_powers);
template <typename S>
int evaluate_scalars_on_coset_batch(S* d_out, S* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream);
unsigned n, unsigned batch_size, S* coset_powers);
template <typename E, typename S>
int evaluate_points_on_coset(E* d_out, E* d_coefficients, S* d_domain,
unsigned domain_size, unsigned n, S* coset_powers, cudaStream_t stream);
unsigned domain_size, unsigned n, S* coset_powers);
template <typename E, typename S>
int evaluate_points_on_coset_batch(E* d_out, E* d_coefficients, S* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, S* coset_powers, cudaStream_t stream);
unsigned n, unsigned batch_size, S* coset_powers);
#endif

View File

@@ -28,12 +28,11 @@ const uint32_t MAX_THREADS_BATCH = 256;
* @param n_twiddles number of twiddle factors.
* @param omega multiplying factor.
*/
template < typename S > S * fill_twiddle_factors_array(uint32_t n_twiddles, S omega, cudaStream_t stream) {
template < typename S > S * fill_twiddle_factors_array(uint32_t n_twiddles, S omega) {
size_t size_twiddles = n_twiddles * sizeof(S);
S * d_twiddles;
cudaMallocAsync(& d_twiddles, size_twiddles, stream);
twiddle_factors_kernel<S> <<< 1, 1, 0, stream>>> (d_twiddles, n_twiddles, omega);
cudaStreamSynchronize(stream);
cudaMalloc( & d_twiddles, size_twiddles);
twiddle_factors_kernel<S> <<< 1, 1 >>> (d_twiddles, n_twiddles, omega);
return d_twiddles;
}
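// For reference, twiddle_factors_kernel is expected to fill
// d_twiddles[i] = omega^i (inferred from how the array is consumed; the
// kernel body is not shown in this hunk). The CPU equivalent over a toy
// field, integers mod 17 with omega = 4:
#include <cstdio>

int main() {
  const long P = 17, omega = 4;
  long tw[4], acc = 1;
  for (int i = 0; i < 4; i++) { tw[i] = acc; acc = acc * omega % P; }
  for (int i = 0; i < 4; i++) printf("twiddle[%d] = %ld\n", i, tw[i]);  // 1 4 16 13
  return 0;
}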
@@ -90,14 +89,14 @@ template < typename T > __global__ void reverse_order_kernel(T* arr, T* arr_reve
* @param logn log(n).
* @param batch_size the size of the batch.
*/
template < typename T > void reverse_order_batch(T* arr, uint32_t n, uint32_t logn, uint32_t batch_size, cudaStream_t stream) {
template < typename T > void reverse_order_batch(T* arr, uint32_t n, uint32_t logn, uint32_t batch_size) {
T* arr_reversed;
cudaMallocAsync(&arr_reversed, n * batch_size * sizeof(T), stream);
cudaMalloc(&arr_reversed, n * batch_size * sizeof(T));
int number_of_threads = MAX_THREADS_BATCH;
int number_of_blocks = (n * batch_size + number_of_threads - 1) / number_of_threads;
reverse_order_kernel <<<number_of_blocks, number_of_threads, 0, stream>>> (arr, arr_reversed, n, logn, batch_size);
cudaMemcpyAsync(arr, arr_reversed, n * batch_size * sizeof(T), cudaMemcpyDeviceToDevice, stream);
cudaFreeAsync(arr_reversed, stream);
reverse_order_kernel <<<number_of_blocks, number_of_threads>>> (arr, arr_reversed, n, logn, batch_size);
cudaMemcpy(arr, arr_reversed, n * batch_size * sizeof(T), cudaMemcpyDeviceToDevice);
cudaFree(arr_reversed);
}
/**
@@ -108,8 +107,8 @@ template < typename T > void reverse_order_batch(T* arr, uint32_t n, uint32_t lo
* @param n length of `arr`.
* @param logn log(n).
*/
template < typename T > void reverse_order(T* arr, uint32_t n, uint32_t logn, cudaStream_t stream) {
reverse_order_batch(arr, n, logn, 1, stream);
template < typename T > void reverse_order(T* arr, uint32_t n, uint32_t logn) {
reverse_order_batch(arr, n, logn, 1);
}
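// The index mapping behind reverse_order_kernel (body not shown here) is the
// standard bit-reversal permutation, applied independently within each batch.
// A self-contained CPU sketch for n = 8:
#include <cstdint>
#include <cstdio>

uint32_t bit_reverse(uint32_t i, uint32_t logn) {
  uint32_t r = 0;
  for (uint32_t b = 0; b < logn; b++) r |= ((i >> b) & 1u) << (logn - 1 - b);
  return r;
}

int main() {
  const uint32_t n = 8, logn = 3;
  int arr[n] = {0, 1, 2, 3, 4, 5, 6, 7}, out[n];
  for (uint32_t i = 0; i < n; i++) out[bit_reverse(i, logn)] = arr[i];
  for (uint32_t i = 0; i < n; i++) printf("%d ", out[i]);  // 0 4 2 6 1 5 3 7
  printf("\n");
  return 0;
}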
/**
@@ -156,15 +155,14 @@ template < typename E, typename S > __global__ void template_normalize_kernel(E
* @param d_twiddles twiddle factors of type S (scalars) array allocated on the device memory (must be a power of 2).
* @param n_twiddles length of d_twiddles.
*/
template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr, uint32_t n, uint32_t logn, S * d_twiddles, uint32_t n_twiddles, cudaStream_t stream) {
template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr, uint32_t n, uint32_t logn, S * d_twiddles, uint32_t n_twiddles) {
uint32_t m = 2;
// TODO: optimize with separate streams for each iteration
for (uint32_t s = 0; s < logn; s++) {
for (uint32_t i = 0; i < n; i += m) {
uint32_t shifted_m = m >> 1;
uint32_t number_of_threads = MAX_NUM_THREADS ^ ((shifted_m ^ MAX_NUM_THREADS) & -(shifted_m < MAX_NUM_THREADS));
uint32_t number_of_blocks = shifted_m / MAX_NUM_THREADS + 1;
template_butterfly_kernel < E, S > <<< number_of_threads, number_of_blocks, 0, stream >>> (d_arr, d_twiddles, n, n_twiddles, m, i, m >> 1);
template_butterfly_kernel < E, S > <<< number_of_threads, number_of_blocks >>> (d_arr, d_twiddles, n, n_twiddles, m, i, m >> 1);
}
m <<= 1;
}
@@ -179,22 +177,21 @@ template < typename E, typename S > void template_ntt_on_device_memory(E * d_arr
* @param n_twiddles length of d_twiddles.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_twiddles, uint32_t n_twiddles, bool inverse, cudaStream_t stream) {
template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_twiddles, uint32_t n_twiddles, bool inverse) {
uint32_t logn = uint32_t(log(n) / log(2));
size_t size_E = n * sizeof(E);
E * arrReversed = template_reverse_order < E > (arr, n, logn);
E * d_arrReversed;
cudaMallocAsync( & d_arrReversed, size_E, stream);
cudaMemcpyAsync(d_arrReversed, arrReversed, size_E, cudaMemcpyHostToDevice, stream);
template_ntt_on_device_memory < E, S > (d_arrReversed, n, logn, d_twiddles, n_twiddles, stream);
cudaMalloc( & d_arrReversed, size_E);
cudaMemcpy(d_arrReversed, arrReversed, size_E, cudaMemcpyHostToDevice);
template_ntt_on_device_memory < E, S > (d_arrReversed, n, logn, d_twiddles, n_twiddles);
if (inverse) {
int NUM_THREADS = MAX_NUM_THREADS;
int NUM_BLOCKS = (n + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS, 0, stream >>> (d_arrReversed, n, S::inv_log_size(logn));
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arrReversed, n, S::inv_log_size(logn));
}
cudaMemcpyAsync(arrReversed, d_arrReversed, size_E, cudaMemcpyDeviceToHost, stream);
cudaFreeAsync(d_arrReversed, stream);
cudaStreamSynchronize(stream);
cudaMemcpy(arrReversed, d_arrReversed, size_E, cudaMemcpyDeviceToHost);
cudaFree(d_arrReversed);
return arrReversed;
}
@@ -204,22 +201,21 @@ template < typename E, typename S > E * ntt_template(E * arr, uint32_t n, S * d_
* @param n length of d_arr.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
template<typename E,typename S> uint32_t ntt_end2end_template(E * arr, uint32_t n, bool inverse, cudaStream_t stream) {
template<typename E,typename S> uint32_t ntt_end2end_template(E * arr, uint32_t n, bool inverse) {
uint32_t logn = uint32_t(log(n) / log(2));
uint32_t n_twiddles = n;
S * twiddles = new S[n_twiddles];
S * d_twiddles;
if (inverse){
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn), stream);
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn));
} else{
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn), stream);
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn));
}
E * result = ntt_template < E, S > (arr, n, d_twiddles, n_twiddles, inverse, stream);
E * result = ntt_template < E, S > (arr, n, d_twiddles, n_twiddles, inverse);
for(int i = 0; i < n; i++){
arr[i] = result[i];
}
cudaFreeAsync(d_twiddles, stream);
cudaStreamSynchronize(stream);
cudaFree(d_twiddles);
return 0; // TODO add
}
@@ -340,45 +336,42 @@ __global__ void ntt_template_kernel_rev_ord(E *arr, uint32_t n, uint32_t logn, u
* @param n size of batch.
* @param inverse indicate if the result array should be normalized by n^(-1).
*/
template <typename E, typename S> uint32_t ntt_end2end_batch_template(E * arr, uint32_t arr_size, uint32_t n, bool inverse, cudaStream_t stream) {
template <typename E, typename S> uint32_t ntt_end2end_batch_template(E * arr, uint32_t arr_size, uint32_t n, bool inverse) {
int batches = int(arr_size / n);
uint32_t logn = uint32_t(log(n) / log(2));
uint32_t n_twiddles = n; // the domain holds the n powers of the chosen root of unity
size_t size_E = arr_size * sizeof(E);
S * d_twiddles;
if (inverse){
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn), stream);
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega_inv(logn));
} else{
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn), stream);
d_twiddles = fill_twiddle_factors_array(n_twiddles, S::omega(logn));
}
E * d_arr;
cudaMallocAsync( & d_arr, size_E, stream);
cudaMemcpyAsync(d_arr, arr, size_E, cudaMemcpyHostToDevice, stream);
cudaMalloc( & d_arr, size_E);
cudaMemcpy(d_arr, arr, size_E, cudaMemcpyHostToDevice);
int NUM_THREADS = MAX_THREADS_BATCH;
int NUM_BLOCKS = (batches + NUM_THREADS - 1) / NUM_THREADS;
ntt_template_kernel_rev_ord<E, S><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_arr, n, logn, batches);
ntt_template_kernel_rev_ord<E, S><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, logn, batches);
NUM_THREADS = min(n / 2, MAX_THREADS_BATCH);
int chunks = max(int((n / 2) / NUM_THREADS), 1);
int total_tasks = batches * chunks;
NUM_BLOCKS = total_tasks;
//TODO: this loop also can be unrolled
for (uint32_t s = 0; s < logn; s++)
for (uint32_t s = 0; s < logn; s++) //TODO: this loop also can be unrolled
{
ntt_template_kernel<E, S><<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
cudaStreamSynchronize(stream);
ntt_template_kernel<E, S><<<NUM_BLOCKS, NUM_THREADS>>>(d_arr, n, d_twiddles, n_twiddles, total_tasks, s, false);
}
if (inverse == true)
{
NUM_THREADS = MAX_NUM_THREADS;
NUM_BLOCKS = (arr_size + NUM_THREADS - 1) / NUM_THREADS;
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS, 0, stream>>> (d_arr, arr_size, S::inv_log_size(logn));
template_normalize_kernel < E, S > <<< NUM_THREADS, NUM_BLOCKS >>> (d_arr, arr_size, S::inv_log_size(logn));
}
cudaMemcpyAsync(arr, d_arr, size_E, cudaMemcpyDeviceToHost, stream);
cudaFreeAsync(d_arr, stream);
cudaFreeAsync(d_twiddles, stream);
cudaStreamSynchronize(stream);
cudaMemcpy(arr, d_arr, size_E, cudaMemcpyDeviceToHost);
cudaFree(d_arr);
cudaFree(d_twiddles);
return 0;
}

View File

@@ -19,7 +19,7 @@ __global__ void vectorModMult(S *scalar_vec, E *element_vec, E *result, size_t n
}
template <typename E, typename S>
int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments, cudaStream_t stream) // TODO: in place so no need for third result vector
int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments) // TODO: in place so no need for third result vector
{
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)n_elments / MAX_THREADS_PER_BLOCK);
@@ -28,24 +28,23 @@ int vector_mod_mult(S *vec_a, E *vec_b, E *result, size_t n_elments, cudaStream_
// Allocate memory on the device for the input vectors, the output vector, and the modulus
S *d_vec_a;
E *d_vec_b, *d_result;
cudaMallocAsync(&d_vec_a, n_elments * sizeof(S), stream);
cudaMallocAsync(&d_vec_b, n_elments * sizeof(E), stream);
cudaMallocAsync(&d_result, n_elments * sizeof(E), stream);
cudaMalloc(&d_vec_a, n_elments * sizeof(S));
cudaMalloc(&d_vec_b, n_elments * sizeof(E));
cudaMalloc(&d_result, n_elments * sizeof(E));
// Copy the input vectors and the modulus from the host to the device
cudaMemcpyAsync(d_vec_a, vec_a, n_elments * sizeof(S), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_vec_b, vec_b, n_elments * sizeof(E), cudaMemcpyHostToDevice, stream);
cudaMemcpy(d_vec_a, vec_a, n_elments * sizeof(S), cudaMemcpyHostToDevice);
cudaMemcpy(d_vec_b, vec_b, n_elments * sizeof(E), cudaMemcpyHostToDevice);
// Call the kernel to perform element-wise modular multiplication
vectorModMult<<<num_blocks, threads_per_block, 0, stream>>>(d_vec_a, d_vec_b, d_result, n_elments);
vectorModMult<<<num_blocks, threads_per_block>>>(d_vec_a, d_vec_b, d_result, n_elments);
cudaMemcpyAsync(result, d_result, n_elments * sizeof(E), cudaMemcpyDeviceToHost, stream);
cudaMemcpy(result, d_result, n_elments * sizeof(E), cudaMemcpyDeviceToHost);
cudaFreeAsync(d_vec_a, stream);
cudaFreeAsync(d_vec_b, stream);
cudaFreeAsync(d_result, stream);
cudaFree(d_vec_a);
cudaFree(d_vec_b);
cudaFree(d_result);
cudaStreamSynchronize(stream);
return 0;
}
@@ -61,12 +60,12 @@ __global__ void batchVectorMult(S *scalar_vec, E *element_vec, unsigned n_scalar
}
template <typename E, typename S>
int batch_vector_mult(S *scalar_vec, E *element_vec, unsigned n_scalars, unsigned batch_size, cudaStream_t stream)
int batch_vector_mult(S *scalar_vec, E *element_vec, unsigned n_scalars, unsigned batch_size)
{
// Set the grid and block dimensions
int NUM_THREADS = MAX_THREADS_PER_BLOCK;
int NUM_BLOCKS = (n_scalars * batch_size + NUM_THREADS - 1) / NUM_THREADS;
batchVectorMult<<<NUM_BLOCKS, NUM_THREADS, 0, stream>>>(scalar_vec, element_vec, n_scalars, batch_size);
batchVectorMult<<<NUM_BLOCKS, NUM_THREADS>>>(scalar_vec, element_vec, n_scalars, batch_size);
return 0;
}
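// The batchVectorMult kernel body is elided by the hunk above; judging from
// its call site in evaluate_batch (coset_powers of length domain_size applied
// to every batch), the presumed mapping is scalar i times element i within
// each batch. A CPU sketch under that assumption:
template <typename E, typename S>
void batch_vector_mult_cpu(const S* scalar_vec, E* element_vec,
                           unsigned n_scalars, unsigned batch_size) {
  for (unsigned b = 0; b < batch_size; b++)
    for (unsigned i = 0; i < n_scalars; i++)
      element_vec[b * n_scalars + i] =
          scalar_vec[i] * element_vec[b * n_scalars + i];
}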
@@ -84,7 +83,7 @@ __global__ void matrixVectorMult(E *matrix_elements, E *vector_elements, E *resu
}
template <typename E>
int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t dim, cudaStream_t stream)
int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t dim)
{
// Set the grid and block dimensions
int num_blocks = (int)ceil((float)dim / MAX_THREADS_PER_BLOCK);
@@ -92,24 +91,23 @@ int matrix_mod_mult(E *matrix_elements, E *vector_elements, E *result, size_t di
// Allocate memory on the device for the input vectors, the output vector, and the modulus
E *d_matrix, *d_vector, *d_result;
cudaMallocAsync(&d_matrix, (dim * dim) * sizeof(E), stream);
cudaMallocAsync(&d_vector, dim * sizeof(E), stream);
cudaMallocAsync(&d_result, dim * sizeof(E), stream);
cudaMalloc(&d_matrix, (dim * dim) * sizeof(E));
cudaMalloc(&d_vector, dim * sizeof(E));
cudaMalloc(&d_result, dim * sizeof(E));
// Copy the input vectors and the modulus from the host to the device
cudaMemcpyAsync(d_matrix, matrix_elements, (dim * dim) * sizeof(E), cudaMemcpyHostToDevice, stream);
cudaMemcpyAsync(d_vector, vector_elements, dim * sizeof(E), cudaMemcpyHostToDevice, stream);
cudaMemcpy(d_matrix, matrix_elements, (dim * dim) * sizeof(E), cudaMemcpyHostToDevice);
cudaMemcpy(d_vector, vector_elements, dim * sizeof(E), cudaMemcpyHostToDevice);
// Call the kernel to perform element-wise modular multiplication
matrixVectorMult<<<num_blocks, threads_per_block, 0, stream>>>(d_matrix, d_vector, d_result, dim);
matrixVectorMult<<<num_blocks, threads_per_block>>>(d_matrix, d_vector, d_result, dim);
cudaMemcpyAsync(result, d_result, dim * sizeof(E), cudaMemcpyDeviceToHost, stream);
cudaMemcpy(result, d_result, dim * sizeof(E), cudaMemcpyDeviceToHost);
cudaFreeAsync(d_matrix, stream);
cudaFreeAsync(d_vector, stream);
cudaFreeAsync(d_result, stream);
cudaFree(d_matrix);
cudaFree(d_vector);
cudaFree(d_result);
cudaStreamSynchronize(stream);
return 0;
}
#endif

View File

@@ -0,0 +1,22 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
namespace BLS12_377 {
typedef Field<PARAMS_BLS12_377::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_BLS12_377::fq_config> point_field_t;
static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_377::weierstrass_b };
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_BLS12_377::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_re },
point_field_t{ PARAMS_BLS12_377::weierstrass_b_g2_im }};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
}

View File

@@ -0,0 +1,308 @@
#ifndef _BLS12_377_LDE
#define _BLS12_377_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" BLS12_377::scalar_t* build_domain_cuda_bls12_377(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
try
{
if (inverse) {
return fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega_inv(logn));
} else {
return fill_twiddle_factors_array(domain_size, BLS12_377::scalar_t::omega(logn));
}
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return nullptr;
}
}
extern "C" int ntt_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ntt_batch_cuda_bls12_377(BLS12_377::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<BLS12_377::scalar_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_batch_cuda_bls12_377(BLS12_377::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<BLS12_377::projective_t,BLS12_377::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_evaluations, BLS12_377::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_evaluations, BLS12_377::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_evaluations, BLS12_377::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
BLS12_377::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_377::scalar_t *coset_powers, unsigned device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_377(BLS12_377::scalar_t* d_out, BLS12_377::scalar_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t *d_coefficients, BLS12_377::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::projective_t* d_coefficients, BLS12_377::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_377::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_batch_cuda_bls12_377(BLS12_377::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_cuda_bls12_377(BLS12_377::projective_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_batch_cuda_bls12_377(BLS12_377::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif
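A note on the reverse_order_* wrappers above (the bls12_381 and bn254 files below repeat the same pattern): logn is derived as uint32_t(log(n) / log(2)), which leans on floating-point rounding behaving well for power-of-two sizes. An integer bit-scan is exact and avoids the question entirely; a minimal sketch, not part of the diff:

// Exact integer log2 for a power-of-two n; sidesteps any double-precision
// rounding in the uint32_t(log(n) / log(2)) idiom used by the wrappers.
uint32_t exact_log2(uint32_t n) {
    uint32_t logn = 0;
    while (n > 1) { n >>= 1; ++logn; }
    return logn;
}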


@@ -0,0 +1,87 @@
#ifndef _BLS12_377_MSM
#define _BLS12_377_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
extern "C"
int msm_cuda_bls12_377(BLS12_377::projective_t *out, BLS12_377::affine_t points[],
BLS12_377::scalar_t scalars[], size_t count, size_t device_id = 0)
{
try
{
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, count, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
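A minimal host-side sketch of calling the entry point above (not part of the diff; includes and error checking omitted, and the caller is assumed to supply valid affine points; rand_host comes from primitives/field.cuh):

size_t count = 1 << 10;
std::vector<BLS12_377::scalar_t> scalars(count);
std::vector<BLS12_377::affine_t> points(count); // must hold valid curve points
for (auto& s : scalars) s = BLS12_377::scalar_t::rand_host();
BLS12_377::projective_t result;
int err = msm_cuda_bls12_377(&result, points.data(), scalars.data(), count);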
extern "C" int msm_batch_cuda_bls12_377(BLS12_377::projective_t* out, BLS12_377::affine_t points[],
BLS12_377::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
try
{
batched_large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(scalars, points, batch_size, msm_size, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a polynomial using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Output point to write the result to.
* @param d_scalars Scalars for the MSM. Must be on device.
* @param d_points Points for the MSM. Must be on device.
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t device_id = 0)
{
try
{
large_msm<BLS12_377::scalar_t, BLS12_377::projective_t, BLS12_377::affine_t>(d_scalars, d_points, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
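Unlike msm_cuda_bls12_377 above, which takes host arrays, the commit_* functions expect their inputs already on the device, per the doc comment. A sketch of the transfer boilerplate, with error checking omitted:

BLS12_377::scalar_t* d_scalars;
BLS12_377::affine_t* d_points;
BLS12_377::projective_t* d_result;
cudaMalloc(&d_scalars, count * sizeof(BLS12_377::scalar_t));
cudaMalloc(&d_points, count * sizeof(BLS12_377::affine_t));
cudaMalloc(&d_result, sizeof(BLS12_377::projective_t));
cudaMemcpy(d_scalars, scalars.data(), count * sizeof(BLS12_377::scalar_t), cudaMemcpyHostToDevice);
cudaMemcpy(d_points, points.data(), count * sizeof(BLS12_377::affine_t), cudaMemcpyHostToDevice);
commit_cuda_bls12_377(d_result, d_scalars, d_points, count);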
/**
* Commit to a batch of polynomials using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Output point to write the results to.
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
* @param count Length of `d_points` array, `d_scalars` has length `count` * `batch_size`.
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda_bls12_377(BLS12_377::projective_t* d_out, BLS12_377::scalar_t* d_scalars, BLS12_377::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
try
{
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif
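For the batched commit, the doc comment only pins down total lengths; the contiguous layout sketched below (scalars of polynomial j starting at d_scalars[j * count], d_out holding batch_size results) is an assumption that should be checked against batched_large_msm:

cudaMalloc(&d_out, batch_size * sizeof(BLS12_377::projective_t));
cudaMalloc(&d_scalars, batch_size * count * sizeof(BLS12_377::scalar_t));
// d_points as in the single commit; the same basis serves every MSM in the batch
commit_batch_cuda_bls12_377(d_out, d_scalars, d_points, count, batch_size);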


@@ -1,6 +1,6 @@
#pragma once
#include "../../utils/storage.cuh"
namespace PARAMS{
namespace PARAMS_BLS12_377{
struct fp_config{
static constexpr unsigned limbs_count = 8;
static constexpr storage<limbs_count> modulus = {0x00000001, 0x0a118000, 0xd0000001, 0x59aa76fe, 0x5c37b001, 0x60b44d1e, 0x9a2ca556, 0x12ab655e};


@@ -0,0 +1,22 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
extern "C" bool eq_bls12_377(BLS12_377::projective_t *point1, BLS12_377::projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_377::point_field_t::zero()) && (point1->y == BLS12_377::point_field_t::zero()) && (point1->z == BLS12_377::point_field_t::zero())) &&
!((point2->x == BLS12_377::point_field_t::zero()) && (point2->y == BLS12_377::point_field_t::zero()) && (point2->z == BLS12_377::point_field_t::zero()));
}
#if defined(G2_DEFINED)
extern "C" bool eq_g2_bls12_377(BLS12_377::g2_projective_t *point1, BLS12_377::g2_projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_377::g2_point_field_t::zero()) && (point1->y == BLS12_377::g2_point_field_t::zero()) && (point1->z == BLS12_377::g2_point_field_t::zero())) &&
!((point2->x == BLS12_377::g2_point_field_t::zero()) && (point2->y == BLS12_377::g2_point_field_t::zero()) && (point2->z == BLS12_377::g2_point_field_t::zero()));
}
#endif
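Note that both eq_* predicates deliberately return false whenever either argument carries the all-zero encoding (x = y = z = 0), which is not a valid projective point, so an invalid encoding never compares equal to anything, including itself.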


@@ -1,5 +1,5 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"
#include "poseidon.cu"


@@ -0,0 +1,66 @@
#ifndef _BLS12_377_VEC_MULT
#define _BLS12_377_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
extern "C" int32_t vec_mod_mult_point_bls12_377(BLS12_377::projective_t *inout,
BLS12_377::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_377::projective_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t vec_mod_mult_scalar_bls12_377(BLS12_377::scalar_t *inout,
BLS12_377::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_377::scalar_t, BLS12_377::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t matrix_vec_mod_mult_bls12_377(BLS12_377::scalar_t *matrix_flattened,
BLS12_377::scalar_t *input,
BLS12_377::scalar_t *output,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
matrix_mod_mult<BLS12_377::scalar_t>(matrix_flattened, input, output, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
#endif
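A short usage sketch for the scalar variant above (not part of the diff; judging by vector_mod_mult's arguments, the operation is an element-wise modular multiply that overwrites inout):

std::vector<BLS12_377::scalar_t> inout(n), factors(n);
for (size_t i = 0; i < n; i++) {
    inout[i] = BLS12_377::scalar_t::rand_host();
    factors[i] = BLS12_377::scalar_t::rand_host();
}
// presumably inout[i] <- inout[i] * factors[i] (mod p)
vec_mod_mult_scalar_bls12_377(inout.data(), factors.data(), n, 0);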


@@ -0,0 +1,22 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
namespace BLS12_381 {
typedef Field<PARAMS_BLS12_381::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_BLS12_381::fq_config> point_field_t;
static constexpr point_field_t b = point_field_t{ PARAMS_BLS12_381::weierstrass_b };
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_BLS12_381::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BLS12_381::weierstrass_b_g2_re },
point_field_t{ PARAMS_BLS12_381::weierstrass_b_g2_im }};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
}
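These per-curve namespaces are what let several curves coexist in one binary; client code simply qualifies the types. A small host-side sketch, assuming the Field API visible in the field.cuh hunks at the end of this compare:

BLS12_381::scalar_t s = BLS12_381::scalar_t::rand_host();
BLS12_381::scalar_t o = BLS12_381::scalar_t::one();
std::cout << s << std::endl; // printed via the new limb-list operator<<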


@@ -0,0 +1,308 @@
#ifndef _BLS12_381_LDE
#define _BLS12_381_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" BLS12_381::scalar_t* build_domain_cuda_bls12_381(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
try
{
if (inverse) {
return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega_inv(logn));
} else {
return fill_twiddle_factors_array(domain_size, BLS12_381::scalar_t::omega(logn));
}
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return nullptr;
}
}
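A sketch of the intended flow, under the assumption (suggested by the signatures) that forward twiddles feed evaluate and inverse twiddles feed interpolate, and that the returned pointer lives on the device:

uint32_t logn = 10, n = 1u << logn;
BLS12_381::scalar_t* d_twiddles = build_domain_cuda_bls12_381(n, logn, false);
BLS12_381::scalar_t* d_inv_twiddles = build_domain_cuda_bls12_381(n, logn, true);
// with d_coeffs / d_evals already on device:
interpolate_scalars_cuda_bls12_381(d_coeffs, d_evals, d_inv_twiddles, n);
evaluate_scalars_cuda_bls12_381(d_evals, d_coeffs, d_twiddles, n, n);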
extern "C" int ntt_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
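Forward and inverse transforms share one entry point via the inverse flag, so a round-trip makes a quick sanity check; a sketch (the end2end wrapper appears to handle host/device transfers itself):

std::vector<BLS12_381::scalar_t> arr(n);
for (auto& x : arr) x = BLS12_381::scalar_t::rand_host();
std::vector<BLS12_381::scalar_t> copy = arr;
ntt_cuda_bls12_381(arr.data(), n, false); // forward NTT in place
ntt_cuda_bls12_381(arr.data(), n, true);  // inverse NTT: arr should match copy again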
extern "C" int ecntt_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ntt_batch_cuda_bls12_381(BLS12_381::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<BLS12_381::scalar_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_batch_cuda_bls12_381(BLS12_381::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<BLS12_381::projective_t,BLS12_381::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_evaluations, BLS12_381::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_evaluations, BLS12_381::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_evaluations, BLS12_381::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0)
{
try
{
BLS12_381::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
BLS12_381::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0)
{
try
{
BLS12_381::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
BLS12_381::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_381::scalar_t *coset_powers, unsigned device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_bls12_381(BLS12_381::scalar_t* d_out, BLS12_381::scalar_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t *d_coefficients, BLS12_381::scalar_t *d_domain, unsigned domain_size,
unsigned n, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::projective_t* d_coefficients, BLS12_381::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BLS12_381::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_batch_cuda_bls12_381(BLS12_381::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_cuda_bls12_381(BLS12_381::projective_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_batch_cuda_bls12_381(BLS12_381::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif


@@ -1,18 +1,19 @@
#ifndef _MSM
#define _MSM
#include "../appUtils/msm/msm.cu"
#ifndef _BLS12_381_MSM
#define _BLS12_381_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
extern "C"
int msm_cuda(projective_t *out, affine_t points[],
scalar_t scalars[], size_t count, size_t device_id = 0, cudaStream_t stream = 0)
int msm_cuda_bls12_381(BLS12_381::projective_t *out, BLS12_381::affine_t points[],
BLS12_381::scalar_t scalars[], size_t count, size_t device_id = 0)
{
try
{
large_msm<scalar_t, projective_t, affine_t>(scalars, points, count, out, false, stream);
large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, count, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -22,14 +23,13 @@ int msm_cuda(projective_t *out, affine_t points[],
}
}
extern "C" int msm_batch_cuda(projective_t* out, affine_t points[],
scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0, cudaStream_t stream = 0)
extern "C" int msm_batch_cuda_bls12_381(BLS12_381::projective_t* out, BLS12_381::affine_t points[],
BLS12_381::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
try
{
cudaStreamCreate(&stream);
batched_large_msm<scalar_t, projective_t, affine_t>(scalars, points, batch_size, msm_size, out, false, stream);
cudaStreamSynchronize(stream);
batched_large_msm<BLS12_381::scalar_t, BLS12_381::projective_t, BLS12_381::affine_t>(scalars, points, batch_size, msm_size, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
@@ -48,12 +48,11 @@ extern "C" int msm_batch_cuda(projective_t* out, affine_t points[],
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda(projective_t* d_out, scalar_t* d_scalars, affine_t* d_points, size_t count, size_t device_id = 0, cudaStream_t stream = 0)
int commit_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t device_id = 0)
{
try
{
large_msm(d_scalars, d_points, count, d_out, true, stream);
cudaStreamSynchronize(stream);
large_msm(d_scalars, d_points, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
@@ -73,13 +72,11 @@ extern "C" int msm_batch_cuda(projective_t* out, affine_t points[],
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda(projective_t* d_out, scalar_t* d_scalars, affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0, cudaStream_t stream = 0)
int commit_batch_cuda_bls12_381(BLS12_381::projective_t* d_out, BLS12_381::scalar_t* d_scalars, BLS12_381::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
try
{
cudaStreamCreate(&stream);
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true, stream);
cudaStreamSynchronize(stream);
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)


@@ -2,7 +2,7 @@
#include "../../utils/storage.cuh"
namespace PARAMS{
namespace PARAMS_BLS12_381{
struct fp_config {
// field structure size = 8 * 32 bit
static constexpr unsigned limbs_count = 8;


@@ -0,0 +1,19 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
extern "C" bool eq_bls12_381(BLS12_381::projective_t *point1, BLS12_381::projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_381::point_field_t::zero()) && (point1->y == BLS12_381::point_field_t::zero()) && (point1->z == BLS12_381::point_field_t::zero())) &&
!((point2->x == BLS12_381::point_field_t::zero()) && (point2->y == BLS12_381::point_field_t::zero()) && (point2->z == BLS12_381::point_field_t::zero()));
}
#if defined(G2_DEFINED)
extern "C" bool eq_g2_bls12_381(BLS12_381::g2_projective_t *point1, BLS12_381::g2_projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BLS12_381::g2_point_field_t::zero()) && (point1->y == BLS12_381::g2_point_field_t::zero()) && (point1->z == BLS12_381::g2_point_field_t::zero())) &&
!((point2->x == BLS12_381::g2_point_field_t::zero()) && (point2->y == BLS12_381::g2_point_field_t::zero()) && (point2->z == BLS12_381::g2_point_field_t::zero()));
}
#endif


@@ -0,0 +1,4 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"


@@ -0,0 +1,65 @@
#ifndef _BLS12_381_VEC_MULT
#define _BLS12_381_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
extern "C" int32_t vec_mod_mult_point_bls12_381(BLS12_381::projective_t *inout,
BLS12_381::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_381::projective_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t vec_mod_mult_scalar_bls12_381(BLS12_381::scalar_t *inout,
BLS12_381::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<BLS12_381::scalar_t, BLS12_381::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t matrix_vec_mod_mult_bls12_381(BLS12_381::scalar_t *matrix_flattened,
BLS12_381::scalar_t *input,
BLS12_381::scalar_t *output,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
matrix_mod_mult<BLS12_381::scalar_t>(matrix_flattened, input, output, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
#endif


@@ -0,0 +1,22 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
namespace BN254 {
typedef Field<PARAMS_BN254::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<PARAMS_BN254::fq_config> point_field_t;
static constexpr point_field_t b = point_field_t{ PARAMS_BN254::weierstrass_b };
typedef Projective<point_field_t, scalar_field_t, b> projective_t;
typedef Affine<point_field_t> affine_t;
#if defined(G2_DEFINED)
typedef ExtensionField<PARAMS_BN254::fq_config> g2_point_field_t;
static constexpr g2_point_field_t b_g2 = g2_point_field_t{ point_field_t{ PARAMS_BN254::weierstrass_b_g2_re },
point_field_t{ PARAMS_BN254::weierstrass_b_g2_im }};
typedef Projective<g2_point_field_t, scalar_field_t, b_g2> g2_projective_t;
typedef Affine<g2_point_field_t> g2_affine_t;
#endif
}

icicle/curves/bn254/lde.cu (new file, 308 lines)

@@ -0,0 +1,308 @@
#ifndef _BN254_LDE
#define _BN254_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" BN254::scalar_t* build_domain_cuda_bn254(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
try
{
if (inverse) {
return fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega_inv(logn));
} else {
return fill_twiddle_factors_array(domain_size, BN254::scalar_t::omega(logn));
}
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return nullptr;
}
}
extern "C" int ntt_cuda_bn254(BN254::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<BN254::scalar_t,BN254::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_cuda_bn254(BN254::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<BN254::projective_t,BN254::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ntt_batch_cuda_bn254(BN254::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<BN254::scalar_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_batch_cuda_bn254(BN254::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<BN254::projective_t,BN254::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_evaluations, BN254::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_evaluations, BN254::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_evaluations, BN254::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_coefficients, BN254::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0)
{
try
{
BN254::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
BN254::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_coefficients, BN254::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0)
{
try
{
BN254::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
BN254::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t *d_coefficients, BN254::scalar_t *d_domain, unsigned domain_size,
unsigned n, BN254::scalar_t *coset_powers, unsigned device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_bn254(BN254::scalar_t* d_out, BN254::scalar_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t *d_coefficients, BN254::scalar_t *d_domain, unsigned domain_size,
unsigned n, BN254::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_batch_cuda_bn254(BN254::projective_t* d_out, BN254::projective_t* d_coefficients, BN254::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, BN254::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_cuda_bn254(BN254::scalar_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_batch_cuda_bn254(BN254::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_cuda_bn254(BN254::projective_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_batch_cuda_bn254(BN254::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif


@@ -0,0 +1,87 @@
#ifndef _BN254_MSM
#define _BN254_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
extern "C"
int msm_cuda_bn254(BN254::projective_t *out, BN254::affine_t points[],
BN254::scalar_t scalars[], size_t count, size_t device_id = 0)
{
try
{
large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, count, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int msm_batch_cuda_bn254(BN254::projective_t* out, BN254::affine_t points[],
BN254::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
try
{
batched_large_msm<BN254::scalar_t, BN254::projective_t, BN254::affine_t>(scalars, points, batch_size, msm_size, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a polynomial using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Ouptut point to write the result to.
* @param d_scalars Scalars for the MSM. Must be on device.
* @param d_points Points for the MSM. Must be on device.
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t device_id = 0)
{
try
{
large_msm(d_scalars, d_points, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a batch of polynomials using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Ouptut point to write the results to.
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
* @param count Length of `d_points` array, `d_scalar` has length `count` * `batch_size`.
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda_bn254(BN254::projective_t* d_out, BN254::scalar_t* d_scalars, BN254::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
try
{
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif


@@ -1,6 +1,6 @@
#pragma once
#include "../../utils/storage.cuh"
namespace PARAMS{
namespace PARAMS_BN254{
struct fp_config{
static constexpr unsigned limbs_count = 8;


@@ -0,0 +1,19 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
extern "C" bool eq_bn254(BN254::projective_t *point1, BN254::projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BN254::point_field_t::zero()) && (point1->y == BN254::point_field_t::zero()) && (point1->z == BN254::point_field_t::zero())) &&
!((point2->x == BN254::point_field_t::zero()) && (point2->y == BN254::point_field_t::zero()) && (point2->z == BN254::point_field_t::zero()));
}
#if defined(G2_DEFINED)
extern "C" bool eq_g2_bn254(BN254::g2_projective_t *point1, BN254::g2_projective_t *point2)
{
return (*point1 == *point2) &&
!((point1->x == BN254::g2_point_field_t::zero()) && (point1->y == BN254::g2_point_field_t::zero()) && (point1->z == BN254::g2_point_field_t::zero())) &&
!((point2->x == BN254::g2_point_field_t::zero()) && (point2->y == BN254::g2_point_field_t::zero()) && (point2->z == BN254::g2_point_field_t::zero()));
}
#endif


@@ -0,0 +1,4 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"


@@ -0,0 +1,72 @@
#ifndef _BN254_VEC_MULT
#define _BN254_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
extern "C" int32_t vec_mod_mult_point_bn254(BN254::projective_t *inout,
BN254::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try
{
// TODO: device_id
vector_mod_mult<BN254::projective_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t vec_mod_mult_scalar_bn254(BN254::scalar_t *inout,
BN254::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try
{
// TODO: device_id
vector_mod_mult<BN254::scalar_t, BN254::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t matrix_vec_mod_mult_bn254(BN254::scalar_t *matrix_flattened,
BN254::scalar_t *input,
BN254::scalar_t *output,
size_t n_elments,
size_t device_id)
{
// TODO: use device_id when working with multiple devices
(void)device_id;
try
{
// TODO: device_id
matrix_mod_mult<BN254::scalar_t>(matrix_flattened, input, output, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
#endif


@@ -0,0 +1,14 @@
#pragma once
#include "../../primitives/field.cuh"
#include "../../primitives/projective.cuh"
#include "params.cuh"
namespace BN254 {
typedef Field<CURVE_NAME_U::fp_config> scalar_field_t;
typedef scalar_field_t scalar_t;
typedef Field<CURVE_NAME_U::fq_config> point_field_t;
typedef Projective<point_field_t, scalar_field_t, CURVE_NAME_U::group_generator, CURVE_NAME_U::weierstrass_b> projective_t;
typedef Affine<point_field_t> affine_t;
}
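The files from here on are the generation templates behind the concrete per-curve directories above: CURVE_NAME_U and CURVE_NAME_L are uppercase/lowercase placeholders substituted with an actual curve name (e.g. BN254 / bn254). The hard-coded namespace BN254 in this template's curve_config looks like a leftover that presumably gets replaced during generation.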


@@ -0,0 +1,308 @@
#ifndef _CURVE_NAME_U_LDE
#define _CURVE_NAME_U_LDE
#include <cuda.h>
#include "../../appUtils/ntt/lde.cu"
#include "../../appUtils/ntt/ntt.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
#include "curve_config.cuh"
extern "C" CURVE_NAME_U::scalar_t* build_domain_cuda_CURVE_NAME_L(uint32_t domain_size, uint32_t logn, bool inverse, size_t device_id = 0)
{
try
{
if (inverse) {
return fill_twiddle_factors_array(domain_size, CURVE_NAME_U::scalar_t::omega_inv(logn));
} else {
return fill_twiddle_factors_array(domain_size, CURVE_NAME_U::scalar_t::omega(logn));
}
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return nullptr;
}
}
extern "C" int ntt_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<CURVE_NAME_U::scalar_t,CURVE_NAME_U::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *arr, uint32_t n, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_template<CURVE_NAME_U::projective_t,CURVE_NAME_U::scalar_t>(arr, n, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ntt_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<CURVE_NAME_U::scalar_t,CURVE_NAME_U::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int ecntt_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *arr, uint32_t arr_size, uint32_t batch_size, bool inverse, size_t device_id = 0)
{
try
{
return ntt_end2end_batch_template<CURVE_NAME_U::projective_t,CURVE_NAME_U::scalar_t>(arr, arr_size, batch_size, inverse); // TODO: pass device_id
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_evaluations, CURVE_NAME_U::scalar_t *d_domain, unsigned n, unsigned device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_evaluations, CURVE_NAME_U::scalar_t* d_domain, unsigned n,
unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_evaluations, CURVE_NAME_U::scalar_t *d_domain, unsigned n, size_t device_id = 0)
{
try
{
return interpolate(d_out, d_evaluations, d_domain, n);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int interpolate_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_evaluations, CURVE_NAME_U::scalar_t* d_domain,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
return interpolate_batch(d_out, d_evaluations, d_domain, n, batch_size);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain,
unsigned domain_size, unsigned n, unsigned device_id = 0)
{
try
{
CURVE_NAME_U::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
CURVE_NAME_U::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain,
unsigned domain_size, unsigned n, size_t device_id = 0)
{
try
{
CURVE_NAME_U::scalar_t* _null = nullptr;
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, size_t device_id = 0)
{
try
{
CURVE_NAME_U::scalar_t* _null = nullptr;
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, false, _null);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain, unsigned domain_size,
unsigned n, CURVE_NAME_U::scalar_t *coset_powers, unsigned device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_scalars_on_coset_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* d_out, CURVE_NAME_U::scalar_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t *d_coefficients, CURVE_NAME_U::scalar_t *d_domain, unsigned domain_size,
unsigned n, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate(d_out, d_coefficients, d_domain, domain_size, n, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int evaluate_points_on_coset_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::projective_t* d_coefficients, CURVE_NAME_U::scalar_t* d_domain, unsigned domain_size,
unsigned n, unsigned batch_size, CURVE_NAME_U::scalar_t *coset_powers, size_t device_id = 0)
{
try
{
return evaluate_batch(d_out, d_coefficients, d_domain, domain_size, n, batch_size, true, coset_powers);
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_scalars_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::scalar_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* arr, int n, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order(arr, n, logn);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int reverse_order_points_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* arr, int n, int batch_size, size_t device_id = 0)
{
try
{
uint32_t logn = uint32_t(log(n) / log(2));
reverse_order_batch(arr, n, logn, batch_size);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif


@@ -0,0 +1,94 @@
#ifndef _CURVE_NAME_U_MSM
#define _CURVE_NAME_U_MSM
#include "../../appUtils/msm/msm.cu"
#include <stdexcept>
#include <cuda.h>
#include "curve_config.cuh"
extern "C"
int msm_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t *out, CURVE_NAME_U::affine_t points[],
CURVE_NAME_U::scalar_t scalars[], size_t count, size_t device_id = 0)
{
try
{
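// Size-based dispatch: inputs of up to 256 points take the short_msm path,
// larger inputs take large_msm; the 256-point cutoff appears to be a heuristic.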
if (count>256){
large_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, count, out, false);
}
else{
short_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, count, out, false);
}
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
extern "C" int msm_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* out, CURVE_NAME_U::affine_t points[],
CURVE_NAME_U::scalar_t scalars[], size_t batch_size, size_t msm_size, size_t device_id = 0)
{
try
{
batched_large_msm<CURVE_NAME_U::scalar_t, CURVE_NAME_U::projective_t, CURVE_NAME_U::affine_t>(scalars, points, batch_size, msm_size, out, false);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a polynomial using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Output point to write the result to.
* @param d_scalars Scalars for the MSM. Must be on device.
* @param d_points Points for the MSM. Must be on device.
* @param count Length of `d_scalars` and `d_points` arrays (they should have equal length).
*/
extern "C"
int commit_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::scalar_t* d_scalars, CURVE_NAME_U::affine_t* d_points, size_t count, size_t device_id = 0)
{
try
{
large_msm(d_scalars, d_points, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
/**
* Commit to a batch of polynomials using the MSM.
* Note: this function just calls the MSM, it doesn't convert between evaluation and coefficient form of scalars or points.
* @param d_out Output point to write the results to.
* @param d_scalars Scalars for the MSMs of all polynomials. Must be on device.
* @param d_points Points for the MSMs. Must be on device. It is assumed that this set of bases is used for each MSM.
* @param count Length of `d_points` array, `d_scalars` has length `count` * `batch_size`.
* @param batch_size Size of the batch.
*/
extern "C"
int commit_batch_cuda_CURVE_NAME_L(CURVE_NAME_U::projective_t* d_out, CURVE_NAME_U::scalar_t* d_scalars, CURVE_NAME_U::affine_t* d_points, size_t count, size_t batch_size, size_t device_id = 0)
{
try
{
batched_large_msm(d_scalars, d_points, batch_size, count, d_out, true);
return 0;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what());
return -1;
}
}
#endif


@@ -0,0 +1,8 @@
#include <cuda.h>
#include "curve_config.cuh"
#include "../../primitives/projective.cuh"
extern "C" bool eq_CURVE_NAME_L(CURVE_NAME_U::projective_t *point1, CURVE_NAME_U::projective_t *point2, size_t device_id = 0)
{
return (*point1 == *point2);
}


@@ -0,0 +1,4 @@
#include "projective.cu"
#include "lde.cu"
#include "msm.cu"
#include "ve_mod_mult.cu"


@@ -0,0 +1,66 @@
#ifndef _CURVE_NAME_U_VEC_MULT
#define _CURVE_NAME_U_VEC_MULT
#include <stdio.h>
#include <iostream>
#include "../../primitives/field.cuh"
#include "../../utils/storage.cuh"
#include "../../primitives/projective.cuh"
#include "curve_config.cuh"
#include "../../appUtils/vector_manipulation/ve_mod_mult.cuh"
extern "C" int32_t vec_mod_mult_point_CURVE_NAME_L(CURVE_NAME_U::projective_t *inout,
CURVE_NAME_U::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<CURVE_NAME_U::projective_t, CURVE_NAME_U::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t vec_mod_mult_scalar_CURVE_NAME_L(CURVE_NAME_U::scalar_t *inout,
CURVE_NAME_U::scalar_t *scalar_vec,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
vector_mod_mult<CURVE_NAME_U::scalar_t, CURVE_NAME_U::scalar_t>(scalar_vec, inout, inout, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
extern "C" int32_t matrix_vec_mod_mult_CURVE_NAME_L(CURVE_NAME_U::scalar_t *matrix_flattened,
CURVE_NAME_U::scalar_t *input,
CURVE_NAME_U::scalar_t *output,
size_t n_elments,
size_t device_id)
{
try
{
// TODO: device_id
matrix_mod_mult<CURVE_NAME_U::scalar_t>(matrix_flattened, input, output, n_elments);
return CUDA_SUCCESS;
}
catch (const std::runtime_error &ex)
{
printf("error %s", ex.what()); // TODO: error code and message
return -1;
}
}
#endif

icicle/curves/index.cu (new file, 3 lines)

@@ -0,0 +1,3 @@
#include "bls12_381/supported_operations.cu"
#include "bls12_377/supported_operations.cu"
#include "bn254/supported_operations.cu"


@@ -5,9 +5,6 @@
#include "../utils/host_math.cuh"
#include <random>
#include <iostream>
#include <iomanip>
#include <string>
#include <sstream>
#define HOST_INLINE __host__ __forceinline__
#define DEVICE_INLINE __device__ __forceinline__
@@ -26,15 +23,6 @@ template <class CONFIG> class Field {
return Field { CONFIG::one };
}
static constexpr HOST_DEVICE_INLINE Field from(uint32_t value) {
storage<TLC> scalar;
scalar.limbs[0] = value;
for (int i = 1; i < TLC; i++) {
scalar.limbs[i] = 0;
}
return Field { scalar };
}
static constexpr HOST_DEVICE_INLINE Field generator_x() {
return Field { CONFIG::generator_x };
}
@@ -517,6 +505,7 @@ template <class CONFIG> class Field {
static HOST_INLINE Field rand_host() {
std::random_device rd;
std::mt19937_64 generator(rd());
// std::mt19937_64 generator(0);
std::uniform_int_distribution<unsigned> distribution;
Field value{};
for (unsigned i = 0; i < TLC; i++)
@@ -535,14 +524,10 @@ template <class CONFIG> class Field {
}
friend std::ostream& operator<<(std::ostream& os, const Field& xs) {
std::stringstream hex_string;
hex_string << std::hex << std::setfill('0');
for (int i = 0; i < TLC; i++) {
hex_string << std::setw(8) << xs.limbs_storage.limbs[i];
}
os << "0x" << hex_string.str();
os << "{";
for (int i = 0; i < TLC; i++)
os << xs.limbs_storage.limbs[i] << ", ";
os << "}";
return os;
}
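The last hunk swaps the zero-padded hex printer for a brace-delimited limb dump (note the trailing ", " it leaves before the closing brace). What client code sees, sketched under the Field API above:

BLS12_381::scalar_t x = BLS12_381::scalar_t::rand_host();
std::cout << x; // now prints "{limb0, limb1, ..., }" with decimal limbs in storage order
                // previously: a single zero-padded "0x..." hex string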

Some files were not shown because too many files have changed in this diff.