update Rust examples to support installed backend

This commit is contained in:
Yuval Shekel
2024-07-28 20:28:48 +03:00
parent f8661ac0ef
commit 65ca51ca5e
11 changed files with 186 additions and 140 deletions

View File

@@ -1,8 +1,7 @@
# This workflow is a demo of how to run all examples in the Icicle repository.
# For each language directory (c++, Rust, etc.) the workflow
# (1) loops over all examples (msm, ntt, etc.) and
# (2) runs ./compile.sh and ./run.sh in each directory.
# The script ./compile.sh should compile the example and ./run.sh should run it.
# For each language directory (c++, Rust, etc.) the workflow
# (1) loops over all examples (msm, ntt, etc.) and
# (2) runs ./run.sh in each directory.
# Each script should return 0 for success and 1 otherwise.
name: Examples
@@ -14,7 +13,7 @@ on:
- yshekel/V3 # TODO remove when merged to V3
push:
branches:
- V3
- V3
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -47,8 +46,8 @@ jobs:
echo "::set-output name=cuda-backend-branch::$CUDA_BE_BRANCH"
run-examples:
runs-on: [self-hosted, Linux, X64, icicle, examples, extract-cuda-backend-branch]
needs: check-changed-files
runs-on: [self-hosted, Linux, X64, icicle, examples]
needs: [check-changed-files, extract-cuda-backend-branch]
steps:
- name: Checkout
uses: actions/checkout@v4
@@ -68,21 +67,21 @@ jobs:
for dir in $(find . -mindepth 1 -maxdepth 1 -type d); do
if [ -d "$dir" ]; then
echo "Running command in $dir"
cd $dir
cd $dir
./run.sh -d CUDA
cd -
fi
done
# - name: Rust examples
# working-directory: ./examples/rust
# if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.examples == 'true'
# run: |
# # loop over all directories in the current directory
# for dir in $(find . -mindepth 1 -maxdepth 1 -type d); do
# if [ -d "$dir" ]; then
# echo "Running command in $dir"
# cd $dir
# cargo run --release
# cd -
# fi
# done
done
- name: Rust examples
working-directory: ./examples/rust
if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.examples == 'true'
run: |
# loop over all directories in the current directory
for dir in $(find . -mindepth 1 -maxdepth 1 -type d); do
if [ -d "$dir" ]; then
echo "Running command in $dir"
cd $dir
./run.sh -d CUDA
cd -
fi
done

View File

@@ -3,7 +3,7 @@ use icicle_runtime::{
stream::IcicleStream,
};
// using both bn254 and bls12-377 curves
// Using both bn254 and bls12-377 curves
use icicle_bls12_377::curve::{
CurveCfg as BLS12377CurveCfg, G1Projective as BLS12377G1Projective, ScalarCfg as BLS12377ScalarCfg,
};
@@ -37,7 +37,7 @@ fn try_load_and_set_backend_device(args: &Args) {
.backend_install_dir
.is_empty()
{
println!("trying to load backend from {}", &args.backend_install_dir);
println!("Trying to load backend from {}", &args.backend_install_dir);
icicle_runtime::runtime::load_backend(&args.backend_install_dir, true /*recursive */).unwrap();
}
println!("Setting device {}", args.device_type);
@@ -46,12 +46,15 @@ fn try_load_and_set_backend_device(args: &Args) {
fn main() {
let args = Args::parse();
println!("{:?}", args);
try_load_and_set_backend_device(&args);
let lower_bound = args.lower_bound_log_size;
let upper_bound = args.upper_bound_log_size;
println!("Running Icicle Examples: Rust MSM");
let upper_size = 1 << (upper_bound);
let upper_size = 1 << upper_bound;
println!("Generating random inputs on host for bn254...");
let upper_points = CurveCfg::generate_random_affine_points(upper_size);
let g2_upper_points = G2CurveCfg::generate_random_affine_points(upper_size);
@@ -65,17 +68,17 @@ fn main() {
let log_size = i;
let size = 1 << log_size;
println!(
"---------------------- MSM size 2^{}={} ------------------------",
"---------------------- MSM size 2^{} = {} ------------------------",
log_size, size
);
// Setting Bn254 points and scalars
let points = HostSlice::from_slice(&upper_points[..size]);
let g2_points = HostSlice::from_slice(&g2_upper_points[..size]);
let scalars = HostSlice::from_slice(&upper_scalars[..size]);
// Setting bls12377 points and scalars
// let points_bls12377 = &upper_points_bls12377[..size];
let points_bls12377 = HostSlice::from_slice(&upper_points_bls12377[..size]); // &upper_points_bls12377[..size];
let points_bls12377 = HostSlice::from_slice(&upper_points_bls12377[..size]);
let scalars_bls12377 = HostSlice::from_slice(&upper_scalars_bls12377[..size]);
println!("Configuring bn254 MSM...");
@@ -110,7 +113,7 @@ fn main() {
)
.unwrap();
println!("Moving results to host..");
println!("Moving results to host...");
let mut msm_host_result = vec![G1Projective::zero(); 1];
let mut g2_msm_host_result = vec![G2Projective::zero(); 1];
let mut msm_host_result_bls12377 = vec![BLS12377G1Projective::zero(); 1];

View File

@@ -4,10 +4,6 @@
`ICICLE` provides Rust bindings to a CUDA-accelerated C++ implementation of the [Number Theoretic Transform](https://github.com/ingonyama-zk/ingopedia/blob/master/src/fft.md).
## Best Practices
In order to save time and setting up prerequisites manually, we recommend running this example in our [ZKContainer](../../ZKContainer.md).
## Usage
```rust
@@ -32,38 +28,9 @@ In this example we use the `BN254` and `BLS12377` fields.
7. Compare results with arkworks
Running the example:
```sh
./run.sh CPU # to use CPU backend
./run.sh CUDA # to load and use CUDA backend
```
You can add the `--feature profile` flag to measure times of both ICICLE and arkworks.
> [!NOTE]
> The default size is 2^20. You can change this by passing the argument.
```sh
# for CPU
./run.sh -d CPU
# for CUDA
./run.sh -d CUDA -b /path/to/cuda/backend/install/dir
```
## Benchmarks
These benchmarks were run on a 16 core 24 thread i9-12900k CPU and an RTX 3090 Ti GPU
### Single BN254 NTT
| Library\Size | 2^19 | 2^20 | 2^21 | 2^22 | 2^23 |
|--------------|------|------|------|------|------|
| ICICLE | 1.263 ms | 2.986 ms | 4.651 ms | 9.308 ms | 18.618 ms |
| Arkworks | 138 ms | 290 ms | 611 ms | 1,295 ms | 2,715 ms |
### Single BLS12377 NTT
| Library\Size | 2^19 | 2^20 | 2^21 | 2^22 | 2^23 |
|--------------|------|------|------|------|------|
| ICICLE | 1.272 ms | 2.893 ms | 4.728 ms | 9.211 ms | 18.319 ms |
| Arkworks | 135 ms | 286 ms | 605 ms | 1,279 ms | 2,682 ms |

View File

@@ -1,7 +1,6 @@
use icicle_runtime::memory::{DeviceVec, HostSlice};
use icicle_bls12_377::curve::{ScalarCfg as BLS12377ScalarCfg, ScalarField as BLS12377ScalarField};
use icicle_bn254::curve::{ScalarCfg as Bn254ScalarCfg, ScalarField as Bn254ScalarField};
use icicle_runtime::memory::{DeviceVec, HostSlice};
use clap::Parser;
use icicle_core::{
@@ -32,7 +31,7 @@ fn try_load_and_set_backend_device(args: &Args) {
.backend_install_dir
.is_empty()
{
println!("trying to load backend from {}", &args.backend_install_dir);
println!("Trying to load backend from {}", &args.backend_install_dir);
icicle_runtime::runtime::load_backend(&args.backend_install_dir, true /*recursive */).unwrap();
}
println!("Setting device {}", args.device_type);
@@ -41,15 +40,18 @@ fn try_load_and_set_backend_device(args: &Args) {
fn main() {
let args = Args::parse();
println!("{:?}", args);
try_load_and_set_backend_device(&args);
println!("Running Icicle Examples: Rust NTT");
let log_size = args.size;
let size = 1 << log_size;
println!(
"---------------------- NTT size 2^{}={} ------------------------",
"---------------------- NTT size 2^{} = {} ------------------------",
log_size, size
);
// Setting Bn254 points and scalars
println!("Generating random inputs on host for bn254...");
let scalars = Bn254ScalarCfg::generate_random(size);
@@ -74,7 +76,6 @@ fn main() {
let cfg = ntt::NTTConfig::<Bn254ScalarField>::default();
println!("Setting up bls12377 Domain...");
// reusing ctx from above
initialize_domain(
ntt::get_root_of_unity::<BLS12377ScalarField>(
size.try_into()
@@ -119,7 +120,7 @@ fn main() {
.as_micros()
);
println!("Moving results to host..");
println!("Moving results to host...");
let mut host_bn254_results = vec![Bn254ScalarField::zero(); size];
ntt_results
.copy_to_host(HostSlice::from_mut_slice(&mut host_bn254_results[..]))
@@ -129,4 +130,7 @@ fn main() {
ntt_results_bls12377
.copy_to_host(HostSlice::from_mut_slice(&mut host_bls12377_results[..]))
.unwrap();
println!("Results for bn254: {:?}", host_bn254_results);
println!("Results for bls12377: {:?}", host_bls12377_results);
}

View File

@@ -4,11 +4,13 @@ version = "1.2.0"
edition = "2018"
[dependencies]
icicle-cuda-runtime = { path = "../../../wrappers/rust/icicle-cuda-runtime" }
icicle-core = { path = "../../../wrappers/rust/icicle-core" }
icicle-bn254 = { path = "../../../wrappers/rust/icicle-curves/icicle-bn254" }
icicle-babybear = { path = "../../../wrappers/rust/icicle-fields/icicle-babybear" }
icicle-runtime = { path = "../../../wrappers/rust_v3/icicle-runtime" }
icicle-core = { path = "../../../wrappers/rust_v3/icicle-core" }
icicle-bn254 = { path = "../../../wrappers/rust_v3/icicle-curves/icicle-bn254", features = ["g2"] }
icicle-babybear = { path = "../../../wrappers/rust_v3/icicle-fields/icicle-babybear" }
clap = { version = "<=4.4.12", features = ["derive"] }
[features]
profile = []
cuda = ["icicle-runtime/cuda_backend", "icicle-bn254/cuda_backend", "icicle-babybear/cuda_backend"]

View File

@@ -0,0 +1,15 @@
# ICICLE example: Polynomial API
## Key-Takeaway
`ICICLE` provides Rust bindings to the [Polynomial API](https://dev.ingonyama.com/icicle/rust-bindings/polynomials).
In this example we use the `BN254` and `babybear` fields to demonstrate how to compute on polynomials.
Running the example:
```sh
# for CPU
./run.sh -d CPU
# for CUDA
./run.sh -d CUDA -b /path/to/cuda/backend/install/dir
```

View File

@@ -0,0 +1,59 @@
#!/bin/bash

# Builds and runs this Rust example with `cargo run --release`, forwarding the
# selected device type and backend install directory to the example binary.
#
#   -d DEVICE_TYPE           device to run on (default: CPU)
#   -b BACKEND_INSTALL_DIR   directory of an already-installed backend (default: empty)
#   -h                       print usage and exit
#
# Exits 0 on success and non-zero on a usage error or a failed build/run.

# Exit immediately if a command exits with a non-zero status
set -e

# Print usage information, then exit with the status passed as $1 (default 0).
show_help() {
    echo "Usage: $0 [-d DEVICE_TYPE] [-b BACKEND_INSTALL_DIR]"
    echo
    echo "Options:"
    echo " -d DEVICE_TYPE Specify the device type (default: CPU)"
    echo " -b BACKEND_INSTALL_DIR Specify the backend installation directory (default: empty)"
    echo " -h Show this help message"
    exit "${1:-0}"
}

# Parse command line options
while getopts ":d:b:h" opt; do
    case ${opt} in
        d )
            DEVICE_TYPE=$OPTARG
            ;;
        b )
            # Quoted so that paths containing whitespace survive word splitting.
            BACKEND_INSTALL_DIR="$(realpath "${OPTARG}")"
            ;;
        h )
            show_help
            ;;
        \? )
            echo "Invalid option: -$OPTARG" 1>&2
            # Usage errors must fail so callers (e.g. CI loops) do not see success.
            show_help 1 1>&2
            ;;
        : )
            echo "Invalid option: -$OPTARG requires an argument" 1>&2
            show_help 1 1>&2
            ;;
    esac
done

# Set default values if not provided
: "${DEVICE_TYPE:=CPU}"
: "${BACKEND_INSTALL_DIR:=}"

# Create necessary directories
# NOTE(review): cargo writes to ./target; these directories look like a leftover
# from the C++ example scripts. Kept to preserve existing side effects.
mkdir -p build/example
mkdir -p build/icicle

ICICLE_DIR=$(realpath "../../../icicle_v3/")
ICICLE_CUDA_BACKEND_DIR="${ICICLE_DIR}/backend/cuda"

# Build Icicle and the example app that links to it.
# The CUDA backend is built from source only when CUDA is requested, no existing
# backend directory was supplied, and the CUDA backend sources are present.
if [ "$DEVICE_TYPE" == "CUDA" ] && [ ! -d "${BACKEND_INSTALL_DIR}" ] && [ -d "${ICICLE_CUDA_BACKEND_DIR}" ]; then
    echo "Building icicle with CUDA backend"
    # This directory is presumably populated by the cargo build below, so it may
    # not exist yet. Compose the absolute path by hand instead of calling
    # `realpath`, which fails on missing intermediate directories and would
    # abort the script under `set -e` on a fresh checkout.
    BACKEND_INSTALL_DIR="$(pwd)/target/release/deps/icicle/lib/backend"
    cargo run --release --features=cuda -- --device-type "${DEVICE_TYPE}" --backend-install-dir "${BACKEND_INSTALL_DIR}"
else
    echo "Building icicle without CUDA backend, BACKEND_INSTALL_DIR=${BACKEND_INSTALL_DIR}"
    cargo run --release -- --device-type "${DEVICE_TYPE}" --backend-install-dir "${BACKEND_INSTALL_DIR}"
fi

View File

@@ -3,21 +3,16 @@ use icicle_babybear::polynomials::DensePolynomial as PolynomialBabyBear;
use icicle_bn254::curve::ScalarField as bn254Scalar;
use icicle_bn254::polynomials::DensePolynomial as PolynomialBn254;
use icicle_cuda_runtime::{
device_context::DeviceContext,
memory::{DeviceVec, HostSlice},
};
use icicle_runtime::memory::{DeviceVec, HostSlice};
use icicle_core::{
ntt::{get_root_of_unity, initialize_domain},
ntt::{get_root_of_unity, initialize_domain, NTTInitDomainConfig},
polynomials::UnivariatePolynomial,
traits::{FieldImpl, GenerateRandom},
};
#[cfg(feature = "profile")]
use std::time::Instant;
use clap::Parser;
use std::time::Instant;
#[derive(Parser, Debug)]
struct Args {
@@ -26,21 +21,40 @@ struct Args {
max_ntt_log_size: u8,
#[arg(short, long, default_value_t = 15)]
poly_log_size: u8,
/// Device type (e.g., "CPU", "CUDA")
#[arg(short, long, default_value = "CPU")]
device_type: String,
/// Backend installation directory
#[arg(short, long, default_value = "")]
backend_install_dir: String,
}
/// Loads a backend from `args.backend_install_dir` when that string is
/// non-empty, then selects device 0 of type `args.device_type`.
///
/// Panics (via `unwrap`) if loading the backend or setting the device fails.
fn try_load_and_set_backend_device(args: &Args) {
    let install_dir = &args.backend_install_dir;
    if !install_dir.is_empty() {
        println!("Trying to load backend from {}", install_dir);
        icicle_runtime::runtime::load_backend(install_dir, true /*recursive */).unwrap();
    }

    println!("Setting device {}", args.device_type);
    let device = icicle_runtime::Device::new(&args.device_type, 0);
    icicle_runtime::set_device(&device).unwrap();
}
fn init(max_ntt_size: u64) {
// initialize NTT domain for all fields!. Polynomials ops relies on NTT.
// Initialize NTT domain for all fields. Polynomial operations rely on NTT.
println!(
"Initializing NTT domain for max size 2^{}",
max_ntt_size.trailing_zeros()
);
let rou_bn254: bn254Scalar = get_root_of_unity(max_ntt_size);
let ctx = DeviceContext::default();
initialize_domain(rou_bn254, &ctx, false /*=fast twiddles mode*/).unwrap();
initialize_domain(rou_bn254, &NTTInitDomainConfig::default()).unwrap();
let rou_babybear: babybearScalar = get_root_of_unity(max_ntt_size);
initialize_domain(rou_babybear, &ctx, false /*=fast twiddles mode*/).unwrap();
// initialize the cuda backend for polynomials
// make sure to initialize it per field
PolynomialBn254::init_cuda_backend();
PolynomialBabyBear::init_cuda_backend();
initialize_domain(rou_babybear, &NTTInitDomainConfig::default()).unwrap();
}
fn randomize_poly<P>(size: usize, from_coeffs: bool) -> P
@@ -49,6 +63,7 @@ where
P::Field: FieldImpl,
P::FieldConfig: GenerateRandom<P::Field>,
{
println!("Randomizing polynomial of size {} (from_coeffs: {})", size, from_coeffs);
let coeffs_or_evals = P::FieldConfig::generate_random(size);
let p = if from_coeffs {
P::from_coeffs(HostSlice::from_slice(&coeffs_or_evals), size)
@@ -60,42 +75,61 @@ where
fn main() {
let args = Args::parse();
println!("{:?}", args);
try_load_and_set_backend_device(&args);
init(1 << args.max_ntt_log_size);
// randomize three polynomials f,g,h over bn254 scalar field
let poly_size = 1 << args.poly_log_size;
println!("Randomizing polynomials over bn254 scalar field...");
let f = randomize_poly::<PolynomialBn254>(poly_size, true /*from random coeffs*/);
let g = randomize_poly::<PolynomialBn254>(poly_size / 2, true /*from random coeffs*/);
let h = randomize_poly::<PolynomialBn254>(poly_size / 4, false /*from random evaluations on rou*/);
// randomize two polynomials over babybear field
println!("Randomizing polynomials over babybear field...");
let f_babybear = randomize_poly::<PolynomialBabyBear>(poly_size, true /*from random coeffs*/);
let g_babybear = randomize_poly::<PolynomialBabyBear>(poly_size / 2, true /*from random coeffs*/);
let start = Instant::now();
// Arithmetic
println!("Computing f + g");
let t0 = &f + &g;
println!("Computing f * h");
let t1 = &f * &h;
let (q, r) = t1.divide(&t0); // computes q,r for t1(x)=q(x)*t0(x)+r(x)
println!("Computing q and r for t1(x) = q(x) * t0(x) + r(x)");
let (q, r) = t1.divide(&t0);
println!("Computing f_babybear * g_babybear");
let _r_babybear = &f_babybear * &g_babybear;
// check degree
let _r_degree = r.degree();
// Check degree
println!("Degree of r: {}", r.degree());
// evaluate in single domain point
// Evaluate in single domain point
let five = bn254Scalar::from_u32(5);
println!("Evaluating q at 5");
let q_at_five = q.eval(&five);
// evaluate on domain. Note: domain and image can be either Host or Device slice.
// in this example domain in on host and evals on device.
// Evaluate on domain
let host_domain = [five, bn254Scalar::from_u32(30)];
let mut device_image = DeviceVec::<bn254Scalar>::cuda_malloc(host_domain.len()).unwrap();
let mut device_image = DeviceVec::<bn254Scalar>::device_malloc(host_domain.len()).unwrap();
println!("Evaluating t1 on domain {:?}", host_domain);
t1.eval_on_domain(HostSlice::from_slice(&host_domain), &mut device_image[..]);
// slicing
// Slicing
println!("Performing slicing operations on h");
let o = h.odd();
let e = h.even();
let fold = &e + &(&o * &q_at_five); // e(x) + o(x)*scalar
let fold = &e + &(&o * &q_at_five); // e(x) + o(x) * scalar
let _coeff = fold.get_coeff(2); // coeff of x^2
let _coeff = fold.get_coeff(2); // Coeff of x^2
println!(
"Polynomial computation on selected device took: {} μs",
start
.elapsed()
.as_micros()
);
}

1
examples/rust/poseidon/run.sh Executable file
View File

@@ -0,0 +1 @@
# TODO implement

View File

@@ -1,9 +0,0 @@
[package]
name = "v3_device_api"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
icicle-runtime = {path = "../../../wrappers/rust_v3/icicle-runtime" }

View File

@@ -1,29 +0,0 @@
use icicle_runtime::{
memory::{DeviceVec, HostSlice},
runtime, Device,
};
/// Round-trips a small `i32` buffer host -> device -> host on the "CUDA"
/// device and asserts that the data comes back unchanged.
fn main() {
    // NOTE(review): machine-specific absolute path; the value returned by
    // `load_backend` is not inspected here.
    let backend_dir =
        "/home/administrator/users/yuvals/icicle/examples/rust/v3_device_api/target/debug/deps/icicle/lib/backend/cuda";
    runtime::load_backend(backend_dir, true);

    let device = Device::new("CUDA", 0);
    // Availability is queried but not acted upon; `set_device` below panics on failure.
    let _is_cuda_available = runtime::is_device_available(&device);
    runtime::set_device(&device).unwrap();

    let host_input = vec![1, 2, 3, 4];
    let mut host_output = vec![0; host_input.len()];

    // Allocate a device buffer with one element per input value.
    let mut device_buf = DeviceVec::<i32>::device_malloc(host_input.len()).unwrap();

    // Host -> device.
    let upload = HostSlice::from_slice(&host_input);
    device_buf
        .copy_from_host(upload)
        .unwrap();

    // Device -> host.
    let download = HostSlice::from_mut_slice(&mut host_output);
    device_buf
        .copy_to_host(download)
        .unwrap();

    assert_eq!(host_input, host_output);
    println!("success");
}