refactor rust example msm

This commit is contained in:
Yuval Shekel
2024-07-28 17:08:12 +03:00
parent 2a3dcd776a
commit 10963fbe0c
4 changed files with 86 additions and 151 deletions

View File

@@ -4,15 +4,11 @@ version = "1.2.0"
edition = "2018"
[dependencies]
icicle-cuda-runtime = { path = "../../../wrappers/rust/icicle-cuda-runtime" }
icicle-core = { path = "../../../wrappers/rust/icicle-core" }
icicle-bn254 = { path = "../../../wrappers/rust/icicle-curves/icicle-bn254", features = ["g2"] }
icicle-bls12-377 = { path = "../../../wrappers/rust/icicle-curves/icicle-bls12-377" }
ark-bn254 = { version = "0.4.0", optional = true }
ark-bls12-377 = { version = "0.4.0", optional = true }
ark-ec = { version = "0.4.0", optional = true }
icicle-runtime = { path = "../../../wrappers/rust_v3/icicle-runtime" }
icicle-core = { path = "../../../wrappers/rust_v3/icicle-core" }
icicle-bn254 = { path = "../../../wrappers/rust_v3/icicle-curves/icicle-bn254", features = ["g2"] }
icicle-bls12-377 = { path = "../../../wrappers/rust_v3/icicle-curves/icicle-bls12-377" }
clap = { version = "<=4.4.12", features = ["derive"] }
[features]
arkworks = ["ark-bn254", "ark-bls12-377", "ark-ec", "icicle-core/arkworks", "icicle-bn254/arkworks", "icicle-bls12-377/arkworks"]
profile = []
cuda = ["icicle-runtime/cuda_backend", "icicle-bn254/cuda_backend", "icicle-bls12-377/cuda_backend"]

View File

@@ -2,10 +2,6 @@
`ICICLE` provides Rust bindings to a CUDA-accelerated C++ implementation of [Multi-Scalar Multiplication](https://github.com/ingonyama-zk/ingopedia/blob/master/src/msm.md).
## Best Practices
To save time and avoid setting up prerequisites manually, we recommend running this example in our [ZKContainer](../../ZKContainer.md).
## Usage
```rust
@@ -28,29 +24,12 @@ In this example we use `BN254` curve. The function computes $result = \sum_{i=0}
Running the example:
```sh
cargo run --release
./run.sh CPU # to use CPU backend
./run.sh CUDA # to load and use CUDA backend
```
You can add the `--features arkworks,profile` flag to measure times of both ICICLE and arkworks.
> [!NOTE]
> The default sizes are 2^19 - 2^23. You can change this by passing the `--lower_bound_log_size <size> --upper_bound_log_size <size>` options. To change the size range to 2^21 - 2^24, run the example like this:
> The default size is 2^10 (both bounds default to 10). You can change this by passing the `--lower-bound-log-size <size> --upper-bound-log-size <size>` options (short forms `-l` and `-u`). To change the size range to 2^21 - 2^24, run the example like this:
> ```sh
> cargo run --release -- -l 21 -u 24
> ```
## Benchmarks
These benchmarks were run on a 16 core 24 thread i9-12900k CPU and an RTX 3090 Ti GPU
### Single BN254 MSM
| Library\Size | 2^19 | 2^20 | 2^21 | 2^22 | 2^23 |
|--------------|------|------|------|------|------|
| ICICLE | 10 ms | 11 ms | 21 ms | 39 ms | 77 ms |
| Arkworks | 284 ms | 540 ms | 1,152 ms | 2,320 ms | 4,491 ms |
### Single BLS12377 MSM
| Library\Size | 2^19 | 2^20 | 2^21 | 2^22 | 2^23 |
|--------------|------|------|------|------|------|
| ICICLE | 9 ms | 14 ms | 25 ms | 48 ms | 93 ms |
| Arkworks | 490 ms | 918 ms | 1,861 ms | 3,624 ms | 7,191 ms |

30
examples/rust/msm/run.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/bin/bash
# Builds and runs the MSM example on the requested backend.
#
# Usage: ./run.sh [CPU|CUDA]
#   With no argument the device type defaults to CUDA. The CUDA build is used
#   only when the CUDA backend sources exist under icicle_v3/backend/cuda;
#   in every other case the example is run with the CPU backend.
set -e

# Function to display usage information
show_help() {
    echo "Usage: $0 <CPU|CUDA>"
    exit 0
}

# Show usage when the -h flag is provided (no argument means "use the default device")
if [ "$1" == "-h" ]; then
    show_help
fi

# All paths below are relative to this script, so run from its directory
# regardless of where the caller invoked it from.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd "${SCRIPT_DIR}"

DEVICE_TYPE=${1:-CUDA}
ICICLE_DIR=$(realpath "../../../icicle_v3/")
ICICLE_CUDA_BACKEND_SRC_DIR="${ICICLE_DIR}/backend/cuda"

# Check if DEVICE_TYPE is CUDA and if the CUDA backend directory exists
if [ "$DEVICE_TYPE" == "CUDA" ] && [ -d "${ICICLE_CUDA_BACKEND_SRC_DIR}" ]; then
    echo "Loading CUDA backend from ${ICICLE_CUDA_BACKEND_SRC_DIR}"
    # The backend directory may not exist before the first build (presumably it is
    # produced by the cargo build below), so compose the absolute path by hand
    # rather than calling realpath on it. Assign and export separately so that a
    # failing command substitution is never hidden by `export`.
    ICICLE_CUDA_BACKEND_DIR="${SCRIPT_DIR}/target/release/deps/icicle/lib/backend"
    export ICICLE_CUDA_BACKEND_DIR
    cargo run --release --features=cuda
else
    echo "Falling back to CPU backend"
    # Build without the `cuda` feature, so the example selects the CPU device
    cargo run --release
fi

View File

@@ -1,43 +1,50 @@
use icicle_bn254::curve::{CurveCfg, G1Projective, G2CurveCfg, G2Projective, ScalarCfg};
use icicle_runtime::{
memory::{DeviceVec, HostSlice},
stream::IcicleStream,
};
// using both bn254 and bls12-377 curves
use icicle_bls12_377::curve::{
CurveCfg as BLS12377CurveCfg, G1Projective as BLS12377G1Projective, ScalarCfg as BLS12377ScalarCfg,
};
use icicle_cuda_runtime::{
memory::{DeviceVec, HostSlice},
stream::CudaStream,
};
use icicle_core::{curve::Curve, msm, traits::GenerateRandom};
#[cfg(feature = "arkworks")]
use icicle_core::traits::ArkConvertible;
#[cfg(feature = "arkworks")]
use ark_bls12_377::{Fr as Bls12377Fr, G1Affine as Bls12377G1Affine, G1Projective as Bls12377ArkG1Projective};
#[cfg(feature = "arkworks")]
use ark_bn254::{Fr as Bn254Fr, G1Affine as Bn254G1Affine, G1Projective as Bn254ArkG1Projective};
#[cfg(feature = "arkworks")]
use ark_ec::scalar_mul::variable_base::VariableBaseMSM;
#[cfg(feature = "profile")]
use std::time::Instant;
use icicle_bn254::curve::{CurveCfg, G1Projective, G2CurveCfg, G2Projective, ScalarCfg};
use clap::Parser;
use icicle_core::{curve::Curve, msm, traits::GenerateRandom};
use std::env;
#[derive(Parser, Debug)]
struct Args {
/// Lower bound (inclusive) of MSM sizes to run for
#[arg(short, long, default_value_t = 19)]
#[arg(short, long, default_value_t = 10)]
lower_bound_log_size: u8,
/// Upper bound of MSM sizes to run for
#[arg(short, long, default_value_t = 22)]
#[arg(short, long, default_value_t = 10)]
upper_bound_log_size: u8,
/// Device type (e.g., "CPU", "CUDA")
#[arg(short, long, default_value = "CUDA")]
device_type: String,
}
/// Loads an ICICLE backend (when one is configured) and selects the active device.
///
/// If the `ICICLE_CUDA_BACKEND_DIR` environment variable is set, the backend is loaded
/// from that directory (recursive flag enabled). Afterwards `Device::new(device, 0)` is
/// made the current device; `device` is a device-type name such as "CPU" or "CUDA".
///
/// # Panics
///
/// Panics if loading the backend or setting the device returns an error.
fn try_load_and_set_backend_device(device: &str) {
    println!("Trying to load and set backend device");
    if let Ok(backend_install_dir) = env::var("ICICLE_CUDA_BACKEND_DIR") {
        println!("loading backend from {}", backend_install_dir);
        icicle_runtime::runtime::load_backend(&backend_install_dir, true /* recursive */)
            .expect("failed to load ICICLE backend from ICICLE_CUDA_BACKEND_DIR");
    }
    println!("Setting device {}", device);
    let target_device = icicle_runtime::Device::new(device, 0);
    icicle_runtime::set_device(&target_device).expect("failed to set ICICLE device");
}
fn main() {
#[cfg(feature = "cuda")]
try_load_and_set_backend_device("CUDA");
#[cfg(not(feature = "cuda"))]
try_load_and_set_backend_device("CPU");
let args = Args::parse();
let lower_bound = args.lower_bound_log_size;
let upper_bound = args.upper_bound_log_size;
@@ -70,45 +77,29 @@ fn main() {
let scalars_bls12377 = HostSlice::from_slice(&upper_scalars_bls12377[..size]);
println!("Configuring bn254 MSM...");
let mut msm_results = DeviceVec::<G1Projective>::cuda_malloc(1).unwrap();
let mut g2_msm_results = DeviceVec::<G2Projective>::cuda_malloc(1).unwrap();
let stream = CudaStream::create().unwrap();
let g2_stream = CudaStream::create().unwrap();
let mut msm_results = DeviceVec::<G1Projective>::device_malloc(1).unwrap();
let mut g2_msm_results = DeviceVec::<G2Projective>::device_malloc(1).unwrap();
let mut stream = IcicleStream::create().unwrap();
let mut g2_stream = IcicleStream::create().unwrap();
let mut cfg = msm::MSMConfig::default();
let mut g2_cfg = msm::MSMConfig::default();
cfg.ctx
.stream = &stream;
g2_cfg
.ctx
.stream = &g2_stream;
cfg.stream_handle = *stream;
cfg.is_async = true;
g2_cfg.stream_handle = *g2_stream;
g2_cfg.is_async = true;
println!("Configuring bls12377 MSM...");
let mut msm_results_bls12377 = DeviceVec::<BLS12377G1Projective>::cuda_malloc(1).unwrap();
let stream_bls12377 = CudaStream::create().unwrap();
let mut msm_results_bls12377 = DeviceVec::<BLS12377G1Projective>::device_malloc(1).unwrap();
let mut stream_bls12377 = IcicleStream::create().unwrap();
let mut cfg_bls12377 = msm::MSMConfig::default();
cfg_bls12377
.ctx
.stream = &stream_bls12377;
cfg_bls12377.stream_handle = *stream_bls12377;
cfg_bls12377.is_async = true;
println!("Executing bn254 MSM on device...");
#[cfg(feature = "profile")]
let start = Instant::now();
msm::msm(scalars, points, &cfg, &mut msm_results[..]).unwrap();
#[cfg(feature = "profile")]
println!(
"ICICLE BN254 MSM on size 2^{log_size} took: {} ms",
start
.elapsed()
.as_millis()
);
msm::msm(scalars, g2_points, &g2_cfg, &mut g2_msm_results[..]).unwrap();
println!("Executing bls12377 MSM on device...");
#[cfg(feature = "profile")]
let start = Instant::now();
msm::msm(
scalars_bls12377,
points_bls12377,
@@ -116,13 +107,6 @@ fn main() {
&mut msm_results_bls12377[..],
)
.unwrap();
#[cfg(feature = "profile")]
println!(
"ICICLE BLS12377 MSM on size 2^{log_size} took: {} ms",
start
.elapsed()
.as_millis()
);
println!("Moving results to host..");
let mut msm_host_result = vec![G1Projective::zero(); 1];
@@ -132,16 +116,17 @@ fn main() {
stream
.synchronize()
.unwrap();
g2_stream
.synchronize()
.unwrap();
msm_results
.copy_to_host(HostSlice::from_mut_slice(&mut msm_host_result[..]))
.unwrap();
println!("bn254 result: {:#?}", msm_host_result);
g2_stream
.synchronize()
.unwrap();
g2_msm_results
.copy_to_host(HostSlice::from_mut_slice(&mut g2_msm_host_result[..]))
.unwrap();
println!("bn254 result: {:#?}", msm_host_result);
println!("G2 bn254 result: {:#?}", g2_msm_host_result);
stream_bls12377
@@ -152,69 +137,14 @@ fn main() {
.unwrap();
println!("bls12377 result: {:#?}", msm_host_result_bls12377);
#[cfg(feature = "arkworks")]
{
println!("Checking against arkworks...");
let ark_points: Vec<Bn254G1Affine> = points
.iter()
.map(|&point| point.to_ark())
.collect();
let ark_scalars: Vec<Bn254Fr> = scalars
.iter()
.map(|scalar| scalar.to_ark())
.collect();
let ark_points_bls12377: Vec<Bls12377G1Affine> = points_bls12377
.iter()
.map(|point| point.to_ark())
.collect();
let ark_scalars_bls12377: Vec<Bls12377Fr> = scalars_bls12377
.iter()
.map(|scalar| scalar.to_ark())
.collect();
#[cfg(feature = "profile")]
let start = Instant::now();
let bn254_ark_msm_res = Bn254ArkG1Projective::msm(&ark_points, &ark_scalars).unwrap();
println!("Arkworks Bn254 result: {:#?}", bn254_ark_msm_res);
#[cfg(feature = "profile")]
println!(
"Ark BN254 MSM on size 2^{log_size} took: {} ms",
start
.elapsed()
.as_millis()
);
#[cfg(feature = "profile")]
let start = Instant::now();
let bls12377_ark_msm_res =
Bls12377ArkG1Projective::msm(&ark_points_bls12377, &ark_scalars_bls12377).unwrap();
println!("Arkworks Bls12377 result: {:#?}", bls12377_ark_msm_res);
#[cfg(feature = "profile")]
println!(
"Ark BLS12377 MSM on size 2^{log_size} took: {} ms",
start
.elapsed()
.as_millis()
);
let bn254_icicle_msm_res_as_ark = msm_host_result[0].to_ark();
let bls12377_icicle_msm_res_as_ark = msm_host_result_bls12377[0].to_ark();
println!(
"Bn254 MSM is correct: {}",
bn254_ark_msm_res.eq(&bn254_icicle_msm_res_as_ark)
);
println!(
"Bls12377 MSM is correct: {}",
bls12377_ark_msm_res.eq(&bls12377_icicle_msm_res_as_ark)
);
}
println!("Cleaning up bn254...");
stream
.destroy()
.unwrap();
g2_stream
.destroy()
.unwrap();
println!("Cleaning up bls12377...");
stream_bls12377
.destroy()