mirror of
https://github.com/zkonduit/ezkl.git
synced 2026-01-13 16:27:59 -05:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3b1e9075f8 | ||
|
|
8b223354cc | ||
|
|
caa6ef8e16 |
1
.github/workflows/pypi-gpu.yml
vendored
1
.github/workflows/pypi-gpu.yml
vendored
@@ -34,6 +34,7 @@ jobs:
|
||||
run: |
|
||||
mv pyproject.toml pyproject.toml.orig
|
||||
sed "s/ezkl/ezkl-gpu/" pyproject.toml.orig >pyproject.toml
|
||||
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" pyproject.toml.orig >pyproject.toml
|
||||
|
||||
- uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
|
||||
9
.github/workflows/pypi.yml
vendored
9
.github/workflows/pypi.yml
vendored
@@ -233,6 +233,14 @@ jobs:
|
||||
python-version: 3.12
|
||||
architecture: x64
|
||||
|
||||
- name: Set pyproject.toml version to match github tag
|
||||
shell: bash
|
||||
env:
|
||||
RELEASE_TAG: ${{ github.ref_name }}
|
||||
run: |
|
||||
mv pyproject.toml pyproject.toml.orig
|
||||
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" pyproject.toml.orig >pyproject.toml
|
||||
|
||||
- name: Set Cargo.toml version to match github tag
|
||||
shell: bash
|
||||
env:
|
||||
@@ -242,7 +250,6 @@ jobs:
|
||||
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" Cargo.toml.orig >Cargo.toml
|
||||
mv Cargo.lock Cargo.lock.orig
|
||||
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" Cargo.lock.orig >Cargo.lock
|
||||
|
||||
- name: Install required libraries
|
||||
shell: bash
|
||||
run: |
|
||||
|
||||
@@ -147,6 +147,10 @@ shellexpand = "3.1.0"
|
||||
runner = 'wasm-bindgen-test-runner'
|
||||
|
||||
|
||||
[[bench]]
|
||||
name = "zero_finder"
|
||||
harness = false
|
||||
|
||||
[[bench]]
|
||||
name = "accum_dot"
|
||||
harness = false
|
||||
|
||||
116
benches/zero_finder.rs
Normal file
116
benches/zero_finder.rs
Normal file
@@ -0,0 +1,116 @@
|
||||
use std::thread;
|
||||
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use halo2curves::{bn256::Fr as F, ff::Field};
|
||||
use maybe_rayon::{
|
||||
iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator},
|
||||
slice::ParallelSlice,
|
||||
};
|
||||
use rand::Rng;
|
||||
|
||||
// Assuming these are your types
|
||||
#[derive(Clone)]
|
||||
enum ValType {
|
||||
Constant(F),
|
||||
AssignedConstant(usize, F),
|
||||
Other,
|
||||
}
|
||||
|
||||
// Helper to generate test data
|
||||
fn generate_test_data(size: usize, zero_probability: f64) -> Vec<ValType> {
|
||||
let mut rng = rand::thread_rng();
|
||||
(0..size)
|
||||
.map(|_i| {
|
||||
if rng.gen::<f64>() < zero_probability {
|
||||
ValType::Constant(F::ZERO)
|
||||
} else {
|
||||
ValType::Constant(F::ONE) // Or some other non-zero value
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn bench_zero_finding(c: &mut Criterion) {
|
||||
let sizes = [
|
||||
1_000, // 1K
|
||||
10_000, // 10K
|
||||
100_000, // 100K
|
||||
256 * 256 * 2, // Our specific case
|
||||
1_000_000, // 1M
|
||||
10_000_000, // 10M
|
||||
];
|
||||
|
||||
let zero_probability = 0.1; // 10% zeros
|
||||
|
||||
let mut group = c.benchmark_group("zero_finding");
|
||||
group.sample_size(10); // Adjust based on your needs
|
||||
|
||||
for &size in &sizes {
|
||||
let data = generate_test_data(size, zero_probability);
|
||||
|
||||
// Benchmark sequential version
|
||||
group.bench_function(format!("sequential_{}", size), |b| {
|
||||
b.iter(|| {
|
||||
let result = data
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, e)| match e {
|
||||
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
|
||||
(*r == F::ZERO).then_some(i)
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
black_box(result)
|
||||
})
|
||||
});
|
||||
|
||||
// Benchmark parallel version
|
||||
group.bench_function(format!("parallel_{}", size), |b| {
|
||||
b.iter(|| {
|
||||
let result = data
|
||||
.par_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, e)| match e {
|
||||
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
|
||||
(*r == F::ZERO).then_some(i)
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
black_box(result)
|
||||
})
|
||||
});
|
||||
|
||||
// Benchmark chunked parallel version
|
||||
group.bench_function(format!("chunked_parallel_{}", size), |b| {
|
||||
b.iter(|| {
|
||||
let num_cores = thread::available_parallelism()
|
||||
.map(|n| n.get())
|
||||
.unwrap_or(1);
|
||||
let chunk_size = (size / num_cores).max(100);
|
||||
|
||||
let result = data
|
||||
.par_chunks(chunk_size)
|
||||
.enumerate()
|
||||
.flat_map(|(chunk_idx, chunk)| {
|
||||
chunk
|
||||
.par_iter() // Make sure we use par_iter() here
|
||||
.enumerate()
|
||||
.filter_map(move |(i, e)| match e {
|
||||
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
|
||||
(*r == F::ZERO).then_some(chunk_idx * chunk_size + i)
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
black_box(result)
|
||||
})
|
||||
});
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
criterion_group!(benches, bench_zero_finding);
|
||||
criterion_main!(benches);
|
||||
@@ -1,7 +1,7 @@
|
||||
import ezkl
|
||||
|
||||
project = 'ezkl'
|
||||
release = '16.2.4'
|
||||
release = '16.2.6'
|
||||
version = release
|
||||
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ asyncio_mode = "auto"
|
||||
|
||||
[project]
|
||||
name = "ezkl"
|
||||
version = "0.0.0"
|
||||
requires-python = ">=3.7"
|
||||
classifiers = [
|
||||
"Programming Language :: Rust",
|
||||
|
||||
@@ -488,7 +488,8 @@ pub async fn deploy_da_verifier_via_solidity(
|
||||
}
|
||||
}
|
||||
|
||||
let contract = match call_to_account {
|
||||
|
||||
match call_to_account {
|
||||
Some(call) => {
|
||||
deploy_single_da_contract(
|
||||
client,
|
||||
@@ -514,8 +515,7 @@ pub async fn deploy_da_verifier_via_solidity(
|
||||
)
|
||||
.await
|
||||
}
|
||||
};
|
||||
return contract;
|
||||
}
|
||||
}
|
||||
|
||||
async fn deploy_multi_da_contract(
|
||||
@@ -630,7 +630,7 @@ async fn deploy_single_da_contract(
|
||||
// bytes memory _callData,
|
||||
PackedSeqToken(call_data.as_ref()),
|
||||
// uint256 _decimals,
|
||||
WordToken(B256::from(decimals).into()),
|
||||
WordToken(B256::from(decimals)),
|
||||
// uint[] memory _scales,
|
||||
DynSeqToken(
|
||||
scales
|
||||
|
||||
@@ -1136,23 +1136,21 @@ pub fn new_op_from_onnx(
|
||||
a: crate::circuit::utils::F32(exponent),
|
||||
})
|
||||
}
|
||||
} else {
|
||||
if let Some(c) = inputs[0].opkind().get_mutable_constant() {
|
||||
inputs[0].decrement_use();
|
||||
deleted_indices.push(0);
|
||||
if c.raw_values.len() > 1 {
|
||||
unimplemented!("only support scalar base")
|
||||
}
|
||||
|
||||
let base = c.raw_values[0];
|
||||
|
||||
SupportedOp::Nonlinear(LookupOp::Exp {
|
||||
scale: scale_to_multiplier(input_scales[1]).into(),
|
||||
base: base.into(),
|
||||
})
|
||||
} else {
|
||||
unimplemented!("only support constant base or pow for now")
|
||||
} else if let Some(c) = inputs[0].opkind().get_mutable_constant() {
|
||||
inputs[0].decrement_use();
|
||||
deleted_indices.push(0);
|
||||
if c.raw_values.len() > 1 {
|
||||
unimplemented!("only support scalar base")
|
||||
}
|
||||
|
||||
let base = c.raw_values[0];
|
||||
|
||||
SupportedOp::Nonlinear(LookupOp::Exp {
|
||||
scale: scale_to_multiplier(input_scales[1]).into(),
|
||||
base: base.into(),
|
||||
})
|
||||
} else {
|
||||
unimplemented!("only support constant base or pow for now")
|
||||
}
|
||||
}
|
||||
"Div" => {
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
use crate::{circuit::region::ConstantsMap, fieldutils::felt_to_integer_rep};
|
||||
use maybe_rayon::slice::Iter;
|
||||
use maybe_rayon::slice::{Iter, ParallelSlice};
|
||||
|
||||
use super::{
|
||||
ops::{intercalate_values, pad, resize},
|
||||
*,
|
||||
};
|
||||
use halo2_proofs::{arithmetic::Field, circuit::Cell, plonk::Instance};
|
||||
use maybe_rayon::iter::{FilterMap, IntoParallelIterator, ParallelIterator};
|
||||
use maybe_rayon::iter::{FilterMap, ParallelIterator};
|
||||
|
||||
pub(crate) fn create_constant_tensor<
|
||||
F: PrimeField + TensorType + std::marker::Send + std::marker::Sync + PartialOrd,
|
||||
@@ -455,7 +455,7 @@ impl<F: PrimeField + TensorType + PartialOrd + std::hash::Hash> ValTensor<F> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of constants in the [ValTensor].
|
||||
/// Returns an iterator over the [ValTensor]'s constants.
|
||||
pub fn create_constants_map_iterator(
|
||||
&self,
|
||||
) -> FilterMap<Iter<'_, ValType<F>>, fn(&ValType<F>) -> Option<(F, ValType<F>)>> {
|
||||
@@ -473,20 +473,48 @@ impl<F: PrimeField + TensorType + PartialOrd + std::hash::Hash> ValTensor<F> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the number of constants in the [ValTensor].
|
||||
/// Returns a map of the constants in the [ValTensor].
|
||||
pub fn create_constants_map(&self) -> ConstantsMap<F> {
|
||||
match self {
|
||||
ValTensor::Value { inner, .. } => inner
|
||||
.par_iter()
|
||||
.filter_map(|x| {
|
||||
if let ValType::Constant(v) = x {
|
||||
Some((*v, x.clone()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
ValTensor::Instance { .. } => ConstantsMap::new(),
|
||||
let threshold = 1_000_000; // Tuned using the benchmarks
|
||||
|
||||
if self.len() < threshold {
|
||||
match self {
|
||||
ValTensor::Value { inner, .. } => inner
|
||||
.par_iter()
|
||||
.filter_map(|x| {
|
||||
if let ValType::Constant(v) = x {
|
||||
Some((*v, x.clone()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
ValTensor::Instance { .. } => ConstantsMap::new(),
|
||||
}
|
||||
} else {
|
||||
// Use parallel for larger arrays
|
||||
let num_cores = std::thread::available_parallelism()
|
||||
.map(|n| n.get())
|
||||
.unwrap_or(1);
|
||||
let chunk_size = (self.len() / num_cores).max(100_000);
|
||||
|
||||
match self {
|
||||
ValTensor::Value { inner, .. } => inner
|
||||
.par_chunks(chunk_size)
|
||||
.flat_map(|chunk| {
|
||||
chunk
|
||||
.par_iter() // Make sure we use par_iter() here
|
||||
.filter_map(|x| {
|
||||
if let ValType::Constant(v) = x {
|
||||
Some((*v, x.clone()))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
})
|
||||
.collect(),
|
||||
ValTensor::Instance { .. } => ConstantsMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -878,70 +906,161 @@ impl<F: PrimeField + TensorType + PartialOrd + std::hash::Hash> ValTensor<F> {
|
||||
|
||||
/// remove constant zero values constants
|
||||
pub fn remove_const_zero_values(&mut self) {
|
||||
match self {
|
||||
ValTensor::Value { inner: v, dims, .. } => {
|
||||
*v = v
|
||||
.clone()
|
||||
.into_par_iter()
|
||||
.filter_map(|e| {
|
||||
if let ValType::Constant(r) = e {
|
||||
if r == F::ZERO {
|
||||
return None;
|
||||
let size_threshold = 1_000_000; // Tuned using the benchmarks
|
||||
|
||||
if self.len() < size_threshold {
|
||||
match self {
|
||||
ValTensor::Value { inner: v, dims, .. } => {
|
||||
*v = v
|
||||
.clone()
|
||||
.into_iter()
|
||||
.filter_map(|e| {
|
||||
if let ValType::Constant(r) = e {
|
||||
if r == F::ZERO {
|
||||
return None;
|
||||
}
|
||||
} else if let ValType::AssignedConstant(_, r) = e {
|
||||
if r == F::ZERO {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
} else if let ValType::AssignedConstant(_, r) = e {
|
||||
if r == F::ZERO {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(e)
|
||||
})
|
||||
.collect();
|
||||
*dims = v.dims().to_vec();
|
||||
Some(e)
|
||||
})
|
||||
.collect();
|
||||
*dims = v.dims().to_vec();
|
||||
}
|
||||
ValTensor::Instance { .. } => {}
|
||||
}
|
||||
} else {
|
||||
// Use parallel for larger arrays
|
||||
let num_cores = std::thread::available_parallelism()
|
||||
.map(|n| n.get())
|
||||
.unwrap_or(1);
|
||||
let chunk_size = (self.len() / num_cores).max(100_000);
|
||||
|
||||
match self {
|
||||
ValTensor::Value { inner: v, dims, .. } => {
|
||||
*v = v
|
||||
.par_chunks_mut(chunk_size)
|
||||
.flat_map(|chunk| {
|
||||
chunk
|
||||
.par_iter_mut() // Make sure we use par_iter() here
|
||||
.filter_map(|e| {
|
||||
if let ValType::Constant(r) = e {
|
||||
if *r == F::ZERO {
|
||||
return None;
|
||||
}
|
||||
} else if let ValType::AssignedConstant(_, r) = e {
|
||||
if *r == F::ZERO {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(e.clone())
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
*dims = v.dims().to_vec();
|
||||
}
|
||||
ValTensor::Instance { .. } => {}
|
||||
}
|
||||
ValTensor::Instance { .. } => {}
|
||||
}
|
||||
}
|
||||
|
||||
/// gets constants
|
||||
/// filter constant zero values constants
|
||||
pub fn get_const_zero_indices(&self) -> Vec<usize> {
|
||||
match self {
|
||||
ValTensor::Value { inner: v, .. } => v
|
||||
.par_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, e)| {
|
||||
if let ValType::Constant(r) = e {
|
||||
if *r == F::ZERO {
|
||||
return Some(i);
|
||||
let size_threshold = 1_000_000; // Tuned using the benchmarks
|
||||
|
||||
if self.len() < size_threshold {
|
||||
// Use single-threaded for smaller arrays
|
||||
match &self {
|
||||
ValTensor::Value { inner: v, .. } => v
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, e)| {
|
||||
match e {
|
||||
// Combine both match arms to reduce branching
|
||||
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
|
||||
(*r == F::ZERO).then_some(i)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
} else if let ValType::AssignedConstant(_, r) = e {
|
||||
if *r == F::ZERO {
|
||||
return Some(i);
|
||||
}
|
||||
}
|
||||
None
|
||||
})
|
||||
.collect(),
|
||||
ValTensor::Instance { .. } => vec![],
|
||||
})
|
||||
.collect(),
|
||||
ValTensor::Instance { .. } => vec![],
|
||||
}
|
||||
} else {
|
||||
// Use parallel for larger arrays
|
||||
let num_cores = std::thread::available_parallelism()
|
||||
.map(|n| n.get())
|
||||
.unwrap_or(1);
|
||||
let chunk_size = (self.len() / num_cores).max(100_000);
|
||||
|
||||
match &self {
|
||||
ValTensor::Value { inner: v, .. } => v
|
||||
.par_chunks(chunk_size)
|
||||
.enumerate()
|
||||
.flat_map(|(chunk_idx, chunk)| {
|
||||
chunk
|
||||
.par_iter() // Make sure we use par_iter() here
|
||||
.enumerate()
|
||||
.filter_map(move |(i, e)| match e {
|
||||
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
|
||||
(*r == F::ZERO).then_some(chunk_idx * chunk_size + i)
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
ValTensor::Instance { .. } => vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// gets constants
|
||||
/// gets constant indices
|
||||
pub fn get_const_indices(&self) -> Vec<usize> {
|
||||
match self {
|
||||
ValTensor::Value { inner: v, .. } => v
|
||||
.par_iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, e)| {
|
||||
if let ValType::Constant(_) = e {
|
||||
Some(i)
|
||||
} else if let ValType::AssignedConstant(_, _) = e {
|
||||
Some(i)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
ValTensor::Instance { .. } => vec![],
|
||||
let size_threshold = 1_000_000; // Tuned using the benchmarks
|
||||
|
||||
if self.len() < size_threshold {
|
||||
// Use single-threaded for smaller arrays
|
||||
match &self {
|
||||
ValTensor::Value { inner: v, .. } => v
|
||||
.iter()
|
||||
.enumerate()
|
||||
.filter_map(|(i, e)| {
|
||||
match e {
|
||||
// Combine both match arms to reduce branching
|
||||
ValType::Constant(_) | ValType::AssignedConstant(_, _) => Some(i),
|
||||
_ => None,
|
||||
}
|
||||
})
|
||||
.collect(),
|
||||
ValTensor::Instance { .. } => vec![],
|
||||
}
|
||||
} else {
|
||||
// Use parallel for larger arrays
|
||||
let num_cores = std::thread::available_parallelism()
|
||||
.map(|n| n.get())
|
||||
.unwrap_or(1);
|
||||
let chunk_size = (self.len() / num_cores).max(100_000);
|
||||
|
||||
match &self {
|
||||
ValTensor::Value { inner: v, .. } => v
|
||||
.par_chunks(chunk_size)
|
||||
.enumerate()
|
||||
.flat_map(|(chunk_idx, chunk)| {
|
||||
chunk
|
||||
.par_iter() // Make sure we use par_iter() here
|
||||
.enumerate()
|
||||
.filter_map(move |(i, e)| match e {
|
||||
ValType::Constant(_) | ValType::AssignedConstant(_, _) => {
|
||||
Some(chunk_idx * chunk_size + i)
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>(),
|
||||
ValTensor::Instance { .. } => vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user