Compare commits

..

9 Commits

Author SHA1 Message Date
github-actions[bot]
edd478dd8c ci: update version string in docs 2024-12-31 12:28:24 +00:00
dante
c839a30ae6 fix: clearer duplication functions (#895) 2024-12-31 07:28:02 -05:00
dante
352812b9ac refactor!: simplified decompose op (#892) 2024-12-30 13:44:03 -05:00
dante
d48d0b0b3e fix: get_slice should not use intermediate Vec (#894) 2024-12-27 23:26:22 -05:00
Jseam
8b223354cc fix: add version string and sed (#893) 2024-12-27 14:24:28 -05:00
dante
caa6ef8e16 fix: const filtering strat is size dependent (#891) 2024-12-27 09:43:59 -05:00
Artem
c4354c10a5 fix: ios bindings update action (#886) 2024-12-16 10:49:13 -05:00
dante
c1ce8c88d0 chore: rm wasm serialization checks (#890) 2024-12-12 22:20:29 -05:00
dante
876a9584a1 chore: optimize wasm bundle for speed over size (#889) 2024-12-12 15:35:17 -05:00
17 changed files with 684 additions and 416 deletions

View File

@@ -34,6 +34,7 @@ jobs:
run: |
  mv pyproject.toml pyproject.toml.orig
  # Apply both substitutions in ONE sed pass. Two separate seds that each read
  # pyproject.toml.orig and redirect to pyproject.toml would let the second
  # overwrite the first, silently dropping the ezkl -> ezkl-gpu rename.
  sed -e "s/ezkl/ezkl-gpu/" -e "s/0\\.0\\.0/${RELEASE_TAG//v}/" pyproject.toml.orig >pyproject.toml
- uses: actions-rs/toolchain@v1
with:

View File

@@ -233,6 +233,14 @@ jobs:
python-version: 3.12
architecture: x64
- name: Set pyproject.toml version to match github tag
shell: bash
env:
RELEASE_TAG: ${{ github.ref_name }}
run: |
mv pyproject.toml pyproject.toml.orig
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" pyproject.toml.orig >pyproject.toml
- name: Set Cargo.toml version to match github tag
shell: bash
env:
@@ -242,7 +250,6 @@ jobs:
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" Cargo.toml.orig >Cargo.toml
mv Cargo.lock Cargo.lock.orig
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" Cargo.lock.orig >Cargo.lock
- name: Install required libraries
shell: bash
run: |

129
.github/workflows/swift-pm.yml vendored Normal file
View File

@@ -0,0 +1,129 @@
# On every SemVer tag push: build the EZKL iOS bindings, copy them (plus test
# assets) into a clone of the ezkl-swift-package repository, run that package's
# tests when something changed, then push the update and a matching tag.
name: Build and Publish EZKL iOS SPM package

on:
  push:
    tags:
      # Only support SemVer versioning tags
      - 'v[0-9]+.[0-9]+.[0-9]+'
      - '[0-9]+.[0-9]+.[0-9]+'

jobs:
  build-and-update:
    runs-on: macos-latest
    env:
      EZKL_SWIFT_PACKAGE_REPO: github.com/zkonduit/ezkl-swift-package.git

    steps:
      - name: Checkout EZKL
        uses: actions/checkout@v3

      - name: Extract TAG from github.ref_name
        env:
          # Hand the ref name to the shell through the environment instead of
          # interpolating the expression into the script text.
          REF_NAME: ${{ github.ref_name }}
        run: |
          # github.ref_name is provided by GitHub Actions and contains the tag name directly.
          TAG="$REF_NAME"
          echo "Original TAG: $TAG"
          # Remove leading 'v' if present to match the Swift Package Manager version format.
          NEW_TAG=${TAG#v}
          echo "Stripped TAG: $NEW_TAG"
          # Export TAG to every later step of this job.
          echo "TAG=$NEW_TAG" >> "$GITHUB_ENV"

      - name: Install Rust (nightly)
        uses: actions-rs/toolchain@v1
        with:
          toolchain: nightly
          override: true

      - name: Build EzklCoreBindings
        run: CONFIGURATION=release cargo run --bin ios_gen_bindings --features "ios-bindings uuid camino uniffi_bindgen" --no-default-features

      - name: Clone ezkl-swift-package repository
        run: |
          git clone https://${{ env.EZKL_SWIFT_PACKAGE_REPO }}

      - name: Copy EzklCoreBindings
        run: |
          rm -rf ezkl-swift-package/Sources/EzklCoreBindings
          cp -r build/EzklCoreBindings ezkl-swift-package/Sources/

      - name: Copy Test Files
        run: |
          rm -rf ezkl-swift-package/Tests/EzklAssets/*
          cp tests/assets/kzg ezkl-swift-package/Tests/EzklAssets/kzg.srs
          cp tests/assets/input.json ezkl-swift-package/Tests/EzklAssets/input.json
          cp tests/assets/model.compiled ezkl-swift-package/Tests/EzklAssets/network.ezkl
          cp tests/assets/settings.json ezkl-swift-package/Tests/EzklAssets/settings.json

      - name: Check for changes
        id: check_changes
        run: |
          cd ezkl-swift-package
          # `git status --porcelain` reports modified, deleted AND untracked
          # paths. A plain `git diff --quiet` ignores untracked files, so newly
          # generated bindings/assets would be misreported as "no changes".
          if [ -z "$(git status --porcelain -- Sources/EzklCoreBindings Tests/EzklAssets)" ]; then
            echo "no_changes=true" >> "$GITHUB_OUTPUT"
          else
            echo "no_changes=false" >> "$GITHUB_OUTPUT"
          fi

      - name: Set up Xcode environment
        if: steps.check_changes.outputs.no_changes == 'false'
        run: |
          sudo xcode-select -s /Applications/Xcode.app/Contents/Developer
          sudo xcodebuild -license accept

      - name: Run Package Tests
        if: steps.check_changes.outputs.no_changes == 'false'
        run: |
          cd ezkl-swift-package
          xcodebuild test \
            -scheme EzklPackage \
            -destination 'platform=iOS Simulator,name=iPhone 15 Pro,OS=17.5' \
            -resultBundlePath ../testResults

      - name: Run Example App Tests
        if: steps.check_changes.outputs.no_changes == 'false'
        run: |
          cd ezkl-swift-package/Example
          xcodebuild test \
            -project Example.xcodeproj \
            -scheme EzklApp \
            -destination 'platform=iOS Simulator,name=iPhone 15 Pro,OS=17.5' \
            -parallel-testing-enabled NO \
            -resultBundlePath ../../exampleTestResults \
            -skip-testing:EzklAppUITests/EzklAppUITests/testButtonClicksInOrder

      # Runs unconditionally: the tagging step below needs the authenticated
      # remote even when there is nothing new to commit.
      - name: Setup Git
        env:
          EZKL_SWIFT_PACKAGE_REPO_TOKEN: ${{ secrets.EZKL_PORTER_TOKEN }}
        run: |
          cd ezkl-swift-package
          git config user.name "GitHub Action"
          git config user.email "action@github.com"
          git remote set-url origin https://zkonduit:${EZKL_SWIFT_PACKAGE_REPO_TOKEN}@${{ env.EZKL_SWIFT_PACKAGE_REPO }}

      - name: Commit and Push Changes
        if: steps.check_changes.outputs.no_changes == 'false'
        run: |
          cd ezkl-swift-package
          git add Sources/EzklCoreBindings Tests/EzklAssets
          git commit -m "Automatically updated EzklCoreBindings for EZKL"
          if ! git push origin; then
            echo "::error::Failed to push changes to ${{ env.EZKL_SWIFT_PACKAGE_REPO }}. Please ensure that EZKL_PORTER_TOKEN has the correct permissions."
            exit 1
          fi

      - name: Tag the latest commit
        run: |
          cd ezkl-swift-package
          # TAG was written to GITHUB_ENV by the "Extract TAG" step, so the
          # runner has already exported it here; the env file must not be
          # sourced as a shell script.
          # Tag the latest commit on the current branch
          if git rev-parse "$TAG" >/dev/null 2>&1; then
            echo "Tag $TAG already exists locally. Skipping tag creation."
          else
            git tag "$TAG"
          fi
          if ! git push origin "$TAG"; then
            echo "::error::Failed to push tag '$TAG' to ${{ env.EZKL_SWIFT_PACKAGE_REPO }}. Please ensure EZKL_PORTER_TOKEN has correct permissions."
            exit 1
          fi

View File

@@ -1,85 +0,0 @@
# Builds the EZKL iOS bindings, copies them (plus test assets) into a clone of
# ezkl-swift-package, runs that package's tests, then commits, tags and pushes.
# Triggered manually with an explicit tag, or by any tag push.
name: Build and Publish EZKL iOS SPM package

on:
  workflow_dispatch:
    inputs:
      tag:
        description: "The tag to release"
        required: true
  push:
    tags:
      - "*"

jobs:
  build-and-update:
    runs-on: macos-latest

    steps:
      - name: Checkout EZKL
        uses: actions/checkout@v3

      - name: Install Rust
        uses: actions-rs/toolchain@v1
        with:
          toolchain: nightly
          override: true

      - name: Build EzklCoreBindings
        run: CONFIGURATION=release cargo run --bin ios_gen_bindings --features "ios-bindings uuid camino uniffi_bindgen" --no-default-features

      - name: Clone ezkl-swift-package repository
        run: |
          git clone https://github.com/zkonduit/ezkl-swift-package.git

      - name: Copy EzklCoreBindings
        run: |
          rm -rf ezkl-swift-package/Sources/EzklCoreBindings
          cp -r build/EzklCoreBindings ezkl-swift-package/Sources/

      - name: Copy Test Files
        run: |
          rm -rf ezkl-swift-package/Tests/EzklAssets/*
          cp tests/assets/kzg ezkl-swift-package/Tests/EzklAssets/kzg.srs
          cp tests/assets/input.json ezkl-swift-package/Tests/EzklAssets/input.json
          cp tests/assets/model.compiled ezkl-swift-package/Tests/EzklAssets/network.ezkl
          cp tests/assets/settings.json ezkl-swift-package/Tests/EzklAssets/settings.json

      - name: Set up Xcode environment
        run: |
          sudo xcode-select -s /Applications/Xcode.app/Contents/Developer
          sudo xcodebuild -license accept

      - name: Run Package Tests
        run: |
          cd ezkl-swift-package
          xcodebuild test \
            -scheme EzklPackage \
            -destination 'platform=iOS Simulator,name=iPhone 15 Pro,OS=17.5' \
            -resultBundlePath ../testResults

      - name: Run Example App Tests
        run: |
          cd ezkl-swift-package/Example
          xcodebuild test \
            -project Example.xcodeproj \
            -scheme EzklApp \
            -destination 'platform=iOS Simulator,name=iPhone 15 Pro,OS=17.5' \
            -parallel-testing-enabled NO \
            -resultBundlePath ../../exampleTestResults \
            -skip-testing:EzklAppUITests/EzklAppUITests/testButtonClicksInOrder

      - name: Commit and Push Changes to feat/ezkl-direct-integration
        env:
          EZKL_PORTER_TOKEN: ${{ secrets.EZKL_PORTER_TOKEN }}
          # `inputs.tag` only exists for workflow_dispatch runs. On a tag push
          # it is empty, which previously made `git tag` create nothing and the
          # tag push fail, so fall back to the name of the pushed tag.
          RELEASE_TAG: ${{ github.event.inputs.tag || github.ref_name }}
        run: |
          cd ezkl-swift-package
          git config user.name "GitHub Action"
          git config user.email "action@github.com"
          git add Sources/EzklCoreBindings
          git add Tests/EzklAssets
          git commit -m "Automatically updated EzklCoreBindings for EZKL"
          git tag "$RELEASE_TAG"
          git remote set-url origin https://zkonduit:${EZKL_PORTER_TOKEN}@github.com/zkonduit/ezkl-swift-package.git
          git push origin
          git push origin tag "$RELEASE_TAG"

View File

@@ -147,6 +147,10 @@ shellexpand = "3.1.0"
runner = 'wasm-bindgen-test-runner'
[[bench]]
name = "zero_finder"
harness = false
[[bench]]
name = "accum_dot"
harness = false
@@ -286,3 +290,11 @@ rustflags = ["-C", "relocation-model=pic"]
lto = "fat"
codegen-units = 1
# panic = "abort"
[package.metadata.wasm-pack.profile.release]
wasm-opt = [
"-O4",
"--flexible-inline-max-function-size",
"4294967295",
]

116
benches/zero_finder.rs Normal file
View File

@@ -0,0 +1,116 @@
use std::thread;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use halo2curves::{bn256::Fr as F, ff::Field};
use maybe_rayon::{
iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator},
slice::ParallelSlice,
};
use rand::Rng;
// Assuming these are your types
/// Local stand-in value type that the zero-finding benchmarks operate on.
///
/// NOTE(review): presumably mirrors a similarly named type in the main crate;
/// confirm the variants still match before trusting the benchmark numbers.
#[derive(Clone)]
enum ValType {
    /// A constant field element.
    Constant(F),
    /// A field element paired with a `usize`; what the `usize` represents is
    /// not shown in this file.
    AssignedConstant(usize, F),
    /// A value carrying no field element; the benchmark filters map it to `None`.
    Other,
}
/// Builds `size` benchmark inputs, every one a `ValType::Constant`.
///
/// For each element one `f64` is drawn from the thread-local RNG; when the
/// draw is below `zero_probability` the element holds `F::ZERO`, otherwise
/// `F::ONE`.
fn generate_test_data(size: usize, zero_probability: f64) -> Vec<ValType> {
    let mut rng = rand::thread_rng();
    let mut data = Vec::with_capacity(size);
    for _ in 0..size {
        let felt = if rng.gen::<f64>() < zero_probability {
            F::ZERO
        } else {
            // Any non-zero value would do here.
            F::ONE
        };
        data.push(ValType::Constant(felt));
    }
    data
}
fn bench_zero_finding(c: &mut Criterion) {
let sizes = [
1_000, // 1K
10_000, // 10K
100_000, // 100K
256 * 256 * 2, // Our specific case
1_000_000, // 1M
10_000_000, // 10M
];
let zero_probability = 0.1; // 10% zeros
let mut group = c.benchmark_group("zero_finding");
group.sample_size(10); // Adjust based on your needs
for &size in &sizes {
let data = generate_test_data(size, zero_probability);
// Benchmark sequential version
group.bench_function(format!("sequential_{}", size), |b| {
b.iter(|| {
let result = data
.iter()
.enumerate()
.filter_map(|(i, e)| match e {
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(i)
}
_ => None,
})
.collect::<Vec<_>>();
black_box(result)
})
});
// Benchmark parallel version
group.bench_function(format!("parallel_{}", size), |b| {
b.iter(|| {
let result = data
.par_iter()
.enumerate()
.filter_map(|(i, e)| match e {
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(i)
}
_ => None,
})
.collect::<Vec<_>>();
black_box(result)
})
});
// Benchmark chunked parallel version
group.bench_function(format!("chunked_parallel_{}", size), |b| {
b.iter(|| {
let num_cores = thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (size / num_cores).max(100);
let result = data
.par_chunks(chunk_size)
.enumerate()
.flat_map(|(chunk_idx, chunk)| {
chunk
.par_iter() // Make sure we use par_iter() here
.enumerate()
.filter_map(move |(i, e)| match e {
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(chunk_idx * chunk_size + i)
}
_ => None,
})
})
.collect::<Vec<_>>();
black_box(result)
})
});
}
group.finish();
}
criterion_group!(benches, bench_zero_finding);
criterion_main!(benches);

View File

@@ -1,7 +1,7 @@
import ezkl
project = 'ezkl'
release = '16.2.1'
release = '16.2.9'
version = release

View File

@@ -12,6 +12,7 @@ asyncio_mode = "auto"
[project]
name = "ezkl"
version = "0.0.0"
requires-python = ">=3.7"
classifiers = [
"Programming Language :: Rust",

View File

@@ -141,10 +141,11 @@ pub(crate) fn gen_vk(
.map_err(|e| EZKLError::InternalError(format!("Failed to create verifying key: {}", e)))?;
let mut serialized_vk = Vec::new();
vk.write(&mut serialized_vk, halo2_proofs::SerdeFormat::RawBytes)
.map_err(|e| {
EZKLError::InternalError(format!("Failed to serialize verifying key: {}", e))
})?;
vk.write(
&mut serialized_vk,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
)
.map_err(|e| EZKLError::InternalError(format!("Failed to serialize verifying key: {}", e)))?;
Ok(serialized_vk)
}
@@ -165,7 +166,7 @@ pub(crate) fn gen_pk(
let mut reader = BufReader::new(&vk[..]);
let vk = VerifyingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit.settings().clone(),
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize verifying key: {}", e)))?;
@@ -197,7 +198,7 @@ pub(crate) fn verify(
let mut reader = BufReader::new(&vk[..]);
let vk = VerifyingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit_settings.clone(),
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize vk: {}", e)))?;
@@ -277,7 +278,7 @@ pub(crate) fn verify_aggr(
let mut reader = BufReader::new(&vk[..]);
let vk = VerifyingKey::<G1Affine>::read::<_, AggregationCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
(),
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize vk: {}", e)))?;
@@ -365,7 +366,7 @@ pub(crate) fn prove(
let mut reader = BufReader::new(&pk[..]);
let pk = ProvingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit.settings().clone(),
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize proving key: {}", e)))?;
@@ -487,7 +488,7 @@ pub(crate) fn vk_validation(vk: Vec<u8>, settings: Vec<u8>) -> Result<bool, EZKL
let mut reader = BufReader::new(&vk[..]);
let _ = VerifyingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit_settings,
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize verifying key: {}", e)))?;
@@ -504,7 +505,7 @@ pub(crate) fn pk_validation(pk: Vec<u8>, settings: Vec<u8>) -> Result<bool, EZKL
let mut reader = BufReader::new(&pk[..]);
let _ = ProvingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit_settings,
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize proving key: {}", e)))?;

View File

@@ -30,6 +30,8 @@ use crate::{
use super::*;
use crate::circuit::ops::lookup::LookupOp;
const ASCII_ALPHABET: &str = "abcdefghijklmnopqrstuvwxyz";
/// Calculate the L1 distance between two tensors.
/// ```
/// use ezkl::tensor::Tensor;
@@ -418,10 +420,6 @@ pub fn dot<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(
values[0].remove_indices(&mut removal_indices, true)?;
values[1].remove_indices(&mut removal_indices, true)?;
let elapsed = global_start.elapsed();
trace!("filtering const zero indices took: {:?}", elapsed);
let start = instant::Instant::now();
let mut inputs = vec![];
let block_width = config.custom_gates.output.num_inner_cols();
@@ -429,37 +427,22 @@ pub fn dot<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(
for (i, input) in values.iter_mut().enumerate() {
input.pad_to_zero_rem(block_width, ValType::Constant(F::ZERO))?;
let inp = {
let (res, len) = region.assign_with_duplication(
&config.custom_gates.inputs[i],
input,
&config.check_mode,
false,
)?;
let (res, len) = region
.assign_with_duplication_unconstrained(&config.custom_gates.inputs[i], input)?;
assigned_len = len;
res.get_inner()?
};
inputs.push(inp);
}
let elapsed = start.elapsed();
trace!("assigning inputs took: {:?}", elapsed);
// Now we can assign the dot product
// time this step
let start = instant::Instant::now();
let accumulated_dot = accumulated::dot(&[inputs[0].clone(), inputs[1].clone()], block_width)?;
let elapsed = start.elapsed();
trace!("calculating accumulated dot took: {:?}", elapsed);
let start = instant::Instant::now();
let (output, output_assigned_len) = region.assign_with_duplication(
let (output, output_assigned_len) = region.assign_with_duplication_constrained(
&config.custom_gates.output,
&accumulated_dot.into(),
&config.check_mode,
true,
)?;
let elapsed = start.elapsed();
trace!("assigning output took: {:?}", elapsed);
// enable the selectors
if !region.is_dummy() {
@@ -1000,7 +983,6 @@ fn select<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(
region: &mut RegionCtx<F>,
values: &[ValTensor<F>; 2],
) -> Result<ValTensor<F>, CircuitError> {
let start = instant::Instant::now();
let (mut input, index) = (values[0].clone(), values[1].clone());
input.flatten();
@@ -1028,9 +1010,6 @@ fn select<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(
let (_, assigned_output) =
dynamic_lookup(config, region, &[index, output], &[dim_indices, input])?;
let end = start.elapsed();
trace!("select took: {:?}", end);
Ok(assigned_output)
}
@@ -1092,7 +1071,6 @@ pub(crate) fn dynamic_lookup<F: PrimeField + TensorType + PartialOrd + std::hash
lookups: &[ValTensor<F>; 2],
tables: &[ValTensor<F>; 2],
) -> Result<(ValTensor<F>, ValTensor<F>), CircuitError> {
let start = instant::Instant::now();
// if not all lookups same length err
if lookups[0].len() != lookups[1].len() {
return Err(CircuitError::MismatchedLookupLength(
@@ -1126,28 +1104,20 @@ pub(crate) fn dynamic_lookup<F: PrimeField + TensorType + PartialOrd + std::hash
}
let table_len = table_0.len();
trace!("assigning tables took: {:?}", start.elapsed());
// now create a vartensor of constants for the dynamic lookup index
let table_index = create_constant_tensor(F::from(dynamic_lookup_index as u64), table_len);
let _table_index =
region.assign_dynamic_lookup(&config.dynamic_lookups.tables[2], &table_index)?;
trace!("assigning table index took: {:?}", start.elapsed());
let lookup_0 = region.assign(&config.dynamic_lookups.inputs[0], &lookup_0)?;
let lookup_1 = region.assign(&config.dynamic_lookups.inputs[1], &lookup_1)?;
let lookup_len = lookup_0.len();
trace!("assigning lookups took: {:?}", start.elapsed());
// now set the lookup index
let lookup_index = create_constant_tensor(F::from(dynamic_lookup_index as u64), lookup_len);
let _lookup_index = region.assign(&config.dynamic_lookups.inputs[2], &lookup_index)?;
trace!("assigning lookup index took: {:?}", start.elapsed());
let mut lookup_block = 0;
if !region.is_dummy() {
@@ -1194,9 +1164,6 @@ pub(crate) fn dynamic_lookup<F: PrimeField + TensorType + PartialOrd + std::hash
region.increment_dynamic_lookup_index(1);
region.increment(lookup_len);
let end = start.elapsed();
trace!("dynamic lookup took: {:?}", end);
Ok((lookup_0, lookup_1))
}
@@ -1441,7 +1408,6 @@ pub(crate) fn linearize_element_index<F: PrimeField + TensorType + PartialOrd +
dim: usize,
is_flat_index: bool,
) -> Result<ValTensor<F>, CircuitError> {
let start_time = instant::Instant::now();
let index = values[0].clone();
if !is_flat_index {
assert_eq!(index.dims().len(), dims.len());
@@ -1515,9 +1481,6 @@ pub(crate) fn linearize_element_index<F: PrimeField + TensorType + PartialOrd +
region.apply_in_loop(&mut output, inner_loop_function)?;
let elapsed = start_time.elapsed();
trace!("linearize_element_index took: {:?}", elapsed);
Ok(output.into())
}
@@ -1949,16 +1912,11 @@ pub fn sum<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(
region.flush()?;
// time this entire function run
let global_start = instant::Instant::now();
let mut values = values.clone();
// this section has been optimized to death, don't mess with it
values[0].remove_const_zero_values();
let elapsed = global_start.elapsed();
trace!("filtering const zero indices took: {:?}", elapsed);
// if empty return a const
if values[0].is_empty() {
return Ok(create_zero_tensor(1));
@@ -1970,12 +1928,8 @@ pub fn sum<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(
let input = {
let mut input = values[0].clone();
input.pad_to_zero_rem(block_width, ValType::Constant(F::ZERO))?;
let (res, len) = region.assign_with_duplication(
&config.custom_gates.inputs[1],
&input,
&config.check_mode,
false,
)?;
let (res, len) =
region.assign_with_duplication_unconstrained(&config.custom_gates.inputs[1], &input)?;
assigned_len = len;
res.get_inner()?
};
@@ -1983,11 +1937,10 @@ pub fn sum<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(
// Now we can assign the dot product
let accumulated_sum = accumulated::sum(&input, block_width)?;
let (output, output_assigned_len) = region.assign_with_duplication(
let (output, output_assigned_len) = region.assign_with_duplication_constrained(
&config.custom_gates.output,
&accumulated_sum.into(),
&config.check_mode,
true,
)?;
// enable the selectors
@@ -2053,13 +2006,10 @@ pub fn prod<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(
) -> Result<ValTensor<F>, CircuitError> {
region.flush()?;
// time this entire function run
let global_start = instant::Instant::now();
// this section has been optimized to death, don't mess with it
let removal_indices = values[0].get_const_zero_indices();
let elapsed = global_start.elapsed();
trace!("finding const zero indices took: {:?}", elapsed);
// if empty return a const
if !removal_indices.is_empty() {
return Ok(create_zero_tensor(1));
@@ -2070,12 +2020,8 @@ pub fn prod<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(
let input = {
let mut input = values[0].clone();
input.pad_to_zero_rem(block_width, ValType::Constant(F::ONE))?;
let (res, len) = region.assign_with_duplication(
&config.custom_gates.inputs[1],
&input,
&config.check_mode,
false,
)?;
let (res, len) =
region.assign_with_duplication_unconstrained(&config.custom_gates.inputs[1], &input)?;
assigned_len = len;
res.get_inner()?
};
@@ -2083,11 +2029,10 @@ pub fn prod<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(
// Now we can assign the dot product
let accumulated_prod = accumulated::prod(&input, block_width)?;
let (output, output_assigned_len) = region.assign_with_duplication(
let (output, output_assigned_len) = region.assign_with_duplication_constrained(
&config.custom_gates.output,
&accumulated_prod.into(),
&config.check_mode,
true,
)?;
// enable the selectors
@@ -2440,7 +2385,6 @@ pub(crate) fn pairwise<F: PrimeField + TensorType + PartialOrd + std::hash::Hash
let orig_lhs = lhs.clone();
let orig_rhs = rhs.clone();
let start = instant::Instant::now();
let first_zero_indices = HashSet::from_iter(lhs.get_const_zero_indices());
let second_zero_indices = HashSet::from_iter(rhs.get_const_zero_indices());
@@ -2455,7 +2399,6 @@ pub(crate) fn pairwise<F: PrimeField + TensorType + PartialOrd + std::hash::Hash
BaseOp::Sub => second_zero_indices.clone(),
_ => return Err(CircuitError::UnsupportedOp),
};
trace!("setting up indices took {:?}", start.elapsed());
if lhs.len() != rhs.len() {
return Err(CircuitError::DimMismatch(format!(
@@ -2480,7 +2423,6 @@ pub(crate) fn pairwise<F: PrimeField + TensorType + PartialOrd + std::hash::Hash
// Now we can assign the dot product
// time the calc
let start = instant::Instant::now();
let op_result = match op {
BaseOp::Add => add(&inputs),
BaseOp::Sub => sub(&inputs),
@@ -2491,20 +2433,13 @@ pub(crate) fn pairwise<F: PrimeField + TensorType + PartialOrd + std::hash::Hash
error!("{}", e);
halo2_proofs::plonk::Error::Synthesis
})?;
trace!("pairwise {} calc took {:?}", op.as_str(), start.elapsed());
let start = instant::Instant::now();
let assigned_len = op_result.len() - removal_indices.len();
let mut output = region.assign_with_omissions(
&config.custom_gates.output,
&op_result.into(),
&removal_indices,
)?;
trace!(
"pairwise {} input assign took {:?}",
op.as_str(),
start.elapsed()
);
// Enable the selectors
if !region.is_dummy() {
@@ -2671,9 +2606,7 @@ pub fn greater<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(
rhs.expand(&broadcasted_shape)?;
let diff = pairwise(config, region, &[lhs, rhs], BaseOp::Sub)?;
let sign = sign(config, region, &[diff])?;
equals(config, region, &[sign, create_unit_tensor(1)])
}
@@ -5286,75 +5219,72 @@ pub(crate) fn decompose<F: PrimeField + TensorType + PartialOrd + std::hash::Has
base: &usize,
n: &usize,
) -> Result<ValTensor<F>, CircuitError> {
let input = values[0].clone();
let mut input = values[0].clone();
let is_assigned = !input.all_prev_assigned();
let bases: ValTensor<F> = Tensor::from(
(0..*n)
.rev()
.map(|x| ValType::Constant(integer_rep_to_felt(base.pow(x as u32) as IntegerRep))),
if !is_assigned {
input = region.assign(&config.custom_gates.inputs[0], &input)?;
}
let mut bases: ValTensor<F> = Tensor::from(
// repeat it input.len() times
(0..input.len()).flat_map(|_| {
(0..*n)
.rev()
.map(|x| ValType::Constant(integer_rep_to_felt(base.pow(x as u32) as IntegerRep)))
}),
)
.into();
let mut bases_dims = input.dims().to_vec();
bases_dims.push(*n);
bases.reshape(&bases_dims)?;
let cartesian_coord = input
.dims()
.iter()
.map(|x| 0..*x)
.multi_cartesian_product()
.collect::<Vec<_>>();
let mut decomposed_dims = input.dims().to_vec();
decomposed_dims.push(*n + 1);
let mut output: Tensor<Tensor<ValType<F>>> = Tensor::new(None, input.dims())?;
let claimed_output = if region.witness_gen() {
input.decompose(*base, *n)?
} else {
let decomposed_len = decomposed_dims.iter().product();
let claimed_output = Tensor::new(
Some(&vec![ValType::Value(Value::unknown()); decomposed_len]),
&decomposed_dims,
)?;
let inner_loop_function =
|i: usize, region: &mut RegionCtx<F>| -> Result<Tensor<ValType<F>>, CircuitError> {
let coord = cartesian_coord[i].clone();
let slice = coord.iter().map(|x| *x..*x + 1).collect::<Vec<_>>();
let mut sliced_input = input.get_slice(&slice)?;
sliced_input.flatten();
claimed_output.into()
};
region.assign(&config.custom_gates.output, &claimed_output)?;
region.increment(claimed_output.len());
if !is_assigned {
sliced_input = region.assign(&config.custom_gates.inputs[0], &sliced_input)?;
}
let input_slice = input.dims().iter().map(|x| 0..*x).collect::<Vec<_>>();
let mut sign_slice = input_slice.clone();
sign_slice.push(0..1);
let mut rest_slice = input_slice.clone();
rest_slice.push(1..n + 1);
let mut claimed_output_slice = if region.witness_gen() {
sliced_input.decompose(*base, *n)?
} else {
Tensor::from(vec![ValType::Value(Value::unknown()); *n + 1].into_iter()).into()
};
let sign = claimed_output.get_slice(&sign_slice)?;
let rest = claimed_output.get_slice(&rest_slice)?;
claimed_output_slice =
region.assign(&config.custom_gates.inputs[1], &claimed_output_slice)?;
claimed_output_slice.flatten();
let sign = range_check(config, region, &[sign], &(-1, 1))?;
let rest = range_check(config, region, &[rest], &(0, (*base - 1) as i128))?;
region.increment(claimed_output_slice.len());
// equation needs to be constructed as ij,ij->i but for arbitrary n dims we need to construct this dynamically
// indices should map in order of the alphabet
// start with lhs
let lhs = ASCII_ALPHABET.chars().take(rest.dims().len()).join("");
let rhs = ASCII_ALPHABET.chars().take(rest.dims().len() - 1).join("");
let equation = format!("{},{}->{}", lhs, lhs, rhs);
// get the sign bit and make sure it is valid
let sign = claimed_output_slice.first()?;
let sign = range_check(config, region, &[sign], &(-1, 1))?;
// now add the rhs
// get the rest of the thing and make sure it is in the correct range
let rest = claimed_output_slice.get_slice(&[1..claimed_output_slice.len()])?;
let prod_decomp = einsum(config, region, &[rest.clone(), bases], &equation)?;
let rest = range_check(config, region, &[rest], &(0, (base - 1) as i128))?;
let signed_decomp = pairwise(config, region, &[prod_decomp, sign], BaseOp::Mult)?;
let prod_decomp = dot(config, region, &[rest, bases.clone()])?;
enforce_equality(config, region, &[input, signed_decomp])?;
let signed_decomp = pairwise(config, region, &[prod_decomp, sign], BaseOp::Mult)?;
enforce_equality(config, region, &[sliced_input, signed_decomp])?;
Ok(claimed_output_slice.get_inner_tensor()?.clone())
};
region.apply_in_loop(&mut output, inner_loop_function)?;
let mut combined_output = output.combine()?;
let mut output_dims = input.dims().to_vec();
output_dims.push(*n + 1);
combined_output.reshape(&output_dims)?;
Ok(combined_output.into())
Ok(claimed_output)
}
pub(crate) fn sign<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(

View File

@@ -671,22 +671,17 @@ impl<'a, F: PrimeField + TensorType + PartialOrd + std::hash::Hash> RegionCtx<'a
}
/// Assign a valtensor to a vartensor with duplication
pub fn assign_with_duplication(
pub fn assign_with_duplication_unconstrained(
&mut self,
var: &VarTensor,
values: &ValTensor<F>,
check_mode: &crate::circuit::CheckMode,
single_inner_col: bool,
) -> Result<(ValTensor<F>, usize), Error> {
if let Some(region) = &self.region {
// duplicates every nth element to adjust for column overflow
let (res, len) = var.assign_with_duplication(
let (res, len) = var.assign_with_duplication_unconstrained(
&mut region.borrow_mut(),
self.row,
self.linear_coord,
values,
check_mode,
single_inner_col,
&mut self.assigned_constants,
)?;
Ok((res, len))
@@ -695,7 +690,37 @@ impl<'a, F: PrimeField + TensorType + PartialOrd + std::hash::Hash> RegionCtx<'a
self.row,
self.linear_coord,
values,
single_inner_col,
false,
&mut self.assigned_constants,
)?;
Ok((values.clone(), len))
}
}
/// Assign a valtensor to a vartensor with duplication
pub fn assign_with_duplication_constrained(
&mut self,
var: &VarTensor,
values: &ValTensor<F>,
check_mode: &crate::circuit::CheckMode,
) -> Result<(ValTensor<F>, usize), Error> {
if let Some(region) = &self.region {
// duplicates every nth element to adjust for column overflow
let (res, len) = var.assign_with_duplication_constrained(
&mut region.borrow_mut(),
self.row,
self.linear_coord,
values,
check_mode,
&mut self.assigned_constants,
)?;
Ok((res, len))
} else {
let (_, len) = var.dummy_assign_with_duplication(
self.row,
self.linear_coord,
values,
true,
&mut self.assigned_constants,
)?;
Ok((values.clone(), len))

View File

@@ -488,7 +488,8 @@ pub async fn deploy_da_verifier_via_solidity(
}
}
let contract = match call_to_account {
match call_to_account {
Some(call) => {
deploy_single_da_contract(
client,
@@ -514,8 +515,7 @@ pub async fn deploy_da_verifier_via_solidity(
)
.await
}
};
return contract;
}
}
async fn deploy_multi_da_contract(
@@ -630,7 +630,7 @@ async fn deploy_single_da_contract(
// bytes memory _callData,
PackedSeqToken(call_data.as_ref()),
// uint256 _decimals,
WordToken(B256::from(decimals).into()),
WordToken(B256::from(decimals)),
// uint[] memory _scales,
DynSeqToken(
scales

View File

@@ -1226,6 +1226,7 @@ impl Model {
values.iter().map(|v| v.dims()).collect_vec()
);
let start = instant::Instant::now();
match &node {
NodeType::Node(n) => {
let res = if node.is_constant() && node.num_uses() == 1 {
@@ -1363,6 +1364,7 @@ impl Model {
results.insert(*idx, full_results);
}
}
debug!("------------ layout of {} took {:?}", idx, start.elapsed());
}
// we do this so we can support multiple passes of the same model and have deterministic results (Non-assigned inputs etc... etc...)

View File

@@ -142,8 +142,6 @@ use tract_onnx::prelude::SymbolValues;
pub fn extract_tensor_value(
input: Arc<tract_onnx::prelude::Tensor>,
) -> Result<Tensor<f32>, GraphError> {
use maybe_rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
let dt = input.datum_type();
let dims = input.shape().to_vec();
@@ -156,7 +154,7 @@ pub fn extract_tensor_value(
match dt {
DatumType::F16 => {
let vec = input.as_slice::<tract_onnx::prelude::f16>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| (*x).into()).collect();
let cast: Vec<f32> = vec.iter().map(|x| (*x).into()).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::F32 => {
@@ -165,61 +163,61 @@ pub fn extract_tensor_value(
}
DatumType::F64 => {
let vec = input.as_slice::<f64>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::I64 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<i64>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::I32 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<i32>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::I16 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<i16>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::I8 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<i8>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::U8 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<u8>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::U16 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<u16>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::U32 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<u32>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::U64 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<u64>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::Bool => {
// Generally a shape or hyperparam
let vec = input.as_slice::<bool>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as usize as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as usize as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::TDim => {
@@ -227,7 +225,7 @@ pub fn extract_tensor_value(
let vec = input.as_slice::<tract_onnx::prelude::TDim>()?.to_vec();
let cast: Result<Vec<f32>, GraphError> = vec
.par_iter()
.iter()
.map(|x| match x.to_i64() {
Ok(v) => Ok(v as f32),
Err(_) => match x.to_i64() {
@@ -1136,23 +1134,21 @@ pub fn new_op_from_onnx(
a: crate::circuit::utils::F32(exponent),
})
}
} else {
if let Some(c) = inputs[0].opkind().get_mutable_constant() {
inputs[0].decrement_use();
deleted_indices.push(0);
if c.raw_values.len() > 1 {
unimplemented!("only support scalar base")
}
let base = c.raw_values[0];
SupportedOp::Nonlinear(LookupOp::Exp {
scale: scale_to_multiplier(input_scales[1]).into(),
base: base.into(),
})
} else {
unimplemented!("only support constant base or pow for now")
} else if let Some(c) = inputs[0].opkind().get_mutable_constant() {
inputs[0].decrement_use();
deleted_indices.push(0);
if c.raw_values.len() > 1 {
unimplemented!("only support scalar base")
}
let base = c.raw_values[0];
SupportedOp::Nonlinear(LookupOp::Exp {
scale: scale_to_multiplier(input_scales[1]).into(),
base: base.into(),
})
} else {
unimplemented!("only support constant base or pow for now")
}
}
"Div" => {

View File

@@ -638,42 +638,44 @@ impl<T: Clone + TensorType> Tensor<T> {
where
T: Send + Sync,
{
if indices.is_empty() {
// Fast path: empty indices or full tensor slice
if indices.is_empty()
|| indices.iter().map(|x| x.end - x.start).collect::<Vec<_>>() == self.dims
{
return Ok(self.clone());
}
// Validate dimensions
if self.dims.len() < indices.len() {
return Err(TensorError::DimError(format!(
"The dimensionality of the slice {:?} is greater than the tensor's {:?}",
indices, self.dims
)));
} else if indices.iter().map(|x| x.end - x.start).collect::<Vec<_>>() == self.dims {
// else if slice is the same as dims, return self
return Ok(self.clone());
}
// if indices weren't specified we fill them in as required
let mut full_indices = indices.to_vec();
// Pre-allocate the full indices vector with capacity
let mut full_indices = Vec::with_capacity(self.dims.len());
full_indices.extend_from_slice(indices);
for i in 0..(self.dims.len() - indices.len()) {
full_indices.push(0..self.dims()[indices.len() + i])
}
// Fill remaining dimensions
full_indices.extend((indices.len()..self.dims.len()).map(|i| 0..self.dims[i]));
let cartesian_coord: Vec<Vec<usize>> = full_indices
// Pre-calculate total size and allocate result vector
let total_size: usize = full_indices
.iter()
.cloned()
.multi_cartesian_product()
.collect();
let res: Vec<T> = cartesian_coord
.par_iter()
.map(|e| {
let index = self.get_index(e);
self[index].clone()
})
.collect();
.map(|range| range.end - range.start)
.product();
let mut res = Vec::with_capacity(total_size);
// Calculate new dimensions once
let dims: Vec<usize> = full_indices.iter().map(|e| e.end - e.start).collect();
// Use iterator directly without collecting into intermediate Vec
for coord in full_indices.iter().cloned().multi_cartesian_product() {
let index = self.get_index(&coord);
res.push(self[index].clone());
}
Tensor::new(Some(&res), &dims)
}
@@ -831,7 +833,7 @@ impl<T: Clone + TensorType> Tensor<T> {
num_repeats: usize,
initial_offset: usize,
) -> Result<Tensor<T>, TensorError> {
let mut inner: Vec<T> = vec![];
let mut inner: Vec<T> = Vec::with_capacity(self.inner.len());
let mut offset = initial_offset;
for (i, elem) in self.inner.clone().into_iter().enumerate() {
if (i + offset + 1) % n == 0 {
@@ -860,20 +862,22 @@ impl<T: Clone + TensorType> Tensor<T> {
num_repeats: usize,
initial_offset: usize,
) -> Result<Tensor<T>, TensorError> {
let mut inner: Vec<T> = vec![];
let mut indices_to_remove = std::collections::HashSet::new();
for i in 0..self.inner.len() {
if (i + initial_offset + 1) % n == 0 {
for j in 1..(1 + num_repeats) {
indices_to_remove.insert(i + j);
}
}
}
// Pre-calculate capacity to avoid reallocations
let estimated_size = self.inner.len() - (self.inner.len() / n) * num_repeats;
let mut inner = Vec::with_capacity(estimated_size);
let old_inner = self.inner.clone();
for (i, elem) in old_inner.into_iter().enumerate() {
if !indices_to_remove.contains(&i) {
inner.push(elem.clone());
// Use iterator directly instead of creating intermediate collections
let mut i = 0;
while i < self.inner.len() {
// Add the current element
inner.push(self.inner[i].clone());
// If this is an nth position (accounting for offset)
if (i + initial_offset + 1) % n == 0 {
// Skip the next num_repeats elements
i += num_repeats + 1;
} else {
i += 1;
}
}

View File

@@ -1,12 +1,12 @@
use crate::{circuit::region::ConstantsMap, fieldutils::felt_to_integer_rep};
use maybe_rayon::slice::Iter;
use maybe_rayon::slice::{Iter, ParallelSlice};
use super::{
ops::{intercalate_values, pad, resize},
*,
};
use halo2_proofs::{arithmetic::Field, circuit::Cell, plonk::Instance};
use maybe_rayon::iter::{FilterMap, IntoParallelIterator, ParallelIterator};
use maybe_rayon::iter::{FilterMap, ParallelIterator};
pub(crate) fn create_constant_tensor<
F: PrimeField + TensorType + std::marker::Send + std::marker::Sync + PartialOrd,
@@ -455,7 +455,7 @@ impl<F: PrimeField + TensorType + PartialOrd + std::hash::Hash> ValTensor<F> {
}
}
/// Returns the number of constants in the [ValTensor].
/// Returns an iterator over the [ValTensor]'s constants.
pub fn create_constants_map_iterator(
&self,
) -> FilterMap<Iter<'_, ValType<F>>, fn(&ValType<F>) -> Option<(F, ValType<F>)>> {
@@ -473,20 +473,48 @@ impl<F: PrimeField + TensorType + PartialOrd + std::hash::Hash> ValTensor<F> {
}
}
/// Returns the number of constants in the [ValTensor].
/// Returns a map of the constants in the [ValTensor].
pub fn create_constants_map(&self) -> ConstantsMap<F> {
match self {
ValTensor::Value { inner, .. } => inner
.par_iter()
.filter_map(|x| {
if let ValType::Constant(v) = x {
Some((*v, x.clone()))
} else {
None
}
})
.collect(),
ValTensor::Instance { .. } => ConstantsMap::new(),
let threshold = 1_000_000; // Tuned using the benchmarks
if self.len() < threshold {
match self {
ValTensor::Value { inner, .. } => inner
.par_iter()
.filter_map(|x| {
if let ValType::Constant(v) = x {
Some((*v, x.clone()))
} else {
None
}
})
.collect(),
ValTensor::Instance { .. } => ConstantsMap::new(),
}
} else {
// Use parallel for larger arrays
let num_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (self.len() / num_cores).max(100_000);
match self {
ValTensor::Value { inner, .. } => inner
.par_chunks(chunk_size)
.flat_map(|chunk| {
chunk
.par_iter() // Make sure we use par_iter() here
.filter_map(|x| {
if let ValType::Constant(v) = x {
Some((*v, x.clone()))
} else {
None
}
})
})
.collect(),
ValTensor::Instance { .. } => ConstantsMap::new(),
}
}
}
@@ -878,70 +906,161 @@ impl<F: PrimeField + TensorType + PartialOrd + std::hash::Hash> ValTensor<F> {
/// Removes every constant (`Constant` or `AssignedConstant`) whose value is zero.
pub fn remove_const_zero_values(&mut self) {
match self {
ValTensor::Value { inner: v, dims, .. } => {
*v = v
.clone()
.into_par_iter()
.filter_map(|e| {
if let ValType::Constant(r) = e {
if r == F::ZERO {
return None;
let size_threshold = 1_000_000; // Tuned using the benchmarks
if self.len() < size_threshold {
match self {
ValTensor::Value { inner: v, dims, .. } => {
*v = v
.clone()
.into_iter()
.filter_map(|e| {
if let ValType::Constant(r) = e {
if r == F::ZERO {
return None;
}
} else if let ValType::AssignedConstant(_, r) = e {
if r == F::ZERO {
return None;
}
}
} else if let ValType::AssignedConstant(_, r) = e {
if r == F::ZERO {
return None;
}
}
Some(e)
})
.collect();
*dims = v.dims().to_vec();
Some(e)
})
.collect();
*dims = v.dims().to_vec();
}
ValTensor::Instance { .. } => {}
}
} else {
// Use parallel for larger arrays
let num_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (self.len() / num_cores).max(100_000);
match self {
ValTensor::Value { inner: v, dims, .. } => {
*v = v
.par_chunks_mut(chunk_size)
.flat_map(|chunk| {
chunk
.par_iter_mut() // Make sure we use par_iter() here
.filter_map(|e| {
if let ValType::Constant(r) = e {
if *r == F::ZERO {
return None;
}
} else if let ValType::AssignedConstant(_, r) = e {
if *r == F::ZERO {
return None;
}
}
Some(e.clone())
})
})
.collect();
*dims = v.dims().to_vec();
}
ValTensor::Instance { .. } => {}
}
ValTensor::Instance { .. } => {}
}
}
/// gets constants
/// Returns the indices of the constants (`Constant` or `AssignedConstant`) whose value is zero.
pub fn get_const_zero_indices(&self) -> Vec<usize> {
match self {
ValTensor::Value { inner: v, .. } => v
.par_iter()
.enumerate()
.filter_map(|(i, e)| {
if let ValType::Constant(r) = e {
if *r == F::ZERO {
return Some(i);
let size_threshold = 1_000_000; // Tuned using the benchmarks
if self.len() < size_threshold {
// Use single-threaded for smaller arrays
match &self {
ValTensor::Value { inner: v, .. } => v
.iter()
.enumerate()
.filter_map(|(i, e)| {
match e {
// Combine both match arms to reduce branching
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(i)
}
_ => None,
}
} else if let ValType::AssignedConstant(_, r) = e {
if *r == F::ZERO {
return Some(i);
}
}
None
})
.collect(),
ValTensor::Instance { .. } => vec![],
})
.collect(),
ValTensor::Instance { .. } => vec![],
}
} else {
// Use parallel for larger arrays
let num_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (self.len() / num_cores).max(100_000);
match &self {
ValTensor::Value { inner: v, .. } => v
.par_chunks(chunk_size)
.enumerate()
.flat_map(|(chunk_idx, chunk)| {
chunk
.par_iter() // Make sure we use par_iter() here
.enumerate()
.filter_map(move |(i, e)| match e {
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(chunk_idx * chunk_size + i)
}
_ => None,
})
})
.collect::<Vec<_>>(),
ValTensor::Instance { .. } => vec![],
}
}
}
/// gets constants
/// gets constant indices
pub fn get_const_indices(&self) -> Vec<usize> {
match self {
ValTensor::Value { inner: v, .. } => v
.par_iter()
.enumerate()
.filter_map(|(i, e)| {
if let ValType::Constant(_) = e {
Some(i)
} else if let ValType::AssignedConstant(_, _) = e {
Some(i)
} else {
None
}
})
.collect(),
ValTensor::Instance { .. } => vec![],
let size_threshold = 1_000_000; // Tuned using the benchmarks
if self.len() < size_threshold {
// Use single-threaded for smaller arrays
match &self {
ValTensor::Value { inner: v, .. } => v
.iter()
.enumerate()
.filter_map(|(i, e)| {
match e {
// Combine both match arms to reduce branching
ValType::Constant(_) | ValType::AssignedConstant(_, _) => Some(i),
_ => None,
}
})
.collect(),
ValTensor::Instance { .. } => vec![],
}
} else {
// Use parallel for larger arrays
let num_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (self.len() / num_cores).max(100_000);
match &self {
ValTensor::Value { inner: v, .. } => v
.par_chunks(chunk_size)
.enumerate()
.flat_map(|(chunk_idx, chunk)| {
chunk
.par_iter() // Make sure we use par_iter() here
.enumerate()
.filter_map(move |(i, e)| match e {
ValType::Constant(_) | ValType::AssignedConstant(_, _) => {
Some(chunk_idx * chunk_size + i)
}
_ => None,
})
})
.collect::<Vec<_>>(),
ValTensor::Instance { .. } => vec![],
}
}
}

View File

@@ -494,16 +494,56 @@ impl VarTensor {
}
}
/// Assigns `values` to the columns of the inner tensor, allowing the data to wrap
/// across columns for accumulated operations.
///
/// Elements are duplicated every `self.block_size()` positions, `self.num_inner_cols()`
/// times each, starting from `offset`, so the data can overflow into the next column.
/// Each element of the padded tensor is assigned through `self.assign_value`; the
/// duplicates are then stripped again and the result is reshaped to the input's dims.
/// Unlike the constrained variant, no copy constraints are added between the duplicates.
///
/// Returns the assigned tensor (carrying the input's scale) together with the total
/// number of cells used, duplicates included.
///
/// # Panics
/// Panics on `ValTensor::Instance` inputs, and if duplicating, de-duplicating or
/// reshaping the tensor fails.
pub fn assign_with_duplication_unconstrained<
    F: PrimeField + TensorType + PartialOrd + std::hash::Hash,
>(
    &self,
    region: &mut Region<F>,
    offset: usize,
    values: &ValTensor<F>,
    constants: &mut ConstantsMap<F>,
) -> Result<(ValTensor<F>, usize), halo2_proofs::plonk::Error> {
    // Only value tensors can be duplicated; instance columns bail out immediately.
    let (inner, dims) = match values {
        ValTensor::Value { inner, dims, .. } => (inner, dims),
        ValTensor::Instance { .. } => unimplemented!("duplication is not supported on instance columns. increase K if you require more rows."),
    };

    let every_n = self.block_size();
    let repeats = self.num_inner_cols();

    // Pad the input by repeating every nth element to account for column overflow.
    let padded = inner.duplicate_every_n(every_n, repeats, offset).unwrap();

    // Assign each (possibly duplicated) element to its cell.
    let assigned = padded.enum_map(|coord, k| {
        let cell = self.assign_value(region, offset, k.clone(), coord, constants)?;
        Ok::<_, halo2_proofs::plonk::Error>(cell)
    })?;
    let mut res: ValTensor<F> = assigned.into();

    // Record the footprint before the duplicates are dropped again.
    let total_used_len = res.len();

    res.remove_every_n(every_n, repeats, offset).unwrap();
    res.reshape(dims).unwrap();
    res.set_scale(values.scale());

    Ok((res, total_used_len))
}
/// Assigns specific values (`ValTensor`) to the columns of the inner tensor but allows for column wrapping for accumulated operations.
/// Duplication occurs by copying the last cell of the column to the first cell next column and creating a copy constraint between the two.
pub fn assign_with_duplication<F: PrimeField + TensorType + PartialOrd + std::hash::Hash>(
pub fn assign_with_duplication_constrained<
F: PrimeField + TensorType + PartialOrd + std::hash::Hash,
>(
&self,
region: &mut Region<F>,
row: usize,
offset: usize,
values: &ValTensor<F>,
check_mode: &CheckMode,
single_inner_col: bool,
constants: &mut ConstantsMap<F>,
) -> Result<(ValTensor<F>, usize), halo2_proofs::plonk::Error> {
let mut prev_cell = None;
@@ -512,34 +552,16 @@ impl VarTensor {
ValTensor::Instance { .. } => unimplemented!("duplication is not supported on instance columns. increase K if you require more rows."),
ValTensor::Value { inner: v, dims , ..} => {
let duplication_freq = if single_inner_col {
self.col_size()
} else {
self.block_size()
};
let num_repeats = if single_inner_col {
1
} else {
self.num_inner_cols()
};
let duplication_offset = if single_inner_col {
row
} else {
offset
};
let duplication_freq = self.col_size();
let num_repeats = 1;
let duplication_offset = row;
// duplicates every nth element to adjust for column overflow
let v = v.duplicate_every_n(duplication_freq, num_repeats, duplication_offset).unwrap();
let mut res: ValTensor<F> = {
v.enum_map(|coord, k| {
let step = if !single_inner_col {
1
} else {
self.num_inner_cols()
};
let step = self.num_inner_cols();
let (x, y, z) = self.cartesian_coord(offset + coord * step);
if matches!(check_mode, CheckMode::SAFE) && coord > 0 && z == 0 && y == 0 {
@@ -549,11 +571,13 @@ impl VarTensor {
let cell = self.assign_value(region, offset, k.clone(), coord * step, constants)?;
if single_inner_col {
if z == 0 {
let at_end_of_column = z == duplication_freq - 1;
let at_beginning_of_column = z == 0;
if at_end_of_column {
// if we are at the end of the column, we need to copy the cell to the next column
prev_cell = Some(cell.clone());
} else if coord > 0 && z == 0 && single_inner_col {
} else if coord > 0 && at_beginning_of_column {
if let Some(prev_cell) = prev_cell.as_ref() {
let cell = cell.cell().ok_or({
error!("Error getting cell: {:?}", (x,y));
@@ -563,10 +587,10 @@ impl VarTensor {
halo2_proofs::plonk::Error::Synthesis})?;
region.constrain_equal(prev_cell,cell)?;
} else {
error!("Error copy-constraining previous value: {:?}", (x,y));
error!("Previous cell was not set");
return Err(halo2_proofs::plonk::Error::Synthesis);
}
}}
}
Ok(cell)
@@ -577,20 +601,6 @@ impl VarTensor {
res.reshape(dims).unwrap();
res.set_scale(values.scale());
if matches!(check_mode, CheckMode::SAFE) {
// during key generation this will be 0 so we use this as a flag to check
// TODO: this isn't very safe and would be better to get the phase directly
let res_evals = res.int_evals().unwrap();
let is_assigned = res_evals
.iter()
.all(|&x| x == 0);
if !is_assigned {
assert_eq!(
values.int_evals().unwrap(),
res_evals
)};
}
Ok((res, total_used_len))
}
}