Compare commits


7 Commits

Author   SHA1         Message                                                  Date
dante    d48d0b0b3e   fix: get_slice should not use intermediate Vec (#894)   2024-12-27 23:26:22 -05:00
Jseam    8b223354cc   fix: add version string and sed (#893)                   2024-12-27 14:24:28 -05:00
dante    caa6ef8e16   fix: const filtering strat is size dependent (#891)      2024-12-27 09:43:59 -05:00
Artem    c4354c10a5   fix: ios bindings update action (#886)                   2024-12-16 10:49:13 -05:00
dante    c1ce8c88d0   chore: rm wasm serialization checks (#890)               2024-12-12 22:20:29 -05:00
dante    876a9584a1   chore: optimize wasm bundle for speed over size (#889)   2024-12-12 15:35:17 -05:00
dante    7d7f049cc4   chore: neural bag of words example (#888)                2024-12-12 14:20:21 -05:00
15 changed files with 1300 additions and 221 deletions

View File

@@ -34,6 +34,7 @@ jobs:
run: |
mv pyproject.toml pyproject.toml.orig
sed "s/ezkl/ezkl-gpu/" pyproject.toml.orig >pyproject.toml
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" pyproject.toml.orig >pyproject.toml
- uses: actions-rs/toolchain@v1
with:

View File

@@ -233,6 +233,14 @@ jobs:
python-version: 3.12
architecture: x64
- name: Set pyproject.toml version to match github tag
shell: bash
env:
RELEASE_TAG: ${{ github.ref_name }}
run: |
mv pyproject.toml pyproject.toml.orig
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" pyproject.toml.orig >pyproject.toml
- name: Set Cargo.toml version to match github tag
shell: bash
env:
@@ -242,7 +250,6 @@ jobs:
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" Cargo.toml.orig >Cargo.toml
mv Cargo.lock Cargo.lock.orig
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" Cargo.lock.orig >Cargo.lock
- name: Install required libraries
shell: bash
run: |

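Both release workflows rely on bash parameter expansion to normalize the git tag: `${RELEASE_TAG//v}` deletes every `v` in the tag (not just a leading one), while the swift-pm workflow below uses `${TAG#v}`, which strips only a single leading `v`. For SemVer tags like `v1.2.3` the two agree. A rough Rust equivalent of both, for illustration only:

```rust
// Illustrative Rust equivalents of the two bash expansions used in these
// workflows; the tag value is a made-up example.
fn main() {
    let tag = "v1.2.3";
    let all_vs_removed = tag.replace('v', "");                  // ${RELEASE_TAG//v}
    let prefix_stripped = tag.strip_prefix('v').unwrap_or(tag); // ${TAG#v}
    assert_eq!(all_vs_removed, "1.2.3");
    assert_eq!(prefix_stripped, "1.2.3");
}
```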
View File

@@ -650,7 +650,9 @@ jobs:
run: python -m venv .env --clear; source .env/bin/activate; pip install -r requirements.txt; python -m ensurepip --upgrade
- name: Build python ezkl
run: source .env/bin/activate; unset CONDA_PREFIX; maturin develop --features python-bindings --release
- name: Postgres tutorials
- name: Neural bow
run: source .env/bin/activate; cargo nextest run py_tests::tests::neural_bag_of_words_ --no-capture
- name: Felt conversion
run: source .env/bin/activate; cargo nextest run py_tests::tests::felt_conversion_test_ --no-capture
- name: Postgres tutorials
run: source .env/bin/activate; cargo nextest run py_tests::tests::postgres_ --no-capture

.github/workflows/swift-pm.yml (vendored, new file, 129 lines)
View File

@@ -0,0 +1,129 @@
name: Build and Publish EZKL iOS SPM package
on:
push:
tags:
# Only support SemVer versioning tags
- 'v[0-9]+.[0-9]+.[0-9]+'
- '[0-9]+.[0-9]+.[0-9]+'
jobs:
build-and-update:
runs-on: macos-latest
env:
EZKL_SWIFT_PACKAGE_REPO: github.com/zkonduit/ezkl-swift-package.git
steps:
- name: Checkout EZKL
uses: actions/checkout@v3
- name: Extract TAG from github.ref_name
run: |
# github.ref_name is provided by GitHub Actions and contains the tag name directly.
TAG="${{ github.ref_name }}"
echo "Original TAG: $TAG"
# Remove leading 'v' if present to match the Swift Package Manager version format.
NEW_TAG=${TAG#v}
echo "Stripped TAG: $NEW_TAG"
echo "TAG=$NEW_TAG" >> $GITHUB_ENV
- name: Install Rust (nightly)
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
- name: Build EzklCoreBindings
run: CONFIGURATION=release cargo run --bin ios_gen_bindings --features "ios-bindings uuid camino uniffi_bindgen" --no-default-features
- name: Clone ezkl-swift-package repository
run: |
git clone https://${{ env.EZKL_SWIFT_PACKAGE_REPO }}
- name: Copy EzklCoreBindings
run: |
rm -rf ezkl-swift-package/Sources/EzklCoreBindings
cp -r build/EzklCoreBindings ezkl-swift-package/Sources/
- name: Copy Test Files
run: |
rm -rf ezkl-swift-package/Tests/EzklAssets/*
cp tests/assets/kzg ezkl-swift-package/Tests/EzklAssets/kzg.srs
cp tests/assets/input.json ezkl-swift-package/Tests/EzklAssets/input.json
cp tests/assets/model.compiled ezkl-swift-package/Tests/EzklAssets/network.ezkl
cp tests/assets/settings.json ezkl-swift-package/Tests/EzklAssets/settings.json
- name: Check for changes
id: check_changes
run: |
cd ezkl-swift-package
if git diff --quiet Sources/EzklCoreBindings Tests/EzklAssets; then
echo "no_changes=true" >> $GITHUB_OUTPUT
else
echo "no_changes=false" >> $GITHUB_OUTPUT
fi
- name: Set up Xcode environment
if: steps.check_changes.outputs.no_changes == 'false'
run: |
sudo xcode-select -s /Applications/Xcode.app/Contents/Developer
sudo xcodebuild -license accept
- name: Run Package Tests
if: steps.check_changes.outputs.no_changes == 'false'
run: |
cd ezkl-swift-package
xcodebuild test \
-scheme EzklPackage \
-destination 'platform=iOS Simulator,name=iPhone 15 Pro,OS=17.5' \
-resultBundlePath ../testResults
- name: Run Example App Tests
if: steps.check_changes.outputs.no_changes == 'false'
run: |
cd ezkl-swift-package/Example
xcodebuild test \
-project Example.xcodeproj \
-scheme EzklApp \
-destination 'platform=iOS Simulator,name=iPhone 15 Pro,OS=17.5' \
-parallel-testing-enabled NO \
-resultBundlePath ../../exampleTestResults \
-skip-testing:EzklAppUITests/EzklAppUITests/testButtonClicksInOrder
- name: Setup Git
run: |
cd ezkl-swift-package
git config user.name "GitHub Action"
git config user.email "action@github.com"
git remote set-url origin https://zkonduit:${EZKL_SWIFT_PACKAGE_REPO_TOKEN}@${{ env.EZKL_SWIFT_PACKAGE_REPO }}
env:
EZKL_SWIFT_PACKAGE_REPO_TOKEN: ${{ secrets.EZKL_PORTER_TOKEN }}
- name: Commit and Push Changes
if: steps.check_changes.outputs.no_changes == 'false'
run: |
cd ezkl-swift-package
git add Sources/EzklCoreBindings Tests/EzklAssets
git commit -m "Automatically updated EzklCoreBindings for EZKL"
if ! git push origin; then
echo "::error::Failed to push changes to ${{ env.EZKL_SWIFT_PACKAGE_REPO }}. Please ensure that EZKL_PORTER_TOKEN has the correct permissions."
exit 1
fi
- name: Tag the latest commit
run: |
cd ezkl-swift-package
source $GITHUB_ENV
# Tag the latest commit on the current branch
if git rev-parse "$TAG" >/dev/null 2>&1; then
echo "Tag $TAG already exists locally. Skipping tag creation."
else
git tag "$TAG"
fi
if ! git push origin "$TAG"; then
echo "::error::Failed to push tag '$TAG' to ${{ env.EZKL_SWIFT_PACKAGE_REPO }}. Please ensure EZKL_PORTER_TOKEN has correct permissions."
exit 1
fi

View File

@@ -1,85 +0,0 @@
name: Build and Publish EZKL iOS SPM package
on:
workflow_dispatch:
inputs:
tag:
description: "The tag to release"
required: true
push:
tags:
- "*"
jobs:
build-and-update:
runs-on: macos-latest
steps:
- name: Checkout EZKL
uses: actions/checkout@v3
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
- name: Build EzklCoreBindings
run: CONFIGURATION=release cargo run --bin ios_gen_bindings --features "ios-bindings uuid camino uniffi_bindgen" --no-default-features
- name: Clone ezkl-swift-package repository
run: |
git clone https://github.com/zkonduit/ezkl-swift-package.git
- name: Copy EzklCoreBindings
run: |
rm -rf ezkl-swift-package/Sources/EzklCoreBindings
cp -r build/EzklCoreBindings ezkl-swift-package/Sources/
- name: Copy Test Files
run: |
rm -rf ezkl-swift-package/Tests/EzklAssets/*
cp tests/assets/kzg ezkl-swift-package/Tests/EzklAssets/kzg.srs
cp tests/assets/input.json ezkl-swift-package/Tests/EzklAssets/input.json
cp tests/assets/model.compiled ezkl-swift-package/Tests/EzklAssets/network.ezkl
cp tests/assets/settings.json ezkl-swift-package/Tests/EzklAssets/settings.json
- name: Set up Xcode environment
run: |
sudo xcode-select -s /Applications/Xcode.app/Contents/Developer
sudo xcodebuild -license accept
- name: Run Package Tests
run: |
cd ezkl-swift-package
xcodebuild test \
-scheme EzklPackage \
-destination 'platform=iOS Simulator,name=iPhone 15 Pro,OS=17.5' \
-resultBundlePath ../testResults
- name: Run Example App Tests
run: |
cd ezkl-swift-package/Example
xcodebuild test \
-project Example.xcodeproj \
-scheme EzklApp \
-destination 'platform=iOS Simulator,name=iPhone 15 Pro,OS=17.5' \
-parallel-testing-enabled NO \
-resultBundlePath ../../exampleTestResults \
-skip-testing:EzklAppUITests/EzklAppUITests/testButtonClicksInOrder
- name: Commit and Push Changes to feat/ezkl-direct-integration
run: |
cd ezkl-swift-package
git config user.name "GitHub Action"
git config user.email "action@github.com"
git add Sources/EzklCoreBindings
git add Tests/EzklAssets
git commit -m "Automatically updated EzklCoreBindings for EZKL"
git tag ${{ github.event.inputs.tag }}
git remote set-url origin https://zkonduit:${EZKL_PORTER_TOKEN}@github.com/zkonduit/ezkl-swift-package.git
git push origin
git push origin tag ${{ github.event.inputs.tag }}
env:
EZKL_PORTER_TOKEN: ${{ secrets.EZKL_PORTER_TOKEN }}

View File

@@ -147,6 +147,10 @@ shellexpand = "3.1.0"
runner = 'wasm-bindgen-test-runner'
[[bench]]
name = "zero_finder"
harness = false
[[bench]]
name = "accum_dot"
harness = false
@@ -286,3 +290,11 @@ rustflags = ["-C", "relocation-model=pic"]
lto = "fat"
codegen-units = 1
# panic = "abort"
[package.metadata.wasm-pack.profile.release]
wasm-opt = [
"-O4",
"--flexible-inline-max-function-size",
"4294967295",
]
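A note on the wasm-opt flags added above: `-O4` asks Binaryen to optimize for speed rather than size, and `4294967295` is `u32::MAX`, which effectively lifts the inline-size cap so even large functions may be inlined. Both choices trade bundle size for runtime speed, matching the intent of #889.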

benches/zero_finder.rs (new file, 116 lines)
View File

@@ -0,0 +1,116 @@
use std::thread;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use halo2curves::{bn256::Fr as F, ff::Field};
use maybe_rayon::{
iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator},
slice::ParallelSlice,
};
use rand::Rng;
// Assuming these are your types
#[derive(Clone)]
enum ValType {
Constant(F),
AssignedConstant(usize, F),
Other,
}
// Helper to generate test data
fn generate_test_data(size: usize, zero_probability: f64) -> Vec<ValType> {
let mut rng = rand::thread_rng();
(0..size)
.map(|_i| {
if rng.gen::<f64>() < zero_probability {
ValType::Constant(F::ZERO)
} else {
ValType::Constant(F::ONE) // Or some other non-zero value
}
})
.collect()
}
fn bench_zero_finding(c: &mut Criterion) {
let sizes = [
1_000, // 1K
10_000, // 10K
100_000, // 100K
256 * 256 * 2, // Our specific case
1_000_000, // 1M
10_000_000, // 10M
];
let zero_probability = 0.1; // 10% zeros
let mut group = c.benchmark_group("zero_finding");
group.sample_size(10); // Adjust based on your needs
for &size in &sizes {
let data = generate_test_data(size, zero_probability);
// Benchmark sequential version
group.bench_function(format!("sequential_{}", size), |b| {
b.iter(|| {
let result = data
.iter()
.enumerate()
.filter_map(|(i, e)| match e {
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(i)
}
_ => None,
})
.collect::<Vec<_>>();
black_box(result)
})
});
// Benchmark parallel version
group.bench_function(format!("parallel_{}", size), |b| {
b.iter(|| {
let result = data
.par_iter()
.enumerate()
.filter_map(|(i, e)| match e {
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(i)
}
_ => None,
})
.collect::<Vec<_>>();
black_box(result)
})
});
// Benchmark chunked parallel version
group.bench_function(format!("chunked_parallel_{}", size), |b| {
b.iter(|| {
let num_cores = thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (size / num_cores).max(100);
let result = data
.par_chunks(chunk_size)
.enumerate()
.flat_map(|(chunk_idx, chunk)| {
chunk
.par_iter() // Make sure we use par_iter() here
.enumerate()
.filter_map(move |(i, e)| match e {
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(chunk_idx * chunk_size + i)
}
_ => None,
})
})
.collect::<Vec<_>>();
black_box(result)
})
});
}
group.finish();
}
criterion_group!(benches, bench_zero_finding);
criterion_main!(benches);
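Since the `[[bench]]` entry added to Cargo.toml sets `harness = false`, the default libtest harness is disabled and the `criterion_group!`/`criterion_main!` macros supply the entry point; the suite runs with `cargo bench --bench zero_finder`.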

View File

@@ -0,0 +1,766 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"This is a zk version of the tutorial found [here](https://github.com/bentrevett/pytorch-sentiment-analysis/blob/main/1%20-%20Neural%20Bag%20of%20Words.ipynb). The original tutorial is part of the PyTorch Sentiment Analysis series by Ben Trevett.\n",
"\n",
"1 - NBoW\n",
"\n",
"In this series we'll be building a machine learning model to perform sentiment analysis -- a subset of text classification where the task is to detect if a given sentence is positive or negative -- using PyTorch and torchtext. The dataset used will be movie reviews from the IMDb dataset, which we'll obtain using the datasets library.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"Preparing Data\n",
"\n",
"Before we can implement our NBoW model, we first have to perform quite a few steps to get our data ready to use. NLP usually requires quite a lot of data wrangling beforehand, though libraries such as datasets and torchtext handle most of this for us.\n",
"\n",
"The steps to take are:\n",
"\n",
" 1. importing modules\n",
" 2. loading data\n",
" 3. tokenizing data\n",
" 4. creating data splits\n",
" 5. creating a vocabulary\n",
" 6. numericalizing data\n",
" 7. creating the data loaders\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"! pip install torchtex"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import collections\n",
"\n",
"import datasets\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"import torchtext\n",
"import tqdm\n",
"\n",
"# It is usually good practice to run your experiments multiple times with different random seeds -- both to measure the variance of your model and also to avoid having results only calculated with either \"good\" or \"bad\" seeds, i.e. being very lucky or unlucky with the randomness in the training process.\n",
"\n",
"seed = 1234\n",
"\n",
"np.random.seed(seed)\n",
"torch.manual_seed(seed)\n",
"torch.cuda.manual_seed(seed)\n",
"torch.backends.cudnn.deterministic = True\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_data, test_data = datasets.load_dataset(\"imdb\", split=[\"train\", \"test\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can check the features attribute of a split to get more information about the features. We can see that text is a Value of dtype=string -- in other words, it's a string -- and that label is a ClassLabel. A ClassLabel means the feature is an integer representation of which class the example belongs to. num_classes=2 means that our labels are one of two values, 0 or 1, and names=['neg', 'pos'] gives us the human-readable versions of those values. Thus, a label of 0 means the example is a negative review and a label of 1 means the example is a positive review."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_data.features\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_data[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"One of the first things we need to do to our data is tokenize it. Machine learning models aren't designed to handle strings, they're design to handle numbers. So what we need to do is break down our string into individual tokens, and then convert these tokens to numbers. We'll get to the conversion later, but first we'll look at tokenization.\n",
"\n",
"Tokenization involves using a tokenizer to process the strings in our dataset. A tokenizer is a function that goes from a string to a list of strings. There are many types of tokenizers available, but we're going to use a relatively simple one provided by torchtext called the basic_english tokenizer. We load our tokenizer as such:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tokenizer = torchtext.data.utils.get_tokenizer(\"basic_english\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def tokenize_example(example, tokenizer, max_length):\n",
" tokens = tokenizer(example[\"text\"])[:max_length]\n",
" return {\"tokens\": tokens}\n",
"\n",
"\n",
"max_length = 256\n",
"\n",
"train_data = train_data.map(\n",
" tokenize_example, fn_kwargs={\"tokenizer\": tokenizer, \"max_length\": max_length}\n",
")\n",
"test_data = test_data.map(\n",
" tokenize_example, fn_kwargs={\"tokenizer\": tokenizer, \"max_length\": max_length}\n",
")\n",
"\n",
"\n",
"# create validation data \n",
"# Why have both a validation set and a test set? Your test set respresents the real world data that you'd see if you actually deployed this model. You won't be able to see what data your model will be fed once deployed, and your test set is supposed to reflect that. Every time we tune our model hyperparameters or training set-up to make it do a bit better on the test set, we are leak information from the test set into the training process. If we do this too often then we begin to overfit on the test set. Hence, we need some data which can act as a \"proxy\" test set which we can look at more frequently in order to evaluate how well our model actually does on unseen data -- this is the validation set.\n",
"\n",
"test_size = 0.25\n",
"\n",
"train_valid_data = train_data.train_test_split(test_size=test_size)\n",
"train_data = train_valid_data[\"train\"]\n",
"valid_data = train_valid_data[\"test\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we have to build a vocabulary. This is look-up table where every unique token in your dataset has a corresponding index (an integer).\n",
"\n",
"We do this as machine learning models cannot operate on strings, only numerical vaslues. Each index is used to construct a one-hot vector for each token. A one-hot vector is a vector where all the elements are 0, except one, which is 1, and the dimensionality is the total number of unique tokens in your vocabulary, commonly denoted by V."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"min_freq = 5\n",
"special_tokens = [\"<unk>\", \"<pad>\"]\n",
"\n",
"vocab = torchtext.vocab.build_vocab_from_iterator(\n",
" train_data[\"tokens\"],\n",
" min_freq=min_freq,\n",
" specials=special_tokens,\n",
")\n",
"\n",
"# We store the indices of the unknown and padding tokens (zero and one, respectively) in variables, as we'll use these further on in this notebook.\n",
"\n",
"unk_index = vocab[\"<unk>\"]\n",
"pad_index = vocab[\"<pad>\"]\n",
"\n",
"\n",
"vocab.set_default_index(unk_index)\n",
"\n",
"# To look-up a list of tokens, we can use the vocabulary's lookup_indices method.\n",
"vocab.lookup_indices([\"hello\", \"world\", \"some_token\", \"<pad>\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we have our vocabulary, we can numericalize our data. This involves converting the tokens within our dataset into indices. Similar to how we tokenized our data using the Dataset.map method, we'll define a function that takes an example and our vocabulary, gets the index for each token in each example and then creates an ids field which containes the numericalized tokens."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def numericalize_example(example, vocab):\n",
" ids = vocab.lookup_indices(example[\"tokens\"])\n",
" return {\"ids\": ids}\n",
"\n",
"train_data = train_data.map(numericalize_example, fn_kwargs={\"vocab\": vocab})\n",
"valid_data = valid_data.map(numericalize_example, fn_kwargs={\"vocab\": vocab})\n",
"test_data = test_data.map(numericalize_example, fn_kwargs={\"vocab\": vocab})\n",
"\n",
"train_data = train_data.with_format(type=\"torch\", columns=[\"ids\", \"label\"])\n",
"valid_data = valid_data.with_format(type=\"torch\", columns=[\"ids\", \"label\"])\n",
"test_data = test_data.with_format(type=\"torch\", columns=[\"ids\", \"label\"])\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The final step of preparing the data is creating the data loaders. We can iterate over a data loader to retrieve batches of examples. This is also where we will perform any padding that is necessary.\n",
"\n",
"We first need to define a function to collate a batch, consisting of a list of examples, into what we want our data loader to output.\n",
"\n",
"Here, our desired output from the data loader is a dictionary with keys of \"ids\" and \"label\".\n",
"\n",
"The value of batch[\"ids\"] should be a tensor of shape [batch size, length], where length is the length of the longest sentence (in terms of tokens) within the batch, and all sentences shorter than this should be padded to that length.\n",
"\n",
"The value of batch[\"label\"] should be a tensor of shape [batch size] consisting of the label for each sentence in the batch.\n",
"\n",
"We define a function, get_collate_fn, which is passed the pad token index and returns the actual collate function. Within the actual collate function, collate_fn, we get a list of \"ids\" tensors for each example in the batch, and then use the pad_sequence function, which converts the list of tensors into the desired [batch size, length] shaped tensor and performs padding using the specified pad_index. By default, pad_sequence will return a [length, batch size] shaped tensor, but by setting batch_first=True, these two dimensions are switched. We get a list of \"label\" tensors and convert the list of tensors into a single [batch size] shaped tensor."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_collate_fn(pad_index):\n",
" def collate_fn(batch):\n",
" batch_ids = [i[\"ids\"] for i in batch]\n",
" batch_ids = nn.utils.rnn.pad_sequence(\n",
" batch_ids, padding_value=pad_index, batch_first=True\n",
" )\n",
" batch_label = [i[\"label\"] for i in batch]\n",
" batch_label = torch.stack(batch_label)\n",
" batch = {\"ids\": batch_ids, \"label\": batch_label}\n",
" return batch\n",
"\n",
" return collate_fn\n",
"\n",
"def get_data_loader(dataset, batch_size, pad_index, shuffle=False):\n",
" collate_fn = get_collate_fn(pad_index)\n",
" data_loader = torch.utils.data.DataLoader(\n",
" dataset=dataset,\n",
" batch_size=batch_size,\n",
" collate_fn=collate_fn,\n",
" shuffle=shuffle,\n",
" )\n",
" return data_loader\n",
"\n",
"\n",
"batch_size = 512\n",
"\n",
"train_data_loader = get_data_loader(train_data, batch_size, pad_index, shuffle=True)\n",
"valid_data_loader = get_data_loader(valid_data, batch_size, pad_index)\n",
"test_data_loader = get_data_loader(test_data, batch_size, pad_index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"class NBoW(nn.Module):\n",
" def __init__(self, vocab_size, embedding_dim, output_dim, pad_index):\n",
" super().__init__()\n",
" self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_index)\n",
" self.fc = nn.Linear(embedding_dim, output_dim)\n",
"\n",
" def forward(self, ids):\n",
" # ids = [batch size, seq len]\n",
" embedded = self.embedding(ids)\n",
" # embedded = [batch size, seq len, embedding dim]\n",
" pooled = embedded.mean(dim=1)\n",
" # pooled = [batch size, embedding dim]\n",
" prediction = self.fc(pooled)\n",
" # prediction = [batch size, output dim]\n",
" return prediction\n",
"\n",
"\n",
"vocab_size = len(vocab)\n",
"embedding_dim = 300\n",
"output_dim = len(train_data.unique(\"label\"))\n",
"\n",
"model = NBoW(vocab_size, embedding_dim, output_dim, pad_index)\n",
"\n",
"def count_parameters(model):\n",
" return sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
"\n",
"\n",
"print(f\"The model has {count_parameters(model):,} trainable parameters\")\n",
"\n",
"vectors = torchtext.vocab.GloVe()\n",
"\n",
"pretrained_embedding = vectors.get_vecs_by_tokens(vocab.get_itos())\n",
"\n",
"optimizer = optim.Adam(model.parameters())\n",
"\n",
"criterion = nn.CrossEntropyLoss()\n",
"\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"\n",
"model = model.to(device)\n",
"criterion = criterion.to(device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def train(data_loader, model, criterion, optimizer, device):\n",
" model.train()\n",
" epoch_losses = []\n",
" epoch_accs = []\n",
" for batch in tqdm.tqdm(data_loader, desc=\"training...\"):\n",
" ids = batch[\"ids\"].to(device)\n",
" label = batch[\"label\"].to(device)\n",
" prediction = model(ids)\n",
" loss = criterion(prediction, label)\n",
" accuracy = get_accuracy(prediction, label)\n",
" optimizer.zero_grad()\n",
" loss.backward()\n",
" optimizer.step()\n",
" epoch_losses.append(loss.item())\n",
" epoch_accs.append(accuracy.item())\n",
" return np.mean(epoch_losses), np.mean(epoch_accs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def evaluate(data_loader, model, criterion, device):\n",
" model.eval()\n",
" epoch_losses = []\n",
" epoch_accs = []\n",
" with torch.no_grad():\n",
" for batch in tqdm.tqdm(data_loader, desc=\"evaluating...\"):\n",
" ids = batch[\"ids\"].to(device)\n",
" label = batch[\"label\"].to(device)\n",
" prediction = model(ids)\n",
" loss = criterion(prediction, label)\n",
" accuracy = get_accuracy(prediction, label)\n",
" epoch_losses.append(loss.item())\n",
" epoch_accs.append(accuracy.item())\n",
" return np.mean(epoch_losses), np.mean(epoch_accs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_accuracy(prediction, label):\n",
" batch_size, _ = prediction.shape\n",
" predicted_classes = prediction.argmax(dim=-1)\n",
" correct_predictions = predicted_classes.eq(label).sum()\n",
" accuracy = correct_predictions / batch_size\n",
" return accuracy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"n_epochs = 10\n",
"best_valid_loss = float(\"inf\")\n",
"\n",
"metrics = collections.defaultdict(list)\n",
"\n",
"for epoch in range(n_epochs):\n",
" train_loss, train_acc = train(\n",
" train_data_loader, model, criterion, optimizer, device\n",
" )\n",
" valid_loss, valid_acc = evaluate(valid_data_loader, model, criterion, device)\n",
" metrics[\"train_losses\"].append(train_loss)\n",
" metrics[\"train_accs\"].append(train_acc)\n",
" metrics[\"valid_losses\"].append(valid_loss)\n",
" metrics[\"valid_accs\"].append(valid_acc)\n",
" if valid_loss < best_valid_loss:\n",
" best_valid_loss = valid_loss\n",
" torch.save(model.state_dict(), \"nbow.pt\")\n",
" print(f\"epoch: {epoch}\")\n",
" print(f\"train_loss: {train_loss:.3f}, train_acc: {train_acc:.3f}\")\n",
" print(f\"valid_loss: {valid_loss:.3f}, valid_acc: {valid_acc:.3f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(10, 6))\n",
"ax = fig.add_subplot(1, 1, 1)\n",
"ax.plot(metrics[\"train_losses\"], label=\"train loss\")\n",
"ax.plot(metrics[\"valid_losses\"], label=\"valid loss\")\n",
"ax.set_xlabel(\"epoch\")\n",
"ax.set_ylabel(\"loss\")\n",
"ax.set_xticks(range(n_epochs))\n",
"ax.legend()\n",
"ax.grid()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(10, 6))\n",
"ax = fig.add_subplot(1, 1, 1)\n",
"ax.plot(metrics[\"train_accs\"], label=\"train accuracy\")\n",
"ax.plot(metrics[\"valid_accs\"], label=\"valid accuracy\")\n",
"ax.set_xlabel(\"epoch\")\n",
"ax.set_ylabel(\"loss\")\n",
"ax.set_xticks(range(n_epochs))\n",
"ax.legend()\n",
"ax.grid()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.load_state_dict(torch.load(\"nbow.pt\"))\n",
"\n",
"test_loss, test_acc = evaluate(test_data_loader, model, criterion, device)\n",
"\n",
"print(f\"test_loss: {test_loss:.3f}, test_acc: {test_acc:.3f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def predict_sentiment(text, model, tokenizer, vocab, device):\n",
" tokens = tokenizer(text)\n",
" ids = vocab.lookup_indices(tokens)\n",
" tensor = torch.LongTensor(ids).unsqueeze(dim=0).to(device)\n",
" prediction = model(tensor).squeeze(dim=0)\n",
" probability = torch.softmax(prediction, dim=-1)\n",
" predicted_class = prediction.argmax(dim=-1).item()\n",
" predicted_probability = probability[predicted_class].item()\n",
" return predicted_class, predicted_probability"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = \"This film is terrible!\"\n",
"\n",
"predict_sentiment(text, model, tokenizer, vocab, device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = \"This film is great!\"\n",
"\n",
"predict_sentiment(text, model, tokenizer, vocab, device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = \"This film is not terrible, it's great!\"\n",
"\n",
"predict_sentiment(text, model, tokenizer, vocab, device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = \"This film is not great, it's terrible!\"\n",
"\n",
"predict_sentiment(text, model, tokenizer, vocab, device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def text_to_tensor(text, tokenizer, vocab, device):\n",
" tokens = tokenizer(text)\n",
" ids = vocab.lookup_indices(tokens)\n",
" tensor = torch.LongTensor(ids).unsqueeze(dim=0).to(device)\n",
" return tensor\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we do onnx stuff to get the data ready for the zk-circuit."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import json\n",
"\n",
"text = \"This film is terrible!\"\n",
"x = text_to_tensor(text, tokenizer, vocab, device)\n",
"\n",
"# Flips the neural net into inference mode\n",
"model.eval()\n",
"model.to('cpu')\n",
"\n",
"model_path = \"network.onnx\"\n",
"data_path = \"input.json\"\n",
"\n",
" # Export the model\n",
"torch.onnx.export(model, # model being run\n",
" x, # model input (or a tuple for multiple inputs)\n",
" model_path, # where to save the model (can be a file or file-like object)\n",
" export_params=True, # store the trained parameter weights inside the model file\n",
" opset_version=10, # the ONNX version to export the model to\n",
" do_constant_folding=True, # whether to execute constant folding for optimization\n",
" input_names = ['input'], # the model's input names\n",
" output_names = ['output'], # the model's output names\n",
" dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes\n",
" 'output' : {0 : 'batch_size'}})\n",
"\n",
"\n",
"\n",
"data_array = ((x).detach().numpy()).reshape([-1]).tolist()\n",
"\n",
"data_json = dict(input_data = [data_array])\n",
"\n",
"print(data_json)\n",
"\n",
" # Serialize data into file:\n",
"json.dump(data_json, open(data_path, 'w'))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import ezkl\n",
"\n",
"run_args = ezkl.PyRunArgs()\n",
"run_args.logrows = 23\n",
"run_args.scale_rebase_multiplier = 10\n",
"# inputs should be auditable by all\n",
"run_args.input_visibility = \"public\"\n",
"# same with outputs\n",
"run_args.output_visibility = \"public\"\n",
"# for simplicity, we'll just use the fixed model visibility: i.e it is public and can't be changed by the prover\n",
"run_args.param_visibility = \"fixed\"\n",
"\n",
"\n",
"# TODO: Dictionary outputs\n",
"res = ezkl.gen_settings(py_run_args=run_args)\n",
"assert res == True\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res = ezkl.compile_circuit()\n",
"assert res == True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# srs path\n",
"res = await ezkl.get_srs()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# now generate the witness file\n",
"res = await ezkl.gen_witness()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res = ezkl.mock()\n",
"assert res == True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# HERE WE SETUP THE CIRCUIT PARAMS\n",
"# WE GOT KEYS\n",
"# WE GOT CIRCUIT PARAMETERS\n",
"# EVERYTHING ANYONE HAS EVER NEEDED FOR ZK\n",
"\n",
"res = ezkl.setup()\n",
"\n",
"assert res == True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# GENERATE A PROOF\n",
"res = ezkl.prove(proof_path=\"proof.json\")\n",
"\n",
"print(res)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# VERIFY IT\n",
"res = ezkl.verify()\n",
"\n",
"assert res == True\n",
"print(\"verified\")\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can also verify it on chain by creating an onchain verifier"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# check if notebook is in colab\n",
"try:\n",
" import google.colab\n",
" import subprocess\n",
" import sys\n",
" subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"solc-select\"])\n",
" !solc-select install 0.8.20\n",
" !solc-select use 0.8.20\n",
" !solc --version\n",
" import os\n",
"\n",
"# rely on local installation if the notebook is not in colab\n",
"except:\n",
" import os\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res = await ezkl.create_evm_verifier()\n",
"assert res == True\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You should see a `Verifier.sol`. Right-click and save it locally.\n",
"\n",
"Now go to [https://remix.ethereum.org](https://remix.ethereum.org).\n",
"\n",
"Create a new file within remix and copy the verifier code over.\n",
"\n",
"Finally, compile the code and deploy. For the demo you can deploy to the test environment within remix.\n",
"\n",
"If everything works, you would have deployed your verifer onchain! Copy the values in the cell above to the respective fields to test if the verifier is working."
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -12,6 +12,7 @@ asyncio_mode = "auto"
[project]
name = "ezkl"
version = "0.0.0"
requires-python = ">=3.7"
classifiers = [
"Programming Language :: Rust",

View File

@@ -141,10 +141,11 @@ pub(crate) fn gen_vk(
.map_err(|e| EZKLError::InternalError(format!("Failed to create verifying key: {}", e)))?;
let mut serialized_vk = Vec::new();
vk.write(&mut serialized_vk, halo2_proofs::SerdeFormat::RawBytes)
.map_err(|e| {
EZKLError::InternalError(format!("Failed to serialize verifying key: {}", e))
})?;
vk.write(
&mut serialized_vk,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
)
.map_err(|e| EZKLError::InternalError(format!("Failed to serialize verifying key: {}", e)))?;
Ok(serialized_vk)
}
@@ -165,7 +166,7 @@ pub(crate) fn gen_pk(
let mut reader = BufReader::new(&vk[..]);
let vk = VerifyingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit.settings().clone(),
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize verifying key: {}", e)))?;
@@ -197,7 +198,7 @@ pub(crate) fn verify(
let mut reader = BufReader::new(&vk[..]);
let vk = VerifyingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit_settings.clone(),
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize vk: {}", e)))?;
@@ -277,7 +278,7 @@ pub(crate) fn verify_aggr(
let mut reader = BufReader::new(&vk[..]);
let vk = VerifyingKey::<G1Affine>::read::<_, AggregationCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
(),
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize vk: {}", e)))?;
@@ -365,7 +366,7 @@ pub(crate) fn prove(
let mut reader = BufReader::new(&pk[..]);
let pk = ProvingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit.settings().clone(),
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize proving key: {}", e)))?;
@@ -487,7 +488,7 @@ pub(crate) fn vk_validation(vk: Vec<u8>, settings: Vec<u8>) -> Result<bool, EZKL
let mut reader = BufReader::new(&vk[..]);
let _ = VerifyingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit_settings,
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize verifying key: {}", e)))?;
@@ -504,7 +505,7 @@ pub(crate) fn pk_validation(pk: Vec<u8>, settings: Vec<u8>) -> Result<bool, EZKL
let mut reader = BufReader::new(&pk[..]);
let _ = ProvingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit_settings,
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize proving key: {}", e)))?;
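For readers unfamiliar with the halo2 serde formats: `RawBytesUnchecked` skips the validity checks that `RawBytes` performs on read (e.g. that deserialized curve points actually lie on the curve), which is what makes it faster, and why this change is reasonable only because the key bytes here are produced and consumed by ezkl itself. A minimal sketch of gating the format on trust, with a hypothetical `trusted` flag:

```rust
use halo2_proofs::SerdeFormat;

// Hypothetical helper: pick the cheaper unchecked format only for key bytes
// the application itself produced; fall back to the checked format otherwise.
fn key_format(trusted: bool) -> SerdeFormat {
    if trusted {
        SerdeFormat::RawBytesUnchecked // skips point/field validity checks
    } else {
        SerdeFormat::RawBytes // validates on read
    }
}
```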

View File

@@ -488,7 +488,8 @@ pub async fn deploy_da_verifier_via_solidity(
}
}
let contract = match call_to_account {
match call_to_account {
Some(call) => {
deploy_single_da_contract(
client,
@@ -514,8 +515,7 @@ pub async fn deploy_da_verifier_via_solidity(
)
.await
}
};
return contract;
}
}
async fn deploy_multi_da_contract(
@@ -630,7 +630,7 @@ async fn deploy_single_da_contract(
// bytes memory _callData,
PackedSeqToken(call_data.as_ref()),
// uint256 _decimals,
WordToken(B256::from(decimals).into()),
WordToken(B256::from(decimals)),
// uint[] memory _scales,
DynSeqToken(
scales

View File

@@ -142,8 +142,6 @@ use tract_onnx::prelude::SymbolValues;
pub fn extract_tensor_value(
input: Arc<tract_onnx::prelude::Tensor>,
) -> Result<Tensor<f32>, GraphError> {
use maybe_rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
let dt = input.datum_type();
let dims = input.shape().to_vec();
@@ -156,7 +154,7 @@ pub fn extract_tensor_value(
match dt {
DatumType::F16 => {
let vec = input.as_slice::<tract_onnx::prelude::f16>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| (*x).into()).collect();
let cast: Vec<f32> = vec.iter().map(|x| (*x).into()).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::F32 => {
@@ -165,61 +163,61 @@ pub fn extract_tensor_value(
}
DatumType::F64 => {
let vec = input.as_slice::<f64>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::I64 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<i64>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::I32 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<i32>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::I16 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<i16>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::I8 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<i8>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::U8 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<u8>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::U16 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<u16>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::U32 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<u32>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::U64 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<u64>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::Bool => {
// Generally a shape or hyperparam
let vec = input.as_slice::<bool>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as usize as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as usize as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::TDim => {
@@ -227,7 +225,7 @@ pub fn extract_tensor_value(
let vec = input.as_slice::<tract_onnx::prelude::TDim>()?.to_vec();
let cast: Result<Vec<f32>, GraphError> = vec
.par_iter()
.iter()
.map(|x| match x.to_i64() {
Ok(v) => Ok(v as f32),
Err(_) => match x.to_i64() {
@@ -1136,23 +1134,21 @@ pub fn new_op_from_onnx(
a: crate::circuit::utils::F32(exponent),
})
}
} else {
if let Some(c) = inputs[0].opkind().get_mutable_constant() {
inputs[0].decrement_use();
deleted_indices.push(0);
if c.raw_values.len() > 1 {
unimplemented!("only support scalar base")
}
let base = c.raw_values[0];
SupportedOp::Nonlinear(LookupOp::Exp {
scale: scale_to_multiplier(input_scales[1]).into(),
base: base.into(),
})
} else {
unimplemented!("only support constant base or pow for now")
} else if let Some(c) = inputs[0].opkind().get_mutable_constant() {
inputs[0].decrement_use();
deleted_indices.push(0);
if c.raw_values.len() > 1 {
unimplemented!("only support scalar base")
}
let base = c.raw_values[0];
SupportedOp::Nonlinear(LookupOp::Exp {
scale: scale_to_multiplier(input_scales[1]).into(),
base: base.into(),
})
} else {
unimplemented!("only support constant base or pow for now")
}
}
"Div" => {

View File

@@ -638,42 +638,44 @@ impl<T: Clone + TensorType> Tensor<T> {
where
T: Send + Sync,
{
if indices.is_empty() {
// Fast path: empty indices or full tensor slice
if indices.is_empty()
|| indices.iter().map(|x| x.end - x.start).collect::<Vec<_>>() == self.dims
{
return Ok(self.clone());
}
// Validate dimensions
if self.dims.len() < indices.len() {
return Err(TensorError::DimError(format!(
"The dimensionality of the slice {:?} is greater than the tensor's {:?}",
indices, self.dims
)));
} else if indices.iter().map(|x| x.end - x.start).collect::<Vec<_>>() == self.dims {
// else if slice is the same as dims, return self
return Ok(self.clone());
}
// if indices weren't specified we fill them in as required
let mut full_indices = indices.to_vec();
// Pre-allocate the full indices vector with capacity
let mut full_indices = Vec::with_capacity(self.dims.len());
full_indices.extend_from_slice(indices);
for i in 0..(self.dims.len() - indices.len()) {
full_indices.push(0..self.dims()[indices.len() + i])
}
// Fill remaining dimensions
full_indices.extend((indices.len()..self.dims.len()).map(|i| 0..self.dims[i]));
let cartesian_coord: Vec<Vec<usize>> = full_indices
// Pre-calculate total size and allocate result vector
let total_size: usize = full_indices
.iter()
.cloned()
.multi_cartesian_product()
.collect();
let res: Vec<T> = cartesian_coord
.par_iter()
.map(|e| {
let index = self.get_index(e);
self[index].clone()
})
.collect();
.map(|range| range.end - range.start)
.product();
let mut res = Vec::with_capacity(total_size);
// Calculate new dimensions once
let dims: Vec<usize> = full_indices.iter().map(|e| e.end - e.start).collect();
// Use iterator directly without collecting into intermediate Vec
for coord in full_indices.iter().cloned().multi_cartesian_product() {
let index = self.get_index(&coord);
res.push(self[index].clone());
}
Tensor::new(Some(&res), &dims)
}
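As a standalone illustration of the new strategy (a sketch, not the crate's API: the row-major flat buffer, the stride computation, and the requirement that `indices` covers every dimension are assumptions), iterating `multi_cartesian_product` directly and pushing into a pre-sized Vec avoids materializing every coordinate up front:

```rust
use itertools::Itertools;
use std::ops::Range;

// Sketch: slice a row-major flat buffer by per-dimension index ranges,
// walking the cartesian product of the ranges directly instead of first
// collecting all coordinates into an intermediate Vec.
// Assumes indices.len() == dims.len().
fn slice_flat<T: Clone>(flat: &[T], dims: &[usize], indices: &[Range<usize>]) -> Vec<T> {
    // Row-major strides for the full tensor.
    let mut strides = vec![1usize; dims.len()];
    for i in (0..dims.len().saturating_sub(1)).rev() {
        strides[i] = strides[i + 1] * dims[i + 1];
    }
    // Pre-size the output from the product of the range lengths.
    let total: usize = indices.iter().map(|r| r.end - r.start).product();
    let mut out = Vec::with_capacity(total);
    for coord in indices.iter().cloned().multi_cartesian_product() {
        let idx: usize = coord.iter().zip(&strides).map(|(c, s)| c * s).sum();
        out.push(flat[idx].clone());
    }
    out
}
```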

View File

@@ -1,12 +1,12 @@
use crate::{circuit::region::ConstantsMap, fieldutils::felt_to_integer_rep};
use maybe_rayon::slice::Iter;
use maybe_rayon::slice::{Iter, ParallelSlice};
use super::{
ops::{intercalate_values, pad, resize},
*,
};
use halo2_proofs::{arithmetic::Field, circuit::Cell, plonk::Instance};
use maybe_rayon::iter::{FilterMap, IntoParallelIterator, ParallelIterator};
use maybe_rayon::iter::{FilterMap, ParallelIterator};
pub(crate) fn create_constant_tensor<
F: PrimeField + TensorType + std::marker::Send + std::marker::Sync + PartialOrd,
@@ -455,7 +455,7 @@ impl<F: PrimeField + TensorType + PartialOrd + std::hash::Hash> ValTensor<F> {
}
}
/// Returns the number of constants in the [ValTensor].
/// Returns an iterator over the [ValTensor]'s constants.
pub fn create_constants_map_iterator(
&self,
) -> FilterMap<Iter<'_, ValType<F>>, fn(&ValType<F>) -> Option<(F, ValType<F>)>> {
@@ -473,20 +473,48 @@ impl<F: PrimeField + TensorType + PartialOrd + std::hash::Hash> ValTensor<F> {
}
}
/// Returns the number of constants in the [ValTensor].
/// Returns a map of the constants in the [ValTensor].
pub fn create_constants_map(&self) -> ConstantsMap<F> {
match self {
ValTensor::Value { inner, .. } => inner
.par_iter()
.filter_map(|x| {
if let ValType::Constant(v) = x {
Some((*v, x.clone()))
} else {
None
}
})
.collect(),
ValTensor::Instance { .. } => ConstantsMap::new(),
let threshold = 1_000_000; // Tuned using the benchmarks
if self.len() < threshold {
match self {
ValTensor::Value { inner, .. } => inner
.par_iter()
.filter_map(|x| {
if let ValType::Constant(v) = x {
Some((*v, x.clone()))
} else {
None
}
})
.collect(),
ValTensor::Instance { .. } => ConstantsMap::new(),
}
} else {
// Use parallel for larger arrays
let num_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (self.len() / num_cores).max(100_000);
match self {
ValTensor::Value { inner, .. } => inner
.par_chunks(chunk_size)
.flat_map(|chunk| {
chunk
.par_iter() // Make sure we use par_iter() here
.filter_map(|x| {
if let ValType::Constant(v) = x {
Some((*v, x.clone()))
} else {
None
}
})
})
.collect(),
ValTensor::Instance { .. } => ConstantsMap::new(),
}
}
}
@@ -878,70 +906,161 @@ impl<F: PrimeField + TensorType + PartialOrd + std::hash::Hash> ValTensor<F> {
/// remove constant zero values constants
pub fn remove_const_zero_values(&mut self) {
match self {
ValTensor::Value { inner: v, dims, .. } => {
*v = v
.clone()
.into_par_iter()
.filter_map(|e| {
if let ValType::Constant(r) = e {
if r == F::ZERO {
return None;
let size_threshold = 1_000_000; // Tuned using the benchmarks
if self.len() < size_threshold {
match self {
ValTensor::Value { inner: v, dims, .. } => {
*v = v
.clone()
.into_iter()
.filter_map(|e| {
if let ValType::Constant(r) = e {
if r == F::ZERO {
return None;
}
} else if let ValType::AssignedConstant(_, r) = e {
if r == F::ZERO {
return None;
}
}
} else if let ValType::AssignedConstant(_, r) = e {
if r == F::ZERO {
return None;
}
}
Some(e)
})
.collect();
*dims = v.dims().to_vec();
Some(e)
})
.collect();
*dims = v.dims().to_vec();
}
ValTensor::Instance { .. } => {}
}
} else {
// Use parallel for larger arrays
let num_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (self.len() / num_cores).max(100_000);
match self {
ValTensor::Value { inner: v, dims, .. } => {
*v = v
.par_chunks_mut(chunk_size)
.flat_map(|chunk| {
chunk
.par_iter_mut() // Make sure we use par_iter() here
.filter_map(|e| {
if let ValType::Constant(r) = e {
if *r == F::ZERO {
return None;
}
} else if let ValType::AssignedConstant(_, r) = e {
if *r == F::ZERO {
return None;
}
}
Some(e.clone())
})
})
.collect();
*dims = v.dims().to_vec();
}
ValTensor::Instance { .. } => {}
}
ValTensor::Instance { .. } => {}
}
}
/// gets constants
/// filter constant zero values constants
pub fn get_const_zero_indices(&self) -> Vec<usize> {
match self {
ValTensor::Value { inner: v, .. } => v
.par_iter()
.enumerate()
.filter_map(|(i, e)| {
if let ValType::Constant(r) = e {
if *r == F::ZERO {
return Some(i);
let size_threshold = 1_000_000; // Tuned using the benchmarks
if self.len() < size_threshold {
// Use single-threaded for smaller arrays
match &self {
ValTensor::Value { inner: v, .. } => v
.iter()
.enumerate()
.filter_map(|(i, e)| {
match e {
// Combine both match arms to reduce branching
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(i)
}
_ => None,
}
} else if let ValType::AssignedConstant(_, r) = e {
if *r == F::ZERO {
return Some(i);
}
}
None
})
.collect(),
ValTensor::Instance { .. } => vec![],
})
.collect(),
ValTensor::Instance { .. } => vec![],
}
} else {
// Use parallel for larger arrays
let num_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (self.len() / num_cores).max(100_000);
match &self {
ValTensor::Value { inner: v, .. } => v
.par_chunks(chunk_size)
.enumerate()
.flat_map(|(chunk_idx, chunk)| {
chunk
.par_iter() // Make sure we use par_iter() here
.enumerate()
.filter_map(move |(i, e)| match e {
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(chunk_idx * chunk_size + i)
}
_ => None,
})
})
.collect::<Vec<_>>(),
ValTensor::Instance { .. } => vec![],
}
}
}
/// gets constants
/// gets constant indices
pub fn get_const_indices(&self) -> Vec<usize> {
match self {
ValTensor::Value { inner: v, .. } => v
.par_iter()
.enumerate()
.filter_map(|(i, e)| {
if let ValType::Constant(_) = e {
Some(i)
} else if let ValType::AssignedConstant(_, _) = e {
Some(i)
} else {
None
}
})
.collect(),
ValTensor::Instance { .. } => vec![],
let size_threshold = 1_000_000; // Tuned using the benchmarks
if self.len() < size_threshold {
// Use single-threaded for smaller arrays
match &self {
ValTensor::Value { inner: v, .. } => v
.iter()
.enumerate()
.filter_map(|(i, e)| {
match e {
// Combine both match arms to reduce branching
ValType::Constant(_) | ValType::AssignedConstant(_, _) => Some(i),
_ => None,
}
})
.collect(),
ValTensor::Instance { .. } => vec![],
}
} else {
// Use parallel for larger arrays
let num_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (self.len() / num_cores).max(100_000);
match &self {
ValTensor::Value { inner: v, .. } => v
.par_chunks(chunk_size)
.enumerate()
.flat_map(|(chunk_idx, chunk)| {
chunk
.par_iter() // Make sure we use par_iter() here
.enumerate()
.filter_map(move |(i, e)| match e {
ValType::Constant(_) | ValType::AssignedConstant(_, _) => {
Some(chunk_idx * chunk_size + i)
}
_ => None,
})
})
.collect::<Vec<_>>(),
ValTensor::Instance { .. } => vec![],
}
}
}
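The pattern repeated across these three methods can be distilled as the following minimal sketch, written against plain rayon (the crate itself goes through `maybe_rayon`) with a hypothetical `zero_indices` helper over `u64` values instead of field elements. Below the threshold a sequential scan avoids fork/join overhead; above it, one chunk per core keeps each task's work large enough to amortize scheduling, with global indices reconstructed from the chunk offset:

```rust
use rayon::prelude::*;

const SIZE_THRESHOLD: usize = 1_000_000; // tuned via the zero_finder benchmarks

// Hypothetical stand-in for get_const_zero_indices, over u64 instead of
// field elements: return the positions of all zeros in `data`.
fn zero_indices(data: &[u64]) -> Vec<usize> {
    if data.len() < SIZE_THRESHOLD {
        // Small input: a plain sequential scan wins.
        data.iter()
            .enumerate()
            .filter_map(|(i, e)| (*e == 0).then_some(i))
            .collect()
    } else {
        // Large input: one chunk per core (at least 100k elements each).
        let cores = std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(1);
        let chunk_size = (data.len() / cores).max(100_000);
        data.par_chunks(chunk_size)
            .enumerate()
            .flat_map(|(chunk_idx, chunk)| {
                chunk
                    .par_iter()
                    .enumerate()
                    .filter_map(move |(i, e)| (*e == 0).then_some(chunk_idx * chunk_size + i))
            })
            .collect()
    }
}
```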

View File

@@ -68,6 +68,8 @@ mod py_tests {
"install",
"torch-geometric==2.5.2",
"torch==2.2.2",
"datasets==3.2.0",
"torchtext==0.17.2",
"torchvision==0.17.2",
"pandas==2.2.1",
"numpy==1.26.4",
@@ -190,6 +192,16 @@ mod py_tests {
}
});
#[test]
fn neural_bag_of_words_notebook() {
crate::py_tests::init_binary();
let test_dir: TempDir = TempDir::new("neural_bow").unwrap();
let path = test_dir.path().to_str().unwrap();
crate::py_tests::mv_test_(path, "neural_bow.ipynb");
run_notebook(path, "neural_bow.ipynb");
test_dir.close().unwrap();
}
#[test]
fn felt_conversion_test_notebook() {
crate::py_tests::init_binary();