Compare commits


7 Commits

Author   SHA1         Message                                                  Date
dante    d48d0b0b3e   fix: get_slice should not use intermediate Vec (#894)   2024-12-27 23:26:22 -05:00
Jseam    8b223354cc   fix: add version string and sed (#893)                   2024-12-27 14:24:28 -05:00
dante    caa6ef8e16   fix: const filtering strat is size dependent (#891)      2024-12-27 09:43:59 -05:00
Artem    c4354c10a5   fix: ios bindings update action (#886)                   2024-12-16 10:49:13 -05:00
dante    c1ce8c88d0   chore: rm wasm serialization checks (#890)               2024-12-12 22:20:29 -05:00
dante    876a9584a1   chore: optimize wasm bundle for speed over size (#889)   2024-12-12 15:35:17 -05:00
dante    7d7f049cc4   chore: neural bag of words example (#888)                2024-12-12 14:20:21 -05:00
15 changed files with 1300 additions and 221 deletions

View File

@@ -34,6 +34,7 @@ jobs:
run: |
mv pyproject.toml pyproject.toml.orig
sed "s/ezkl/ezkl-gpu/" pyproject.toml.orig >pyproject.toml
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" pyproject.toml.orig >pyproject.toml
- uses: actions-rs/toolchain@v1
with:

View File

@@ -233,6 +233,14 @@ jobs:
python-version: 3.12
architecture: x64
- name: Set pyproject.toml version to match github tag
shell: bash
env:
RELEASE_TAG: ${{ github.ref_name }}
run: |
mv pyproject.toml pyproject.toml.orig
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" pyproject.toml.orig >pyproject.toml
- name: Set Cargo.toml version to match github tag
shell: bash
env:
@@ -242,7 +250,6 @@ jobs:
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" Cargo.toml.orig >Cargo.toml
mv Cargo.lock Cargo.lock.orig
sed "s/0\\.0\\.0/${RELEASE_TAG//v}/" Cargo.lock.orig >Cargo.lock
- name: Install required libraries
shell: bash
run: |

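Both release workflows rely on bash parameter expansion to normalize the git tag: `${RELEASE_TAG//v}` deletes every `v` in the tag (not just a leading one), while the swift-pm workflow below uses `${TAG#v}`, which strips only a single leading `v`. For SemVer tags like `v1.2.3` the two agree. A rough Rust equivalent of both, for illustration only:

```rust
// Illustrative Rust equivalents of the two bash expansions used in these
// workflows; the tag value is a made-up example.
fn main() {
    let tag = "v1.2.3";
    let all_vs_removed = tag.replace('v', "");                  // ${RELEASE_TAG//v}
    let prefix_stripped = tag.strip_prefix('v').unwrap_or(tag); // ${TAG#v}
    assert_eq!(all_vs_removed, "1.2.3");
    assert_eq!(prefix_stripped, "1.2.3");
}
```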
View File

@@ -650,7 +650,9 @@ jobs:
run: python -m venv .env --clear; source .env/bin/activate; pip install -r requirements.txt; python -m ensurepip --upgrade
- name: Build python ezkl
run: source .env/bin/activate; unset CONDA_PREFIX; maturin develop --features python-bindings --release
- name: Postgres tutorials
- name: Neural bow
run: source .env/bin/activate; cargo nextest run py_tests::tests::neural_bag_of_words_ --no-capture
- name: Felt conversion
run: source .env/bin/activate; cargo nextest run py_tests::tests::felt_conversion_test_ --no-capture
- name: Postgres tutorials
run: source .env/bin/activate; cargo nextest run py_tests::tests::postgres_ --no-capture

.github/workflows/swift-pm.yml (vendored, new file, 129 lines)
View File

@@ -0,0 +1,129 @@
name: Build and Publish EZKL iOS SPM package
on:
push:
tags:
# Only support SemVer versioning tags
- 'v[0-9]+.[0-9]+.[0-9]+'
- '[0-9]+.[0-9]+.[0-9]+'
jobs:
build-and-update:
runs-on: macos-latest
env:
EZKL_SWIFT_PACKAGE_REPO: github.com/zkonduit/ezkl-swift-package.git
steps:
- name: Checkout EZKL
uses: actions/checkout@v3
- name: Extract TAG from github.ref_name
run: |
# github.ref_name is provided by GitHub Actions and contains the tag name directly.
TAG="${{ github.ref_name }}"
echo "Original TAG: $TAG"
# Remove leading 'v' if present to match the Swift Package Manager version format.
NEW_TAG=${TAG#v}
echo "Stripped TAG: $NEW_TAG"
echo "TAG=$NEW_TAG" >> $GITHUB_ENV
- name: Install Rust (nightly)
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
- name: Build EzklCoreBindings
run: CONFIGURATION=release cargo run --bin ios_gen_bindings --features "ios-bindings uuid camino uniffi_bindgen" --no-default-features
- name: Clone ezkl-swift-package repository
run: |
git clone https://${{ env.EZKL_SWIFT_PACKAGE_REPO }}
- name: Copy EzklCoreBindings
run: |
rm -rf ezkl-swift-package/Sources/EzklCoreBindings
cp -r build/EzklCoreBindings ezkl-swift-package/Sources/
- name: Copy Test Files
run: |
rm -rf ezkl-swift-package/Tests/EzklAssets/*
cp tests/assets/kzg ezkl-swift-package/Tests/EzklAssets/kzg.srs
cp tests/assets/input.json ezkl-swift-package/Tests/EzklAssets/input.json
cp tests/assets/model.compiled ezkl-swift-package/Tests/EzklAssets/network.ezkl
cp tests/assets/settings.json ezkl-swift-package/Tests/EzklAssets/settings.json
- name: Check for changes
id: check_changes
run: |
cd ezkl-swift-package
if git diff --quiet Sources/EzklCoreBindings Tests/EzklAssets; then
echo "no_changes=true" >> $GITHUB_OUTPUT
else
echo "no_changes=false" >> $GITHUB_OUTPUT
fi
- name: Set up Xcode environment
if: steps.check_changes.outputs.no_changes == 'false'
run: |
sudo xcode-select -s /Applications/Xcode.app/Contents/Developer
sudo xcodebuild -license accept
- name: Run Package Tests
if: steps.check_changes.outputs.no_changes == 'false'
run: |
cd ezkl-swift-package
xcodebuild test \
-scheme EzklPackage \
-destination 'platform=iOS Simulator,name=iPhone 15 Pro,OS=17.5' \
-resultBundlePath ../testResults
- name: Run Example App Tests
if: steps.check_changes.outputs.no_changes == 'false'
run: |
cd ezkl-swift-package/Example
xcodebuild test \
-project Example.xcodeproj \
-scheme EzklApp \
-destination 'platform=iOS Simulator,name=iPhone 15 Pro,OS=17.5' \
-parallel-testing-enabled NO \
-resultBundlePath ../../exampleTestResults \
-skip-testing:EzklAppUITests/EzklAppUITests/testButtonClicksInOrder
- name: Setup Git
run: |
cd ezkl-swift-package
git config user.name "GitHub Action"
git config user.email "action@github.com"
git remote set-url origin https://zkonduit:${EZKL_SWIFT_PACKAGE_REPO_TOKEN}@${{ env.EZKL_SWIFT_PACKAGE_REPO }}
env:
EZKL_SWIFT_PACKAGE_REPO_TOKEN: ${{ secrets.EZKL_PORTER_TOKEN }}
- name: Commit and Push Changes
if: steps.check_changes.outputs.no_changes == 'false'
run: |
cd ezkl-swift-package
git add Sources/EzklCoreBindings Tests/EzklAssets
git commit -m "Automatically updated EzklCoreBindings for EZKL"
if ! git push origin; then
echo "::error::Failed to push changes to ${{ env.EZKL_SWIFT_PACKAGE_REPO }}. Please ensure that EZKL_PORTER_TOKEN has the correct permissions."
exit 1
fi
- name: Tag the latest commit
run: |
cd ezkl-swift-package
source $GITHUB_ENV
# Tag the latest commit on the current branch
if git rev-parse "$TAG" >/dev/null 2>&1; then
echo "Tag $TAG already exists locally. Skipping tag creation."
else
git tag "$TAG"
fi
if ! git push origin "$TAG"; then
echo "::error::Failed to push tag '$TAG' to ${{ env.EZKL_SWIFT_PACKAGE_REPO }}. Please ensure EZKL_PORTER_TOKEN has correct permissions."
exit 1
fi

View File

@@ -1,85 +0,0 @@
name: Build and Publish EZKL iOS SPM package
on:
workflow_dispatch:
inputs:
tag:
description: "The tag to release"
required: true
push:
tags:
- "*"
jobs:
build-and-update:
runs-on: macos-latest
steps:
- name: Checkout EZKL
uses: actions/checkout@v3
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: nightly
override: true
- name: Build EzklCoreBindings
run: CONFIGURATION=release cargo run --bin ios_gen_bindings --features "ios-bindings uuid camino uniffi_bindgen" --no-default-features
- name: Clone ezkl-swift-package repository
run: |
git clone https://github.com/zkonduit/ezkl-swift-package.git
- name: Copy EzklCoreBindings
run: |
rm -rf ezkl-swift-package/Sources/EzklCoreBindings
cp -r build/EzklCoreBindings ezkl-swift-package/Sources/
- name: Copy Test Files
run: |
rm -rf ezkl-swift-package/Tests/EzklAssets/*
cp tests/assets/kzg ezkl-swift-package/Tests/EzklAssets/kzg.srs
cp tests/assets/input.json ezkl-swift-package/Tests/EzklAssets/input.json
cp tests/assets/model.compiled ezkl-swift-package/Tests/EzklAssets/network.ezkl
cp tests/assets/settings.json ezkl-swift-package/Tests/EzklAssets/settings.json
- name: Set up Xcode environment
run: |
sudo xcode-select -s /Applications/Xcode.app/Contents/Developer
sudo xcodebuild -license accept
- name: Run Package Tests
run: |
cd ezkl-swift-package
xcodebuild test \
-scheme EzklPackage \
-destination 'platform=iOS Simulator,name=iPhone 15 Pro,OS=17.5' \
-resultBundlePath ../testResults
- name: Run Example App Tests
run: |
cd ezkl-swift-package/Example
xcodebuild test \
-project Example.xcodeproj \
-scheme EzklApp \
-destination 'platform=iOS Simulator,name=iPhone 15 Pro,OS=17.5' \
-parallel-testing-enabled NO \
-resultBundlePath ../../exampleTestResults \
-skip-testing:EzklAppUITests/EzklAppUITests/testButtonClicksInOrder
- name: Commit and Push Changes to feat/ezkl-direct-integration
run: |
cd ezkl-swift-package
git config user.name "GitHub Action"
git config user.email "action@github.com"
git add Sources/EzklCoreBindings
git add Tests/EzklAssets
git commit -m "Automatically updated EzklCoreBindings for EZKL"
git tag ${{ github.event.inputs.tag }}
git remote set-url origin https://zkonduit:${EZKL_PORTER_TOKEN}@github.com/zkonduit/ezkl-swift-package.git
git push origin
git push origin tag ${{ github.event.inputs.tag }}
env:
EZKL_PORTER_TOKEN: ${{ secrets.EZKL_PORTER_TOKEN }}

View File

@@ -147,6 +147,10 @@ shellexpand = "3.1.0"
runner = 'wasm-bindgen-test-runner'
[[bench]]
name = "zero_finder"
harness = false
[[bench]]
name = "accum_dot"
harness = false
@@ -286,3 +290,11 @@ rustflags = ["-C", "relocation-model=pic"]
lto = "fat"
codegen-units = 1
# panic = "abort"
[package.metadata.wasm-pack.profile.release]
wasm-opt = [
"-O4",
"--flexible-inline-max-function-size",
"4294967295",
]
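A note on the wasm-opt flags added above: `-O4` asks Binaryen to optimize for speed rather than size, and `4294967295` is `u32::MAX`, which effectively lifts the inline-size cap so even large functions may be inlined. Both choices trade bundle size for runtime speed, matching the intent of #889.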

benches/zero_finder.rs (new file, 116 lines)
View File

@@ -0,0 +1,116 @@
use std::thread;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use halo2curves::{bn256::Fr as F, ff::Field};
use maybe_rayon::{
iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator},
slice::ParallelSlice,
};
use rand::Rng;
// Assuming these are your types
#[derive(Clone)]
enum ValType {
Constant(F),
AssignedConstant(usize, F),
Other,
}
// Helper to generate test data
fn generate_test_data(size: usize, zero_probability: f64) -> Vec<ValType> {
let mut rng = rand::thread_rng();
(0..size)
.map(|_i| {
if rng.gen::<f64>() < zero_probability {
ValType::Constant(F::ZERO)
} else {
ValType::Constant(F::ONE) // Or some other non-zero value
}
})
.collect()
}
fn bench_zero_finding(c: &mut Criterion) {
let sizes = [
1_000, // 1K
10_000, // 10K
100_000, // 100K
256 * 256 * 2, // Our specific case
1_000_000, // 1M
10_000_000, // 10M
];
let zero_probability = 0.1; // 10% zeros
let mut group = c.benchmark_group("zero_finding");
group.sample_size(10); // Adjust based on your needs
for &size in &sizes {
let data = generate_test_data(size, zero_probability);
// Benchmark sequential version
group.bench_function(format!("sequential_{}", size), |b| {
b.iter(|| {
let result = data
.iter()
.enumerate()
.filter_map(|(i, e)| match e {
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(i)
}
_ => None,
})
.collect::<Vec<_>>();
black_box(result)
})
});
// Benchmark parallel version
group.bench_function(format!("parallel_{}", size), |b| {
b.iter(|| {
let result = data
.par_iter()
.enumerate()
.filter_map(|(i, e)| match e {
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(i)
}
_ => None,
})
.collect::<Vec<_>>();
black_box(result)
})
});
// Benchmark chunked parallel version
group.bench_function(format!("chunked_parallel_{}", size), |b| {
b.iter(|| {
let num_cores = thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (size / num_cores).max(100);
let result = data
.par_chunks(chunk_size)
.enumerate()
.flat_map(|(chunk_idx, chunk)| {
chunk
.par_iter() // Make sure we use par_iter() here
.enumerate()
.filter_map(move |(i, e)| match e {
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(chunk_idx * chunk_size + i)
}
_ => None,
})
})
.collect::<Vec<_>>();
black_box(result)
})
});
}
group.finish();
}
criterion_group!(benches, bench_zero_finding);
criterion_main!(benches);
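Since the `[[bench]]` entry added to Cargo.toml sets `harness = false`, the default libtest harness is disabled and the `criterion_group!`/`criterion_main!` macros supply the entry point; the suite runs with `cargo bench --bench zero_finder`.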

View File

@@ -0,0 +1,766 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"This is a zk version of the tutorial found [here](https://github.com/bentrevett/pytorch-sentiment-analysis/blob/main/1%20-%20Neural%20Bag%20of%20Words.ipynb). The original tutorial is part of the PyTorch Sentiment Analysis series by Ben Trevett.\n",
"\n",
"1 - NBoW\n",
"\n",
"In this series we'll be building a machine learning model to perform sentiment analysis -- a subset of text classification where the task is to detect if a given sentence is positive or negative -- using PyTorch and torchtext. The dataset used will be movie reviews from the IMDb dataset, which we'll obtain using the datasets library.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"Preparing Data\n",
"\n",
"Before we can implement our NBoW model, we first have to perform quite a few steps to get our data ready to use. NLP usually requires quite a lot of data wrangling beforehand, though libraries such as datasets and torchtext handle most of this for us.\n",
"\n",
"The steps to take are:\n",
"\n",
" 1. importing modules\n",
" 2. loading data\n",
" 3. tokenizing data\n",
" 4. creating data splits\n",
" 5. creating a vocabulary\n",
" 6. numericalizing data\n",
" 7. creating the data loaders\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"! pip install torchtex"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import collections\n",
"\n",
"import datasets\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.optim as optim\n",
"import torchtext\n",
"import tqdm\n",
"\n",
"# It is usually good practice to run your experiments multiple times with different random seeds -- both to measure the variance of your model and also to avoid having results only calculated with either \"good\" or \"bad\" seeds, i.e. being very lucky or unlucky with the randomness in the training process.\n",
"\n",
"seed = 1234\n",
"\n",
"np.random.seed(seed)\n",
"torch.manual_seed(seed)\n",
"torch.cuda.manual_seed(seed)\n",
"torch.backends.cudnn.deterministic = True\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_data, test_data = datasets.load_dataset(\"imdb\", split=[\"train\", \"test\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can check the features attribute of a split to get more information about the features. We can see that text is a Value of dtype=string -- in other words, it's a string -- and that label is a ClassLabel. A ClassLabel means the feature is an integer representation of which class the example belongs to. num_classes=2 means that our labels are one of two values, 0 or 1, and names=['neg', 'pos'] gives us the human-readable versions of those values. Thus, a label of 0 means the example is a negative review and a label of 1 means the example is a positive review."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_data.features\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"train_data[0]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"One of the first things we need to do to our data is tokenize it. Machine learning models aren't designed to handle strings, they're design to handle numbers. So what we need to do is break down our string into individual tokens, and then convert these tokens to numbers. We'll get to the conversion later, but first we'll look at tokenization.\n",
"\n",
"Tokenization involves using a tokenizer to process the strings in our dataset. A tokenizer is a function that goes from a string to a list of strings. There are many types of tokenizers available, but we're going to use a relatively simple one provided by torchtext called the basic_english tokenizer. We load our tokenizer as such:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"tokenizer = torchtext.data.utils.get_tokenizer(\"basic_english\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def tokenize_example(example, tokenizer, max_length):\n",
" tokens = tokenizer(example[\"text\"])[:max_length]\n",
" return {\"tokens\": tokens}\n",
"\n",
"\n",
"max_length = 256\n",
"\n",
"train_data = train_data.map(\n",
" tokenize_example, fn_kwargs={\"tokenizer\": tokenizer, \"max_length\": max_length}\n",
")\n",
"test_data = test_data.map(\n",
" tokenize_example, fn_kwargs={\"tokenizer\": tokenizer, \"max_length\": max_length}\n",
")\n",
"\n",
"\n",
"# create validation data \n",
"# Why have both a validation set and a test set? Your test set respresents the real world data that you'd see if you actually deployed this model. You won't be able to see what data your model will be fed once deployed, and your test set is supposed to reflect that. Every time we tune our model hyperparameters or training set-up to make it do a bit better on the test set, we are leak information from the test set into the training process. If we do this too often then we begin to overfit on the test set. Hence, we need some data which can act as a \"proxy\" test set which we can look at more frequently in order to evaluate how well our model actually does on unseen data -- this is the validation set.\n",
"\n",
"test_size = 0.25\n",
"\n",
"train_valid_data = train_data.train_test_split(test_size=test_size)\n",
"train_data = train_valid_data[\"train\"]\n",
"valid_data = train_valid_data[\"test\"]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Next, we have to build a vocabulary. This is look-up table where every unique token in your dataset has a corresponding index (an integer).\n",
"\n",
"We do this as machine learning models cannot operate on strings, only numerical vaslues. Each index is used to construct a one-hot vector for each token. A one-hot vector is a vector where all the elements are 0, except one, which is 1, and the dimensionality is the total number of unique tokens in your vocabulary, commonly denoted by V."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"min_freq = 5\n",
"special_tokens = [\"<unk>\", \"<pad>\"]\n",
"\n",
"vocab = torchtext.vocab.build_vocab_from_iterator(\n",
" train_data[\"tokens\"],\n",
" min_freq=min_freq,\n",
" specials=special_tokens,\n",
")\n",
"\n",
"# We store the indices of the unknown and padding tokens (zero and one, respectively) in variables, as we'll use these further on in this notebook.\n",
"\n",
"unk_index = vocab[\"<unk>\"]\n",
"pad_index = vocab[\"<pad>\"]\n",
"\n",
"\n",
"vocab.set_default_index(unk_index)\n",
"\n",
"# To look-up a list of tokens, we can use the vocabulary's lookup_indices method.\n",
"vocab.lookup_indices([\"hello\", \"world\", \"some_token\", \"<pad>\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we have our vocabulary, we can numericalize our data. This involves converting the tokens within our dataset into indices. Similar to how we tokenized our data using the Dataset.map method, we'll define a function that takes an example and our vocabulary, gets the index for each token in each example and then creates an ids field which containes the numericalized tokens."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def numericalize_example(example, vocab):\n",
" ids = vocab.lookup_indices(example[\"tokens\"])\n",
" return {\"ids\": ids}\n",
"\n",
"train_data = train_data.map(numericalize_example, fn_kwargs={\"vocab\": vocab})\n",
"valid_data = valid_data.map(numericalize_example, fn_kwargs={\"vocab\": vocab})\n",
"test_data = test_data.map(numericalize_example, fn_kwargs={\"vocab\": vocab})\n",
"\n",
"train_data = train_data.with_format(type=\"torch\", columns=[\"ids\", \"label\"])\n",
"valid_data = valid_data.with_format(type=\"torch\", columns=[\"ids\", \"label\"])\n",
"test_data = test_data.with_format(type=\"torch\", columns=[\"ids\", \"label\"])\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The final step of preparing the data is creating the data loaders. We can iterate over a data loader to retrieve batches of examples. This is also where we will perform any padding that is necessary.\n",
"\n",
"We first need to define a function to collate a batch, consisting of a list of examples, into what we want our data loader to output.\n",
"\n",
"Here, our desired output from the data loader is a dictionary with keys of \"ids\" and \"label\".\n",
"\n",
"The value of batch[\"ids\"] should be a tensor of shape [batch size, length], where length is the length of the longest sentence (in terms of tokens) within the batch, and all sentences shorter than this should be padded to that length.\n",
"\n",
"The value of batch[\"label\"] should be a tensor of shape [batch size] consisting of the label for each sentence in the batch.\n",
"\n",
"We define a function, get_collate_fn, which is passed the pad token index and returns the actual collate function. Within the actual collate function, collate_fn, we get a list of \"ids\" tensors for each example in the batch, and then use the pad_sequence function, which converts the list of tensors into the desired [batch size, length] shaped tensor and performs padding using the specified pad_index. By default, pad_sequence will return a [length, batch size] shaped tensor, but by setting batch_first=True, these two dimensions are switched. We get a list of \"label\" tensors and convert the list of tensors into a single [batch size] shaped tensor."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_collate_fn(pad_index):\n",
" def collate_fn(batch):\n",
" batch_ids = [i[\"ids\"] for i in batch]\n",
" batch_ids = nn.utils.rnn.pad_sequence(\n",
" batch_ids, padding_value=pad_index, batch_first=True\n",
" )\n",
" batch_label = [i[\"label\"] for i in batch]\n",
" batch_label = torch.stack(batch_label)\n",
" batch = {\"ids\": batch_ids, \"label\": batch_label}\n",
" return batch\n",
"\n",
" return collate_fn\n",
"\n",
"def get_data_loader(dataset, batch_size, pad_index, shuffle=False):\n",
" collate_fn = get_collate_fn(pad_index)\n",
" data_loader = torch.utils.data.DataLoader(\n",
" dataset=dataset,\n",
" batch_size=batch_size,\n",
" collate_fn=collate_fn,\n",
" shuffle=shuffle,\n",
" )\n",
" return data_loader\n",
"\n",
"\n",
"batch_size = 512\n",
"\n",
"train_data_loader = get_data_loader(train_data, batch_size, pad_index, shuffle=True)\n",
"valid_data_loader = get_data_loader(valid_data, batch_size, pad_index)\n",
"test_data_loader = get_data_loader(test_data, batch_size, pad_index)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"class NBoW(nn.Module):\n",
" def __init__(self, vocab_size, embedding_dim, output_dim, pad_index):\n",
" super().__init__()\n",
" self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_index)\n",
" self.fc = nn.Linear(embedding_dim, output_dim)\n",
"\n",
" def forward(self, ids):\n",
" # ids = [batch size, seq len]\n",
" embedded = self.embedding(ids)\n",
" # embedded = [batch size, seq len, embedding dim]\n",
" pooled = embedded.mean(dim=1)\n",
" # pooled = [batch size, embedding dim]\n",
" prediction = self.fc(pooled)\n",
" # prediction = [batch size, output dim]\n",
" return prediction\n",
"\n",
"\n",
"vocab_size = len(vocab)\n",
"embedding_dim = 300\n",
"output_dim = len(train_data.unique(\"label\"))\n",
"\n",
"model = NBoW(vocab_size, embedding_dim, output_dim, pad_index)\n",
"\n",
"def count_parameters(model):\n",
" return sum(p.numel() for p in model.parameters() if p.requires_grad)\n",
"\n",
"\n",
"print(f\"The model has {count_parameters(model):,} trainable parameters\")\n",
"\n",
"vectors = torchtext.vocab.GloVe()\n",
"\n",
"pretrained_embedding = vectors.get_vecs_by_tokens(vocab.get_itos())\n",
"\n",
"optimizer = optim.Adam(model.parameters())\n",
"\n",
"criterion = nn.CrossEntropyLoss()\n",
"\n",
"device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
"\n",
"model = model.to(device)\n",
"criterion = criterion.to(device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def train(data_loader, model, criterion, optimizer, device):\n",
" model.train()\n",
" epoch_losses = []\n",
" epoch_accs = []\n",
" for batch in tqdm.tqdm(data_loader, desc=\"training...\"):\n",
" ids = batch[\"ids\"].to(device)\n",
" label = batch[\"label\"].to(device)\n",
" prediction = model(ids)\n",
" loss = criterion(prediction, label)\n",
" accuracy = get_accuracy(prediction, label)\n",
" optimizer.zero_grad()\n",
" loss.backward()\n",
" optimizer.step()\n",
" epoch_losses.append(loss.item())\n",
" epoch_accs.append(accuracy.item())\n",
" return np.mean(epoch_losses), np.mean(epoch_accs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def evaluate(data_loader, model, criterion, device):\n",
" model.eval()\n",
" epoch_losses = []\n",
" epoch_accs = []\n",
" with torch.no_grad():\n",
" for batch in tqdm.tqdm(data_loader, desc=\"evaluating...\"):\n",
" ids = batch[\"ids\"].to(device)\n",
" label = batch[\"label\"].to(device)\n",
" prediction = model(ids)\n",
" loss = criterion(prediction, label)\n",
" accuracy = get_accuracy(prediction, label)\n",
" epoch_losses.append(loss.item())\n",
" epoch_accs.append(accuracy.item())\n",
" return np.mean(epoch_losses), np.mean(epoch_accs)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def get_accuracy(prediction, label):\n",
" batch_size, _ = prediction.shape\n",
" predicted_classes = prediction.argmax(dim=-1)\n",
" correct_predictions = predicted_classes.eq(label).sum()\n",
" accuracy = correct_predictions / batch_size\n",
" return accuracy"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"n_epochs = 10\n",
"best_valid_loss = float(\"inf\")\n",
"\n",
"metrics = collections.defaultdict(list)\n",
"\n",
"for epoch in range(n_epochs):\n",
" train_loss, train_acc = train(\n",
" train_data_loader, model, criterion, optimizer, device\n",
" )\n",
" valid_loss, valid_acc = evaluate(valid_data_loader, model, criterion, device)\n",
" metrics[\"train_losses\"].append(train_loss)\n",
" metrics[\"train_accs\"].append(train_acc)\n",
" metrics[\"valid_losses\"].append(valid_loss)\n",
" metrics[\"valid_accs\"].append(valid_acc)\n",
" if valid_loss < best_valid_loss:\n",
" best_valid_loss = valid_loss\n",
" torch.save(model.state_dict(), \"nbow.pt\")\n",
" print(f\"epoch: {epoch}\")\n",
" print(f\"train_loss: {train_loss:.3f}, train_acc: {train_acc:.3f}\")\n",
" print(f\"valid_loss: {valid_loss:.3f}, valid_acc: {valid_acc:.3f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(10, 6))\n",
"ax = fig.add_subplot(1, 1, 1)\n",
"ax.plot(metrics[\"train_losses\"], label=\"train loss\")\n",
"ax.plot(metrics[\"valid_losses\"], label=\"valid loss\")\n",
"ax.set_xlabel(\"epoch\")\n",
"ax.set_ylabel(\"loss\")\n",
"ax.set_xticks(range(n_epochs))\n",
"ax.legend()\n",
"ax.grid()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"fig = plt.figure(figsize=(10, 6))\n",
"ax = fig.add_subplot(1, 1, 1)\n",
"ax.plot(metrics[\"train_accs\"], label=\"train accuracy\")\n",
"ax.plot(metrics[\"valid_accs\"], label=\"valid accuracy\")\n",
"ax.set_xlabel(\"epoch\")\n",
"ax.set_ylabel(\"loss\")\n",
"ax.set_xticks(range(n_epochs))\n",
"ax.legend()\n",
"ax.grid()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"model.load_state_dict(torch.load(\"nbow.pt\"))\n",
"\n",
"test_loss, test_acc = evaluate(test_data_loader, model, criterion, device)\n",
"\n",
"print(f\"test_loss: {test_loss:.3f}, test_acc: {test_acc:.3f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def predict_sentiment(text, model, tokenizer, vocab, device):\n",
" tokens = tokenizer(text)\n",
" ids = vocab.lookup_indices(tokens)\n",
" tensor = torch.LongTensor(ids).unsqueeze(dim=0).to(device)\n",
" prediction = model(tensor).squeeze(dim=0)\n",
" probability = torch.softmax(prediction, dim=-1)\n",
" predicted_class = prediction.argmax(dim=-1).item()\n",
" predicted_probability = probability[predicted_class].item()\n",
" return predicted_class, predicted_probability"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = \"This film is terrible!\"\n",
"\n",
"predict_sentiment(text, model, tokenizer, vocab, device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = \"This film is great!\"\n",
"\n",
"predict_sentiment(text, model, tokenizer, vocab, device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = \"This film is not terrible, it's great!\"\n",
"\n",
"predict_sentiment(text, model, tokenizer, vocab, device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"text = \"This film is not great, it's terrible!\"\n",
"\n",
"predict_sentiment(text, model, tokenizer, vocab, device)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def text_to_tensor(text, tokenizer, vocab, device):\n",
" tokens = tokenizer(text)\n",
" ids = vocab.lookup_indices(tokens)\n",
" tensor = torch.LongTensor(ids).unsqueeze(dim=0).to(device)\n",
" return tensor\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we do onnx stuff to get the data ready for the zk-circuit."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"import json\n",
"\n",
"text = \"This film is terrible!\"\n",
"x = text_to_tensor(text, tokenizer, vocab, device)\n",
"\n",
"# Flips the neural net into inference mode\n",
"model.eval()\n",
"model.to('cpu')\n",
"\n",
"model_path = \"network.onnx\"\n",
"data_path = \"input.json\"\n",
"\n",
" # Export the model\n",
"torch.onnx.export(model, # model being run\n",
" x, # model input (or a tuple for multiple inputs)\n",
" model_path, # where to save the model (can be a file or file-like object)\n",
" export_params=True, # store the trained parameter weights inside the model file\n",
" opset_version=10, # the ONNX version to export the model to\n",
" do_constant_folding=True, # whether to execute constant folding for optimization\n",
" input_names = ['input'], # the model's input names\n",
" output_names = ['output'], # the model's output names\n",
" dynamic_axes={'input' : {0 : 'batch_size'}, # variable length axes\n",
" 'output' : {0 : 'batch_size'}})\n",
"\n",
"\n",
"\n",
"data_array = ((x).detach().numpy()).reshape([-1]).tolist()\n",
"\n",
"data_json = dict(input_data = [data_array])\n",
"\n",
"print(data_json)\n",
"\n",
" # Serialize data into file:\n",
"json.dump(data_json, open(data_path, 'w'))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import ezkl\n",
"\n",
"run_args = ezkl.PyRunArgs()\n",
"run_args.logrows = 23\n",
"run_args.scale_rebase_multiplier = 10\n",
"# inputs should be auditable by all\n",
"run_args.input_visibility = \"public\"\n",
"# same with outputs\n",
"run_args.output_visibility = \"public\"\n",
"# for simplicity, we'll just use the fixed model visibility: i.e it is public and can't be changed by the prover\n",
"run_args.param_visibility = \"fixed\"\n",
"\n",
"\n",
"# TODO: Dictionary outputs\n",
"res = ezkl.gen_settings(py_run_args=run_args)\n",
"assert res == True\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res = ezkl.compile_circuit()\n",
"assert res == True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# srs path\n",
"res = await ezkl.get_srs()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# now generate the witness file\n",
"res = await ezkl.gen_witness()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res = ezkl.mock()\n",
"assert res == True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# HERE WE SETUP THE CIRCUIT PARAMS\n",
"# WE GOT KEYS\n",
"# WE GOT CIRCUIT PARAMETERS\n",
"# EVERYTHING ANYONE HAS EVER NEEDED FOR ZK\n",
"\n",
"res = ezkl.setup()\n",
"\n",
"assert res == True"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# GENERATE A PROOF\n",
"res = ezkl.prove(proof_path=\"proof.json\")\n",
"\n",
"print(res)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# VERIFY IT\n",
"res = ezkl.verify()\n",
"\n",
"assert res == True\n",
"print(\"verified\")\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We can also verify it on chain by creating an onchain verifier"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# check if notebook is in colab\n",
"try:\n",
" import google.colab\n",
" import subprocess\n",
" import sys\n",
" subprocess.check_call([sys.executable, \"-m\", \"pip\", \"install\", \"solc-select\"])\n",
" !solc-select install 0.8.20\n",
" !solc-select use 0.8.20\n",
" !solc --version\n",
" import os\n",
"\n",
"# rely on local installation if the notebook is not in colab\n",
"except:\n",
" import os\n",
" pass"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"res = await ezkl.create_evm_verifier()\n",
"assert res == True\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"You should see a `Verifier.sol`. Right-click and save it locally.\n",
"\n",
"Now go to [https://remix.ethereum.org](https://remix.ethereum.org).\n",
"\n",
"Create a new file within remix and copy the verifier code over.\n",
"\n",
"Finally, compile the code and deploy. For the demo you can deploy to the test environment within remix.\n",
"\n",
"If everything works, you would have deployed your verifer onchain! Copy the values in the cell above to the respective fields to test if the verifier is working."
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -12,6 +12,7 @@ asyncio_mode = "auto"
[project]
name = "ezkl"
version = "0.0.0"
requires-python = ">=3.7"
classifiers = [
"Programming Language :: Rust",

View File

@@ -141,10 +141,11 @@ pub(crate) fn gen_vk(
.map_err(|e| EZKLError::InternalError(format!("Failed to create verifying key: {}", e)))?;
let mut serialized_vk = Vec::new();
vk.write(&mut serialized_vk, halo2_proofs::SerdeFormat::RawBytes)
.map_err(|e| {
EZKLError::InternalError(format!("Failed to serialize verifying key: {}", e))
})?;
vk.write(
&mut serialized_vk,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
)
.map_err(|e| EZKLError::InternalError(format!("Failed to serialize verifying key: {}", e)))?;
Ok(serialized_vk)
}
@@ -165,7 +166,7 @@ pub(crate) fn gen_pk(
let mut reader = BufReader::new(&vk[..]);
let vk = VerifyingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit.settings().clone(),
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize verifying key: {}", e)))?;
@@ -197,7 +198,7 @@ pub(crate) fn verify(
let mut reader = BufReader::new(&vk[..]);
let vk = VerifyingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit_settings.clone(),
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize vk: {}", e)))?;
@@ -277,7 +278,7 @@ pub(crate) fn verify_aggr(
let mut reader = BufReader::new(&vk[..]);
let vk = VerifyingKey::<G1Affine>::read::<_, AggregationCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
(),
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize vk: {}", e)))?;
@@ -365,7 +366,7 @@ pub(crate) fn prove(
let mut reader = BufReader::new(&pk[..]);
let pk = ProvingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit.settings().clone(),
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize proving key: {}", e)))?;
@@ -487,7 +488,7 @@ pub(crate) fn vk_validation(vk: Vec<u8>, settings: Vec<u8>) -> Result<bool, EZKL
let mut reader = BufReader::new(&vk[..]);
let _ = VerifyingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit_settings,
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize verifying key: {}", e)))?;
@@ -504,7 +505,7 @@ pub(crate) fn pk_validation(pk: Vec<u8>, settings: Vec<u8>) -> Result<bool, EZKL
let mut reader = BufReader::new(&pk[..]);
let _ = ProvingKey::<G1Affine>::read::<_, GraphCircuit>(
&mut reader,
halo2_proofs::SerdeFormat::RawBytes,
halo2_proofs::SerdeFormat::RawBytesUnchecked,
circuit_settings,
)
.map_err(|e| EZKLError::InternalError(format!("Failed to deserialize proving key: {}", e)))?;
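For readers unfamiliar with the halo2 serde formats: `RawBytesUnchecked` skips the validity checks that `RawBytes` performs on read (e.g. that deserialized curve points actually lie on the curve), which is what makes it faster, and why this change is reasonable only because the key bytes here are produced and consumed by ezkl itself. A minimal sketch of gating the format on trust, with a hypothetical `trusted` flag:

```rust
use halo2_proofs::SerdeFormat;

// Hypothetical helper: pick the cheaper unchecked format only for key bytes
// the application itself produced; fall back to the checked format otherwise.
fn key_format(trusted: bool) -> SerdeFormat {
    if trusted {
        SerdeFormat::RawBytesUnchecked // skips point/field validity checks
    } else {
        SerdeFormat::RawBytes // validates on read
    }
}
```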

View File

@@ -488,7 +488,8 @@ pub async fn deploy_da_verifier_via_solidity(
}
}
let contract = match call_to_account {
match call_to_account {
Some(call) => {
deploy_single_da_contract(
client,
@@ -514,8 +515,7 @@ pub async fn deploy_da_verifier_via_solidity(
)
.await
}
};
return contract;
}
}
async fn deploy_multi_da_contract(
@@ -630,7 +630,7 @@ async fn deploy_single_da_contract(
// bytes memory _callData,
PackedSeqToken(call_data.as_ref()),
// uint256 _decimals,
WordToken(B256::from(decimals).into()),
WordToken(B256::from(decimals)),
// uint[] memory _scales,
DynSeqToken(
scales

View File

@@ -142,8 +142,6 @@ use tract_onnx::prelude::SymbolValues;
pub fn extract_tensor_value(
input: Arc<tract_onnx::prelude::Tensor>,
) -> Result<Tensor<f32>, GraphError> {
use maybe_rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
let dt = input.datum_type();
let dims = input.shape().to_vec();
@@ -156,7 +154,7 @@ pub fn extract_tensor_value(
match dt {
DatumType::F16 => {
let vec = input.as_slice::<tract_onnx::prelude::f16>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| (*x).into()).collect();
let cast: Vec<f32> = vec.iter().map(|x| (*x).into()).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::F32 => {
@@ -165,61 +163,61 @@ pub fn extract_tensor_value(
}
DatumType::F64 => {
let vec = input.as_slice::<f64>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::I64 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<i64>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::I32 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<i32>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::I16 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<i16>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::I8 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<i8>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::U8 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<u8>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::U16 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<u16>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::U32 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<u32>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::U64 => {
// Generally a shape or hyperparam
let vec = input.as_slice::<u64>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::Bool => {
// Generally a shape or hyperparam
let vec = input.as_slice::<bool>()?.to_vec();
let cast: Vec<f32> = vec.par_iter().map(|x| *x as usize as f32).collect();
let cast: Vec<f32> = vec.iter().map(|x| *x as usize as f32).collect();
const_value = Tensor::<f32>::new(Some(&cast), &dims)?;
}
DatumType::TDim => {
@@ -227,7 +225,7 @@ pub fn extract_tensor_value(
let vec = input.as_slice::<tract_onnx::prelude::TDim>()?.to_vec();
let cast: Result<Vec<f32>, GraphError> = vec
.par_iter()
.iter()
.map(|x| match x.to_i64() {
Ok(v) => Ok(v as f32),
Err(_) => match x.to_i64() {
@@ -1136,23 +1134,21 @@ pub fn new_op_from_onnx(
a: crate::circuit::utils::F32(exponent),
})
}
} else {
if let Some(c) = inputs[0].opkind().get_mutable_constant() {
inputs[0].decrement_use();
deleted_indices.push(0);
if c.raw_values.len() > 1 {
unimplemented!("only support scalar base")
}
let base = c.raw_values[0];
SupportedOp::Nonlinear(LookupOp::Exp {
scale: scale_to_multiplier(input_scales[1]).into(),
base: base.into(),
})
} else {
unimplemented!("only support constant base or pow for now")
} else if let Some(c) = inputs[0].opkind().get_mutable_constant() {
inputs[0].decrement_use();
deleted_indices.push(0);
if c.raw_values.len() > 1 {
unimplemented!("only support scalar base")
}
let base = c.raw_values[0];
SupportedOp::Nonlinear(LookupOp::Exp {
scale: scale_to_multiplier(input_scales[1]).into(),
base: base.into(),
})
} else {
unimplemented!("only support constant base or pow for now")
}
}
"Div" => {

View File

@@ -638,42 +638,44 @@ impl<T: Clone + TensorType> Tensor<T> {
where
T: Send + Sync,
{
if indices.is_empty() {
// Fast path: empty indices or full tensor slice
if indices.is_empty()
|| indices.iter().map(|x| x.end - x.start).collect::<Vec<_>>() == self.dims
{
return Ok(self.clone());
}
// Validate dimensions
if self.dims.len() < indices.len() {
return Err(TensorError::DimError(format!(
"The dimensionality of the slice {:?} is greater than the tensor's {:?}",
indices, self.dims
)));
} else if indices.iter().map(|x| x.end - x.start).collect::<Vec<_>>() == self.dims {
// else if slice is the same as dims, return self
return Ok(self.clone());
}
// if indices weren't specified we fill them in as required
let mut full_indices = indices.to_vec();
// Pre-allocate the full indices vector with capacity
let mut full_indices = Vec::with_capacity(self.dims.len());
full_indices.extend_from_slice(indices);
for i in 0..(self.dims.len() - indices.len()) {
full_indices.push(0..self.dims()[indices.len() + i])
}
// Fill remaining dimensions
full_indices.extend((indices.len()..self.dims.len()).map(|i| 0..self.dims[i]));
let cartesian_coord: Vec<Vec<usize>> = full_indices
// Pre-calculate total size and allocate result vector
let total_size: usize = full_indices
.iter()
.cloned()
.multi_cartesian_product()
.collect();
let res: Vec<T> = cartesian_coord
.par_iter()
.map(|e| {
let index = self.get_index(e);
self[index].clone()
})
.collect();
.map(|range| range.end - range.start)
.product();
let mut res = Vec::with_capacity(total_size);
// Calculate new dimensions once
let dims: Vec<usize> = full_indices.iter().map(|e| e.end - e.start).collect();
// Use iterator directly without collecting into intermediate Vec
for coord in full_indices.iter().cloned().multi_cartesian_product() {
let index = self.get_index(&coord);
res.push(self[index].clone());
}
Tensor::new(Some(&res), &dims)
}
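As a standalone illustration of the new strategy (a sketch, not the crate's API: the row-major flat buffer, the stride computation, and the requirement that `indices` covers every dimension are assumptions), iterating `multi_cartesian_product` directly and pushing into a pre-sized Vec avoids materializing every coordinate up front:

```rust
use itertools::Itertools;
use std::ops::Range;

// Sketch: slice a row-major flat buffer by per-dimension index ranges,
// walking the cartesian product of the ranges directly instead of first
// collecting all coordinates into an intermediate Vec.
// Assumes indices.len() == dims.len().
fn slice_flat<T: Clone>(flat: &[T], dims: &[usize], indices: &[Range<usize>]) -> Vec<T> {
    // Row-major strides for the full tensor.
    let mut strides = vec![1usize; dims.len()];
    for i in (0..dims.len().saturating_sub(1)).rev() {
        strides[i] = strides[i + 1] * dims[i + 1];
    }
    // Pre-size the output from the product of the range lengths.
    let total: usize = indices.iter().map(|r| r.end - r.start).product();
    let mut out = Vec::with_capacity(total);
    for coord in indices.iter().cloned().multi_cartesian_product() {
        let idx: usize = coord.iter().zip(&strides).map(|(c, s)| c * s).sum();
        out.push(flat[idx].clone());
    }
    out
}
```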

View File

@@ -1,12 +1,12 @@
use crate::{circuit::region::ConstantsMap, fieldutils::felt_to_integer_rep};
use maybe_rayon::slice::Iter;
use maybe_rayon::slice::{Iter, ParallelSlice};
use super::{
ops::{intercalate_values, pad, resize},
*,
};
use halo2_proofs::{arithmetic::Field, circuit::Cell, plonk::Instance};
use maybe_rayon::iter::{FilterMap, IntoParallelIterator, ParallelIterator};
use maybe_rayon::iter::{FilterMap, ParallelIterator};
pub(crate) fn create_constant_tensor<
F: PrimeField + TensorType + std::marker::Send + std::marker::Sync + PartialOrd,
@@ -455,7 +455,7 @@ impl<F: PrimeField + TensorType + PartialOrd + std::hash::Hash> ValTensor<F> {
}
}
/// Returns the number of constants in the [ValTensor].
/// Returns an iterator over the [ValTensor]'s constants.
pub fn create_constants_map_iterator(
&self,
) -> FilterMap<Iter<'_, ValType<F>>, fn(&ValType<F>) -> Option<(F, ValType<F>)>> {
@@ -473,20 +473,48 @@ impl<F: PrimeField + TensorType + PartialOrd + std::hash::Hash> ValTensor<F> {
}
}
/// Returns the number of constants in the [ValTensor].
/// Returns a map of the constants in the [ValTensor].
pub fn create_constants_map(&self) -> ConstantsMap<F> {
match self {
ValTensor::Value { inner, .. } => inner
.par_iter()
.filter_map(|x| {
if let ValType::Constant(v) = x {
Some((*v, x.clone()))
} else {
None
}
})
.collect(),
ValTensor::Instance { .. } => ConstantsMap::new(),
let threshold = 1_000_000; // Tuned using the benchmarks
if self.len() < threshold {
match self {
ValTensor::Value { inner, .. } => inner
.par_iter()
.filter_map(|x| {
if let ValType::Constant(v) = x {
Some((*v, x.clone()))
} else {
None
}
})
.collect(),
ValTensor::Instance { .. } => ConstantsMap::new(),
}
} else {
// Use parallel for larger arrays
let num_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (self.len() / num_cores).max(100_000);
match self {
ValTensor::Value { inner, .. } => inner
.par_chunks(chunk_size)
.flat_map(|chunk| {
chunk
.par_iter() // Make sure we use par_iter() here
.filter_map(|x| {
if let ValType::Constant(v) = x {
Some((*v, x.clone()))
} else {
None
}
})
})
.collect(),
ValTensor::Instance { .. } => ConstantsMap::new(),
}
}
}
@@ -878,70 +906,161 @@ impl<F: PrimeField + TensorType + PartialOrd + std::hash::Hash> ValTensor<F> {
/// remove constant zero values constants
pub fn remove_const_zero_values(&mut self) {
match self {
ValTensor::Value { inner: v, dims, .. } => {
*v = v
.clone()
.into_par_iter()
.filter_map(|e| {
if let ValType::Constant(r) = e {
if r == F::ZERO {
return None;
let size_threshold = 1_000_000; // Tuned using the benchmarks
if self.len() < size_threshold {
match self {
ValTensor::Value { inner: v, dims, .. } => {
*v = v
.clone()
.into_iter()
.filter_map(|e| {
if let ValType::Constant(r) = e {
if r == F::ZERO {
return None;
}
} else if let ValType::AssignedConstant(_, r) = e {
if r == F::ZERO {
return None;
}
}
} else if let ValType::AssignedConstant(_, r) = e {
if r == F::ZERO {
return None;
}
}
Some(e)
})
.collect();
*dims = v.dims().to_vec();
Some(e)
})
.collect();
*dims = v.dims().to_vec();
}
ValTensor::Instance { .. } => {}
}
} else {
// Use parallel for larger arrays
let num_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (self.len() / num_cores).max(100_000);
match self {
ValTensor::Value { inner: v, dims, .. } => {
*v = v
.par_chunks_mut(chunk_size)
.flat_map(|chunk| {
chunk
.par_iter_mut() // Make sure we use par_iter() here
.filter_map(|e| {
if let ValType::Constant(r) = e {
if *r == F::ZERO {
return None;
}
} else if let ValType::AssignedConstant(_, r) = e {
if *r == F::ZERO {
return None;
}
}
Some(e.clone())
})
})
.collect();
*dims = v.dims().to_vec();
}
ValTensor::Instance { .. } => {}
}
ValTensor::Instance { .. } => {}
}
}
/// gets constants
/// filter constant zero values constants
pub fn get_const_zero_indices(&self) -> Vec<usize> {
match self {
ValTensor::Value { inner: v, .. } => v
.par_iter()
.enumerate()
.filter_map(|(i, e)| {
if let ValType::Constant(r) = e {
if *r == F::ZERO {
return Some(i);
let size_threshold = 1_000_000; // Tuned using the benchmarks
if self.len() < size_threshold {
// Use single-threaded for smaller arrays
match &self {
ValTensor::Value { inner: v, .. } => v
.iter()
.enumerate()
.filter_map(|(i, e)| {
match e {
// Combine both match arms to reduce branching
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(i)
}
_ => None,
}
} else if let ValType::AssignedConstant(_, r) = e {
if *r == F::ZERO {
return Some(i);
}
}
None
})
.collect(),
ValTensor::Instance { .. } => vec![],
})
.collect(),
ValTensor::Instance { .. } => vec![],
}
} else {
// Use parallel for larger arrays
let num_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (self.len() / num_cores).max(100_000);
match &self {
ValTensor::Value { inner: v, .. } => v
.par_chunks(chunk_size)
.enumerate()
.flat_map(|(chunk_idx, chunk)| {
chunk
.par_iter() // Make sure we use par_iter() here
.enumerate()
.filter_map(move |(i, e)| match e {
ValType::Constant(r) | ValType::AssignedConstant(_, r) => {
(*r == F::ZERO).then_some(chunk_idx * chunk_size + i)
}
_ => None,
})
})
.collect::<Vec<_>>(),
ValTensor::Instance { .. } => vec![],
}
}
}
/// gets constants
/// gets constant indices
pub fn get_const_indices(&self) -> Vec<usize> {
match self {
ValTensor::Value { inner: v, .. } => v
.par_iter()
.enumerate()
.filter_map(|(i, e)| {
if let ValType::Constant(_) = e {
Some(i)
} else if let ValType::AssignedConstant(_, _) = e {
Some(i)
} else {
None
}
})
.collect(),
ValTensor::Instance { .. } => vec![],
let size_threshold = 1_000_000; // Tuned using the benchmarks
if self.len() < size_threshold {
// Use single-threaded for smaller arrays
match &self {
ValTensor::Value { inner: v, .. } => v
.iter()
.enumerate()
.filter_map(|(i, e)| {
match e {
// Combine both match arms to reduce branching
ValType::Constant(_) | ValType::AssignedConstant(_, _) => Some(i),
_ => None,
}
})
.collect(),
ValTensor::Instance { .. } => vec![],
}
} else {
// Use parallel for larger arrays
let num_cores = std::thread::available_parallelism()
.map(|n| n.get())
.unwrap_or(1);
let chunk_size = (self.len() / num_cores).max(100_000);
match &self {
ValTensor::Value { inner: v, .. } => v
.par_chunks(chunk_size)
.enumerate()
.flat_map(|(chunk_idx, chunk)| {
chunk
.par_iter() // Make sure we use par_iter() here
.enumerate()
.filter_map(move |(i, e)| match e {
ValType::Constant(_) | ValType::AssignedConstant(_, _) => {
Some(chunk_idx * chunk_size + i)
}
_ => None,
})
})
.collect::<Vec<_>>(),
ValTensor::Instance { .. } => vec![],
}
}
}
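The pattern repeated across these three methods can be distilled as the following minimal sketch, written against plain rayon (the crate itself goes through `maybe_rayon`) with a hypothetical `zero_indices` helper over `u64` values instead of field elements. Below the threshold a sequential scan avoids fork/join overhead; above it, one chunk per core keeps each task's work large enough to amortize scheduling, with global indices reconstructed from the chunk offset:

```rust
use rayon::prelude::*;

const SIZE_THRESHOLD: usize = 1_000_000; // tuned via the zero_finder benchmarks

// Hypothetical stand-in for get_const_zero_indices, over u64 instead of
// field elements: return the positions of all zeros in `data`.
fn zero_indices(data: &[u64]) -> Vec<usize> {
    if data.len() < SIZE_THRESHOLD {
        // Small input: a plain sequential scan wins.
        data.iter()
            .enumerate()
            .filter_map(|(i, e)| (*e == 0).then_some(i))
            .collect()
    } else {
        // Large input: one chunk per core (at least 100k elements each).
        let cores = std::thread::available_parallelism()
            .map(|n| n.get())
            .unwrap_or(1);
        let chunk_size = (data.len() / cores).max(100_000);
        data.par_chunks(chunk_size)
            .enumerate()
            .flat_map(|(chunk_idx, chunk)| {
                chunk
                    .par_iter()
                    .enumerate()
                    .filter_map(move |(i, e)| (*e == 0).then_some(chunk_idx * chunk_size + i))
            })
            .collect()
    }
}
```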

View File

@@ -68,6 +68,8 @@ mod py_tests {
"install",
"torch-geometric==2.5.2",
"torch==2.2.2",
"datasets==3.2.0",
"torchtext==0.17.2",
"torchvision==0.17.2",
"pandas==2.2.1",
"numpy==1.26.4",
@@ -190,6 +192,16 @@ mod py_tests {
}
});
#[test]
fn neural_bag_of_words_notebook() {
crate::py_tests::init_binary();
let test_dir: TempDir = TempDir::new("neural_bow").unwrap();
let path = test_dir.path().to_str().unwrap();
crate::py_tests::mv_test_(path, "neural_bow.ipynb");
run_notebook(path, "neural_bow.ipynb");
test_dir.close().unwrap();
}
#[test]
fn felt_conversion_test_notebook() {
crate::py_tests::init_binary();