refactor

2026-01-13 01:17:57 -05:00 · 2024-02-28 11:41:09 -04:00 · 2024-02-28 11:37:25 -04:00 · 2024-02-28 11:29:29 -04:00 · 2024-02-28 11:27:33 -04:00 · 2024-02-28 11:19:59 -04:00
57 changed files with 519 additions and 1506 deletions
--- a/.github/changed-files.yml
+++ b/.github/changed-files.yml
@@ -1,7 +1,7 @@
 golang:
-  - wrappers/golang/**/*.go
-  - wrappers/golang/**/*.h
-  - wrappers/golang/**/*.tmpl
+  - 'wrappers/golang/**/*.go'
+  - 'wrappers/golang/**/*.h'
+  - 'wrappers/golang/**/*.tmpl'
  - go.mod
 rust:
  - wrappers/rust
--- a/.github/workflows/check-changed-files.yml
+++ b/.github/workflows/check-changed-files.yml
@@ -1,39 +0,0 @@
-name: Check Changed Files
-
-on:
-  workflow_call:
-    outputs:
-      golang:
-        description: "Flag for if GoLang files changed"
-        value: ${{ jobs.check-changed-files.outputs.golang }}
-      rust:
-        description: "Flag for if Rust files changed"
-        value: ${{ jobs.check-changed-files.outputs.rust }}
-      cpp_cuda:
-        description: "Flag for if C++/CUDA files changed"
-        value: ${{ jobs.check-changed-files.outputs.cpp_cuda }}
-
-jobs:
-  check-changed-files:
-    name: Check Changed Files
-    runs-on: ubuntu-22.04
-    outputs:
-      golang: ${{ steps.changed_files.outputs.golang }}
-      rust: ${{ steps.changed_files.outputs.rust }}
-      cpp_cuda: ${{ steps.changed_files.outputs.cpp_cuda }}
-    steps:
-    - name: Checkout Repo
-      uses: actions/checkout@v4
-    - name: Get all changed files
-      id: changed-files-yaml
-      uses: tj-actions/changed-files@v39
-      # https://github.com/tj-actions/changed-files#input_files_yaml_from_source_file
-      with:
-        files_yaml_from_source_file: .github/changed-files.yml
-    - name: Run Changed Files script
-      id: changed_files
-      # https://github.com/tj-actions/changed-files#outputs-
-      run: |
-        echo "golang=${{ steps.changed-files-yaml.outputs.golang_any_modified }}" >> "$GITHUB_OUTPUT"
-        echo "rust=${{ steps.changed-files-yaml.outputs.rust_any_modified }}" >> "$GITHUB_OUTPUT"
-        echo "cpp_cuda=${{ steps.changed-files-yaml.outputs.cpp_any_modified }}" >> "$GITHUB_OUTPUT"
--- a/.github/workflows/codespell.yml
+++ b/.github/workflows/codespell.yml
@@ -11,7 +11,7 @@ jobs:
    name: Check Spelling
    runs-on: ubuntu-latest
    steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v3
      - uses: codespell-project/actions-codespell@v2
        with:
          # https://github.com/codespell-project/actions-codespell?tab=readme-ov-file#parameter-skip
--- a/.github/workflows/cpp_cuda.yml
+++ b/.github/workflows/cpp_cuda.yml
@@ -1,52 +0,0 @@
-name: C++/CUDA
-
-on:
-  pull_request:
-    branches:
-      - main
-      - dev
-  push:
-    branches:
-      - main
-      - dev
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  check-changed-files:
-    uses: ./.github/workflows/check-changed-files.yml
-
-  check-format:
-    name: Check Code Format
-    runs-on: ubuntu-22.04
-    needs: check-changed-files
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v4
-    - name: Check clang-format
-      if: needs.check-changed-files.outputs.cpp_cuda == 'true'
-      run: if [[ $(find ./ \( -path ./icicle/build -prune -o -path ./**/target -prune -o -path ./examples -prune \) -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file 2>&1) ]]; then echo "Please run clang-format"; exit 1; fi
-
-  test-linux:
-    name: Test on Linux
-    runs-on: [self-hosted, Linux, X64, icicle]
-    needs: [check-changed-files, check-format]
-    strategy:
-      matrix:
-        curve: [bn254, bls12_381, bls12_377, bw6_761]
-    steps:
-    - name: Checkout Repo
-      uses: actions/checkout@v4
-    - name: Build
-      working-directory: ./icicle
-      if: needs.check-changed-files.outputs.cpp_cuda == 'true'
-      run: |
-        mkdir -p build
-        cmake -DBUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release -DCURVE=${{ matrix.curve }} -DG2_DEFINED=ON -S . -B build
-        cmake --build build
-    - name: Run C++ Tests
-      working-directory: ./icicle/build
-      if: needs.check-changed-files.outputs.cpp_cuda == 'true'
-      run: ctest
--- a/.github/workflows/examples.yml
+++ b/.github/workflows/examples.yml
@@ -21,19 +21,14 @@ concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

-jobs:
-  check-changed-files:
-    uses: ./.github/workflows/check-changed-files.yml
-
-  run-examples:
+jobs:  
+  test-examples:
    runs-on: [self-hosted, Linux, X64, icicle, examples]
-    needs: check-changed-files
    steps:
    - name: Checkout
-      uses: actions/checkout@v4
+      uses: actions/checkout@v2
    - name: c++ examples
      working-directory: ./examples/c++
-      if: needs.check-changed-files.outputs.cpp_cuda == 'true'
      run: |
        # loop over all directories in the current directory
        for dir in $(find . -mindepth 1 -maxdepth 1 -type d); do
@@ -47,7 +42,6 @@ jobs:
        done    
    - name: Rust examples
      working-directory: ./examples/rust
-      if: needs.check-changed-files.outputs.rust == 'true'
      run: |
        # loop over all directories in the current directory
        for dir in $(find . -mindepth 1 -maxdepth 1 -type d); do
--- a/.github/workflows/golang.yml
+++ b/.github/workflows/golang.yml
@@ -1,103 +0,0 @@
-name: GoLang
-
-on:
-  pull_request:
-    branches:
-      - main
-      - dev
-  push:
-    branches:
-      - main
-      - dev
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  check-changed-files:
-    uses: ./.github/workflows/check-changed-files.yml
-
-  check-format:
-    name: Check Code Format
-    runs-on: ubuntu-22.04
-    needs: check-changed-files
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v4
-    - name: Check gofmt
-      if: needs.check-changed-files.outputs.golang == 'true'
-      run: if [[ $(go list ./... | xargs go fmt) ]]; then echo "Please run go fmt"; exit 1; fi
-
-  build-linux:
-    name: Build on Linux
-    runs-on: [self-hosted, Linux, X64, icicle]
-    needs: [check-changed-files, check-format]
-    strategy:
-      matrix:
-        curve: [bn254, bls12_381, bls12_377, bw6_761]
-    steps:
-    - name: Checkout Repo
-      uses: actions/checkout@v4
-    - name: Build
-      working-directory: ./wrappers/golang
-      if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
-      run: ./build.sh ${{ matrix.curve }} ON # builds a single curve with G2 enabled
-    - name: Upload ICICLE lib artifacts
-      uses: actions/upload-artifact@v4
-      if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
-      with:
-        name: icicle-builds-${{ matrix.curve }}-${{ github.workflow }}-${{ github.sha }}
-        path: icicle/build/libingo_${{ matrix.curve }}.a
-        retention-days: 1
-  
-  test-linux:
-    name: Test on Linux
-    runs-on: [self-hosted, Linux, X64, icicle]
-    needs: [check-changed-files, build-linux]
-    steps:
-    - name: Checkout Repo
-      uses: actions/checkout@v4
-    - name: Download ICICLE lib artifacts
-      uses: actions/download-artifact@v4
-      if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
-      with:
-        path: ./icicle/build/
-        merge-multiple: true
-    - name: Run Tests
-      working-directory: ./wrappers/golang
-      if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
-      # -count ensures the test results are not cached
-      # -p controls the number of programs that can be run in parallel
-      run: |
-        export CPATH=$CPATH:/usr/local/cuda/include
-        go test --tags=g2 ./... -count=1 -failfast -p 2 -timeout 60m
-  
-  # TODO: bw6 on windows requires more memory than the standard runner has
-  # Add a large runner and then enable this job
-  # build-windows:
-  #   name: Build on Windows
-  #   runs-on: windows-2022
-  #   needs: [check-changed-files, check-format]
-  #   strategy:
-  #     matrix:
-  #       curve: [bn254, bls12_381, bls12_377, bw6_761]
-  #   steps:     
-  #   - name: Checkout Repo
-  #     uses: actions/checkout@v4
-  #   - name: Download and Install Cuda
-  #     if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
-  #     id: cuda-toolkit
-  #     uses: Jimver/cuda-toolkit@v0.2.11
-  #     with:
-  #       cuda: '12.0.0'
-  #       method: 'network'
-  #       # https://docs.nvidia.com/cuda/archive/12.0.0/cuda-installation-guide-microsoft-windows/index.html
-  #       sub-packages: '["cudart", "nvcc", "thrust", "visual_studio_integration"]'
-  #   - name: Build libs
-  #     if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
-  #     working-directory: ./wrappers/golang
-  #     env:
-  #       CUDA_PATH: ${{ steps.cuda-toolkit.outputs.CUDA_PATH }}
-  #     shell: pwsh
-  #     run: ./build.ps1 ${{ matrix.curve }} ON # builds a single curve with G2 enabled
--- a/.github/workflows/main-build.yml
+++ b/.github/workflows/main-build.yml
@@ -0,0 +1,119 @@
+name: Build
+
+on:
+  pull_request:
+    branches:
+      - main
+      - dev
+  push:
+    branches:
+      - main
+      - dev
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  CARGO_TERM_COLOR: always
+  ARCH_TYPE: native
+
+jobs:
+  check-changed-files:
+    name: Check Changed Files
+    runs-on: ubuntu-22.04
+    outputs:
+      golang: ${{ steps.changed_files.outputs.golang }}
+      rust: ${{ steps.changed_files.outputs.rust }}
+      cpp_cuda: ${{ steps.changed_files.outputs.cpp_cuda }}
+    steps:
+    - name: Checkout Repo
+      uses: actions/checkout@v3
+    - name: Get all changed files
+      id: changed-files-yaml
+      uses: tj-actions/changed-files@v39
+      # https://github.com/tj-actions/changed-files#input_files_yaml_from_source_file
+      with:
+        files_yaml_from_source_file: .github/changed-files.yml
+    - name: Run Changed Files script
+      id: changed_files
+      # https://github.com/tj-actions/changed-files#outputs-
+      run: |
+        echo "golang=${{ steps.changed-files-yaml.outputs.golang_any_modified }}" >> "$GITHUB_OUTPUT"
+        echo "rust=${{ steps.changed-files-yaml.outputs.rust_any_modified }}" >> "$GITHUB_OUTPUT"
+        echo "cpp_cuda=${{ steps.changed-files-yaml.outputs.cpp_any_modified }}" >> "$GITHUB_OUTPUT"
+
+  build-rust-linux:
+    name: Build Rust on Linux
+    runs-on: [self-hosted, Linux, X64, icicle]
+    needs: check-changed-files
+    steps:
+    - name: Checkout Repo
+      uses: actions/checkout@v3
+    - name: Build Rust
+      working-directory: ./wrappers/rust
+      if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
+      # Building from the root workspace will build all members of the workspace by default
+      run: cargo build --release --verbose
+
+  build-rust-windows:
+    name: Build Rust on Windows
+    runs-on: windows-2022
+    needs: check-changed-files
+    steps:     
+    - name: Checkout Repo
+      uses: actions/checkout@v3
+    - name: Download and Install Cuda
+      if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
+      id: cuda-toolkit
+      uses: Jimver/cuda-toolkit@v0.2.11
+      with:
+        cuda: '12.0.0'
+        method: 'network'
+        # https://docs.nvidia.com/cuda/archive/12.0.0/cuda-installation-guide-microsoft-windows/index.html
+        sub-packages: '["cudart", "nvcc", "thrust", "visual_studio_integration"]'
+    - name: Build Rust Targets
+      working-directory: ./wrappers/rust
+      if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
+      env:
+        CUDA_PATH: ${{ steps.cuda-toolkit.outputs.CUDA_PATH }}
+      # Building from the root workspace will build all members of the workspace by default
+      run: cargo build --release --verbose
+
+  build-golang-linux:
+    name: Build Golang on Linux
+    runs-on: [self-hosted, Linux, X64, icicle]
+    needs: check-changed-files
+    strategy:
+      matrix:
+        curve: [bn254, bls12_381, bls12_377, bw6_761]
+    steps:
+    - name: Checkout Repo
+      uses: actions/checkout@v3
+    - name: Build CUDA libs
+      if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
+      working-directory: ./wrappers/golang
+      run: |
+        export CPATH=$CPATH:/usr/local/cuda/include
+        ./build.sh ${{ matrix.curve }} ON
+
+  # TODO: Add once Golang make file supports building for Windows
+  # build-golang-windows:
+  #   name: Build Golang on Windows
+  #   runs-on: windows-2022
+  #   needs: check-changed-files
+  #   steps:     
+  #   - name: Checkout Repo
+  #     uses: actions/checkout@v3
+  #   - name: Download and Install Cuda
+  #     if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
+  #     uses: Jimver/cuda-toolkit@v0.2.11
+  #     with:
+  #       cuda: '12.0.0'
+  #       method: 'network'
+  #       # https://docs.nvidia.com/cuda/archive/12.0.0/cuda-installation-guide-microsoft-windows/index.html
+  #       sub-packages: '["cudart", "nvcc", "thrust"]'
+  #   - name: Build cpp libs
+  #     if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
+  #     run: make all
+  #     working-directory: ./goicicle
--- a/.github/workflows/main-format.yml
+++ b/.github/workflows/main-format.yml
@@ -0,0 +1,47 @@
+name: Format
+
+on:
+  pull_request:
+    branches:
+      - main
+      - dev
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  formatting-rust:
+    name: Check Rust Code Formatting
+    runs-on: ubuntu-22.04
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v3
+    - name: Check rustfmt
+      working-directory: ./wrappers/rust
+      # "-name target -prune" removes searching in any directory named "target"
+      # Formatting by single file is necessary due to generated files not being present
+      # before building the project.
+      # e.g. icicle-cuda-runtime/src/bindings.rs is generated and icicle-cuda-runtime/src/lib.rs includes that module
+      # causing rustfmt to fail.
+      run: if [[ $(find . -name target -prune -o -iname *.rs -print | xargs cargo fmt --check --) ]]; then echo "Please run cargo fmt"; exit 1; fi
+    # - name: Check clippy
+    #   run: cargo clippy --no-deps --all-features --all-targets
+
+  formatting-golang:
+    name: Check Golang Code Formatting
+    runs-on: ubuntu-22.04
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v3
+    - name: Check gofmt
+      run: if [[ $(go list ./... | xargs go fmt) ]]; then echo "Please run go fmt"; exit 1; fi
+
+  formatting-cpp-cuda:
+    name: Check C++/CUDA Code Formatting
+    runs-on: ubuntu-22.04
+    steps:
+    - name: Checkout
+      uses: actions/checkout@v3
+    - name: Check clang-format
+      run: if [[ $(find ./ \( -path ./icicle/build -prune -o -path ./**/target -prune -o -path ./examples -prune \) -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file 2>&1) ]]; then echo "Please run clang-format"; exit 1; fi
--- a/.github/workflows/main-test.yml
+++ b/.github/workflows/main-test.yml
@@ -0,0 +1,99 @@
+name: Test
+
+on:
+  pull_request:
+    branches:
+      - main
+      - dev
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+env:
+  CARGO_TERM_COLOR: always
+  ARCH_TYPE: native
+
+jobs:
+  check-changed-files:
+    name: Check Changed Files
+    runs-on: ubuntu-22.04
+    outputs:
+      golang: ${{ steps.changed_files.outputs.golang }}
+      rust: ${{ steps.changed_files.outputs.rust }}
+      cpp_cuda: ${{ steps.changed_files.outputs.cpp_cuda }}
+    steps:
+    - name: Checkout Repo
+      uses: actions/checkout@v3
+    - name: Get all changed files
+      id: changed-files-yaml
+      uses: tj-actions/changed-files@v39
+      # https://github.com/tj-actions/changed-files#input_files_yaml_from_source_file
+      with:
+        files_yaml_from_source_file: .github/changed-files.yml
+    - name: Run Changed Files script
+      id: changed_files
+      # https://github.com/tj-actions/changed-files#outputs-
+      run: |
+        echo "golang=${{ steps.changed-files-yaml.outputs.golang_any_modified }}" >> "$GITHUB_OUTPUT"
+        echo "rust=${{ steps.changed-files-yaml.outputs.rust_any_modified }}" >> "$GITHUB_OUTPUT"
+        echo "cpp_cuda=${{ steps.changed-files-yaml.outputs.cpp_any_modified }}" >> "$GITHUB_OUTPUT"
+
+  test-rust-linux:
+    name: Test Rust on Linux
+    runs-on: [self-hosted, Linux, X64, icicle]
+    needs: check-changed-files
+    steps:
+    - name: Checkout Repo
+      uses: actions/checkout@v3
+    - name: Run Rust Tests
+      working-directory: ./wrappers/rust
+      if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
+      # Running tests from the root workspace will run all workspace members' tests by default
+      # We need to limit the number of threads to avoid running out of memory on weaker machines
+      run: cargo test --release --verbose --features=g2 -- --test-threads=2
+
+  test-cpp-linux:
+    name: Test C++ on Linux
+    runs-on: [self-hosted, Linux, X64, icicle]
+    needs: check-changed-files
+    strategy:
+      matrix:
+        curve: [bn254, bls12_381, bls12_377, bw6_761]
+    steps:
+    - name: Checkout Repo
+      uses: actions/checkout@v3
+    - name: Build C++
+      working-directory: ./icicle
+      if: needs.check-changed-files.outputs.cpp_cuda == 'true'
+      run: |
+        mkdir -p build
+        cmake -DBUILD_TESTS=ON -DCMAKE_BUILD_TYPE=Release -DCURVE=${{ matrix.curve }} -S . -B build
+        cmake --build build
+    - name: Run C++ Tests
+      working-directory: ./icicle/build
+      if: needs.check-changed-files.outputs.cpp_cuda == 'true'
+      run: ctest
+  
+  test-golang-linux:
+    name: Test Golang on Linux
+    runs-on: [self-hosted, Linux, X64, icicle]
+    needs: check-changed-files
+    # strategy:
+    #   matrix:
+    #     curve: [bn254, bls12_381, bls12_377, bw6_761]
+    steps:
+    - name: Checkout Repo
+      uses: actions/checkout@v3
+    - name: Build CUDA libs
+      working-directory: ./wrappers/golang
+      if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
+      # builds all curves with g2 ON
+      run: |
+        export CPATH=$CPATH:/usr/local/cuda/include
+        ./build.sh all ON
+    - name: Run Golang Tests
+      if: needs.check-changed-files.outputs.golang == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
+      run: |
+        export CPATH=$CPATH:/usr/local/cuda/include
+        go test --tags=g2 ./... -count=1 -timeout 60m
--- a/.github/workflows/rust.yml
+++ b/.github/workflows/rust.yml
@@ -1,87 +0,0 @@
-name: Rust
-
-on:
-  pull_request:
-    branches:
-      - main
-      - dev
-  push:
-    branches:
-      - main
-      - dev
-
-concurrency:
-  group: ${{ github.workflow }}-${{ github.ref }}
-  cancel-in-progress: true
-
-jobs:
-  check-changed-files:
-    uses: ./.github/workflows/check-changed-files.yml
-
-  check-format:
-    name: Check Code Format
-    runs-on: ubuntu-22.04
-    needs: check-changed-files
-    steps:
-    - name: Checkout
-      uses: actions/checkout@v4
-    - name: Check rustfmt
-      if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
-      working-directory: ./wrappers/rust
-      # "-name target -prune" removes searching in any directory named "target"
-      # Formatting by single file is necessary due to generated files not being present
-      # before building the project.
-      # e.g. icicle-cuda-runtime/src/bindings.rs is generated and icicle-cuda-runtime/src/lib.rs includes that module
-      # causing rustfmt to fail.
-      run: if [[ $(find . -name target -prune -o -iname *.rs -print | xargs cargo fmt --check --) ]]; then echo "Please run cargo fmt"; exit 1; fi
-
-  build-linux:
-    name: Build on Linux
-    runs-on: [self-hosted, Linux, X64, icicle]
-    needs: [check-changed-files, check-format]
-    steps:
-    - name: Checkout Repo
-      uses: actions/checkout@v4
-    - name: Build
-      working-directory: ./wrappers/rust
-      if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
-      # Building from the root workspace will build all members of the workspace by default
-      run: cargo build --release --verbose
-  
-  test-linux:
-    name: Test on Linux
-    runs-on: [self-hosted, Linux, X64, icicle]
-    needs: [check-changed-files, build-linux]
-    steps:
-    - name: Checkout Repo
-      uses: actions/checkout@v4
-    - name: Run tests
-      working-directory: ./wrappers/rust
-      if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
-      # Running tests from the root workspace will run all workspace members' tests by default
-      # We need to limit the number of threads to avoid running out of memory on weaker machines
-      run: cargo test --release --verbose --features=g2 -- --test-threads=2
-
-  build-windows:
-    name: Build on Windows
-    runs-on: windows-2022
-    needs: check-changed-files
-    steps:     
-    - name: Checkout Repo
-      uses: actions/checkout@v4
-    - name: Download and Install Cuda
-      if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
-      id: cuda-toolkit
-      uses: Jimver/cuda-toolkit@v0.2.11
-      with:
-        cuda: '12.0.0'
-        method: 'network'
-        # https://docs.nvidia.com/cuda/archive/12.0.0/cuda-installation-guide-microsoft-windows/index.html
-        sub-packages: '["cudart", "nvcc", "thrust", "visual_studio_integration"]'
-    - name: Build targets
-      working-directory: ./wrappers/rust
-      if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
-      env:
-        CUDA_PATH: ${{ steps.cuda-toolkit.outputs.CUDA_PATH }}
-      # Building from the root workspace will build all members of the workspace by default
-      run: cargo build --release --verbose
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # ICICLE

-<div align="center">ICICLE is a library for ZK acceleration using CUDA-enabled GPUs.</div>
+**<div align="center">ICICLE is a library for ZK acceleration using CUDA-enabled GPUs.</div>**

 <p align="center">
  <img alt="ICICLE" width="300" height="300" src="https://user-images.githubusercontent.com/2446179/223707486-ed8eb5ab-0616-4601-8557-12050df8ccf7.png"/>
@@ -13,12 +13,8 @@
    <img src="https://img.shields.io/twitter/follow/Ingo_zk?style=social&logo=twitter" alt="Follow us on Twitter">
  </a>
  <img src="https://img.shields.io/badge/Machines%20running%20ICICLE-544-lightblue" alt="Machines running ICICLE">
-  <a href="https://github.com/ingonyama-zk/icicle/releases">
-    <img src="https://img.shields.io/github/v/release/ingonyama-zk/icicle" alt="GitHub Release">
-  </a>
 </p>

-
 ## Background

 Zero Knowledge Proofs (ZKPs) are considered one of the greatest achievements of modern cryptography. Accordingly, ZKPs are expected to disrupt a number of industries and will usher in an era of trustless and privacy preserving services and infrastructure.
--- a/docs/docs/icicle/overview.md
+++ b/docs/docs/icicle/overview.md
@@ -1,8 +1,8 @@
 # What is ICICLE?

-[![GitHub Release](https://img.shields.io/github/v/release/ingonyama-zk/icicle)](https://github.com/ingonyama-zk/icicle/releases)
+[![Static Badge](https://img.shields.io/badge/Latest-v1.4.0-8a2be2)](https://github.com/ingonyama-zk/icicle/releases)

-![Static Badge](https://img.shields.io/badge/Machines%20running%20ICICLE-544-blue)
+![Static Badge](https://img.shields.io/badge/Machines%20running%20ICICLE-544-lightblue)



--- a/docs/docs/icicle/primitives/overview.md
+++ b/docs/docs/icicle/primitives/overview.md
@@ -6,6 +6,5 @@ This section of the documentation is dedicated to the ICICLE primitives, we will
 ## Supported primitives


- [MSM](./msm.md)
- [NTT](./ntt.md)
+- [MSM](./msm)
 - [Poseidon Hash](./poseidon.md)
--- a/docs/docs/icicle/rust-bindings/multi-gpu.md
+++ b/docs/docs/icicle/rust-bindings/multi-gpu.md
@@ -4,54 +4,6 @@ To learn more about the theory of Multi GPU programming refer to [this part](../

 Here we will cover the core multi GPU apis and a [example](#a-multi-gpu-example)

-
-## A Multi GPU example
-
-In this example we will display how you can
-
-1. Fetch the number of devices installed on a machine
-2. For every GPU launch a thread and set an active device per thread.
-3. Execute a MSM on each GPU
-
-
-
-```rust
-
-...
-
-let device_count = get_device_count().unwrap();
-
-(0..device_count)
-        .into_par_iter()
-        .for_each(move |device_id| {
-          set_device(device_id).unwrap();
-
-          // you can allocate points and scalars_d here
-
-          let mut cfg = MSMConfig::default_for_device(device_id);
-          cfg.ctx.stream = &stream;
-          cfg.is_async = true;
-          cfg.are_scalars_montgomery_form = true;
-          msm(&scalars_d, &HostOrDeviceSlice::on_host(points), &cfg, &mut msm_results).unwrap();
-
-          // collect and process results
-        })
-
-...
-```
-
-
-We use `get_device_count` to fetch the number of connected devices, device IDs will be `0, 1, 2, ..., device_count - 1`
-
-[`into_par_iter`](https://docs.rs/rayon/latest/rayon/iter/trait.IntoParallelIterator.html#tymethod.into_par_iter) is a parallel iterator, you should expect it to launch a thread for every iteration.
-
-We then call `set_device(device_id).unwrap();` it should set the context of that thread to the selected `device_id`.
-
-Any data you now allocate from the context of this thread will be linked to the `device_id`. We create our `MSMConfig` with the selected device ID `let mut cfg = MSMConfig::default_for_device(device_id);`, behind the scene this will create for us a `DeviceContext` configured for that specific GPU. 
-
-We finally call our `msm` method.
-
-
 ## Device management API

 To streamline device management we offer as part of `icicle-cuda-runtime` package methods for dealing with devices.
@@ -200,3 +152,50 @@ let device_id: i32 = 0; // Example device ID
 check_device(device_id);
 // Ensures that the current context is correctly set for the specified device ID.
 ```
+
+
+## A Multi GPU example
+
+In this example we will display how you can
+
+1. Fetch the number of devices installed on a machine
+2. For every GPU, launch a thread and set an active device per thread.
+3. Execute a MSM on each GPU
+
+
+
+```rust
+
+...
+
+let device_count = get_device_count().unwrap();
+
+(0..device_count)
+        .into_par_iter()
+        .for_each(move |device_id| {
+          set_device(device_id).unwrap();
+
+          // you can allocate points and scalars_d here
+
+          let mut cfg = MSMConfig::default_for_device(device_id);
+          cfg.ctx.stream = &stream;
+          cfg.is_async = true;
+          cfg.are_scalars_montgomery_form = true;
+          msm(&scalars_d, &HostOrDeviceSlice::on_host(points), &cfg, &mut msm_results).unwrap();
+
+          // collect and process results
+        })
+
+...
+```
+
+
+We use `get_device_count` to fetch the number of connected devices, device IDs will be `0...device_count-1`
+
+[`into_par_iter`](https://docs.rs/rayon/latest/rayon/iter/trait.IntoParallelIterator.html#tymethod.into_par_iter) is a parallel iterator, you should expect it to launch a thread for every iteration.
+
+We then call `set_device(device_id).unwrap();`, which sets the context of that thread to the selected `device_id`.
+
+Any data you now allocate from the context of this thread will be linked to the `device_id`. We create our `MSMConfig` with the selected device ID `let mut cfg = MSMConfig::default_for_device(device_id);`, behind the scene this will create for us a `DeviceContext` configured for that specific GPU. 
+
+We finally call our `msm` method.
--- a/docs/docs/icicle/rust-bindings/vec-ops.md
+++ b/docs/docs/icicle/rust-bindings/vec-ops.md
@@ -1,159 +0,0 @@
-# Vector Operations API
-
-Our vector operations API which is part of `icicle-cuda-runtime` package, includes fundamental methods for addition, subtraction, and multiplication of vectors, with support for both host and device memory. 
-
-
-## Supported curves
-
-Vector operations are supported on the following curves:
-
-`bls12-377`, `bls12-381`, `bn-254`, `bw6-761`, `grumpkin`
-
-## Examples
-
-### Addition of Scalars
-
-```rust
-use icicle_bn254::curve::{ScalarCfg, ScalarField};
-use icicle_core::vec_ops::{add_scalars};
-
-let test_size = 1 << 18;
-
-let a: HostOrDeviceSlice<'_, ScalarField> = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size));
-let b: HostOrDeviceSlice<'_, ScalarField> = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size));
-let mut result: HostOrDeviceSlice<'_, ScalarField> = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
-
-let cfg = VecOpsConfig::default();
-add_scalars(&a, &b, &mut result, &cfg).unwrap();
-```
-
-### Subtraction of Scalars
-
-```rust
-use icicle_bn254::curve::{ScalarCfg, ScalarField};
-use icicle_core::vec_ops::{sub_scalars};
-
-let test_size = 1 << 18;
-
-let a: HostOrDeviceSlice<'_, ScalarField> = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size));
-let b: HostOrDeviceSlice<'_, ScalarField> = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size));
-let mut result: HostOrDeviceSlice<'_, ScalarField> = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
-
-let cfg = VecOpsConfig::default();
-sub_scalars(&a, &b, &mut result, &cfg).unwrap();
-```
-
-### Multiplication of Scalars
-
-```rust
-use icicle_bn254::curve::{ScalarCfg, ScalarField};
-use icicle_core::vec_ops::{mul_scalars};
-
-let test_size = 1 << 18;
-
-let a: HostOrDeviceSlice<'_, ScalarField> = HostOrDeviceSlice::on_host(F::Config::generate_random(test_size));
-let ones: HostOrDeviceSlice<'_, ScalarField> = HostOrDeviceSlice::on_host(vec![F::one(); test_size]);
-let mut result: HostOrDeviceSlice<'_, ScalarField> = HostOrDeviceSlice::on_host(vec![F::zero(); test_size]);
-
-let cfg = VecOpsConfig::default();
-mul_scalars(&a, &ones, &mut result, &cfg).unwrap();
-```
-
-
-## Vector Operations Configuration
-
-The `VecOpsConfig` struct encapsulates the settings for vector operations, including device context and operation modes.
-
-### `VecOpsConfig`
-
-Defines configuration parameters for vector operations.
-
-```rust
-pub struct VecOpsConfig<'a> {
-    pub ctx: DeviceContext<'a>,
-    is_a_on_device: bool,
-    is_b_on_device: bool,
-    is_result_on_device: bool,
-    is_result_montgomery_form: bool,
-    pub is_async: bool,
-}
-```
-
-#### Fields
-
- **`ctx: DeviceContext<'a>`**: Specifies the device context for the operation, including the device ID and memory pool.
- **`is_a_on_device`**: Indicates if the first operand vector resides in device memory.
- **`is_b_on_device`**: Indicates if the second operand vector resides in device memory.
- **`is_result_on_device`**: Specifies if the result vector should be stored in device memory.
- **`is_result_montgomery_form`**: Determines if the result should be in Montgomery form.
- **`is_async`**: Enables asynchronous operation. If `true`, operations are non-blocking; otherwise, they block the current thread.
-
-### Default Configuration
-
-`VecOpsConfig` can be initialized with default settings tailored for a specific device:
-
-```
-let cfg = VecOpsConfig::default();
-```
-
-These are the default settings.
-
-```rust
-impl<'a> Default for VecOpsConfig<'a> {
-    fn default() -> Self {
-        Self::default_for_device(DEFAULT_DEVICE_ID)
-    }
-}
-
-impl<'a> VecOpsConfig<'a> {
-    pub fn default_for_device(device_id: usize) -> Self {
-        VecOpsConfig {
-            ctx: DeviceContext::default_for_device(device_id),
-            is_a_on_device: false,
-            is_b_on_device: false,
-            is_result_on_device: false,
-            is_result_montgomery_form: false,
-            is_async: false,
-        }
-    }
-}
-```
-
-## Vector Operations
-
-Vector operations are implemented through the `VecOps` trait, these traits are implemented for all [supported curves](#supported-curves) providing methods for addition, subtraction, and multiplication of vectors.
-
-### `VecOps` Trait
-
-```rust
-pub trait VecOps<F> {
-    fn add(
-        a: &HostOrDeviceSlice<F>,
-        b: &HostOrDeviceSlice<F>,
-        result: &mut HostOrDeviceSlice<F>,
-        cfg: &VecOpsConfig,
-    ) -> IcicleResult<()>;
-
-    fn sub(
-        a: &HostOrDeviceSlice<F>,
-        b: &HostOrDeviceSlice<F>,
-        result: &mut HostOrDeviceSlice<F>,
-        cfg: &VecOpsConfig,
-    ) -> IcicleResult<()>;
-
-    fn mul(
-        a: &HostOrDeviceSlice<F>,
-        b: &HostOrDeviceSlice<F>,
-        result: &mut HostOrDeviceSlice<F>,
-        cfg: &VecOpsConfig,
-    ) -> IcicleResult<()>;
-}
-```
-
-#### Methods
-
-All operations are element-wise operations, and the results placed into the `result` param. These operations are not in place.
-
- **`add`**: Computes the element-wise sum of two vectors.
- **`sub`**: Computes the element-wise difference between two vectors.
- **`mul`**: Performs element-wise multiplication of two vectors.
--- a/docs/docusaurus.config.js
+++ b/docs/docusaurus.config.js
@@ -9,7 +9,7 @@ const config = {
  title: 'Ingonyama Developer Documentation',
  tagline: 'Ingonyama is a next-generation semiconductor company, focusing on Zero-Knowledge Proof hardware acceleration. We build accelerators for advanced cryptography, unlocking real-time applications.',
  url: 'https://dev.ingonyama.com/',
-  baseUrl: '/',
+  baseUrl: '/icicle/',
  onBrokenLinks: 'throw',
  onBrokenMarkdownLinks: 'warn',
  favicon: 'img/logo.png',
@@ -29,13 +29,13 @@ const config = {
          remarkPlugins: [math, require('mdx-mermaid')],
          rehypePlugins: [katex],
          sidebarPath: require.resolve('./sidebars.js'),
-          editUrl: 'https://github.com/ingonyama-zk/icicle/tree/main',
+          editUrl: 'https://github.com/ingonyama-zk/developer-docs/tree/main',
        },
        blog: {
          remarkPlugins: [math, require('mdx-mermaid')],
          rehypePlugins: [katex],
          showReadingTime: true,
-          editUrl: 'https://github.com/ingonyama-zk/icicle/tree/main',
+          editUrl: 'https://github.com/ingonyama-zk/developer-docs/tree/main',
        },
        pages: {},
        theme: {
--- a/docs/sidebars.js
+++ b/docs/sidebars.js
@@ -42,11 +42,6 @@ module.exports = {
              type: "doc",
              label: "Multi GPU Support",
              id: "icicle/rust-bindings/multi-gpu",
-            },
-            {
-              type: "doc",
-              label: "Vector operations",
-              id: "icicle/rust-bindings/vec-ops",
            }
          ]
        },
--- a/examples/c++/pedersen-commitment/CMakeLists.txt
+++ b/examples/c++/pedersen-commitment/CMakeLists.txt
@@ -1,25 +0,0 @@
-cmake_minimum_required(VERSION 3.18)
-set(CMAKE_CXX_STANDARD 17)
-set(CMAKE_CUDA_STANDARD 17)
-set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
-set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
-if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
-    set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
-else()
-    set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
-endif ()
-project(icicle LANGUAGES CUDA CXX)
-
-set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
-set(CMAKE_CUDA_FLAGS_RELEASE "")
-set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
-# change the path to your Icicle location
-include_directories("../../../icicle")
-add_executable(
-  example
-  example.cu
-)
-find_library(NVML_LIBRARY nvidia-ml PATHS /usr/local/cuda/targets/x86_64-linux/lib/stubs/ )
-target_link_libraries(example ${NVML_LIBRARY})
-set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
-
--- a/examples/c++/pedersen-commitment/README.md
+++ b/examples/c++/pedersen-commitment/README.md
@@ -1,33 +0,0 @@
-# ICICLE example: Pedersen Commitment
-
-## Best-Practices
-
-We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
-
-## Key-Takeaway
-
-A Pedersen Commitment is a cryptographic primitive to commit to a value or a vector of values while keeping it hidden, yet enabling the committer to reveal the value later. It provides both hiding (the commitment does not reveal any information about the value) and binding properties (once a value is committed, it cannot be changed without detection).
-
-Pedersen commitment is based on Multi-Scalar Multiplication [MSM](https://github.com/ingonyama-zk/ingopedia/blob/master/src/msm.md).
-`ICICLE` provides CUDA C++ support for [MSM](https://dev.ingonyama.com/icicle/primitives/msm). 
-An example of MSM is [here](../msm/README.md).
-
-## Running the example
-
-- `cd` to your example directory
-- compile with  `./compile.sh`
-- run with `./run.sh`
-
-## Concise Explanation
-
-We recommend this simple [explanation](https://www.rareskills.io/post/pedersen-commitment).
-
-The original paper: T. P. Pedersen, "Non-Interactive and Information-Theoretic Secure Verifiable Secret Sharing," in Advances in Cryptology — CRYPTO ’91, Lecture Notes in Computer Science, vol 576. Springer, Berlin, Heidelberg.
-
-## What's in the example
-
-1. Define the curve and the size of commitment vector
-2. Use public random seed to transparently generate points on the elliptic curve without known discrete logarithm
-3. Generate (random) commitment vector and salt (a.k.a blinding factor)
-4. Configure and execute MSM using on-host data
-5. Output commitment as elliptic point
--- a/examples/c++/pedersen-commitment/compile.sh
+++ b/examples/c++/pedersen-commitment/compile.sh
@@ -1,9 +0,0 @@
-#!/bin/bash
-
-# Exit immediately on error
-set -e
-
-rm -rf build
-mkdir -p build
-cmake -S . -B build
-cmake --build build
--- a/examples/c++/pedersen-commitment/example.cu
+++ b/examples/c++/pedersen-commitment/example.cu
@@ -1,159 +0,0 @@
-#include <iostream>
-#include <iomanip>
-#include <chrono>
-#include <cassert>
-#include <nvml.h>
-
-#define CURVE_ID BN254
-#include "appUtils/msm/msm.cu"
-using namespace curve_config;
-
-typedef point_field_t T;
-
-// modular power
-T modPow(T base, T exp) {
-  T r = T::one();
-  T b = base;
-  T e = exp;
-  while (e != T::zero()) {
-      // If exp is odd, multiply the base with result
-      if (T::is_odd(e)) {
-          r = r * b;
-      }
-      // Now exp must be even, divide it by 2
-      e =T::div2(e);
-      b = b * b;
-  }
-  return r;
-}
-
-// Check if y2 is a quadratic residue using Euler's Criterion
-bool quadratic_residue(T y2) {
-  return modPow(y2, T::div2(T::zero() - T::one())) == T::one();
-}
-
-// modular square root adapted from:
-// https://github.com/ShahjalalShohag/code-library/blob/main/Number%20Theory/Tonelli%20Shanks%20Algorithm.cpp
-bool mySQRT(T a, T *result) {
-  if (a == T::zero()) {
-    *result = T::zero();
-    return true;
-  }
-  if (modPow(a, T::div2(T::zero() - T::one())) != T::one() ) {
-    return false; // solution does not exist
-  }
-  // TODO: consider special cases
-  // if (p % 4 == 3) return power(a, (p + 1) / 4, p); 
-  T s = T::zero() - T::one(); // p - 1, 
-  T n = T::one() + T::one(); //2;
-  T r = T::zero(); 
-  T m;
-  while (T::is_even(s)) {
-    r = r + T::one();
-    s = T::div2(s); //s /= 2;
-  }
-  // find a non-square mod p
-  while (modPow(n, T::div2((T::zero() - T::one())) ) != T::zero() - T::one()) {
-    n = n + T::one();
-  }
-  T x = modPow(a, T::div2(s + T::one()));
-  T b = modPow(a, s);
-  T g = modPow(n, s);
-  for (;; r = m) {
-    T t = b;
-    for (m = T::zero(); T::lt(m,r) /* m < r*/ && t != T::one(); m = m + T::one()) t =  t * t;
-    if (m == T::zero() ) {
-      *result = x;
-      return true;
-    }
-    T gs = modPow(g, modPow(T::one() + T::one(), r - m - T::one()) );
-    g = gs * gs ;
-    x = x * gs ;
-    b =  b * g ;
-  }
-}
-
-void point_near_x(T x, affine_t *point) {
-  const T wb = T { weierstrass_b };
-  T y2;
-  while (y2 = x*x*x + wb, quadratic_residue(y2) == false)
-  {
-    x = x + T::one();
-  };
-  T y;
-  bool found = mySQRT(y2, &y);
-  assert(y*y == y2);
-  point->x = x;
-  point->y = y;
-}
-
-static int seed = 0;
-static HOST_INLINE T rand_host_seed()
-  {
-    std::mt19937_64 generator(seed++);
-    std::uniform_int_distribution<unsigned> distribution;
-    
-    T value;
-    for (unsigned i = 0; i <  T::TLC-1 ; i++)
-    // TODO: use the full range of limbs: for (unsigned i = 0; i <  T::TLC ; i++)
-      value.limbs_storage.limbs[i] = distribution(generator);
-    // while (lt(Field{get_modulus()}, value))
-    //   value = value - Field{get_modulus()};
-    return value;
-  }
-
-using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
-#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
-#define END_TIMER(timer, msg) printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
-
-int main(int argc, char** argv)
-{
-  const unsigned N = pow(2, 10);
-  std::cout << "Commitment vector size: " << N << "+1 for salt (a.k.a blinding factor)" << std::endl;
-  T* xs = new T[N+1];
-  
-  std::cout << "Generating random points transparently using publicly chosen seed" << std::endl;
-  std::cout << "Public seed prevents committer from knowing the discrete logs of points used in the commitment" << std::endl;
-  seed = 1234;
-  std::cout << "Using seed: " << seed << std::endl;
-  std::cout << "Generating random field values" << std::endl;
-  START_TIMER(gen);
-  
-  for (unsigned i = 0; i < N; i++) {
-    xs[i] = rand_host_seed();
-  }
-  END_TIMER(gen, "Time to generate field values");
-  std::cout << "xs[0]: " << xs[0]  << std::endl;
-  std::cout << "xs[1]: " << xs[1]  << std::endl;
-  
-  // affine_t points[N];
-  affine_t* points = new affine_t[N+1];
-  std::cout << "Generating point about random field values" << std::endl;
-  START_TIMER(points);
-  for (unsigned i = 0; i < N+1; i++) {
-    point_near_x(xs[i], &points[i]);
-  }
-  END_TIMER(points, "Time to generate points");
-  
-  std::cout << "Generating commitment vector" << std::endl;
-  projective_t result;
-  scalar_t* scalars = new scalar_t[N+1];
-  scalar_t::RandHostMany(scalars, N);
-
-  std::cout << "Generating salt" << std::endl;
-  scalars[N] = scalar_t::rand_host();
-
-  std::cout << "Executing MSM" << std::endl;
-  auto config = msm::DefaultMSMConfig<scalar_t>();
-  START_TIMER(msm);
-  msm::MSM<scalar_t, affine_t, projective_t>(scalars, points, N+1, config, &result);
-  END_TIMER(msm, "Time to execute MSM");
-
-  std::cout << "Computed commitment: " << result << std::endl;
-
-  std::cout << "Cleaning up..." << std::endl;
-  delete[] xs;
-  delete[] scalars;
-  delete[] points;
-  return 0;
-}
--- a/examples/c++/pedersen-commitment/run.sh
+++ b/examples/c++/pedersen-commitment/run.sh
@@ -1,2 +0,0 @@
-#!/bin/bash
-./build/example
--- a/wrappers/golang/build.ps1
+++ b/wrappers/golang/build.ps1
@@ -1,23 +0,0 @@
-$G2_DEFINED = "OFF"
-
-if ($args.Count -gt 1) {
-  $G2_DEFINED = "ON"
-}
-
-$BUILD_DIR = (Get-Location).Path + "\..\icicle\build"
-$SUPPORTED_CURVES = @("bn254", "bls12_377", "bls12_381", "bw6_761")
-
-if ($args[0] -eq "all") {
-  $BUILD_CURVES = $SUPPORTED_CURVES
-} else {
-  $BUILD_CURVES = @($args[0])
-}
-
-Set-Location "../../icicle"
-
-New-Item -ItemType Directory -Path "build" -Force
-
-foreach ($CURVE in $BUILD_CURVES) {
-  cmake -DCURVE:STRING=$CURVE -DG2_DEFINED:STRING=$G2_DEFINED -DCMAKE_BUILD_TYPE:STRING=Release -S . -B build
-  cmake --build build
-}
--- a/wrappers/golang/core/error.go
+++ b/wrappers/golang/core/error.go
@@ -1,7 +1,7 @@
 package core

 import (
-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
+	"github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
 )

 type IcicleErrorCode int
@@ -16,13 +16,13 @@ const (

 type IcicleError struct {
 	IcicleErrorCode IcicleErrorCode
-	CudaErrorCode   cr.CudaError
+	CudaErrorCode   cuda_runtime.CudaError
 	reason          string
 }

-func FromCudaError(error cr.CudaError) (err IcicleError) {
+func FromCudaError(error cuda_runtime.CudaError) (err IcicleError) {
 	switch error {
-	case cr.CudaSuccess:
+	case cuda_runtime.CudaSuccess:
 		err.IcicleErrorCode = IcicleSuccess
 	default:
 		err.IcicleErrorCode = InternalCudaError
@@ -38,6 +38,6 @@ func FromCodeAndReason(code IcicleErrorCode, reason string) IcicleError {
 	return IcicleError{
 		IcicleErrorCode: code,
 		reason:          reason,
-		CudaErrorCode:   cr.CudaErrorUnknown,
+		CudaErrorCode:   cuda_runtime.CudaErrorUnknown,
 	}
 }
--- a/wrappers/golang/core/msm.go
+++ b/wrappers/golang/core/msm.go
@@ -3,12 +3,12 @@ package core
 import (
 	"fmt"

-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
+	"github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
 )

 type MSMConfig struct {
 	/// Details related to the device such as its id and stream.
-	Ctx cr.DeviceContext
+	Ctx cuda_runtime.DeviceContext

 	pointsSize int32

@@ -55,8 +55,13 @@ type MSMConfig struct {
 	IsAsync bool
 }

+// type MSM interface {
+// 	Msm(scalars, points *cuda_runtime.HostOrDeviceSlice, cfg *MSMConfig, results *cuda_runtime.HostOrDeviceSlice) cuda_runtime.CudaError
+// 	GetDefaultMSMConfig() MSMConfig
+// }
+
 func GetDefaultMSMConfig() MSMConfig {
-	ctx, _ := cr.GetDefaultDeviceContext()
+	ctx, _ := cuda_runtime.GetDefaultDeviceContext()
 	return MSMConfig{
 		ctx,   // Ctx
 		0,     // pointsSize
--- a/wrappers/golang/core/msm_test.go
+++ b/wrappers/golang/core/msm_test.go
@@ -4,13 +4,13 @@ import (
 	"testing"

 	"github.com/ingonyama-zk/icicle/wrappers/golang/core/internal"
-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
+	"github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"

 	"github.com/stretchr/testify/assert"
 )

 func TestMSMDefaultConfig(t *testing.T) {
-	ctx, _ := cr.GetDefaultDeviceContext()
+	ctx, _ := cuda_runtime.GetDefaultDeviceContext()
 	expected := MSMConfig{
 		ctx,   // Ctx
 		0,     // pointsSize
--- a/wrappers/golang/core/ntt.go
+++ b/wrappers/golang/core/ntt.go
@@ -3,7 +3,7 @@ package core
 import (
 	"fmt"

-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
+	"github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
 )

 type NTTDir int8
@@ -26,7 +26,7 @@ const (

 type NTTConfig[T any] struct {
 	/// Details related to the device such as its id and stream id. See [DeviceContext](@ref device_context::DeviceContext).
-	Ctx cr.DeviceContext
+	Ctx cuda_runtime.DeviceContext
 	/// Coset generator. Used to perform coset (i)NTTs. Default value: `S::one()` (corresponding to no coset being used).
 	CosetGen T
 	/// The number of NTTs to compute. Default value: 1.
@@ -41,7 +41,7 @@ type NTTConfig[T any] struct {
 }

 func GetDefaultNTTConfig[T any](cosetGen T) NTTConfig[T] {
-	ctx, _ := cr.GetDefaultDeviceContext()
+	ctx, _ := cuda_runtime.GetDefaultDeviceContext()
 	return NTTConfig[T]{
 		ctx,      // Ctx
 		cosetGen, // CosetGen
--- a/wrappers/golang/core/ntt_test.go
+++ b/wrappers/golang/core/ntt_test.go
@@ -5,7 +5,7 @@ import (
 	"testing"

 	"github.com/ingonyama-zk/icicle/wrappers/golang/core/internal"
-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
+	"github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
 	"github.com/stretchr/testify/assert"
 )

@@ -14,7 +14,7 @@ func TestNTTDefaultConfig(t *testing.T) {
 	cosetGenField.One()
 	var cosetGen [1]uint32
 	copy(cosetGen[:], cosetGenField.GetLimbs())
-	ctx, _ := cr.GetDefaultDeviceContext()
+	ctx, _ := cuda_runtime.GetDefaultDeviceContext()
 	expected := NTTConfig[[1]uint32]{
 		ctx,      // Ctx
 		cosetGen, // CosetGen
--- a/wrappers/golang/core/slice.go
+++ b/wrappers/golang/core/slice.go
@@ -3,7 +3,7 @@ package core
 import (
 	"unsafe"

-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
+	"github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
 )

 type HostOrDeviceSlice interface {
@@ -45,25 +45,25 @@ func (d DeviceSlice) IsOnDevice() bool {

 // TODO: change signature to be Malloc(element, numElements)
 // calc size internally
-func (d *DeviceSlice) Malloc(size, sizeOfElement int) (DeviceSlice, cr.CudaError) {
-	dp, err := cr.Malloc(uint(size))
+func (d *DeviceSlice) Malloc(size, sizeOfElement int) (DeviceSlice, cuda_runtime.CudaError) {
+	dp, err := cuda_runtime.Malloc(uint(size))
 	d.inner = dp
 	d.capacity = size
 	d.length = size / sizeOfElement
 	return *d, err
 }

-func (d *DeviceSlice) MallocAsync(size, sizeOfElement int, stream cr.CudaStream) (DeviceSlice, cr.CudaError) {
-	dp, err := cr.MallocAsync(uint(size), stream)
+func (d *DeviceSlice) MallocAsync(size, sizeOfElement int, stream cuda_runtime.CudaStream) (DeviceSlice, cuda_runtime.CudaError) {
+	dp, err := cuda_runtime.MallocAsync(uint(size), stream)
 	d.inner = dp
 	d.capacity = size
 	d.length = size / sizeOfElement
 	return *d, err
 }

-func (d *DeviceSlice) Free() cr.CudaError {
-	err := cr.Free(d.inner)
-	if err == cr.CudaSuccess {
+func (d *DeviceSlice) Free() cuda_runtime.CudaError {
+	err := cuda_runtime.Free(d.inner)
+	if err == cuda_runtime.CudaSuccess {
 		d.length, d.capacity = 0, 0
 		d.inner = nil
 	}
@@ -123,12 +123,12 @@ func (h HostSlice[T]) CopyToDevice(dst *DeviceSlice, shouldAllocate bool) *Devic

 	// hostSrc := unsafe.Pointer(h.AsPointer())
 	hostSrc := unsafe.Pointer(&h[0])
-	cr.CopyToDevice(dst.inner, hostSrc, uint(size))
+	cuda_runtime.CopyToDevice(dst.inner, hostSrc, uint(size))
 	dst.length = h.Len()
 	return dst
 }

-func (h HostSlice[T]) CopyToDeviceAsync(dst *DeviceSlice, stream cr.CudaStream, shouldAllocate bool) *DeviceSlice {
+func (h HostSlice[T]) CopyToDeviceAsync(dst *DeviceSlice, stream cuda_runtime.CudaStream, shouldAllocate bool) *DeviceSlice {
 	size := h.Len() * h.SizeOfElement()
 	if shouldAllocate {
 		dst.MallocAsync(size, h.SizeOfElement(), stream)
@@ -138,7 +138,7 @@ func (h HostSlice[T]) CopyToDeviceAsync(dst *DeviceSlice, stream cr.CudaStream,
 	}

 	hostSrc := unsafe.Pointer(&h[0])
-	cr.CopyToDeviceAsync(dst.inner, hostSrc, uint(size), stream)
+	cuda_runtime.CopyToDeviceAsync(dst.inner, hostSrc, uint(size), stream)
 	dst.length = h.Len()
 	return dst
 }
@@ -148,13 +148,13 @@ func (h HostSlice[T]) CopyFromDevice(src *DeviceSlice) {
 		panic("destination and source slices have different lengths")
 	}
 	bytesSize := src.Len() * h.SizeOfElement()
-	cr.CopyFromDevice(unsafe.Pointer(&h[0]), src.inner, uint(bytesSize))
+	cuda_runtime.CopyFromDevice(unsafe.Pointer(&h[0]), src.inner, uint(bytesSize))
 }

-func (h HostSlice[T]) CopyFromDeviceAsync(src *DeviceSlice, stream cr.Stream) {
+func (h HostSlice[T]) CopyFromDeviceAsync(src *DeviceSlice, stream cuda_runtime.Stream) {
 	if h.Len() != src.Len() {
 		panic("destination and source slices have different lengths")
 	}
 	bytesSize := src.Len() * h.SizeOfElement()
-	cr.CopyFromDeviceAsync(unsafe.Pointer(&h[0]), src.inner, uint(bytesSize), stream)
+	cuda_runtime.CopyFromDeviceAsync(unsafe.Pointer(&h[0]), src.inner, uint(bytesSize), stream)
 }
--- a/wrappers/golang/core/vec_ops.go
+++ b/wrappers/golang/core/vec_ops.go
@@ -1,74 +0,0 @@
-package core
-
-import (
-	"fmt"
-
-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
-)
-
-type VecOps int
-
-const (
-	Sub VecOps = iota
-	Add
-	Mul
-)
-
-type VecOpsConfig struct {
-	/*Details related to the device such as its id and stream. */
-	Ctx cr.DeviceContext
-	/* True if `a` is on device and false if it is not. Default value: false. */
-	isAOnDevice bool
-	/* True if `b` is on device and false if it is not. Default value: false. */
-	isBOnDevice bool
-	/* If true, output is preserved on device, otherwise on host. Default value: false. */
-	isResultOnDevice bool
-	/* True if `result` vector should be in Montgomery form and false otherwise. Default value: false. */
-	IsResultMontgomeryForm bool
-	/* Whether to run the vector operations asynchronously. If set to `true`, the function will be
-	*  non-blocking and you'll need to synchronize it explicitly by calling
-	*  `SynchronizeStream`. If set to false, the function will block the current CPU thread. */
-	IsAsync bool
-}
-
-/**
- * A function that returns the default value of [VecOpsConfig](@ref VecOpsConfig).
- * @return Default value of [VecOpsConfig](@ref VecOpsConfig).
- */
-func DefaultVecOpsConfig() VecOpsConfig {
-	ctx, _ := cr.GetDefaultDeviceContext()
-	config := VecOpsConfig{
-		ctx,   // ctx
-		false, // isAOnDevice
-		false, // isBOnDevice
-		false, // isResultOnDevice
-		false, // IsResultMontgomeryForm
-		false, // IsAsync
-	}
-
-	return config
-}
-
-func VecOpCheck(a, b, out HostOrDeviceSlice, cfg *VecOpsConfig) {
-	aLen, bLen, outLen := a.Len(), b.Len(), out.Len()
-	if aLen != bLen {
-		errorString := fmt.Sprintf(
-			"a and b vector lengths %d; %d are not equal",
-			aLen,
-			bLen,
-		)
-		panic(errorString)
-	}
-	if aLen != outLen {
-		errorString := fmt.Sprintf(
-			"a and out vector lengths %d; %d are not equal",
-			aLen,
-			outLen,
-		)
-		panic(errorString)
-	}
-
-	cfg.isAOnDevice = a.IsOnDevice()
-	cfg.isBOnDevice = b.IsOnDevice()
-	cfg.isResultOnDevice = out.IsOnDevice()
-}
--- a/wrappers/golang/core/vec_ops_test.go
+++ b/wrappers/golang/core/vec_ops_test.go
@@ -1,23 +0,0 @@
-package core
-
-import (
-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
-	"github.com/stretchr/testify/assert"
-	"testing"
-)
-
-func TestVecOpsDefaultConfig(t *testing.T) {
-	ctx, _ := cr.GetDefaultDeviceContext()
-	expected := VecOpsConfig{
-		ctx,   // Ctx
-		false, // isAOnDevice
-		false, // isBOnDevice
-		false, // isResultOnDevice
-		false, // IsResultMontgomeryForm
-		false, // IsAsync
-	}
-
-	actual := DefaultVecOpsConfig()
-
-	assert.Equal(t, expected, actual)
-}
--- a/wrappers/golang/curves/bls12377/include/vec_ops.h
+++ b/wrappers/golang/curves/bls12377/include/vec_ops.h
@@ -1,39 +0,0 @@
-#include <cuda_runtime.h>
-#include "../../include/types.h"
-
-#ifndef _BLS12_377_VEC_OPS_H
-#define _BLS12_377_VEC_OPS_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-cudaError_t bls12_377MulCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
-  int n,
-  VecOpsConfig* config,
-  scalar_t* result
-);
-
-cudaError_t bls12_377AddCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
-  int n,
-  VecOpsConfig* config,
-  scalar_t* result
-);
-
-cudaError_t bls12_377SubCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
-  int n,
-  VecOpsConfig* config,
-  scalar_t* result
-);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
--- a/wrappers/golang/curves/bls12377/scalar_field.go
+++ b/wrappers/golang/curves/bls12377/scalar_field.go
@@ -6,7 +6,7 @@ import "C"
 import (
 	"encoding/binary"
 	"fmt"
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
+	core "github.com/ingonyama-zk/icicle/wrappers/golang/core"
 	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
 	"unsafe"
 )
@@ -85,7 +85,12 @@ func (f ScalarField) ToBytesLittleEndian() []byte {
 }

 func GenerateScalars(size int) core.HostSlice[ScalarField] {
-	scalarSlice := make(core.HostSlice[ScalarField], size)
+	scalars := make([]ScalarField, size)
+	for i := range scalars {
+		scalars[i] = ScalarField{}
+	}
+
+	scalarSlice := core.HostSliceFromElements[ScalarField](scalars)

 	cScalars := (*C.scalar_t)(unsafe.Pointer(&scalarSlice[0]))
 	cSize := (C.int)(size)
--- a/wrappers/golang/curves/bls12377/vec_ops.go
+++ b/wrappers/golang/curves/bls12377/vec_ops.go
@@ -1,49 +0,0 @@
-package bls12377
-
-// #cgo CFLAGS: -I./include/
-// #include "vec_ops.h"
-import "C"
-
-import (
-	"unsafe"
-
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
-)
-
-func VecOp(a, b, out core.HostOrDeviceSlice, config core.VecOpsConfig, op core.VecOps) (ret cr.CudaError) {
-	core.VecOpCheck(a, b, out, &config)
-	var cA, cB, cOut *C.scalar_t
-
-	if a.IsOnDevice() {
-		cA = (*C.scalar_t)(a.(core.DeviceSlice).AsPointer())
-	} else {
-		cA = (*C.scalar_t)(unsafe.Pointer(&a.(core.HostSlice[ScalarField])[0]))
-	}
-
-	if b.IsOnDevice() {
-		cB = (*C.scalar_t)(b.(core.DeviceSlice).AsPointer())
-	} else {
-		cB = (*C.scalar_t)(unsafe.Pointer(&b.(core.HostSlice[ScalarField])[0]))
-	}
-
-	if out.IsOnDevice() {
-		cOut = (*C.scalar_t)(out.(core.DeviceSlice).AsPointer())
-	} else {
-		cOut = (*C.scalar_t)(unsafe.Pointer(&out.(core.HostSlice[ScalarField])[0]))
-	}
-
-	cConfig := (*C.VecOpsConfig)(unsafe.Pointer(&config))
-	cSize := (C.int)(a.Len())
-
-	switch op {
-	case core.Sub:
-		ret = (cr.CudaError)(C.bls12_377SubCuda(cA, cB, cSize, cConfig, cOut))
-	case core.Add:
-		ret = (cr.CudaError)(C.bls12_377AddCuda(cA, cB, cSize, cConfig, cOut))
-	case core.Mul:
-		ret = (cr.CudaError)(C.bls12_377MulCuda(cA, cB, cSize, cConfig, cOut))
-	}
-
-	return ret
-}
--- a/wrappers/golang/curves/bls12377/vec_ops_test.go
+++ b/wrappers/golang/curves/bls12377/vec_ops_test.go
@@ -1,33 +0,0 @@
-package bls12377
-
-import (
-	"testing"
-
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
-	"github.com/stretchr/testify/assert"
-)
-
-func TestVecOps(t *testing.T) {
-	testSize := 1 << 14
-
-	a := GenerateScalars(testSize)
-	b := GenerateScalars(testSize)
-	var scalar ScalarField
-	scalar.One()
-	ones := core.HostSliceWithValue(scalar, testSize)
-
-	out := make(core.HostSlice[ScalarField], testSize)
-	out2 := make(core.HostSlice[ScalarField], testSize)
-	out3 := make(core.HostSlice[ScalarField], testSize)
-
-	cfg := core.DefaultVecOpsConfig()
-
-	VecOp(a, b, out, cfg, core.Add)
-	VecOp(out, b, out2, cfg, core.Sub)
-
-	assert.Equal(t, a, out2)
-
-	VecOp(a, ones, out3, cfg, core.Mul)
-
-	assert.Equal(t, a, out3)
-}
--- a/wrappers/golang/curves/bls12381/include/vec_ops.h
+++ b/wrappers/golang/curves/bls12381/include/vec_ops.h
@@ -1,39 +0,0 @@
-#include <cuda_runtime.h>
-#include "../../include/types.h"
-
-#ifndef _BLS12_381_VEC_OPS_H
-#define _BLS12_381_VEC_OPS_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-cudaError_t bls12_381MulCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
-  int n,
-  VecOpsConfig* config,
-  scalar_t* result
-);
-
-cudaError_t bls12_381AddCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
-  int n,
-  VecOpsConfig* config,
-  scalar_t* result
-);
-
-cudaError_t bls12_381SubCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
-  int n,
-  VecOpsConfig* config,
-  scalar_t* result
-);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
--- a/wrappers/golang/curves/bls12381/scalar_field.go
+++ b/wrappers/golang/curves/bls12381/scalar_field.go
@@ -6,7 +6,7 @@ import "C"
 import (
 	"encoding/binary"
 	"fmt"
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
+	core "github.com/ingonyama-zk/icicle/wrappers/golang/core"
 	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
 	"unsafe"
 )
@@ -85,7 +85,12 @@ func (f ScalarField) ToBytesLittleEndian() []byte {
 }

 func GenerateScalars(size int) core.HostSlice[ScalarField] {
-	scalarSlice := make(core.HostSlice[ScalarField], size)
+	scalars := make([]ScalarField, size)
+	for i := range scalars {
+		scalars[i] = ScalarField{}
+	}
+
+	scalarSlice := core.HostSliceFromElements[ScalarField](scalars)

 	cScalars := (*C.scalar_t)(unsafe.Pointer(&scalarSlice[0]))
 	cSize := (C.int)(size)
--- a/wrappers/golang/curves/bls12381/vec_ops.go
+++ b/wrappers/golang/curves/bls12381/vec_ops.go
@@ -1,49 +0,0 @@
-package bls12381
-
-// #cgo CFLAGS: -I./include/
-// #include "vec_ops.h"
-import "C"
-
-import (
-	"unsafe"
-
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
-)
-
-func VecOp(a, b, out core.HostOrDeviceSlice, config core.VecOpsConfig, op core.VecOps) (ret cr.CudaError) {
-	core.VecOpCheck(a, b, out, &config)
-	var cA, cB, cOut *C.scalar_t
-
-	if a.IsOnDevice() {
-		cA = (*C.scalar_t)(a.(core.DeviceSlice).AsPointer())
-	} else {
-		cA = (*C.scalar_t)(unsafe.Pointer(&a.(core.HostSlice[ScalarField])[0]))
-	}
-
-	if b.IsOnDevice() {
-		cB = (*C.scalar_t)(b.(core.DeviceSlice).AsPointer())
-	} else {
-		cB = (*C.scalar_t)(unsafe.Pointer(&b.(core.HostSlice[ScalarField])[0]))
-	}
-
-	if out.IsOnDevice() {
-		cOut = (*C.scalar_t)(out.(core.DeviceSlice).AsPointer())
-	} else {
-		cOut = (*C.scalar_t)(unsafe.Pointer(&out.(core.HostSlice[ScalarField])[0]))
-	}
-
-	cConfig := (*C.VecOpsConfig)(unsafe.Pointer(&config))
-	cSize := (C.int)(a.Len())
-
-	switch op {
-	case core.Sub:
-		ret = (cr.CudaError)(C.bls12_381SubCuda(cA, cB, cSize, cConfig, cOut))
-	case core.Add:
-		ret = (cr.CudaError)(C.bls12_381AddCuda(cA, cB, cSize, cConfig, cOut))
-	case core.Mul:
-		ret = (cr.CudaError)(C.bls12_381MulCuda(cA, cB, cSize, cConfig, cOut))
-	}
-
-	return ret
-}
--- a/wrappers/golang/curves/bls12381/vec_ops_test.go
+++ b/wrappers/golang/curves/bls12381/vec_ops_test.go
@@ -1,33 +0,0 @@
-package bls12381
-
-import (
-	"testing"
-
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
-	"github.com/stretchr/testify/assert"
-)
-
-func TestVecOps(t *testing.T) {
-	testSize := 1 << 14
-
-	a := GenerateScalars(testSize)
-	b := GenerateScalars(testSize)
-	var scalar ScalarField
-	scalar.One()
-	ones := core.HostSliceWithValue(scalar, testSize)
-
-	out := make(core.HostSlice[ScalarField], testSize)
-	out2 := make(core.HostSlice[ScalarField], testSize)
-	out3 := make(core.HostSlice[ScalarField], testSize)
-
-	cfg := core.DefaultVecOpsConfig()
-
-	VecOp(a, b, out, cfg, core.Add)
-	VecOp(out, b, out2, cfg, core.Sub)
-
-	assert.Equal(t, a, out2)
-
-	VecOp(a, ones, out3, cfg, core.Mul)
-
-	assert.Equal(t, a, out3)
-}
--- a/wrappers/golang/curves/bn254/include/ve_mod_mult.h
+++ b/wrappers/golang/curves/bn254/include/ve_mod_mult.h
@@ -0,0 +1,70 @@
+
+// Copyright 2023 Ingonyama
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Code generated by Ingonyama DO NOT EDIT
+
+#include <cuda.h>
+#include <cuda_runtime.h>
+#include <stdbool.h>
+// ve_mod_mult.h
+
+#ifndef _BN254_VEC_MULT_H
+#define _BN254_VEC_MULT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+  cudaStream_t stream;   /**< Stream to use. Default value: 0. */
+  int device_id;         /**< Index of the currently used GPU. Default value: 0. */
+  cudaMemPool_t mempool; /**< Mempool to use. Default value: 0. */
+} DeviceContext;
+
+typedef struct BN254_scalar_t BN254_scalar_t;
+
+int bn254AddCuda(
+  BN254_scalar_t* vec_a,
+  BN254_scalar_t* vec_b,
+  int n,
+  bool is_on_device,
+  DeviceContext ctx,
+  BN254_scalar_t* result
+);
+
+int bn254SubCuda(
+  BN254_scalar_t* vec_a,
+  BN254_scalar_t* vec_b,
+  int n,
+  bool is_on_device,
+  DeviceContext ctx,
+  BN254_scalar_t* result
+);
+
+int bn254MulCuda(
+  BN254_scalar_t* vec_a,
+  BN254_scalar_t* vec_b,
+  int n,
+  bool is_on_device,
+  bool is_montgomery,
+  DeviceContext ctx,
+  BN254_scalar_t* result
+);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BN254_VEC_MULT_H */
--- a/wrappers/golang/curves/bn254/include/vec_ops.h
+++ b/wrappers/golang/curves/bn254/include/vec_ops.h
@@ -1,39 +0,0 @@
-#include <cuda_runtime.h>
-#include "../../include/types.h"
-
-#ifndef _BN254_VEC_OPS_H
-#define _BN254_VEC_OPS_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-cudaError_t bn254MulCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
-  int n,
-  VecOpsConfig* config,
-  scalar_t* result
-);
-
-cudaError_t bn254AddCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
-  int n,
-  VecOpsConfig* config,
-  scalar_t* result
-);
-
-cudaError_t bn254SubCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
-  int n,
-  VecOpsConfig* config,
-  scalar_t* result
-);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
--- a/wrappers/golang/curves/bn254/scalar_field.go
+++ b/wrappers/golang/curves/bn254/scalar_field.go
@@ -6,7 +6,7 @@ import "C"
 import (
 	"encoding/binary"
 	"fmt"
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
+	core "github.com/ingonyama-zk/icicle/wrappers/golang/core"
 	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
 	"unsafe"
 )
@@ -85,7 +85,12 @@ func (f ScalarField) ToBytesLittleEndian() []byte {
 }

 func GenerateScalars(size int) core.HostSlice[ScalarField] {
-	scalarSlice := make(core.HostSlice[ScalarField], size)
+	scalars := make([]ScalarField, size)
+	for i := range scalars {
+		scalars[i] = ScalarField{}
+	}
+
+	scalarSlice := core.HostSliceFromElements[ScalarField](scalars)

 	cScalars := (*C.scalar_t)(unsafe.Pointer(&scalarSlice[0]))
 	cSize := (C.int)(size)
--- a/wrappers/golang/curves/bn254/vec_ops.go
+++ b/wrappers/golang/curves/bn254/vec_ops.go
@@ -1,49 +0,0 @@
-package bn254
-
-// #cgo CFLAGS: -I./include/
-// #include "vec_ops.h"
-import "C"
-
-import (
-	"unsafe"
-
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
-)
-
-func VecOp(a, b, out core.HostOrDeviceSlice, config core.VecOpsConfig, op core.VecOps) (ret cr.CudaError) {
-	core.VecOpCheck(a, b, out, &config)
-	var cA, cB, cOut *C.scalar_t
-
-	if a.IsOnDevice() {
-		cA = (*C.scalar_t)(a.(core.DeviceSlice).AsPointer())
-	} else {
-		cA = (*C.scalar_t)(unsafe.Pointer(&a.(core.HostSlice[ScalarField])[0]))
-	}
-
-	if b.IsOnDevice() {
-		cB = (*C.scalar_t)(b.(core.DeviceSlice).AsPointer())
-	} else {
-		cB = (*C.scalar_t)(unsafe.Pointer(&b.(core.HostSlice[ScalarField])[0]))
-	}
-
-	if out.IsOnDevice() {
-		cOut = (*C.scalar_t)(out.(core.DeviceSlice).AsPointer())
-	} else {
-		cOut = (*C.scalar_t)(unsafe.Pointer(&out.(core.HostSlice[ScalarField])[0]))
-	}
-
-	cConfig := (*C.VecOpsConfig)(unsafe.Pointer(&config))
-	cSize := (C.int)(a.Len())
-
-	switch op {
-	case core.Sub:
-		ret = (cr.CudaError)(C.bn254SubCuda(cA, cB, cSize, cConfig, cOut))
-	case core.Add:
-		ret = (cr.CudaError)(C.bn254AddCuda(cA, cB, cSize, cConfig, cOut))
-	case core.Mul:
-		ret = (cr.CudaError)(C.bn254MulCuda(cA, cB, cSize, cConfig, cOut))
-	}
-
-	return ret
-}
--- a/wrappers/golang/curves/bn254/vec_ops_test.go
+++ b/wrappers/golang/curves/bn254/vec_ops_test.go
@@ -1,33 +0,0 @@
-package bn254
-
-import (
-	"testing"
-
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
-	"github.com/stretchr/testify/assert"
-)
-
-func TestVecOps(t *testing.T) {
-	testSize := 1 << 14
-
-	a := GenerateScalars(testSize)
-	b := GenerateScalars(testSize)
-	var scalar ScalarField
-	scalar.One()
-	ones := core.HostSliceWithValue(scalar, testSize)
-
-	out := make(core.HostSlice[ScalarField], testSize)
-	out2 := make(core.HostSlice[ScalarField], testSize)
-	out3 := make(core.HostSlice[ScalarField], testSize)
-
-	cfg := core.DefaultVecOpsConfig()
-
-	VecOp(a, b, out, cfg, core.Add)
-	VecOp(out, b, out2, cfg, core.Sub)
-
-	assert.Equal(t, a, out2)
-
-	VecOp(a, ones, out3, cfg, core.Mul)
-
-	assert.Equal(t, a, out3)
-}
--- a/wrappers/golang/curves/bw6761/include/vec_ops.h
+++ b/wrappers/golang/curves/bw6761/include/vec_ops.h
@@ -1,39 +0,0 @@
-#include <cuda_runtime.h>
-#include "../../include/types.h"
-
-#ifndef _BW6_761_VEC_OPS_H
-#define _BW6_761_VEC_OPS_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-cudaError_t bw6_761MulCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
-  int n,
-  VecOpsConfig* config,
-  scalar_t* result
-);
-
-cudaError_t bw6_761AddCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
-  int n,
-  VecOpsConfig* config,
-  scalar_t* result
-);
-
-cudaError_t bw6_761SubCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
-  int n,
-  VecOpsConfig* config,
-  scalar_t* result
-);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
--- a/wrappers/golang/curves/bw6761/scalar_field.go
+++ b/wrappers/golang/curves/bw6761/scalar_field.go
@@ -6,7 +6,7 @@ import "C"
 import (
 	"encoding/binary"
 	"fmt"
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
+	core "github.com/ingonyama-zk/icicle/wrappers/golang/core"
 	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
 	"unsafe"
 )
@@ -85,7 +85,12 @@ func (f ScalarField) ToBytesLittleEndian() []byte {
 }

 func GenerateScalars(size int) core.HostSlice[ScalarField] {
-	scalarSlice := make(core.HostSlice[ScalarField], size)
+	scalars := make([]ScalarField, size)
+	for i := range scalars {
+		scalars[i] = ScalarField{}
+	}
+
+	scalarSlice := core.HostSliceFromElements[ScalarField](scalars)

 	cScalars := (*C.scalar_t)(unsafe.Pointer(&scalarSlice[0]))
 	cSize := (C.int)(size)
--- a/wrappers/golang/curves/bw6761/vec_ops.go
+++ b/wrappers/golang/curves/bw6761/vec_ops.go
@@ -1,49 +0,0 @@
-package bw6761
-
-// #cgo CFLAGS: -I./include/
-// #include "vec_ops.h"
-import "C"
-
-import (
-	"unsafe"
-
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
-)
-
-func VecOp(a, b, out core.HostOrDeviceSlice, config core.VecOpsConfig, op core.VecOps) (ret cr.CudaError) {
-	core.VecOpCheck(a, b, out, &config)
-	var cA, cB, cOut *C.scalar_t
-
-	if a.IsOnDevice() {
-		cA = (*C.scalar_t)(a.(core.DeviceSlice).AsPointer())
-	} else {
-		cA = (*C.scalar_t)(unsafe.Pointer(&a.(core.HostSlice[ScalarField])[0]))
-	}
-
-	if b.IsOnDevice() {
-		cB = (*C.scalar_t)(b.(core.DeviceSlice).AsPointer())
-	} else {
-		cB = (*C.scalar_t)(unsafe.Pointer(&b.(core.HostSlice[ScalarField])[0]))
-	}
-
-	if out.IsOnDevice() {
-		cOut = (*C.scalar_t)(out.(core.DeviceSlice).AsPointer())
-	} else {
-		cOut = (*C.scalar_t)(unsafe.Pointer(&out.(core.HostSlice[ScalarField])[0]))
-	}
-
-	cConfig := (*C.VecOpsConfig)(unsafe.Pointer(&config))
-	cSize := (C.int)(a.Len())
-
-	switch op {
-	case core.Sub:
-		ret = (cr.CudaError)(C.bw6_761SubCuda(cA, cB, cSize, cConfig, cOut))
-	case core.Add:
-		ret = (cr.CudaError)(C.bw6_761AddCuda(cA, cB, cSize, cConfig, cOut))
-	case core.Mul:
-		ret = (cr.CudaError)(C.bw6_761MulCuda(cA, cB, cSize, cConfig, cOut))
-	}
-
-	return ret
-}
--- a/wrappers/golang/curves/bw6761/vec_ops_test.go
+++ b/wrappers/golang/curves/bw6761/vec_ops_test.go
@@ -1,33 +0,0 @@
-package bw6761
-
-import (
-	"testing"
-
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
-	"github.com/stretchr/testify/assert"
-)
-
-func TestVecOps(t *testing.T) {
-	testSize := 1 << 14
-
-	a := GenerateScalars(testSize)
-	b := GenerateScalars(testSize)
-	var scalar ScalarField
-	scalar.One()
-	ones := core.HostSliceWithValue(scalar, testSize)
-
-	out := make(core.HostSlice[ScalarField], testSize)
-	out2 := make(core.HostSlice[ScalarField], testSize)
-	out3 := make(core.HostSlice[ScalarField], testSize)
-
-	cfg := core.DefaultVecOpsConfig()
-
-	VecOp(a, b, out, cfg, core.Add)
-	VecOp(out, b, out2, cfg, core.Sub)
-
-	assert.Equal(t, a, out2)
-
-	VecOp(a, ones, out3, cfg, core.Mul)
-
-	assert.Equal(t, a, out3)
-}
--- a/wrappers/golang/curves/include/types.h
+++ b/wrappers/golang/curves/include/types.h
@@ -1,5 +1,8 @@
 #include <cuda_runtime.h>

+// #define G2_DEFINED
+// #include "../../../../../icicle/curves/curve_config.cuh"
+
 #ifndef _TYPES_H
 #define _TYPES_H

@@ -7,6 +10,13 @@
 extern "C" {
 #endif

+// typedef curve_config::scalar_t scalar_t;
+// typedef curve_config::projective_t projective_t;
+// typedef curve_config::g2_projective_t g2_projective_t;
+// typedef curve_config::affine_t affine_t;
+// typedef curve_config::g2_affine_t g2_affine_t;
+
+// typedef struct uint32 unsigned long int;
 typedef struct scalar_t scalar_t;
 typedef struct projective_t projective_t;
 typedef struct g2_projective_t g2_projective_t;
@@ -15,7 +25,6 @@ typedef struct g2_affine_t g2_affine_t;

 typedef struct MSMConfig MSMConfig;
 typedef struct NTTConfig NTTConfig;
-typedef struct VecOpsConfig VecOpsConfig;
 typedef struct DeviceContext DeviceContext;

 typedef cudaError_t cudaError_t;
--- a/wrappers/golang/internal/generator/main.go
+++ b/wrappers/golang/internal/generator/main.go
@@ -104,8 +104,7 @@ func generateFiles() {
 		"ntt_test.go.tmpl",
 		"curve_test.go.tmpl",
 		"curve.go.tmpl",
-		"vec_ops_test.go.tmpl",
-		"vec_ops.go.tmpl",
+		/* "vec_ops.h.tmpl", */
 		"helpers_test.go.tmpl",
 	}

@@ -172,7 +171,7 @@ func generateFiles() {
 		"msm.h.tmpl",
 		"g2_msm.h.tmpl",
 		"ntt.h.tmpl",
-		"vec_ops.h.tmpl",
+		/*"vec_ops.h.tmpl",*/
 	}

 	for _, includeFile := range templateIncludeFiles {
--- a/wrappers/golang/internal/generator/templates/include/vec_ops.h.tmpl
+++ b/wrappers/golang/internal/generator/templates/include/vec_ops.h.tmpl
@@ -1,35 +1,49 @@
+#include <cuda.h>
 #include <cuda_runtime.h>
-#include "../../include/types.h"
+#include <stdbool.h>
+// ve_mod_mult.h

-#ifndef _{{toUpper .Curve}}_VEC_OPS_H
-#define _{{toUpper .Curve}}_VEC_OPS_H
+#ifndef _BN254_VEC_MULT_H
+#define _BN254_VEC_MULT_H

 #ifdef __cplusplus
 extern "C" {
 #endif

-cudaError_t {{.Curve}}MulCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
+typedef struct {
+  cudaStream_t stream;   /**< Stream to use. Default value: 0. */
+  int device_id;         /**< Index of the currently used GPU. Default value: 0. */
+  cudaMemPool_t mempool; /**< Mempool to use. Default value: 0. */
+} DeviceContext;
+
+typedef struct BN254_scalar_t BN254_scalar_t;
+
+int bn254AddCuda(
+  BN254_scalar_t* vec_a,
+  BN254_scalar_t* vec_b,
  int n,
-  VecOpsConfig* config,
-  scalar_t* result
+  bool is_on_device,
+  DeviceContext ctx,
+  BN254_scalar_t* result
 );

-cudaError_t {{.Curve}}AddCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
+int bn254SubCuda(
+  BN254_scalar_t* vec_a,
+  BN254_scalar_t* vec_b,
  int n,
-  VecOpsConfig* config,
-  scalar_t* result
+  bool is_on_device,
+  DeviceContext ctx,
+  BN254_scalar_t* result
 );

-cudaError_t {{.Curve}}SubCuda(
-  scalar_t* vec_a,
-  scalar_t* vec_b,
+int bn254MulCuda(
+  BN254_scalar_t* vec_a,
+  BN254_scalar_t* vec_b,
  int n,
-  VecOpsConfig* config,
-  scalar_t* result
+  bool is_on_device,
+  bool is_montgomery,
+  DeviceContext ctx,
+  BN254_scalar_t* result
 );

 #ifdef __cplusplus
--- a/wrappers/golang/internal/generator/templates/scalar_field.go.tmpl
+++ b/wrappers/golang/internal/generator/templates/scalar_field.go.tmpl
@@ -5,14 +5,19 @@ import "C"
 {{- end }}

 {{- define "scalar_field_go_imports" }}
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
+	core "github.com/ingonyama-zk/icicle/wrappers/golang/core"
 	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
 	"unsafe"
 {{- end }}

 {{- define "scalar_field_funcs" }}
 func GenerateScalars(size int) core.HostSlice[ScalarField] {
-	scalarSlice := make(core.HostSlice[ScalarField], size)
+	scalars := make([]ScalarField, size)
+	for i := range scalars {
+		scalars[i] = ScalarField{}
+	}
+
+	scalarSlice := core.HostSliceFromElements[ScalarField](scalars)

 	cScalars := (*C.scalar_t)(unsafe.Pointer(&scalarSlice[0]))
 	cSize := (C.int)(size)
--- a/wrappers/golang/internal/generator/templates/vec_ops.go.tmpl
+++ b/wrappers/golang/internal/generator/templates/vec_ops.go.tmpl
@@ -1,49 +0,0 @@
-package {{.PackageName}}
-
-// #cgo CFLAGS: -I./include/
-// #include "vec_ops.h"
-import "C"
-
-import (
-	"unsafe"
-
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
-	cr "github.com/ingonyama-zk/icicle/wrappers/golang/cuda_runtime"
-)
-
-func VecOp(a, b, out core.HostOrDeviceSlice, config core.VecOpsConfig, op core.VecOps) (ret cr.CudaError) {
-	core.VecOpCheck(a, b, out, &config)
-	var cA, cB, cOut *C.scalar_t
-
-	if a.IsOnDevice() {
-		cA = (*C.scalar_t)(a.(core.DeviceSlice).AsPointer())
-	} else {
-		cA = (*C.scalar_t)(unsafe.Pointer(&a.(core.HostSlice[ScalarField])[0]))
-	}
-
-	if b.IsOnDevice() {
-		cB = (*C.scalar_t)(b.(core.DeviceSlice).AsPointer())
-	} else {
-		cB = (*C.scalar_t)(unsafe.Pointer(&b.(core.HostSlice[ScalarField])[0]))
-	}
-
-	if out.IsOnDevice() {
-		cOut = (*C.scalar_t)(out.(core.DeviceSlice).AsPointer())
-	} else {
-		cOut = (*C.scalar_t)(unsafe.Pointer(&out.(core.HostSlice[ScalarField])[0]))
-	}
-
-	cConfig := (*C.VecOpsConfig)(unsafe.Pointer(&config))
-	cSize := (C.int)(a.Len())
-
-	switch op {
-	case core.Sub:
-		ret = (cr.CudaError)(C.{{.Curve}}SubCuda(cA, cB, cSize, cConfig, cOut))
-	case core.Add:
-		ret = (cr.CudaError)(C.{{.Curve}}AddCuda(cA, cB, cSize, cConfig, cOut))
-	case core.Mul:
-		ret = (cr.CudaError)(C.{{.Curve}}MulCuda(cA, cB, cSize, cConfig, cOut))
-	}
-
-	return ret
-}
--- a/wrappers/golang/internal/generator/templates/vec_ops_test.go.tmpl
+++ b/wrappers/golang/internal/generator/templates/vec_ops_test.go.tmpl
@@ -1,33 +0,0 @@
-package {{.PackageName}}
-
-import (
-	"testing"
-
-	"github.com/ingonyama-zk/icicle/wrappers/golang/core"
-	"github.com/stretchr/testify/assert"
-)
-
-func TestVecOps(t *testing.T) {
-	testSize := 1 << 14
-
-	a := GenerateScalars(testSize)
-	b := GenerateScalars(testSize)
-	var scalar ScalarField
-	scalar.One()
-	ones := core.HostSliceWithValue(scalar, testSize)
-
-	out := make(core.HostSlice[ScalarField], testSize)
-	out2 := make(core.HostSlice[ScalarField], testSize)
-	out3 := make(core.HostSlice[ScalarField], testSize)
-
-	cfg := core.DefaultVecOpsConfig()
-
-	VecOp(a, b, out, cfg, core.Add)
-	VecOp(out, b, out2, cfg, core.Sub)
-
-	assert.Equal(t, a, out2)
-
-	VecOp(a, ones, out3, cfg, core.Mul)
-
-	assert.Equal(t, a, out3)
-}
--- a/wrappers/rust/icicle-core/src/msm/mod.rs
+++ b/wrappers/rust/icicle-core/src/msm/mod.rs
@@ -114,13 +114,13 @@ pub fn msm<C: Curve + MSM<C>>(
    cfg: &MSMConfig,
    results: &mut HostOrDeviceSlice<Projective<C>>,
 ) -> IcicleResult<()> {
-    // if scalars.len() % points.len() != 0 {
-    //     panic!(
-    //         "Number of points {} does not divide the number of scalars {}",
-    //         points.len(),
-    //         scalars.len()
-    //     );
-    // }
+    if scalars.len() % points.len() != 0 {
+        panic!(
+            "Number of points {} does not divide the number of scalars {}",
+            points.len(),
+            scalars.len()
+        );
+    }
    if scalars.len() % results.len() != 0 {
        panic!(
            "Number of results {} does not divide the number of scalars {}",
@@ -129,7 +129,7 @@ pub fn msm<C: Curve + MSM<C>>(
        );
    }
    let mut local_cfg = cfg.clone();
-    local_cfg.points_size = scalars.len() as i32;
+    local_cfg.points_size = points.len() as i32;
    local_cfg.batch_size = results.len() as i32;
    local_cfg.are_scalars_on_device = scalars.is_on_device();
    local_cfg.are_points_on_device = points.is_on_device();
--- a/wrappers/rust/icicle-cuda-runtime/Cargo.toml
+++ b/wrappers/rust/icicle-cuda-runtime/Cargo.toml
@@ -6,9 +6,10 @@ authors = [ "Ingonyama" ]
 description = "Ingonyama's Rust wrapper of CUDA runtime"
 homepage = "https://www.ingonyama.com"
 repository = "https://github.com/ingonyama-zk/icicle"
+rust-version = "1.70.0"

 [dependencies]
 bitflags = "1.3"

 [build-dependencies]
-bindgen = "*"
+bindgen = "*"
--- a/wrappers/rust/icicle-cuda-runtime/src/memory.rs
+++ b/wrappers/rust/icicle-cuda-runtime/src/memory.rs
@@ -78,10 +78,6 @@ impl<'a, T> HostOrDeviceSlice<'a, T> {
        Self::Host(src)
    }

-    pub fn from_slice(slice: &'a mut [T]) -> Self {
-        Self::Device(slice, 0)
-    }
-
    pub fn cuda_malloc(count: usize) -> CudaResult<Self> {
        let size = count
            .checked_mul(size_of::<T>())
Author	SHA1	Message	Date
ImmanuelSegol	b5364c24dd	refactor	2024-02-28 11:41:09 -04:00
ImmanuelSegol	c2b73aee8d	refactor	2024-02-28 11:37:25 -04:00
ImmanuelSegol	49663d89d3	refactor	2024-02-28 11:29:29 -04:00
ImmanuelSegol	dd509f095b	refactor	2024-02-28 11:27:33 -04:00
ImmanuelSegol	9449ffd7cb	refactor	2024-02-28 11:19:59 -04:00