Mirror of https://github.com/pseXperiments/icicle.git, synced 2026-01-11 16:38:27 -05:00.

Compare commits: V3...cpu-mult-o (185 commits)
Commits in the comparison (abbreviated SHA1):

0a163b85f6
29f678068d
ecd7a0d6e9
f7fcd14714
5332f4c8f8
a45746fc3b
4a4b25445d
dc1b2fce6f
65ca51ca5e
f8661ac0ef
5d821d3db1
131e22d3c6
10963fbe0c
2a3dcd776a
8d03fb6bc8
445eec88af
a5a8bf8e3d
68802bc14a
63dcd4ef5d
c302aebeca
b07d02096f
454fe5b922
99e58532ad
9618e6db74
fc2ac41710
49beb324a8
b52191f9dd
05d918fcda
bdf3cd5672
58780f0115
4f47c9d4f3
1baabd93d6
a8e794fdfd
934f934976
cd3353362a
afe48317a6
7de94fff84
d3b27a7b68
0409c6f5d7
9c35aaa5c3
db19c3846d
06243e5493
8ade7ef393
749d3f9b6e
0885195619
6f8c480fd1
f13f24cff2
028f59cef0
61af965b41
091a19354e
0c8a849d7c
87a25afee7
4d0b1a03a3
de88ad144e
9b46b8489f
caf3b4fab2
e49540de20
74f7a3cbcd
36c879912a
ca7fb84cf0
76cf23d02a
5a7c0ccd22
ec596c3372
be7cbbac89
ef85fccd08
40693a0e3e
6b095e6e29
b8e9f90e87
8a6cf0258b
3e50854b61
fe28f8f160
1146f5620a
401b5b8b33
48269e3a8e
f23051967d
7ed3cc71ce
763b736c46
f2236562f2
45c0e3e4b9
2da454aa84
0cdfb84a65
6578125a88
e14be6fc5b
d9248b7110
86b757a6ce
6627b7b5f5
3072f6ce21
29c83c7453
de650b8784
e67ac8608f
df290c07a4
ffc68c25e0
59ff2d9a11
ca4281dafc
a1c0c8eed8
d8115c0404
754f7bc2ae
5c85599ac2
15abeb93c2
a86ebb47ec
25d53e86f4
c5b75f7868
42f774abbd
ff6306c7a7
acb49eb598
9ae8d44cf1
83cdf4145c
3b92f60372
da007d112e
3309044a8c
baae668edf
59843ee2b1
77eb89fc2e
6fe6916050
091c9ba616
41962b6d23
42490afdad
aea2dba1f0
0e4af8f3ca
039065082a
c94bcad4f6
6671c3b391
4e7b8869da
5b5fbfb9ad
c04a3ab182
e2237ec34d
4318aa1072
729d1c0fd9
632fb91a4b
bd436f2c00
954b0e1891
212068c196
3c028bf44a
e524db39d8
d3d296699d
b43518138c
53d73d3e69
d55dc491d0
87f725da9f
4faace4d44
aab5d0d6b5
4f8fe8387f
e5b12bbd50
0e0470d20a
87323a68d5
874ebf569b
6d0b326f0b
fe12616c5e
eb5a6f95d8
2b4a4c22e7
c2267714ad
c1e2a55919
c4c7744648
ba2a03fbc1
c07eb08587
eda61b2e28
e40c661dee
42b659b2c6
b1abf09896
0216068892
7068723fad
4fcd5eb35c
608880db96
3ae2799cde
0a48a5d49a
37e4fafad9
ade9651ba0
0dcece404e
8b8d778294
ff1017e38b
513b45f5cb
b50d6aa3cb
e4e1b3f8d7
8d9c2b1014
d15edcb60e
fdec847061
7ade66b977
0af022ad49
d5aac95ff9
5bbdb0f501
755a44d7e8
b3fad0fff3
d7f86e6942
5410d072ee
4fa154dceb
.github/changed-files.yml (vendored; 25 changed lines)

@@ -5,21 +5,20 @@ golang:
  - go.mod
  - .github/workflows/golang.yml
rust:
  - wrappers/rust/**/*
  - wrappers/rust_v3/**/*
  - '!wrappers/rust/README.md'
  - .github/workflows/rust.yml
  - .github/workflows/v3_rust.yml
cpp:
  - icicle/**/*.cu
  - icicle/**/*.cuh
  - icicle/**/*.cpp
  - icicle/**/*.hpp
  - icicle/**/*.c
  - icicle/**/*.h
  - icicle/CMakeLists.txt
  - .github/workflows/cpp_cuda.yml
  - icicle/cmake/Common.cmake
  - icicle/cmake/CurvesCommon.cmake
  - icicle/cmake/FieldsCommon.cmake
  - icicle_v3/**/*.cu
  - icicle_v3/**/*.cuh
  - icicle_v3/**/*.cpp
  - icicle_v3/**/*.hpp
  - icicle_v3/**/*.c
  - icicle_v3/**/*.h
  - icicle_v3/CMakeLists.txt
  - .github/workflows/v3.yml
  - icicle_v3/cmake/curve.cmake
  - icicle_v3/cmake/field.cmake
examples:
  - examples/**/*
  - .github/workflows/examples.yml
.github/workflows/codespell.yml (vendored; 3 changed lines)

@@ -4,7 +4,8 @@ on:
  pull_request:
    branches:
      - main
      - V2
      - V3
      - yshekel/V3

jobs:
  spelling-checker:
.github/workflows/cpp_cuda.yml (vendored; 6 changed lines)

@@ -3,12 +3,10 @@ name: C++/CUDA
on:
  pull_request:
    branches:
      - main
      - V2
      - main
  push:
    branches:
      - main
      - V2
      - main

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
55
.github/workflows/examples.yml
vendored
55
.github/workflows/examples.yml
vendored
@@ -1,8 +1,7 @@
|
||||
# This workflow is a demo of how to run all examples in the Icicle repository.
|
||||
# For each language directory (c++, Rust, etc.) the workflow
|
||||
# (1) loops over all examples (msm, ntt, etc.) and
|
||||
# (2) runs ./compile.sh and ./run.sh in each directory.
|
||||
# The script ./compile.sh should compile the example and ./run.sh should run it.
|
||||
# For each language directory (c++, Rust, etc.) the workflow
|
||||
# (1) loops over all examples (msm, ntt, etc.) and
|
||||
# (2) runs ./run.sh in each directory.
|
||||
# Each script should return 0 for success and 1 otherwise.
|
||||
|
||||
name: Examples
|
||||
@@ -10,12 +9,11 @@ name: Examples
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- V2
|
||||
- V3
|
||||
- yshekel/V3 # TODO remove when merged to V3
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
- V2
|
||||
- V3
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
@@ -25,12 +23,42 @@ jobs:
|
||||
check-changed-files:
|
||||
uses: ./.github/workflows/check-changed-files.yml
|
||||
|
||||
extract-cuda-backend-branch:
|
||||
name: Extract cuda branch name
|
||||
runs-on: ubuntu-22.04
|
||||
outputs:
|
||||
cuda-backend-branch: ${{ steps.extract.outputs.cuda-backend-branch }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Extract Private Branch from PR Description
|
||||
id: extract
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
DESCRIPTION=$(gh pr view ${{ github.event.pull_request.number }} --json body -q '.body')
|
||||
echo "PR Description: $DESCRIPTION"
|
||||
CUDA_BE_BRANCH=$(echo "$DESCRIPTION" | grep -oP 'cuda-backend-branch:\s*\K[^\s]+') || true
|
||||
if [ -z "$CUDA_BE_BRANCH" ]; then
|
||||
CUDA_BE_BRANCH="main" # Default branch if not specified
|
||||
fi
|
||||
echo "Extracted CUDA Backend Branch: $CUDA_BE_BRANCH"
|
||||
echo "::set-output name=cuda-backend-branch::$CUDA_BE_BRANCH"
|
||||
|
||||
run-examples:
|
||||
runs-on: [self-hosted, Linux, X64, icicle, examples]
|
||||
needs: check-changed-files
|
||||
needs: [check-changed-files, extract-cuda-backend-branch]
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Checkout CUDA Backend
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: ingonyama-zk/icicle-cuda-backend
|
||||
path: ./icicle_v3/backend/cuda
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
ssh-key: ${{ secrets.CUDA_PULL_KEY }}
|
||||
ref: ${{ needs.extract-branch.outputs.cuda-backend-branch }}
|
||||
- name: c++ examples
|
||||
working-directory: ./examples/c++
|
||||
if: needs.check-changed-files.outputs.cpp_cuda == 'true' || needs.check-changed-files.outputs.examples == 'true'
|
||||
@@ -40,11 +68,10 @@ jobs:
|
||||
if [ -d "$dir" ]; then
|
||||
echo "Running command in $dir"
|
||||
cd $dir
|
||||
./compile.sh
|
||||
./run.sh
|
||||
./run.sh -d CUDA
|
||||
cd -
|
||||
fi
|
||||
done
|
||||
done
|
||||
- name: Rust examples
|
||||
working-directory: ./examples/rust
|
||||
if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.examples == 'true'
|
||||
@@ -54,7 +81,7 @@ jobs:
|
||||
if [ -d "$dir" ]; then
|
||||
echo "Running command in $dir"
|
||||
cd $dir
|
||||
cargo run --release
|
||||
./run.sh -d CUDA
|
||||
cd -
|
||||
fi
|
||||
done
|
||||
done
|
||||
.github/workflows/golang.yml (vendored; 6 changed lines)

@@ -3,12 +3,10 @@ name: GoLang
on:
  pull_request:
    branches:
      - main
      - V2
      - main
  push:
    branches:
      - main
      - V2
      - main

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
.github/workflows/rust.yml (vendored; 6 changed lines)

@@ -3,12 +3,10 @@ name: Rust
on:
  pull_request:
    branches:
      - main
      - V2
      - main
  push:
    branches:
      - main
      - V2
      - main

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
126
.github/workflows/v3.yml
vendored
Normal file
126
.github/workflows/v3.yml
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
name: C++/CUDA
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- V3
|
||||
- yshekel/V3 # TODO remove when merged to V3
|
||||
push:
|
||||
branches:
|
||||
- V3
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check-changed-files:
|
||||
uses: ./.github/workflows/check-changed-files.yml
|
||||
|
||||
check-format:
|
||||
name: Check Code Format
|
||||
runs-on: ubuntu-22.04
|
||||
needs: check-changed-files
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Check clang-format
|
||||
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: if [[ $(find ./ \( -path ./icicle/build -prune -o -path ./**/target -prune -o -path ./examples -prune \) -iname *.h -or -iname *.cuh -or -iname *.cu -or -iname *.c -or -iname *.cpp | xargs clang-format --dry-run -ferror-limit=1 -style=file 2>&1) ]]; then echo "Please run clang-format"; exit 1; fi
|
||||
|
||||
extract-cuda-backend-branch:
|
||||
name: Extract cuda branch name
|
||||
runs-on: ubuntu-22.04
|
||||
outputs:
|
||||
cuda-backend-branch: ${{ steps.extract.outputs.cuda-backend-branch }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Extract Private Branch from PR Description
|
||||
id: extract
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
DESCRIPTION=$(gh pr view ${{ github.event.pull_request.number }} --json body -q '.body')
|
||||
echo "PR Description: $DESCRIPTION"
|
||||
CUDA_BE_BRANCH=$(echo "$DESCRIPTION" | grep -oP 'cuda-backend-branch:\s*\K[^\s]+') || true
|
||||
if [ -z "$CUDA_BE_BRANCH" ]; then
|
||||
CUDA_BE_BRANCH="main" # Default branch if not specified
|
||||
fi
|
||||
echo "Extracted CUDA Backend Branch: $CUDA_BE_BRANCH"
|
||||
echo "::set-output name=cuda-backend-branch::$CUDA_BE_BRANCH"
|
||||
|
||||
test-linux-curve:
|
||||
name: Test on Linux
|
||||
runs-on: [self-hosted, Linux, X64, icicle]
|
||||
needs: [check-changed-files, check-format, extract-cuda-backend-branch]
|
||||
strategy:
|
||||
matrix:
|
||||
curve:
|
||||
- name: bn254
|
||||
build_args: -DG2=ON -DECNTT=ON
|
||||
- name: bls12_381
|
||||
build_args: -DG2=ON -DECNTT=ON
|
||||
- name: bls12_377
|
||||
build_args: -DG2=ON -DECNTT=ON
|
||||
- name: bw6_761
|
||||
build_args: -DG2=ON -DECNTT=ON
|
||||
- name: grumpkin
|
||||
build_args:
|
||||
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
- name: Checkout CUDA Backend
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: ingonyama-zk/icicle-cuda-backend
|
||||
path: ./icicle_v3/backend/cuda
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
ssh-key: ${{ secrets.CUDA_PULL_KEY }}
|
||||
ref: ${{ needs.extract-branch.outputs.cuda-backend-branch }}
|
||||
- name: Build curve
|
||||
working-directory: ./icicle_v3
|
||||
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: |
|
||||
mkdir -p build && rm -rf build/*
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=ON -DCURVE=${{ matrix.curve.name }} ${{ matrix.curve.build_args }} -DCUDA_BACKEND=local -S . -B build
|
||||
cmake --build build -j
|
||||
- name: Run C++ curve Tests
|
||||
working-directory: ./icicle_v3/build/tests
|
||||
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: ctest --verbose
|
||||
|
||||
test-linux-field:
|
||||
name: Test on Linux
|
||||
runs-on: [self-hosted, Linux, X64, icicle]
|
||||
needs: [check-changed-files, check-format, extract-cuda-backend-branch]
|
||||
strategy:
|
||||
matrix:
|
||||
field:
|
||||
- name: babybear
|
||||
build_args: -DEXT_FIELD=ON
|
||||
- name: stark252
|
||||
build_args: -DEXT_FIELD=OFF
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
- name: Checkout CUDA Backend
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: ingonyama-zk/icicle-cuda-backend
|
||||
path: ./icicle_v3/backend/cuda
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
ssh-key: ${{ secrets.CUDA_PULL_KEY }}
|
||||
ref: ${{ needs.extract-branch.outputs.cuda-backend-branch }}
|
||||
- name: Build field
|
||||
working-directory: ./icicle_v3
|
||||
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: |
|
||||
mkdir -p build && rm -rf build/*
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTS=ON -DFIELD=${{ matrix.field.name }} ${{ matrix.field.build_args }} -DCUDA_BACKEND=local -S . -B build
|
||||
cmake --build build -j
|
||||
- name: Run C++ field Tests
|
||||
working-directory: ./icicle_v3/build/tests
|
||||
if: needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
run: ctest --verbose
|
||||
82
.github/workflows/v3_rust.yml
vendored
Normal file
82
.github/workflows/v3_rust.yml
vendored
Normal file
@@ -0,0 +1,82 @@
|
||||
name: Rust
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- V3
|
||||
- yshekel/V3 # TODO remove when merged to V3
|
||||
push:
|
||||
branches:
|
||||
- V3
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
check-changed-files:
|
||||
uses: ./.github/workflows/check-changed-files.yml
|
||||
|
||||
check-format:
|
||||
name: Check Code Format
|
||||
runs-on: ubuntu-22.04
|
||||
needs: check-changed-files
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Check rustfmt
|
||||
if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
working-directory: ./wrappers/rust
|
||||
# "-name target -prune" removes searching in any directory named "target"
|
||||
# Formatting by single file is necessary due to generated files not being present
|
||||
# before building the project.
|
||||
# e.g. icicle-cuda-runtime/src/bindings.rs is generated and icicle-cuda-runtime/src/lib.rs includes that module
|
||||
# causing rustfmt to fail.
|
||||
run: if [[ $(find . -path ./icicle-curves/icicle-curve-template -prune -o -name target -prune -o -iname *.rs -print | xargs cargo fmt --check --) ]]; then echo "Please run cargo fmt"; exit 1; fi
|
||||
|
||||
extract-cuda-backend-branch:
|
||||
name: Extract cuda branch name
|
||||
runs-on: ubuntu-22.04
|
||||
outputs:
|
||||
cuda-backend-branch: ${{ steps.extract.outputs.cuda-backend-branch }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Extract Private Branch from PR Description
|
||||
id: extract
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
run: |
|
||||
DESCRIPTION=$(gh pr view ${{ github.event.pull_request.number }} --json body -q '.body')
|
||||
echo "PR Description: $DESCRIPTION"
|
||||
CUDA_BE_BRANCH=$(echo "$DESCRIPTION" | grep -oP 'cuda-backend-branch:\s*\K[^\s]+') || true
|
||||
if [ -z "$CUDA_BE_BRANCH" ]; then
|
||||
CUDA_BE_BRANCH="main" # Default branch if not specified
|
||||
fi
|
||||
echo "Extracted CUDA Backend Branch: $CUDA_BE_BRANCH"
|
||||
echo "::set-output name=cuda-backend-branch::$CUDA_BE_BRANCH"
|
||||
|
||||
test-linux:
|
||||
name: Test on Linux
|
||||
runs-on: [self-hosted, Linux, X64, icicle]
|
||||
needs: [check-changed-files, check-format, extract-cuda-backend-branch]
|
||||
steps:
|
||||
- name: Checkout Repo
|
||||
uses: actions/checkout@v4
|
||||
- name: Checkout CUDA Backend
|
||||
uses: actions/checkout@v4
|
||||
with:
|
||||
repository: ingonyama-zk/icicle-cuda-backend
|
||||
path: ./icicle_v3/backend/cuda
|
||||
token: ${{ secrets.GITHUB_TOKEN }}
|
||||
ssh-key: ${{ secrets.CUDA_PULL_KEY }}
|
||||
ref: ${{ needs.extract-branch.outputs.cuda-backend-branch }}
|
||||
- name: Run tests
|
||||
working-directory: ./wrappers/rust_v3
|
||||
if: needs.check-changed-files.outputs.rust == 'true' || needs.check-changed-files.outputs.cpp_cuda == 'true'
|
||||
# tests are split to phases since NTT domain is global but tests have conflicting requirements
|
||||
run: |
|
||||
cargo build --release --verbose --features=g2,ec_ntt
|
||||
cargo test --workspace --release --verbose --features=g2,ec_ntt,cuda_backend -- --skip phase
|
||||
cargo test phase2 --workspace --release --verbose --features=g2,ec_ntt,cuda_backend
|
||||
cargo test phase3 --workspace --release --verbose --features=g2,ec_ntt,cuda_backend
|
||||
.gitignore (vendored; 2 changed lines)

@@ -17,4 +17,4 @@
**/Cargo.lock
**/icicle/build/
**/wrappers/rust/icicle-cuda-runtime/src/bindings.rs
**/build*
**/build/*
@@ -1,23 +1,16 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
|
||||
endif ()
|
||||
project(example LANGUAGES CUDA CXX)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
|
||||
set(CMAKE_CUDA_FLAGS_RELEASE "")
|
||||
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
|
||||
project(example)
|
||||
|
||||
add_executable(example example.cpp)
|
||||
target_include_directories(example PRIVATE "../../../icicle_v3/include" "..")
|
||||
target_link_directories(example PRIVATE "${CMAKE_SOURCE_DIR}/build/icicle")
|
||||
message("${CMAKE_BINARY_DIR}/icicle")
|
||||
target_link_libraries(example PRIVATE icicle_curve_bn254 icicle_field_bn254 icicle_device)
|
||||
if(BACKEND_DIR)
|
||||
add_compile_definitions(BACKEND_DIR="${BACKEND_DIR}")
|
||||
endif()
|
||||
|
||||
add_executable(
|
||||
example
|
||||
example.cu
|
||||
)
|
||||
target_include_directories(example PRIVATE "../../../icicle/include")
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_bn254.a)
|
||||
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
|
||||
@@ -17,17 +17,19 @@ Typically, you concurrently

## Best-Practices

1. Use three separate CUDA streams for Download, Upload, and Compute operations
2. Use pinned (page-locked) memory on host to speed data bus transfers. Calling `cudaHostAlloc` allocates pinned memory.
3. Use in-place NTT to save on device memory.
1. Use three separate streams for Download to device, Upload from device, and Compute operations
2. Future: Use pinned (page-locked) memory on host to speed data bus transfers.
3. Compute in-place NTT.

## Running the example

To change the default curve BN254, edit `compile.sh` and `CMakeLists.txt`
To change the default curve BN254, edit `run.sh` and `CMakeLists.txt`

```sh
./compile.sh
./run.sh
# for CPU
./run.sh -d CPU
# for CUDA
./run.sh -d CUDA -b /path/to/cuda/backend/install/dir
```

To compare with ICICLE baseline (i.e. non-concurrent) NTT, you can run [this example](../ntt/README.md).
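For orientation, the double-buffering loop that these best practices translate into is sketched below. This is a minimal condensation of the new `examples/c++/best-practice-ntt/example.cpp` added later in this diff; the streams, host/device buffers, and `NTTConfig` are assumed to be prepared exactly as that file shows (two host buffers, two device buffers, `is_async = true` on the compute stream).

```c++
// Minimal sketch: condensed from the new example.cpp in this diff.
// Assumes the icicle V3 runtime calls used there (icicle_copy_async,
// icicle_stream_synchronize) and the generated bn254_ntt entry point.
#include "icicle/runtime.h"
#include "icicle/api/bn254.h"
using namespace bn254;

void run_double_buffered_ntt(
  scalar_t* h_inp[2], scalar_t* h_out[2], scalar_t* d_vec[2], // two host and two device buffers
  unsigned ntt_size, int nof_blocks, int block_size,
  NTTConfig<scalar_t>& config_compute, // batch_size, on-device flags, is_async, stream already set
  icicleStreamHandle stream_compute, icicleStreamHandle stream_h2d, icicleStreamHandle stream_d2h)
{
  for (int run = 0; run < 10; run++) {
    int vec_compute = run % 2;        // buffer the NTT kernel transforms in place
    int vec_transfer = (run + 1) % 2; // buffer being drained to host and refilled

    // launch the in-place NTT asynchronously on the compute stream
    bn254_ntt(d_vec[vec_compute], ntt_size, NTTDir::kForward, config_compute, d_vec[vec_compute]);

    // overlap transfers with compute: stream the previous result out (d2h) while the
    // next input streams in (h2d), delayed by one block so reads finish before writes
    for (int i = 0; i <= nof_blocks; i++) {
      if (i < nof_blocks)
        ICICLE_CHECK(icicle_copy_async(
          &h_out[vec_transfer][i * block_size], &d_vec[vec_transfer][i * block_size],
          sizeof(scalar_t) * block_size, stream_d2h));
      if (i > 0)
        ICICLE_CHECK(icicle_copy_async(
          &d_vec[vec_transfer][(i - 1) * block_size], &h_inp[vec_transfer][(i - 1) * block_size],
          sizeof(scalar_t) * block_size, stream_h2d));
      // synchronize both transfer streams at the end of each block to preserve data integrity
      ICICLE_CHECK(icicle_stream_synchronize(stream_d2h));
      ICICLE_CHECK(icicle_stream_synchronize(stream_h2d));
    }
    ICICLE_CHECK(icicle_stream_synchronize(stream_compute)); // wait for the NTT itself
  }
}
```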
@@ -1,16 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately on error
|
||||
set -e
|
||||
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DG2=OFF -DMSM=OFF
|
||||
cmake --build build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
|
||||
125
examples/c++/best-practice-ntt/example.cpp
Normal file
125
examples/c++/best-practice-ntt/example.cpp
Normal file
@@ -0,0 +1,125 @@
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <chrono>
|
||||
|
||||
#include "icicle/runtime.h"
|
||||
#include "icicle/api/bn254.h"
|
||||
using namespace bn254;
|
||||
|
||||
#include "examples_utils.h"
|
||||
|
||||
void initialize_input(const unsigned ntt_size, const unsigned batch_size, scalar_t* elements)
|
||||
{
|
||||
for (unsigned i = 0; i < ntt_size * batch_size; i++) {
|
||||
elements[i] = scalar_t::from(i + 1);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
try_load_and_set_backend_device(argc, argv);
|
||||
|
||||
// set these parameters to match the desired NTT size and batch size
|
||||
const unsigned log_ntt_size = 20;
|
||||
const unsigned batch_size = 16;
|
||||
|
||||
scalar_t basic_root = scalar_t::omega(log_ntt_size);
|
||||
|
||||
const unsigned ntt_size = 1 << log_ntt_size;
|
||||
std::cout << "log NTT size: " << log_ntt_size << std::endl;
|
||||
std::cout << "Batch size: " << batch_size << std::endl;
|
||||
|
||||
// Create separate streams for overlapping data transfers and kernel execution.
|
||||
icicleStreamHandle stream_compute, stream_h2d, stream_d2h;
|
||||
ICICLE_CHECK(icicle_create_stream(&stream_compute));
|
||||
ICICLE_CHECK(icicle_create_stream(&stream_h2d));
|
||||
ICICLE_CHECK(icicle_create_stream(&stream_d2h));
|
||||
|
||||
// Initialize NTT domain
|
||||
std::cout << "Init NTT domain" << std::endl;
|
||||
auto ntt_init_domain_cfg = default_ntt_init_domain_config();
|
||||
// set CUDA backend specific flag for init_domain
|
||||
ConfigExtension backend_cfg_ext;
|
||||
backend_cfg_ext.set("fast_twiddles", true);
|
||||
ntt_init_domain_cfg.ext = &backend_cfg_ext;
|
||||
ICICLE_CHECK(bn254_ntt_init_domain(&basic_root, ntt_init_domain_cfg));
|
||||
|
||||
std::cout << "Concurrent Download, Upload, and Compute In-place NTT" << std::endl;
|
||||
int nof_blocks = 32;
|
||||
int block_size = ntt_size * batch_size / nof_blocks;
|
||||
std::cout << "Number of blocks: " << nof_blocks << ", block size: " << block_size << " Bytes" << std::endl;
|
||||
|
||||
// on-host pinned data
|
||||
scalar_t* h_inp[2];
|
||||
scalar_t* h_out[2];
|
||||
for (int i = 0; i < 2; i++) {
|
||||
h_inp[i] = new scalar_t[ntt_size * batch_size];
|
||||
h_out[i] = new scalar_t[ntt_size * batch_size];
|
||||
}
|
||||
|
||||
// on-device in-place data
|
||||
// we need two on-device vectors to overlap data transfers with NTT kernel execution
|
||||
scalar_t* d_vec[2];
|
||||
for (int i = 0; i < 2; i++) {
|
||||
ICICLE_CHECK(icicle_malloc((void**)&d_vec[i], sizeof(scalar_t) * ntt_size * batch_size));
|
||||
}
|
||||
|
||||
// initialize input data
|
||||
initialize_input(ntt_size, batch_size, h_inp[0]);
|
||||
initialize_input(ntt_size, batch_size, h_inp[1]);
|
||||
|
||||
// ntt configuration
|
||||
NTTConfig<scalar_t> config_compute = default_ntt_config<scalar_t>();
|
||||
config_compute.batch_size = batch_size;
|
||||
config_compute.are_inputs_on_device = true;
|
||||
config_compute.are_outputs_on_device = true;
|
||||
config_compute.is_async = true;
|
||||
config_compute.stream = stream_compute;
|
||||
// backend specific config extension
|
||||
ConfigExtension ntt_cfg_ext;
|
||||
ntt_cfg_ext.set("ntt_algorithm", 2); // mixed-radix
|
||||
config_compute.ext = &ntt_cfg_ext;
|
||||
|
||||
for (int run = 0; run < 10; run++) {
|
||||
int vec_compute = run % 2;
|
||||
int vec_transfer = (run + 1) % 2;
|
||||
std::cout << "Run: " << run << std::endl;
|
||||
std::cout << "Compute Vector: " << vec_compute << std::endl;
|
||||
std::cout << "Transfer Vector: " << vec_transfer << std::endl;
|
||||
START_TIMER(inplace);
|
||||
bn254_ntt(d_vec[vec_compute], ntt_size, NTTDir::kForward, config_compute, d_vec[vec_compute]);
|
||||
// we have to delay upload to device relative to download from device by one block: preserve write after read
|
||||
for (int i = 0; i <= nof_blocks; i++) {
|
||||
if (i < nof_blocks) {
|
||||
// copy result back from device to host
|
||||
ICICLE_CHECK(icicle_copy_async(
|
||||
&h_out[vec_transfer][i * block_size], &d_vec[vec_transfer][i * block_size], sizeof(scalar_t) * block_size,
|
||||
stream_d2h));
|
||||
}
|
||||
if (i > 0) {
|
||||
// copy next input from host to device to alternate buffer
|
||||
ICICLE_CHECK(icicle_copy_async(
|
||||
&d_vec[vec_transfer][(i - 1) * block_size], &h_inp[vec_transfer][(i - 1) * block_size],
|
||||
sizeof(scalar_t) * block_size, stream_h2d));
|
||||
}
|
||||
// synchronize upload and download at the end of the block to ensure data integrity
|
||||
ICICLE_CHECK(icicle_stream_synchronize(stream_d2h));
|
||||
ICICLE_CHECK(icicle_stream_synchronize(stream_h2d));
|
||||
}
|
||||
// synchronize compute stream with the end of the computation
|
||||
ICICLE_CHECK(icicle_stream_synchronize(stream_compute));
|
||||
END_TIMER(inplace, "Concurrent In-Place NTT");
|
||||
}
|
||||
|
||||
// Clean-up
|
||||
for (int i = 0; i < 2; i++) {
|
||||
ICICLE_CHECK(icicle_free(d_vec[i]));
|
||||
delete[](h_inp[i]);
|
||||
delete[](h_out[i]);
|
||||
}
|
||||
ICICLE_CHECK(icicle_destroy_stream(stream_compute));
|
||||
ICICLE_CHECK(icicle_destroy_stream(stream_d2h));
|
||||
ICICLE_CHECK(icicle_destroy_stream(stream_h2d));
|
||||
return 0;
|
||||
}
|
||||
@@ -1,149 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <chrono>
|
||||
|
||||
#include "curves/params/bn254.cuh"
|
||||
#include "api/bn254.h"
|
||||
using namespace bn254;
|
||||
using namespace ntt;
|
||||
|
||||
const std::string curve = "BN254";
|
||||
|
||||
typedef scalar_t S;
|
||||
typedef scalar_t E;
|
||||
|
||||
const unsigned max_log_ntt_size = 27;
|
||||
|
||||
void initialize_input(const unsigned ntt_size, const unsigned nof_ntts, E* elements)
|
||||
{
|
||||
for (unsigned i = 0; i < ntt_size * nof_ntts; i++) {
|
||||
elements[i] = E::from(i + 1);
|
||||
}
|
||||
}
|
||||
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
cudaDeviceReset();
|
||||
cudaDeviceProp deviceProperties;
|
||||
int deviceId = 0;
|
||||
cudaGetDeviceProperties(&deviceProperties, deviceId);
|
||||
std::string gpu_full_name = deviceProperties.name;
|
||||
std::cout << gpu_full_name << std::endl;
|
||||
std::string gpu_name = gpu_full_name;
|
||||
|
||||
std::cout << "Curve: " << curve << std::endl;
|
||||
|
||||
S basic_root = S::omega(max_log_ntt_size);
|
||||
|
||||
// change these parameters to match the desired NTT size and batch size
|
||||
const unsigned log_ntt_size = 22;
|
||||
const unsigned nof_ntts = 16;
|
||||
|
||||
std::cout << "log NTT size: " << log_ntt_size << std::endl;
|
||||
const unsigned ntt_size = 1 << log_ntt_size;
|
||||
|
||||
std::cout << "Batch size: " << nof_ntts << std::endl;
|
||||
|
||||
// Create separate CUDA streams for overlapping data transfers and kernel execution.
|
||||
cudaStream_t stream_compute, stream_h2d, stream_d2h;
|
||||
cudaStreamCreate(&stream_compute);
|
||||
cudaStreamCreate(&stream_h2d);
|
||||
cudaStreamCreate(&stream_d2h);
|
||||
|
||||
// Create device context for NTT computation
|
||||
auto ctx_compute = device_context::DeviceContext{
|
||||
stream_compute, // stream
|
||||
0, // device_id
|
||||
0, // mempool
|
||||
};
|
||||
|
||||
// Initialize NTT domain and configuration
|
||||
bn254_initialize_domain(&basic_root, ctx_compute, /* fast twiddles */ true);
|
||||
NTTConfig<S> config_compute = default_ntt_config<S>(ctx_compute);
|
||||
config_compute.ntt_algorithm = NttAlgorithm::MixedRadix;
|
||||
config_compute.batch_size = nof_ntts;
|
||||
config_compute.are_inputs_on_device = true;
|
||||
config_compute.are_outputs_on_device = true;
|
||||
config_compute.is_async = true;
|
||||
|
||||
std::cout << "Concurrent Download, Upload, and Compute In-place NTT" << std::endl;
|
||||
int nof_blocks = 32;
|
||||
std::cout << "Number of blocks: " << nof_blocks << std::endl;
|
||||
int block_size = ntt_size * nof_ntts / nof_blocks;
|
||||
|
||||
// on-host pinned data
|
||||
E* h_inp[2];
|
||||
E* h_out[2];
|
||||
for (int i = 0; i < 2; i++) {
|
||||
cudaHostAlloc((void**)&h_inp[i], sizeof(E) * ntt_size * nof_ntts, cudaHostAllocDefault);
|
||||
cudaHostAlloc((void**)&h_out[i], sizeof(E) * ntt_size * nof_ntts, cudaHostAllocDefault);
|
||||
}
|
||||
|
||||
// on-device in-place data
|
||||
// we need two on-device vectors to overlap data transfers with NTT kernel execution
|
||||
E* d_vec[2];
|
||||
for (int i = 0; i < 2; i++) {
|
||||
cudaMalloc((void**)&d_vec[i], sizeof(E) * ntt_size * nof_ntts);
|
||||
}
|
||||
|
||||
// initialize input data
|
||||
initialize_input(ntt_size, nof_ntts, h_inp[0]);
|
||||
initialize_input(ntt_size, nof_ntts, h_inp[1]);
|
||||
|
||||
cudaEvent_t compute_start, compute_stop;
|
||||
cudaEventCreate(&compute_start);
|
||||
cudaEventCreate(&compute_stop);
|
||||
|
||||
for (int run = 0; run < 10; run++) {
|
||||
int vec_compute = run % 2;
|
||||
int vec_transfer = (run + 1) % 2;
|
||||
std::cout << "Run: " << run << std::endl;
|
||||
std::cout << "Compute Vector: " << vec_compute << std::endl;
|
||||
std::cout << "Transfer Vector: " << vec_transfer << std::endl;
|
||||
START_TIMER(inplace);
|
||||
cudaEventRecord(compute_start, stream_compute);
|
||||
bn254_ntt_cuda(d_vec[vec_compute], ntt_size, NTTDir::kForward, config_compute, d_vec[vec_compute]);
|
||||
cudaEventRecord(compute_stop, stream_compute);
|
||||
// we have to delay upload to device relative to download from device by one block: preserve write after read
|
||||
for (int i = 0; i <= nof_blocks; i++) {
|
||||
if (i < nof_blocks) {
|
||||
cudaMemcpyAsync(
|
||||
&h_out[vec_transfer][i * block_size], &d_vec[vec_transfer][i * block_size], sizeof(E) * block_size,
|
||||
cudaMemcpyDeviceToHost, stream_d2h);
|
||||
}
|
||||
if (i > 0) {
|
||||
cudaMemcpyAsync(
|
||||
&d_vec[vec_transfer][(i - 1) * block_size], &h_inp[vec_transfer][(i - 1) * block_size],
|
||||
sizeof(E) * block_size, cudaMemcpyHostToDevice, stream_h2d);
|
||||
}
|
||||
// synchronize upload and download at the end of the block to ensure data integrity
|
||||
cudaStreamSynchronize(stream_d2h);
|
||||
cudaStreamSynchronize(stream_h2d);
|
||||
}
|
||||
// synchronize compute stream with the end of the computation
|
||||
cudaEventSynchronize(compute_stop);
|
||||
float milliseconds = 0;
|
||||
cudaEventElapsedTime(&milliseconds, compute_start, compute_stop);
|
||||
END_TIMER(inplace, "Concurrent In-Place NTT");
|
||||
std::cout << "NTT time: " << milliseconds << " ms" << std::endl;
|
||||
};
|
||||
|
||||
// Clean-up
|
||||
for (int i = 0; i < 2; i++) {
|
||||
cudaFree(d_vec[i]);
|
||||
cudaFreeHost(h_inp[i]);
|
||||
cudaFreeHost(h_out[i]);
|
||||
}
|
||||
cudaEventDestroy(compute_start);
|
||||
cudaEventDestroy(compute_stop);
|
||||
cudaStreamDestroy(stream_compute);
|
||||
cudaStreamDestroy(stream_d2h);
|
||||
cudaStreamDestroy(stream_h2d);
|
||||
return 0;
|
||||
}
|
||||
@@ -1,2 +1,65 @@
|
||||
#!/bin/bash
|
||||
./build/example/example
|
||||
|
||||
# Exit immediately if a command exits with a non-zero status
|
||||
set -e
|
||||
|
||||
# Function to display usage information
|
||||
show_help() {
|
||||
echo "Usage: $0 [-d DEVICE_TYPE] [-b BACKEND_INSTALL_DIR]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -d DEVICE_TYPE Specify the device type (default: CPU)"
|
||||
echo " -b BACKEND_INSTALL_DIR Specify the backend installation directory (default: empty)"
|
||||
echo " -h Show this help message"
|
||||
exit 0
|
||||
}
|
||||
|
||||
# Parse command line options
|
||||
while getopts ":d:b:h" opt; do
|
||||
case ${opt} in
|
||||
d )
|
||||
DEVICE_TYPE=$OPTARG
|
||||
;;
|
||||
b )
|
||||
BACKEND_INSTALL_DIR="$(realpath ${OPTARG})"
|
||||
;;
|
||||
h )
|
||||
show_help
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
show_help
|
||||
;;
|
||||
: )
|
||||
echo "Invalid option: -$OPTARG requires an argument" 1>&2
|
||||
show_help
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Set default values if not provided
|
||||
: "${DEVICE_TYPE:=CPU}"
|
||||
: "${BACKEND_INSTALL_DIR:=}"
|
||||
|
||||
# Create necessary directories
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
ICILE_DIR=$(realpath "../../../icicle_v3/")
|
||||
ICICLE_CUDA_BACKEND_DIR="${ICILE_DIR}/backend/cuda"
|
||||
|
||||
# Build Icicle and the example app that links to it
|
||||
if [ "$DEVICE_TYPE" == "CUDA" ] && [ ! -d "${BACKEND_INSTALL_DIR}" ] && [ -d "${ICICLE_CUDA_BACKEND_DIR}" ]; then
|
||||
echo "Building icicle with CUDA backend"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DMSM=OFF -DCUDA_BACKEND=local -S "${ICILE_DIR}" -B build/icicle
|
||||
BACKEND_INSTALL_DIR=$(realpath "build/icicle/backend")
|
||||
else
|
||||
echo "Building icicle without CUDA backend, BACKEND_INSTALL_DIR=${BACKEND_INSTALL_DIR}"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DMSM=OFF -S "${ICILE_DIR}" -B build/icicle
|
||||
fi
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/example
|
||||
|
||||
cmake --build build/icicle -j
|
||||
cmake --build build/example -j
|
||||
|
||||
./build/example/example "$DEVICE_TYPE" "$BACKEND_INSTALL_DIR"
|
||||
|
||||
examples/c++/examples_utils.h (new file; 38 lines)

@@ -0,0 +1,38 @@
#pragma once

#include <chrono>
#include "icicle/runtime.h"

// Timer
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
#define END_TIMER(timer, msg) \
  printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());

// Load and choose backend
void try_load_and_set_backend_device(int argc = 0, char** argv = nullptr)
{
  if (argc > 2 && 0 != strcmp(argv[2], "")) {
    const char* backend_install_dir = argv[2];
    std::cout << "Trying to load and backend device from " << backend_install_dir << std::endl;
    ICICLE_CHECK(icicle_load_backend(backend_install_dir, true));
  }

  const char* selected_device = argc > 1 ? argv[1] : nullptr;
  if (selected_device) {
    std::cout << "selecting " << selected_device << " device" << std::endl;
    ICICLE_CHECK(icicle_set_device(selected_device));
    return;
  }

  // trying to choose CUDA if available, or fallback to CPU otherwise (default device)
  const bool is_cuda_device_available = (eIcicleError::SUCCESS == icicle_is_device_avialable("CUDA"));
  if (is_cuda_device_available) {
    Device device = {"CUDA", 0}; // GPU-0
    std::cout << "setting " << device << std::endl;
    ICICLE_CHECK(icicle_set_device(device));
    return;
  }

  std::cout << "CUDA device not available, falling back to CPU" << std::endl;
}
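Every V3 example in this change calls the helper above before doing any work; a minimal caller looks like the sketch below. `argv[1]` carries the device type and `argv[2]` the backend install directory, matching how the new run.sh scripts in this diff invoke the example binaries.

```c++
// Minimal usage sketch for examples_utils.h; mirrors the pattern of the new example.cpp files.
#include <iostream>
#include <cstring>
#include "examples_utils.h"

int main(int argc, char* argv[])
{
  // loads the backend library (if a directory was given) and selects CUDA or CPU
  try_load_and_set_backend_device(argc, argv);

  START_TIMER(demo);
  // ... per-example work goes here ...
  END_TIMER(demo, "demo section took");
  return 0;
}
```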
@@ -1,23 +1,16 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
|
||||
endif ()
|
||||
project(example LANGUAGES CUDA CXX)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
|
||||
set(CMAKE_CUDA_FLAGS_RELEASE "")
|
||||
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
|
||||
project(example)
|
||||
|
||||
add_executable(example example.cpp)
|
||||
target_include_directories(example PRIVATE "../../../icicle_v3/include" "..")
|
||||
target_link_directories(example PRIVATE "${CMAKE_SOURCE_DIR}/build/icicle")
|
||||
message("${CMAKE_BINARY_DIR}/icicle")
|
||||
target_link_libraries(example PRIVATE icicle_curve_bn254 icicle_field_bn254 icicle_device)
|
||||
if(BACKEND_DIR)
|
||||
add_compile_definitions(BACKEND_DIR="${BACKEND_DIR}")
|
||||
endif()
|
||||
|
||||
add_executable(
|
||||
example
|
||||
example.cu
|
||||
)
|
||||
target_include_directories(example PRIVATE "../../../icicle/include")
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_curve_bn254.a)
|
||||
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
|
||||
@@ -1,46 +1,43 @@
# Icicle example: Muli-Scalar Multiplication (MSM)

## Best-Practices

We recommend to run our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.

## Key-Takeaway

`Icicle` provides CUDA C++ template function `MSM` to accelerate [Multi-Scalar Multiplication](https://github.com/ingonyama-zk/ingopedia/blob/master/src/msm.md).

## Concise Usage Explanation

1. Select the curve
2. Include an MSM template
3. Configure MSM
4. Call the template
1. Include the curve api
2. Configure MSM
3. Call msm api

```c++
#define CURVE_ID 1
#include "icicle/appUtils/msm/msm.cu"
#include "icicle/api/bn254.h"
...
msm::MSMConfig config = {...};
MSMConfig config = default_msm_config();
...
msm::MSM<scalar_t, affine_t, projective_t>(scalars, points, size, config, &result);
bn254_msm(scalars, points, size, config, &result);
```

In this example we use `BN254` curve (`CURVE_ID=1`). The function computes $result = \sum_{i=0}^{size-1} scalars[i] \cdot points[i]$, where input `points[]` use affine coordinates, and `result` uses projective coordinates.
In this example we use `BN254` curve. The function computes $result = \sum_{i=0}^{size-1} scalars[i] \cdot points[i]$, where input `points[]` use affine coordinates, and `result` uses projective coordinates.

**Parameters:**

The configuration is passed to the kernel as a structure of type `msm::MSMConfig`. Some of the most important fields are listed below:
The configuration is passed to the kernel as a structure of type `MSMConfig`. Some of the most important fields are listed below:

- `are_scalars_on_device`, `are_points_on_device`, `are_results_on_device`: location of the data

- `is_async`: blocking vs. non-blocking kernel call

- `large_bucket_factor`: distinguishes between large bucket and normal bucket sizes. If there is a scalar distribution that is skewed heavily to a few values we can operate on those separately from the rest of the values. The ideal value here can vary by circuit (based on the distribution of scalars) but start with 10 and adjust to see if it improves performance.
- In addition can pass backend-specific params via config.extConfig. For example CUDA backend accepts a `large_bucket_factor` param.

## Running the example

- `cd` to your example directory
- compile with `./compile.sh`
- run with `./run.sh`
```sh
# for CPU
./run.sh -d CPU
# for CUDA
./run.sh -d CUDA -b /path/to/cuda/backend/install/dir
```

## What's in the example

@@ -49,4 +46,4 @@ The configuration is passed to the kernel as a structure of type `msm::MSMConfig
3. Configure and execute MSM using on-host data
4. Copy inputs on-device
5. Configure and execute MSM using on-device data
6. Repeat the above steps for G2 points
6. Repeat step 3 G2 msm points
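Steps 4 and 5 of that list have a compact code shape; the following is a minimal sketch of the on-device path, condensed from the new `examples/c++/msm/example.cpp` added later in this diff (the `ICICLE_CHECK` macro and runtime calls are the ones that file uses).

```c++
// Minimal sketch of the on-device MSM path, condensed from the new msm example.cpp in this diff.
#include "icicle/runtime.h"
#include "icicle/api/bn254.h"
using namespace bn254;

void msm_on_device(const scalar_t* scalars, const affine_t* points, int msm_size, projective_t& result)
{
  auto config = default_msm_config();
  config.are_scalars_on_device = true;
  config.are_points_on_device = true;
  config.are_results_on_device = true;

  // allocate device buffers and copy the host inputs over
  scalar_t* scalars_d;
  affine_t* points_d;
  projective_t* result_d;
  ICICLE_CHECK(icicle_malloc((void**)&scalars_d, sizeof(scalar_t) * msm_size));
  ICICLE_CHECK(icicle_malloc((void**)&points_d, sizeof(affine_t) * msm_size));
  ICICLE_CHECK(icicle_malloc((void**)&result_d, sizeof(projective_t)));
  ICICLE_CHECK(icicle_copy(scalars_d, scalars, sizeof(scalar_t) * msm_size));
  ICICLE_CHECK(icicle_copy(points_d, points, sizeof(affine_t) * msm_size));

  // execute the MSM with device pointers
  ICICLE_CHECK(bn254_msm(scalars_d, points_d, msm_size, config, result_d));

  // bring the result back to host and release device memory
  ICICLE_CHECK(icicle_copy(&result, result_d, sizeof(projective_t)));
  icicle_free(scalars_d);
  icicle_free(points_d);
  icicle_free(result_d);
}
```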
@@ -1,15 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately on error
|
||||
set -e
|
||||
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DG2=ON
|
||||
cmake --build build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
101
examples/c++/msm/example.cpp
Normal file
101
examples/c++/msm/example.cpp
Normal file
@@ -0,0 +1,101 @@
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
#include "icicle/runtime.h"
|
||||
#include "icicle/api/bn254.h"
|
||||
using namespace bn254;
|
||||
|
||||
#include "examples_utils.h"
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
try_load_and_set_backend_device(argc, argv);
|
||||
|
||||
std::cout << "\nIcicle example: Muli-Scalar Multiplication (MSM)" << std::endl;
|
||||
std::cout << "Example parameters" << std::endl;
|
||||
|
||||
int batch_size = 1;
|
||||
unsigned msm_size = 1 << 10;
|
||||
int N = batch_size * msm_size;
|
||||
std::cout << "Batch size: " << batch_size << std::endl;
|
||||
std::cout << "MSM size: " << msm_size << std::endl;
|
||||
|
||||
std::cout << "\nPart I: use G1 points" << std::endl;
|
||||
|
||||
std::cout << "Generating random inputs on-host" << std::endl;
|
||||
auto scalars = std::make_unique<scalar_t[]>(N);
|
||||
auto points = std::make_unique<affine_t[]>(N);
|
||||
projective_t result;
|
||||
scalar_t::rand_host_many(scalars.get(), N);
|
||||
projective_t::rand_host_many(points.get(), N);
|
||||
|
||||
std::cout << "Using default MSM configuration with on-host inputs" << std::endl;
|
||||
|
||||
auto config = default_msm_config();
|
||||
config.batch_size = batch_size;
|
||||
|
||||
std::cout << "\nRunning MSM kernel with on-host inputs" << std::endl;
|
||||
// Execute the MSM kernel
|
||||
START_TIMER(MSM_host_mem);
|
||||
ICICLE_CHECK(bn254_msm(scalars.get(), points.get(), msm_size, config, &result));
|
||||
END_TIMER(MSM_host_mem, "MSM from host-memory took");
|
||||
std::cout << projective_t::to_affine(result) << std::endl;
|
||||
|
||||
DeviceProperties device_props;
|
||||
ICICLE_CHECK(icicle_get_device_properties(device_props));
|
||||
// If device does not share memory with host, copy inputs explicitly and execute msm with device pointers
|
||||
if (!device_props.using_host_memory) {
|
||||
std::cout << "\nReconfiguring MSM to use on-device inputs" << std::endl;
|
||||
config.are_results_on_device = true;
|
||||
config.are_scalars_on_device = true;
|
||||
config.are_points_on_device = true;
|
||||
|
||||
std::cout << "Copying inputs to-device" << std::endl;
|
||||
scalar_t* scalars_d;
|
||||
affine_t* points_d;
|
||||
projective_t* result_d;
|
||||
|
||||
ICICLE_CHECK(icicle_malloc((void**)&scalars_d, sizeof(scalar_t) * N));
|
||||
ICICLE_CHECK(icicle_malloc((void**)&points_d, sizeof(affine_t) * N));
|
||||
ICICLE_CHECK(icicle_malloc((void**)&result_d, sizeof(projective_t)));
|
||||
ICICLE_CHECK(icicle_copy(scalars_d, scalars.get(), sizeof(scalar_t) * N));
|
||||
ICICLE_CHECK(icicle_copy(points_d, points.get(), sizeof(affine_t) * N));
|
||||
|
||||
std::cout << "Running MSM kernel with on-device inputs" << std::endl;
|
||||
// Execute the MSM kernel
|
||||
START_TIMER(MSM_device_mem);
|
||||
ICICLE_CHECK(msm(scalars_d, points_d, msm_size, config, result_d));
|
||||
END_TIMER(MSM_device_mem, "MSM from device-memory took");
|
||||
|
||||
// Copy the result back to the host
|
||||
icicle_copy(&result, result_d, sizeof(projective_t));
|
||||
// Print the result
|
||||
std::cout << projective_t::to_affine(result) << std::endl;
|
||||
// Free the device memory
|
||||
icicle_free(scalars_d);
|
||||
icicle_free(points_d);
|
||||
icicle_free(result_d);
|
||||
}
|
||||
|
||||
std::cout << "\nPart II: use G2 points" << std::endl;
|
||||
|
||||
std::cout << "Generating random inputs on-host" << std::endl;
|
||||
// use the same scalars
|
||||
auto g2_points = std::make_unique<g2_affine_t[]>(N);
|
||||
g2_projective_t::rand_host_many(g2_points.get(), N);
|
||||
|
||||
std::cout << "Reconfiguring MSM to use on-host inputs" << std::endl;
|
||||
config.are_results_on_device = false;
|
||||
config.are_scalars_on_device = false;
|
||||
config.are_points_on_device = false;
|
||||
g2_projective_t g2_result;
|
||||
START_TIMER(MSM_g2);
|
||||
ICICLE_CHECK(bn254_g2_msm(scalars.get(), g2_points.get(), msm_size, config, &g2_result));
|
||||
END_TIMER(MSM_g2, "MSM G2 from host-memory took");
|
||||
std::cout << g2_projective_t::to_affine(g2_result) << std::endl;
|
||||
|
||||
// Similar to G1 MSM, can explicitly copy to device and execute the G2 MSM using device pointers
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,124 +0,0 @@
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
|
||||
#include "api/bn254.h"
|
||||
using namespace bn254;
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
std::cout << "Icicle example: Muli-Scalar Multiplication (MSM)" << std::endl;
|
||||
std::cout << "Example parameters" << std::endl;
|
||||
int batch_size = 1;
|
||||
std::cout << "Batch size: " << batch_size << std::endl;
|
||||
unsigned msm_size = 1048576;
|
||||
std::cout << "MSM size: " << msm_size << std::endl;
|
||||
int N = batch_size * msm_size;
|
||||
|
||||
std::cout << "Part I: use G1 points" << std::endl;
|
||||
|
||||
std::cout << "Generating random inputs on-host" << std::endl;
|
||||
scalar_t* scalars = new scalar_t[N];
|
||||
affine_t* points = new affine_t[N];
|
||||
projective_t result;
|
||||
scalar_t::rand_host_many(scalars, N);
|
||||
projective_t::rand_host_many_affine(points, N);
|
||||
|
||||
std::cout << "Using default MSM configuration with on-host inputs" << std::endl;
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
msm::MSMConfig config = {
|
||||
ctx, // ctx
|
||||
0, // points_size
|
||||
1, // precompute_factor
|
||||
0, // c
|
||||
0, // bitsize
|
||||
10, // large_bucket_factor
|
||||
1, // batch_size
|
||||
false, // are_scalars_on_device
|
||||
false, // are_scalars_montgomery_form
|
||||
false, // are_points_on_device
|
||||
false, // are_points_montgomery_form
|
||||
false, // are_results_on_device
|
||||
false, // is_big_triangle
|
||||
false, // is_async
|
||||
};
|
||||
config.batch_size = batch_size;
|
||||
|
||||
std::cout << "Running MSM kernel with on-host inputs" << std::endl;
|
||||
cudaStream_t stream = config.ctx.stream;
|
||||
// Execute the MSM kernel
|
||||
bn254_msm_cuda(scalars, points, msm_size, config, &result);
|
||||
std::cout << projective_t::to_affine(result) << std::endl;
|
||||
|
||||
std::cout << "Copying inputs on-device" << std::endl;
|
||||
scalar_t* scalars_d;
|
||||
affine_t* points_d;
|
||||
projective_t* result_d;
|
||||
cudaMalloc(&scalars_d, sizeof(scalar_t) * N);
|
||||
cudaMalloc(&points_d, sizeof(affine_t) * N);
|
||||
cudaMalloc(&result_d, sizeof(projective_t));
|
||||
cudaMemcpy(scalars_d, scalars, sizeof(scalar_t) * N, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(points_d, points, sizeof(affine_t) * N, cudaMemcpyHostToDevice);
|
||||
|
||||
std::cout << "Reconfiguring MSM to use on-device inputs" << std::endl;
|
||||
config.are_results_on_device = true;
|
||||
config.are_scalars_on_device = true;
|
||||
config.are_points_on_device = true;
|
||||
|
||||
std::cout << "Running MSM kernel with on-device inputs" << std::endl;
|
||||
// Execute the MSM kernel
|
||||
bn254_msm_cuda(scalars_d, points_d, msm_size, config, result_d);
|
||||
|
||||
// Copy the result back to the host
|
||||
cudaMemcpy(&result, result_d, sizeof(projective_t), cudaMemcpyDeviceToHost);
|
||||
// Print the result
|
||||
std::cout << projective_t::to_affine(result) << std::endl;
|
||||
// Free the device memory
|
||||
cudaFree(scalars_d);
|
||||
cudaFree(points_d);
|
||||
cudaFree(result_d);
|
||||
// Free the host memory, keep scalars for G2 example
|
||||
delete[] points;
|
||||
|
||||
std::cout << "Part II: use G2 points" << std::endl;
|
||||
|
||||
std::cout << "Generating random inputs on-host" << std::endl;
|
||||
// use the same scalars
|
||||
g2_affine_t* g2_points = new g2_affine_t[N];
|
||||
g2_projective_t::rand_host_many_affine(g2_points, N);
|
||||
|
||||
std::cout << "Reconfiguring MSM to use on-host inputs" << std::endl;
|
||||
config.are_results_on_device = false;
|
||||
config.are_scalars_on_device = false;
|
||||
config.are_points_on_device = false;
|
||||
g2_projective_t g2_result;
|
||||
bn254_g2_msm_cuda(scalars, g2_points, msm_size, config, &g2_result);
|
||||
std::cout << g2_projective_t::to_affine(g2_result) << std::endl;
|
||||
|
||||
std::cout << "Copying inputs on-device" << std::endl;
|
||||
g2_affine_t* g2_points_d;
|
||||
g2_projective_t* g2_result_d;
|
||||
cudaMalloc(&scalars_d, sizeof(scalar_t) * N);
|
||||
cudaMalloc(&g2_points_d, sizeof(g2_affine_t) * N);
|
||||
cudaMalloc(&g2_result_d, sizeof(g2_projective_t));
|
||||
cudaMemcpy(scalars_d, scalars, sizeof(scalar_t) * N, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(g2_points_d, g2_points, sizeof(g2_affine_t) * N, cudaMemcpyHostToDevice);
|
||||
|
||||
std::cout << "Reconfiguring MSM to use on-device inputs" << std::endl;
|
||||
config.are_results_on_device = true;
|
||||
config.are_scalars_on_device = true;
|
||||
config.are_points_on_device = true;
|
||||
|
||||
std::cout << "Running MSM kernel with on-device inputs" << std::endl;
|
||||
bn254_g2_msm_cuda(scalars_d, g2_points_d, msm_size, config, g2_result_d);
|
||||
cudaMemcpy(&g2_result, g2_result_d, sizeof(g2_projective_t), cudaMemcpyDeviceToHost);
|
||||
std::cout << g2_projective_t::to_affine(g2_result) << std::endl;
|
||||
|
||||
cudaFree(scalars_d);
|
||||
cudaFree(g2_points_d);
|
||||
cudaFree(g2_result_d);
|
||||
delete[] g2_points;
|
||||
delete[] scalars;
|
||||
cudaStreamDestroy(stream);
|
||||
return 0;
|
||||
}
|
||||
@@ -1,2 +1,65 @@
|
||||
#!/bin/bash
|
||||
./build/example/example
|
||||
|
||||
# Exit immediately if a command exits with a non-zero status
|
||||
set -e
|
||||
|
||||
# Function to display usage information
|
||||
show_help() {
|
||||
echo "Usage: $0 [-d DEVICE_TYPE] [-b BACKEND_INSTALL_DIR]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -d DEVICE_TYPE Specify the device type (default: CPU)"
|
||||
echo " -b BACKEND_INSTALL_DIR Specify the backend installation directory (default: empty)"
|
||||
echo " -h Show this help message"
|
||||
exit 0
|
||||
}
|
||||
|
||||
# Parse command line options
|
||||
while getopts ":d:b:h" opt; do
|
||||
case ${opt} in
|
||||
d )
|
||||
DEVICE_TYPE=$OPTARG
|
||||
;;
|
||||
b )
|
||||
BACKEND_INSTALL_DIR="$(realpath ${OPTARG})"
|
||||
;;
|
||||
h )
|
||||
show_help
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
show_help
|
||||
;;
|
||||
: )
|
||||
echo "Invalid option: -$OPTARG requires an argument" 1>&2
|
||||
show_help
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Set default values if not provided
|
||||
: "${DEVICE_TYPE:=CPU}"
|
||||
: "${BACKEND_INSTALL_DIR:=}"
|
||||
|
||||
# Create necessary directories
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
ICILE_DIR=$(realpath "../../../icicle_v3/")
|
||||
ICICLE_CUDA_BACKEND_DIR="${ICILE_DIR}/backend/cuda"
|
||||
|
||||
# Build Icicle and the example app that links to it
|
||||
if [ "$DEVICE_TYPE" == "CUDA" ] && [ ! -d "${BACKEND_INSTALL_DIR}" ] && [ -d "${ICICLE_CUDA_BACKEND_DIR}" ]; then
|
||||
echo "Building icicle with CUDA backend"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DG2=ON -DCUDA_BACKEND=local -S "${ICILE_DIR}" -B build/icicle
|
||||
BACKEND_INSTALL_DIR=$(realpath "build/icicle/backend")
|
||||
else
|
||||
echo "Building icicle without CUDA backend, BACKEND_INSTALL_DIR=${BACKEND_INSTALL_DIR}"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DG2=ON -S "${ICILE_DIR}" -B build/icicle
|
||||
fi
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/example
|
||||
|
||||
cmake --build build/icicle -j
|
||||
cmake --build build/example -j
|
||||
|
||||
./build/example/example "$DEVICE_TYPE" "$BACKEND_INSTALL_DIR"
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately on error
|
||||
set -e
|
||||
# TODO update for V3
|
||||
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
# # Exit immediately on error
|
||||
# set -e
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254
|
||||
cmake --build build/icicle
|
||||
# mkdir -p build/example
|
||||
# mkdir -p build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
# # Configure and build Icicle
|
||||
# cmake -S ../../../icicle/ -B build/icicle -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254
|
||||
# cmake --build build/icicle
|
||||
|
||||
# # Configure and build the example application
|
||||
# cmake -S . -B build/example
|
||||
# cmake --build build/example
|
||||
@@ -28,7 +28,7 @@ void threadPoseidon(
|
||||
unsigned size_partition,
|
||||
scalar_t* layers,
|
||||
scalar_t* column_hashes,
|
||||
Poseidon<scalar_t> * poseidon)
|
||||
Poseidon<scalar_t>* poseidon)
|
||||
{
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
@@ -36,7 +36,7 @@ void threadPoseidon(
|
||||
return;
|
||||
}
|
||||
HashConfig column_config = default_hash_config(ctx);
|
||||
cudaError_t err = poseidon->hash_many(layers, column_hashes, (size_t) size_partition, size_col, 1, column_config);
|
||||
cudaError_t err = poseidon->hash_many(layers, column_hashes, (size_t)size_partition, size_col, 1, column_config);
|
||||
checkCudaError(err);
|
||||
}
|
||||
|
||||
@@ -51,10 +51,11 @@ using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::p
|
||||
exit(EXIT_FAILURE); \
|
||||
}
|
||||
|
||||
#define CHECK_ALLOC(ptr) if ((ptr) == nullptr) { \
|
||||
std::cerr << "Memory allocation for '" #ptr "' failed." << std::endl; \
|
||||
exit(EXIT_FAILURE); \
|
||||
}
|
||||
#define CHECK_ALLOC(ptr) \
|
||||
if ((ptr) == nullptr) { \
|
||||
std::cerr << "Memory allocation for '" #ptr "' failed." << std::endl; \
|
||||
exit(EXIT_FAILURE); \
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
@@ -113,13 +114,13 @@ int main()
|
||||
scalar_t* column_hash1 = static_cast<scalar_t*>(malloc(size_partition * sizeof(scalar_t)));
|
||||
CHECK_ALLOC(column_hash1);
|
||||
|
||||
Poseidon<scalar_t> column_poseidon0(size_col, ctx0);
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx1.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
Poseidon<scalar_t> column_poseidon1(size_col, ctx1);
|
||||
Poseidon<scalar_t> column_poseidon0(size_col, ctx0);
|
||||
cudaError_t err_result = CHK_STICKY(cudaSetDevice(ctx1.device_id));
|
||||
if (err_result != cudaSuccess) {
|
||||
std::cerr << "CUDA error: " << cudaGetErrorString(err_result) << std::endl;
|
||||
return;
|
||||
}
|
||||
Poseidon<scalar_t> column_poseidon1(size_col, ctx1);
|
||||
|
||||
std::cout << "Parallel execution of Poseidon threads" << std::endl;
|
||||
START_TIMER(parallel);
|
||||
|
||||
@@ -1,2 +1,4 @@
|
||||
#!/bin/bash
|
||||
./build/example/example
|
||||
|
||||
# TODO update for V3
|
||||
# ./build/example/example
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
# Make sure NVIDIA Container Toolkit is installed on your host
|
||||
|
||||
# Use NVIDIA base image
|
||||
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04
|
||||
|
||||
# Update and install dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
nsight-systems-12.2 \
|
||||
cmake \
|
||||
protobuf-compiler \
|
||||
curl \
|
||||
build-essential \
|
||||
git \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Clone Icicle from a GitHub repository
|
||||
RUN git clone https://github.com/ingonyama-zk/icicle.git /icicle
|
||||
|
||||
# Set the working directory in the container
|
||||
WORKDIR /icicle-example
|
||||
|
||||
# Specify the default command for the container
|
||||
CMD ["/bin/bash"]
|
||||
@@ -1,24 +0,0 @@
|
||||
{
|
||||
"name": "Icicle Examples - Multiply",
|
||||
"build": {
|
||||
"dockerfile": "Dockerfile"
|
||||
},
|
||||
"workspaceMount": "source=${localWorkspaceFolder}/.,target=/icicle-example,type=bind",
|
||||
"workspaceFolder": "/icicle-example",
|
||||
"runArgs": [
|
||||
"--gpus",
|
||||
"all"
|
||||
],
|
||||
"postCreateCommand": [
|
||||
"nvidia-smi"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
"extensions": [
|
||||
"ms-vscode.cmake-tools",
|
||||
"ms-azuretools.vscode-docker",
|
||||
"ms-vscode.cpptools-extension-pack"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
|
||||
endif ()
|
||||
project(example LANGUAGES CUDA CXX)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
|
||||
set(CMAKE_CUDA_FLAGS_RELEASE "")
|
||||
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
|
||||
add_executable(
|
||||
example
|
||||
example.cu
|
||||
)
|
||||
target_include_directories(example PRIVATE "../../../icicle/include")
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_bn254.a)
|
||||
find_library(NVML_LIBRARY nvidia-ml PATHS /usr/local/cuda/targets/x86_64-linux/lib/stubs/ )
|
||||
target_link_libraries(example ${NVML_LIBRARY})
|
||||
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
# Icicle example: Multiplication
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend running our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
`Icicle` accelerates the multiplication operation `*` using the [Karatsuba algorithm](https://en.wikipedia.org/wiki/Karatsuba_algorithm).
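For background, the standard Karatsuba identity (general math, not taken from the Icicle sources): split each operand into a high and a low half,

```math
x = x_1 B + x_0, \qquad y = y_1 B + y_0
```

```math
xy = z_2 B^2 + z_1 B + z_0, \quad z_2 = x_1 y_1, \quad z_0 = x_0 y_0, \quad z_1 = (x_1 + x_0)(y_1 + y_0) - z_2 - z_0
```

so each recursion level costs three half-width multiplications instead of four.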
|
||||
|
||||
## Concise Usage Explanation
|
||||
|
||||
Define a `CURVE_ID` and include curve configuration header:
|
||||
|
||||
```c++
|
||||
#define CURVE_ID 1
|
||||
#include "curves/curve_config.cuh"
|
||||
```
|
||||
|
||||
The values of `CURVE_ID` for different curves are in the above header. Multiplication is accelerated both for field scalars and point fields.
|
||||
|
||||
```c++
|
||||
using namespace curve_config;
|
||||
scalar_t a;
|
||||
point_field_t b;
|
||||
```
|
||||
|
||||
## Running the example
|
||||
|
||||
- `cd` to your example directory
|
||||
- compile with `./compile.sh`
|
||||
- run with `./run.sh`
|
||||
|
||||
## What's in the example
|
||||
|
||||
1. Define the parameters for the example such as vector size
|
||||
2. Generate random vectors on-host
|
||||
3. Copy them on-device
|
||||
4. Execute element-wise vector multiplication on-device
|
||||
5. Copy results on-host
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately on error
|
||||
set -e
|
||||
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DMSM=OFF -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254
|
||||
cmake --build build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
@@ -1,162 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <chrono>
|
||||
#include <nvml.h>
|
||||
|
||||
#include "api/bn254.h"
|
||||
#include "vec_ops/vec_ops.cuh"
|
||||
|
||||
using namespace vec_ops;
|
||||
using namespace bn254;
|
||||
|
||||
typedef scalar_t T;
|
||||
|
||||
int vector_mult(T* vec_b, T* vec_a, T* vec_result, size_t n_elments, device_context::DeviceContext ctx)
|
||||
{
|
||||
vec_ops::VecOpsConfig config = vec_ops::DefaultVecOpsConfig();
|
||||
config.is_a_on_device = true;
|
||||
config.is_b_on_device = true;
|
||||
config.is_result_on_device = true;
|
||||
cudaError_t err = bn254_mul_cuda(vec_a, vec_b, n_elments, config, vec_result);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to multiply vectors - " << cudaGetErrorString(err) << std::endl;
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
const unsigned vector_size = 1 << 15;
|
||||
const unsigned repetitions = 1 << 15;
|
||||
|
||||
cudaError_t err;
|
||||
nvmlInit();
|
||||
nvmlDevice_t device;
|
||||
nvmlDeviceGetHandleByIndex(0, &device); // for GPU 0
|
||||
std::cout << "Icicle-Examples: vector multiplications" << std::endl;
|
||||
char name[NVML_DEVICE_NAME_BUFFER_SIZE];
|
||||
if (nvmlDeviceGetName(device, name, NVML_DEVICE_NAME_BUFFER_SIZE) == NVML_SUCCESS) {
|
||||
std::cout << "GPU Model: " << name << std::endl;
|
||||
} else {
|
||||
std::cerr << "Failed to get GPU model name." << std::endl;
|
||||
}
|
||||
unsigned power_limit;
|
||||
nvmlDeviceGetPowerManagementLimit(device, &power_limit);
|
||||
|
||||
std::cout << "Vector size: " << vector_size << std::endl;
|
||||
std::cout << "Repetitions: " << repetitions << std::endl;
|
||||
std::cout << "Power limit: " << std::fixed << std::setprecision(3) << 1.0e-3 * power_limit << " W" << std::endl;
|
||||
|
||||
unsigned int baseline_power;
|
||||
nvmlDeviceGetPowerUsage(device, &baseline_power);
|
||||
std::cout << "Baseline power: " << std::fixed << std::setprecision(3) << 1.0e-3 * baseline_power << " W" << std::endl;
|
||||
unsigned baseline_temperature;
|
||||
if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &baseline_temperature) == NVML_SUCCESS) {
|
||||
std::cout << "Baseline GPU Temperature: " << baseline_temperature << " C" << std::endl;
|
||||
} else {
|
||||
std::cerr << "Failed to get GPU temperature." << std::endl;
|
||||
}
|
||||
|
||||
// host data
|
||||
T* host_in1 = (T*)malloc(vector_size * sizeof(T));
|
||||
T* host_in2 = (T*)malloc(vector_size * sizeof(T));
|
||||
std::cout << "Initializing vectors with random data" << std::endl;
|
||||
T::rand_host_many(host_in1, vector_size);
|
||||
T::rand_host_many(host_in2, vector_size);
|
||||
// device data
|
||||
device_context::DeviceContext ctx = device_context::get_default_device_context();
|
||||
T* device_in1;
|
||||
T* device_in2;
|
||||
T* device_out;
|
||||
|
||||
err = cudaMalloc((void**)&device_in1, vector_size * sizeof(T));
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to allocate device memory - " << cudaGetErrorString(err) << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = cudaMalloc((void**)&device_in2, vector_size * sizeof(T));
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to allocate device memory - " << cudaGetErrorString(err) << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = cudaMalloc((void**)&device_out, vector_size * sizeof(T));
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to allocate device memory - " << cudaGetErrorString(err) << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// copy from host to device
|
||||
err = cudaMemcpy(device_in1, host_in1, vector_size * sizeof(T), cudaMemcpyHostToDevice);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to copy data from host to device - " << cudaGetErrorString(err) << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
err = cudaMemcpy(device_in2, host_in2, vector_size * sizeof(T), cudaMemcpyHostToDevice);
|
||||
if (err != cudaSuccess) {
|
||||
std::cerr << "Failed to copy data from host to device - " << cudaGetErrorString(err) << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::cout << "Starting warm-up" << std::endl;
|
||||
// Warm-up loop
|
||||
for (int i = 0; i < repetitions; i++) {
|
||||
vector_mult(device_in1, device_in2, device_out, vector_size, ctx);
|
||||
}
|
||||
|
||||
std::cout << "Starting benchmarking" << std::endl;
|
||||
unsigned power_before;
|
||||
nvmlDeviceGetPowerUsage(device, &power_before);
|
||||
std::cout << "Power before: " << std::fixed << std::setprecision(3) << 1.0e-3 * power_before << " W" << std::endl;
|
||||
std::cout << "Power utilization: " << std::fixed << std::setprecision(1) << (float)100.0 * power_before / power_limit
|
||||
<< " %" << std::endl;
|
||||
unsigned temperature_before;
|
||||
if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature_before) == NVML_SUCCESS) {
|
||||
std::cout << "GPU Temperature before: " << temperature_before << " C" << std::endl;
|
||||
} else {
|
||||
std::cerr << "Failed to get GPU temperature." << std::endl;
|
||||
}
|
||||
auto start_time = std::chrono::high_resolution_clock::now();
|
||||
// Benchmark loop
|
||||
for (int i = 0; i < repetitions; i++) {
|
||||
vector_mult(device_in1, device_in2, device_out, vector_size, ctx);
|
||||
}
|
||||
auto end_time = std::chrono::high_resolution_clock::now();
|
||||
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time);
|
||||
std::cout << "Elapsed time: " << duration.count() << " microseconds" << std::endl;
|
||||
unsigned power_after;
|
||||
nvmlDeviceGetPowerUsage(device, &power_after);
|
||||
std::cout << "Power after: " << std::fixed << std::setprecision(3) << 1.0e-3 * power_after << " W" << std::endl;
|
||||
std::cout << "Power utilization: " << std::fixed << std::setprecision(1) << (float)100.0 * power_after / power_limit
|
||||
<< " %" << std::endl;
|
||||
unsigned temperature_after;
|
||||
if (nvmlDeviceGetTemperature(device, NVML_TEMPERATURE_GPU, &temperature_after) == NVML_SUCCESS) {
|
||||
std::cout << "GPU Temperature after: " << temperature_after << " C" << std::endl;
|
||||
} else {
|
||||
std::cerr << "Failed to get GPU temperature." << std::endl;
|
||||
}
|
||||
|
||||
// Report performance in GMPS: Giga Multiplications Per Second
|
||||
double GMPS = 1.0e-9 * repetitions * vector_size / (1.0e-6 * duration.count());
|
||||
std::cout << "Performance: " << GMPS << " Giga Multiplications Per Second" << std::endl;
|
||||
|
||||
// Optional: validate multiplication
|
||||
T* host_out = (T*)malloc(vector_size * sizeof(T));
|
||||
|
||||
cudaMemcpy(host_out, device_out, vector_size * sizeof(T), cudaMemcpyDeviceToHost);
|
||||
|
||||
// validate multiplication here...
|
||||
|
||||
// clean up and exit
|
||||
free(host_in1);
|
||||
free(host_in2);
|
||||
free(host_out);
|
||||
cudaFree(device_in1);
|
||||
cudaFree(device_in2);
|
||||
cudaFree(device_out);
|
||||
nvmlShutdown();
|
||||
return 0;
|
||||
}
|
||||
@@ -1,2 +0,0 @@
|
||||
#!/bin/bash
|
||||
./build/example/example
|
||||
@@ -1,23 +1,16 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
|
||||
endif ()
|
||||
project(example LANGUAGES CUDA CXX)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
|
||||
set(CMAKE_CUDA_FLAGS_RELEASE "")
|
||||
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
|
||||
project(example)
|
||||
|
||||
add_executable(example example.cpp)
|
||||
target_include_directories(example PRIVATE "../../../icicle_v3/include" "..")
|
||||
target_link_directories(example PRIVATE "${CMAKE_SOURCE_DIR}/build/icicle")
|
||||
message("${CMAKE_BINARY_DIR}/icicle")
|
||||
target_link_libraries(example PRIVATE icicle_curve_bn254 icicle_field_bn254 icicle_device)
|
||||
if(BACKEND_DIR)
|
||||
add_compile_definitions(BACKEND_DIR="${BACKEND_DIR}")
|
||||
endif()
|
||||
|
||||
add_executable(
|
||||
example
|
||||
example.cu
|
||||
)
|
||||
target_include_directories(example PRIVATE "../../../icicle/include")
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_bn254.a)
|
||||
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
@@ -1,33 +1,35 @@
|
||||
# Icicle example: Number-Theoretical Transform (NTT)
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend running our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
`Icicle` provides CUDA C++ template function NTT for [Number Theoretical Transform](https://github.com/ingonyama-zk/ingopedia/blob/master/src/fft.md), also known as Discrete Fourier Transform.
|
||||
|
||||
## Concise Usage Explanation
|
||||
|
||||
1. Include the curve api
|
||||
2. Init NTT domain
|
||||
3. Call ntt api
|
||||
|
||||
```c++
|
||||
// Select the curve
|
||||
#define CURVE_ID 1
|
||||
// Include NTT template
|
||||
#include "appUtils/ntt/ntt.cu"
|
||||
using namespace curve_config;
|
||||
using namespace ntt;
|
||||
// Configure NTT
|
||||
NTTConfig<S> config=DefaultNTTConfig<S>();
|
||||
// Call NTT
|
||||
NTT<S, E>(input, ntt_size, NTTDir::kForward, config, output);
|
||||
#include "icicle/api/bn254.h"
|
||||
...
|
||||
auto ntt_init_domain_cfg = default_ntt_init_domain_config();
|
||||
...
|
||||
bn254_ntt_init_domain(&basic_root, ntt_init_domain_cfg);
|
||||
NTTConfig<scalar_t> config = default_ntt_config<scalar_t>();
|
||||
...
|
||||
bn254_ntt(input.get(), ntt_size, NTTDir::kForward, config, output.get())
|
||||
```
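Pieced together from the calls above and from `example.cpp` below, a self-contained sketch of the V3 flow looks roughly like this (sizes are illustrative; `examples_utils.h` is the examples' shared helper header that provides `try_load_and_set_backend_device`):

```c++
#include <iostream>
#include <memory>

#include "icicle/runtime.h"
#include "icicle/api/bn254.h"
using namespace bn254;

#include "examples_utils.h" // try_load_and_set_backend_device

int main(int argc, char* argv[])
{
  try_load_and_set_backend_device(argc, argv); // CPU by default, CUDA if requested

  const unsigned log_ntt_size = 10; // illustrative size
  const unsigned ntt_size = 1 << log_ntt_size;

  // init the NTT domain from a root of unity of the right order
  scalar_t basic_root = scalar_t::omega(log_ntt_size);
  ICICLE_CHECK(bn254_ntt_init_domain(&basic_root, default_ntt_init_domain_config()));

  // host-side input/output buffers
  auto input = std::make_unique<scalar_t[]>(ntt_size);
  auto output = std::make_unique<scalar_t[]>(ntt_size);
  scalar_t::rand_host_many(input.get(), ntt_size);

  // forward NTT with the default (host-data) configuration
  NTTConfig<scalar_t> config = default_ntt_config<scalar_t>();
  ICICLE_CHECK(bn254_ntt(input.get(), ntt_size, NTTDir::kForward, config, output.get()));

  std::cout << "output[0]: " << output[0] << std::endl;
  return 0;
}
```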
|
||||
|
||||
|
||||
## Running the example
|
||||
|
||||
- `cd` to your example directory
|
||||
- compile with `./compile.sh`
|
||||
- run with `./run.sh`
|
||||
```sh
|
||||
# for CPU
|
||||
./run.sh -d CPU
|
||||
# for CUDA
|
||||
./run.sh -d CUDA -b /path/to/cuda/backend/install/dir
|
||||
```
|
||||
|
||||
## What's in the example
|
||||
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately on error
|
||||
set -e
|
||||
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DMSM=OFF -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254
|
||||
cmake --build build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
104 examples/c++/ntt/example.cpp (new file)
@@ -0,0 +1,104 @@
|
||||
#include <iostream>
|
||||
|
||||
#include "icicle/runtime.h"
|
||||
|
||||
#include "icicle/api/bn254.h"
|
||||
using namespace bn254;
|
||||
|
||||
#include "examples_utils.h"
|
||||
|
||||
void initialize_input(const unsigned ntt_size, const unsigned nof_ntts, scalar_t* elements);
|
||||
int validate_output(const unsigned ntt_size, const unsigned nof_ntts, scalar_t* elements);
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
try_load_and_set_backend_device(argc, argv);
|
||||
|
||||
std::cout << "\nIcicle Examples: Number Theoretical Transform (NTT)" << std::endl;
|
||||
const unsigned log_ntt_size = 20;
|
||||
const unsigned ntt_size = 1 << log_ntt_size;
|
||||
const unsigned batch_size = 2;
|
||||
|
||||
std::cout << "Example parameters:" << std::endl;
|
||||
std::cout << "NTT size: " << ntt_size << std::endl;
|
||||
std::cout << "batch size: " << batch_size << std::endl;
|
||||
|
||||
std::cout << "\nGenerating input data for lowest and highest harmonics" << std::endl;
|
||||
auto input = std::make_unique<scalar_t[]>(batch_size * ntt_size);
|
||||
auto output = std::make_unique<scalar_t[]>(batch_size * ntt_size);
|
||||
initialize_input(ntt_size, batch_size, input.get());
|
||||
|
||||
// Initialize NTT domain
|
||||
std::cout << "\nInit NTT domain" << std::endl;
|
||||
scalar_t basic_root = scalar_t::omega(log_ntt_size /*NTT_LOG_SIZE*/);
|
||||
auto ntt_init_domain_cfg = default_ntt_init_domain_config();
|
||||
ConfigExtension backend_cfg_ext;
|
||||
backend_cfg_ext.set("fast_twiddles", true); // optionally construct fast_twiddles for CUDA backend
|
||||
ntt_init_domain_cfg.ext = &backend_cfg_ext;
|
||||
ICICLE_CHECK(bn254_ntt_init_domain(&basic_root, ntt_init_domain_cfg));
|
||||
|
||||
// ntt configuration
|
||||
NTTConfig<scalar_t> config = default_ntt_config<scalar_t>();
|
||||
ConfigExtension ntt_cfg_ext;
|
||||
config.ext = &ntt_cfg_ext;
|
||||
config.batch_size = batch_size;
|
||||
|
||||
// warmup
|
||||
ICICLE_CHECK(bn254_ntt(input.get(), ntt_size, NTTDir::kForward, config, output.get()));
|
||||
|
||||
// NTT radix-2 alg
|
||||
std::cout << "\nRunning NTT radix-2 alg with on-host data" << std::endl;
|
||||
ntt_cfg_ext.set("ntt_algorithm", 1); // radix-2
|
||||
START_TIMER(Radix2);
|
||||
ICICLE_CHECK(bn254_ntt(input.get(), ntt_size, NTTDir::kForward, config, output.get()));
|
||||
END_TIMER(Radix2, "Radix2 NTT");
|
||||
|
||||
std::cout << "Validating output" << std::endl;
|
||||
validate_output(ntt_size, batch_size, output.get());
|
||||
|
||||
// NTT mixed-radix alg
|
||||
std::cout << "\nRunning NTT mixed-radix alg with on-host data" << std::endl;
|
||||
ntt_cfg_ext.set("ntt_algorithm", 2); // mixed-radix
|
||||
START_TIMER(MixedRadix);
|
||||
ICICLE_CHECK(bn254_ntt(input.get(), ntt_size, NTTDir::kForward, config, output.get()));
|
||||
END_TIMER(MixedRadix, "MixedRadix NTT");
|
||||
|
||||
std::cout << "Validating output" << std::endl;
|
||||
validate_output(ntt_size, batch_size, output.get());
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void initialize_input(const unsigned ntt_size, const unsigned nof_ntts, scalar_t* elements)
|
||||
{
|
||||
// Lowest Harmonics
|
||||
for (unsigned i = 0; i < ntt_size; i = i + 1) {
|
||||
elements[i] = scalar_t::one();
|
||||
}
|
||||
// Highest Harmonics
|
||||
for (unsigned i = 1 * ntt_size; i < 2 * ntt_size; i = i + 2) {
|
||||
elements[i] = scalar_t::one();
|
||||
elements[i + 1] = scalar_t::neg(scalar_t::one());
|
||||
}
|
||||
}
|
||||
|
||||
int validate_output(const unsigned ntt_size, const unsigned nof_ntts, scalar_t* elements)
|
||||
{
|
||||
int nof_errors = 0;
|
||||
scalar_t amplitude = scalar_t::from((uint32_t)ntt_size);
|
||||
// Lowest Harmonics
|
||||
if (elements[0] != amplitude) {
|
||||
++nof_errors;
|
||||
std::cout << "Error in lowest harmonics 0! " << std::endl;
|
||||
} else {
|
||||
std::cout << "Validated lowest harmonics" << std::endl;
|
||||
}
|
||||
// Highest Harmonics
|
||||
if (elements[1 * ntt_size + ntt_size / 2] != amplitude) {
|
||||
++nof_errors;
|
||||
std::cout << "Error in highest harmonics! " << std::endl;
|
||||
} else {
|
||||
std::cout << "Validated highest harmonics" << std::endl;
|
||||
}
|
||||
return nof_errors;
|
||||
}
|
||||
@@ -1,113 +0,0 @@
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
|
||||
// include NTT template
|
||||
|
||||
#include "curves/params/bn254.cuh"
|
||||
#include "api/bn254.h"
|
||||
using namespace bn254;
|
||||
using namespace ntt;
|
||||
|
||||
// Operate on scalars
|
||||
typedef scalar_t S;
|
||||
typedef scalar_t E;
|
||||
|
||||
void print_elements(const unsigned n, E* elements)
|
||||
{
|
||||
for (unsigned i = 0; i < n; i++) {
|
||||
std::cout << i << ": " << elements[i] << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
void initialize_input(const unsigned ntt_size, const unsigned nof_ntts, E* elements)
|
||||
{
|
||||
// Lowest Harmonics
|
||||
for (unsigned i = 0; i < ntt_size; i = i + 1) {
|
||||
elements[i] = E::one();
|
||||
}
|
||||
// print_elements(ntt_size, elements );
|
||||
// Highest Harmonics
|
||||
for (unsigned i = 1 * ntt_size; i < 2 * ntt_size; i = i + 2) {
|
||||
elements[i] = E::one();
|
||||
elements[i + 1] = E::neg(scalar_t::one());
|
||||
}
|
||||
// print_elements(ntt_size, &elements[1*ntt_size] );
|
||||
}
|
||||
|
||||
int validate_output(const unsigned ntt_size, const unsigned nof_ntts, E* elements)
|
||||
{
|
||||
int nof_errors = 0;
|
||||
E amplitude = E::from((uint32_t)ntt_size);
|
||||
// std::cout << "Amplitude: " << amplitude << std::endl;
|
||||
// Lowest Harmonics
|
||||
if (elements[0] != amplitude) {
|
||||
++nof_errors;
|
||||
std::cout << "Error in lowest harmonics 0! " << std::endl;
|
||||
// print_elements(ntt_size, elements );
|
||||
} else {
|
||||
std::cout << "Validated lowest harmonics" << std::endl;
|
||||
}
|
||||
// Highest Harmonics
|
||||
if (elements[1 * ntt_size + ntt_size / 2] != amplitude) {
|
||||
++nof_errors;
|
||||
std::cout << "Error in highest harmonics! " << std::endl;
|
||||
// print_elements(ntt_size, &elements[1*ntt_size] );
|
||||
} else {
|
||||
std::cout << "Validated highest harmonics" << std::endl;
|
||||
}
|
||||
return nof_errors;
|
||||
}
|
||||
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
std::cout << "Icicle Examples: Number Theoretical Transform (NTT)" << std::endl;
|
||||
std::cout << "Example parameters" << std::endl;
|
||||
const unsigned log_ntt_size = 20;
|
||||
std::cout << "Log2(NTT size): " << log_ntt_size << std::endl;
|
||||
const unsigned ntt_size = 1 << log_ntt_size;
|
||||
std::cout << "NTT size: " << ntt_size << std::endl;
|
||||
const unsigned nof_ntts = 2;
|
||||
std::cout << "Number of NTTs: " << nof_ntts << std::endl;
|
||||
const unsigned batch_size = nof_ntts * ntt_size;
|
||||
|
||||
std::cout << "Generating input data for lowest and highest harmonics" << std::endl;
|
||||
E* input;
|
||||
input = (E*)malloc(sizeof(E) * batch_size);
|
||||
initialize_input(ntt_size, nof_ntts, input);
|
||||
E* output;
|
||||
output = (E*)malloc(sizeof(E) * batch_size);
|
||||
|
||||
std::cout << "Running NTT with on-host data" << std::endl;
|
||||
// Create a device context
|
||||
auto ctx = device_context::get_default_device_context();
|
||||
S basic_root = S::omega(log_ntt_size /*NTT_LOG_SIZE*/);
|
||||
bn254_initialize_domain(&basic_root, ctx, true);
|
||||
// Create an NTTConfig instance
|
||||
NTTConfig<S> config = default_ntt_config<S>();
|
||||
config.ntt_algorithm = NttAlgorithm::MixedRadix;
|
||||
config.batch_size = nof_ntts;
|
||||
START_TIMER(MixedRadix);
|
||||
cudaError_t err = bn254_ntt_cuda(input, ntt_size, NTTDir::kForward, config, output);
|
||||
END_TIMER(MixedRadix, "MixedRadix NTT");
|
||||
|
||||
std::cout << "Validating output" << std::endl;
|
||||
validate_output(ntt_size, nof_ntts, output);
|
||||
|
||||
config.ntt_algorithm = NttAlgorithm::Radix2;
|
||||
START_TIMER(Radix2);
|
||||
err = bn254_ntt_cuda(input, ntt_size, NTTDir::kForward, config, output);
|
||||
END_TIMER(Radix2, "Radix2 NTT");
|
||||
|
||||
std::cout << "Validating output" << std::endl;
|
||||
validate_output(ntt_size, nof_ntts, output);
|
||||
|
||||
std::cout << "Cleaning-up memory" << std::endl;
|
||||
free(input);
|
||||
free(output);
|
||||
return 0;
|
||||
}
|
||||
@@ -1,2 +1,65 @@
|
||||
#!/bin/bash
|
||||
./build/example/example
|
||||
|
||||
# Exit immediately if a command exits with a non-zero status
|
||||
set -e
|
||||
|
||||
# Function to display usage information
|
||||
show_help() {
|
||||
echo "Usage: $0 [-d DEVICE_TYPE] [-b BACKEND_INSTALL_DIR]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -d DEVICE_TYPE Specify the device type (default: CPU)"
|
||||
echo " -b BACKEND_INSTALL_DIR Specify the backend installation directory (default: empty)"
|
||||
echo " -h Show this help message"
|
||||
exit 0
|
||||
}
|
||||
|
||||
# Parse command line options
|
||||
while getopts ":d:b:h" opt; do
|
||||
case ${opt} in
|
||||
d )
|
||||
DEVICE_TYPE=$OPTARG
|
||||
;;
|
||||
b )
|
||||
BACKEND_INSTALL_DIR="$(realpath ${OPTARG})"
|
||||
;;
|
||||
h )
|
||||
show_help
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
show_help
|
||||
;;
|
||||
: )
|
||||
echo "Invalid option: -$OPTARG requires an argument" 1>&2
|
||||
show_help
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Set default values if not provided
|
||||
: "${DEVICE_TYPE:=CPU}"
|
||||
: "${BACKEND_INSTALL_DIR:=}"
|
||||
|
||||
# Create necessary directories
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
ICILE_DIR=$(realpath "../../../icicle_v3/")
|
||||
ICICLE_CUDA_BACKEND_DIR="${ICILE_DIR}/backend/cuda"
|
||||
|
||||
# Build Icicle and the example app that links to it
|
||||
if [ "$DEVICE_TYPE" == "CUDA" ] && [ ! -d "${BACKEND_INSTALL_DIR}" ] && [ -d "${ICICLE_CUDA_BACKEND_DIR}" ]; then
|
||||
echo "Building icicle with CUDA backend"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DMSM=OFF -DCUDA_BACKEND=local -S "${ICILE_DIR}" -B build/icicle
|
||||
BACKEND_INSTALL_DIR=$(realpath "build/icicle/backend")
|
||||
else
|
||||
echo "Building icicle without CUDA backend, BACKEND_INSTALL_DIR=${BACKEND_INSTALL_DIR}"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DMSM=OFF -S "${ICILE_DIR}" -B build/icicle
|
||||
fi
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/example
|
||||
|
||||
cmake --build build/icicle -j
|
||||
cmake --build build/example -j
|
||||
|
||||
./build/example/example "$DEVICE_TYPE" "$BACKEND_INSTALL_DIR"
|
||||
|
||||
@@ -1,26 +1,16 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
|
||||
endif ()
|
||||
project(example LANGUAGES CUDA CXX)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
|
||||
set(CMAKE_CUDA_FLAGS_RELEASE "")
|
||||
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
|
||||
add_executable(
|
||||
example
|
||||
example.cu
|
||||
)
|
||||
project(example)
|
||||
|
||||
add_executable(example example.cpp)
|
||||
target_include_directories(example PRIVATE "../../../icicle_v3/include" "..")
|
||||
target_link_directories(example PRIVATE "${CMAKE_SOURCE_DIR}/build/icicle")
|
||||
message("${CMAKE_BINARY_DIR}/icicle")
|
||||
target_link_libraries(example PRIVATE icicle_curve_bn254 icicle_field_bn254 icicle_device)
|
||||
if(BACKEND_DIR)
|
||||
add_compile_definitions(BACKEND_DIR="${BACKEND_DIR}")
|
||||
endif()
|
||||
|
||||
target_include_directories(example PRIVATE "../../../icicle/include")
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_curve_bn254.a)
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_bn254.a)
|
||||
find_library(NVML_LIBRARY nvidia-ml PATHS /usr/local/cuda/targets/x86_64-linux/lib/stubs/ )
|
||||
target_link_libraries(example ${NVML_LIBRARY})
|
||||
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
# ICICLE example: Pedersen Commitment
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend running our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
A Pedersen Commitment is a cryptographic primitive to commit to a value or a vector of values while keeping it hidden, yet enabling the committer to reveal the value later. It provides both hiding (the commitment does not reveal any information about the value) and binding properties (once a value is committed, it cannot be changed without detection).
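In symbols (the standard construction, stated here for orientation rather than quoted from this example): given public points `G_1..G_N` and `H` whose discrete logs nobody knows, the commitment to values `m_1..m_N` with blinding factor `r` is `C = r*H + m_1*G_1 + ... + m_N*G_N`, i.e. a single MSM of length `N+1`. A minimal sketch using only bn254 API calls that appear in this example follows; names and sizes are illustrative, and the mock basis below is *not* a sound setup because its discrete logs are known to whoever generated it.

```c++
#include <iostream>
#include <memory>

#include "icicle/runtime.h"
#include "icicle/api/bn254.h"
using namespace bn254;

#include "examples_utils.h" // try_load_and_set_backend_device

int main(int argc, char* argv[])
{
  try_load_and_set_backend_device(argc, argv);

  const unsigned N = 1 << 10; // commitment vector size (illustrative)

  // Mock basis: random multiples of the generator. A real deployment derives the
  // points transparently so the committer cannot know their discrete logs.
  auto points = std::make_unique<affine_t[]>(N + 1);
  auto scalars = std::make_unique<scalar_t[]>(N + 1);
  auto gen = projective_t::generator();
  for (unsigned i = 0; i <= N; ++i) {
    points[i] = projective_t::to_affine(scalar_t::rand_host() * gen);
    scalars[i] = scalar_t::rand_host(); // committed values; scalars[N] acts as the blinding factor
  }

  // C = r*H + sum_i m_i*G_i, computed as one MSM over the N+1 (scalar, point) pairs
  projective_t commitment;
  bn254_msm(scalars.get(), points.get(), N + 1, default_msm_config(), &commitment);
  std::cout << "commitment: " << commitment << std::endl;
  return 0;
}
```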
|
||||
@@ -14,10 +10,12 @@ An example of MSM is [here](../msm/README.md).
|
||||
|
||||
## Running the example
|
||||
|
||||
- `cd` to your example directory
|
||||
- compile with `./compile.sh`
|
||||
- run with `./run.sh`
|
||||
|
||||
```sh
|
||||
# for CPU
|
||||
./run.sh -d CPU
|
||||
# for CUDA
|
||||
./run.sh -d CUDA -b /path/to/cuda/backend/install/dir
|
||||
```
|
||||
## Concise Explanation
|
||||
|
||||
We recommend this simple [explanation](https://www.rareskills.io/post/pedersen-commitment).
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately on error
|
||||
set -e
|
||||
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254
|
||||
cmake --build build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
@@ -1,16 +1,16 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <chrono>
|
||||
#include <cassert>
|
||||
#include <nvml.h>
|
||||
|
||||
#include "api/bn254.h"
|
||||
#include "msm/msm.cuh"
|
||||
#include "icicle/runtime.h"
|
||||
#include "icicle/api/bn254.h"
|
||||
#include "icicle/curves/params/bn254.h"
|
||||
using namespace bn254;
|
||||
|
||||
typedef point_field_t T;
|
||||
#include "examples_utils.h"
|
||||
|
||||
// modular power
|
||||
template <typename T>
|
||||
T modPow(T base, T exp)
|
||||
{
|
||||
T r = T::one();
|
||||
@@ -27,10 +27,15 @@ T modPow(T base, T exp)
|
||||
}
|
||||
|
||||
// Check if y2 is a quadratic residue using Euler's Criterion
|
||||
bool quadratic_residue(T y2) { return modPow(y2, T::div2(T::zero() - T::one())) == T::one(); }
|
||||
template <typename T>
|
||||
bool quadratic_residue(T y2)
|
||||
{
|
||||
return modPow(y2, T::div2(T::zero() - T::one())) == T::one();
|
||||
}
|
||||
|
||||
// modular square root adapted from:
|
||||
// https://github.com/ShahjalalShohag/code-library/blob/main/Number%20Theory/Tonelli%20Shanks%20Algorithm.cpp
|
||||
template <typename T>
|
||||
bool mySQRT(T a, T* result)
|
||||
{
|
||||
if (a == T::zero()) {
|
||||
@@ -72,9 +77,10 @@ bool mySQRT(T a, T* result)
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void point_near_x(T x, affine_t* point)
|
||||
{
|
||||
const T wb = T{weierstrass_b};
|
||||
const T wb = T{G1::weierstrass_b};
|
||||
T y2;
|
||||
while (y2 = x * x * x + wb, quadratic_residue(y2) == false) {
|
||||
x = x + T::one();
|
||||
@@ -87,7 +93,8 @@ void point_near_x(T x, affine_t* point)
|
||||
}
|
||||
|
||||
static int seed = 0;
|
||||
static HOST_INLINE T rand_host_seed()
|
||||
template <typename T>
|
||||
static T rand_host_seed()
|
||||
{
|
||||
std::mt19937_64 generator(seed++);
|
||||
std::uniform_int_distribution<unsigned> distribution;
|
||||
@@ -101,16 +108,13 @@ static HOST_INLINE T rand_host_seed()
|
||||
return value;
|
||||
}
|
||||
|
||||
using FpMilliseconds = std::chrono::duration<float, std::chrono::milliseconds::period>;
|
||||
#define START_TIMER(timer) auto timer##_start = std::chrono::high_resolution_clock::now();
|
||||
#define END_TIMER(timer, msg) \
|
||||
printf("%s: %.0f ms\n", msg, FpMilliseconds(std::chrono::high_resolution_clock::now() - timer##_start).count());
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
try_load_and_set_backend_device(argc, argv);
|
||||
|
||||
const unsigned N = pow(2, 10);
|
||||
std::cout << "Commitment vector size: " << N << "+1 for salt (a.k.a blinding factor)" << std::endl;
|
||||
T* xs = new T[N + 1];
|
||||
point_field_t* xs = new point_field_t[N + 1];
|
||||
|
||||
std::cout << "Generating random points transparently using publicly chosen seed" << std::endl;
|
||||
std::cout << "Public seed prevents committer from knowing the discrete logs of points used in the commitment"
|
||||
@@ -119,9 +123,8 @@ int main(int argc, char** argv)
|
||||
std::cout << "Using seed: " << seed << std::endl;
|
||||
std::cout << "Generating random field values" << std::endl;
|
||||
START_TIMER(gen);
|
||||
|
||||
for (unsigned i = 0; i < N; i++) {
|
||||
xs[i] = rand_host_seed();
|
||||
xs[i] = rand_host_seed<point_field_t>();
|
||||
}
|
||||
END_TIMER(gen, "Time to generate field values");
|
||||
std::cout << "xs[0]: " << xs[0] << std::endl;
|
||||
@@ -145,9 +148,9 @@ int main(int argc, char** argv)
|
||||
scalars[N] = scalar_t::rand_host();
|
||||
|
||||
std::cout << "Executing MSM" << std::endl;
|
||||
auto config = msm::default_msm_config();
|
||||
auto config = default_msm_config();
|
||||
START_TIMER(msm);
|
||||
bn254_msm_cuda(scalars, points, N + 1, config, &result);
|
||||
bn254_msm(scalars, points, N + 1, config, &result);
|
||||
END_TIMER(msm, "Time to execute MSM");
|
||||
|
||||
std::cout << "Computed commitment: " << result << std::endl;
|
||||
@@ -1,2 +1,65 @@
|
||||
#!/bin/bash
|
||||
./build/example/example
|
||||
|
||||
# Exit immediately if a command exits with a non-zero status
|
||||
set -e
|
||||
|
||||
# Function to display usage information
|
||||
show_help() {
|
||||
echo "Usage: $0 [-d DEVICE_TYPE] [-b BACKEND_INSTALL_DIR]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -d DEVICE_TYPE Specify the device type (default: CPU)"
|
||||
echo " -b BACKEND_INSTALL_DIR Specify the backend installation directory (default: empty)"
|
||||
echo " -h Show this help message"
|
||||
exit 0
|
||||
}
|
||||
|
||||
# Parse command line options
|
||||
while getopts ":d:b:h" opt; do
|
||||
case ${opt} in
|
||||
d )
|
||||
DEVICE_TYPE=$OPTARG
|
||||
;;
|
||||
b )
|
||||
BACKEND_INSTALL_DIR="$(realpath ${OPTARG})"
|
||||
;;
|
||||
h )
|
||||
show_help
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
show_help
|
||||
;;
|
||||
: )
|
||||
echo "Invalid option: -$OPTARG requires an argument" 1>&2
|
||||
show_help
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Set default values if not provided
|
||||
: "${DEVICE_TYPE:=CPU}"
|
||||
: "${BACKEND_INSTALL_DIR:=}"
|
||||
|
||||
# Create necessary directories
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
ICILE_DIR=$(realpath "../../../icicle_v3/")
|
||||
ICICLE_CUDA_BACKEND_DIR="${ICILE_DIR}/backend/cuda"
|
||||
|
||||
# Build Icicle and the example app that links to it
|
||||
if [ "$DEVICE_TYPE" == "CUDA" ] && [ ! -d "${BACKEND_INSTALL_DIR}" ] && [ -d "${ICICLE_CUDA_BACKEND_DIR}" ]; then
|
||||
echo "Building icicle with CUDA backend"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DCUDA_BACKEND=local -S "${ICILE_DIR}" -B build/icicle
|
||||
BACKEND_INSTALL_DIR=$(realpath "build/icicle/backend")
|
||||
else
|
||||
echo "Building icicle without CUDA backend, BACKEND_INSTALL_DIR=${BACKEND_INSTALL_DIR}"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -S "${ICILE_DIR}" -B build/icicle
|
||||
fi
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/example
|
||||
|
||||
cmake --build build/icicle -j
|
||||
cmake --build build/example -j
|
||||
|
||||
./build/example/example "$DEVICE_TYPE" "$BACKEND_INSTALL_DIR"
|
||||
|
||||
@@ -1,30 +1,16 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
|
||||
endif ()
|
||||
project(example LANGUAGES CUDA CXX)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -DCURVE_ID=BN254")
|
||||
set(CMAKE_CUDA_FLAGS_RELEASE "")
|
||||
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
|
||||
project(example)
|
||||
|
||||
add_executable(
|
||||
example
|
||||
example.cu
|
||||
)
|
||||
add_executable(example example.cpp)
|
||||
target_include_directories(example PRIVATE "../../../icicle_v3/include" "..")
|
||||
target_link_directories(example PRIVATE "${CMAKE_SOURCE_DIR}/build/icicle")
|
||||
message("${CMAKE_BINARY_DIR}/icicle")
|
||||
target_link_libraries(example PRIVATE icicle_curve_bn254 icicle_field_bn254 icicle_device)
|
||||
if(BACKEND_DIR)
|
||||
add_compile_definitions(BACKEND_DIR="${BACKEND_DIR}")
|
||||
endif()
|
||||
|
||||
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
target_include_directories(example PRIVATE "../../../icicle/include")
|
||||
|
||||
# can link to another curve/field by changing the following lib and FIELD_ID
|
||||
target_link_libraries(example
|
||||
${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_curve_bn254.a
|
||||
${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_bn254.a
|
||||
)
|
||||
target_compile_definitions(example PUBLIC FIELD_ID BN254)
|
||||
@@ -1,9 +1,5 @@
|
||||
# ICICLE examples: computations with polynomials
|
||||
|
||||
## Best-Practices
|
||||
|
||||
We recommend running our examples in [ZK-containers](../../ZK-containers.md) to save your time and mental energy.
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
Polynomials are crucial for Zero-Knowledge Proofs (ZKPs): they enable efficient representation and verification of computational statements, facilitate privacy-preserving protocols, and support complex mathematical operations essential for constructing and verifying proofs without revealing the underlying data. The Polynomial API is documented [here](https://dev.ingonyama.com/icicle/polynomials/overview)
|
||||
@@ -13,9 +9,10 @@ Polynomials are crucial for Zero-Knowledge Proofs (ZKPs): they enable efficient
|
||||
To run the example, from the project root directory:
|
||||
|
||||
```sh
|
||||
cd examples/c++/polynomial-api
|
||||
./compile.sh
|
||||
./run.sh
|
||||
# for CPU
|
||||
./run.sh -d CPU
|
||||
# for CUDA
|
||||
./run.sh -d CUDA -b /path/to/cuda/backend/install/dir
|
||||
```
|
||||
|
||||
To change the scalar field, modify `compile.sh` to build the corresponding lib, and update `CMakeLists.txt` to link against that lib and set `FIELD_ID` accordingly.
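For orientation, the core pattern from `example.cpp` in this directory, condensed into a sketch (the NTT domain must already be initialized as in the example's `main`, and the two input polynomials are taken as parameters here instead of being generated):

```c++
#include "icicle/api/bn254.h"
#include "icicle/polynomials/polynomials.h"

using namespace icicle;
using namespace bn254; // scalar_t is the bn254 scalar type

typedef Polynomial<scalar_t> Polynomial_t;

// Check the identity (f1+f2)^2 + (f1-f2)^2 = 2*(f1^2 + f2^2) on polynomial objects.
void constraint_sketch(Polynomial_t& f1, Polynomial_t& f2)
{
  // polynomial arithmetic runs on the currently selected device
  auto lhs = (f1 + f2) * (f1 + f2) + (f1 - f2) * (f1 - f2);
  auto rhs = scalar_t::from(2) * (f1 * f1 + f2 * f2);

  // coefficients stay in device memory; views expose them, e.g. to feed an MSM commitment
  auto [lhs_coeffs, lhs_size] = lhs.get_coefficients_view();
  auto [rhs_coeffs, rhs_size] = rhs.get_coefficients_view();
}
```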
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately on error
|
||||
set -e
|
||||
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DG2=OFF
|
||||
cmake --build build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
@@ -1,14 +1,13 @@
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include "polynomials/polynomials.h"
|
||||
#include "polynomials/cuda_backend/polynomial_cuda_backend.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
|
||||
#include "api/bn254.h"
|
||||
#include <chrono>
|
||||
#include "icicle/api/bn254.h"
|
||||
#include "icicle/polynomials/polynomials.h"
|
||||
|
||||
using namespace polynomials;
|
||||
using namespace bn254;
|
||||
#include "examples_utils.h"
|
||||
|
||||
using namespace icicle;
|
||||
using namespace bn254; // typedef scalar_t as bn254-scalar type
|
||||
|
||||
// define the polynomial type
|
||||
typedef Polynomial<scalar_t> Polynomial_t;
|
||||
@@ -22,27 +21,28 @@ const auto four = scalar_t::from(4);
|
||||
const auto five = scalar_t::from(5);
|
||||
const auto minus_one = zero - one;
|
||||
|
||||
static std::unique_ptr<scalar_t[]> generate_pows(scalar_t tau, uint32_t size){
|
||||
auto vec = std::make_unique<scalar_t[]>(size);
|
||||
vec[0] = scalar_t::one();
|
||||
for (size_t i = 1; i < size; ++i) {
|
||||
vec[i] = vec[i-1] * tau;
|
||||
static std::unique_ptr<scalar_t[]> generate_pows(scalar_t tau, uint32_t size)
|
||||
{
|
||||
auto vec = std::make_unique<scalar_t[]>(size);
|
||||
vec[0] = scalar_t::one();
|
||||
for (size_t i = 1; i < size; ++i) {
|
||||
vec[i] = vec[i - 1] * tau;
|
||||
}
|
||||
return std::move(vec);
|
||||
}
|
||||
|
||||
static std::unique_ptr<affine_t[]> generate_SRS(uint32_t size) {
|
||||
static std::unique_ptr<affine_t[]> generate_SRS(uint32_t size)
|
||||
{
|
||||
auto secret_scalar = scalar_t::rand_host();
|
||||
auto gen = projective_t::generator();
|
||||
auto pows_of_tau = generate_pows(secret_scalar,size);
|
||||
auto pows_of_tau = generate_pows(secret_scalar, size);
|
||||
auto SRS = std::make_unique<affine_t[]>(size);
|
||||
for (size_t i = 0; i < size; ++i) {
|
||||
SRS[i] = projective_t::to_affine(pows_of_tau[i] * gen);
|
||||
SRS[i] = projective_t::to_affine(pows_of_tau[i] * gen);
|
||||
}
|
||||
return std::move(SRS);
|
||||
}
|
||||
|
||||
|
||||
void example_evaluate()
|
||||
{
|
||||
std::cout << std::endl << "Example: Polynomial evaluation on random value" << std::endl;
|
||||
@@ -310,122 +310,120 @@ void example_device_memory_view()
|
||||
const int log_size = 6;
|
||||
const int size = 1 << log_size;
|
||||
auto f = randomize_polynomial(size);
|
||||
auto [d_coeffs, N, device_id] = f.get_coefficients_view();
|
||||
auto [d_coeffs, N] = f.get_coefficients_view();
|
||||
|
||||
// compute coset evaluations
|
||||
auto coset_evals = std::make_unique<scalar_t[]>(size);
|
||||
auto ntt_config = ntt::default_ntt_config<scalar_t>();
|
||||
auto ntt_config = default_ntt_config<scalar_t>();
|
||||
ntt_config.are_inputs_on_device = true; // using the device data directly as a view
|
||||
ntt_config.coset_gen = ntt::get_root_of_unity<scalar_t>(size * 2);
|
||||
ntt::ntt(d_coeffs.get(), size, ntt::NTTDir::kForward, ntt_config, coset_evals.get());
|
||||
ntt_config.coset_gen = get_root_of_unity<scalar_t>(size * 2);
|
||||
ntt(d_coeffs.get(), size, NTTDir::kForward, ntt_config, coset_evals.get());
|
||||
}
|
||||
|
||||
|
||||
void example_commit_with_device_memory_view()
|
||||
{
|
||||
//declare time vars
|
||||
// declare time vars
|
||||
std::chrono::time_point<std::chrono::high_resolution_clock> start, end;
|
||||
std::chrono::milliseconds duration;
|
||||
|
||||
std::cout << std::endl << "Example: a) commit with Polynomial views [(f1+f2)^2 + (f1-f2)^2 ]_1 = [4 (f1^2+ f_2^2)]_1" << std::endl;
|
||||
std::cout<< "Example: b) commit with Polynomial views [(f1+f2)^2 - (f1-f2)^2 ]_1 = [4 f1 *f_2]_1" << std::endl;
|
||||
std::cout << std::endl
|
||||
<< "Example: a) commit with Polynomial views [(f1+f2)^2 + (f1-f2)^2 ]_1 = [4 (f1^2+ f_2^2)]_1" << std::endl;
|
||||
std::cout << "Example: b) commit with Polynomial views [(f1+f2)^2 - (f1-f2)^2 ]_1 = [4 f1 *f_2]_1" << std::endl;
|
||||
int N = 1025;
|
||||
|
||||
//generate group elements string of length N: (1, beta,beta^2....,beta^{N-1}). g
|
||||
// generate group elements string of length N: (1, beta,beta^2....,beta^{N-1}). g
|
||||
std::cout << "Setup: Generating mock SRS" << std::endl;
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
auto SRS = generate_SRS(2*N);
|
||||
//Allocate memory on device (points)
|
||||
auto SRS = generate_SRS(2 * N);
|
||||
// Allocate memory on device (points)
|
||||
affine_t* points_d;
|
||||
cudaMalloc(&points_d, sizeof(affine_t)* 2 * N);
|
||||
ICICLE_CHECK(icicle_malloc((void**)&points_d, sizeof(affine_t) * 2 * N));
|
||||
// copy SRS to device (could have generated on device, but gives an indicator)
|
||||
cudaMemcpy(points_d, SRS.get(), sizeof(affine_t)* 2 * N, cudaMemcpyHostToDevice);
|
||||
ICICLE_CHECK(icicle_copy(points_d, SRS.get(), sizeof(affine_t) * 2 * N));
|
||||
end = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
||||
std::cout << "Setup: SRS of length "<< N << " generated and loaded to device. Took: " << duration.count() << " milliseconds" << std::endl;
|
||||
|
||||
//goal:
|
||||
//test commitment equality [(f1+f2)^2 + (f1-f2)^2 ]_1 = [4 (f1^2+ f_2^2)]_1
|
||||
//test commitment equality [(f1+f2)^2 - (f1-f2)^2 ]_1 = [4 f1 *f_2]_1
|
||||
//note: using polyapi to gen scalars: already on device.
|
||||
std::cout << "Setup: Generating polys (on device) f1,f2 of log degree " << log2(N-1) << std::endl;
|
||||
std::cout << "Setup: SRS of length " << N << " generated and loaded to device. Took: " << duration.count()
|
||||
<< " milliseconds" << std::endl;
|
||||
|
||||
// goal:
|
||||
// test commitment equality [(f1+f2)^2 + (f1-f2)^2 ]_1 = [4 (f1^2+ f_2^2)]_1
|
||||
// test commitment equality [(f1+f2)^2 - (f1-f2)^2 ]_1 = [4 f1 *f_2]_1
|
||||
// note: using polyapi to gen scalars: already on device.
|
||||
std::cout << "Setup: Generating polys (on device) f1,f2 of log degree " << log2(N - 1) << std::endl;
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
auto f1 = randomize_polynomial(N);
|
||||
auto f2 = randomize_polynomial(N);
|
||||
end = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
||||
std::cout << "Setup: Gen poly done. Took: " << duration.count() << " milliseconds" << std::endl;
|
||||
|
||||
//deg 2N constraints (f1+f2)^2 + (f1-f2)^2 = 2 (f1^2+ f_2^2)
|
||||
std::cout << "Computing constraints..start "<< std::endl;
|
||||
|
||||
// deg 2N constraints (f1+f2)^2 + (f1-f2)^2 = 2 (f1^2+ f_2^2)
|
||||
std::cout << "Computing constraints..start " << std::endl;
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
auto L1 = (f1+f2)*(f1+f2) + (f1-f2)*(f1-f2);
|
||||
auto R1 = scalar_t::from(2) * (f1*f1 + f2*f2);
|
||||
//deg 2N constraints (f1+f2)^2 - (f1-f2)^2 = 4 f1 *f_2
|
||||
auto L2 = (f1+f2)*(f1+f2) - (f1-f2)*(f1-f2);
|
||||
auto L1 = (f1 + f2) * (f1 + f2) + (f1 - f2) * (f1 - f2);
|
||||
auto R1 = scalar_t::from(2) * (f1 * f1 + f2 * f2);
|
||||
// deg 2N constraints (f1+f2)^2 - (f1-f2)^2 = 4 f1 *f_2
|
||||
auto L2 = (f1 + f2) * (f1 + f2) - (f1 - f2) * (f1 - f2);
|
||||
auto R2 = scalar_t::from(4) * f1 * f2;
|
||||
end = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
||||
std::cout << "Computing constraints..done. Took: " << duration.count() << " milliseconds"<< std::endl;
|
||||
|
||||
std::cout << "Computing constraints..done. Took: " << duration.count() << " milliseconds" << std::endl;
|
||||
|
||||
// extract coeff using coeff view
|
||||
auto [viewL1, sizeL1, device_idL1] = L1.get_coefficients_view();
|
||||
auto [viewL2, sizeL2, device_idL2] = L2.get_coefficients_view();
|
||||
auto [viewR1, sizeR1, device_idR1] = R1.get_coefficients_view();
|
||||
auto [viewR2, sizeR2, device_idR2] = R2.get_coefficients_view();
|
||||
|
||||
std::cout << "Computing Commitments with poly view"<< std::endl;
|
||||
auto [viewL1, sizeL1] = L1.get_coefficients_view();
|
||||
auto [viewL2, sizeL2] = L2.get_coefficients_view();
|
||||
auto [viewR1, sizeR1] = R1.get_coefficients_view();
|
||||
auto [viewR2, sizeR2] = R2.get_coefficients_view();
|
||||
|
||||
std::cout << "Computing Commitments with poly view" << std::endl;
|
||||
start = std::chrono::high_resolution_clock::now();
|
||||
msm::MSMConfig config = msm::default_msm_config();
|
||||
MSMConfig config = default_msm_config();
|
||||
config.are_points_on_device = true;
|
||||
config.are_scalars_on_device = true;
|
||||
|
||||
//host vars (for result)
|
||||
|
||||
// host vars (for result)
|
||||
projective_t hL1{}, hL2{}, hR1{}, hR2{};
|
||||
|
||||
//straightforward msm bn254 api: no batching
|
||||
bn254_msm_cuda(viewL1.get(),points_d,N,config,&hL1);
|
||||
bn254_msm_cuda(viewL2.get(),points_d,N,config,&hL2);
|
||||
bn254_msm_cuda(viewR1.get(),points_d,N,config,&hR1);
|
||||
bn254_msm_cuda(viewR2.get(),points_d,N,config,&hR2);
|
||||
// straightforward msm bn254 api: no batching
|
||||
msm(viewL1.get(), points_d, N, config, &hL1);
|
||||
msm(viewL2.get(), points_d, N, config, &hL2);
|
||||
msm(viewR1.get(), points_d, N, config, &hR1);
|
||||
msm(viewR2.get(), points_d, N, config, &hR2);
|
||||
|
||||
end = std::chrono::high_resolution_clock::now();
|
||||
duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
|
||||
std::cout << "Commitments done. Took: " << duration.count() << " milliseconds"<< std::endl;
|
||||
|
||||
//sanity checks
|
||||
std::cout << "Commitments done. Took: " << duration.count() << " milliseconds" << std::endl;
|
||||
|
||||
// sanity checks
|
||||
auto affL1 = projective_t::to_affine(hL1);
|
||||
auto affR1 = projective_t::to_affine(hR1);
|
||||
|
||||
auto affL2 = projective_t::to_affine(hL2);
|
||||
auto affR2 = projective_t::to_affine(hR2);
|
||||
|
||||
//test commitment equality [(f1+f2)^2 + (f1-f2)^2]_1 = [4 (f_1^2+f_2^2]_1
|
||||
assert(affL1.x==affR1.x && affL1.y==affR1.y);
|
||||
std::cout << "commitment [(f1+f2)^2 + (f1-f2)^2]_1:" << std::endl;
|
||||
// test commitment equality [(f1+f2)^2 + (f1-f2)^2]_1 = [4 (f_1^2+f_2^2]_1
|
||||
assert(affL1.x == affR1.x && affL1.y == affR1.y);
|
||||
std::cout << "commitment [(f1+f2)^2 + (f1-f2)^2]_1:" << std::endl;
|
||||
std::cout << "[x: " << affL1.x << ", y: " << affL1.y << "]" << std::endl;
|
||||
std::cout << "commitment [[2 (f_1^2+f_2^2]_1:" <<std::endl;
|
||||
std::cout << "commitment [[2 (f_1^2+f_2^2]_1:" << std::endl;
|
||||
std::cout << "[x: " << affR1.x << ", y: " << affR1.y << "]" << std::endl;
|
||||
|
||||
assert(affL2.x==affR2.x && affL2.y==affR2.y);
|
||||
std::cout << "commitment [(f1+f2)^2 - (f1-f2)^2]_1:"<< std::endl;
|
||||
assert(affL2.x == affR2.x && affL2.y == affR2.y);
|
||||
std::cout << "commitment [(f1+f2)^2 - (f1-f2)^2]_1:" << std::endl;
|
||||
std::cout << "[x: " << affL2.x << ", y: " << affL2.y << "]" << std::endl;
|
||||
std::cout << "commitment [4 f_1*f_2]_1:"<<std::endl;
|
||||
std::cout << "commitment [4 f_1*f_2]_1:" << std::endl;
|
||||
std::cout << "[x: " << affR2.x << ", y: " << affR2.y << "]" << std::endl;
|
||||
}
|
||||
|
||||
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
// Initialize NTT. TODO: can we hide this in the library?
|
||||
static const int MAX_NTT_LOG_SIZE = 24;
|
||||
auto ntt_config = ntt::default_ntt_config<scalar_t>();
|
||||
const scalar_t basic_root = scalar_t::omega(MAX_NTT_LOG_SIZE);
|
||||
ntt::init_domain(basic_root, ntt_config.ctx);
|
||||
try_load_and_set_backend_device(argc, argv);
|
||||
|
||||
// Virtual factory design pattern: initializing polynomimals factory for CUDA backend
|
||||
Polynomial_t::initialize(std::make_unique<CUDAPolynomialFactory<>>());
|
||||
static const int MAX_NTT_LOG_SIZE = 24;
|
||||
const scalar_t basic_root = scalar_t::omega(MAX_NTT_LOG_SIZE);
|
||||
ntt_init_domain(basic_root, default_ntt_init_domain_config());
|
||||
|
||||
START_TIMER(polyapi);
|
||||
|
||||
example_evaluate();
|
||||
example_clone(10);
|
||||
@@ -444,5 +442,7 @@ int main(int argc, char** argv)
|
||||
example_device_memory_view();
|
||||
example_commit_with_device_memory_view();
|
||||
|
||||
END_TIMER(polyapi, "polyapi example took");
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,2 +1,65 @@
|
||||
#!/bin/bash
|
||||
./build/example/example
|
||||
|
||||
# Exit immediately if a command exits with a non-zero status
|
||||
set -e
|
||||
|
||||
# Function to display usage information
|
||||
show_help() {
|
||||
echo "Usage: $0 [-d DEVICE_TYPE] [-b BACKEND_INSTALL_DIR]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -d DEVICE_TYPE Specify the device type (default: CPU)"
|
||||
echo " -b BACKEND_INSTALL_DIR Specify the backend installation directory (default: empty)"
|
||||
echo " -h Show this help message"
|
||||
exit 0
|
||||
}
|
||||
|
||||
# Parse command line options
|
||||
while getopts ":d:b:h" opt; do
|
||||
case ${opt} in
|
||||
d )
|
||||
DEVICE_TYPE=$OPTARG
|
||||
;;
|
||||
b )
|
||||
BACKEND_INSTALL_DIR="$(realpath ${OPTARG})"
|
||||
;;
|
||||
h )
|
||||
show_help
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
show_help
|
||||
;;
|
||||
: )
|
||||
echo "Invalid option: -$OPTARG requires an argument" 1>&2
|
||||
show_help
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Set default values if not provided
|
||||
: "${DEVICE_TYPE:=CPU}"
|
||||
: "${BACKEND_INSTALL_DIR:=}"
|
||||
|
||||
# Create necessary directories
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
ICILE_DIR=$(realpath "../../../icicle_v3/")
|
||||
ICICLE_CUDA_BACKEND_DIR="${ICILE_DIR}/backend/cuda"
|
||||
|
||||
# Build Icicle and the example app that links to it
|
||||
if [ "$DEVICE_TYPE" == "CUDA" ] && [ ! -d "${BACKEND_INSTALL_DIR}" ] && [ -d "${ICICLE_CUDA_BACKEND_DIR}" ]; then
|
||||
echo "Building icicle with CUDA backend"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DCUDA_BACKEND=local -S "${ICILE_DIR}" -B build/icicle
|
||||
BACKEND_INSTALL_DIR=$(realpath "build/icicle/backend")
|
||||
else
|
||||
echo "Building icicle without CUDA backend, BACKEND_INSTALL_DIR=${BACKEND_INSTALL_DIR}"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -S "${ICILE_DIR}" -B build/icicle
|
||||
fi
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/example
|
||||
|
||||
cmake --build build/icicle -j
|
||||
cmake --build build/example -j
|
||||
|
||||
./build/example/example "$DEVICE_TYPE" "$BACKEND_INSTALL_DIR"
|
||||
|
||||
@@ -1,27 +1,16 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES native) # on 3.24+, on earlier it is ignored, and the target is not passed
|
||||
endif ()
|
||||
project(example LANGUAGES CUDA CXX)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
|
||||
set(CMAKE_CUDA_FLAGS_RELEASE "")
|
||||
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
|
||||
# change the path to your Icicle location
|
||||
include_directories("../../../icicle")
|
||||
add_executable(
|
||||
example
|
||||
example.cu
|
||||
)
|
||||
project(example)
|
||||
|
||||
add_executable(example example.cpp)
|
||||
target_include_directories(example PRIVATE "../../../icicle_v3/include" "..")
|
||||
target_link_directories(example PRIVATE "${CMAKE_SOURCE_DIR}/build/icicle")
|
||||
message("${CMAKE_BINARY_DIR}/icicle")
|
||||
target_link_libraries(example PRIVATE icicle_curve_bn254 icicle_field_bn254 icicle_device)
|
||||
if(BACKEND_DIR)
|
||||
add_compile_definitions(BACKEND_DIR="${BACKEND_DIR}")
|
||||
endif()
|
||||
|
||||
target_include_directories(example PRIVATE "../../../icicle/include")
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_bn254.a)
|
||||
find_library(NVML_LIBRARY nvidia-ml PATHS /usr/local/cuda/targets/x86_64-linux/lib/stubs/ )
|
||||
target_link_libraries(example ${NVML_LIBRARY})
|
||||
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
30 examples/c++/polynomial_multiplication/README.md (new file)
@@ -0,0 +1,30 @@
|
||||
# Icicle Example: Polynomial Multiplication with NTT
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
Icicle provides polynomial multiplication using the Number Theoretical Transform (NTT), including forward and inverse transforms.
|
||||
|
||||
## Concise Usage Explanation
|
||||
|
||||
1. Include the necessary headers.
|
||||
2. Initialize the NTT domain.
|
||||
3. Prepare and transform the polynomials from host to device memory.
|
||||
4. Perform pointwise multiplication.
|
||||
5. Apply the inverse NTT.
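A sketch of steps 2 and 3 above, assembled from the calls used in `example.cpp` below (sizes and names are illustrative; the pointwise multiplication and the inverse NTT of steps 4-5 are left to the full listing):

```c++
#include <memory>

#include "icicle/runtime.h"
#include "icicle/api/bn254.h"
using namespace bn254;

#include "examples_utils.h" // try_load_and_set_backend_device

int main(int argc, char* argv[])
{
  try_load_and_set_backend_device(argc, argv);

  const int log_size = 10, size = 1 << log_size; // illustrative size

  // (2) init the NTT domain
  scalar_t root = scalar_t::omega(log_size);
  ICICLE_CHECK(bn254_ntt_init_domain(&root, default_ntt_init_domain_config()));

  // host inputs: fill only the lower half, as in example.cpp, so the product fits in `size` coefficients
  auto polyA = std::make_unique<scalar_t[]>(size);
  auto polyB = std::make_unique<scalar_t[]>(size);
  scalar_t::rand_host_many(polyA.get(), size / 2);
  scalar_t::rand_host_many(polyB.get(), size / 2);

  // device buffers for the transformed polynomials
  scalar_t *d_polyA, *d_polyB;
  ICICLE_CHECK(icicle_malloc((void**)&d_polyA, sizeof(scalar_t) * size));
  ICICLE_CHECK(icicle_malloc((void**)&d_polyB, sizeof(scalar_t) * size));

  // (3) forward NTT, reading host memory and writing device memory
  auto ntt_config = default_ntt_config<scalar_t>();
  ntt_config.are_inputs_on_device = false;
  ntt_config.are_outputs_on_device = true;
  ICICLE_CHECK(bn254_ntt(polyA.get(), size, NTTDir::kForward, ntt_config, d_polyA));
  ICICLE_CHECK(bn254_ntt(polyB.get(), size, NTTDir::kForward, ntt_config, d_polyB));

  // (4) pointwise-multiply d_polyA * d_polyB with the bn254 vector-ops API, then
  // (5) run the inverse NTT (NTTDir::kInverse) to recover the product's coefficients — see example.cpp below.
  return 0;
}
```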

## Running the example

```sh
# for CPU
./run.sh -d CPU
# for CUDA
./run.sh -d CUDA -b /path/to/cuda/backend/install/dir
```

## What's in the example

1. Define the size of the example.
2. Initialize input polynomials.
3. Perform Radix-2 or Mixed-Radix NTT.
4. Perform pointwise polynomial multiplication.
5. Apply the inverse NTT.
@@ -1,15 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately on error
|
||||
set -e
|
||||
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DMSM=OFF -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254
|
||||
cmake --build build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
examples/c++/polynomial_multiplication/example.cpp (new file, 102 lines)
@@ -0,0 +1,102 @@
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "icicle/runtime.h"
|
||||
#include "icicle/api/bn254.h"
|
||||
using namespace bn254;
|
||||
|
||||
#include "examples_utils.h"
|
||||
|
||||
void random_samples(scalar_t* res, uint32_t count)
|
||||
{
|
||||
for (int i = 0; i < count; i++)
|
||||
res[i] = i < 1000 ? scalar_t::rand_host() : res[i - 1000];
|
||||
}
|
||||
|
||||
void incremental_values(scalar_t* res, uint32_t count)
|
||||
{
|
||||
for (int i = 0; i < count; i++) {
|
||||
res[i] = i ? res[i - 1] + scalar_t::one() * scalar_t::omega(4) : scalar_t::zero();
|
||||
}
|
||||
}
|
||||
|
||||
// calculating polynomial multiplication A*B via NTT, pointwise multiplication and INTT
|
||||
// (1) allocate A,B on HOST. Randomize first half, zero second half
|
||||
// (2) allocate A,B,Res on device
|
||||
// (3) calc NTT for A and for B from host to device
|
||||
// (4) multiply d_polyRes = NttAGpu * NttBGpu (pointwise)
|
||||
// (5) INTT d_polyRes inplace
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
try_load_and_set_backend_device(argc, argv);
|
||||
|
||||
int NTT_LOG_SIZE = 20;
|
||||
int NTT_SIZE = 1 << NTT_LOG_SIZE;
|
||||
|
||||
// init domain
|
||||
scalar_t basic_root = scalar_t::omega(NTT_LOG_SIZE);
|
||||
bn254_ntt_init_domain(&basic_root, default_ntt_init_domain_config());
|
||||
|
||||
// (1) cpu allocation
|
||||
auto polyA = std::make_unique<scalar_t[]>(NTT_SIZE);
|
||||
auto polyB = std::make_unique<scalar_t[]>(NTT_SIZE);
|
||||
random_samples(polyA.get(), NTT_SIZE >> 1); // second half zeros
|
||||
random_samples(polyB.get(), NTT_SIZE >> 1); // second half zeros
|
||||
|
||||
scalar_t *d_polyA, *d_polyB, *d_polyRes;
|
||||
|
||||
DeviceProperties device_props;
|
||||
ICICLE_CHECK(icicle_get_device_properties(device_props));
|
||||
|
||||
auto benchmark = [&](bool print) {
|
||||
// (2) device input allocation. If the device does not share memory with the host, copy inputs explicitly.
|
||||
ICICLE_CHECK(icicle_malloc((void**)&d_polyA, sizeof(scalar_t) * NTT_SIZE));
|
||||
ICICLE_CHECK(icicle_malloc((void**)&d_polyB, sizeof(scalar_t) * NTT_SIZE));
|
||||
ICICLE_CHECK(icicle_malloc((void**)&d_polyRes, sizeof(scalar_t) * NTT_SIZE));
|
||||
|
||||
// start recording
|
||||
START_TIMER(poly_multiply)
|
||||
|
||||
// (3) NTT for A,B from host memory to device-memory
|
||||
auto ntt_config = default_ntt_config<scalar_t>();
|
||||
ntt_config.are_inputs_on_device = false;
|
||||
ntt_config.are_outputs_on_device = true;
|
||||
ntt_config.ordering = Ordering::kNM;
|
||||
ICICLE_CHECK(bn254_ntt(polyA.get(), NTT_SIZE, NTTDir::kForward, ntt_config, d_polyA));
|
||||
ICICLE_CHECK(bn254_ntt(polyB.get(), NTT_SIZE, NTTDir::kForward, ntt_config, d_polyB));
|
||||
|
||||
// (4) multiply A,B
|
||||
VecOpsConfig config{
|
||||
nullptr,
|
||||
true, // is_a_on_device
|
||||
true, // is_b_on_device
|
||||
true, // is_result_on_device
|
||||
false, // is_async
|
||||
nullptr // ext
|
||||
};
|
||||
ICICLE_CHECK(bn254_vector_mul(d_polyA, d_polyB, NTT_SIZE, config, d_polyRes));
|
||||
|
||||
// (5) INTT (in place)
|
||||
ntt_config.are_inputs_on_device = true;
|
||||
ntt_config.are_outputs_on_device = true;
|
||||
ntt_config.ordering = Ordering::kMN;
|
||||
ICICLE_CHECK(bn254_ntt(d_polyRes, NTT_SIZE, NTTDir::kInverse, ntt_config, d_polyRes));
|
||||
|
||||
if (print) { END_TIMER(poly_multiply, "polynomial multiplication took"); }
|
||||
|
||||
ICICLE_CHECK(icicle_free(d_polyA));
|
||||
ICICLE_CHECK(icicle_free(d_polyB));
|
||||
ICICLE_CHECK(icicle_free(d_polyRes));
|
||||
|
||||
return eIcicleError::SUCCESS;
|
||||
};
|
||||
|
||||
benchmark(false); // warmup
|
||||
benchmark(true);
|
||||
|
||||
ICICLE_CHECK(bn254_ntt_release_domain());
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,119 +0,0 @@
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#include "api/bn254.h"
|
||||
#include "gpu-utils/error_handler.cuh"
|
||||
|
||||
using namespace bn254;
|
||||
typedef scalar_t test_scalar;
|
||||
typedef scalar_t test_data;
|
||||
|
||||
void random_samples(test_data* res, uint32_t count)
|
||||
{
|
||||
for (int i = 0; i < count; i++)
|
||||
res[i] = i < 1000 ? test_data::rand_host() : res[i - 1000];
|
||||
}
|
||||
|
||||
void incremental_values(test_scalar* res, uint32_t count)
|
||||
{
|
||||
for (int i = 0; i < count; i++) {
|
||||
res[i] = i ? res[i - 1] + test_scalar::one() * test_scalar::omega(4) : test_scalar::zero();
|
||||
}
|
||||
}
|
||||
|
||||
// calculating polynomial multiplication A*B via NTT, pointwise multiplication and INTT
|
||||
// (1) allocate A,B on CPU. Randomize first half, zero second half
|
||||
// (2) allocate NttAGpu, NttBGpu on GPU
|
||||
// (3) calc NTT for A and for B from cpu to GPU
|
||||
// (4) multiply MulGpu = NttAGpu * NttBGpu (pointwise)
|
||||
// (5) INTT MulGpu inplace
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
cudaEvent_t start, stop;
|
||||
float measured_time;
|
||||
|
||||
int NTT_LOG_SIZE = 23;
|
||||
int NTT_SIZE = 1 << NTT_LOG_SIZE;
|
||||
|
||||
CHK_IF_RETURN(cudaFree(nullptr)); // init GPU context
|
||||
|
||||
// init domain
|
||||
auto ntt_config = ntt::default_ntt_config<test_scalar>();
|
||||
const bool is_radix2_alg = (argc > 1) ? atoi(argv[1]) : false;
|
||||
ntt_config.ntt_algorithm = is_radix2_alg ? ntt::NttAlgorithm::Radix2 : ntt::NttAlgorithm::MixedRadix;
|
||||
|
||||
const char* ntt_alg_str = is_radix2_alg ? "Radix-2" : "Mixed-Radix";
|
||||
std::cout << "Polynomial multiplication with " << ntt_alg_str << " NTT: ";
|
||||
|
||||
CHK_IF_RETURN(cudaEventCreate(&start));
|
||||
CHK_IF_RETURN(cudaEventCreate(&stop));
|
||||
|
||||
test_scalar basic_root = test_scalar::omega(NTT_LOG_SIZE);
|
||||
bn254_initialize_domain(&basic_root, ntt_config.ctx, true /*=fast_twiddles_mode*/);
|
||||
|
||||
// (1) cpu allocation
|
||||
auto CpuA = std::make_unique<test_data[]>(NTT_SIZE);
|
||||
auto CpuB = std::make_unique<test_data[]>(NTT_SIZE);
|
||||
random_samples(CpuA.get(), NTT_SIZE >> 1); // second half zeros
|
||||
random_samples(CpuB.get(), NTT_SIZE >> 1); // second half zeros
|
||||
|
||||
test_data *GpuA, *GpuB, *MulGpu;
|
||||
|
||||
auto benchmark = [&](bool print, int iterations = 1) {
|
||||
// start recording
|
||||
CHK_IF_RETURN(cudaEventRecord(start, ntt_config.ctx.stream));
|
||||
|
||||
for (int iter = 0; iter < iterations; ++iter) {
|
||||
// (2) gpu input allocation
|
||||
CHK_IF_RETURN(cudaMallocAsync(&GpuA, sizeof(test_data) * NTT_SIZE, ntt_config.ctx.stream));
|
||||
CHK_IF_RETURN(cudaMallocAsync(&GpuB, sizeof(test_data) * NTT_SIZE, ntt_config.ctx.stream));
|
||||
|
||||
// (3) NTT for A,B from cpu to gpu
|
||||
ntt_config.are_inputs_on_device = false;
|
||||
ntt_config.are_outputs_on_device = true;
|
||||
ntt_config.ordering = ntt::Ordering::kNM;
|
||||
CHK_IF_RETURN(bn254_ntt_cuda(CpuA.get(), NTT_SIZE, ntt::NTTDir::kForward, ntt_config, GpuA));
|
||||
CHK_IF_RETURN(bn254_ntt_cuda(CpuB.get(), NTT_SIZE, ntt::NTTDir::kForward, ntt_config, GpuB));
|
||||
|
||||
// (4) multiply A,B
|
||||
CHK_IF_RETURN(cudaMallocAsync(&MulGpu, sizeof(test_data) * NTT_SIZE, ntt_config.ctx.stream));
|
||||
vec_ops::VecOpsConfig config{
|
||||
ntt_config.ctx,
|
||||
true, // is_a_on_device
|
||||
true, // is_b_on_device
|
||||
true, // is_result_on_device
|
||||
false // is_async
|
||||
};
|
||||
CHK_IF_RETURN(bn254_mul_cuda(GpuA, GpuB, NTT_SIZE, config, MulGpu));
|
||||
|
||||
// (5) INTT (in place)
|
||||
ntt_config.are_inputs_on_device = true;
|
||||
ntt_config.are_outputs_on_device = true;
|
||||
ntt_config.ordering = ntt::Ordering::kMN;
|
||||
CHK_IF_RETURN(bn254_ntt_cuda(MulGpu, NTT_SIZE, ntt::NTTDir::kInverse, ntt_config, MulGpu));
|
||||
|
||||
CHK_IF_RETURN(cudaFreeAsync(GpuA, ntt_config.ctx.stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(GpuB, ntt_config.ctx.stream));
|
||||
CHK_IF_RETURN(cudaFreeAsync(MulGpu, ntt_config.ctx.stream));
|
||||
}
|
||||
|
||||
CHK_IF_RETURN(cudaEventRecord(stop, ntt_config.ctx.stream));
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(ntt_config.ctx.stream));
|
||||
CHK_IF_RETURN(cudaEventElapsedTime(&measured_time, start, stop));
|
||||
|
||||
if (print) { std::cout << measured_time / iterations << " MS" << std::endl; }
|
||||
|
||||
return CHK_LAST();
|
||||
};
|
||||
|
||||
benchmark(false); // warmup
|
||||
benchmark(true, 20);
|
||||
|
||||
bn254_release_domain(ntt_config.ctx);
|
||||
CHK_IF_RETURN(cudaStreamSynchronize(ntt_config.ctx.stream));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,3 +1,65 @@
|
||||
#!/bin/bash
|
||||
./build/example/example 1 # radix2
|
||||
./build/example/example 0 # mixed-radix
|
||||
|
||||
# Exit immediately if a command exits with a non-zero status
|
||||
set -e
|
||||
|
||||
# Function to display usage information
|
||||
show_help() {
|
||||
echo "Usage: $0 [-d DEVICE_TYPE] [-b BACKEND_INSTALL_DIR]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -d DEVICE_TYPE Specify the device type (default: CPU)"
|
||||
echo " -b BACKEND_INSTALL_DIR Specify the backend installation directory (default: empty)"
|
||||
echo " -h Show this help message"
|
||||
exit 0
|
||||
}
|
||||
|
||||
# Parse command line options
|
||||
while getopts ":d:b:h" opt; do
|
||||
case ${opt} in
|
||||
d )
|
||||
DEVICE_TYPE=$OPTARG
|
||||
;;
|
||||
b )
|
||||
BACKEND_INSTALL_DIR="$(realpath ${OPTARG})"
|
||||
;;
|
||||
h )
|
||||
show_help
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
show_help
|
||||
;;
|
||||
: )
|
||||
echo "Invalid option: -$OPTARG requires an argument" 1>&2
|
||||
show_help
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Set default values if not provided
|
||||
: "${DEVICE_TYPE:=CPU}"
|
||||
: "${BACKEND_INSTALL_DIR:=}"
|
||||
|
||||
# Create necessary directories
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
ICILE_DIR=$(realpath "../../../icicle_v3/")
|
||||
ICICLE_CUDA_BACKEND_DIR="${ICILE_DIR}/backend/cuda"
|
||||
|
||||
# Build Icicle and the example app that links to it
|
||||
if [ "$DEVICE_TYPE" == "CUDA" ] && [ ! -d "${BACKEND_INSTALL_DIR}" ] && [ -d "${ICICLE_CUDA_BACKEND_DIR}" ]; then
|
||||
echo "Building icicle with CUDA backend"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DMSM=OFF -DCUDA_BACKEND=local -S "${ICILE_DIR}" -B build/icicle
|
||||
BACKEND_INSTALL_DIR=$(realpath "build/icicle/backend")
|
||||
else
|
||||
echo "Building icicle without CUDA backend, BACKEND_INSTALL_DIR=${BACKEND_INSTALL_DIR}"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254 -DMSM=OFF -S "${ICILE_DIR}" -B build/icicle
|
||||
fi
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/example
|
||||
|
||||
cmake --build build/icicle -j
|
||||
cmake --build build/example -j
|
||||
|
||||
./build/example/example "$DEVICE_TYPE" "$BACKEND_INSTALL_DIR"
|
||||
|
||||
@@ -1,15 +1,17 @@
|
||||
#!/bin/bash
|
||||
# #!/bin/bash
|
||||
|
||||
# Exit immediately on error
|
||||
set -e
|
||||
# TODO update for V3
|
||||
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
# # Exit immediately on error
|
||||
# set -e
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DMSM=OFF -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254
|
||||
cmake --build build/icicle
|
||||
# mkdir -p build/example
|
||||
# mkdir -p build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
# # Configure and build Icicle
|
||||
# cmake -S ../../../icicle/ -B build/icicle -DMSM=OFF -DCMAKE_BUILD_TYPE=Release -DCURVE=bn254
|
||||
# cmake --build build/icicle
|
||||
|
||||
# # Configure and build the example application
|
||||
# cmake -S . -B build/example
|
||||
# cmake --build build/example
|
||||
@@ -1,2 +1,4 @@
|
||||
#!/bin/bash
|
||||
./build/example/example
|
||||
# #!/bin/bash
|
||||
|
||||
# TODO update for V3
|
||||
# ./build/example/example
|
||||
@@ -1,28 +1,16 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD 17)
|
||||
set(CMAKE_CUDA_STANDARD_REQUIRED TRUE)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED TRUE)
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.24.0")
|
||||
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH})
|
||||
else()
|
||||
set(CMAKE_CUDA_ARCHITECTURES native) # 'native' requires CMake 3.24+; on earlier versions it is ignored and no target architecture is passed
|
||||
endif ()
|
||||
project(example LANGUAGES CUDA CXX)
|
||||
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr -DFIELD_ID=1001")
|
||||
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
|
||||
set(CMAKE_CUDA_FLAGS_RELEASE "")
|
||||
set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -g -G -O0")
|
||||
project(example)
|
||||
|
||||
add_executable(
|
||||
example
|
||||
example.cu
|
||||
)
|
||||
add_executable(example example.cpp)
|
||||
target_include_directories(example PRIVATE "../../../icicle_v3/include" "..")
|
||||
target_link_directories(example PRIVATE "${CMAKE_SOURCE_DIR}/build/icicle")
|
||||
message("${CMAKE_BINARY_DIR}/icicle")
|
||||
target_link_libraries(example PRIVATE icicle_field_babybear icicle_device)
|
||||
if(BACKEND_DIR)
|
||||
add_compile_definitions(BACKEND_DIR="${BACKEND_DIR}")
|
||||
endif()
|
||||
|
||||
set_target_properties(example PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
target_include_directories(example PRIVATE "../../../icicle/include")
|
||||
|
||||
# can link to another curve/field by changing the following lib and FIELD_ID
|
||||
target_link_libraries(example ${CMAKE_SOURCE_DIR}/build/icicle/lib/libingo_field_babybear.a)
|
||||
# target_compile_definitions(example PUBLIC FIELD_ID babybear)
|
||||
@@ -21,9 +21,10 @@ The key enabler for *recursion* is the *redundancy* of polynomial commitments, h
|
||||
To run example, from project root directory:
|
||||
|
||||
```sh
|
||||
cd examples/c++/risc0
|
||||
./compile.sh
|
||||
./run.sh
|
||||
# for CPU
|
||||
./run.sh -d CPU
|
||||
# for CUDA
|
||||
./run.sh -d CUDA -b /path/to/cuda/backend/install/dir
|
||||
```
|
||||
|
||||
## What's in the example
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately on error
|
||||
set -e
|
||||
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
# Configure and build Icicle
|
||||
cmake -S ../../../icicle/ -B build/icicle -DCMAKE_BUILD_TYPE=Release -DFIELD=babybear
|
||||
cmake --build build/icicle
|
||||
|
||||
# Configure and build the example application
|
||||
cmake -S . -B build/example
|
||||
cmake --build build/example
|
||||
@@ -4,11 +4,11 @@
|
||||
#include <vector>
|
||||
#include <list>
|
||||
|
||||
#include "polynomials/polynomials.h"
|
||||
#include "polynomials/cuda_backend/polynomial_cuda_backend.cuh"
|
||||
#include "ntt/ntt.cuh"
|
||||
#include "examples_utils.h"
|
||||
#include "icicle/polynomials/polynomials.h"
|
||||
#include "icicle/api/babybear.h"
|
||||
|
||||
using namespace polynomials;
|
||||
using namespace babybear;
|
||||
|
||||
// define the polynomial type
|
||||
typedef Polynomial<scalar_t> Polynomial_t;
|
||||
@@ -17,44 +17,49 @@ typedef Polynomial<scalar_t> Polynomial_t;
|
||||
typedef int64_t rv_t;
|
||||
|
||||
// Convert RISC-V registers to Finite Fields
|
||||
void to_ff(rv_t* rv, scalar_t* s, size_t n) {
|
||||
void to_ff(rv_t* rv, scalar_t* s, size_t n)
|
||||
{
|
||||
for (int i = 0; i < n; ++i) {
|
||||
s[i] = scalar_t::from(rv[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void p_print(Polynomial_t * p, int logn, scalar_t shift, std::string header = "Print Vector") {
|
||||
void p_print(Polynomial_t* p, int logn, scalar_t shift, std::string header = "Print Vector")
|
||||
{
|
||||
std::cout << header << std::endl;
|
||||
auto n = 1 << logn;
|
||||
auto omega = scalar_t::omega(logn);
|
||||
auto x = shift;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
std::cout << i << ": " << (*p)(x) << std::endl;
|
||||
x = x*omega;
|
||||
x = x * omega;
|
||||
}
|
||||
}
|
||||
|
||||
// value to polynomial
|
||||
Polynomial_t p_value(scalar_t value) {
|
||||
auto p_value = Polynomial_t::from_coefficients(&value , 1);
|
||||
Polynomial_t p_value(scalar_t value)
|
||||
{
|
||||
auto p_value = Polynomial_t::from_coefficients(&value, 1);
|
||||
return p_value;
|
||||
}
|
||||
|
||||
Polynomial_t p_rotate(Polynomial_t* p, int logn) {
|
||||
Polynomial_t p_rotate(Polynomial_t* p, int logn)
|
||||
{
|
||||
// rotate polynomial coefficients right by one position
|
||||
auto n = 1 << logn;
|
||||
auto evaluations_rou_domain = std::make_unique<scalar_t[]>(n);
|
||||
p->evaluate_on_rou_domain(logn, evaluations_rou_domain.get() );
|
||||
scalar_t tmp = evaluations_rou_domain[n-1];
|
||||
for (int i = n-1; i > 0; --i) {
|
||||
evaluations_rou_domain[i] = evaluations_rou_domain[i-1];
|
||||
p->evaluate_on_rou_domain(logn, evaluations_rou_domain.get());
|
||||
scalar_t tmp = evaluations_rou_domain[n - 1];
|
||||
for (int i = n - 1; i > 0; --i) {
|
||||
evaluations_rou_domain[i] = evaluations_rou_domain[i - 1];
|
||||
}
|
||||
evaluations_rou_domain[0] = tmp;
|
||||
return Polynomial_t::from_rou_evaluations(evaluations_rou_domain.get(), n);
|
||||
return Polynomial_t::from_rou_evaluations(evaluations_rou_domain.get(), n);
|
||||
}
|
||||
|
||||
// mix polynomials (c.f. mix polynomial evaluations)
|
||||
Polynomial_t p_mix(Polynomial_t* in[], size_t nmix, scalar_t mix_parameter) {
|
||||
Polynomial_t p_mix(Polynomial_t* in[], size_t nmix, scalar_t mix_parameter)
|
||||
{
|
||||
scalar_t factor = mix_parameter;
|
||||
Polynomial_t out = in[0]->clone();
|
||||
for (int i = 1; i < nmix; ++i) {
|
||||
@@ -64,47 +69,50 @@ Polynomial_t p_mix(Polynomial_t* in[], size_t nmix, scalar_t mix_parameter) {
|
||||
return out;
|
||||
}
|
||||
|
||||
void solve_linear(scalar_t xa, scalar_t ya, scalar_t xb, scalar_t yb, scalar_t * coeffs) {
|
||||
void solve_linear(scalar_t xa, scalar_t ya, scalar_t xb, scalar_t yb, scalar_t* coeffs)
|
||||
{
|
||||
coeffs[1] = (ya - yb) * scalar_t::inverse(xa - xb);
|
||||
coeffs[0] = ya - coeffs[1] * xa;
|
||||
}
|
||||
|
||||
std::unique_ptr<scalar_t[]> InterpolateOnLargerDomain(Polynomial_t * p, int n, scalar_t shift = scalar_t::one()) {
|
||||
std::unique_ptr<scalar_t[]> InterpolateOnLargerDomain(Polynomial_t* p, int n, scalar_t shift = scalar_t::one())
|
||||
{
|
||||
const int deg = p->degree();
|
||||
auto input = std::make_unique<scalar_t[]>(n);
|
||||
// TBD: check if scalar_t constructor initializes to zero
|
||||
for (int i = 0; i < n; ++i) {
|
||||
input[i] = scalar_t::zero();
|
||||
}
|
||||
p->copy_coeffs(input.get(), 0/*start*/, deg);
|
||||
auto ntt_config = ntt::default_ntt_config<scalar_t>();
|
||||
p->copy_coeffs(input.get(), 0 /*start*/, deg);
|
||||
auto ntt_config = default_ntt_config<scalar_t>();
|
||||
ntt_config.coset_gen = shift;
|
||||
auto evals_h = std::make_unique<scalar_t[]>(n);
|
||||
auto err = ntt::ntt(input.get(), n, ntt::NTTDir::kForward, ntt_config, evals_h.get());
|
||||
ICICLE_CHECK(ntt(input.get(), n, NTTDir::kForward, ntt_config, evals_h.get()));
|
||||
return evals_h;
|
||||
}
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
try_load_and_set_backend_device(argc, argv);
|
||||
|
||||
START_TIMER(risc0_example);
|
||||
|
||||
std::cout << "This is an ICICLE C++ implementation of the STARK by Hand Explainer." << std::endl;
|
||||
std::cout << "https://dev.risczero.com/proof-system/stark-by-hand" << std::endl;
|
||||
|
||||
const int logn=3;
|
||||
const int logn = 3;
|
||||
const int n = 1 << logn;
|
||||
|
||||
|
||||
std::cout << "Initializing NTT" << std::endl;
|
||||
static const int MAX_NTT_LOG_SIZE = 24;
|
||||
auto ntt_config = ntt::default_ntt_config<scalar_t>();
|
||||
auto ntt_config = default_ntt_config<scalar_t>();
|
||||
const scalar_t basic_root = scalar_t::omega(MAX_NTT_LOG_SIZE);
|
||||
ntt::init_domain(basic_root, ntt_config.ctx);
|
||||
std::cout << "Initializing Polynomials" << std::endl;
|
||||
// Virtual factory design pattern: initializing polynomimals factory for CUDA backend
|
||||
Polynomial_t::initialize(std::make_unique<CUDAPolynomialFactory<>>());
|
||||
ntt_init_domain(basic_root, default_ntt_init_domain_config());
|
||||
|
||||
std::cout << std::endl << "Lesson 1: The Execution Trace" << std::endl;
|
||||
std::cout << std::endl << "Lesson 1: The Execution Trace" << std::endl;
|
||||
// Trace: Data Columns
|
||||
rv_t rv_d1_trace[] = {24, 30, 54, 84, 78, 15, 29, 50};
|
||||
rv_t rv_d2_trace[] = {30, 54, 84, 138, 2, 77, 21, 36};
|
||||
rv_t rv_d1_trace[] = {24, 30, 54, 84, 78, 15, 29, 50};
|
||||
rv_t rv_d2_trace[] = {30, 54, 84, 138, 2, 77, 21, 36};
|
||||
rv_t rv_d3_trace[] = {54, 84, 138, 222, 71, 17, 92, 33};
|
||||
auto d1_trace = std::make_unique<scalar_t[]>(n);
|
||||
auto d2_trace = std::make_unique<scalar_t[]>(n);
|
||||
@@ -151,57 +159,64 @@ int main(int argc, char** argv)
|
||||
auto p_c3 = Polynomial_t::from_rou_evaluations(c3_trace.get(), n);
|
||||
|
||||
std::cout << "Lesson 5: ZK Commitments of the Trace Data" << std::endl;
|
||||
std::cout << "To maintain a zk protocol, the trace polynomials are evaluated over a zk commitment domain" << std::endl;
|
||||
std::cout << "To maintain a zk protocol, the trace polynomials are evaluated over a zk commitment domain"
|
||||
<< std::endl;
|
||||
std::cout << "zk commitment domain is a coset of Reed Solomon domain shifted by a basic root of unity" << std::endl;
|
||||
scalar_t xzk = basic_root;
|
||||
p_print(&p_d1, logn, xzk, "ZK commitment for d1 polynomial");
|
||||
p_print(&p_d1, logn, xzk, "ZK commitment for d1 polynomial");
|
||||
std::cout << "Build Merkle Tree for ZK commitments (outside the scope of this example)" << std::endl;
|
||||
|
||||
std::cout << "Lesson 6: Constraint Polynomials" << std::endl;
|
||||
std::cout << "The constraints are used to check the correctness of the trace. In this example, we check 6 rules to establish the validity of the trace." << std::endl;
|
||||
auto p_fib_constraint = (p_d3 - p_d2 - p_d1) * (p_c1 + p_c2 + p_c3);
|
||||
auto fib_constraint_zkcommitment = InterpolateOnLargerDomain(&p_fib_constraint, 4*n, xzk);
|
||||
|
||||
std::cout << "The constraints are used to check the correctness of the trace. In this example, we check 6 rules to "
|
||||
"establish the validity of the trace."
|
||||
<< std::endl;
|
||||
auto p_fib_constraint = (p_d3 - p_d2 - p_d1) * (p_c1 + p_c2 + p_c3);
|
||||
auto fib_constraint_zkcommitment = InterpolateOnLargerDomain(&p_fib_constraint, 4 * n, xzk);
|
||||
|
||||
auto p_init1_constraint = (p_d1 - p_value(scalar_t::from(24))) * p_c1;
|
||||
// sanity checks printing
|
||||
p_print(&p_init1_constraint, logn+2, scalar_t::one(), "Reed-Solomon constraint polynomial gives 0s in every 4th row");
|
||||
p_print(&p_init1_constraint, logn+2, xzk, "ZK Commitment constraint polynomial gives no 0s");
|
||||
p_print(
|
||||
&p_init1_constraint, logn + 2, scalar_t::one(), "Reed-Solomon constraint polynomial gives 0s in every 4th row");
|
||||
p_print(&p_init1_constraint, logn + 2, xzk, "ZK Commitment constraint polynomial gives no 0s");
|
||||
auto p_init2_constraint = (p_d2 - p_value(scalar_t::from(30))) * p_c1;
|
||||
auto p_termination_constraint = (p_d3 - p_value(scalar_t::from(222))) * p_c3;
|
||||
auto p_recursion_constraint1 = (p_d1 - p_rotate(&p_d2, logn)) * p_c2;
|
||||
auto p_recursion_constraint2 = (p_d2 - p_rotate(&p_d3, logn)) * p_c2;
|
||||
|
||||
std::cout << std::endl << "Lesson 7: Mixing Constraint Polynomials" << std::endl;
|
||||
Polynomial_t * p_all_constraints[] = {&p_fib_constraint, &p_init1_constraint, &p_init2_constraint, &p_termination_constraint, &p_recursion_constraint1, &p_recursion_constraint2};
|
||||
std::cout << std::endl << "Lesson 7: Mixing Constraint Polynomials" << std::endl;
|
||||
Polynomial_t* p_all_constraints[] = {&p_fib_constraint, &p_init1_constraint, &p_init2_constraint,
|
||||
&p_termination_constraint, &p_recursion_constraint1, &p_recursion_constraint2};
|
||||
const size_t nmix = sizeof(p_all_constraints) / sizeof(p_all_constraints[0]);
|
||||
auto p_mixed_constraints = p_mix(p_all_constraints, nmix, scalar_t::from(5));
|
||||
std::cout << "All constraint polynomials are low-degree:" << std::endl;
|
||||
for( int i = 0; i < nmix; ++i) {
|
||||
for (int i = 0; i < nmix; ++i) {
|
||||
std::cout << i << ": " << p_all_constraints[i]->degree() << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "Lesson 8: The Core of the RISC Zero STARK" << std::endl;
|
||||
std::cout << "Degree of the mixed constraints polynomial: " << p_mixed_constraints.degree() << std::endl;
|
||||
std::cout << "Degree of the mixed constraints polynomial: " << p_mixed_constraints.degree() << std::endl;
|
||||
auto p_validity = p_mixed_constraints.divide_by_vanishing_polynomial(n);
|
||||
std::cout << "Degree of the validity polynomial: " << p_validity.degree() << std::endl;
|
||||
std::cout << "The Verifier should provide the Merkle commitment for the above" << std::endl;
|
||||
|
||||
std::cout << "Lesson 9: The DEEP Technique" << std::endl;
|
||||
std::cout << "The DEEP technique improves the security of a single query by sampling outside of the commitment domain." << std::endl;
|
||||
// In the original STARK protocol, the Verifier tests validity polynomial at a number of test points;
|
||||
// the soundness of the protocol depends on the number of tests.
|
||||
// The DEEP-ALI technique allows us to achieve a high degree of soundness with a single test.
|
||||
std::cout
|
||||
<< "The DEEP technique improves the security of a single query by sampling outside of the commitment domain."
|
||||
<< std::endl;
|
||||
// In the original STARK protocol, the Verifier tests validity polynomial at a number of test points;
|
||||
// the soundness of the protocol depends on the number of tests.
|
||||
// The DEEP-ALI technique allows us to achieve a high degree of soundness with a single test.
|
||||
// The details of DEEP are described in the following lesson.
|
||||
|
||||
auto DEEP_point = scalar_t::from(93);
|
||||
std::cout << "The prover convinces the verifier that V=C/Z at the DEEP_test_point, " << DEEP_point << std::endl;
|
||||
const scalar_t coeffs1[2] = {scalar_t::zero()-DEEP_point, scalar_t::one()};
|
||||
const scalar_t coeffs1[2] = {scalar_t::zero() - DEEP_point, scalar_t::one()};
|
||||
auto denom_DEEP1 = Polynomial_t::from_coefficients(coeffs1, 2);
|
||||
auto [p_d1_DEEP, r] = (p_d1 - p_value(DEEP_point)).divide(denom_DEEP1);
|
||||
std::cout << "The DEEP d1 degree is: " << p_d1_DEEP.degree() << std::endl;
|
||||
// d2, d3 use recursion constraints and need the point corresponding to the previous state (clock cycle)
|
||||
auto omega = scalar_t::omega(logn);
|
||||
auto DEEP_prev_point = DEEP_point*scalar_t::inverse(omega);
|
||||
auto DEEP_prev_point = DEEP_point * scalar_t::inverse(omega);
|
||||
auto coeffs2 = std::make_unique<scalar_t[]>(2);
|
||||
coeffs2[0] = scalar_t::zero() - DEEP_prev_point;
|
||||
coeffs2[1] = scalar_t::one();
|
||||
@@ -210,13 +225,13 @@ int main(int argc, char** argv)
|
||||
auto coeffs_d2bar = std::make_unique<scalar_t[]>(2);
|
||||
solve_linear(DEEP_point, p_d2(DEEP_point), DEEP_prev_point, p_d2(DEEP_prev_point), coeffs_d2bar.get());
|
||||
auto d2bar = Polynomial_t::from_coefficients(coeffs_d2bar.get(), 2);
|
||||
auto [p_d2_DEEP, r2] = (p_d2 - d2bar).divide(denom_DEEP1*denom_DEEP2);
|
||||
auto [p_d2_DEEP, r2] = (p_d2 - d2bar).divide(denom_DEEP1 * denom_DEEP2);
|
||||
std::cout << "The DEEP d2 degree is: " << p_d2_DEEP.degree() << std::endl;
|
||||
|
||||
auto coeffs_d3bar = std::make_unique<scalar_t[]>(2);
|
||||
solve_linear(DEEP_point, p_d3(DEEP_point), DEEP_prev_point, p_d3(DEEP_prev_point), coeffs_d3bar.get());
|
||||
auto d3bar = Polynomial_t::from_coefficients(coeffs_d3bar.get(), 2);
|
||||
auto [p_d3_DEEP, r3] = (p_d3 - d3bar).divide(denom_DEEP1*denom_DEEP2);
|
||||
auto [p_d3_DEEP, r3] = (p_d3 - d3bar).divide(denom_DEEP1 * denom_DEEP2);
|
||||
std::cout << "The DEEP d3 degree is: " << p_d3_DEEP.degree() << std::endl;
|
||||
|
||||
// DEEP c{1,2,3} polynomials
|
||||
@@ -246,17 +261,18 @@ int main(int argc, char** argv)
|
||||
std::cout << "The degree of the mixed DEEP polynomial is: " << fri_input.degree() << std::endl;
|
||||
|
||||
std::cout << "Lesson 11: FRI Protocol (Commit Phase)" << std::endl;
|
||||
std::cout << "The prover provides information to convince the verifier that the DEEP polynomials are low-degree." << std::endl;
|
||||
std::cout << "The prover provides information to convince the verifier that the DEEP polynomials are low-degree."
|
||||
<< std::endl;
|
||||
int nof_rounds = 3;
|
||||
Polynomial_t feven[nof_rounds], fodd[nof_rounds], fri[nof_rounds+1];
|
||||
Polynomial_t feven[nof_rounds], fodd[nof_rounds], fri[nof_rounds + 1];
|
||||
scalar_t rfri[nof_rounds];
|
||||
fri[0] = fri_input.clone();
|
||||
for (int i = 0; i < nof_rounds; ++i) {
|
||||
feven[i] = fri[i].even();
|
||||
fodd[i] = fri[i].odd();
|
||||
rfri[i] = scalar_t::rand_host();
|
||||
fri[i+1] = feven[i] + rfri[i]*fodd[i];
|
||||
std::cout << "The degree of the Round " << i << " polynomial is: " << fri[i+1].degree() << std::endl;
|
||||
rfri[i] = scalar_t::rand_host();
|
||||
fri[i + 1] = feven[i] + rfri[i] * fodd[i];
|
||||
std::cout << "The degree of the Round " << i << " polynomial is: " << fri[i + 1].degree() << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "Lesson 12: FRI Protocol (Query Phase)" << std::endl;
|
||||
@@ -266,10 +282,12 @@ int main(int argc, char** argv)
|
||||
auto xm = scalar_t::zero() - xp;
|
||||
scalar_t lhs[nof_rounds], rhs[nof_rounds];
|
||||
for (int i = 0; i < nof_rounds; ++i) {
|
||||
rhs[i] = (rfri[i]+xp)*fri[i](xp)*scalar_t::inverse(scalar_t::from(2)*xp) + (rfri[i]+xm)*fri[i](xm)*scalar_t::inverse(scalar_t::from(2)*xm);
|
||||
lhs[i] = fri[i+1](xp*xp);
|
||||
rhs[i] = (rfri[i] + xp) * fri[i](xp) * scalar_t::inverse(scalar_t::from(2) * xp) +
|
||||
(rfri[i] + xm) * fri[i](xm) * scalar_t::inverse(scalar_t::from(2) * xm);
|
||||
lhs[i] = fri[i + 1](xp * xp);
|
||||
std::cout << "Round " << i << std::endl << "rhs: " << rhs[i] << std::endl << "lhs: " << lhs[i] << std::endl;
|
||||
}
|
||||
|
||||
END_TIMER(risc0_example, "risc0 example");
|
||||
return 0;
|
||||
}
|
||||
@@ -1,2 +1,65 @@
|
||||
#!/bin/bash
|
||||
./build/example/example
|
||||
|
||||
# Exit immediately if a command exits with a non-zero status
|
||||
set -e
|
||||
|
||||
# Function to display usage information
|
||||
show_help() {
|
||||
echo "Usage: $0 [-d DEVICE_TYPE] [-b BACKEND_INSTALL_DIR]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -d DEVICE_TYPE Specify the device type (default: CPU)"
|
||||
echo " -b BACKEND_INSTALL_DIR Specify the backend installation directory (default: empty)"
|
||||
echo " -h Show this help message"
|
||||
exit 0
|
||||
}
|
||||
|
||||
# Parse command line options
|
||||
while getopts ":d:b:h" opt; do
|
||||
case ${opt} in
|
||||
d )
|
||||
DEVICE_TYPE=$OPTARG
|
||||
;;
|
||||
b )
|
||||
BACKEND_INSTALL_DIR="$(realpath ${OPTARG})"
|
||||
;;
|
||||
h )
|
||||
show_help
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
show_help
|
||||
;;
|
||||
: )
|
||||
echo "Invalid option: -$OPTARG requires an argument" 1>&2
|
||||
show_help
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Set default values if not provided
|
||||
: "${DEVICE_TYPE:=CPU}"
|
||||
: "${BACKEND_INSTALL_DIR:=}"
|
||||
|
||||
# Create necessary directories
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
ICILE_DIR=$(realpath "../../../icicle_v3/")
|
||||
ICICLE_CUDA_BACKEND_DIR="${ICILE_DIR}/backend/cuda"
|
||||
|
||||
# Build Icicle and the example app that links to it
|
||||
if [ "$DEVICE_TYPE" == "CUDA" ] && [ ! -d "${BACKEND_INSTALL_DIR}" ] && [ -d "${ICICLE_CUDA_BACKEND_DIR}" ]; then
|
||||
echo "Building icicle with CUDA backend"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DFIELD=babybear -DCUDA_BACKEND=local -S "${ICILE_DIR}" -B build/icicle
|
||||
BACKEND_INSTALL_DIR=$(realpath "build/icicle/backend")
|
||||
else
|
||||
echo "Building icicle without CUDA backend, BACKEND_INSTALL_DIR=${BACKEND_INSTALL_DIR}"
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -DFIELD=babybear -S "${ICILE_DIR}" -B build/icicle
|
||||
fi
|
||||
cmake -DCMAKE_BUILD_TYPE=Release -S . -B build/example
|
||||
|
||||
cmake --build build/icicle -j
|
||||
cmake --build build/example -j
|
||||
|
||||
./build/example/example "$DEVICE_TYPE" "$BACKEND_INSTALL_DIR"
|
||||
|
||||
@@ -4,15 +4,11 @@ version = "1.2.0"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
icicle-cuda-runtime = { path = "../../../wrappers/rust/icicle-cuda-runtime" }
|
||||
icicle-core = { path = "../../../wrappers/rust/icicle-core" }
|
||||
icicle-bn254 = { path = "../../../wrappers/rust/icicle-curves/icicle-bn254", features = ["g2"] }
|
||||
icicle-bls12-377 = { path = "../../../wrappers/rust/icicle-curves/icicle-bls12-377" }
|
||||
ark-bn254 = { version = "0.4.0", optional = true }
|
||||
ark-bls12-377 = { version = "0.4.0", optional = true }
|
||||
ark-ec = { version = "0.4.0", optional = true }
|
||||
icicle-runtime = { path = "../../../wrappers/rust_v3/icicle-runtime" }
|
||||
icicle-core = { path = "../../../wrappers/rust_v3/icicle-core" }
|
||||
icicle-bn254 = { path = "../../../wrappers/rust_v3/icicle-curves/icicle-bn254", features = ["g2"] }
|
||||
icicle-bls12-377 = { path = "../../../wrappers/rust_v3/icicle-curves/icicle-bls12-377" }
|
||||
clap = { version = "<=4.4.12", features = ["derive"] }
|
||||
|
||||
[features]
|
||||
arkworks = ["ark-bn254", "ark-bls12-377", "ark-ec", "icicle-core/arkworks", "icicle-bn254/arkworks", "icicle-bls12-377/arkworks"]
|
||||
profile = []
|
||||
cuda = ["icicle-runtime/cuda_backend", "icicle-bn254/cuda_backend", "icicle-bls12-377/cuda_backend"]
|
||||
|
||||
@@ -2,10 +2,6 @@
|
||||
|
||||
`ICICLE` provides Rust bindings to CUDA-accelerated C++ implementation of [Multi-Scalar Multiplication](https://github.com/ingonyama-zk/ingopedia/blob/master/src/msm.md).
|
||||
|
||||
## Best Practices
|
||||
|
||||
In order to save time and setting up prerequisites manually, we recommend running this example in our [ZKContainer](../../ZKContainer.md).
|
||||
|
||||
## Usage
|
||||
|
||||
```rust
|
||||
@@ -28,29 +24,9 @@ In this example we use `BN254` curve. The function computes $result = \sum_{i=0}
|
||||
|
||||
Running the example:
|
||||
```sh
|
||||
cargo run --release
|
||||
# for CPU
|
||||
./run.sh -d CPU
|
||||
# for CUDA
|
||||
./run.sh -d CUDA -b /path/to/cuda/backend/install/dir
|
||||
```
|
||||
|
||||
You can add the `--feature arkworks,profile` flag to measure times of both ICICLE and arkworks.
|
||||
|
||||
> [!NOTE]
|
||||
> The default sizes are 2^19 - 2^23. You can change this by passing the `--lower_bound_log_size <size> --upper_bound_log_size <size>` options. To change the size range to 2^21 - 2^24, run the example like this:
|
||||
> ```sh
|
||||
> cargo run --release -- -l 21 -u 24
|
||||
> ```
|
||||
|
||||
## Benchmarks
|
||||
|
||||
These benchmarks were run on a 16 core 24 thread i9-12900k CPU and an RTX 3090 Ti GPU

### Single BN254 MSM

| Library\Size | 2^19 | 2^20 | 2^21 | 2^22 | 2^23 |
|--------------|------|------|------|------|------|
| ICICLE | 10 ms | 11 ms | 21 ms | 39 ms | 77 ms |
| Arkworks | 284 ms | 540 ms | 1,152 ms | 2,320 ms | 4,491 ms |

### Single BLS12377 MSM

| Library\Size | 2^19 | 2^20 | 2^21 | 2^22 | 2^23 |
|--------------|------|------|------|------|------|
| ICICLE | 9 ms | 14 ms | 25 ms | 48 ms | 93 ms |
| Arkworks | 490 ms | 918 ms | 1,861 ms | 3,624 ms | 7,191 ms |
examples/rust/msm/run.sh (new executable file, 59 lines)
@@ -0,0 +1,59 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately if a command exits with a non-zero status
|
||||
set -e
|
||||
|
||||
# Function to display usage information
|
||||
show_help() {
|
||||
echo "Usage: $0 [-d DEVICE_TYPE] [-b BACKEND_INSTALL_DIR]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -d DEVICE_TYPE Specify the device type (default: CPU)"
|
||||
echo " -b BACKEND_INSTALL_DIR Specify the backend installation directory (default: empty)"
|
||||
echo " -h Show this help message"
|
||||
exit 0
|
||||
}
|
||||
|
||||
# Parse command line options
|
||||
while getopts ":d:b:h" opt; do
|
||||
case ${opt} in
|
||||
d )
|
||||
DEVICE_TYPE=$OPTARG
|
||||
;;
|
||||
b )
|
||||
BACKEND_INSTALL_DIR="$(realpath ${OPTARG})"
|
||||
;;
|
||||
h )
|
||||
show_help
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
show_help
|
||||
;;
|
||||
: )
|
||||
echo "Invalid option: -$OPTARG requires an argument" 1>&2
|
||||
show_help
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Set default values if not provided
|
||||
: "${DEVICE_TYPE:=CPU}"
|
||||
: "${BACKEND_INSTALL_DIR:=}"
|
||||
|
||||
# Create necessary directories
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
ICILE_DIR=$(realpath "../../../icicle_v3/")
|
||||
ICICLE_CUDA_SOURCE_DIR="${ICILE_DIR}/backend/cuda"
|
||||
|
||||
# Build Icicle and the example app that links to it
|
||||
if [ "$DEVICE_TYPE" == "CUDA" ] && [ ! -d "${BACKEND_INSTALL_DIR}" ] && [ -d "${ICICLE_CUDA_SOURCE_DIR}" ]; then
|
||||
echo "Building icicle with CUDA backend"
|
||||
BACKEND_INSTALL_DIR="./target/release/deps/icicle/lib/backend"
|
||||
cargo run --release --features=cuda -- --device-type "${DEVICE_TYPE}" --backend-install-dir "${BACKEND_INSTALL_DIR}"
|
||||
else
|
||||
echo "Building icicle without CUDA backend, BACKEND_INSTALL_DIR=${BACKEND_INSTALL_DIR}"
|
||||
cargo run --release -- --device-type "${DEVICE_TYPE}" --backend-install-dir "${BACKEND_INSTALL_DIR}"
|
||||
fi
|
||||
@@ -1,48 +1,60 @@
|
||||
use icicle_bn254::curve::{CurveCfg, G1Projective, G2CurveCfg, G2Projective, ScalarCfg};
|
||||
use icicle_runtime::{
|
||||
memory::{DeviceVec, HostSlice},
|
||||
stream::IcicleStream,
|
||||
};
|
||||
|
||||
// Using both bn254 and bls12-377 curves
|
||||
use icicle_bls12_377::curve::{
|
||||
CurveCfg as BLS12377CurveCfg, G1Projective as BLS12377G1Projective, ScalarCfg as BLS12377ScalarCfg,
|
||||
};
|
||||
|
||||
use icicle_cuda_runtime::{
|
||||
memory::{DeviceVec, HostSlice},
|
||||
stream::CudaStream,
|
||||
};
|
||||
|
||||
use icicle_core::{curve::Curve, msm, traits::GenerateRandom};
|
||||
|
||||
#[cfg(feature = "arkworks")]
|
||||
use icicle_core::traits::ArkConvertible;
|
||||
|
||||
#[cfg(feature = "arkworks")]
|
||||
use ark_bls12_377::{Fr as Bls12377Fr, G1Affine as Bls12377G1Affine, G1Projective as Bls12377ArkG1Projective};
|
||||
#[cfg(feature = "arkworks")]
|
||||
use ark_bn254::{Fr as Bn254Fr, G1Affine as Bn254G1Affine, G1Projective as Bn254ArkG1Projective};
|
||||
#[cfg(feature = "arkworks")]
|
||||
use ark_ec::scalar_mul::variable_base::VariableBaseMSM;
|
||||
|
||||
#[cfg(feature = "profile")]
|
||||
use std::time::Instant;
|
||||
use icicle_bn254::curve::{CurveCfg, G1Projective, G2CurveCfg, G2Projective, ScalarCfg};
|
||||
|
||||
use clap::Parser;
|
||||
use icicle_core::{curve::Curve, msm, traits::GenerateRandom};
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
struct Args {
|
||||
/// Lower bound (inclusive) of MSM sizes to run for
|
||||
#[arg(short, long, default_value_t = 19)]
|
||||
#[arg(short, long, default_value_t = 10)]
|
||||
lower_bound_log_size: u8,
|
||||
|
||||
/// Upper bound of MSM sizes to run for
|
||||
#[arg(short, long, default_value_t = 22)]
|
||||
#[arg(short, long, default_value_t = 10)]
|
||||
upper_bound_log_size: u8,
|
||||
|
||||
/// Device type (e.g., "CPU", "CUDA")
|
||||
#[arg(short, long, default_value = "CPU")]
|
||||
device_type: String,
|
||||
|
||||
/// Backend installation directory
|
||||
#[arg(short, long, default_value = "")]
|
||||
backend_install_dir: String,
|
||||
}
|
||||
|
||||
// Load backend and set device
|
||||
fn try_load_and_set_backend_device(args: &Args) {
|
||||
if !args
|
||||
.backend_install_dir
|
||||
.is_empty()
|
||||
{
|
||||
println!("Trying to load backend from {}", &args.backend_install_dir);
|
||||
icicle_runtime::runtime::load_backend(&args.backend_install_dir).unwrap();
|
||||
}
|
||||
println!("Setting device {}", args.device_type);
|
||||
icicle_runtime::set_device(&icicle_runtime::Device::new(&args.device_type, 0)).unwrap();
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let args = Args::parse();
|
||||
println!("{:?}", args);
|
||||
|
||||
try_load_and_set_backend_device(&args);
|
||||
|
||||
let lower_bound = args.lower_bound_log_size;
|
||||
let upper_bound = args.upper_bound_log_size;
|
||||
println!("Running Icicle Examples: Rust MSM");
|
||||
let upper_size = 1 << (upper_bound);
|
||||
let upper_size = 1 << upper_bound;
|
||||
|
||||
println!("Generating random inputs on host for bn254...");
|
||||
let upper_points = CurveCfg::generate_random_affine_points(upper_size);
|
||||
let g2_upper_points = G2CurveCfg::generate_random_affine_points(upper_size);
|
||||
@@ -56,59 +68,43 @@ fn main() {
|
||||
let log_size = i;
|
||||
let size = 1 << log_size;
|
||||
println!(
|
||||
"---------------------- MSM size 2^{}={} ------------------------",
|
||||
"---------------------- MSM size 2^{} = {} ------------------------",
|
||||
log_size, size
|
||||
);
|
||||
|
||||
// Setting Bn254 points and scalars
|
||||
let points = HostSlice::from_slice(&upper_points[..size]);
|
||||
let g2_points = HostSlice::from_slice(&g2_upper_points[..size]);
|
||||
let scalars = HostSlice::from_slice(&upper_scalars[..size]);
|
||||
|
||||
// Setting bls12377 points and scalars
|
||||
// let points_bls12377 = &upper_points_bls12377[..size];
|
||||
let points_bls12377 = HostSlice::from_slice(&upper_points_bls12377[..size]); // &upper_points_bls12377[..size];
|
||||
let points_bls12377 = HostSlice::from_slice(&upper_points_bls12377[..size]);
|
||||
let scalars_bls12377 = HostSlice::from_slice(&upper_scalars_bls12377[..size]);
|
||||
|
||||
println!("Configuring bn254 MSM...");
|
||||
let mut msm_results = DeviceVec::<G1Projective>::cuda_malloc(1).unwrap();
|
||||
let mut g2_msm_results = DeviceVec::<G2Projective>::cuda_malloc(1).unwrap();
|
||||
let stream = CudaStream::create().unwrap();
|
||||
let g2_stream = CudaStream::create().unwrap();
|
||||
let mut msm_results = DeviceVec::<G1Projective>::device_malloc(1).unwrap();
|
||||
let mut g2_msm_results = DeviceVec::<G2Projective>::device_malloc(1).unwrap();
|
||||
let mut stream = IcicleStream::create().unwrap();
|
||||
let mut g2_stream = IcicleStream::create().unwrap();
|
||||
let mut cfg = msm::MSMConfig::default();
|
||||
let mut g2_cfg = msm::MSMConfig::default();
|
||||
cfg.ctx
|
||||
.stream = &stream;
|
||||
g2_cfg
|
||||
.ctx
|
||||
.stream = &g2_stream;
|
||||
cfg.stream_handle = *stream;
|
||||
cfg.is_async = true;
|
||||
g2_cfg.stream_handle = *g2_stream;
|
||||
g2_cfg.is_async = true;
|
||||
|
||||
println!("Configuring bls12377 MSM...");
|
||||
let mut msm_results_bls12377 = DeviceVec::<BLS12377G1Projective>::cuda_malloc(1).unwrap();
|
||||
let stream_bls12377 = CudaStream::create().unwrap();
|
||||
let mut msm_results_bls12377 = DeviceVec::<BLS12377G1Projective>::device_malloc(1).unwrap();
|
||||
let mut stream_bls12377 = IcicleStream::create().unwrap();
|
||||
let mut cfg_bls12377 = msm::MSMConfig::default();
|
||||
cfg_bls12377
|
||||
.ctx
|
||||
.stream = &stream_bls12377;
|
||||
cfg_bls12377.stream_handle = *stream_bls12377;
|
||||
cfg_bls12377.is_async = true;
|
||||
|
||||
println!("Executing bn254 MSM on device...");
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
msm::msm(scalars, points, &cfg, &mut msm_results[..]).unwrap();
|
||||
#[cfg(feature = "profile")]
|
||||
println!(
|
||||
"ICICLE BN254 MSM on size 2^{log_size} took: {} ms",
|
||||
start
|
||||
.elapsed()
|
||||
.as_millis()
|
||||
);
|
||||
msm::msm(scalars, g2_points, &g2_cfg, &mut g2_msm_results[..]).unwrap();
|
||||
|
||||
println!("Executing bls12377 MSM on device...");
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
msm::msm(
|
||||
scalars_bls12377,
|
||||
points_bls12377,
|
||||
@@ -116,15 +112,8 @@ fn main() {
|
||||
&mut msm_results_bls12377[..],
|
||||
)
|
||||
.unwrap();
|
||||
#[cfg(feature = "profile")]
|
||||
println!(
|
||||
"ICICLE BLS12377 MSM on size 2^{log_size} took: {} ms",
|
||||
start
|
||||
.elapsed()
|
||||
.as_millis()
|
||||
);
|
||||
|
||||
println!("Moving results to host..");
|
||||
println!("Moving results to host...");
|
||||
let mut msm_host_result = vec![G1Projective::zero(); 1];
|
||||
let mut g2_msm_host_result = vec![G2Projective::zero(); 1];
|
||||
let mut msm_host_result_bls12377 = vec![BLS12377G1Projective::zero(); 1];
|
||||
@@ -132,16 +121,17 @@ fn main() {
|
||||
stream
|
||||
.synchronize()
|
||||
.unwrap();
|
||||
g2_stream
|
||||
.synchronize()
|
||||
.unwrap();
|
||||
msm_results
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut msm_host_result[..]))
|
||||
.unwrap();
|
||||
println!("bn254 result: {:#?}", msm_host_result);
|
||||
|
||||
g2_stream
|
||||
.synchronize()
|
||||
.unwrap();
|
||||
g2_msm_results
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut g2_msm_host_result[..]))
|
||||
.unwrap();
|
||||
println!("bn254 result: {:#?}", msm_host_result);
|
||||
println!("G2 bn254 result: {:#?}", g2_msm_host_result);
|
||||
|
||||
stream_bls12377
|
||||
@@ -152,69 +142,14 @@ fn main() {
|
||||
.unwrap();
|
||||
println!("bls12377 result: {:#?}", msm_host_result_bls12377);
|
||||
|
||||
#[cfg(feature = "arkworks")]
|
||||
{
|
||||
println!("Checking against arkworks...");
|
||||
let ark_points: Vec<Bn254G1Affine> = points
|
||||
.iter()
|
||||
.map(|&point| point.to_ark())
|
||||
.collect();
|
||||
let ark_scalars: Vec<Bn254Fr> = scalars
|
||||
.iter()
|
||||
.map(|scalar| scalar.to_ark())
|
||||
.collect();
|
||||
|
||||
let ark_points_bls12377: Vec<Bls12377G1Affine> = points_bls12377
|
||||
.iter()
|
||||
.map(|point| point.to_ark())
|
||||
.collect();
|
||||
let ark_scalars_bls12377: Vec<Bls12377Fr> = scalars_bls12377
|
||||
.iter()
|
||||
.map(|scalar| scalar.to_ark())
|
||||
.collect();
|
||||
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
let bn254_ark_msm_res = Bn254ArkG1Projective::msm(&ark_points, &ark_scalars).unwrap();
|
||||
println!("Arkworks Bn254 result: {:#?}", bn254_ark_msm_res);
|
||||
#[cfg(feature = "profile")]
|
||||
println!(
|
||||
"Ark BN254 MSM on size 2^{log_size} took: {} ms",
|
||||
start
|
||||
.elapsed()
|
||||
.as_millis()
|
||||
);
|
||||
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
let bls12377_ark_msm_res =
|
||||
Bls12377ArkG1Projective::msm(&ark_points_bls12377, &ark_scalars_bls12377).unwrap();
|
||||
println!("Arkworks Bls12377 result: {:#?}", bls12377_ark_msm_res);
|
||||
#[cfg(feature = "profile")]
|
||||
println!(
|
||||
"Ark BLS12377 MSM on size 2^{log_size} took: {} ms",
|
||||
start
|
||||
.elapsed()
|
||||
.as_millis()
|
||||
);
|
||||
|
||||
let bn254_icicle_msm_res_as_ark = msm_host_result[0].to_ark();
|
||||
let bls12377_icicle_msm_res_as_ark = msm_host_result_bls12377[0].to_ark();
|
||||
|
||||
println!(
|
||||
"Bn254 MSM is correct: {}",
|
||||
bn254_ark_msm_res.eq(&bn254_icicle_msm_res_as_ark)
|
||||
);
|
||||
println!(
|
||||
"Bls12377 MSM is correct: {}",
|
||||
bls12377_ark_msm_res.eq(&bls12377_icicle_msm_res_as_ark)
|
||||
);
|
||||
}
|
||||
|
||||
println!("Cleaning up bn254...");
|
||||
stream
|
||||
.destroy()
|
||||
.unwrap();
|
||||
g2_stream
|
||||
.destroy()
|
||||
.unwrap();
|
||||
|
||||
println!("Cleaning up bls12377...");
|
||||
stream_bls12377
|
||||
.destroy()
|
||||
|
||||
@@ -4,17 +4,12 @@ version = "1.2.0"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
icicle-cuda-runtime = { path = "../../../wrappers/rust/icicle-cuda-runtime" }
|
||||
icicle-core = { path = "../../../wrappers/rust/icicle-core", features = ["arkworks"] }
|
||||
icicle-bn254 = { path = "../../../wrappers/rust/icicle-curves/icicle-bn254", features = ["arkworks"] }
|
||||
icicle-bls12-377 = { path = "../../../wrappers/rust/icicle-curves/icicle-bls12-377", features = ["arkworks"] }
|
||||
icicle-runtime = { path = "../../../wrappers/rust_v3/icicle-runtime" }
|
||||
icicle-core = { path = "../../../wrappers/rust_v3/icicle-core" }
|
||||
icicle-bn254 = { path = "../../../wrappers/rust_v3/icicle-curves/icicle-bn254", features = ["g2"] }
|
||||
icicle-bls12-377 = { path = "../../../wrappers/rust_v3/icicle-curves/icicle-bls12-377" }
|
||||
|
||||
ark-ff = { version = "0.4.0" }
|
||||
ark-poly = "0.4.0"
|
||||
ark-std = "0.4.0"
|
||||
ark-bn254 = { version = "0.4.0" }
|
||||
ark-bls12-377 = { version = "0.4.0" }
|
||||
clap = { version = "<=4.4.12", features = ["derive"] }
|
||||
|
||||
[features]
|
||||
profile = []
|
||||
cuda = ["icicle-runtime/cuda_backend", "icicle-bn254/cuda_backend", "icicle-bls12-377/cuda_backend"]
|
||||
|
||||
@@ -4,10 +4,6 @@
|
||||
|
||||
`ICICLE` provides Rust bindings to CUDA-accelerated C++ implementation of [Number Theoretic Transform](https://github.com/ingonyama-zk/ingopedia/blob/master/src/fft.md).
|
||||
|
||||
## Best Practices
|
||||
|
||||
In order to save time and setting up prerequisites manually, we recommend running this example in our [ZKContainer](../../ZKContainer.md).
|
||||
|
||||
## Usage
|
||||
|
||||
```rust
|
||||
@@ -32,34 +28,9 @@ In this example we use the `BN254` and `BLS12377` fields.
|
||||
7. Compare results with arkworks
|
||||
|
||||
Running the example:
|
||||
|
||||
```sh
|
||||
cargo run --release
|
||||
# for CPU
|
||||
./run.sh -d CPU
|
||||
# for CUDA
|
||||
./run.sh -d CUDA -b /path/to/cuda/backend/install/dir
|
||||
```
|
||||
|
||||
You can add the `--feature profile` flag to measure times of both ICICLE and arkworks.
|
||||
|
||||
> [!NOTE]
|
||||
> The default size is 2^20. You can change this by passing the `--size <size>` option. To change the size to 2^23, run the example like this:
|
||||
|
||||
```sh
|
||||
cargo run --release -- -s 23
|
||||
```
|
||||
|
||||
## Benchmarks
|
||||
|
||||
These benchmarks were run on a 16 core 24 thread i9-12900k CPU and an RTX 3090 Ti GPU

### Single BN254 NTT

| Library\Size | 2^19 | 2^20 | 2^21 | 2^22 | 2^23 |
|--------------|------|------|------|------|------|
| ICICLE | 1.263 ms | 2.986 ms | 4.651 ms | 9.308 ms | 18.618 ms |
| Arkworks | 138 ms | 290 ms | 611 ms | 1,295 ms | 2,715 ms |

### Single BLS12377 NTT

| Library\Size | 2^19 | 2^20 | 2^21 | 2^22 | 2^23 |
|--------------|------|------|------|------|------|
| ICICLE | 1.272 ms | 2.893 ms | 4.728 ms | 9.211 ms | 18.319 ms |
| Arkworks | 135 ms | 286 ms | 605 ms | 1,279 ms | 2,682 ms |
examples/rust/ntt/run.sh (new executable file, 59 lines)
@@ -0,0 +1,59 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately if a command exits with a non-zero status
|
||||
set -e
|
||||
|
||||
# Function to display usage information
|
||||
show_help() {
|
||||
echo "Usage: $0 [-d DEVICE_TYPE] [-b BACKEND_INSTALL_DIR]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -d DEVICE_TYPE Specify the device type (default: CPU)"
|
||||
echo " -b BACKEND_INSTALL_DIR Specify the backend installation directory (default: empty)"
|
||||
echo " -h Show this help message"
|
||||
exit 0
|
||||
}
|
||||
|
||||
# Parse command line options
|
||||
while getopts ":d:b:h" opt; do
|
||||
case ${opt} in
|
||||
d )
|
||||
DEVICE_TYPE=$OPTARG
|
||||
;;
|
||||
b )
|
||||
BACKEND_INSTALL_DIR="$(realpath ${OPTARG})"
|
||||
;;
|
||||
h )
|
||||
show_help
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
show_help
|
||||
;;
|
||||
: )
|
||||
echo "Invalid option: -$OPTARG requires an argument" 1>&2
|
||||
show_help
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Set default values if not provided
|
||||
: "${DEVICE_TYPE:=CPU}"
|
||||
: "${BACKEND_INSTALL_DIR:=}"
|
||||
|
||||
# Create necessary directories
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
ICILE_DIR=$(realpath "../../../icicle_v3/")
|
||||
ICICLE_CUDA_SOURCE_DIR="${ICILE_DIR}/backend/cuda"
|
||||
|
||||
# Build Icicle and the example app that links to it
|
||||
if [ "$DEVICE_TYPE" == "CUDA" ] && [ ! -d "${BACKEND_INSTALL_DIR}" ] && [ -d "${ICICLE_CUDA_SOURCE_DIR}" ]; then
|
||||
echo "Building icicle with CUDA backend"
|
||||
BACKEND_INSTALL_DIR="./target/release/deps/icicle/lib/backend"
|
||||
cargo run --release --features=cuda -- --device-type "${DEVICE_TYPE}" --backend-install-dir "${BACKEND_INSTALL_DIR}"
|
||||
else
|
||||
echo "Building icicle without CUDA backend, BACKEND_INSTALL_DIR=${BACKEND_INSTALL_DIR}"
|
||||
cargo run --release -- --device-type "${DEVICE_TYPE}" --backend-install-dir "${BACKEND_INSTALL_DIR}"
|
||||
fi
|
||||
@@ -1,93 +1,94 @@
|
||||
use icicle_bn254::curve::{ScalarCfg, ScalarField};
|
||||
|
||||
use icicle_bls12_377::curve::{ScalarCfg as BLS12377ScalarCfg, ScalarField as BLS12377ScalarField};
|
||||
use icicle_bn254::curve::{ScalarCfg as Bn254ScalarCfg, ScalarField as Bn254ScalarField};
|
||||
use icicle_runtime::memory::{DeviceVec, HostSlice};
|
||||
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::DeviceContext,
|
||||
memory::{DeviceVec, HostSlice},
|
||||
stream::CudaStream,
|
||||
};
|
||||
|
||||
use clap::Parser;
|
||||
use icicle_core::{
|
||||
ntt::{self, initialize_domain},
|
||||
traits::{FieldImpl, GenerateRandom},
|
||||
};
|
||||
|
||||
use icicle_core::traits::ArkConvertible;
|
||||
|
||||
use ark_bls12_377::Fr as Bls12377Fr;
|
||||
use ark_bn254::Fr as Bn254Fr;
|
||||
use ark_ff::FftField;
|
||||
use ark_poly::{EvaluationDomain, Radix2EvaluationDomain};
|
||||
use ark_std::cmp::{Ord, Ordering};
|
||||
use std::convert::TryInto;
|
||||
|
||||
#[cfg(feature = "profile")]
|
||||
use std::time::Instant;
|
||||
|
||||
use clap::Parser;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
struct Args {
|
||||
/// Size of NTT to run (20 for 2^20)
|
||||
#[arg(short, long, default_value_t = 20)]
|
||||
size: u8,
|
||||
|
||||
/// Device type (e.g., "CPU", "CUDA")
|
||||
#[arg(short, long, default_value = "CPU")]
|
||||
device_type: String,
|
||||
|
||||
/// Backend installation directory
|
||||
#[arg(short, long, default_value = "")]
|
||||
backend_install_dir: String,
|
||||
}
|
||||
|
||||
// Load backend and set device
|
||||
fn try_load_and_set_backend_device(args: &Args) {
|
||||
if !args
|
||||
.backend_install_dir
|
||||
.is_empty()
|
||||
{
|
||||
println!("Trying to load backend from {}", &args.backend_install_dir);
|
||||
icicle_runtime::runtime::load_backend(&args.backend_install_dir).unwrap();
|
||||
}
|
||||
println!("Setting device {}", args.device_type);
|
||||
icicle_runtime::set_device(&icicle_runtime::Device::new(&args.device_type, 0)).unwrap();
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let args = Args::parse();
|
||||
println!("{:?}", args);
|
||||
|
||||
try_load_and_set_backend_device(&args);
|
||||
|
||||
println!("Running Icicle Examples: Rust NTT");
|
||||
let log_size = args.size;
|
||||
let size = 1 << log_size;
|
||||
println!(
|
||||
"---------------------- NTT size 2^{}={} ------------------------",
|
||||
"---------------------- NTT size 2^{} = {} ------------------------",
|
||||
log_size, size
|
||||
);
|
||||
|
||||
// Setting Bn254 points and scalars
|
||||
println!("Generating random inputs on host for bn254...");
|
||||
let scalars = ScalarCfg::generate_random(size);
|
||||
let mut ntt_results = DeviceVec::<ScalarField>::cuda_malloc(size).unwrap();
|
||||
let scalars = Bn254ScalarCfg::generate_random(size);
|
||||
let mut ntt_results = DeviceVec::<Bn254ScalarField>::device_malloc(size).unwrap();
|
||||
|
||||
// Setting bls12377 points and scalars
|
||||
println!("Generating random inputs on host for bls12377...");
|
||||
let scalars_bls12377 = BLS12377ScalarCfg::generate_random(size);
|
||||
let mut ntt_results_bls12377 = DeviceVec::<BLS12377ScalarField>::cuda_malloc(size).unwrap();
|
||||
let mut ntt_results_bls12377 = DeviceVec::<BLS12377ScalarField>::device_malloc(size).unwrap();
|
||||
|
||||
println!("Setting up bn254 Domain...");
|
||||
let icicle_omega = <Bn254Fr as FftField>::get_root_of_unity(
|
||||
size.try_into()
|
||||
.unwrap(),
|
||||
initialize_domain(
|
||||
ntt::get_root_of_unity::<Bn254ScalarField>(
|
||||
size.try_into()
|
||||
.unwrap(),
|
||||
),
|
||||
&ntt::NTTInitDomainConfig::default(),
|
||||
)
|
||||
.unwrap();
|
||||
let ctx = DeviceContext::default();
|
||||
initialize_domain(ScalarField::from_ark(icicle_omega), &ctx, true).unwrap();
|
||||
|
||||
println!("Configuring bn254 NTT...");
|
||||
let stream = CudaStream::create().unwrap();
|
||||
let mut cfg = ntt::NTTConfig::<'_, ScalarField>::default();
|
||||
cfg.ctx
|
||||
.stream = &stream;
|
||||
cfg.is_async = true;
|
||||
let cfg = ntt::NTTConfig::<Bn254ScalarField>::default();
|
||||
|
||||
println!("Setting up bls12377 Domain...");
|
||||
let icicle_omega = <Bls12377Fr as FftField>::get_root_of_unity(
|
||||
size.try_into()
|
||||
.unwrap(),
|
||||
initialize_domain(
|
||||
ntt::get_root_of_unity::<BLS12377ScalarField>(
|
||||
size.try_into()
|
||||
.unwrap(),
|
||||
),
|
||||
&ntt::NTTInitDomainConfig::default(),
|
||||
)
|
||||
.unwrap();
|
||||
// reusing ctx from above
|
||||
initialize_domain(BLS12377ScalarField::from_ark(icicle_omega), &ctx, true).unwrap();
|
||||
|
||||
println!("Configuring bls12377 NTT...");
|
||||
let stream_bls12377 = CudaStream::create().unwrap();
|
||||
let mut cfg_bls12377 = ntt::NTTConfig::<'_, BLS12377ScalarField>::default();
|
||||
cfg_bls12377
|
||||
.ctx
|
||||
.stream = &stream_bls12377;
|
||||
cfg_bls12377.is_async = true;
|
||||
let cfg_bls12377 = ntt::NTTConfig::<BLS12377ScalarField>::default();
|
||||
|
||||
println!("Executing bn254 NTT on device...");
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
ntt::ntt(
|
||||
HostSlice::from_slice(&scalars),
|
||||
@@ -96,7 +97,6 @@ fn main() {
|
||||
&mut ntt_results[..],
|
||||
)
|
||||
.unwrap();
|
||||
#[cfg(feature = "profile")]
|
||||
println!(
|
||||
"ICICLE BN254 NTT on size 2^{log_size} took: {} μs",
|
||||
start
|
||||
@@ -105,7 +105,6 @@ fn main() {
|
||||
);
|
||||
|
||||
println!("Executing bls12377 NTT on device...");
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
ntt::ntt(
|
||||
HostSlice::from_slice(&scalars_bls12377),
|
||||
@@ -114,7 +113,6 @@ fn main() {
|
||||
&mut ntt_results_bls12377[..],
|
||||
)
|
||||
.unwrap();
|
||||
#[cfg(feature = "profile")]
|
||||
println!(
|
||||
"ICICLE BLS12377 NTT on size 2^{log_size} took: {} μs",
|
||||
start
|
||||
@@ -122,82 +120,14 @@ fn main() {
|
||||
.as_micros()
|
||||
);
|
||||
|
||||
println!("Moving results to host..");
|
||||
stream
|
||||
.synchronize()
|
||||
.unwrap();
|
||||
let mut host_bn254_results = vec![ScalarField::zero(); size];
|
||||
println!("Moving results to host...");
|
||||
let mut host_bn254_results = vec![Bn254ScalarField::zero(); size];
|
||||
ntt_results
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut host_bn254_results[..]))
|
||||
.unwrap();
|
||||
|
||||
stream_bls12377
|
||||
.synchronize()
|
||||
.unwrap();
|
||||
let mut host_bls12377_results = vec![BLS12377ScalarField::zero(); size];
|
||||
ntt_results_bls12377
|
||||
.copy_to_host(HostSlice::from_mut_slice(&mut host_bls12377_results[..]))
|
||||
.unwrap();
|
||||
|
||||
println!("Checking against arkworks...");
|
||||
let mut ark_scalars: Vec<Bn254Fr> = scalars
|
||||
.iter()
|
||||
.map(|scalar| scalar.to_ark())
|
||||
.collect();
|
||||
let bn254_domain = <Radix2EvaluationDomain<Bn254Fr> as EvaluationDomain<Bn254Fr>>::new(size).unwrap();
|
||||
|
||||
let mut ark_scalars_bls12377: Vec<Bls12377Fr> = scalars_bls12377
|
||||
.iter()
|
||||
.map(|scalar| scalar.to_ark())
|
||||
.collect();
|
||||
let bls12_377_domain = <Radix2EvaluationDomain<Bls12377Fr> as EvaluationDomain<Bls12377Fr>>::new(size).unwrap();
|
||||
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
bn254_domain.fft_in_place(&mut ark_scalars);
|
||||
#[cfg(feature = "profile")]
|
||||
println!(
|
||||
"Ark BN254 NTT on size 2^{log_size} took: {} ms",
|
||||
start
|
||||
.elapsed()
|
||||
.as_millis()
|
||||
);
|
||||
|
||||
#[cfg(feature = "profile")]
|
||||
let start = Instant::now();
|
||||
bls12_377_domain.fft_in_place(&mut ark_scalars_bls12377);
|
||||
#[cfg(feature = "profile")]
|
||||
println!(
|
||||
"Ark BLS12377 NTT on size 2^{log_size} took: {} ms",
|
||||
start
|
||||
.elapsed()
|
||||
.as_millis()
|
||||
);
|
||||
|
||||
host_bn254_results
|
||||
.iter()
|
||||
.zip(ark_scalars.iter())
|
||||
.for_each(|(icicle_scalar, &ark_scalar)| {
|
||||
assert_eq!(ark_scalar.cmp(&icicle_scalar.to_ark()), Ordering::Equal);
|
||||
});
|
||||
println!("Bn254 NTT is correct");
|
||||
|
||||
host_bls12377_results
|
||||
.iter()
|
||||
.zip(ark_scalars_bls12377.iter())
|
||||
.for_each(|(icicle_scalar, &ark_scalar)| {
|
||||
assert_eq!(ark_scalar.cmp(&icicle_scalar.to_ark()), Ordering::Equal);
|
||||
});
|
||||
|
||||
println!("Bls12377 NTT is correct");
|
||||
|
||||
println!("Cleaning up bn254...");
|
||||
stream
|
||||
.destroy()
|
||||
.unwrap();
|
||||
println!("Cleaning up bls12377...");
|
||||
stream_bls12377
|
||||
.destroy()
|
||||
.unwrap();
|
||||
println!("");
|
||||
}
|
||||
|
||||
@@ -4,11 +4,13 @@ version = "1.2.0"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
icicle-cuda-runtime = { path = "../../../wrappers/rust/icicle-cuda-runtime" }
|
||||
icicle-core = { path = "../../../wrappers/rust/icicle-core" }
|
||||
icicle-bn254 = { path = "../../../wrappers/rust/icicle-curves/icicle-bn254" }
|
||||
icicle-babybear = { path = "../../../wrappers/rust/icicle-fields/icicle-babybear" }
|
||||
icicle-runtime = { path = "../../../wrappers/rust_v3/icicle-runtime" }
|
||||
icicle-core = { path = "../../../wrappers/rust_v3/icicle-core" }
|
||||
icicle-bn254 = { path = "../../../wrappers/rust_v3/icicle-curves/icicle-bn254", features = ["g2"] }
|
||||
icicle-babybear = { path = "../../../wrappers/rust_v3/icicle-fields/icicle-babybear" }
|
||||
|
||||
clap = { version = "<=4.4.12", features = ["derive"] }
|
||||
|
||||
[features]
|
||||
profile = []
|
||||
cuda = ["icicle-runtime/cuda_backend", "icicle-bn254/cuda_backend", "icicle-babybear/cuda_backend"]
|
||||
|
||||
|
||||
15
examples/rust/polynomials/README.md
Normal file
@@ -0,0 +1,15 @@
|
||||
# ICICLE example: Polynomial API
|
||||
|
||||
## Key-Takeaway
|
||||
|
||||
`ICICLE` provides Rust bindings to the [Polynomial API](https://dev.ingonyama.com/icicle/rust-bindings/polynomials).
|
||||
|
||||
In this example we use the `BN254` and `babybear` fields to demonstrate how to compute on polynomials.
|
||||
|
||||
Running the example:
|
||||
```sh
|
||||
# for CPU
|
||||
./run.sh -d CPU
|
||||
# for CUDA
|
||||
./run.sh -d CUDA -b /path/to/cuda/backend/install/dir
|
||||
```
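For orientation before the full sources below, here is a minimal sketch of the same Polynomial API (BN254 only). It assumes the built-in CPU device and uses illustrative sizes; it is not the example itself, whose complete `main.rs` appears in the diff that follows.

```rust
use icicle_bn254::curve::{ScalarCfg, ScalarField};
use icicle_bn254::polynomials::DensePolynomial;
use icicle_core::{
    ntt::{get_root_of_unity, initialize_domain, NTTInitDomainConfig},
    polynomials::UnivariatePolynomial,
    traits::{FieldImpl, GenerateRandom},
};
use icicle_runtime::memory::HostSlice;

fn main() {
    // Use the built-in CPU device (a CUDA backend can be loaded and selected instead, as in run.sh).
    icicle_runtime::set_device(&icicle_runtime::Device::new("CPU", 0)).unwrap();

    // Polynomial operations rely on NTT, so initialize an NTT domain large enough for all sizes below.
    let max_ntt_size: u64 = 1 << 10;
    initialize_domain(
        get_root_of_unity::<ScalarField>(max_ntt_size),
        &NTTInitDomainConfig::default(),
    )
    .unwrap();

    // Two random polynomials built from host-side coefficients.
    let size = 1 << 6;
    let coeffs_f = ScalarCfg::generate_random(size);
    let coeffs_g = ScalarCfg::generate_random(size);
    let f = DensePolynomial::from_coeffs(HostSlice::from_slice(&coeffs_f), size);
    let g = DensePolynomial::from_coeffs(HostSlice::from_slice(&coeffs_g), size);

    // Arithmetic and single-point evaluation.
    let sum = &f + &g;
    let prod = &f * &g;
    let _sum_at_five = sum.eval(&ScalarField::from_u32(5));
    println!("deg(f+g) = {}, deg(f*g) = {}", sum.degree(), prod.degree());
}
```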
|
||||
59
examples/rust/polynomials/run.sh
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Exit immediately if a command exits with a non-zero status
|
||||
set -e
|
||||
|
||||
# Function to display usage information
|
||||
show_help() {
|
||||
echo "Usage: $0 [-d DEVICE_TYPE] [-b BACKEND_INSTALL_DIR]"
|
||||
echo
|
||||
echo "Options:"
|
||||
echo " -d DEVICE_TYPE Specify the device type (default: CPU)"
|
||||
echo " -b BACKEND_INSTALL_DIR Specify the backend installation directory (default: empty)"
|
||||
echo " -h Show this help message"
|
||||
exit 0
|
||||
}
|
||||
|
||||
# Parse command line options
|
||||
while getopts ":d:b:h" opt; do
|
||||
case ${opt} in
|
||||
d )
|
||||
DEVICE_TYPE=$OPTARG
|
||||
;;
|
||||
b )
|
||||
BACKEND_INSTALL_DIR="$(realpath ${OPTARG})"
|
||||
;;
|
||||
h )
|
||||
show_help
|
||||
;;
|
||||
\? )
|
||||
echo "Invalid option: -$OPTARG" 1>&2
|
||||
show_help
|
||||
;;
|
||||
: )
|
||||
echo "Invalid option: -$OPTARG requires an argument" 1>&2
|
||||
show_help
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
# Set default values if not provided
|
||||
: "${DEVICE_TYPE:=CPU}"
|
||||
: "${BACKEND_INSTALL_DIR:=}"
|
||||
|
||||
# Create necessary directories
|
||||
mkdir -p build/example
|
||||
mkdir -p build/icicle
|
||||
|
||||
ICICLE_DIR=$(realpath "../../../icicle_v3/")
|
||||
ICICLE_CUDA_SOURCE_DIR="${ICICLE_DIR}/backend/cuda"
|
||||
|
||||
# Build Icicle and the example app that links to it
|
||||
if [ "$DEVICE_TYPE" == "CUDA" ] && [ ! -d "${BACKEND_INSTALL_DIR}" ] && [ -d "${ICICLE_CUDA_SOURCE_DIR}" ]; then
|
||||
echo "Building icicle with CUDA backend"
|
||||
BACKEND_INSTALL_DIR="./target/release/deps/icicle/lib/backend"
|
||||
cargo run --release --features=cuda -- --device-type "${DEVICE_TYPE}" --backend-install-dir "${BACKEND_INSTALL_DIR}"
|
||||
else
|
||||
echo "Building icicle without CUDA backend, BACKEND_INSTALL_DIR=${BACKEND_INSTALL_DIR}"
|
||||
cargo run --release -- --device-type "${DEVICE_TYPE}" --backend-install-dir "${BACKEND_INSTALL_DIR}"
|
||||
fi
|
||||
@@ -3,21 +3,16 @@ use icicle_babybear::polynomials::DensePolynomial as PolynomialBabyBear;
|
||||
use icicle_bn254::curve::ScalarField as bn254Scalar;
|
||||
use icicle_bn254::polynomials::DensePolynomial as PolynomialBn254;
|
||||
|
||||
use icicle_cuda_runtime::{
|
||||
device_context::DeviceContext,
|
||||
memory::{DeviceVec, HostSlice},
|
||||
};
|
||||
use icicle_runtime::memory::{DeviceVec, HostSlice};
|
||||
|
||||
use icicle_core::{
|
||||
ntt::{get_root_of_unity, initialize_domain},
|
||||
ntt::{get_root_of_unity, initialize_domain, NTTInitDomainConfig},
|
||||
polynomials::UnivariatePolynomial,
|
||||
traits::{FieldImpl, GenerateRandom},
|
||||
};
|
||||
|
||||
#[cfg(feature = "profile")]
|
||||
use std::time::Instant;
|
||||
|
||||
use clap::Parser;
|
||||
use std::time::Instant;
|
||||
|
||||
#[derive(Parser, Debug)]
|
||||
struct Args {
|
||||
@@ -26,21 +21,41 @@ struct Args {
|
||||
max_ntt_log_size: u8,
|
||||
#[arg(short, long, default_value_t = 15)]
|
||||
poly_log_size: u8,
|
||||
|
||||
/// Device type (e.g., "CPU", "CUDA")
|
||||
#[arg(short, long, default_value = "CPU")]
|
||||
device_type: String,
|
||||
|
||||
/// Backend installation directory
|
||||
#[arg(short, long, default_value = "/opt/icicle/backend")]
|
||||
backend_install_dir: String,
|
||||
}
|
||||
|
||||
fn init(max_ntt_size: u64) {
|
||||
// initialize NTT domain for all fields!. Polynomials ops relies on NTT.
|
||||
// Load backend and set device
|
||||
fn try_load_and_set_backend_device(args: &Args) {
|
||||
if !args
|
||||
.backend_install_dir
|
||||
.is_empty()
|
||||
{
|
||||
println!("Trying to load backend from {}", &args.backend_install_dir);
|
||||
icicle_runtime::runtime::load_backend(&args.backend_install_dir).unwrap();
|
||||
}
|
||||
println!("Setting device {}", args.device_type);
|
||||
let device = icicle_runtime::Device::new(&args.device_type, 0 /* =device_id*/);
|
||||
icicle_runtime::set_device(&device).unwrap();
|
||||
}
|
||||
|
||||
fn init_ntt_domain(max_ntt_size: u64) {
|
||||
// Initialize NTT domain for all fields. Polynomial operations rely on NTT.
|
||||
println!(
|
||||
"Initializing NTT domain for max size 2^{}",
|
||||
max_ntt_size.trailing_zeros()
|
||||
);
|
||||
let rou_bn254: bn254Scalar = get_root_of_unity(max_ntt_size);
|
||||
let ctx = DeviceContext::default();
|
||||
initialize_domain(rou_bn254, &ctx, false /*=fast twiddles mode*/).unwrap();
|
||||
initialize_domain(rou_bn254, &NTTInitDomainConfig::default()).unwrap();
|
||||
|
||||
let rou_babybear: babybearScalar = get_root_of_unity(max_ntt_size);
|
||||
initialize_domain(rou_babybear, &ctx, false /*=fast twiddles mode*/).unwrap();
|
||||
|
||||
// initialize the cuda backend for polynomials
|
||||
// make sure to initialize it per field
|
||||
PolynomialBn254::init_cuda_backend();
|
||||
PolynomialBabyBear::init_cuda_backend();
|
||||
initialize_domain(rou_babybear, &NTTInitDomainConfig::default()).unwrap();
|
||||
}
|
||||
|
||||
fn randomize_poly<P>(size: usize, from_coeffs: bool) -> P
|
||||
@@ -49,6 +64,7 @@ where
|
||||
P::Field: FieldImpl,
|
||||
P::FieldConfig: GenerateRandom<P::Field>,
|
||||
{
|
||||
println!("Randomizing polynomial of size {} (from_coeffs: {})", size, from_coeffs);
|
||||
let coeffs_or_evals = P::FieldConfig::generate_random(size);
|
||||
let p = if from_coeffs {
|
||||
P::from_coeffs(HostSlice::from_slice(&coeffs_or_evals), size)
|
||||
@@ -60,42 +76,61 @@ where
|
||||
|
||||
fn main() {
|
||||
let args = Args::parse();
|
||||
init(1 << args.max_ntt_log_size);
|
||||
println!("{:?}", args);
|
||||
|
||||
try_load_and_set_backend_device(&args);
|
||||
|
||||
init_ntt_domain(1 << args.max_ntt_log_size);
|
||||
|
||||
// randomize three polynomials f,g,h over bn254 scalar field
|
||||
let poly_size = 1 << args.poly_log_size;
|
||||
|
||||
println!("Randomizing polynomials [f(x),g(x),h(x)] over bn254 scalar field...");
|
||||
let f = randomize_poly::<PolynomialBn254>(poly_size, true /*from random coeffs*/);
|
||||
let g = randomize_poly::<PolynomialBn254>(poly_size / 2, true /*from random coeffs*/);
|
||||
let h = randomize_poly::<PolynomialBn254>(poly_size / 4, false /*from random evaluations on rou*/);
|
||||
|
||||
// randomize two polynomials over babybear field
|
||||
println!("Randomizing polynomials [f_babybear(x), g_babyber(x)] over babybear field...");
|
||||
let f_babybear = randomize_poly::<PolynomialBabyBear>(poly_size, true /*from random coeffs*/);
|
||||
let g_babybear = randomize_poly::<PolynomialBabyBear>(poly_size / 2, true /*from random coeffs*/);
|
||||
|
||||
let start = Instant::now();
|
||||
// Arithmetic
|
||||
println!("Computing t0(x) = f(x) + g(x)");
|
||||
let t0 = &f + &g;
|
||||
println!("Computing t1(x) f(x) * h(x)");
|
||||
let t1 = &f * &h;
|
||||
let (q, r) = t1.divide(&t0); // computes q,r for t1(x)=q(x)*t0(x)+r(x)
|
||||
println!("Computing q(x),r(x) = t1(x)/t0(x) (where t1(x) = q(x) * t0(x) + r(x))");
|
||||
let (q, r) = t1.divide(&t0);
|
||||
|
||||
println!("Computing f_babybear(x) * g_babybear(x)");
|
||||
let _r_babybear = &f_babybear * &g_babybear;
|
||||
|
||||
// check degree
|
||||
let _r_degree = r.degree();
|
||||
// Check degree
|
||||
println!("Degree of r(x): {}", r.degree());
|
||||
|
||||
// evaluate in single domain point
|
||||
// Evaluate in single domain point
|
||||
let five = bn254Scalar::from_u32(5);
|
||||
println!("Evaluating q(5)");
|
||||
let q_at_five = q.eval(&five);
|
||||
|
||||
// evaluate on domain. Note: domain and image can be either Host or Device slice.
|
||||
// in this example domain in on host and evals on device.
|
||||
// Evaluate on domain
|
||||
let host_domain = [five, bn254Scalar::from_u32(30)];
|
||||
let mut device_image = DeviceVec::<bn254Scalar>::cuda_malloc(host_domain.len()).unwrap();
|
||||
t1.eval_on_domain(HostSlice::from_slice(&host_domain), &mut device_image[..]);
|
||||
let mut device_image = DeviceVec::<bn254Scalar>::device_malloc(host_domain.len()).unwrap();
|
||||
println!("Evaluating t1(x) on domain {:?}", host_domain);
|
||||
t1.eval_on_domain(HostSlice::from_slice(&host_domain), &mut device_image[..]); // for NTT use eval_on_rou_domain()
|
||||
|
||||
// slicing
|
||||
// Slicing
|
||||
println!("Performing slicing operations on h");
|
||||
let o = h.odd();
|
||||
let e = h.even();
|
||||
let fold = &e + &(&o * &q_at_five); // e(x) + o(x)*scalar
|
||||
let fold = &e + &(&o * &q_at_five); // e(x) + o(x) * scalar
|
||||
|
||||
let _coeff = fold.get_coeff(2); // coeff of x^2
|
||||
let _coeff = fold.get_coeff(2); // Coeff of x^2
|
||||
|
||||
println!(
|
||||
"Polynomial computation on selected device took: {} ms",
|
||||
start
|
||||
.elapsed()
|
||||
.as_millis()
|
||||
);
|
||||
}
|
||||
|
||||
3
examples/rust/poseidon/run.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/bin/bash
|
||||
|
||||
# TODO implement
|
||||
93
icicle_v3/CMakeLists.txt
Normal file
@@ -0,0 +1,93 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
project(icicle_v3)
|
||||
|
||||
# Specify the C++ standard
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED True)
|
||||
|
||||
include(cmake/field.cmake)
|
||||
include(cmake/curve.cmake)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
option(NTT "Build NTT" ON)
|
||||
option(MSM "Build MSM" ON)
|
||||
option(EXT_FIELD "Build extension field" OFF)
|
||||
option(ECNTT "Build ECNTT" OFF)
|
||||
option(G2 "Build G2" OFF)
|
||||
option(BUILD_TESTS "Build unit tests" OFF)
|
||||
option(CPU_BACKEND "Build CPU backend" ON)
|
||||
option(CUDA_BACKEND "Branch/commit to pull for CUDA backend, local path or OFF to disable pulling" OFF)
|
||||
|
||||
# device API library
|
||||
add_library(icicle_device SHARED
|
||||
src/device_api.cpp
|
||||
src/runtime.cpp
|
||||
src/config_extension.cpp
|
||||
)
|
||||
target_link_libraries(icicle_device PUBLIC dl)
|
||||
include_directories(include)
|
||||
|
||||
# Define the install directory (default is /usr/local)
|
||||
if(NOT DEFINED CMAKE_INSTALL_PREFIX)
|
||||
set(CMAKE_INSTALL_PREFIX "${CMAKE_BINARY_DIR}/install" CACHE PATH "Install path prefix")
|
||||
endif()
|
||||
message("-- CMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}")
|
||||
set(CMAKE_INSTALL_RPATH ${CMAKE_INSTALL_PREFIX}/lib)
|
||||
# set(CMAKE_BUILD_WITH_INSTALL_RPATH TRUE)
|
||||
|
||||
# Specify the installation rules
|
||||
install(TARGETS icicle_device
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib)
|
||||
|
||||
if((DEFINED CURVE) AND (DEFINED FIELD))
|
||||
if(NOT ("${CURVE}" STREQUAL "${FIELD}"))
|
||||
message(FATAL_ERROR "CURVE and FIELD should be defined at the same time unless they are equal")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# building a curve also builds its scalar field
|
||||
if(CURVE)
|
||||
check_curve()
|
||||
setup_curve_target()
|
||||
elseif(FIELD)
|
||||
check_field()
|
||||
setup_field_target()
|
||||
endif()
|
||||
|
||||
if (CPU_BACKEND)
|
||||
add_subdirectory(backend/cpu)
|
||||
endif()
|
||||
|
||||
if (CUDA_BACKEND)
|
||||
string(TOLOWER "${CUDA_BACKEND}" CUDA_BACKEND_LOWER)
|
||||
if (CUDA_BACKEND_LOWER STREQUAL "local")
|
||||
# CUDA backend is local, no need to pull
|
||||
message(STATUS "Adding CUDA backend from local path: icicle/backend/cuda")
|
||||
add_subdirectory(backend/cuda)
|
||||
|
||||
# Set the compile definition for the backend build directory
|
||||
add_compile_definitions(BACKEND_BUILD_DIR="${CMAKE_BINARY_DIR}/backend")
|
||||
else()
|
||||
set(CUDA_BACKEND_URL "git@github.com:ingonyama-zk/icicle-cuda-backend.git")
|
||||
|
||||
include(FetchContent)
|
||||
message("-- Fetching cuda backend from ${CUDA_BACKEND_URL}:${CUDA_BACKEND}")
|
||||
FetchContent_Declare(
|
||||
cuda_backend
|
||||
GIT_REPOSITORY ${CUDA_BACKEND_URL}
|
||||
GIT_TAG ${CUDA_BACKEND}
|
||||
)
|
||||
FetchContent_MakeAvailable(cuda_backend)
|
||||
# Set the compile definition for the backend build directory
|
||||
add_compile_definitions(BACKEND_BUILD_DIR="${CMAKE_BINARY_DIR}/_deps/cuda_backend-build")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (BUILD_TESTS)
|
||||
add_subdirectory(tests)
|
||||
endif()
|
||||
|
||||
31
icicle_v3/backend/cpu/CMakeLists.txt
Normal file
@@ -0,0 +1,31 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
# CPU backend is built directly into icicle library
|
||||
|
||||
target_sources(icicle_device PRIVATE src/cpu_device_api.cpp)
|
||||
|
||||
# field API library
|
||||
if (FIELD)
|
||||
target_sources(icicle_field PRIVATE
|
||||
src/field/cpu_vec_ops.cpp
|
||||
)
|
||||
if (NTT)
|
||||
target_sources(icicle_field PRIVATE src/field/cpu_ntt.cpp src/polynomials/cpu_polynomial_backend.cpp)
|
||||
endif()
|
||||
target_include_directories(icicle_field PRIVATE include)
|
||||
endif() # FIELD
|
||||
|
||||
# curve API library
|
||||
if (CURVE)
|
||||
target_sources(icicle_curve PRIVATE
|
||||
src/curve/cpu_msm.cpp
|
||||
src/curve/cpu_mont_conversion.cpp
|
||||
)
|
||||
if (ECNTT)
|
||||
target_sources(icicle_curve PRIVATE src/curve/cpu_ecntt.cpp)
|
||||
endif()
|
||||
# incdir is PUBLIC because config-extension headers are exposed to app
|
||||
target_include_directories(icicle_curve PUBLIC include)
|
||||
endif()
|
||||
|
||||
|
||||
357
icicle_v3/backend/cpu/include/cpu_ntt.h
Normal file
@@ -0,0 +1,357 @@
|
||||
#pragma once
|
||||
#include "icicle/backend/ntt_backend.h"
|
||||
#include "icicle/errors.h"
|
||||
#include "icicle/runtime.h"
|
||||
#include "icicle/utils/log.h"
|
||||
#include "icicle/fields/field_config.h"
|
||||
#include "icicle/vec_ops.h"
|
||||
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
||||
using namespace field_config;
|
||||
using namespace icicle;
|
||||
|
||||
namespace ntt_cpu {
|
||||
|
||||
template <typename S>
|
||||
class CpuNttDomain
|
||||
{
|
||||
int max_size = 0;
|
||||
int max_log_size = 0;
|
||||
std::unique_ptr<S[]> twiddles;
|
||||
std::mutex domain_mutex;
|
||||
|
||||
public:
|
||||
static eIcicleError
|
||||
cpu_ntt_init_domain(const Device& device, const S& primitive_root, const NTTInitDomainConfig& config);
|
||||
static eIcicleError cpu_ntt_release_domain(const Device& device);
|
||||
static eIcicleError get_root_of_unity_from_domain(const Device& device, uint64_t logn, S* rou /*OUT*/);
|
||||
|
||||
template <typename U, typename E>
|
||||
eIcicleError
|
||||
cpu_ntt_ref(const Device& device, const E* input, uint64_t size, NTTDir dir, NTTConfig<S>& config, E* output);
|
||||
|
||||
template <typename U, typename E>
|
||||
eIcicleError
|
||||
cpu_ntt(const Device& device, const E* input, uint64_t size, NTTDir dir, NTTConfig<S>& config, E* output);
|
||||
|
||||
const S* get_twiddles() const { return twiddles.get(); }
|
||||
const int get_max_size() const { return max_size; }
|
||||
|
||||
static inline CpuNttDomain<S> s_ntt_domain;
|
||||
};
|
||||
|
||||
template <typename S>
|
||||
eIcicleError
|
||||
CpuNttDomain<S>::cpu_ntt_init_domain(const Device& device, const S& primitive_root, const NTTInitDomainConfig& config)
|
||||
{
|
||||
// (1) Check if the domain needs to be refreshed. This needs to be checked before locking the mutex to avoid unnecessary
|
||||
// locking
|
||||
if (s_ntt_domain.twiddles != nullptr) { return eIcicleError::SUCCESS; }
|
||||
|
||||
// Lock the mutex to ensure thread safety during initialization
|
||||
std::lock_guard<std::mutex> lock(s_ntt_domain.domain_mutex);
|
||||
|
||||
// Check if domain is already initialized by another thread
|
||||
if (s_ntt_domain.twiddles == nullptr) {
|
||||
// (2) build the domain
|
||||
|
||||
bool found_logn = false;
|
||||
S omega = primitive_root;
|
||||
const unsigned omegas_count = S::get_omegas_count();
|
||||
for (int i = 0; i < omegas_count; i++) {
|
||||
omega = S::sqr(omega);
|
||||
if (!found_logn) {
|
||||
++s_ntt_domain.max_log_size;
|
||||
found_logn = omega == S::one();
|
||||
if (found_logn) break;
|
||||
}
|
||||
}
|
||||
|
||||
s_ntt_domain.max_size = (int)pow(2, s_ntt_domain.max_log_size);
|
||||
if (omega != S::one()) {
|
||||
ICICLE_LOG_ERROR << "Primitive root provided to the InitDomain function is not a root-of-unity";
|
||||
return eIcicleError::INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
// calculate twiddles
|
||||
// Note: radix-2 INTT needs ONE in the last element (in addition to the first element), therefore the table has n+1 elements
|
||||
|
||||
// Using temp_twiddles to store twiddles before assigning to twiddles using unique_ptr.
|
||||
// This is to ensure that twiddles are nullptr during calculation,
|
||||
// otherwise the init domain function might return on another thread before twiddles are calculated.
|
||||
auto temp_twiddles = std::make_unique<S[]>(s_ntt_domain.max_size + 1);
|
||||
|
||||
S tw_omega = primitive_root;
|
||||
temp_twiddles[0] = S::one();
|
||||
for (int i = 1; i <= s_ntt_domain.max_size; i++) {
|
||||
temp_twiddles[i] = temp_twiddles[i - 1] * tw_omega;
|
||||
}
|
||||
s_ntt_domain.twiddles = std::move(temp_twiddles); // Assign twiddles using unique_ptr
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
eIcicleError CpuNttDomain<S>::cpu_ntt_release_domain(const Device& device)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(s_ntt_domain.domain_mutex);
|
||||
s_ntt_domain.twiddles.reset(); // Set twiddles to nullptr
|
||||
s_ntt_domain.max_size = 0;
|
||||
s_ntt_domain.max_log_size = 0;
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
eIcicleError CpuNttDomain<S>::get_root_of_unity_from_domain(const Device& device, uint64_t logn, S* rou /*OUT*/)
|
||||
{
|
||||
std::lock_guard<std::mutex> lock(s_ntt_domain.domain_mutex); // not ideal to lock here but safer
|
||||
ICICLE_ASSERT(logn <= s_ntt_domain.max_log_size)
|
||||
<< "NTT log_size=" << logn << " is too large for the domain (logsize=" << s_ntt_domain.max_log_size
|
||||
<< "). Consider generating your domain with a higher order root of unity";
|
||||
|
||||
const size_t twiddles_idx = 1ULL << (s_ntt_domain.max_log_size - logn);
|
||||
*rou = s_ntt_domain.twiddles[twiddles_idx];
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
int bit_reverse(int n, int logn)
|
||||
{
|
||||
int rev = 0;
|
||||
for (int j = 0; j < logn; ++j) {
|
||||
if (n & (1 << j)) { rev |= 1 << (logn - 1 - j); }
|
||||
}
|
||||
return rev;
|
||||
}
|
||||
|
||||
template <typename E = scalar_t>
|
||||
eIcicleError reorder_by_bit_reverse(int logn, E* output, int batch_size)
|
||||
{
|
||||
uint64_t size = 1 << logn;
|
||||
for (int batch = 0; batch < batch_size; ++batch) {
|
||||
E* current_output = output + batch * size;
|
||||
int rev;
|
||||
for (int i = 0; i < size; ++i) {
|
||||
rev = bit_reverse(i, logn);
|
||||
if (i < rev) { std::swap(current_output[i], current_output[rev]); }
|
||||
}
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
template <typename S = scalar_t, typename E = scalar_t>
|
||||
void dit_ntt(E* elements, uint64_t size, int batch_size, const S* twiddles, NTTDir dir, int domain_max_size)
|
||||
{
|
||||
for (int batch = 0; batch < batch_size; ++batch) {
|
||||
E* current_elements = elements + batch * size;
|
||||
for (int len = 2; len <= size; len <<= 1) {
|
||||
int half_len = len / 2;
|
||||
int step = (size / len) * (domain_max_size / size);
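// The twiddle table holds powers of the domain generator (order domain_max_size), so the index stride
// for this stage is domain_max_size / len, written here as (size / len) * (domain_max_size / size).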
|
||||
for (int i = 0; i < size; i += len) {
|
||||
for (int j = 0; j < half_len; ++j) {
|
||||
int tw_idx = (dir == NTTDir::kForward) ? j * step : domain_max_size - j * step;
|
||||
E u = current_elements[i + j];
|
||||
E v = current_elements[i + j + half_len] * twiddles[tw_idx];
|
||||
current_elements[i + j] = u + v;
|
||||
current_elements[i + j + half_len] = u - v;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S = scalar_t, typename E = scalar_t>
|
||||
void dif_ntt(E* elements, uint64_t size, int batch_size, const S* twiddles, NTTDir dir, int domain_max_size)
|
||||
{
|
||||
for (int batch = 0; batch < batch_size; ++batch) {
|
||||
E* current_elements = elements + batch * size;
|
||||
for (int len = size; len >= 2; len >>= 1) {
|
||||
int half_len = len / 2;
|
||||
int step = (size / len) * (domain_max_size / size);
|
||||
for (int i = 0; i < size; i += len) {
|
||||
for (int j = 0; j < half_len; ++j) {
|
||||
int tw_idx = (dir == NTTDir::kForward) ? j * step : domain_max_size - j * step;
|
||||
E u = current_elements[i + j];
|
||||
E v = current_elements[i + j + half_len];
|
||||
current_elements[i + j] = u + v;
|
||||
current_elements[i + j + half_len] = (u - v) * twiddles[tw_idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename E = scalar_t>
|
||||
void transpose(const E* input, E* output, int rows, int cols)
|
||||
{
|
||||
for (int col = 0; col < cols; ++col) {
|
||||
for (int row = 0; row < rows; ++row) {
|
||||
output[col * rows + row] = input[row * cols + col];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename S = scalar_t, typename E = scalar_t>
|
||||
eIcicleError coset_mul(
|
||||
int logn,
|
||||
int domain_max_size,
|
||||
E* elements,
|
||||
int batch_size,
|
||||
const S* twiddles = nullptr,
|
||||
int stride = 0,
|
||||
const std::unique_ptr<S[]>& arbitrary_coset = nullptr,
|
||||
bool bit_rev = false,
|
||||
NTTDir dir = NTTDir::kForward,
|
||||
bool columns_batch = false)
|
||||
{
|
||||
uint64_t size = 1 << logn;
|
||||
int idx;
|
||||
for (int batch = 0; batch < batch_size; ++batch) {
|
||||
E* current_elements = elements + batch * size;
|
||||
if (arbitrary_coset) {
|
||||
for (int i = 1; i < size; ++i) {
|
||||
idx = columns_batch ? batch : i;
|
||||
idx = bit_rev ? bit_reverse(idx, logn) : idx;
|
||||
current_elements[i] = current_elements[i] * arbitrary_coset[idx];
|
||||
}
|
||||
} else if (stride != 0) {
|
||||
for (int i = 1; i < size; ++i) {
|
||||
idx = bit_rev ? stride * (bit_reverse(i, logn)) : stride * i;
|
||||
idx = dir == NTTDir::kForward ? idx : domain_max_size - idx;
|
||||
current_elements[i] = current_elements[i] * twiddles[idx];
|
||||
}
|
||||
}
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
template <typename S = scalar_t, typename E = scalar_t>
|
||||
eIcicleError
|
||||
cpu_ntt_ref(const Device& device, const E* input, uint64_t size, NTTDir dir, NTTConfig<S>& config, E* output)
|
||||
{
|
||||
if (size & (size - 1)) {
|
||||
ICICLE_LOG_ERROR << "Size must be a power of 2. Size = " << size;
|
||||
return eIcicleError::INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
// Copy input to "temp_elements" instead of pointing temp_elements to input to ensure freeing temp_elements does not
|
||||
// free the input, preventing a potential double-free error.
|
||||
// TODO [SHANIE]: Later, remove temp_elements and perform all calculations in-place
|
||||
// (implement NTT for the case where columns_batch=true, in-place).
|
||||
|
||||
const uint64_t total_size = size * config.batch_size;
|
||||
auto temp_elements = std::make_unique<E[]>(total_size);
|
||||
auto vec_ops_config = default_vec_ops_config();
|
||||
if (config.columns_batch) {
|
||||
transpose(input, temp_elements.get(), size, config.batch_size);
|
||||
} else {
|
||||
std::copy(input, input + total_size, temp_elements.get());
|
||||
}
|
||||
const int logn = int(log2(size));
|
||||
const S* twiddles = CpuNttDomain<S>::s_ntt_domain.get_twiddles();
|
||||
const int domain_max_size = CpuNttDomain<S>::s_ntt_domain.get_max_size();
|
||||
std::unique_ptr<S[]> arbitrary_coset = nullptr;
|
||||
int coset_stride = 0;
|
||||
|
||||
if (domain_max_size < size) {
|
||||
ICICLE_LOG_ERROR << "NTT domain size is less than input size. Domain size = " << domain_max_size
|
||||
<< ", Input size = " << size;
|
||||
return eIcicleError::INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
if (config.coset_gen != S::one()) { // TODO SHANIE - implement more efficient way to find coset_stride
|
||||
for (int i = 1; i <= domain_max_size; i++) {
|
||||
if (twiddles[i] == config.coset_gen) {
|
||||
coset_stride = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (coset_stride == 0) { // if the coset_gen is not found in the twiddles, calculate arbitrary coset
|
||||
ICICLE_LOG_DEBUG << "Coset generator not found in twiddles. Calculating arbitrary coset.";
|
||||
auto temp_cosets = std::make_unique<S[]>(domain_max_size + 1);
|
||||
arbitrary_coset = std::make_unique<S[]>(domain_max_size + 1);
|
||||
arbitrary_coset[0] = S::one();
|
||||
S coset_gen = dir == NTTDir::kForward ? config.coset_gen : S::inverse(config.coset_gen); // inverse for INTT
|
||||
for (int i = 1; i <= domain_max_size; i++) {
|
||||
arbitrary_coset[i] = arbitrary_coset[i - 1] * coset_gen;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool dit = true;
|
||||
bool input_rev = false;
|
||||
bool output_rev = false;
|
||||
bool need_to_reorder = false;
|
||||
bool coset = (config.coset_gen != S::one() && dir == NTTDir::kForward);
|
||||
switch (config.ordering) { // kNN, kNR, kRN, kRR, kNM, kMN
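// DIT consumes bit-reversed input and emits natural-order output; DIF consumes natural-order input and
// emits bit-reversed output. need_to_reorder marks the orderings whose input must be permuted first.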
|
||||
case Ordering::kNN:
|
||||
need_to_reorder = true;
|
||||
break;
|
||||
case Ordering::kNR:
|
||||
case Ordering::kNM:
|
||||
dit = false; // dif
|
||||
output_rev = true;
|
||||
break;
|
||||
case Ordering::kRR:
|
||||
input_rev = true;
|
||||
output_rev = true;
|
||||
need_to_reorder = true;
|
||||
dit = false; // dif
|
||||
break;
|
||||
case Ordering::kRN:
|
||||
case Ordering::kMN:
|
||||
input_rev = true;
|
||||
break;
|
||||
default:
|
||||
return eIcicleError::INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
if (coset) {
|
||||
coset_mul(
|
||||
logn, domain_max_size, temp_elements.get(), config.batch_size, twiddles, coset_stride, arbitrary_coset,
|
||||
input_rev);
|
||||
}
|
||||
|
||||
if (need_to_reorder) { reorder_by_bit_reverse(logn, temp_elements.get(), config.batch_size); }
|
||||
|
||||
// NTT/INTT
|
||||
if (dit) {
|
||||
dit_ntt<S, E>(temp_elements.get(), size, config.batch_size, twiddles, dir, domain_max_size);
|
||||
} else {
|
||||
dif_ntt<S, E>(temp_elements.get(), size, config.batch_size, twiddles, dir, domain_max_size);
|
||||
}
|
||||
|
||||
if (dir == NTTDir::kInverse) {
|
||||
// Normalize results
|
||||
S inv_size = S::inv_log_size(logn);
|
||||
for (int i = 0; i < total_size; ++i) {
|
||||
temp_elements[i] = temp_elements[i] * inv_size;
|
||||
}
|
||||
if (config.coset_gen != S::one()) {
|
||||
coset_mul(
|
||||
logn, domain_max_size, temp_elements.get(), config.batch_size, twiddles, coset_stride, arbitrary_coset,
|
||||
output_rev, dir);
|
||||
}
|
||||
}
|
||||
|
||||
if (config.columns_batch) {
|
||||
transpose(temp_elements.get(), output, config.batch_size, size);
|
||||
} else {
|
||||
std::copy(temp_elements.get(), temp_elements.get() + total_size, output);
|
||||
}
|
||||
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
template <typename S = scalar_t, typename E = scalar_t>
|
||||
eIcicleError cpu_ntt(const Device& device, const E* input, uint64_t size, NTTDir dir, NTTConfig<S>& config, E* output)
|
||||
{
|
||||
return cpu_ntt_ref(device, input, size, dir, config, output);
|
||||
}
|
||||
} // namespace ntt_cpu
|
||||
107
icicle_v3/backend/cpu/src/cpu_device_api.cpp
Normal file
@@ -0,0 +1,107 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <cstring>
|
||||
#include "icicle/device_api.h"
|
||||
#include "icicle/errors.h"
|
||||
#include "icicle/utils/log.h"
|
||||
|
||||
using namespace icicle;
|
||||
|
||||
class CpuDeviceAPI : public DeviceAPI
|
||||
{
|
||||
public:
|
||||
eIcicleError set_device(const Device& device) override
|
||||
{
|
||||
return (device.id == 0) ? eIcicleError::SUCCESS : eIcicleError::INVALID_DEVICE;
|
||||
}
|
||||
|
||||
eIcicleError get_device_count(int& device_count) const override
|
||||
{
|
||||
device_count = 1;
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
// Memory management
|
||||
eIcicleError allocate_memory(void** ptr, size_t size) const override
|
||||
{
|
||||
*ptr = malloc(size);
|
||||
return (*ptr == nullptr) ? eIcicleError::ALLOCATION_FAILED : eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
eIcicleError allocate_memory_async(void** ptr, size_t size, icicleStreamHandle stream) const override
|
||||
{
|
||||
return CpuDeviceAPI::allocate_memory(ptr, size);
|
||||
}
|
||||
|
||||
eIcicleError free_memory(void* ptr) const override
|
||||
{
|
||||
free(ptr);
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
eIcicleError free_memory_async(void* ptr, icicleStreamHandle stream) const override
|
||||
{
|
||||
return CpuDeviceAPI::free_memory(ptr);
|
||||
}
|
||||
|
||||
eIcicleError get_available_memory(size_t& total /*OUT*/, size_t& free /*OUT*/) const override
|
||||
{
|
||||
// TODO implement this
|
||||
return eIcicleError::API_NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
eIcicleError memset(void* ptr, int value, size_t size) const override
|
||||
{
|
||||
std::memset(ptr, value, size);
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
eIcicleError memset_async(void* ptr, int value, size_t size, icicleStreamHandle stream) const override
|
||||
{
|
||||
std::memset(ptr, value, size);
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
eIcicleError memCopy(void* dst, const void* src, size_t size) const
|
||||
{
|
||||
std::memcpy(dst, src, size);
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
// Data transfer
|
||||
eIcicleError copy(void* dst, const void* src, size_t size, eCopyDirection direction) const override
|
||||
{
|
||||
return memCopy(dst, src, size);
|
||||
}
|
||||
|
||||
eIcicleError copy_async(
|
||||
void* dst, const void* src, size_t size, eCopyDirection direction, icicleStreamHandle stream) const override
|
||||
{
|
||||
return memCopy(dst, src, size);
|
||||
}
|
||||
|
||||
// Synchronization
|
||||
eIcicleError synchronize(icicleStreamHandle stream = nullptr) const override { return eIcicleError::SUCCESS; }
|
||||
|
||||
// Stream management
|
||||
eIcicleError create_stream(icicleStreamHandle* stream) const override
|
||||
{
|
||||
*stream = nullptr; // no streams for CPU
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
eIcicleError destroy_stream(icicleStreamHandle stream) const override
|
||||
{
|
||||
return (nullptr == stream) ? eIcicleError::SUCCESS : eIcicleError::STREAM_DESTRUCTION_FAILED;
|
||||
}
|
||||
|
||||
eIcicleError get_device_properties(DeviceProperties& properties) const override
|
||||
{
|
||||
properties.using_host_memory = true;
|
||||
properties.num_memory_regions = 0;
|
||||
properties.supports_pinned_memory = false;
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_DEVICE_API("CPU", CpuDeviceAPI);
|
||||
19
icicle_v3/backend/cpu/src/curve/cpu_ecntt.cpp
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
#include "icicle/backend/ecntt_backend.h"
|
||||
#include "icicle/errors.h"
|
||||
#include "icicle/runtime.h"
|
||||
#include "cpu_ntt.h"
|
||||
|
||||
#include "icicle/curves/curve_config.h"
|
||||
|
||||
using namespace curve_config;
|
||||
using namespace icicle;
|
||||
|
||||
template <typename S, typename E>
|
||||
eIcicleError cpu_ntt(const Device& device, const E* input, int size, NTTDir dir, NTTConfig<S>& config, E* output)
|
||||
{
|
||||
auto err = ntt_cpu::cpu_ntt<S, E>(device, input, size, dir, config, output);
|
||||
return err;
|
||||
}
|
||||
|
||||
REGISTER_ECNTT_BACKEND("CPU", (cpu_ntt<scalar_t, projective_t>));
|
||||
28
icicle_v3/backend/cpu/src/curve/cpu_mont_conversion.cpp
Normal file
@@ -0,0 +1,28 @@
|
||||
|
||||
#include "icicle/curves/montgomery_conversion.h"
|
||||
#include "icicle/errors.h"
|
||||
#include "icicle/runtime.h"
|
||||
#include "icicle/utils/log.h"
|
||||
|
||||
#include "icicle/curves/curve_config.h"
|
||||
|
||||
using namespace curve_config;
|
||||
using namespace icicle;
|
||||
|
||||
template <typename T>
|
||||
eIcicleError
|
||||
cpu_convert_mont(const Device& device, const T* input, size_t n, bool is_into, const VecOpsConfig& config, T* output)
|
||||
{
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
output[i] = is_into ? T::to_montgomery(input[i]) : T::from_montgomery(input[i]);
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_AFFINE_CONVERT_MONTGOMERY_BACKEND("CPU", cpu_convert_mont<affine_t>);
|
||||
REGISTER_PROJECTIVE_CONVERT_MONTGOMERY_BACKEND("CPU", cpu_convert_mont<projective_t>);
|
||||
|
||||
#ifdef G2
|
||||
REGISTER_AFFINE_G2_CONVERT_MONTGOMERY_BACKEND("CPU", cpu_convert_mont<g2_affine_t>);
|
||||
REGISTER_PROJECTIVE_G2_CONVERT_MONTGOMERY_BACKEND("CPU", cpu_convert_mont<g2_projective_t>);
|
||||
#endif // G2
|
||||
43
icicle_v3/backend/cpu/src/curve/cpu_msm.cpp
Normal file
@@ -0,0 +1,43 @@
|
||||
|
||||
#include "icicle/backend/msm_backend.h"
|
||||
#include "icicle/errors.h"
|
||||
#include "icicle/runtime.h"
|
||||
|
||||
#include "icicle/curves/projective.h"
|
||||
#include "icicle/curves/curve_config.h"
|
||||
|
||||
using namespace curve_config;
|
||||
using namespace icicle;
|
||||
|
||||
template <typename S, typename A, typename P>
|
||||
eIcicleError
|
||||
cpu_msm(const Device& device, const S* scalars, const A* bases, int msm_size, const MSMConfig& config, P* results)
|
||||
{
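// Reference implementation: a straightforward sum of per-point scalar multiplications, one batch at a
// time, with no bucket (Pippenger) optimization.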
|
||||
for (auto batch_idx = 0; batch_idx < config.batch_size; ++batch_idx) {
|
||||
P res = P::zero();
|
||||
const S* batch_scalars = scalars + msm_size * batch_idx;
|
||||
const A* batch_bases = config.are_bases_shared ? bases : bases + msm_size * batch_idx;
|
||||
for (auto i = 0; i < msm_size; ++i) {
|
||||
res = res + P::from_affine(batch_bases[i]) * batch_scalars[i];
|
||||
}
|
||||
results[batch_idx] = res;
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
template <typename A>
|
||||
eIcicleError cpu_msm_precompute_bases(
|
||||
const Device& device, const A* input_bases, int nof_bases, const MSMConfig& config, A* output_bases)
|
||||
{
|
||||
ICICLE_ASSERT(!config.are_points_on_device && !config.are_scalars_on_device);
|
||||
memcpy(output_bases, input_bases, sizeof(A) * nof_bases);
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_MSM_BACKEND("CPU", (cpu_msm<scalar_t, affine_t, projective_t>));
|
||||
REGISTER_MSM_PRE_COMPUTE_BASES_BACKEND("CPU", cpu_msm_precompute_bases<affine_t>);
|
||||
|
||||
#ifdef G2
|
||||
REGISTER_MSM_G2_BACKEND("CPU", (cpu_msm<scalar_t, g2_affine_t, g2_projective_t>));
|
||||
REGISTER_MSM_G2_PRE_COMPUTE_BASES_BACKEND("CPU", cpu_msm_precompute_bases<g2_affine_t>);
|
||||
#endif // G2
|
||||
41
icicle_v3/backend/cpu/src/field/cpu_ntt.cpp
Normal file
@@ -0,0 +1,41 @@
|
||||
#include "cpu_ntt.h"
|
||||
|
||||
using namespace field_config;
|
||||
using namespace icicle;
|
||||
|
||||
eIcicleError
|
||||
cpu_ntt_init_domain(const Device& device, const scalar_t& primitive_root, const NTTInitDomainConfig& config)
|
||||
{
|
||||
auto err = ntt_cpu::CpuNttDomain<scalar_t>::cpu_ntt_init_domain(device, primitive_root, config);
|
||||
return err;
|
||||
}
|
||||
|
||||
template <typename S = scalar_t>
|
||||
eIcicleError cpu_ntt_release_domain(const Device& device, const S& dummy)
|
||||
{
|
||||
auto err = ntt_cpu::CpuNttDomain<scalar_t>::cpu_ntt_release_domain(device);
|
||||
return err;
|
||||
}
|
||||
|
||||
template <typename S = scalar_t>
|
||||
eIcicleError cpu_get_root_of_unity_from_domain(const Device& device, uint64_t logn, S* rou)
|
||||
{
|
||||
auto err = ntt_cpu::CpuNttDomain<scalar_t>::get_root_of_unity_from_domain(device, logn, rou);
|
||||
return err;
|
||||
}
|
||||
|
||||
template <typename S, typename E>
|
||||
eIcicleError cpu_ntt(const Device& device, const E* input, uint64_t size, NTTDir dir, NTTConfig<S>& config, E* output)
|
||||
{
|
||||
auto err = ntt_cpu::cpu_ntt<S, E>(device, input, size, dir, config, output);
|
||||
return err;
|
||||
}
|
||||
|
||||
REGISTER_NTT_INIT_DOMAIN_BACKEND("CPU", (cpu_ntt_init_domain));
|
||||
REGISTER_NTT_RELEASE_DOMAIN_BACKEND("CPU", cpu_ntt_release_domain<scalar_t>);
|
||||
REGISTER_NTT_GET_ROU_FROM_DOMAIN_BACKEND("CPU", cpu_get_root_of_unity_from_domain<scalar_t>);
|
||||
REGISTER_NTT_BACKEND("CPU", (cpu_ntt<scalar_t, scalar_t>));
|
||||
|
||||
#ifdef EXT_FIELD
|
||||
REGISTER_NTT_EXT_FIELD_BACKEND("CPU", (cpu_ntt<scalar_t, extension_t>));
|
||||
#endif // EXT_FIELD
|
||||
312
icicle_v3/backend/cpu/src/field/cpu_vec_ops.cpp
Normal file
@@ -0,0 +1,312 @@
|
||||
|
||||
#include "icicle/backend/vec_ops_backend.h"
|
||||
#include "icicle/errors.h"
|
||||
#include "icicle/runtime.h"
|
||||
#include "icicle/utils/log.h"
|
||||
|
||||
#include "icicle/fields/field_config.h"
|
||||
|
||||
using namespace field_config;
|
||||
using namespace icicle;
|
||||
|
||||
/*********************************** ADD ***********************************/
|
||||
template <typename T>
|
||||
eIcicleError
|
||||
cpu_vector_add(const Device& device, const T* vec_a, const T* vec_b, uint64_t n, const VecOpsConfig& config, T* output)
|
||||
{
|
||||
for (uint64_t i = 0; i < n; ++i) {
|
||||
output[i] = vec_a[i] + vec_b[i];
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_VECTOR_ADD_BACKEND("CPU", cpu_vector_add<scalar_t>);
|
||||
|
||||
/*********************************** SUB ***********************************/
|
||||
template <typename T>
|
||||
eIcicleError
|
||||
cpu_vector_sub(const Device& device, const T* vec_a, const T* vec_b, uint64_t n, const VecOpsConfig& config, T* output)
|
||||
{
|
||||
for (uint64_t i = 0; i < n; ++i) {
|
||||
output[i] = vec_a[i] - vec_b[i];
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_VECTOR_SUB_BACKEND("CPU", cpu_vector_sub<scalar_t>);
|
||||
|
||||
/*********************************** MUL ***********************************/
|
||||
template <typename T>
|
||||
eIcicleError
|
||||
cpu_vector_mul(const Device& device, const T* vec_a, const T* vec_b, uint64_t n, const VecOpsConfig& config, T* output)
|
||||
{
|
||||
for (uint64_t i = 0; i < n; ++i) {
|
||||
output[i] = vec_a[i] * vec_b[i];
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_VECTOR_MUL_BACKEND("CPU", cpu_vector_mul<scalar_t>);
|
||||
|
||||
/*********************************** DIV ***********************************/
|
||||
template <typename T>
|
||||
eIcicleError
|
||||
cpu_vector_div(const Device& device, const T* vec_a, const T* vec_b, uint64_t n, const VecOpsConfig& config, T* output)
|
||||
{
|
||||
for (uint64_t i = 0; i < n; ++i) {
|
||||
output[i] = vec_a[i] * T::inverse(vec_b[i]);
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_VECTOR_DIV_BACKEND("CPU", cpu_vector_div<scalar_t>);
|
||||
|
||||
/*********************************** MUL BY SCALAR***********************************/
|
||||
template <typename T>
|
||||
eIcicleError cpu_scalar_mul(
|
||||
const Device& device, const T* scalar_a, const T* vec_b, uint64_t n, const VecOpsConfig& config, T* output)
|
||||
{
|
||||
for (uint64_t i = 0; i < n; ++i) {
|
||||
output[i] = *scalar_a * vec_b[i];
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_SCALAR_MUL_VEC_BACKEND("CPU", cpu_scalar_mul<scalar_t>);
|
||||
|
||||
/*********************************** Scalar + Vector***********************************/
|
||||
template <typename T>
|
||||
eIcicleError cpu_scalar_add(
|
||||
const Device& device, const T* scalar_a, const T* vec_b, uint64_t n, const VecOpsConfig& config, T* output)
|
||||
{
|
||||
for (uint64_t i = 0; i < n; ++i) {
|
||||
output[i] = *scalar_a + vec_b[i];
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_SCALAR_ADD_VEC_BACKEND("CPU", cpu_scalar_add<scalar_t>);
|
||||
|
||||
/*********************************** Scalar - Vector***********************************/
|
||||
template <typename T>
|
||||
eIcicleError cpu_scalar_sub(
|
||||
const Device& device, const T* scalar_a, const T* vec_b, uint64_t n, const VecOpsConfig& config, T* output)
|
||||
{
|
||||
for (uint64_t i = 0; i < n; ++i) {
|
||||
output[i] = *scalar_a - vec_b[i];
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_SCALAR_SUB_VEC_BACKEND("CPU", cpu_scalar_sub<scalar_t>);
|
||||
|
||||
/*********************************** CONVERT MONTGOMERY ***********************************/
|
||||
template <typename T>
|
||||
eIcicleError cpu_convert_montgomery(
|
||||
const Device& device, const T* input, uint64_t n, bool is_into, const VecOpsConfig& config, T* output)
|
||||
{
|
||||
for (uint64_t i = 0; i < n; ++i) {
|
||||
output[i] = is_into ? T::to_montgomery(input[i]) : T::from_montgomery(input[i]);
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_CONVERT_MONTGOMERY_BACKEND("CPU", cpu_convert_montgomery<scalar_t>);
|
||||
|
||||
#ifdef EXT_FIELD
|
||||
REGISTER_VECTOR_ADD_EXT_FIELD_BACKEND("CPU", cpu_vector_add<extension_t>);
|
||||
REGISTER_VECTOR_SUB_EXT_FIELD_BACKEND("CPU", cpu_vector_sub<extension_t>);
|
||||
REGISTER_VECTOR_MUL_EXT_FIELD_BACKEND("CPU", cpu_vector_mul<extension_t>);
|
||||
REGISTER_CONVERT_MONTGOMERY_EXT_FIELD_BACKEND("CPU", cpu_convert_montgomery<extension_t>);
|
||||
#endif // EXT_FIELD
|
||||
|
||||
/*********************************** TRANSPOSE ***********************************/
|
||||
|
||||
template <typename T>
|
||||
eIcicleError cpu_matrix_transpose(
|
||||
const Device& device, const T* mat_in, uint32_t nof_rows, uint32_t nof_cols, const VecOpsConfig& config, T* mat_out)
|
||||
{
|
||||
// Check for invalid arguments
|
||||
if (!mat_in || !mat_out || nof_rows == 0 || nof_cols == 0) { return eIcicleError::INVALID_ARGUMENT; }
|
||||
|
||||
// Perform the matrix transpose
|
||||
for (uint32_t i = 0; i < nof_rows; ++i) {
|
||||
for (uint32_t j = 0; j < nof_cols; ++j) {
|
||||
mat_out[j * nof_rows + i] = mat_in[i * nof_cols + j];
|
||||
}
|
||||
}
|
||||
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_MATRIX_TRANSPOSE_BACKEND("CPU", cpu_matrix_transpose<scalar_t>);
|
||||
#ifdef EXT_FIELD
|
||||
REGISTER_MATRIX_TRANSPOSE_EXT_FIELD_BACKEND("CPU", cpu_matrix_transpose<extension_t>);
|
||||
#endif // EXT_FIELD
|
||||
|
||||
/*********************************** BIT REVERSE ***********************************/
|
||||
|
||||
template <typename T>
|
||||
eIcicleError
|
||||
cpu_bit_reverse(const Device& device, const T* vec_in, uint64_t size, const VecOpsConfig& config, T* vec_out)
|
||||
{
|
||||
// Check for invalid arguments
|
||||
if (!vec_in || !vec_out || size == 0) { return eIcicleError::INVALID_ARGUMENT; }
|
||||
|
||||
// Calculate log2(size)
|
||||
int logn = static_cast<int>(std::floor(std::log2(size)));
|
||||
if ((1ULL << logn) != size) {
|
||||
return eIcicleError::INVALID_ARGUMENT; // Ensure size is a power of 2
|
||||
}
|
||||
|
||||
// If vec_in and vec_out are not the same, copy input to output
|
||||
if (vec_in != vec_out) {
|
||||
for (uint64_t i = 0; i < size; ++i) {
|
||||
vec_out[i] = vec_in[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Perform the bit reverse
|
||||
for (uint64_t i = 0; i < size; ++i) {
|
||||
uint64_t rev = 0;
|
||||
for (int j = 0; j < logn; ++j) {
|
||||
if (i & (1ULL << j)) { rev |= 1ULL << (logn - 1 - j); }
|
||||
}
|
||||
if (i < rev) { std::swap(vec_out[i], vec_out[rev]); }
|
||||
}
|
||||
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_BIT_REVERSE_BACKEND("CPU", cpu_bit_reverse<scalar_t>);
|
||||
#ifdef EXT_FIELD
|
||||
REGISTER_BIT_REVERSE_EXT_FIELD_BACKEND("CPU", cpu_bit_reverse<extension_t>);
|
||||
#endif // EXT_FIELD
|
||||
|
||||
/*********************************** SLICE ***********************************/
|
||||
|
||||
template <typename T>
|
||||
eIcicleError cpu_slice(
|
||||
const Device& device,
|
||||
const T* vec_in,
|
||||
uint64_t offset,
|
||||
uint64_t stride,
|
||||
uint64_t size,
|
||||
const VecOpsConfig& config,
|
||||
T* vec_out)
|
||||
{
|
||||
if (vec_in == nullptr || vec_out == nullptr) {
|
||||
ICICLE_LOG_ERROR << "Error: Invalid argument - input or output vector is null";
|
||||
return eIcicleError::INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
for (uint64_t i = 0; i < size; ++i) {
|
||||
uint64_t index = offset + i * stride;
|
||||
vec_out[i] = vec_in[index];
|
||||
}
|
||||
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_SLICE_BACKEND("CPU", cpu_slice<scalar_t>);
|
||||
#ifdef EXT_FIELD
|
||||
REGISTER_SLICE_EXT_FIELD_BACKEND("CPU", cpu_slice<extension_t>);
|
||||
#endif // EXT_FIELD
|
||||
|
||||
/*********************************** Polynomial evaluation ***********************************/
|
||||
|
||||
template <typename T>
|
||||
eIcicleError cpu_poly_eval(
|
||||
const Device& device,
|
||||
const T* coeffs,
|
||||
uint64_t coeffs_size,
|
||||
const T* domain,
|
||||
uint64_t domain_size,
|
||||
const VecOpsConfig& config,
|
||||
T* evals /*OUT*/)
|
||||
{
|
||||
// using Horner's method
|
||||
// example: ax^2+bx+c is computed as (1) r=a, (2) r=r*x+b, (3) r=r*x+c
|
||||
for (uint64_t eval_idx = 0; eval_idx < domain_size; ++eval_idx) {
|
||||
evals[eval_idx] = coeffs[coeffs_size - 1];
|
||||
for (int64_t coeff_idx = coeffs_size - 2; coeff_idx >= 0; --coeff_idx) {
|
||||
evals[eval_idx] = evals[eval_idx] * domain[eval_idx] + coeffs[coeff_idx];
|
||||
}
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_POLYNOMIAL_EVAL("CPU", cpu_poly_eval<scalar_t>);
|
||||
|
||||
/*********************************** Highest non-zero idx ***********************************/
|
||||
template <typename T>
|
||||
eIcicleError cpu_highest_non_zero_idx(
|
||||
const Device& device, const T* input, uint64_t size, const VecOpsConfig& config, int64_t* out_idx /*OUT*/)
|
||||
{
|
||||
*out_idx = -1; // the zero vector yields -1, since returning 0 would claim a non-zero element at vec[0]
|
||||
for (int64_t i = size - 1; i >= 0; --i) {
|
||||
if (input[i] != T::zero()) {
|
||||
*out_idx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_HIGHEST_NON_ZERO_IDX_BACKEND("CPU", cpu_highest_non_zero_idx<scalar_t>);
|
||||
|
||||
/*============================== polynomial division ==============================*/
|
||||
template <typename T>
|
||||
void school_book_division_step_cpu(T* r, T* q, const T* b, int deg_r, int deg_b, const T& lc_b_inv)
|
||||
{
|
||||
int64_t monomial = deg_r - deg_b; // monomial=1 is 'x', monomial=2 is x^2 etc.
|
||||
|
||||
T lc_r = r[deg_r];
|
||||
T monomial_coeff = lc_r * lc_b_inv; // lc_r / lc_b
|
||||
|
||||
// adding monomial s to q (q=q+s)
|
||||
q[monomial] = monomial_coeff;
|
||||
|
||||
for (int i = monomial; i <= deg_r; ++i) {
|
||||
T b_coeff = b[i - monomial];
|
||||
r[i] = r[i] - monomial_coeff * b_coeff;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
eIcicleError cpu_poly_divide(
|
||||
const Device& device,
|
||||
const T* numerator,
|
||||
int64_t numerator_deg,
|
||||
const T* denumerator,
|
||||
int64_t denumerator_deg,
|
||||
const VecOpsConfig& config,
|
||||
T* q_out /*OUT*/,
|
||||
uint64_t q_size,
|
||||
T* r_out /*OUT*/,
|
||||
uint64_t r_size)
|
||||
{
|
||||
ICICLE_ASSERT(r_size >= (1 + denumerator_deg))
|
||||
<< "polynomial division expects r(x) size to be similar to numerator(x)";
|
||||
ICICLE_ASSERT(q_size >= (numerator_deg - denumerator_deg + 1))
|
||||
<< "polynomial division expects q(x) size to be at least deg(numerator)-deg(denumerator)+1";
|
||||
|
||||
ICICLE_CHECK(icicle_copy_async(r_out, numerator, (1 + numerator_deg) * sizeof(T), config.stream));
|
||||
|
||||
// invert largest coeff of b
|
||||
const T& lc_b_inv = T::inverse(denumerator[denumerator_deg]);
|
||||
|
||||
int64_t deg_r = numerator_deg;
|
||||
while (deg_r >= denumerator_deg) {
|
||||
// each iteration removes the largest monomial in r until deg(r) < deg(b)
|
||||
school_book_division_step_cpu(r_out, q_out, denumerator, deg_r, denumerator_deg, lc_b_inv);
|
||||
|
||||
// compute degree of r
|
||||
auto degree_config = default_vec_ops_config();
|
||||
cpu_highest_non_zero_idx(device, r_out, deg_r + 1 /*size of R*/, degree_config, &deg_r);
|
||||
}
|
||||
|
||||
return eIcicleError::SUCCESS;
|
||||
}
|
||||
|
||||
REGISTER_POLYNOMIAL_DIVISION("CPU", cpu_poly_divide<scalar_t>);
|
||||
@@ -0,0 +1,39 @@

#include "icicle/backend/polynomial_backend.h"
#include "icicle/polynomials/default_backend/default_poly_context.h"
#include "icicle/polynomials/default_backend/default_poly_backend.h"

namespace polynomials {

  using icicle::DefaultPolynomialBackend;
  using icicle::DefaultPolynomialContext;

  /*============================== Polynomial CPU-factory ==============================*/

  template <typename C = scalar_t, typename D = C, typename I = C>
  class CPUPolynomialFactory : public AbstractPolynomialFactory<C, D, I>
  {
  public:
    CPUPolynomialFactory() {}
    ~CPUPolynomialFactory() {}
    std::shared_ptr<IPolynomialContext<C, D, I>> create_context() override;
    std::shared_ptr<IPolynomialBackend<C, D, I>> create_backend() override;
  };

  template <typename C, typename D, typename I>
  std::shared_ptr<IPolynomialContext<C, D, I>> CPUPolynomialFactory<C, D, I>::create_context()
  {
    return std::make_shared<DefaultPolynomialContext<C, D, I>>(nullptr);
  }

  template <typename C, typename D, typename I>
  std::shared_ptr<IPolynomialBackend<C, D, I>> CPUPolynomialFactory<C, D, I>::create_backend()
  {
    return std::make_shared<DefaultPolynomialBackend<C, D, I>>(nullptr);
  }

  /************************************** BACKEND REGISTRATION **************************************/

  REGISTER_SCALAR_POLYNOMIAL_FACTORY_BACKEND("CPU", CPUPolynomialFactory<scalar_t>)

} // namespace polynomials
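A brief usage sketch of the factory above, for illustration only: it assumes code living in the same translation unit as CPUPolynomialFactory, since the class is not exported through a public header in this diff; real callers normally reach it through the registered abstract-factory dispatch rather than naming the CPU factory directly.

// Hypothetical helper in the same translation unit as CPUPolynomialFactory (illustration only).
static void build_cpu_polynomial_parts()
{
  CPUPolynomialFactory<scalar_t> factory;
  auto context = factory.create_context(); // std::shared_ptr<IPolynomialContext<scalar_t, scalar_t, scalar_t>>
  auto backend = factory.create_backend(); // std::shared_ptr<IPolynomialBackend<scalar_t, scalar_t, scalar_t>>

  // A Polynomial frontend object would typically be wired up from such a context/backend pair.
  (void)context;
  (void)backend;
}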
49  icicle_v3/cmake/curve.cmake  Normal file
@@ -0,0 +1,49 @@
function(check_curve)
  set(SUPPORTED_CURVES bn254;bls12_381;bls12_377;bw6_761;grumpkin)

  set(IS_CURVE_SUPPORTED FALSE)
  set(I 0)
  foreach (SUPPORTED_CURVE ${SUPPORTED_CURVES})
    math(EXPR I "${I} + 1")
    if (CURVE STREQUAL SUPPORTED_CURVE)
      add_compile_definitions(FIELD_ID=${I})
      add_compile_definitions(CURVE_ID=${I})
      set(IS_CURVE_SUPPORTED TRUE)
    endif ()
  endforeach()

  if (NOT IS_CURVE_SUPPORTED)
    message( FATAL_ERROR "The value of CURVE variable: ${CURVE} is not one of the supported curves: ${SUPPORTED_CURVES}" )
  endif ()
endfunction()

function(setup_curve_target)
  set(FIELD ${CURVE})
  setup_field_target()

  add_library(icicle_curve SHARED
    src/msm.cpp
    src/curves/ffi_extern.cpp
    src/curves/montgomery_conversion.cpp
  )
  target_link_libraries(icicle_curve PUBLIC icicle_device icicle_field)
  set_target_properties(icicle_curve PROPERTIES OUTPUT_NAME "icicle_curve_${CURVE}")

  # Make sure CURVE is defined in the cache for backends to see
  set(CURVE "${CURVE}" CACHE STRING "")
  target_compile_definitions(icicle_curve PUBLIC CURVE=${CURVE})
  if (G2)
    set(G2 "${G2}" CACHE STRING "")
    target_compile_definitions(icicle_curve PUBLIC G2=${G2})
  endif()
  if (ECNTT)
    target_sources(icicle_curve PRIVATE src/ecntt.cpp)
    set(ECNTT "${ECNTT}" CACHE STRING "")
    target_compile_definitions(icicle_curve PUBLIC ECNTT=${ECNTT})
  endif()

  install(TARGETS icicle_curve
    RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/lib"
    LIBRARY DESTINATION "${CMAKE_INSTALL_PREFIX}/lib"
    ARCHIVE DESTINATION "${CMAKE_INSTALL_PREFIX}/lib")
endfunction()
59  icicle_v3/cmake/field.cmake  Normal file
@@ -0,0 +1,59 @@
function(check_field)
  set(SUPPORTED_FIELDS babybear;stark252)

  set(IS_FIELD_SUPPORTED FALSE)
  set(I 1000)
  foreach (SUPPORTED_FIELD ${SUPPORTED_FIELDS})
    math(EXPR I "${I} + 1")
    if (FIELD STREQUAL SUPPORTED_FIELD)
      add_compile_definitions(FIELD_ID=${I})
      set(IS_FIELD_SUPPORTED TRUE)
    endif ()
  endforeach()

  if (NOT IS_FIELD_SUPPORTED)
    message( FATAL_ERROR "The value of FIELD variable: ${FIELD} is not one of the supported fields: ${SUPPORTED_FIELDS}" )
  endif ()
endfunction()

function(setup_field_target)
  add_library(icicle_field SHARED
    src/fields/ffi_extern.cpp
    src/vec_ops.cpp
    src/matrix_ops.cpp
  )
  # handle APIs that are for some curves only
  add_ntt_sources_or_disable()
  set_target_properties(icicle_field PROPERTIES OUTPUT_NAME "icicle_field_${FIELD}")
  target_link_libraries(icicle_field PUBLIC icicle_device)

  # Make sure FIELD is defined in the cache for backends to see
  set(FIELD "${FIELD}" CACHE STRING "")
  target_compile_definitions(icicle_field PUBLIC FIELD=${FIELD})
  if (EXT_FIELD)
    set(EXT_FIELD "${EXT_FIELD}" CACHE STRING "")
    target_compile_definitions(icicle_field PUBLIC EXT_FIELD=${EXT_FIELD})
  endif()

  install(TARGETS icicle_field
    RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/"
    LIBRARY DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/"
    ARCHIVE DESTINATION "${CMAKE_INSTALL_PREFIX}/lib/")
endfunction()

function(add_ntt_sources_or_disable)
  set(SUPPORTED_FIELDS_WITHOUT_NTT grumpkin)

  if (NOT FIELD IN_LIST SUPPORTED_FIELDS_WITHOUT_NTT)
    add_compile_definitions(NTT_ENABLED)
    target_sources(icicle_field PRIVATE
      src/ntt.cpp
      src/polynomials/polynomials.cpp
      src/polynomials/polynomials_c_api.cpp
      src/polynomials/polynomials_abstract_factory.cpp
    )
  else()
    set(NTT OFF CACHE BOOL "NTT not available for field" FORCE)
  endif()

endfunction()
110  icicle_v3/include/icicle/api/babybear.h  Normal file
@@ -0,0 +1,110 @@
// WARNING: This file is auto-generated by a script.
// Any changes made to this file may be overwritten.
// Please modify the code generation script instead.
// Path to the code generation script: scripts/gen_c_api.py

#pragma once

#include "icicle/fields/stark_fields/babybear.h"
#include "icicle/ntt.h"
#include "icicle/vec_ops.h"

extern "C" void babybear_extension_generate_scalars(babybear::extension_t* scalars, int size);

extern "C" eIcicleError babybear_extension_scalar_convert_montgomery(
  const babybear::extension_t* input,
  uint64_t size,
  bool is_into,
  const VecOpsConfig& config,
  babybear::extension_t* output);

extern "C" eIcicleError babybear_ntt_init_domain(babybear::scalar_t* primitive_root, const NTTInitDomainConfig& config);

extern "C" eIcicleError babybear_ntt(
  const babybear::scalar_t* input,
  int size,
  NTTDir dir,
  NTTConfig<babybear::scalar_t>& config,
  babybear::scalar_t* output);

extern "C" eIcicleError babybear_ntt_release_domain();

extern "C" eIcicleError babybear_extension_vector_mul(
  const babybear::extension_t* vec_a,
  const babybear::extension_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  babybear::extension_t* result);

extern "C" eIcicleError babybearextension_vector_add(
  const babybear::extension_t* vec_a,
  const babybear::extension_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  babybear::extension_t* result);

// extern "C" eIcicleError babybear_extension_accumulate_cuda(
//   const babybear::extension_t* vec_a, const babybear::extension_t* vec_b, uint64_t n, const VecOpsConfig& config);

extern "C" eIcicleError babybear_extension_vector_sub(
  const babybear::extension_t* vec_a,
  const babybear::extension_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  babybear::extension_t* result);

extern "C" eIcicleError babybear_extension_transpose_matrix(
  const babybear::extension_t* input,
  uint32_t nof_rows,
  uint32_t nof_cols,
  const VecOpsConfig& config,
  babybear::extension_t* output);

extern "C" eIcicleError babybear_extension_bit_reverse(
  const babybear::extension_t* input, uint64_t n, const VecOpsConfig& config, babybear::extension_t* output);

extern "C" eIcicleError babybear_extension_ntt(
  const babybear::extension_t* input,
  int size,
  NTTDir dir,
  NTTConfig<babybear::scalar_t>& config,
  babybear::extension_t* output);

extern "C" void babybear_generate_scalars(babybear::scalar_t* scalars, int size);

extern "C" void babybear_scalar_convert_montgomery(
  const babybear::scalar_t* input, uint64_t size, bool is_into, const VecOpsConfig& config, babybear::scalar_t* output);

extern "C" eIcicleError babybear_vector_mul(
  const babybear::scalar_t* vec_a,
  const babybear::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  babybear::scalar_t* result);

extern "C" eIcicleError babybearvector_add(
  const babybear::scalar_t* vec_a,
  const babybear::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  babybear::scalar_t* result);

// extern "C" eIcicleError babybear_accumulate_cuda(
//   const babybear::scalar_t* vec_a, const babybear::scalar_t* vec_b, uint64_t n, const VecOpsConfig& config);

extern "C" eIcicleError babybear_vector_sub(
  const babybear::scalar_t* vec_a,
  const babybear::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  babybear::scalar_t* result);

extern "C" eIcicleError babybear_transpose_matrix(
  const babybear::scalar_t* input,
  uint32_t nof_rows,
  uint32_t nof_cols,
  const VecOpsConfig& config,
  babybear::scalar_t* output);

extern "C" eIcicleError babybear_bit_reverse(
  const babybear::scalar_t* input, uint64_t n, const VecOpsConfig& config, babybear::scalar_t* output);
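To illustrate how the generated babybear C API above is consumed, here is a hedged sketch that multiplies two scalar vectors; default_vec_ops_config() is assumed to be provided by icicle/vec_ops.h (the same helper is used by the CPU vec-ops backend earlier in this diff).

#include "icicle/api/babybear.h"

#include <vector>

int main()
{
  const int n = 8;
  std::vector<babybear::scalar_t> a(n), b(n), c(n);

  // Fill the inputs with random field elements via the generated helpers.
  babybear_generate_scalars(a.data(), n);
  babybear_generate_scalars(b.data(), n);

  // Element-wise multiplication through the extern "C" entry point declared above.
  VecOpsConfig config = default_vec_ops_config(); // assumed default-config helper from icicle/vec_ops.h
  eIcicleError err = babybear_vector_mul(a.data(), b.data(), n, config, c.data());
  return err == eIcicleError::SUCCESS ? 0 : 1;
}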
133  icicle_v3/include/icicle/api/bls12_377.h  Normal file
@@ -0,0 +1,133 @@
// WARNING: This file is auto-generated by a script.
// Any changes made to this file may be overwritten.
// Please modify the code generation script instead.
// Path to the code generation script: scripts/gen_c_api.py

#pragma once

#include "icicle/curves/params/bls12_377.h"
#include "icicle/ntt.h"
#include "icicle/msm.h"
#include "icicle/vec_ops.h"

extern "C" eIcicleError bls12_377_g2_precompute_msm_bases(
  const bls12_377::g2_affine_t* bases, int nof_bases, const MSMConfig& config, bls12_377::g2_affine_t* output_bases);

extern "C" eIcicleError bls12_377_g2_msm(
  const bls12_377::scalar_t* scalars,
  const bls12_377::g2_affine_t* points,
  int msm_size,
  const MSMConfig& config,
  bls12_377::g2_projective_t* out);

extern "C" bool bls12_377_g2_eq(bls12_377::g2_projective_t* point1, bls12_377::g2_projective_t* point2);

extern "C" void bls12_377_g2_to_affine(bls12_377::g2_projective_t* point, bls12_377::g2_affine_t* point_out);

extern "C" void bls12_377_g2_generate_projective_points(bls12_377::g2_projective_t* points, int size);

extern "C" void bls12_377_g2_generate_affine_points(bls12_377::g2_affine_t* points, int size);

extern "C" eIcicleError bls12_377_g2_affine_convert_montgomery(
  const bls12_377::g2_affine_t* input,
  size_t n,
  bool is_into,
  const VecOpsConfig& config,
  bls12_377::g2_affine_t* output);

extern "C" eIcicleError bls12_377_g2_projective_convert_montgomery(
  const bls12_377::g2_projective_t* input,
  size_t n,
  bool is_into,
  const VecOpsConfig& config,
  bls12_377::g2_projective_t* output);

extern "C" bool bls12_377_eq(bls12_377::projective_t* point1, bls12_377::projective_t* point2);

extern "C" void bls12_377_to_affine(bls12_377::projective_t* point, bls12_377::affine_t* point_out);

extern "C" void bls12_377_generate_projective_points(bls12_377::projective_t* points, int size);

extern "C" void bls12_377_generate_affine_points(bls12_377::affine_t* points, int size);

extern "C" eIcicleError bls12_377_affine_convert_montgomery(
  const bls12_377::affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, bls12_377::affine_t* output);

extern "C" eIcicleError bls12_377_projective_convert_montgomery(
  const bls12_377::projective_t* input,
  size_t n,
  bool is_into,
  const VecOpsConfig& config,
  bls12_377::projective_t* output);

extern "C" eIcicleError bls12_377_precompute_msm_bases(
  const bls12_377::affine_t* bases, int nof_bases, const MSMConfig& config, bls12_377::affine_t* output_bases);

extern "C" eIcicleError bls12_377_msm(
  const bls12_377::scalar_t* scalars,
  const bls12_377::affine_t* points,
  int msm_size,
  const MSMConfig& config,
  bls12_377::projective_t* out);

extern "C" eIcicleError bls12_377_ecntt(
  const bls12_377::projective_t* input,
  int size,
  NTTDir dir,
  NTTConfig<bls12_377::scalar_t>& config,
  bls12_377::projective_t* output);

extern "C" eIcicleError
bls12_377_ntt_init_domain(bls12_377::scalar_t* primitive_root, const NTTInitDomainConfig& config);

extern "C" eIcicleError bls12_377_ntt(
  const bls12_377::scalar_t* input,
  int size,
  NTTDir dir,
  NTTConfig<bls12_377::scalar_t>& config,
  bls12_377::scalar_t* output);

extern "C" eIcicleError bls12_377_ntt_release_domain();

extern "C" void bls12_377_generate_scalars(bls12_377::scalar_t* scalars, int size);

extern "C" void bls12_377_scalar_convert_montgomery(
  const bls12_377::scalar_t* input,
  uint64_t size,
  bool is_into,
  const VecOpsConfig& config,
  bls12_377::scalar_t* output);

extern "C" eIcicleError bls12_377_vector_mul(
  const bls12_377::scalar_t* vec_a,
  const bls12_377::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  bls12_377::scalar_t* result);

extern "C" eIcicleError bls12_377vector_add(
  const bls12_377::scalar_t* vec_a,
  const bls12_377::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  bls12_377::scalar_t* result);

// extern "C" eIcicleError bls12_377_accumulate_cuda(
//   const bls12_377::scalar_t* vec_a, const bls12_377::scalar_t* vec_b, uint64_t n, const VecOpsConfig& config);

extern "C" eIcicleError bls12_377_vector_sub(
  const bls12_377::scalar_t* vec_a,
  const bls12_377::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  bls12_377::scalar_t* result);

extern "C" eIcicleError bls12_377_transpose_matrix(
  const bls12_377::scalar_t* input,
  uint32_t nof_rows,
  uint32_t nof_cols,
  const VecOpsConfig& config,
  bls12_377::scalar_t* output);

extern "C" eIcicleError bls12_377_bit_reverse(
  const bls12_377::scalar_t* input, uint64_t n, const VecOpsConfig& config, bls12_377::scalar_t* output);
133  icicle_v3/include/icicle/api/bls12_381.h  Normal file
@@ -0,0 +1,133 @@
// WARNING: This file is auto-generated by a script.
// Any changes made to this file may be overwritten.
// Please modify the code generation script instead.
// Path to the code generation script: scripts/gen_c_api.py

#pragma once

#include "icicle/curves/params/bls12_381.h"
#include "icicle/ntt.h"
#include "icicle/msm.h"
#include "icicle/vec_ops.h"

extern "C" eIcicleError bls12_381_g2_precompute_msm_bases(
  const bls12_381::g2_affine_t* bases, int nof_bases, const MSMConfig& config, bls12_381::g2_affine_t* output_bases);

extern "C" eIcicleError bls12_381_g2_msm(
  const bls12_381::scalar_t* scalars,
  const bls12_381::g2_affine_t* points,
  int msm_size,
  const MSMConfig& config,
  bls12_381::g2_projective_t* out);

extern "C" bool bls12_381_g2_eq(bls12_381::g2_projective_t* point1, bls12_381::g2_projective_t* point2);

extern "C" void bls12_381_g2_to_affine(bls12_381::g2_projective_t* point, bls12_381::g2_affine_t* point_out);

extern "C" void bls12_381_g2_generate_projective_points(bls12_381::g2_projective_t* points, int size);

extern "C" void bls12_381_g2_generate_affine_points(bls12_381::g2_affine_t* points, int size);

extern "C" eIcicleError bls12_381_g2_affine_convert_montgomery(
  const bls12_381::g2_affine_t* input,
  size_t n,
  bool is_into,
  const VecOpsConfig& config,
  bls12_381::g2_affine_t* output);

extern "C" eIcicleError bls12_381_g2_projective_convert_montgomery(
  const bls12_381::g2_projective_t* input,
  size_t n,
  bool is_into,
  const VecOpsConfig& config,
  bls12_381::g2_projective_t* output);

extern "C" bool bls12_381_eq(bls12_381::projective_t* point1, bls12_381::projective_t* point2);

extern "C" void bls12_381_to_affine(bls12_381::projective_t* point, bls12_381::affine_t* point_out);

extern "C" void bls12_381_generate_projective_points(bls12_381::projective_t* points, int size);

extern "C" void bls12_381_generate_affine_points(bls12_381::affine_t* points, int size);

extern "C" eIcicleError bls12_381_affine_convert_montgomery(
  const bls12_381::affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, bls12_381::affine_t* output);

extern "C" eIcicleError bls12_381_projective_convert_montgomery(
  const bls12_381::projective_t* input,
  size_t n,
  bool is_into,
  const VecOpsConfig& config,
  bls12_381::projective_t* output);

extern "C" eIcicleError bls12_381_precompute_msm_bases(
  const bls12_381::affine_t* bases, int nof_bases, const MSMConfig& config, bls12_381::affine_t* output_bases);

extern "C" eIcicleError bls12_381_msm(
  const bls12_381::scalar_t* scalars,
  const bls12_381::affine_t* points,
  int msm_size,
  const MSMConfig& config,
  bls12_381::projective_t* out);

extern "C" eIcicleError bls12_381_ecntt(
  const bls12_381::projective_t* input,
  int size,
  NTTDir dir,
  NTTConfig<bls12_381::scalar_t>& config,
  bls12_381::projective_t* output);

extern "C" eIcicleError
bls12_381_ntt_init_domain(bls12_381::scalar_t* primitive_root, const NTTInitDomainConfig& config);

extern "C" eIcicleError bls12_381_ntt(
  const bls12_381::scalar_t* input,
  int size,
  NTTDir dir,
  NTTConfig<bls12_381::scalar_t>& config,
  bls12_381::scalar_t* output);

extern "C" eIcicleError bls12_381_ntt_release_domain();

extern "C" void bls12_381_generate_scalars(bls12_381::scalar_t* scalars, int size);

extern "C" void bls12_381_scalar_convert_montgomery(
  const bls12_381::scalar_t* input,
  uint64_t size,
  bool is_into,
  const VecOpsConfig& config,
  bls12_381::scalar_t* output);

extern "C" eIcicleError bls12_381_vector_mul(
  const bls12_381::scalar_t* vec_a,
  const bls12_381::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  bls12_381::scalar_t* result);

extern "C" eIcicleError bls12_381vector_add(
  const bls12_381::scalar_t* vec_a,
  const bls12_381::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  bls12_381::scalar_t* result);

// extern "C" eIcicleError bls12_381_accumulate_cuda(
//   const bls12_381::scalar_t* vec_a, const bls12_381::scalar_t* vec_b, uint64_t n, const VecOpsConfig& config);

extern "C" eIcicleError bls12_381_vector_sub(
  const bls12_381::scalar_t* vec_a,
  const bls12_381::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  bls12_381::scalar_t* result);

extern "C" eIcicleError bls12_381_transpose_matrix(
  const bls12_381::scalar_t* input,
  uint32_t nof_rows,
  uint32_t nof_cols,
  const VecOpsConfig& config,
  bls12_381::scalar_t* output);

extern "C" eIcicleError bls12_381_bit_reverse(
  const bls12_381::scalar_t* input, uint64_t n, const VecOpsConfig& config, bls12_381::scalar_t* output);
116  icicle_v3/include/icicle/api/bn254.h  Normal file
@@ -0,0 +1,116 @@
// WARNING: This file is auto-generated by a script.
// Any changes made to this file may be overwritten.
// Please modify the code generation script instead.
// Path to the code generation script: scripts/gen_c_api.py

#pragma once

#include "icicle/curves/params/bn254.h"
#include "icicle/ntt.h"
#include "icicle/msm.h"
#include "icicle/vec_ops.h"

extern "C" eIcicleError bn254_g2_precompute_msm_bases(
  const bn254::g2_affine_t* bases, int nof_bases, const MSMConfig& config, bn254::g2_affine_t* output_bases);

extern "C" eIcicleError bn254_g2_msm(
  const bn254::scalar_t* scalars,
  const bn254::g2_affine_t* points,
  int msm_size,
  const MSMConfig& config,
  bn254::g2_projective_t* out);

extern "C" bool bn254_g2_eq(bn254::g2_projective_t* point1, bn254::g2_projective_t* point2);

extern "C" void bn254_g2_to_affine(bn254::g2_projective_t* point, bn254::g2_affine_t* point_out);

extern "C" void bn254_g2_generate_projective_points(bn254::g2_projective_t* points, int size);

extern "C" void bn254_g2_generate_affine_points(bn254::g2_affine_t* points, int size);

extern "C" eIcicleError bn254_g2_affine_convert_montgomery(
  const bn254::g2_affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, bn254::g2_affine_t* output);

extern "C" eIcicleError bn254_g2_projective_convert_montgomery(
  const bn254::g2_projective_t* input,
  size_t n,
  bool is_into,
  const VecOpsConfig& config,
  bn254::g2_projective_t* output);

extern "C" bool bn254_eq(bn254::projective_t* point1, bn254::projective_t* point2);

extern "C" void bn254_to_affine(bn254::projective_t* point, bn254::affine_t* point_out);

extern "C" void bn254_generate_projective_points(bn254::projective_t* points, int size);

extern "C" void bn254_generate_affine_points(bn254::affine_t* points, int size);

extern "C" eIcicleError bn254_affine_convert_montgomery(
  const bn254::affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, bn254::affine_t* output);

extern "C" eIcicleError bn254_projective_convert_montgomery(
  const bn254::projective_t* input, size_t n, bool is_into, const VecOpsConfig& config, bn254::projective_t* output);

extern "C" eIcicleError bn254_precompute_msm_bases(
  const bn254::affine_t* bases, int nof_bases, const MSMConfig& config, bn254::affine_t* output_bases);

extern "C" eIcicleError bn254_msm(
  const bn254::scalar_t* scalars,
  const bn254::affine_t* points,
  int msm_size,
  const MSMConfig& config,
  bn254::projective_t* out);

extern "C" eIcicleError bn254_ecntt(
  const bn254::projective_t* input,
  int size,
  NTTDir dir,
  NTTConfig<bn254::scalar_t>& config,
  bn254::projective_t* output);

extern "C" eIcicleError bn254_ntt_init_domain(bn254::scalar_t* primitive_root, const NTTInitDomainConfig& config);

extern "C" eIcicleError bn254_ntt(
  const bn254::scalar_t* input, int size, NTTDir dir, NTTConfig<bn254::scalar_t>& config, bn254::scalar_t* output);

extern "C" eIcicleError bn254_ntt_release_domain();

extern "C" void bn254_generate_scalars(bn254::scalar_t* scalars, int size);

extern "C" void bn254_scalar_convert_montgomery(
  const bn254::scalar_t* input, uint64_t size, bool is_into, const VecOpsConfig& config, bn254::scalar_t* output);

extern "C" eIcicleError bn254_vector_mul(
  const bn254::scalar_t* vec_a,
  const bn254::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  bn254::scalar_t* result);

extern "C" eIcicleError bn254vector_add(
  const bn254::scalar_t* vec_a,
  const bn254::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  bn254::scalar_t* result);

// extern "C" eIcicleError bn254_accumulate_cuda(
//   const bn254::scalar_t* vec_a, const bn254::scalar_t* vec_b, uint64_t n, const VecOpsConfig& config);

extern "C" eIcicleError bn254_vector_sub(
  const bn254::scalar_t* vec_a,
  const bn254::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  bn254::scalar_t* result);

extern "C" eIcicleError bn254_transpose_matrix(
  const bn254::scalar_t* input,
  uint32_t nof_rows,
  uint32_t nof_cols,
  const VecOpsConfig& config,
  bn254::scalar_t* output);

extern "C" eIcicleError
bn254_bit_reverse(const bn254::scalar_t* input, uint64_t n, const VecOpsConfig& config, bn254::scalar_t* output);
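Similarly, a hedged MSM sketch against the bn254 declarations above; default_msm_config() is assumed to come from icicle/msm.h, and error handling is kept minimal.

#include "icicle/api/bn254.h"

#include <vector>

int main()
{
  const int msm_size = 1024;
  std::vector<bn254::scalar_t> scalars(msm_size);
  std::vector<bn254::affine_t> points(msm_size);

  // Random inputs from the generated helpers declared above.
  bn254_generate_scalars(scalars.data(), msm_size);
  bn254_generate_affine_points(points.data(), msm_size);

  // Single multi-scalar multiplication; the result is one projective point.
  bn254::projective_t result;
  MSMConfig config = default_msm_config(); // assumed default-config helper from icicle/msm.h
  eIcicleError err = bn254_msm(scalars.data(), points.data(), msm_size, config, &result);
  return err == eIcicleError::SUCCESS ? 0 : 1;
}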
124  icicle_v3/include/icicle/api/bw6_761.h  Normal file
@@ -0,0 +1,124 @@
// WARNING: This file is auto-generated by a script.
// Any changes made to this file may be overwritten.
// Please modify the code generation script instead.
// Path to the code generation script: scripts/gen_c_api.py

#pragma once

#include "icicle/curves/params/bw6_761.h"
#include "icicle/ntt.h"
#include "icicle/msm.h"
#include "icicle/vec_ops.h"

extern "C" eIcicleError bw6_761_g2_precompute_msm_bases(
  const bw6_761::g2_affine_t* bases, int nof_bases, const MSMConfig& config, bw6_761::g2_affine_t* output_bases);

extern "C" eIcicleError bw6_761_g2_msm(
  const bw6_761::scalar_t* scalars,
  const bw6_761::g2_affine_t* points,
  int msm_size,
  const MSMConfig& config,
  bw6_761::g2_projective_t* out);

extern "C" bool bw6_761_g2_eq(bw6_761::g2_projective_t* point1, bw6_761::g2_projective_t* point2);

extern "C" void bw6_761_g2_to_affine(bw6_761::g2_projective_t* point, bw6_761::g2_affine_t* point_out);

extern "C" void bw6_761_g2_generate_projective_points(bw6_761::g2_projective_t* points, int size);

extern "C" void bw6_761_g2_generate_affine_points(bw6_761::g2_affine_t* points, int size);

extern "C" eIcicleError bw6_761_g2_affine_convert_montgomery(
  const bw6_761::g2_affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, bw6_761::g2_affine_t* output);

extern "C" eIcicleError bw6_761_g2_projective_convert_montgomery(
  const bw6_761::g2_projective_t* input,
  size_t n,
  bool is_into,
  const VecOpsConfig& config,
  bw6_761::g2_projective_t* output);

extern "C" bool bw6_761_eq(bw6_761::projective_t* point1, bw6_761::projective_t* point2);

extern "C" void bw6_761_to_affine(bw6_761::projective_t* point, bw6_761::affine_t* point_out);

extern "C" void bw6_761_generate_projective_points(bw6_761::projective_t* points, int size);

extern "C" void bw6_761_generate_affine_points(bw6_761::affine_t* points, int size);

extern "C" eIcicleError bw6_761_affine_convert_montgomery(
  const bw6_761::affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, bw6_761::affine_t* output);

extern "C" eIcicleError bw6_761_projective_convert_montgomery(
  const bw6_761::projective_t* input,
  size_t n,
  bool is_into,
  const VecOpsConfig& config,
  bw6_761::projective_t* output);

extern "C" eIcicleError bw6_761_precompute_msm_bases(
  const bw6_761::affine_t* bases, int nof_bases, const MSMConfig& config, bw6_761::affine_t* output_bases);

extern "C" eIcicleError bw6_761_msm(
  const bw6_761::scalar_t* scalars,
  const bw6_761::affine_t* points,
  int msm_size,
  const MSMConfig& config,
  bw6_761::projective_t* out);

extern "C" eIcicleError bw6_761_ecntt(
  const bw6_761::projective_t* input,
  int size,
  NTTDir dir,
  NTTConfig<bw6_761::scalar_t>& config,
  bw6_761::projective_t* output);

extern "C" eIcicleError bw6_761_ntt_init_domain(bw6_761::scalar_t* primitive_root, const NTTInitDomainConfig& config);

extern "C" eIcicleError bw6_761_ntt(
  const bw6_761::scalar_t* input,
  int size,
  NTTDir dir,
  NTTConfig<bw6_761::scalar_t>& config,
  bw6_761::scalar_t* output);

extern "C" eIcicleError bw6_761_ntt_release_domain();

extern "C" void bw6_761_generate_scalars(bw6_761::scalar_t* scalars, int size);

extern "C" void bw6_761_scalar_convert_montgomery(
  const bw6_761::scalar_t* input, uint64_t size, bool is_into, const VecOpsConfig& config, bw6_761::scalar_t* output);

extern "C" eIcicleError bw6_761_vector_mul(
  const bw6_761::scalar_t* vec_a,
  const bw6_761::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  bw6_761::scalar_t* result);

extern "C" eIcicleError bw6_761vector_add(
  const bw6_761::scalar_t* vec_a,
  const bw6_761::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  bw6_761::scalar_t* result);

// extern "C" eIcicleError bw6_761_accumulate_cuda(
//   const bw6_761::scalar_t* vec_a, const bw6_761::scalar_t* vec_b, uint64_t n, const VecOpsConfig& config);

extern "C" eIcicleError bw6_761_vector_sub(
  const bw6_761::scalar_t* vec_a,
  const bw6_761::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  bw6_761::scalar_t* result);

extern "C" eIcicleError bw6_761_transpose_matrix(
  const bw6_761::scalar_t* input,
  uint32_t nof_rows,
  uint32_t nof_cols,
  const VecOpsConfig& config,
  bw6_761::scalar_t* output);

extern "C" eIcicleError
bw6_761_bit_reverse(const bw6_761::scalar_t* input, uint64_t n, const VecOpsConfig& config, bw6_761::scalar_t* output);
77  icicle_v3/include/icicle/api/grumpkin.h  Normal file
@@ -0,0 +1,77 @@
// WARNING: This file is auto-generated by a script.
// Any changes made to this file may be overwritten.
// Please modify the code generation script instead.
// Path to the code generation script: scripts/gen_c_api.py

#pragma once

#include "icicle/curves/params/grumpkin.h"
#include "icicle/msm.h"
#include "icicle/vec_ops.h"

extern "C" bool grumpkin_eq(grumpkin::projective_t* point1, grumpkin::projective_t* point2);

extern "C" void grumpkin_to_affine(grumpkin::projective_t* point, grumpkin::affine_t* point_out);

extern "C" void grumpkin_generate_projective_points(grumpkin::projective_t* points, int size);

extern "C" void grumpkin_generate_affine_points(grumpkin::affine_t* points, int size);

extern "C" eIcicleError grumpkin_affine_convert_montgomery(
  const grumpkin::affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, grumpkin::affine_t* output);

extern "C" eIcicleError grumpkin_projective_convert_montgomery(
  const grumpkin::projective_t* input,
  size_t n,
  bool is_into,
  const VecOpsConfig& config,
  grumpkin::projective_t* output);

extern "C" eIcicleError grumpkin_precompute_msm_bases(
  const grumpkin::affine_t* bases, int nof_bases, const MSMConfig& config, grumpkin::affine_t* output_bases);

extern "C" eIcicleError grumpkin_msm(
  const grumpkin::scalar_t* scalars,
  const grumpkin::affine_t* points,
  int msm_size,
  const MSMConfig& config,
  grumpkin::projective_t* out);

extern "C" void grumpkin_generate_scalars(grumpkin::scalar_t* scalars, int size);

extern "C" void grumpkin_scalar_convert_montgomery(
  const grumpkin::scalar_t* input, uint64_t size, bool is_into, const VecOpsConfig& config, grumpkin::scalar_t* output);

extern "C" eIcicleError grumpkin_vector_mul(
  const grumpkin::scalar_t* vec_a,
  const grumpkin::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  grumpkin::scalar_t* result);

extern "C" eIcicleError grumpkinvector_add(
  const grumpkin::scalar_t* vec_a,
  const grumpkin::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  grumpkin::scalar_t* result);

// extern "C" eIcicleError grumpkin_accumulate_cuda(
//   const grumpkin::scalar_t* vec_a, const grumpkin::scalar_t* vec_b, uint64_t n, const VecOpsConfig& config);

extern "C" eIcicleError grumpkin_vector_sub(
  const grumpkin::scalar_t* vec_a,
  const grumpkin::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  grumpkin::scalar_t* result);

extern "C" eIcicleError grumpkin_transpose_matrix(
  const grumpkin::scalar_t* input,
  uint32_t nof_rows,
  uint32_t nof_cols,
  const VecOpsConfig& config,
  grumpkin::scalar_t* output);

extern "C" eIcicleError grumpkin_bit_reverse(
  const grumpkin::scalar_t* input, uint64_t n, const VecOpsConfig& config, grumpkin::scalar_t* output);
60  icicle_v3/include/icicle/api/stark252.h  Normal file
@@ -0,0 +1,60 @@
// WARNING: This file is auto-generated by a script.
// Any changes made to this file may be overwritten.
// Please modify the code generation script instead.
// Path to the code generation script: scripts/gen_c_api.py

#pragma once

#include "icicle/fields/stark_fields/stark252.h"
#include "icicle/ntt.h"
#include "icicle/vec_ops.h"

extern "C" eIcicleError stark252_ntt_init_domain(stark252::scalar_t* primitive_root, const NTTInitDomainConfig& config);

extern "C" eIcicleError stark252_ntt(
  const stark252::scalar_t* input,
  int size,
  NTTDir dir,
  NTTConfig<stark252::scalar_t>& config,
  stark252::scalar_t* output);

extern "C" eIcicleError stark252_ntt_release_domain();

extern "C" void stark252_generate_scalars(stark252::scalar_t* scalars, int size);

extern "C" void stark252_scalar_convert_montgomery(
  const stark252::scalar_t* input, uint64_t size, bool is_into, const VecOpsConfig& config, stark252::scalar_t* output);

extern "C" eIcicleError stark252_vector_mul(
  const stark252::scalar_t* vec_a,
  const stark252::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  stark252::scalar_t* result);

extern "C" eIcicleError stark252vector_add(
  const stark252::scalar_t* vec_a,
  const stark252::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  stark252::scalar_t* result);

// extern "C" eIcicleError stark252_accumulate_cuda(
//   const stark252::scalar_t* vec_a, const stark252::scalar_t* vec_b, uint64_t n, const VecOpsConfig& config);

extern "C" eIcicleError stark252_vector_sub(
  const stark252::scalar_t* vec_a,
  const stark252::scalar_t* vec_b,
  uint64_t n,
  const VecOpsConfig& config,
  stark252::scalar_t* result);

extern "C" eIcicleError stark252_transpose_matrix(
  const stark252::scalar_t* input,
  uint32_t nof_rows,
  uint32_t nof_cols,
  const VecOpsConfig& config,
  stark252::scalar_t* output);

extern "C" eIcicleError stark252_bit_reverse(
  const stark252::scalar_t* input, uint64_t n, const VecOpsConfig& config, stark252::scalar_t* output);
13  icicle_v3/include/icicle/api/templates/curves/curve.template  Normal file
@@ -0,0 +1,13 @@
extern "C" bool ${CURVE}_eq(${CURVE}::projective_t* point1, ${CURVE}::projective_t* point2);

extern "C" void ${CURVE}_to_affine(${CURVE}::projective_t* point, ${CURVE}::affine_t* point_out);

extern "C" void ${CURVE}_generate_projective_points(${CURVE}::projective_t* points, int size);

extern "C" void ${CURVE}_generate_affine_points(${CURVE}::affine_t* points, int size);

extern "C" eIcicleError ${CURVE}_affine_convert_montgomery(
  const ${CURVE}::affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, ${CURVE}::affine_t* output);

extern "C" eIcicleError ${CURVE}_projective_convert_montgomery(
  const ${CURVE}::projective_t* input, size_t n, bool is_into, const VecOpsConfig& config, ${CURVE}::projective_t* output);
@@ -0,0 +1,13 @@
extern "C" bool ${CURVE}_g2_eq(${CURVE}::g2_projective_t* point1, ${CURVE}::g2_projective_t* point2);

extern "C" void ${CURVE}_g2_to_affine(${CURVE}::g2_projective_t* point, ${CURVE}::g2_affine_t* point_out);

extern "C" void ${CURVE}_g2_generate_projective_points(${CURVE}::g2_projective_t* points, int size);

extern "C" void ${CURVE}_g2_generate_affine_points(${CURVE}::g2_affine_t* points, int size);

extern "C" eIcicleError ${CURVE}_g2_affine_convert_montgomery(
  const ${CURVE}::g2_affine_t* input, size_t n, bool is_into, const VecOpsConfig& config, ${CURVE}::g2_affine_t* output);

extern "C" eIcicleError ${CURVE}_g2_projective_convert_montgomery(
  const ${CURVE}::g2_projective_t* input, size_t n, bool is_into, const VecOpsConfig& config, ${CURVE}::g2_projective_t* output);
@@ -0,0 +1,2 @@
extern "C" eIcicleError ${CURVE}_ecntt(
  const ${CURVE}::projective_t* input, int size, NTTDir dir, NTTConfig<${CURVE}::scalar_t>& config, ${CURVE}::projective_t* output);
@@ -0,0 +1,8 @@
extern "C" eIcicleError ${CURVE}_precompute_msm_bases(
  const ${CURVE}::affine_t* bases,
  int nof_bases,
  const MSMConfig& config,
  ${CURVE}::affine_t* output_bases);

extern "C" eIcicleError ${CURVE}_msm(
  const ${CURVE}::scalar_t* scalars, const ${CURVE}::affine_t* points, int msm_size, const MSMConfig& config, ${CURVE}::projective_t* out);
@@ -0,0 +1,8 @@
extern "C" eIcicleError ${CURVE}_g2_precompute_msm_bases(
  const ${CURVE}::g2_affine_t* bases,
  int nof_bases,
  const MSMConfig& config,
  ${CURVE}::g2_affine_t* output_bases);

extern "C" eIcicleError ${CURVE}_g2_msm(
  const ${CURVE}::scalar_t* scalars, const ${CURVE}::g2_affine_t* points, int msm_size, const MSMConfig& config, ${CURVE}::g2_projective_t* out);
@@ -0,0 +1,4 @@
extern "C" void ${FIELD}_generate_scalars(${FIELD}::scalar_t* scalars, int size);

extern "C" void ${FIELD}_scalar_convert_montgomery(
  const ${FIELD}::scalar_t* input, uint64_t size, bool is_into, const VecOpsConfig& config, ${FIELD}::scalar_t* output);
@@ -0,0 +1,4 @@
extern "C" void ${FIELD}_extension_generate_scalars(${FIELD}::extension_t* scalars, int size);

extern "C" eIcicleError ${FIELD}_extension_scalar_convert_montgomery(
  const ${FIELD}::extension_t* input, uint64_t size, bool is_into, const VecOpsConfig& config, ${FIELD}::extension_t* output);
Some files were not shown because too many files have changed in this diff.