mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-04-28 03:01:21 -04:00
Compare commits
194 Commits
pa/fix/com
...
release/1.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e53f5b6cba | ||
|
|
09dbe6eef3 | ||
|
|
f9318b5167 | ||
|
|
1f5169a580 | ||
|
|
686885e890 | ||
|
|
7826a3b537 | ||
|
|
3cfa7499ae | ||
|
|
6295531229 | ||
|
|
6a3f037b55 | ||
|
|
4e02ec5299 | ||
|
|
c319a8b637 | ||
|
|
ca6b4d487c | ||
|
|
b0afa12d09 | ||
|
|
6cdab08cf1 | ||
|
|
357d25d09d | ||
|
|
e1e87a7cc5 | ||
|
|
9657480262 | ||
|
|
bb063dfc8f | ||
|
|
b6b32ba096 | ||
|
|
597ffdc8f3 | ||
|
|
9ed7adf5f5 | ||
|
|
69438d40a8 | ||
|
|
7354265c52 | ||
|
|
e8576ca2e1 | ||
|
|
413559e536 | ||
|
|
87e1b57803 | ||
|
|
7ae4f00805 | ||
|
|
ce56ea2078 | ||
|
|
f9795a6199 | ||
|
|
c768af8093 | ||
|
|
d0ce05027c | ||
|
|
1771a400bc | ||
|
|
bed95d26f6 | ||
|
|
9f2e8128e6 | ||
|
|
8a0bf69f11 | ||
|
|
20602453ce | ||
|
|
1e58803432 | ||
|
|
1e5fb00715 | ||
|
|
78fc99aa79 | ||
|
|
2c3cf3bfd3 | ||
|
|
6a252fc08b | ||
|
|
a037ca5618 | ||
|
|
1f381cf9da | ||
|
|
41a09317e7 | ||
|
|
6ad29e4540 | ||
|
|
009257b63e | ||
|
|
3d1c25888c | ||
|
|
e0d442922e | ||
|
|
8e6663e9fb | ||
|
|
0d722d167e | ||
|
|
21abcbdf4c | ||
|
|
8e30d5e538 | ||
|
|
d82a0a4b75 | ||
|
|
91dc4f44da | ||
|
|
9eb6d5afd1 | ||
|
|
ac4d36d6f6 | ||
|
|
6e158cd109 | ||
|
|
cdcf00af45 | ||
|
|
78638a24d2 | ||
|
|
84c12cca56 | ||
|
|
7d05a427a5 | ||
|
|
c7bc981f7f | ||
|
|
f7210c80a9 | ||
|
|
a11584f905 | ||
|
|
f326aaf2f2 | ||
|
|
3a63b96b77 | ||
|
|
bbf30e5227 | ||
|
|
9f5266a56d | ||
|
|
065f863ba2 | ||
|
|
93dd4cf61a | ||
|
|
3331ce2548 | ||
|
|
7e30816fe8 | ||
|
|
765d2b6dbe | ||
|
|
5207e55684 | ||
|
|
a4bd78912b | ||
|
|
c59fa4c479 | ||
|
|
bbcad438fc | ||
|
|
89016d7a07 | ||
|
|
d454e67b89 | ||
|
|
f1cf021d18 | ||
|
|
b1008824e2 | ||
|
|
4928b1354e | ||
|
|
7d2a296d4d | ||
|
|
11cbffb3f2 | ||
|
|
a7111014e8 | ||
|
|
7d3cdbf466 | ||
|
|
dc9afe1146 | ||
|
|
8287f59ebd | ||
|
|
9282dc49bf | ||
|
|
71d8bcff89 | ||
|
|
30319452a4 | ||
|
|
fb0e9a3b35 | ||
|
|
dbcbea78a5 | ||
|
|
c9b9fc52d8 | ||
|
|
f404e8f10d | ||
|
|
31de8d52d8 | ||
|
|
fd866d18fe | ||
|
|
56572d0223 | ||
|
|
f3e14dc311 | ||
|
|
1600f8c995 | ||
|
|
5258acc08f | ||
|
|
912af0e87e | ||
|
|
0fdda14495 | ||
|
|
9886256242 | ||
|
|
4b89766011 | ||
|
|
5d3b4438d5 | ||
|
|
e62710de12 | ||
|
|
a2f1825691 | ||
|
|
dcec10b0cf | ||
|
|
573ce0c803 | ||
|
|
459969e9d2 | ||
|
|
8dadb626f2 | ||
|
|
ba1235059a | ||
|
|
d53db210de | ||
|
|
2258aa0cbe | ||
|
|
54a7d4b57c | ||
|
|
18db93f8fa | ||
|
|
268b5892b7 | ||
|
|
a2beabf003 | ||
|
|
311d666042 | ||
|
|
464f4ef9cf | ||
|
|
f8e56c104e | ||
|
|
adfd8e8c86 | ||
|
|
473a4e383e | ||
|
|
fca0cca071 | ||
|
|
b7d33e6b3f | ||
|
|
b0d7bb9f95 | ||
|
|
396f30ff5d | ||
|
|
10b82141eb | ||
|
|
e6e7081c7c | ||
|
|
20421747ed | ||
|
|
59bb7ba35c | ||
|
|
80a1109260 | ||
|
|
54396370a1 | ||
|
|
3621d12c42 | ||
|
|
1f2e1537fa | ||
|
|
52a1191474 | ||
|
|
d06e8d1e87 | ||
|
|
863234d134 | ||
|
|
fcfb77a8c5 | ||
|
|
98d58ada7a | ||
|
|
8962d1f925 | ||
|
|
f7655cc749 | ||
|
|
371e8238db | ||
|
|
c1d534efa4 | ||
|
|
47589ea9a7 | ||
|
|
ce327b7b27 | ||
|
|
877d0234ac | ||
|
|
f457ac40e5 | ||
|
|
d9feb57b92 | ||
|
|
fa41fb3ad4 | ||
|
|
375a482d0b | ||
|
|
7e941b29c1 | ||
|
|
3897137a3f | ||
|
|
3988c85d6b | ||
|
|
c1bf43eac1 | ||
|
|
95863e1e36 | ||
|
|
a508f4cadc | ||
|
|
dad278cdd3 | ||
|
|
699e24f735 | ||
|
|
12ed899b34 | ||
|
|
8565b79a28 | ||
|
|
1d7f9f1152 | ||
|
|
3ecdd0d1bc | ||
|
|
14517ca111 | ||
|
|
a2eceabd82 | ||
|
|
968ab31f27 | ||
|
|
74d5a88f1b | ||
|
|
e18ce00f63 | ||
|
|
7ec8f901da | ||
|
|
610406ac27 | ||
|
|
4162ff5b64 | ||
|
|
efd06c5b43 | ||
|
|
bd2a488f13 | ||
|
|
9f48db2a90 | ||
|
|
f962716fa5 | ||
|
|
ec3f3a1b52 | ||
|
|
ab36f36116 | ||
|
|
06638c33d7 | ||
|
|
e583212e6d | ||
|
|
486ec9f053 | ||
|
|
0216e640bf | ||
|
|
d00224caa3 | ||
|
|
bd06971680 | ||
|
|
58688cd401 | ||
|
|
2757f7209a | ||
|
|
b38b119746 | ||
|
|
219c755a77 | ||
|
|
fc4abd5fb1 | ||
|
|
5de1445cbf | ||
|
|
6b21bff1e8 | ||
|
|
a1dc260fb2 | ||
|
|
5d9af12f6e | ||
|
|
32c93876d7 |
@@ -11,6 +11,8 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -51,7 +53,7 @@ jobs:
|
||||
name: Backward compatibility tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -62,14 +64,10 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Install git-lfs
|
||||
run: |
|
||||
sudo apt update && sudo apt -y install git-lfs
|
||||
|
||||
- name: Use specific data branch
|
||||
if: ${{ contains(github.event.pull_request.labels.*.name, 'data_PR') }}
|
||||
env:
|
||||
@@ -83,7 +81,23 @@ jobs:
|
||||
BRANCH="$(make backward_compat_branch)"
|
||||
echo "branch=${BRANCH}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Get backward compat branch head SHA
|
||||
id: backward_compat_sha
|
||||
env:
|
||||
REPO_URL: "https://github.com/zama-ai/tfhe-backward-compat-data"
|
||||
run: |
|
||||
SHA=$(git ls-remote ${{ env.REPO_URL }} refs/heads/${{ steps.backward_compat_branch.outputs.branch }} | awk '{print $1}')
|
||||
echo "sha=${SHA}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Retrieve data from cache
|
||||
id: retrieve-data-cache
|
||||
uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
|
||||
with:
|
||||
path: tests/tfhe-backward-compat-data
|
||||
key: ${{ steps.backward_compat_branch.outputs.branch }}_${{ steps.backward_compat_sha.outputs.sha }}
|
||||
|
||||
- name: Clone test data
|
||||
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
@@ -96,13 +110,26 @@ jobs:
|
||||
run: |
|
||||
make test_backward_compatibility_ci
|
||||
|
||||
- name: Store data in cache
|
||||
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
|
||||
continue-on-error: true
|
||||
uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
|
||||
with:
|
||||
path: tests/tfhe-backward-compat-data
|
||||
key: ${{ steps.backward_compat_branch.outputs.branch }}_${{ steps.backward_compat_sha.outputs.sha }}
|
||||
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (backward-compat-tests)
|
||||
@@ -127,4 +154,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
21
.github/workflows/aws_tfhe_fast_tests.yml
vendored
21
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -11,7 +11,9 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -63,7 +65,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
dependencies:
|
||||
@@ -158,7 +160,7 @@ jobs:
|
||||
name: Fast CPU tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -169,7 +171,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -209,7 +211,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -222,7 +224,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -264,13 +266,18 @@ jobs:
|
||||
run: |
|
||||
make test_zk
|
||||
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (fast-tests)
|
||||
@@ -295,4 +302,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
17
.github/workflows/aws_tfhe_integer_tests.yml
vendored
17
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -10,6 +10,8 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
# We clear the cache to reduce memory pressure because of the numerous processes of cargo
|
||||
# nextest
|
||||
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
|
||||
@@ -50,7 +52,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
integer:
|
||||
@@ -98,7 +100,7 @@ jobs:
|
||||
name: Unsigned integer tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -109,7 +111,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -134,13 +136,18 @@ jobs:
|
||||
run: |
|
||||
AVX512_SUPPORT=ON NO_BIG_PARAMS=${{ env.NO_BIG_PARAMS }} BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_ci
|
||||
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (unsigned-integer-tests)
|
||||
@@ -165,4 +172,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -10,6 +10,8 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
# We clear the cache to reduce memory pressure because of the numerous processes of cargo
|
||||
# nextest
|
||||
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
|
||||
@@ -51,7 +53,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
integer:
|
||||
@@ -99,7 +101,7 @@ jobs:
|
||||
name: Signed integer tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -110,7 +112,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -139,13 +141,18 @@ jobs:
|
||||
run: |
|
||||
AVX512_SUPPORT=ON NO_BIG_PARAMS=${{ env.NO_BIG_PARAMS }} BIG_TESTS_INSTANCE=TRUE make test_signed_integer_ci
|
||||
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (signed-integer-tests)
|
||||
@@ -170,4 +177,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
17
.github/workflows/aws_tfhe_tests.yml
vendored
17
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -10,7 +10,9 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -72,7 +74,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
dependencies:
|
||||
@@ -169,7 +171,7 @@ jobs:
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
needs: [ should-run, setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}_${{github.event_name}}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -180,7 +182,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -246,13 +248,18 @@ jobs:
|
||||
make test_trivium
|
||||
make test_kreyvium
|
||||
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cpu-tests)
|
||||
@@ -277,4 +284,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
19
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
19
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -10,6 +10,8 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -52,7 +54,7 @@ jobs:
|
||||
name: WASM tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -63,7 +65,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -73,7 +75,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -86,7 +88,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -115,13 +117,18 @@ jobs:
|
||||
run: |
|
||||
make test_zk_wasm_x86_compat_ci
|
||||
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (wasm-tests)
|
||||
@@ -146,4 +153,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
6
.github/workflows/benchmark_boolean.yml
vendored
6
.github/workflows/benchmark_boolean.yml
vendored
@@ -43,7 +43,7 @@ jobs:
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -94,7 +94,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_boolean
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
7
.github/workflows/benchmark_core_crypto.yml
vendored
7
.github/workflows/benchmark_core_crypto.yml
vendored
@@ -43,7 +43,7 @@ jobs:
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
@@ -62,12 +62,13 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make bench_ks_pbs
|
||||
make bench_pbs
|
||||
make bench_pbs128
|
||||
make bench_ks
|
||||
@@ -85,7 +86,7 @@ jobs:
|
||||
--walk-subdirs
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
6
.github/workflows/benchmark_erc20.yml
vendored
6
.github/workflows/benchmark_erc20.yml
vendored
@@ -43,7 +43,7 @@ jobs:
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
continue-on-error: true
|
||||
timeout-minutes: 720 # 12 hours
|
||||
@@ -64,7 +64,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_erc20
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -26,7 +26,9 @@ on:
|
||||
- integer_multi_bit
|
||||
- integer_compression
|
||||
- pbs
|
||||
- pbs128
|
||||
- ks
|
||||
- ks_pbs
|
||||
op_flavor:
|
||||
description: "Operations set to run"
|
||||
type: choice
|
||||
@@ -47,6 +49,14 @@ on:
|
||||
- latency
|
||||
- throughput
|
||||
- both
|
||||
params_type:
|
||||
description: "Parameters type"
|
||||
type: choice
|
||||
default: multi_bit
|
||||
options:
|
||||
- classical
|
||||
- multi_bit
|
||||
- both
|
||||
|
||||
jobs:
|
||||
parse-inputs:
|
||||
@@ -68,12 +78,13 @@ jobs:
|
||||
run-benchmarks:
|
||||
name: Run benchmarks
|
||||
needs: parse-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_integer_common.yml
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
profile: ${{ needs.parse-inputs.outputs.profile }}
|
||||
hardware_name: ${{ needs.parse-inputs.outputs.hardware_name }}
|
||||
command: ${{ inputs.command }}
|
||||
op_flavor: ${{ inputs.op_flavor }}
|
||||
bench_type: ${{ inputs.bench_type }}
|
||||
params_type: ${{ inputs.params_type }}
|
||||
all_precisions: ${{ inputs.all_precisions }}
|
||||
secrets: inherit
|
||||
16
.github/workflows/benchmark_gpu_4090.yml
vendored
16
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs' ||
|
||||
contains(github.event.label.name, '4090_bench') }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}_cuda_integer_bench
|
||||
group: ${{ github.workflow_ref }}_cuda_integer_bench
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ["self-hosted", "4090-desktop"]
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -80,7 +80,7 @@ jobs:
|
||||
--walk-subdirs
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_integer_multi_bit_gpu_default
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -97,14 +97,14 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Integer RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
cuda-core-crypto-benchmarks:
|
||||
name: Cuda core crypto benchmarks (RTX 4090)
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || contains(github.event.label.name, '4090_bench') }}
|
||||
needs: cuda-integer-benchmarks
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}_cuda_core_crypto_bench
|
||||
group: ${{ github.workflow_ref }}_cuda_core_crypto_bench
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ["self-hosted", "4090-desktop"]
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
@@ -126,7 +126,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -157,7 +157,7 @@ jobs:
|
||||
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -182,7 +182,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Core crypto RTX 4090 full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
remove_github_label:
|
||||
name: Remove 4090 bench label
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Run integer benchmarks on CUDA instance and return parsed results to Slab CI bot.
|
||||
# Run benchmarks on CUDA instance and return parsed results to Slab CI bot.
|
||||
name: Cuda benchmarks - common
|
||||
|
||||
on:
|
||||
@@ -18,10 +18,13 @@ on:
|
||||
required: true
|
||||
op_flavor: # Use a comma separated values to generate an array
|
||||
type: string
|
||||
required: true
|
||||
default: default
|
||||
bench_type:
|
||||
type: string
|
||||
default: latency
|
||||
params_type:
|
||||
type: string
|
||||
default: multi_bit
|
||||
all_precisions:
|
||||
type: boolean
|
||||
default: false
|
||||
@@ -63,6 +66,7 @@ jobs:
|
||||
command: ${{ steps.set_command.outputs.command }}
|
||||
op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
|
||||
bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
|
||||
params_type: ${{ steps.set_params_type.outputs.params_type }}
|
||||
steps:
|
||||
- name: Set single command
|
||||
if: ${{ !contains(inputs.command, ',')}}
|
||||
@@ -94,6 +98,14 @@ jobs:
|
||||
echo "BENCH_TYPE=[\"${{ inputs.bench_type }}\"]" >> "${GITHUB_ENV}"
|
||||
fi
|
||||
|
||||
- name: Set parameters types
|
||||
run: |
|
||||
if [[ "${{ inputs.params_type }}" == "both" ]]; then
|
||||
echo "PARAMS_TYPE=[\"classical\", \"multi_bit\"]" >> "${GITHUB_ENV}"
|
||||
else
|
||||
echo "PARAMS_TYPE=[\"${{ inputs.params_type }}\"]" >> "${GITHUB_ENV}"
|
||||
fi
|
||||
|
||||
- name: Set command output
|
||||
id: set_command
|
||||
run: |
|
||||
@@ -109,15 +121,26 @@ jobs:
|
||||
run: |
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Set parameters types output
|
||||
id: set_params_type
|
||||
run: |
|
||||
echo "params_type=${{ toJSON(env.PARAMS_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-${{ inputs.profile }}-benchmarks)
|
||||
needs: prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -127,6 +150,41 @@ jobs:
|
||||
backend: ${{ inputs.backend }}
|
||||
profile: ${{ inputs.profile }}
|
||||
|
||||
# This will allow to fallback on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' &&
|
||||
steps.start-remote-instance.outcome == 'failure' &&
|
||||
inputs.profile == 'single-h100'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# Install dependencies only once since cuda-benchmarks uses a matrix strategy, thus running multiple times.
|
||||
install-dependencies:
|
||||
name: Install dependencies
|
||||
needs: [ setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
matrix:
|
||||
# explicit include-based build matrix, of known valid options
|
||||
include:
|
||||
- cuda: "12.2"
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
cuda-benchmarks:
|
||||
name: Cuda benchmarks (${{ inputs.profile }})
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
@@ -140,10 +198,10 @@ jobs:
|
||||
command: ${{ fromJSON(needs.prepare-matrix.outputs.command) }}
|
||||
op_flavor: ${{ fromJSON(needs.prepare-matrix.outputs.op_flavor) }}
|
||||
bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
|
||||
params_type: ${{ fromJSON(needs.prepare-matrix.outputs.params_type) }}
|
||||
# explicit include-based build matrix, of known valid options
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
- cuda: "12.2"
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
@@ -153,12 +211,6 @@ jobs:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
@@ -167,8 +219,28 @@ jobs:
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
# Re-export environment variables as dependencies setup perform this task in the previous job.
|
||||
# Local env variables are cleaned at the end of each job.
|
||||
- name: Export CUDA variables
|
||||
shell: bash
|
||||
run: |
|
||||
CUDA_PATH=/usr/local/cuda-${{ matrix.cuda }}
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "PATH=$PATH:$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDA_MODULE_LOADER=EAGER" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Export gcc and g++ variables
|
||||
shell: bash
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -179,7 +251,7 @@ jobs:
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} BENCH_TYPE=${{ matrix.bench_type }} bench_${{ matrix.command }}_gpu
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} BENCH_TYPE=${{ matrix.bench_type }} BENCH_PARAM_TYPE=${{ matrix.params_type }} bench_${{ matrix.command }}_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
@@ -196,9 +268,9 @@ jobs:
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}_${{ matrix.bench_type }}_${{ matrix.params_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
@@ -230,7 +302,7 @@ jobs:
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-${{ inputs.profile }}-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
151
.github/workflows/benchmark_gpu_core_crypto.yml
vendored
151
.github/workflows/benchmark_gpu_core_crypto.yml
vendored
@@ -1,151 +0,0 @@
|
||||
# Run core crypto benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Core crypto GPU benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-core-crypto-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
cuda-core-crypto-benchmarks:
|
||||
name: Execute GPU core crypto benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make bench_pbs_gpu
|
||||
make bench_ks_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "n3-H100x1" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--name-suffix avx512 \
|
||||
--walk-subdirs
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
python3 slab/scripts/data_sender.py ${{ env.RESULTS_FILENAME }} "${{ secrets.JOB_SECRET }}" \
|
||||
--slab-url "${{ secrets.SLAB_URL }}"
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-core-crypto-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ !success() && !cancelled() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-core-crypto-benchmarks.result }}
|
||||
SLACK_MESSAGE: "PBS GPU benchmarks finished with status: ${{ needs.cuda-core-crypto-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-integer-full-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
needs: [ setup-instance, cuda-core-crypto-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-core-crypto-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
28
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
28
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
@@ -50,10 +50,16 @@ jobs:
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -63,6 +69,15 @@ jobs:
|
||||
backend: ${{ inputs.backend }}
|
||||
profile: ${{ inputs.profile }}
|
||||
|
||||
# This will allow to fallback on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' &&
|
||||
steps.start-remote-instance.outcome == 'failure' &&
|
||||
inputs.profile == 'single-h100'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-erc20-benchmarks:
|
||||
name: Cuda ERC20 benchmarks (${{ inputs.profile }})
|
||||
needs: setup-instance
|
||||
@@ -84,6 +99,7 @@ jobs:
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
@@ -98,7 +114,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -120,7 +136,7 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_erc20_${{ inputs.profile }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -154,7 +170,7 @@ jobs:
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-erc20-${{ inputs.profile }}-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
@@ -8,9 +8,9 @@ on:
|
||||
|
||||
jobs:
|
||||
run-benchmarks-1-h100:
|
||||
name: Run benchmarks (1xH100)
|
||||
name: Run integer benchmarks (1xH100)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_integer_common.yml
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
profile: single-h100
|
||||
hardware_name: n3-H100x1
|
||||
@@ -21,9 +21,9 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
run-benchmarks-2-h100:
|
||||
name: Run benchmarks (2xH100)
|
||||
name: Run integer benchmarks (2xH100)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_integer_common.yml
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
profile: 2-h100
|
||||
hardware_name: n3-H100x2
|
||||
@@ -34,9 +34,9 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
run-benchmarks-8-h100:
|
||||
name: Run benchmarks (8xH100)
|
||||
name: Run integer benchmarks (8xH100)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_integer_common.yml
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
profile: multi-h100
|
||||
hardware_name: n3-H100x8
|
||||
@@ -47,9 +47,9 @@ jobs:
|
||||
secrets: inherit
|
||||
|
||||
run-benchmarks-l40:
|
||||
name: Run benchmarks (L40)
|
||||
name: Run integer benchmarks (L40)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_integer_common.yml
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
profile: l40
|
||||
hardware_name: n3-L40x1
|
||||
@@ -58,3 +58,14 @@ jobs:
|
||||
bench_type: latency
|
||||
all_precisions: true
|
||||
secrets: inherit
|
||||
|
||||
run-benchmarks-1-h100-core-crypto:
|
||||
name: Run core-crypto benchmarks (1xH100)
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
profile: single-h100
|
||||
hardware_name: n3-H100x1
|
||||
command: pbs,pbs128,ks,ks_pbs
|
||||
bench_type: latency
|
||||
secrets: inherit
|
||||
6
.github/workflows/benchmark_integer.yml
vendored
6
.github/workflows/benchmark_integer.yml
vendored
@@ -104,7 +104,7 @@ jobs:
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
continue-on-error: true
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
@@ -131,7 +131,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -172,7 +172,7 @@ jobs:
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
6
.github/workflows/benchmark_shortint.yml
vendored
6
.github/workflows/benchmark_shortint.yml
vendored
@@ -70,7 +70,7 @@ jobs:
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
continue-on-error: true
|
||||
strategy:
|
||||
@@ -94,7 +94,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -138,7 +138,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
@@ -104,7 +104,7 @@ jobs:
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
continue-on-error: true
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
@@ -131,7 +131,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -166,7 +166,7 @@ jobs:
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
4
.github/workflows/benchmark_tfhe_fft.yml
vendored
4
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -45,7 +45,7 @@ jobs:
|
||||
name: Execute FFT benchmarks in EC2
|
||||
needs: setup-ec2
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -84,7 +84,7 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_fft
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
4
.github/workflows/benchmark_tfhe_ntt.yml
vendored
4
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -45,7 +45,7 @@ jobs:
|
||||
name: Execute NTT benchmarks in EC2
|
||||
needs: setup-ec2
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -84,7 +84,7 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_ntt
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
8
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
8
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
@@ -45,7 +45,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
zk_pok:
|
||||
@@ -80,7 +80,7 @@ jobs:
|
||||
if: needs.setup-instance.result != 'skipped'
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
group: ${{ github.workflow_ref }}_${{github.event_name}}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -132,7 +132,7 @@ jobs:
|
||||
--bench-type ${{ env.BENCH_TYPE }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_tfhe_zk_pok
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
10
.github/workflows/benchmark_wasm_client.yml
vendored
10
.github/workflows/benchmark_wasm_client.yml
vendored
@@ -41,7 +41,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
wasm_bench:
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -110,7 +110,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -123,7 +123,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -167,7 +167,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
8
.github/workflows/benchmark_zk_pke.yml
vendored
8
.github/workflows/benchmark_zk_pke.yml
vendored
@@ -48,7 +48,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
zk_pok:
|
||||
@@ -118,7 +118,7 @@ jobs:
|
||||
if: needs.setup-instance.result != 'skipped'
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
group: ${{ github.workflow_ref }}_${{github.event_name}}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -142,7 +142,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -179,7 +179,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
|
||||
with:
|
||||
name: ${{ github.sha }}_integer_zk
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
2
.github/workflows/cargo_build.yml
vendored
2
.github/workflows/cargo_build.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
63
.github/workflows/cargo_test_fft.yml
vendored
63
.github/workflows/cargo_test_fft.yml
vendored
@@ -3,16 +3,46 @@ name: Cargo Test tfhe-fft
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
outputs:
|
||||
fft_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.fft_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
fft:
|
||||
- tfhe/Cargo.toml
|
||||
- Makefile
|
||||
- tfhe-fft/**
|
||||
- '.github/workflows/cargo_test_fft.yml'
|
||||
|
||||
cargo-tests-fft:
|
||||
needs: should-run
|
||||
if: needs.should-run.outputs.fft_test == 'true'
|
||||
runs-on: ${{ matrix.runner_type }}
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -39,6 +69,8 @@ jobs:
|
||||
make test_fft_no_std
|
||||
|
||||
cargo-tests-fft-nightly:
|
||||
needs: should-run
|
||||
if: needs.should-run.outputs.fft_test == 'true'
|
||||
runs-on: ${{ matrix.runner_type }}
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -61,7 +93,9 @@ jobs:
|
||||
make test_fft_no_std_nightly
|
||||
|
||||
cargo-tests-fft-node-js:
|
||||
runs-on: "ubuntu-latest"
|
||||
needs: should-run
|
||||
if: needs.should-run.outputs.fft_test == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
@@ -69,3 +103,30 @@ jobs:
|
||||
run: |
|
||||
make install_node
|
||||
make test_fft_node_js_ci
|
||||
|
||||
cargo-tests-fft-successful:
|
||||
needs: [ should-run, cargo-tests-fft, cargo-tests-fft-nightly, cargo-tests-fft-node-js ]
|
||||
if: ${{ always() }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Tests do not need to run
|
||||
if: needs.should-run.outputs.fft_test == 'false'
|
||||
run: |
|
||||
echo "tfhe-fft files haven't changed tests don't need to run"
|
||||
|
||||
- name: Check all tests passed
|
||||
if: needs.should-run.outputs.fft_test == 'true' &&
|
||||
needs.cargo-tests-fft.result == 'success' &&
|
||||
needs.cargo-tests-fft-nightly.result == 'success' &&
|
||||
needs.cargo-tests-fft-node-js.result == 'success'
|
||||
run: |
|
||||
echo "All tfhe-fft test passed"
|
||||
|
||||
- name: Check tests failure
|
||||
if: needs.should-run.outputs.fft_test == 'true' &&
|
||||
(needs.cargo-tests-fft.result != 'success' ||
|
||||
needs.cargo-tests-fft-nightly.result != 'success' ||
|
||||
needs.cargo-tests-fft-node-js.result != 'success')
|
||||
run: |
|
||||
echo "Some tfhe-fft tests failed"
|
||||
exit 1
|
||||
|
||||
59
.github/workflows/cargo_test_ntt.yml
vendored
59
.github/workflows/cargo_test_ntt.yml
vendored
@@ -3,16 +3,46 @@ name: Cargo Test tfhe-ntt
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read
|
||||
outputs:
|
||||
ntt_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.ntt_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
ntt:
|
||||
- tfhe/Cargo.toml
|
||||
- Makefile
|
||||
- tfhe-ntt/**
|
||||
- '.github/workflows/cargo_test_ntt.yml'
|
||||
|
||||
cargo-tests-ntt:
|
||||
needs: should-run
|
||||
if: needs.should-run.outputs.ntt_test == 'true'
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -34,6 +64,8 @@ jobs:
|
||||
run: make test_ntt_no_std
|
||||
|
||||
cargo-tests-ntt-nightly:
|
||||
needs: should-run
|
||||
if: needs.should-run.outputs.ntt_test == 'true'
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
@@ -52,3 +84,28 @@ jobs:
|
||||
|
||||
- name: Test no-std nightly
|
||||
run: make test_ntt_no_std_nightly
|
||||
|
||||
cargo-tests-ntt-successful:
|
||||
needs: [ should-run, cargo-tests-ntt, cargo-tests-ntt-nightly ]
|
||||
if: ${{ always() }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Tests do not need to run
|
||||
if: needs.should-run.outputs.ntt_test == 'false'
|
||||
run: |
|
||||
echo "tfhe-ntt files haven't changed tests don't need to run"
|
||||
|
||||
- name: Check all tests success
|
||||
if: needs.should-run.outputs.ntt_test == 'true' &&
|
||||
needs.cargo-tests-ntt.result == 'success' &&
|
||||
needs.cargo-tests-ntt-nightly.result == 'success'
|
||||
run: |
|
||||
echo "All tfhe-ntt tests passed"
|
||||
|
||||
- name: Check tests failure
|
||||
if: needs.should-run.outputs.ntt_test == 'true' &&
|
||||
(needs.cargo-tests-ntt.result != 'success' ||
|
||||
needs.cargo-tests-ntt-nightly.result != 'success')
|
||||
run: |
|
||||
echo "Some tfhe-ntt tests failed"
|
||||
exit 1
|
||||
|
||||
10
.github/workflows/ci_lint.yml
vendored
10
.github/workflows/ci_lint.yml
vendored
@@ -5,7 +5,8 @@ on:
|
||||
pull_request:
|
||||
|
||||
env:
|
||||
ACTIONLINT_VERSION: 1.6.27
|
||||
ACTIONLINT_VERSION: 1.7.7
|
||||
ACTIONLINT_CHECKSUM: "023070a287cd8cccd71515fedc843f1985bf96c436b7effaecce67290e7e0757"
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
jobs:
|
||||
@@ -21,9 +22,10 @@ jobs:
|
||||
|
||||
- name: Get actionlint
|
||||
run: |
|
||||
bash <(curl https://raw.githubusercontent.com/rhysd/actionlint/main/scripts/download-actionlint.bash) ${{ env.ACTIONLINT_VERSION }}
|
||||
echo "f2ee6d561ce00fa93aab62a7791c1a0396ec7e8876b2a8f2057475816c550782 actionlint" > checksum
|
||||
wget "https://github.com/rhysd/actionlint/releases/download/v${{ env.ACTIONLINT_VERSION }}/actionlint_${{ env.ACTIONLINT_VERSION }}_linux_amd64.tar.gz"
|
||||
echo "${{ env.ACTIONLINT_CHECKSUM }} actionlint_${{ env.ACTIONLINT_VERSION }}_linux_amd64.tar.gz" > checksum
|
||||
sha256sum -c checksum
|
||||
tar -xf actionlint_${{ env.ACTIONLINT_VERSION }}_linux_amd64.tar.gz actionlint
|
||||
ln -s "$(pwd)/actionlint" /usr/local/bin/
|
||||
|
||||
- name: Lint workflows
|
||||
@@ -31,7 +33,7 @@ jobs:
|
||||
make lint_workflow
|
||||
|
||||
- name: Ensure SHA pinned actions
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@25ed13d0628a1601b4b44048e63cc4328ed03633 # v3.0.22
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@4830be28ce81da52ec70d65c552a7403821d98d4 # v3.0.23
|
||||
with:
|
||||
allowlist: |
|
||||
slsa-framework/slsa-github-generator
|
||||
|
||||
10
.github/workflows/code_coverage.yml
vendored
10
.github/workflows/code_coverage.yml
vendored
@@ -38,7 +38,7 @@ jobs:
|
||||
name: Code coverage tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.event_name }}_${{ github.ref }}
|
||||
group: ${{ github.workflow_ref }}_${{ github.event_name }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 5760 # 4 days
|
||||
@@ -47,13 +47,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
tfhe:
|
||||
@@ -83,7 +83,7 @@ jobs:
|
||||
make test_shortint_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3
|
||||
uses: codecov/codecov-action@0565863a31f2c772f9f0395002a31e3f06189574
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -97,7 +97,7 @@ jobs:
|
||||
make test_integer_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@13ce06bfc6bbe3ecf90edbbf1bc32fe5978ca1d3
|
||||
uses: codecov/codecov-action@0565863a31f2c772f9f0395002a31e3f06189574
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
|
||||
@@ -52,7 +52,7 @@ jobs:
|
||||
name: CSPRNG randomness tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -77,7 +77,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "tfhe-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (csprng-randomness-tests)
|
||||
@@ -102,4 +102,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
7
.github/workflows/gpu_4090_tests.yml
vendored
7
.github/workflows/gpu_4090_tests.yml
vendored
@@ -29,9 +29,10 @@ jobs:
|
||||
contains(github.event.label.name, '4090_test') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ["self-hosted", "4090-desktop"]
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
@@ -41,7 +42,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -81,4 +82,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "CUDA RTX 4090 tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
35
.github/workflows/gpu_fast_h100_tests.yml
vendored
35
.github/workflows/gpu_fast_h100_tests.yml
vendored
@@ -11,7 +11,9 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -40,7 +42,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -68,11 +70,17 @@ jobs:
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -82,6 +90,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This will allow to fallback on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
@@ -95,7 +110,7 @@ jobs:
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -114,6 +129,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
@@ -121,7 +137,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -150,16 +166,21 @@ jobs:
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
@@ -180,4 +201,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
17
.github/workflows/gpu_fast_tests.yml
vendored
17
.github/workflows/gpu_fast_tests.yml
vendored
@@ -11,7 +11,9 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -39,7 +41,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -93,7 +95,7 @@ jobs:
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -119,7 +121,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -148,12 +150,17 @@ jobs:
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
@@ -178,4 +185,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
29
.github/workflows/gpu_full_h100_tests.yml
vendored
29
.github/workflows/gpu_full_h100_tests.yml
vendored
@@ -20,10 +20,16 @@ jobs:
|
||||
name: Setup instance (cuda-h100-tests)
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -33,11 +39,18 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This will allow to fallback on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -68,13 +81,14 @@ jobs:
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -105,10 +119,11 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Full H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
@@ -128,4 +143,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
19
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
19
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
@@ -11,7 +11,9 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -40,7 +42,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -95,7 +97,7 @@ jobs:
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -121,7 +123,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -144,7 +146,7 @@ jobs:
|
||||
|
||||
- name: Run High Level API Tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_high_level_api_gpu
|
||||
BIG_TESTS_INSTANCE=FALSE make test_high_level_api_gpu
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
@@ -153,12 +155,17 @@ jobs:
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests-multi-gpu)
|
||||
@@ -183,4 +190,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
10
.github/workflows/gpu_integer_long_run_tests.yml
vendored
10
.github/workflows/gpu_integer_long_run_tests.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Long Run Tests on GPU
|
||||
name: Cuda - Long Run Tests on GPU
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -15,8 +15,8 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly tests will be triggered each Friday at 9p.m.
|
||||
- cron: "0 21 * * 5"
|
||||
# Nightly tests will be triggered each evening 8p.m.
|
||||
- cron: "0 20 * * *"
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
@@ -42,7 +42,7 @@ jobs:
|
||||
name: Long run GPU tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
group: ${{ github.workflow_ref }}_${{github.event_name}}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -65,7 +65,7 @@ jobs:
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
15
.github/workflows/gpu_pcc.yml
vendored
15
.github/workflows/gpu_pcc.yml
vendored
@@ -11,6 +11,8 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -49,7 +51,7 @@ jobs:
|
||||
name: CUDA post-commit checks
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -81,7 +83,7 @@ jobs:
|
||||
sudo apt -y install "cuda-toolkit-${TOOLKIT_VERSION}" cmake-format
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -111,13 +113,18 @@ jobs:
|
||||
run: |
|
||||
make pcc_gpu
|
||||
|
||||
- name: Set pull-request URL
|
||||
if: ${{ failure() && github.event_name == 'pull_request' }}
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-pcc)
|
||||
@@ -142,4 +149,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -11,7 +11,9 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -40,7 +42,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -95,7 +97,7 @@ jobs:
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -121,7 +123,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -136,12 +138,17 @@ jobs:
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Integer GPU signed integer tests with classical PBS finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-signed-classic-tests)
|
||||
@@ -166,4 +173,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -11,7 +11,9 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -41,7 +43,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -69,11 +71,17 @@ jobs:
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -83,6 +91,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This will allow to fallback on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
@@ -96,7 +111,7 @@ jobs:
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -115,6 +130,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
@@ -122,7 +138,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -137,16 +153,21 @@ jobs:
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
@@ -167,4 +188,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
21
.github/workflows/gpu_signed_integer_tests.yml
vendored
21
.github/workflows/gpu_signed_integer_tests.yml
vendored
@@ -11,9 +11,11 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
FAST_TESTS: TRUE
|
||||
NIGHTLY_TESTS: FALSE
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -44,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -63,6 +65,7 @@ jobs:
|
||||
- '.github/workflows/gpu_signed_integer_tests.yml'
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-signed-integer-tests)
|
||||
runs-on: ubuntu-latest
|
||||
@@ -98,7 +101,7 @@ jobs:
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -108,7 +111,7 @@ jobs:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
@@ -124,7 +127,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -147,11 +150,17 @@ jobs:
|
||||
if: ${{ always() && needs.cuda-signed-integer-tests.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-signed-integer-tests.result }}
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Signed GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
@@ -176,4 +185,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -11,7 +11,9 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -41,7 +43,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -96,7 +98,7 @@ jobs:
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -122,7 +124,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -137,12 +139,17 @@ jobs:
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Unsigned integer GPU classic tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-unsigned-classic-tests)
|
||||
@@ -167,4 +174,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-classic-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -11,7 +11,9 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -40,7 +42,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -68,11 +70,17 @@ jobs:
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner set up, we have to fallback on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
@@ -82,6 +90,13 @@ jobs:
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This will allow to fallback on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# This instance will be spawned especially for pull-request from forked repository
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
@@ -95,7 +110,7 @@ jobs:
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -114,6 +129,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
@@ -121,7 +137,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -136,16 +152,21 @@ jobs:
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
if: ${{ always() && needs.setup-instance.result == 'success' }}
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
@@ -166,4 +187,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
21
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
21
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
@@ -11,8 +11,11 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
FAST_TESTS: TRUE
|
||||
NIGHTLY_TESTS: FALSE
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
@@ -22,7 +25,6 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
@@ -44,7 +46,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@dcc7a0cba800f454d79fff4b993e8c3555bcc0a8
|
||||
uses: tj-actions/changed-files@823fcebdb31bb35fdf2229d9f769b400309430d0 # v46.0.3
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
@@ -66,11 +68,11 @@ jobs:
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-unsigned-integer-tests)
|
||||
runs-on: ubuntu-latest
|
||||
needs: should-run
|
||||
if: (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
needs.should-run.outputs.gpu_test == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
steps:
|
||||
@@ -99,7 +101,7 @@ jobs:
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
@@ -125,7 +127,7 @@ jobs:
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -148,12 +150,17 @@ jobs:
|
||||
if: ${{ always() && needs.cuda-unsigned-integer-tests.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${{ vars.PR_BASE_URL }}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-unsigned-integer-tests.result }}
|
||||
SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
@@ -178,4 +185,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
4
.github/workflows/integer_long_run_tests.yml
vendored
4
.github/workflows/integer_long_run_tests.yml
vendored
@@ -42,7 +42,7 @@ jobs:
|
||||
name: Long run CPU tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
group: ${{ github.workflow_ref }}_${{github.event_name}}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 4320 # 72 hours
|
||||
@@ -54,7 +54,7 @@ jobs:
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
6
.github/workflows/m1_tests.yml
vendored
6
.github/workflows/m1_tests.yml
vendored
@@ -24,7 +24,7 @@ env:
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
@@ -43,7 +43,7 @@ jobs:
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -195,7 +195,7 @@ jobs:
|
||||
SLACK_COLOR: ${{ needs.cargo-builds-m1.result }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "M1 tests finished with status: ${{ needs.cargo-builds-m1.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
MSG_MINIMAL: event,action url,commit
|
||||
|
||||
15
.github/workflows/make_release.yml
vendored
15
.github/workflows/make_release.yml
vendored
@@ -51,7 +51,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
@@ -62,7 +62,7 @@ jobs:
|
||||
provenance:
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
# For provenance of npmjs publish
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write
|
||||
id-token: write # also needed for OIDC token exchange on crates.io
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
@@ -94,17 +94,20 @@ jobs:
|
||||
run: |
|
||||
echo "NPM_TAG=latest" >> "${GITHUB_ENV}"
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
|
||||
with:
|
||||
name: crate
|
||||
path: target/package
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
- name: Publish crate.io package
|
||||
if: ${{ inputs.push_to_crates }}
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
|
||||
cargo publish -p tfhe ${{ env.DRY_RUN }}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
|
||||
17
.github/workflows/make_release_cuda.yml
vendored
17
.github/workflows/make_release_cuda.yml
vendored
@@ -65,7 +65,7 @@ jobs:
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
provenance:
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
@@ -115,6 +115,9 @@ jobs:
|
||||
name: Publish CUDA Release
|
||||
needs: [setup-instance, package] # for comparing hashes
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
@@ -127,7 +130,7 @@ jobs:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
steps:
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -152,12 +155,16 @@ jobs:
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe-cuda-backend --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
|
||||
cargo publish -p tfhe-cuda-backend ${{ env.DRY_RUN }}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
|
||||
16
.github/workflows/make_release_tfhe_csprng.yml
vendored
16
.github/workflows/make_release_tfhe_csprng.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-csprng
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate-tfhe-csprng
|
||||
path: target/package/*.crate
|
||||
@@ -42,7 +42,7 @@ jobs:
|
||||
provenance:
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
@@ -59,6 +59,9 @@ jobs:
|
||||
name: Publish tfhe-csprng Release
|
||||
needs: [verify_tag, package]
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
@@ -66,16 +69,19 @@ jobs:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
|
||||
with:
|
||||
name: crate-tfhe-csprng
|
||||
path: target/package
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe-csprng --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
|
||||
cargo publish -p tfhe-csprng ${{ env.DRY_RUN }}
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
15
.github/workflows/make_release_tfhe_fft.yml
vendored
15
.github/workflows/make_release_tfhe_fft.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-fft
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
@@ -44,7 +44,7 @@ jobs:
|
||||
provenance:
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
@@ -60,6 +60,9 @@ jobs:
|
||||
name: Publish tfhe-fft Release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [verify_tag, package] # for comparing hashes
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
@@ -67,12 +70,16 @@ jobs:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe-fft --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
|
||||
cargo publish -p tfhe-fft ${{ env.DRY_RUN }}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
|
||||
15
.github/workflows/make_release_tfhe_ntt.yml
vendored
15
.github/workflows/make_release_tfhe_ntt.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-ntt
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
@@ -44,7 +44,7 @@ jobs:
|
||||
provenance:
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
@@ -60,18 +60,25 @@ jobs:
|
||||
name: Publish tfhe-ntt Release
|
||||
runs-on: ubuntu-latest
|
||||
needs: [verify_tag, package] # for comparing hashes
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe-ntt --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
|
||||
cargo publish -p tfhe-ntt ${{ env.DRY_RUN }}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
|
||||
@@ -2,14 +2,13 @@ name: Publish tfhe-versionable release
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dry_run:
|
||||
description: "Dry-run"
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
verify_tag:
|
||||
@@ -19,6 +18,7 @@ jobs:
|
||||
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
|
||||
|
||||
package-derive:
|
||||
name: Package tfhe-versionable-derive Release
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
@@ -30,7 +30,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-versionable-derive
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate-tfhe-versionable-derive
|
||||
path: target/package/*.crate
|
||||
@@ -40,7 +40,7 @@ jobs:
|
||||
|
||||
provenance-derive:
|
||||
needs: [package-derive]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
@@ -53,9 +53,12 @@ jobs:
|
||||
base64-subjects: ${{ needs.package-derive.outputs.hash }}
|
||||
|
||||
publish_release-derive:
|
||||
name: Publish tfhe-versionable Release
|
||||
needs: [verify_tag, package-derive] # for comparing hashes
|
||||
name: Publish tfhe-versionable-derive Release
|
||||
needs: [ verify_tag, package-derive ] # for comparing hashes
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
@@ -64,15 +67,18 @@ jobs:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
|
||||
with:
|
||||
name: crate-tfhe-versionable-derive
|
||||
path: target/package
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
run: |
|
||||
cargo publish -p tfhe-versionable-derive --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
|
||||
cargo publish -p tfhe-versionable-derive
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
@@ -82,24 +88,18 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "SLSA tfhe-versionable-derive - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "tfhe-versionable-derive release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
package:
|
||||
name: Package tfhe-versionable Release
|
||||
needs: publish_release-derive
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
@@ -111,7 +111,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-versionable
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate-tfhe-versionable
|
||||
path: target/package/*.crate
|
||||
@@ -120,8 +120,8 @@ jobs:
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
|
||||
needs: package
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
@@ -135,7 +135,7 @@ jobs:
|
||||
|
||||
publish_release:
|
||||
name: Publish tfhe-versionable Release
|
||||
needs: [package] # for comparing hashes
|
||||
needs: package # for comparing hashes
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
@@ -143,40 +143,32 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
|
||||
with:
|
||||
name: crate-tfhe-versionable
|
||||
path: target/package
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
run: |
|
||||
cargo publish -p tfhe-versionable --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
|
||||
|
||||
cargo publish -p tfhe-versionable
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Slack notification (hashes comparison)
|
||||
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "SLSA tfhe-versionable - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "tfhe-versionable release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
16
.github/workflows/make_release_zk_pok.yml
vendored
16
.github/workflows/make_release_zk_pok.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe-zk-pok
|
||||
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
|
||||
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: crate-zk-pok
|
||||
path: target/package/*.crate
|
||||
@@ -34,7 +34,7 @@ jobs:
|
||||
provenance:
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
@@ -56,6 +56,9 @@ jobs:
|
||||
name: Publish tfhe-zk-pok Release
|
||||
needs: [verify_tag, package] # for comparing hashes
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
# Needed for OIDC token exchange on crates.io
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
|
||||
@@ -64,16 +67,19 @@ jobs:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
uses: actions/download-artifact@95815c38cf2ff2164869cbab79da8d1f422bc89e # v4.2.1
|
||||
with:
|
||||
name: crate-zk-pok
|
||||
path: target/package
|
||||
- name: Authenticate on registry
|
||||
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
|
||||
id: auth
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe-zk-pok --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
|
||||
cargo publish -p tfhe-zk-pok ${{ env.DRY_RUN }}
|
||||
- name: Verify hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -34,6 +34,9 @@ package-lock.json
|
||||
|
||||
# Python .env
|
||||
.env
|
||||
__pycache__
|
||||
|
||||
# Dir used for backward compatibility test data
|
||||
tests/tfhe-backward-compat-data/
|
||||
# First directive is to ignore symlinks
|
||||
tests/tfhe-backward-compat-data
|
||||
ci/
|
||||
|
||||
233
CONTRIBUTING.md
Normal file
233
CONTRIBUTING.md
Normal file
@@ -0,0 +1,233 @@
|
||||
# Contributing to TFHE-rs
|
||||
|
||||
This document provides guidance on how to contribute to **TFHE-rs**.
|
||||
|
||||
There are two ways to contribute:
|
||||
|
||||
- **Report issues:** Open issues on GitHub to report bugs, suggest improvements, or note typos.
|
||||
- **Submit codes**: To become an official contributor, you must sign our Contributor License Agreement (CLA). Our CLA-bot will guide you through this process when you open your first pull request.
|
||||
|
||||
## 1. Setting up the project
|
||||
|
||||
Start by [forking](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/fork-a-repo) the **TFHE-rs** repository.
|
||||
|
||||
{% hint style="info" %}
|
||||
- **Rust version**: Ensure that you use a Rust version >= 1.81 to compile **TFHE-rs**.
|
||||
- **Incompatibility**: AArch64-based machines are not yet supported for Windows as it's currently missing an entropy source to be able to seed the [CSPRNGs](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) used in **TFHE-rs**.
|
||||
- **Performance**: For optimal performance, it is highly recommended to run **TFHE-rs** code in release mode with cargo's `--release` flag.
|
||||
{% endhint %}
|
||||
|
||||
To get more details about the library, please refer to the [documentation](https://docs.zama.ai/tfhe-rs).
|
||||
|
||||
## 2. Creating a new branch
|
||||
|
||||
When creating your branch, make sure to use the following format :
|
||||
|
||||
```
|
||||
git checkout -b {feat|fix|docs|chore…}/short_description
|
||||
```
|
||||
|
||||
For example:
|
||||
|
||||
```
|
||||
git checkout -b feat/new_feature_X
|
||||
```
|
||||
|
||||
## 3. Before committing
|
||||
|
||||
### 3.1 Linting
|
||||
|
||||
Each commit to **TFHE-rs** should conform to the standards of the project. In particular, every source code, docker or workflows files should be linted to prevent programmatic and stylistic errors.
|
||||
|
||||
- Rust source code linters: `clippy`
|
||||
- Typescript/Javascript source code linters: `eslint`, `prettier`
|
||||
|
||||
To apply automatic code formatting, run:
|
||||
|
||||
```
|
||||
make fmt
|
||||
```
|
||||
|
||||
You can perform linting of all Cargo targets with:
|
||||
|
||||
```
|
||||
make clippy_all_targets
|
||||
```
|
||||
|
||||
### 3.2 Testing
|
||||
|
||||
Your contributions must include comprehensive documentation and tests without breaking existing tests. To run pre-commit checks, execute:
|
||||
|
||||
```
|
||||
make pcc
|
||||
```
|
||||
|
||||
This command ensure that all the targets in the library are building correctly.
|
||||
For a faster check, use:
|
||||
|
||||
```
|
||||
make fpcc
|
||||
```
|
||||
|
||||
If you're contributing to GPU code, run also:
|
||||
|
||||
```
|
||||
make pcc_gpu
|
||||
```
|
||||
|
||||
Unit testing suites are heavy and can require a lot of computing power and RAM availability.
|
||||
Whilst tests are run automatically in continuous integration pipeline, you can run tests locally.
|
||||
|
||||
All unit tests have a command formatted as:
|
||||
|
||||
```
|
||||
make test_*
|
||||
```
|
||||
|
||||
Run `make help` to display a list of all the commands available.
|
||||
|
||||
To quickly test your changes locally, follow these steps:
|
||||
1. Locate where the code has changed.
|
||||
2. Add (or modify) a Cargo test filter to the corresponding `make` target in Makefile.
|
||||
3. Run the target.
|
||||
|
||||
{% hint style="success" %}
|
||||
`make test_<something>` will print the underlying cargo command in STDOUT. You can quickly test your changes by copy/pasting the command and then modify it to suit your needs.
|
||||
{% endhint %}
|
||||
|
||||
For example, if you made changes in `tfhe/src/integer/*`, you can test them with the following steps:
|
||||
1. In `test_integer` target, replace the filter `-- integer::` by `-- my_new_test`.
|
||||
2. Run `make test_integer`.
|
||||
|
||||
## 4. Committing
|
||||
|
||||
**TFHE-rs** follows the conventional commit specification to maintain a consistent commit history, essential for Semantic Versioning ([semver.org](https://semver.org/)).
|
||||
Commit messages are automatically checked in CI and will be rejected if they do not comply, so make sure that you follow the commit conventions detailed on [this page]
|
||||
(https://www.conventionalcommits.org/en/v1.0.0/).
|
||||
|
||||
## 5. Rebasing
|
||||
|
||||
Before creating a pull request, rebase your branch on the repository's `main` branch. Merge commits are not permitted, thus rebasing ensures fewer conflicts and a smoother PR review process.
|
||||
|
||||
## 6. Opening a Pull Request
|
||||
|
||||
Once your changes are ready, open a pull request.
|
||||
|
||||
For instructions on creating a PR from a fork, refer to GitHub's [official documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork).
|
||||
|
||||
## 7. Continuous integration
|
||||
|
||||
Before a pull request can be merged, several test suites run automatically. Below is an overview of the CI process:
|
||||
|
||||
```mermaid
|
||||
---
|
||||
title: Continuous Integration Process
|
||||
---
|
||||
sequenceDiagram
|
||||
autonumber
|
||||
|
||||
participant Contributor
|
||||
participant GitHub
|
||||
participant Reviewer
|
||||
participant CI-pipeline
|
||||
|
||||
Contributor ->> GitHub: Open pull-request
|
||||
GitHub -->> Contributor: Ask for CLA signing (once)
|
||||
loop
|
||||
Reviewer ->> GitHub: Review code
|
||||
Reviewer ->> CI-pipeline: Approve workflows (short-run)
|
||||
CI-pipeline -->> GitHub: Send checks results
|
||||
Contributor ->> GitHub: Make changes
|
||||
end
|
||||
Reviewer ->> GitHub: Pull-request approval
|
||||
Reviewer ->> CI-pipeline: Approve workflows (long-run)
|
||||
CI-pipeline -->> GitHub: Send checks results
|
||||
Reviewer -->> GitHub: Merge if pipeline green
|
||||
```
|
||||
|
||||
> [!Note]
|
||||
>Useful details:
|
||||
>* pipeline is triggered by humans
|
||||
>* review team is located in Paris timezone, pipeline launch will most likely happen during office hours
|
||||
>* direct changes to CI related files are not allowed for external contributors
|
||||
>* run `make pcc` to fix any build errors before pushing commits
|
||||
|
||||
## 8. Data versioning
|
||||
|
||||
Data serialized with TFHE-rs must remain backward compatible. This is done using the [tfhe-versionable](https://crates.io/crates/tfhe-versionable) crate.
|
||||
|
||||
If you modify a type that derives `Versionize` in a backward-incompatible way, an upgrade implementation must be provided.
|
||||
|
||||
For example, these changes are data breaking:
|
||||
* Adding a field to a struct.
|
||||
* Changing the order of the fields within a struct or the variants within an enum.
|
||||
* Renaming a field of a struct or a variant of an enum.
|
||||
* Changing the type of field in a struct or a variant in an enum.
|
||||
|
||||
On the contrary, these changes are *not* data breaking:
|
||||
* Renaming a type (unless it implements the `Named` trait).
|
||||
* Adding a variant to the end of an enum.
|
||||
|
||||
## Example: adding a field
|
||||
|
||||
Suppose you want to add an i32 field to a type named `MyType`. The original type is defined as:
|
||||
```rust
|
||||
#[derive(Serialize, Deserialize, Versionize)]
|
||||
#[versionize(MyTypeVersions)]
|
||||
struct MyType {
|
||||
val: u64,
|
||||
}
|
||||
```
|
||||
And you want to change it to:
|
||||
```rust
|
||||
#[derive(Serialize, Deserialize, Versionize)]
|
||||
#[versionize(MyTypeVersions)]
|
||||
struct MyType {
|
||||
val: u64,
|
||||
other_val: i32
|
||||
}
|
||||
```
|
||||
|
||||
Follow these steps:
|
||||
|
||||
1. Navigate to the definition of the dispatch enum of this type. This is the type inside the `#[versionize(MyTypeVersions)]` macro attribute. In general, this type has the same name as the base type with a `Versions` suffix. You should find something like
|
||||
|
||||
```rust
|
||||
#[derive(VersionsDispatch)]
|
||||
enum MyTypeVersions {
|
||||
V0(MyTypeV0),
|
||||
V1(MyType)
|
||||
}
|
||||
```
|
||||
|
||||
2. Add a new variant to the enum to preserve the previous version of the type. You can simply copy and paste the previous definition of the type and add a version suffix:
|
||||
|
||||
```rust
|
||||
#[derive(Version)]
|
||||
struct MyTypeV1 {
|
||||
val: u64,
|
||||
}
|
||||
|
||||
#[derive(VersionsDispatch)]
|
||||
enum MyTypeVersions {
|
||||
V0(MyTypeV0),
|
||||
V1(MyTypeV1),
|
||||
V2(MyType) // Here this points to your modified type
|
||||
}
|
||||
```
|
||||
|
||||
3. Implement the `Upgrade` trait to define how we should go from the previous version to the current version:
|
||||
```rust
|
||||
impl Upgrade<MyType> for MyTypeV1 {
|
||||
type Error = Infallible;
|
||||
|
||||
fn upgrade(self) -> Result<MyType, Self::Error> {
|
||||
Ok(MyType {
|
||||
val: self.val,
|
||||
other_val: 0
|
||||
})
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
4. Fix the upgrade target of the previous version. In this example, `impl Upgrade<MyType> for MyTypeV0 {` should simply be changed to `impl Upgrade<MyTypeV1> for MyTypeV0 {`
|
||||
11
Cargo.toml
11
Cargo.toml
@@ -6,7 +6,6 @@ members = [
|
||||
"tfhe-ntt",
|
||||
"tfhe-zk-pok",
|
||||
"tasks",
|
||||
"apps/trivium",
|
||||
"tfhe-csprng",
|
||||
"backends/tfhe-cuda-backend",
|
||||
"utils/tfhe-versionable",
|
||||
@@ -14,16 +13,20 @@ members = [
|
||||
"tests",
|
||||
]
|
||||
|
||||
exclude = ["tests/backward_compatibility_tests", "utils/tfhe-lints"]
|
||||
exclude = [
|
||||
"tests/backward_compatibility_tests",
|
||||
"utils/tfhe-lints",
|
||||
"apps/trivium",
|
||||
]
|
||||
[workspace.dependencies]
|
||||
aligned-vec = { version = "0.6", default-features = false }
|
||||
bytemuck = "1.14.3"
|
||||
dyn-stack = { version = "0.11", default-features = false }
|
||||
itertools = "0.14"
|
||||
num-complex = "0.4"
|
||||
pulp = { version = "0.20", default-features = false }
|
||||
pulp = { version = "0.21", default-features = false }
|
||||
rand = "0.8"
|
||||
rayon = "1"
|
||||
rayon = "1.11"
|
||||
serde = { version = "1.0", default-features = false }
|
||||
wasm-bindgen = "0.2.100"
|
||||
|
||||
|
||||
139
Makefile
139
Makefile
@@ -18,6 +18,8 @@ FAST_BENCH?=FALSE
|
||||
NIGHTLY_TESTS?=FALSE
|
||||
BENCH_OP_FLAVOR?=DEFAULT
|
||||
BENCH_TYPE?=latency
|
||||
BENCH_PARAM_TYPE?=classical
|
||||
BENCH_PARAMS_SET?=default
|
||||
NODE_VERSION=22.6
|
||||
BACKWARD_COMPAT_DATA_URL=https://github.com/zama-ai/tfhe-backward-compat-data.git
|
||||
BACKWARD_COMPAT_DATA_BRANCH?=$(shell ./scripts/backward_compat_data_version.py)
|
||||
@@ -282,14 +284,14 @@ check_typos: install_typos_checker
|
||||
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
|
||||
clippy_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats,extended-types \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
|
||||
check_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
@@ -348,6 +350,9 @@ clippy_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=integer,experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=integer,experimental,extended-types \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy # Run clippy lints enabling the boolean, shortint, integer
|
||||
clippy: install_rs_check_toolchain
|
||||
@@ -380,16 +385,16 @@ clippy_rustdoc_gpu: install_rs_check_toolchain
|
||||
.PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
|
||||
clippy_c_api: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,extended-types \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
|
||||
clippy_js_wasm_api: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,zk-pok \
|
||||
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,zk-pok,extended-types \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api \
|
||||
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,extended-types \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_tasks # Run clippy lints on helper tasks crate.
|
||||
@@ -399,16 +404,21 @@ clippy_tasks: install_rs_check_toolchain
|
||||
|
||||
.PHONY: clippy_trivium # Run clippy lints on Trivium app
|
||||
clippy_trivium: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
cd apps/trivium; RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
-p tfhe-trivium -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_ws_tests # Run clippy on the workspace level tests
|
||||
clippy_ws_tests: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --tests \
|
||||
-p tests --features=shortint,integer,zk-pok -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
|
||||
clippy_all_targets: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats,extended-types \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats,extended-types,experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
|
||||
@@ -431,12 +441,13 @@ clippy_versionable: install_rs_check_toolchain
|
||||
.PHONY: clippy_tfhe_lints # Run clippy lints on tfhe-lints
|
||||
clippy_tfhe_lints: install_cargo_dylint # the toolchain is selected with toolchain.toml
|
||||
cd utils/tfhe-lints && \
|
||||
rustup toolchain install && \
|
||||
cargo clippy --all-targets -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_all # Run all clippy targets
|
||||
clippy_all: clippy_rustdoc clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets \
|
||||
clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core clippy_tfhe_csprng clippy_zk_pok clippy_trivium \
|
||||
clippy_versionable clippy_tfhe_lints
|
||||
clippy_versionable clippy_tfhe_lints clippy_ws_tests
|
||||
|
||||
.PHONY: clippy_fast # Run main clippy targets
|
||||
clippy_fast: clippy_rustdoc clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks \
|
||||
@@ -506,13 +517,13 @@ build_tfhe_coverage: install_rs_build_toolchain
|
||||
.PHONY: build_c_api # Build the C API for boolean, shortint and integer
|
||||
build_c_api: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,extended-types \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
|
||||
build_c_api_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,extended-types,gpu \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
|
||||
@@ -526,7 +537,7 @@ build_web_js_api: install_rs_build_toolchain install_wasm_pack
|
||||
cd tfhe && \
|
||||
RUSTFLAGS="$(WASM_RUSTFLAGS)" rustup run "$(RS_BUILD_TOOLCHAIN)" \
|
||||
wasm-pack build --release --target=web \
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok,extended-types
|
||||
|
||||
.PHONY: build_web_js_api_parallel # Build the js API targeting the web browser with parallelism support
|
||||
build_web_js_api_parallel: install_rs_check_toolchain install_wasm_pack
|
||||
@@ -534,7 +545,7 @@ build_web_js_api_parallel: install_rs_check_toolchain install_wasm_pack
|
||||
rustup component add rust-src --toolchain $(RS_CHECK_TOOLCHAIN) && \
|
||||
RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory" rustup run $(RS_CHECK_TOOLCHAIN) \
|
||||
wasm-pack build --release --target=web \
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,parallel-wasm-api,zk-pok \
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,parallel-wasm-api,zk-pok,extended-types \
|
||||
-Z build-std=panic_abort,std && \
|
||||
find pkg/snippets -type f -iname workerHelpers.js -exec sed -i "s|const pkg = await import('..\/..\/..');|const pkg = await import('..\/..\/..\/tfhe.js');|" {} \;
|
||||
jq '.files += ["snippets"]' tfhe/pkg/package.json > tmp_pkg.json && mv -f tmp_pkg.json tfhe/pkg/package.json
|
||||
@@ -544,7 +555,7 @@ build_node_js_api: install_rs_build_toolchain install_wasm_pack
|
||||
cd tfhe && \
|
||||
RUSTFLAGS="$(WASM_RUSTFLAGS)" rustup run "$(RS_BUILD_TOOLCHAIN)" \
|
||||
wasm-pack build --release --target=nodejs \
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok,extended-types
|
||||
|
||||
.PHONY: build_tfhe_csprng # Build tfhe_csprng
|
||||
build_tfhe_csprng: install_rs_build_toolchain
|
||||
@@ -596,7 +607,7 @@ test_core_crypto_gpu: install_rs_build_toolchain
|
||||
.PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
|
||||
test_integer_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
|
||||
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=4
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
|
||||
|
||||
@@ -868,12 +879,12 @@ test_examples: test_sha256_bool test_regex_engine
|
||||
|
||||
.PHONY: test_trivium # Run tests for trivium
|
||||
test_trivium: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
cd apps/trivium; RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-trivium -- --test-threads=1 trivium::
|
||||
|
||||
.PHONY: test_kreyvium # Run tests for kreyvium
|
||||
test_kreyvium: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
cd apps/trivium; RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-trivium -- --test-threads=1 kreyvium::
|
||||
|
||||
.PHONY: test_tfhe_csprng # Run tfhe-csprng tests
|
||||
@@ -907,6 +918,7 @@ test_versionable: install_rs_build_toolchain
|
||||
.PHONY: test_tfhe_lints # Run test on tfhe-lints
|
||||
test_tfhe_lints: install_cargo_dylint
|
||||
cd utils/tfhe-lints && \
|
||||
rustup toolchain install && \
|
||||
cargo test
|
||||
|
||||
# The backward compat data repo holds historical binary data but also rust code to generate and load them.
|
||||
@@ -1071,35 +1083,42 @@ bench_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_signed_integer # Run benchmarks for signed integer
|
||||
bench_signed_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
|
||||
bench_integer_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_signed_integer_gpu # Run benchmarks for signed integer on GPU backend
|
||||
bench_signed_integer_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
|
||||
bench_integer_compression: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench glwe_packing_compression-integer-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_compression_gpu
|
||||
bench_integer_compression_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench glwe_packing_compression-integer-bench \
|
||||
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,gpu,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
|
||||
bench_integer_multi_bit: install_rs_check_toolchain
|
||||
@@ -1107,7 +1126,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
|
||||
bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
@@ -1115,7 +1134,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
|
||||
bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
@@ -1123,22 +1142,22 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
|
||||
bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
.PHONY: bench_signed_integer_multi_bit_gpu # Run benchmarks for signed integer on GPU backend using multi-bit parameters
|
||||
bench_signed_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=MULTI_BIT \
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
|
||||
--bench integer-signed-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
|
||||
bench_integer_zk: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench zk-pke-bench \
|
||||
--features=integer,internal-keycache,zk-pok,nightly-avx512 \
|
||||
--features=integer,internal-keycache,zk-pok,nightly-avx512,pbs-stats \
|
||||
-p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_shortint # Run benchmarks for shortint
|
||||
@@ -1169,34 +1188,60 @@ bench_boolean: install_rs_check_toolchain
|
||||
--bench boolean-bench \
|
||||
--features=boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs # Run benchmarks for PBS
|
||||
bench_pbs: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs-bench \
|
||||
.PHONY: bench_ks # Run benchmarks for keyswitch
|
||||
bench_ks: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench ks-bench \
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs128 # Run benchmarks for PBS using FFT 128 bits
|
||||
bench_pbs128: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs128-bench \
|
||||
.PHONY: bench_ks_gpu # Run benchmarks for keyswitch on GPU backend
|
||||
bench_ks_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench ks-bench \
|
||||
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs # Run benchmarks for PBS
|
||||
bench_pbs: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs-bench \
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
|
||||
bench_pbs_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_FAST_BENCH=$(FAST_BENCH) cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs-bench \
|
||||
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_ks # Run benchmarks for keyswitch
|
||||
bench_ks: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench ks-bench \
|
||||
.PHONY: bench_ks_pbs # Run benchmarks for KS-PBS
|
||||
bench_ks_pbs: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench ks-pbs-bench \
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_ks_gpu # Run benchmarks for PBS on GPU backend
|
||||
bench_ks_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench ks-bench \
|
||||
.PHONY: bench_ks_pbs_gpu # Run benchmarks for KS-PBS on GPU backend
|
||||
bench_ks_pbs_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_PARAMS_SET=$(BENCH_PARAMS_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench ks-pbs-bench \
|
||||
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs128 # Run benchmarks for PBS using FFT 128 bits
|
||||
bench_pbs128: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs128-bench \
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs128_gpu # Run benchmarks for PBS using FFT 128 bits on GPU
|
||||
bench_pbs128_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs128-bench \
|
||||
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
bench_web_js_api_parallel_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
|
||||
|
||||
36
README.md
36
README.md
@@ -10,7 +10,7 @@
|
||||
<hr/>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://docs.zama.ai/tfhe-rs"> 📒 Documentation</a> | <a href="https://zama.ai/community"> 💛 Community support</a> | <a href="https://github.com/zama-ai/awesome-zama"> 📚 FHE resources by Zama</a>
|
||||
<a href="https://github.com/zama-ai/tfhe-rs-handbook/blob/main/tfhe-rs-handbook.pdf"> 📃 Read Handbook</a> |<a href="https://docs.zama.ai/tfhe-rs"> 📒 Documentation</a> | <a href="https://zama.ai/community"> 💛 Community support</a> | <a href="https://github.com/zama-ai/awesome-zama"> 📚 FHE resources by Zama</a>
|
||||
</p>
|
||||
|
||||
|
||||
@@ -67,6 +67,9 @@ production-ready library for all the advanced features of TFHE.
|
||||
|
||||
## Getting started
|
||||
|
||||
> [!Important]
|
||||
> **TFHE-rs** released its first stable version v1.0.0 in February 2025, stabilizing the high-level API for the x86 CPU backend.
|
||||
|
||||
### Cargo.toml configuration
|
||||
To use the latest version of `TFHE-rs` in your project, you first need to add it as a dependency in your `Cargo.toml`:
|
||||
|
||||
@@ -75,13 +78,13 @@ tfhe = { version = "*", features = ["boolean", "shortint", "integer"] }
|
||||
```
|
||||
|
||||
> [!Note]
|
||||
> Note: You need to use a Rust version >= 1.81 to compile TFHE-rs.
|
||||
> Note: You need to use Rust version >= 1.84 to compile TFHE-rs.
|
||||
|
||||
> [!Note]
|
||||
> Note: aarch64-based machines are not yet supported for Windows as it's currently missing an entropy source to be able to seed the [CSPRNGs](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) used in TFHE-rs.
|
||||
> Note: AArch64-based machines are not supported for Windows as it's currently missing an entropy source to be able to seed the [CSPRNGs](https://en.wikipedia.org/wiki/Cryptographically_secure_pseudorandom_number_generator) used in TFHE-rs.
|
||||
|
||||
<p align="right">
|
||||
<a href="#about" > ↑ Back to top </a>
|
||||
<a href="#about" > ↑ Back to top </a>
|
||||
</p>
|
||||
|
||||
### A simple example
|
||||
@@ -138,7 +141,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||
}
|
||||
```
|
||||
|
||||
To run this code, use the following command:
|
||||
To run this code, use the following command:
|
||||
<p align="center"> <code> cargo run --release </code> </p>
|
||||
|
||||
> [!Note]
|
||||
@@ -148,12 +151,15 @@ to run in release mode with cargo's `--release` flag to have the best performanc
|
||||
*Find an example with more explanations in [this part of the documentation](https://docs.zama.ai/tfhe-rs/get-started/quick_start)*
|
||||
|
||||
<p align="right">
|
||||
<a href="#about" > ↑ Back to top </a>
|
||||
<a href="#about" > ↑ Back to top </a>
|
||||
</p>
|
||||
|
||||
|
||||
|
||||
## Resources
|
||||
## Resources
|
||||
|
||||
### TFHE-rs Handbook
|
||||
A document containing scientific and technical details about algorithms implemented into the library is available here: [TFHE-rs: A (Practical) Handbook](https://github.com/zama-ai/tfhe-rs-handbook/blob/main/tfhe-rs-handbook.pdf).
|
||||
|
||||
### TFHE deep dive
|
||||
- [TFHE Deep Dive - Part I - Ciphertext types](https://www.zama.ai/post/tfhe-deep-dive-part-1)
|
||||
@@ -176,7 +182,7 @@ to run in release mode with cargo's `--release` flag to have the best performanc
|
||||
|
||||
Full, comprehensive documentation is available here: [https://docs.zama.ai/tfhe-rs](https://docs.zama.ai/tfhe-rs).
|
||||
<p align="right">
|
||||
<a href="#about" > ↑ Back to top </a>
|
||||
<a href="#about" > ↑ Back to top </a>
|
||||
</p>
|
||||
|
||||
|
||||
@@ -194,9 +200,13 @@ When a new update is published in the Lattice Estimator, we update parameters ac
|
||||
|
||||
### Security model
|
||||
|
||||
The default parameters for the TFHE-rs library are chosen considering the IND-CPA security model, and are selected with a bootstrapping failure probability fixed at p_error = $2^{-64}$. In particular, it is assumed that the results of decrypted computations are not shared by the secret key owner with any third parties, as such an action can lead to leakage of the secret encryption key. If you are designing an application where decryptions must be shared, you will need to craft custom encryption parameters which are chosen in consideration of the IND-CPA^D security model [1].
|
||||
By default, the parameter sets used in the High-Level API with the x86 CPU backend have a failure probability $\le 2^{128}$ to securely work in the IND-CPA^D model using the algorithmic techniques provided in our code base [1].
|
||||
If you want to work within the IND-CPA security model, which is less strict than the IND-CPA-D model, the parameter sets can easily be changed and would have slightly better performance. More details can be found in the [TFHE-rs documentation](https://docs.zama.ai/tfhe-rs).
|
||||
|
||||
[1] Li, Baiyu, et al. "Securing approximate homomorphic encryption using differential privacy." Annual International Cryptology Conference. Cham: Springer Nature Switzerland, 2022. https://eprint.iacr.org/2022/816.pdf
|
||||
The default parameters used in the High-Level API with the GPU backend are chosen considering the IND-CPA security model, and are selected with a bootstrapping failure probability fixed at $p_{error} \le 2^{-64}$. In particular, it is assumed that the results of decrypted computations are not shared by the secret key owner with any third parties, as such an action can lead to leakage of the secret encryption key. If you are designing an application where decryptions must be shared, you will need to craft custom encryption parameters which are chosen in consideration of the IND-CPA^D security model [2].
|
||||
|
||||
[1] Bernard, Olivier, et al. "Drifting Towards Better Error Probabilities in Fully Homomorphic Encryption Schemes". https://eprint.iacr.org/2024/1718.pdf
|
||||
[2] Li, Baiyu, et al. "Securing approximate homomorphic encryption using differential privacy." Annual International Cryptology Conference. Cham: Springer Nature Switzerland, 2022. https://eprint.iacr.org/2022/816.pdf
|
||||
|
||||
#### Side-channel attacks
|
||||
|
||||
@@ -245,7 +255,7 @@ This software is distributed under the **BSD-3-Clause-Clear** license. Read [thi
|
||||
>We are open to collaborating and advancing the FHE space with our partners. If you have specific needs, please email us at hello@zama.ai.
|
||||
|
||||
<p align="right">
|
||||
<a href="#about" > ↑ Back to top </a>
|
||||
<a href="#about" > ↑ Back to top </a>
|
||||
</p>
|
||||
|
||||
|
||||
@@ -259,8 +269,8 @@ This software is distributed under the **BSD-3-Clause-Clear** license. Read [thi
|
||||
</picture>
|
||||
</a>
|
||||
|
||||
🌟 If you find this project helpful or interesting, please consider giving it a star on GitHub! Your support helps to grow the community and motivates further development.
|
||||
🌟 If you find this project helpful or interesting, please consider giving it a star on GitHub! Your support helps to grow the community and motivates further development.
|
||||
|
||||
<p align="right">
|
||||
<a href="#about" > ↑ Back to top </a>
|
||||
<a href="#about" > ↑ Back to top </a>
|
||||
</p>
|
||||
|
||||
@@ -13,3 +13,9 @@ extend-ignore-identifiers-re = [
|
||||
# Example in trivium
|
||||
"C9217BA0D762ACA1"
|
||||
]
|
||||
|
||||
[files]
|
||||
extend-exclude = [
|
||||
"backends/tfhe-cuda-backend/cuda/src/fft128/twiddles.cu",
|
||||
"backends/tfhe-cuda-backend/cuda/src/fft/twiddles.cu",
|
||||
]
|
||||
|
||||
@@ -6,11 +6,17 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
rayon = { workspace = true }
|
||||
tfhe = { path = "../../tfhe", features = [ "boolean", "shortint", "integer" ] }
|
||||
rayon = "1"
|
||||
tfhe = { path = "../../tfhe", features = ["boolean", "shortint", "integer"] }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = [ "html_reports" ]}
|
||||
criterion = { version = "0.5.1", features = ["html_reports"] }
|
||||
|
||||
[profile.devo]
|
||||
inherits = "dev"
|
||||
opt-level = 3
|
||||
lto = "off"
|
||||
debug-assertions = false
|
||||
|
||||
[[bench]]
|
||||
name = "trivium"
|
||||
|
||||
@@ -129,7 +129,7 @@ Other sizes than 64 bit are expected to be available in the future.
|
||||
|
||||
# FHE shortint Trivium implementation
|
||||
|
||||
The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128`).
|
||||
The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128`).
|
||||
It uses a lower level API of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run
|
||||
on the same cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes
|
||||
its setup a little more intricate.
|
||||
@@ -137,10 +137,10 @@ its setup a little more intricate.
|
||||
Example code:
|
||||
```rust
|
||||
use tfhe::shortint::prelude::*;
|
||||
use tfhe::shortint::parameters::v1_0::{
|
||||
V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
use tfhe::shortint::parameters::v1_1::{
|
||||
V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
};
|
||||
use tfhe::{ConfigBuilder, generate_keys, FheUint64};
|
||||
use tfhe::prelude::*;
|
||||
@@ -148,17 +148,17 @@ use tfhe_trivium::TriviumStreamShortint;
|
||||
|
||||
fn test_shortint() {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128_2M128,
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use criterion::Criterion;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::v1_0::{
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
use tfhe::shortint::parameters::v1_1::{
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
};
|
||||
use tfhe::shortint::prelude::*;
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
|
||||
@@ -11,19 +11,19 @@ use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};
|
||||
|
||||
pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
@@ -64,19 +64,19 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
|
||||
|
||||
pub fn kreyvium_shortint_gen(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
@@ -112,19 +112,19 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
|
||||
|
||||
pub fn kreyvium_shortint_trans(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use criterion::Criterion;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::v1_0::{
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
use tfhe::shortint::parameters::v1_1::{
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
};
|
||||
use tfhe::shortint::prelude::*;
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
|
||||
@@ -11,19 +11,19 @@ use tfhe_trivium::{TransCiphering, TriviumStreamShortint};
|
||||
|
||||
pub fn trivium_shortint_warmup(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
@@ -64,19 +64,19 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
|
||||
|
||||
pub fn trivium_shortint_gen(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
@@ -112,19 +112,19 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
|
||||
|
||||
pub fn trivium_shortint_trans(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::v1_0::{
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
use tfhe::shortint::parameters::v1_1::{
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
};
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
|
||||
// Values for these tests come from the github repo renaud1239/Kreyvium,
|
||||
@@ -66,7 +66,7 @@ fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
|
||||
assert!(a.len() % 8 == 0);
|
||||
let mut hexadecimal: String = "".to_string();
|
||||
for test in a {
|
||||
hexadecimal.push_str(&format!("{:02X?}", test));
|
||||
hexadecimal.push_str(&format!("{test:02X?}"));
|
||||
}
|
||||
hexadecimal
|
||||
}
|
||||
@@ -74,7 +74,7 @@ fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
|
||||
fn get_hexagonal_string_from_u64(a: Vec<u64>) -> String {
|
||||
let mut hexadecimal: String = "".to_string();
|
||||
for test in a {
|
||||
hexadecimal.push_str(&format!("{:016X?}", test));
|
||||
hexadecimal.push_str(&format!("{test:016X?}"));
|
||||
}
|
||||
hexadecimal
|
||||
}
|
||||
@@ -221,19 +221,19 @@ use tfhe::shortint::prelude::*;
|
||||
#[test]
|
||||
fn kreyvium_test_shortint_long() {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
|
||||
@@ -55,7 +55,7 @@ impl<const N: usize, T> Index<usize> for StaticDeque<N, T> {
|
||||
/// 0 is youngest
|
||||
fn index(&self, i: usize) -> &T {
|
||||
if i >= N {
|
||||
panic!("Index {:?} too high for size {:?}", i, N);
|
||||
panic!("Index {i:?} too high for size {N:?}");
|
||||
}
|
||||
&self.arr[(N + self.cursor - i - 1) % N]
|
||||
}
|
||||
@@ -66,7 +66,7 @@ impl<const N: usize, T> IndexMut<usize> for StaticDeque<N, T> {
|
||||
/// 0 is youngest
|
||||
fn index_mut(&mut self, i: usize) -> &mut T {
|
||||
if i >= N {
|
||||
panic!("Index {:?} too high for size {:?}", i, N);
|
||||
panic!("Index {i:?} too high for size {N:?}");
|
||||
}
|
||||
&mut self.arr[(N + self.cursor - i - 1) % N]
|
||||
}
|
||||
|
||||
@@ -48,6 +48,8 @@ fn transcipher_from_1_1_stream(
|
||||
) -> FheUint64 {
|
||||
assert_eq!(stream.len(), 64);
|
||||
|
||||
let id_lut = internal_server_key.generate_lookup_table(|x| x);
|
||||
|
||||
let pairs = (0..32)
|
||||
.into_par_iter()
|
||||
.map(|i| {
|
||||
@@ -57,10 +59,11 @@ fn transcipher_from_1_1_stream(
|
||||
let b0 = &stream[8 * byte_idx + 2 * pair_idx];
|
||||
let b1 = &stream[8 * byte_idx + 2 * pair_idx + 1];
|
||||
|
||||
casting_key.cast(
|
||||
&internal_server_key
|
||||
.unchecked_add(b0, &internal_server_key.unchecked_scalar_mul(b1, 2)),
|
||||
)
|
||||
let mut combined = internal_server_key
|
||||
.unchecked_add(b0, &internal_server_key.unchecked_scalar_mul(b1, 2));
|
||||
internal_server_key.apply_lookup_table_assign(&mut combined, &id_lut);
|
||||
|
||||
casting_key.cast(&combined)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::v1_0::{
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
use tfhe::shortint::parameters::v1_1::{
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
};
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
|
||||
// Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
|
||||
@@ -66,7 +66,7 @@ fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
|
||||
assert!(a.len() % 8 == 0);
|
||||
let mut hexadecimal: String = "".to_string();
|
||||
for test in a {
|
||||
hexadecimal.push_str(&format!("{:02X?}", test));
|
||||
hexadecimal.push_str(&format!("{test:02X?}"));
|
||||
}
|
||||
hexadecimal
|
||||
}
|
||||
@@ -74,7 +74,7 @@ fn get_hexagonal_string_from_bytes(a: Vec<u8>) -> String {
|
||||
fn get_hexagonal_string_from_u64(a: Vec<u64>) -> String {
|
||||
let mut hexadecimal: String = "".to_string();
|
||||
for test in a {
|
||||
hexadecimal.push_str(&format!("{:016X?}", test));
|
||||
hexadecimal.push_str(&format!("{test:016X?}"));
|
||||
}
|
||||
hexadecimal
|
||||
}
|
||||
@@ -357,19 +357,19 @@ use tfhe::shortint::prelude::*;
|
||||
#[test]
|
||||
fn trivium_test_shortint_long() {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_0_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_1_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_0_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_1_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_0_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_1_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tfhe-cuda-backend"
|
||||
version = "0.8.0"
|
||||
version = "0.9.0"
|
||||
edition = "2021"
|
||||
authors = ["Zama team"]
|
||||
license = "BSD-3-Clause-Clear"
|
||||
|
||||
@@ -13,24 +13,26 @@ and forth between the CPU and the GPU, to create and destroy Cuda streams, etc.:
|
||||
- `cuda_get_number_of_gpus`
|
||||
- `cuda_synchronize_device`
|
||||
The cryptographic operations it provides are:
|
||||
- an amortized implementation of the TFHE programmable bootstrap: `cuda_bootstrap_amortized_lwe_ciphertext_vector_32` and `cuda_bootstrap_amortized_lwe_ciphertext_vector_64`
|
||||
- a low latency implementation of the TFHE programmable bootstrap: `cuda_bootstrap_low latency_lwe_ciphertext_vector_32` and `cuda_bootstrap_low_latency_lwe_ciphertext_vector_64`
|
||||
- the keyswitch: `cuda_keyswitch_lwe_ciphertext_vector_32` and `cuda_keyswitch_lwe_ciphertext_vector_64`
|
||||
- the larger precision programmable bootstrap (wop PBS, which supports up to 16 bits of message while the classical PBS only supports up to 8 bits of message) and its sub-components: `cuda_wop_pbs_64`, `cuda_extract_bits_64`, `cuda_circuit_bootstrap_64`, `cuda_cmux_tree_64`, `cuda_blind_rotation_sample_extraction_64`
|
||||
- acceleration for leveled operations: `cuda_negate_lwe_ciphertext_vector_64`, `cuda_add_lwe_ciphertext_vector_64`, `cuda_add_lwe_ciphertext_vector_plaintext_vector_64`, `cuda_mult_lwe_ciphertext_vector_cleartext_vector`.
|
||||
- an implementation of the classical TFHE programmable bootstrap,
|
||||
- an implementation of the multi-bit TFHE programmable bootstrap,
|
||||
- the keyswitch,
|
||||
- acceleration for leveled operations,
|
||||
- acceleration for arithmetics over encrypted integers of arbitrary size,
|
||||
- acceleration for integer compression/decompression.
|
||||
|
||||
## Dependencies
|
||||
|
||||
**Disclaimer**: Compilation on Windows/Mac is not supported yet. Only Nvidia GPUs are supported.
|
||||
|
||||
- nvidia driver - for example, if you're running Ubuntu 20.04 check this [page](https://linuxconfig.org/how-to-install-the-nvidia-drivers-on-ubuntu-20-04-focal-fossa-linux) for installation
|
||||
- nvidia driver - for example, if you're running Ubuntu 20.04 check this [page](https://linuxconfig.org/how-to-install-the-nvidia-drivers-on-ubuntu-20-04-focal-fossa-linux) for installation. You need an Nvidia GPU with Compute Capability >= 3.0
|
||||
- [nvcc](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html) >= 10.0
|
||||
- [gcc](https://gcc.gnu.org/) >= 8.0 - check this [page](https://gist.github.com/ax3l/9489132) for more details about nvcc/gcc compatible versions
|
||||
- [cmake](https://cmake.org/) >= 3.24
|
||||
- libclang, to match Rust bingen [requirements](https://rust-lang.github.io/rust-bindgen/requirements.html) >= 9.0
|
||||
|
||||
## Build
|
||||
|
||||
The Cuda project held in `tfhe-cuda-backend` can be compiled independently from TFHE-rs in the following way:
|
||||
The Cuda project held in `tfhe-cuda-backend` can be compiled independently of TFHE-rs in the following way:
|
||||
```
|
||||
git clone git@github.com:zama-ai/tfhe-rs
|
||||
cd backends/tfhe-cuda-backend/cuda
|
||||
|
||||
@@ -62,6 +62,7 @@ fn main() {
|
||||
"cuda/include/integer/integer.h",
|
||||
"cuda/include/keyswitch.h",
|
||||
"cuda/include/linear_algebra.h",
|
||||
"cuda/include/fft/fft128.h",
|
||||
"cuda/include/pbs/programmable_bootstrap.h",
|
||||
"cuda/include/pbs/programmable_bootstrap_multibit.h",
|
||||
];
|
||||
@@ -73,7 +74,7 @@ fn main() {
|
||||
};
|
||||
let mut headers_modified = bindings_modified;
|
||||
for header in headers {
|
||||
println!("cargo:rerun-if-changed={}", header);
|
||||
println!("cargo:rerun-if-changed={header}");
|
||||
// Check modification times
|
||||
let header_modified = std::fs::metadata(header).unwrap().modified().unwrap();
|
||||
if header_modified > headers_modified {
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
#include "stdint.h"
|
||||
|
||||
extern "C" {
|
||||
|
||||
void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *stream,
|
||||
uint32_t gpu_index,
|
||||
void *dest, void const *src,
|
||||
@@ -18,7 +19,18 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *stream,
|
||||
void cuda_glwe_sample_extract_64(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out, void const *glwe_array_in,
|
||||
uint32_t const *nth_array, uint32_t num_nths,
|
||||
uint32_t glwe_dimension,
|
||||
uint32_t lwe_per_glwe, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size);
|
||||
|
||||
void cuda_modulus_switch_inplace_64(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out, uint32_t size,
|
||||
uint32_t log_modulus);
|
||||
|
||||
void cuda_improve_noise_modulus_switch_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *encrypted_zeros, uint32_t lwe_size,
|
||||
uint32_t num_lwes, uint32_t num_zeros, double input_variance,
|
||||
double r_sigma, double bound, uint32_t log_modulus);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -52,13 +52,13 @@ void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
|
||||
void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,
|
||||
void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
|
||||
@@ -76,7 +76,7 @@ void cuda_drop(void *ptr, uint32_t gpu_index);
|
||||
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index);
|
||||
}
|
||||
|
||||
int cuda_get_max_shared_memory(uint32_t gpu_index);
|
||||
uint32_t cuda_get_max_shared_memory(uint32_t gpu_index);
|
||||
|
||||
bool cuda_check_support_cooperative_groups();
|
||||
|
||||
|
||||
17
backends/tfhe-cuda-backend/cuda/include/fft/fft128.h
Normal file
17
backends/tfhe-cuda-backend/cuda/include/fft/fft128.h
Normal file
@@ -0,0 +1,17 @@
|
||||
#include <stdint.h>
|
||||
extern "C" {
|
||||
void cuda_fourier_transform_forward_as_torus_f128_async(
|
||||
void *stream, uint32_t gpu_index, void *re0, void *re1, void *im0,
|
||||
void *im1, void const *standard, uint32_t const N,
|
||||
const uint32_t number_of_samples);
|
||||
|
||||
void cuda_fourier_transform_forward_as_integer_f128_async(
|
||||
void *stream, uint32_t gpu_index, void *re0, void *re1, void *im0,
|
||||
void *im1, void const *standard, uint32_t const N,
|
||||
const uint32_t number_of_samples);
|
||||
|
||||
void cuda_fourier_transform_backward_as_torus_f128_async(
|
||||
void *stream, uint32_t gpu_index, void *standard, void const *re0,
|
||||
void const *re1, void const *im0, void const *im1, uint32_t const N,
|
||||
const uint32_t number_of_samples);
|
||||
}
|
||||
@@ -20,8 +20,8 @@ void scratch_cuda_integer_decompress_radix_ciphertext_64(
|
||||
uint32_t compression_polynomial_size, uint32_t lwe_dimension,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t storage_log_modulus, uint32_t body_count,
|
||||
bool allocate_gpu_memory);
|
||||
uint32_t storage_log_modulus, uint32_t body_count, bool allocate_gpu_memory,
|
||||
bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_compress_radix_ciphertext_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
|
||||
@@ -102,9 +102,7 @@ template <typename Torus> struct int_decompression {
|
||||
// Example: in the 2_2 case we are mapping a 2 bits message onto a 4 bits
|
||||
// space, we want to keep the original 2 bits value in the 4 bits space,
|
||||
// so we apply the identity and the encoding will rescale it for us.
|
||||
auto decompression_rescale_f = [encryption_params](Torus x) -> Torus {
|
||||
return x;
|
||||
};
|
||||
auto decompression_rescale_f = [](Torus x) -> Torus { return x; };
|
||||
|
||||
auto effective_compression_message_modulus =
|
||||
encryption_params.carry_modulus;
|
||||
|
||||
@@ -44,6 +44,7 @@ typedef struct {
|
||||
uint64_t *degrees;
|
||||
uint64_t *noise_levels;
|
||||
uint32_t num_radix_blocks;
|
||||
uint32_t max_num_radix_blocks;
|
||||
uint32_t lwe_dimension;
|
||||
} CudaRadixCiphertextFFI;
|
||||
|
||||
@@ -54,7 +55,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory);
|
||||
uint64_t lut_degree, bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
@@ -62,12 +63,15 @@ void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory);
|
||||
uint32_t num_many_lut, uint64_t lut_degree, bool allocate_gpu_memory,
|
||||
bool allocate_ms_array);
|
||||
void cuda_apply_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks);
|
||||
void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
void *const *bsks);
|
||||
|
||||
void cleanup_cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -81,15 +85,16 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory);
|
||||
uint64_t lut_degree, bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_apply_bivariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe_1,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe_2, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_radix_blocks,
|
||||
uint32_t shift);
|
||||
void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
void *const *bsks, uint32_t num_radix_blocks, uint32_t shift);
|
||||
|
||||
void cleanup_cuda_apply_bivariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -100,8 +105,9 @@ void cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_luts,
|
||||
uint32_t lut_stride);
|
||||
void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
void *const *bsks, uint32_t num_luts, uint32_t lut_stride);
|
||||
|
||||
void scratch_cuda_full_propagation_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -109,13 +115,13 @@ void scratch_cuda_full_propagation_64(
|
||||
uint32_t polynomial_size, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_full_propagation_64_inplace(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, void *input_blocks,
|
||||
int8_t *mem_ptr, void *const *ksks,
|
||||
void *const *bsks, uint32_t num_blocks);
|
||||
void cuda_full_propagation_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *input_blocks, int8_t *mem_ptr, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
void *const *bsks, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_full_propagation(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -128,14 +134,16 @@ void scratch_cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t pbs_base_log,
|
||||
uint32_t pbs_level, uint32_t ks_base_log, uint32_t ks_level,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *radix_lwe_out, void const *radix_lwe_left, bool const is_bool_left,
|
||||
void const *radix_lwe_right, bool const is_bool_right, void *const *bsks,
|
||||
void *const *ksks, int8_t *mem_ptr, uint32_t polynomial_size,
|
||||
uint32_t num_blocks);
|
||||
CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
CudaRadixCiphertextFFI const *radix_lwe_left, bool const is_bool_left,
|
||||
CudaRadixCiphertextFFI const *radix_lwe_right, bool const is_bool_right,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
int8_t *mem_ptr, uint32_t polynomial_size, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_integer_mult(void *const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -145,12 +153,13 @@ void cuda_negate_integer_radix_ciphertext_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, uint32_t message_modulus,
|
||||
uint32_t carry_modulus);
|
||||
uint32_t carry_modulus, uint32_t num_radix_blocks);
|
||||
|
||||
void cuda_scalar_addition_integer_radix_ciphertext_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, void const *scalar_input,
|
||||
uint32_t num_scalars, uint32_t message_modulus, uint32_t carry_modulus);
|
||||
void const *h_scalar_input, uint32_t num_scalars, uint32_t message_modulus,
|
||||
uint32_t carry_modulus);
|
||||
|
||||
void scratch_cuda_integer_radix_logical_scalar_shift_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -159,12 +168,13 @@ void scratch_cuda_integer_radix_logical_scalar_shift_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_radix_logical_scalar_shift_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, uint32_t shift, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
void scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -173,12 +183,13 @@ void scratch_cuda_integer_radix_arithmetic_scalar_shift_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_radix_arithmetic_scalar_shift_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, uint32_t shift, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
void cleanup_cuda_integer_radix_logical_scalar_shift(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -195,12 +206,13 @@ void scratch_cuda_integer_radix_shift_and_rotate_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
bool is_signed, bool allocate_gpu_memory);
|
||||
bool is_signed, bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_radix_shift_and_rotate_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI const *lwe_shift,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks);
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
void cleanup_cuda_integer_radix_shift_and_rotate(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -214,19 +226,25 @@ void scratch_cuda_integer_radix_comparison_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
COMPARISON_TYPE op_type, bool is_signed, bool allocate_gpu_memory);
|
||||
COMPARISON_TYPE op_type, bool is_signed, bool allocate_gpu_memory,
|
||||
bool allocate_ms_array);
|
||||
|
||||
void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t lwe_ciphertext_count);
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, void const *scalar_blocks,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t lwe_ciphertext_count, uint32_t num_scalar_blocks);
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, void const *scalar_blocks,
|
||||
void const *h_scalar_blocks, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
uint32_t num_scalar_blocks);
|
||||
|
||||
void cleanup_cuda_integer_comparison(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -239,21 +257,23 @@ void scratch_cuda_integer_radix_bitop_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
BITOP_TYPE op_type, bool allocate_gpu_memory);
|
||||
BITOP_TYPE op_type, bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_bitop_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
void cuda_scalar_bitop_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_input, void const *clear_blocks,
|
||||
uint32_t num_clear_blocks, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks);
|
||||
void const *h_clear_blocks, uint32_t num_clear_blocks, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
void cleanup_cuda_integer_bitop(void *const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -266,7 +286,7 @@ void scratch_cuda_integer_radix_cmux_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_cmux_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -274,7 +294,8 @@ void cuda_cmux_integer_radix_ciphertext_kb_64(
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
CudaRadixCiphertextFFI const *lwe_array_true,
|
||||
CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
void cleanup_cuda_integer_radix_cmux(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -287,12 +308,13 @@ void scratch_cuda_integer_radix_scalar_rotate_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, SHIFT_OR_ROTATE_TYPE shift_type,
|
||||
bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_radix_scalar_rotate_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, uint32_t n, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
void cleanup_cuda_integer_radix_scalar_rotate(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -306,7 +328,7 @@ void scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
|
||||
uint32_t uses_carry, bool allocate_gpu_memory);
|
||||
uint32_t uses_carry, bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -315,19 +337,22 @@ void scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
|
||||
uint32_t uses_carry, bool allocate_gpu_memory);
|
||||
uint32_t uses_carry, bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lwe_array, CudaRadixCiphertextFFI *carry_out,
|
||||
const CudaRadixCiphertextFFI *carry_in, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t requested_flag, uint32_t uses_carry);
|
||||
void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
uint32_t requested_flag, uint32_t uses_carry);
|
||||
|
||||
void cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
|
||||
CudaRadixCiphertextFFI *carry_out, const CudaRadixCiphertextFFI *carry_in,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
uint32_t requested_flag, uint32_t uses_carry);
|
||||
|
||||
void cleanup_cuda_propagate_single_carry(void *const *streams,
|
||||
@@ -347,15 +372,16 @@ void scratch_cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t compute_overflow,
|
||||
bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
|
||||
CudaRadixCiphertextFFI *overflow_block,
|
||||
const CudaRadixCiphertextFFI *input_borrow, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t compute_overflow,
|
||||
uint32_t uses_input_borrow);
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
uint32_t compute_overflow, uint32_t uses_input_borrow);
|
||||
|
||||
void cleanup_cuda_integer_overflowing_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -369,13 +395,14 @@ void scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks_in_radix, uint32_t max_num_radix_in_vec,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *radix_lwe_out, void *radix_lwe_vec, uint32_t num_radix_in_vec,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks_in_radix);
|
||||
CudaRadixCiphertextFFI *radix_lwe_out,
|
||||
CudaRadixCiphertextFFI *radix_lwe_vec, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
void cleanup_cuda_integer_radix_partial_sum_ciphertexts_vec(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -387,14 +414,15 @@ void scratch_cuda_integer_scalar_mul_kb_64(
|
||||
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory);
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_scalar_multiplication_integer_radix_ciphertext_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array, uint64_t const *decomposed_scalar,
|
||||
CudaRadixCiphertextFFI *lwe_array, uint64_t const *decomposed_scalar,
|
||||
uint64_t const *has_at_least_one_set, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t lwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t message_modulus, uint32_t num_blocks, uint32_t num_scalars);
|
||||
void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
uint32_t polynomial_size, uint32_t message_modulus, uint32_t num_scalars);
|
||||
|
||||
void cleanup_cuda_integer_radix_scalar_mul(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -408,13 +436,15 @@ void scratch_cuda_integer_div_rem_radix_ciphertext_kb_64(
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory);
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_div_rem_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *quotient, void *remainder, void const *numerator, void const *divisor,
|
||||
bool is_signed, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks_in_radix);
|
||||
CudaRadixCiphertextFFI *quotient, CudaRadixCiphertextFFI *remainder,
|
||||
CudaRadixCiphertextFFI const *numerator,
|
||||
CudaRadixCiphertextFFI const *divisor, bool is_signed, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
void cleanup_cuda_integer_div_rem(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -427,13 +457,15 @@ void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint64_t lut_degree, bool allocate_gpu_memory);
|
||||
uint64_t lut_degree, bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *output_radix_lwe,
|
||||
CudaRadixCiphertextFFI *generates_or_propagates, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks);
|
||||
void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
void *const *bsks, uint32_t num_blocks);
|
||||
|
||||
void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -451,12 +483,13 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory);
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
|
||||
void *const *bsks, void *const *ksks);
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
|
||||
|
||||
void cleanup_cuda_integer_abs_inplace(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
@@ -470,12 +503,15 @@ void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
uint32_t num_radix_blocks);
|
||||
|
||||
void cleanup_cuda_integer_are_all_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -488,12 +524,15 @@ void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
uint32_t num_radix_blocks);
|
||||
|
||||
void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,8 +1,11 @@
|
||||
#ifndef CUDA_RADIX_CIPHERTEXT_H
|
||||
#define CUDA_RADIX_CIPHERTEXT_H
|
||||
|
||||
void release_radix_ciphertext(cudaStream_t const stream,
|
||||
uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *data);
|
||||
void release_radix_ciphertext_async(cudaStream_t const stream,
|
||||
uint32_t const gpu_index,
|
||||
CudaRadixCiphertextFFI *data);
|
||||
|
||||
void reset_radix_ciphertext_blocks(CudaRadixCiphertextFFI *data,
|
||||
uint32_t new_num_blocks);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,7 +1,17 @@
|
||||
#ifndef CUDA_PBS_ENUMS_H
|
||||
#define CUDA_PBS_ENUMS_H
|
||||
|
||||
#include <stdint.h>
|
||||
enum PBS_TYPE { MULTI_BIT = 0, CLASSICAL = 1 };
|
||||
enum PBS_VARIANT { DEFAULT = 0, CG = 1, TBC = 2 };
|
||||
|
||||
extern "C" {
|
||||
typedef struct {
|
||||
void *const *ptr;
|
||||
uint32_t num_zeros;
|
||||
double ms_bound;
|
||||
double ms_r_sigma;
|
||||
double ms_input_variance;
|
||||
} CudaModulusSwitchNoiseReductionKeyFFI;
|
||||
}
|
||||
|
||||
#endif // CUDA_PBS_ENUMS_H
|
||||
|
||||
@@ -5,12 +5,12 @@
|
||||
|
||||
template <typename Torus>
|
||||
bool supports_distributed_shared_memory_on_multibit_programmable_bootstrap(
|
||||
uint32_t polynomial_size, int max_shared_memory);
|
||||
uint32_t polynomial_size, uint32_t max_shared_memory);
|
||||
|
||||
template <typename Torus>
|
||||
bool has_support_to_cuda_programmable_bootstrap_tbc_multi_bit(
|
||||
uint32_t num_samples, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t level_count, int max_shared_memory);
|
||||
uint32_t level_count, uint32_t max_shared_memory);
|
||||
|
||||
#if CUDA_ARCH >= 900
|
||||
template <typename Torus>
|
||||
|
||||
@@ -9,20 +9,26 @@
|
||||
template <typename Torus>
|
||||
uint64_t get_buffer_size_full_sm_programmable_bootstrap_step_one(
|
||||
uint32_t polynomial_size) {
|
||||
return sizeof(Torus) * polynomial_size + // accumulator_rotated
|
||||
sizeof(double2) * polynomial_size / 2; // accumulator fft
|
||||
size_t double_count = (sizeof(Torus) == 16) ? 2 : 1;
|
||||
return sizeof(Torus) * polynomial_size + // accumulator_rotated
|
||||
sizeof(double) * 2 * double_count * polynomial_size /
|
||||
2; // accumulator fft
|
||||
}
|
||||
template <typename Torus>
|
||||
uint64_t get_buffer_size_full_sm_programmable_bootstrap_step_two(
|
||||
uint32_t polynomial_size) {
|
||||
return sizeof(Torus) * polynomial_size + // accumulator
|
||||
sizeof(double2) * polynomial_size / 2; // accumulator fft
|
||||
size_t double_count = (sizeof(Torus) == 16) ? 2 : 1;
|
||||
return sizeof(Torus) * polynomial_size + // accumulator
|
||||
sizeof(double) * 2 * double_count * polynomial_size /
|
||||
2; // accumulator fft
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
uint64_t
|
||||
get_buffer_size_partial_sm_programmable_bootstrap(uint32_t polynomial_size) {
|
||||
return sizeof(double2) * polynomial_size / 2; // accumulator fft
|
||||
size_t double_count = (sizeof(Torus) == 16) ? 2 : 1;
|
||||
return sizeof(double) * 2 * double_count * polynomial_size /
|
||||
2; // accumulator fft
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
@@ -61,7 +67,7 @@ get_buffer_size_partial_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {
|
||||
|
||||
template <typename Torus>
|
||||
bool supports_distributed_shared_memory_on_classic_programmable_bootstrap(
|
||||
uint32_t polynomial_size, int max_shared_memory);
|
||||
uint32_t polynomial_size, uint32_t max_shared_memory);
|
||||
|
||||
template <typename Torus, PBS_TYPE pbs_type> struct pbs_buffer;
|
||||
|
||||
@@ -70,18 +76,26 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
|
||||
Torus *global_accumulator;
|
||||
double2 *global_join_buffer;
|
||||
Torus *temp_lwe_array_in;
|
||||
|
||||
PBS_VARIANT pbs_variant;
|
||||
bool uses_noise_reduction;
|
||||
|
||||
pbs_buffer(cudaStream_t stream, uint32_t gpu_index, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, PBS_VARIANT pbs_variant,
|
||||
bool allocate_gpu_memory) {
|
||||
pbs_buffer(cudaStream_t stream, uint32_t gpu_index, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
|
||||
PBS_VARIANT pbs_variant, bool allocate_gpu_memory,
|
||||
bool allocate_ms_array) {
|
||||
cuda_set_device(gpu_index);
|
||||
this->uses_noise_reduction = allocate_ms_array;
|
||||
this->pbs_variant = pbs_variant;
|
||||
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
|
||||
|
||||
if (allocate_ms_array) {
|
||||
this->temp_lwe_array_in = (Torus *)cuda_malloc_async(
|
||||
(lwe_dimension + 1) * input_lwe_ciphertext_count * sizeof(Torus),
|
||||
stream, gpu_index);
|
||||
}
|
||||
if (allocate_gpu_memory) {
|
||||
switch (pbs_variant) {
|
||||
case PBS_VARIANT::DEFAULT: {
|
||||
@@ -212,6 +226,170 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
|
||||
if (pbs_variant == DEFAULT)
|
||||
cuda_drop_async(global_accumulator, stream, gpu_index);
|
||||
|
||||
if (uses_noise_reduction)
|
||||
cuda_drop_async(temp_lwe_array_in, stream, gpu_index);
|
||||
}
|
||||
};
|
||||
|
||||
template <PBS_TYPE pbs_type> struct pbs_buffer_128;
|
||||
|
||||
template <> struct pbs_buffer_128<PBS_TYPE::CLASSICAL> {
|
||||
int8_t *d_mem;
|
||||
|
||||
__uint128_t *global_accumulator;
|
||||
double *global_join_buffer;
|
||||
__uint128_t *temp_lwe_array_in;
|
||||
|
||||
PBS_VARIANT pbs_variant;
|
||||
bool uses_noise_reduction;
|
||||
|
||||
pbs_buffer_128(cudaStream_t stream, uint32_t gpu_index,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, PBS_VARIANT pbs_variant,
|
||||
bool allocate_gpu_memory, bool allocate_ms_array) {
|
||||
cuda_set_device(gpu_index);
|
||||
this->pbs_variant = pbs_variant;
|
||||
this->uses_noise_reduction = allocate_ms_array;
|
||||
if (allocate_ms_array) {
|
||||
this->temp_lwe_array_in = (__uint128_t *)cuda_malloc_async(
|
||||
(lwe_dimension + 1) * input_lwe_ciphertext_count *
|
||||
sizeof(__uint128_t),
|
||||
stream, gpu_index);
|
||||
}
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
|
||||
size_t global_join_buffer_size = (glwe_dimension + 1) * level_count *
|
||||
input_lwe_ciphertext_count *
|
||||
polynomial_size / 2 * sizeof(double) * 4;
|
||||
|
||||
if (allocate_gpu_memory) {
|
||||
switch (pbs_variant) {
|
||||
case PBS_VARIANT::DEFAULT: {
|
||||
uint64_t full_sm_step_one =
|
||||
get_buffer_size_full_sm_programmable_bootstrap_step_one<
|
||||
__uint128_t>(polynomial_size);
|
||||
uint64_t full_sm_step_two =
|
||||
get_buffer_size_full_sm_programmable_bootstrap_step_two<
|
||||
__uint128_t>(polynomial_size);
|
||||
uint64_t partial_sm =
|
||||
get_buffer_size_partial_sm_programmable_bootstrap<__uint128_t>(
|
||||
polynomial_size);
|
||||
|
||||
uint64_t partial_dm_step_one = full_sm_step_one - partial_sm;
|
||||
uint64_t partial_dm_step_two = full_sm_step_two - partial_sm;
|
||||
uint64_t full_dm = full_sm_step_one;
|
||||
|
||||
uint64_t device_mem = 0;
|
||||
if (max_shared_memory < partial_sm) {
|
||||
device_mem = full_dm * input_lwe_ciphertext_count * level_count *
|
||||
(glwe_dimension + 1);
|
||||
} else if (max_shared_memory < full_sm_step_two) {
|
||||
device_mem =
|
||||
(partial_dm_step_two + partial_dm_step_one * level_count) *
|
||||
input_lwe_ciphertext_count * (glwe_dimension + 1);
|
||||
} else if (max_shared_memory < full_sm_step_one) {
|
||||
device_mem = partial_dm_step_one * input_lwe_ciphertext_count *
|
||||
level_count * (glwe_dimension + 1);
|
||||
}
|
||||
// Otherwise, both kernels run all in shared memory
|
||||
d_mem = (int8_t *)cuda_malloc_async(device_mem, stream, gpu_index);
|
||||
|
||||
global_join_buffer = (double *)cuda_malloc_async(
|
||||
global_join_buffer_size, stream, gpu_index);
|
||||
|
||||
global_accumulator = (__uint128_t *)cuda_malloc_async(
|
||||
(glwe_dimension + 1) * input_lwe_ciphertext_count *
|
||||
polynomial_size * sizeof(__uint128_t),
|
||||
stream, gpu_index);
|
||||
} break;
|
||||
case PBS_VARIANT::CG: {
|
||||
uint64_t full_sm =
|
||||
get_buffer_size_full_sm_programmable_bootstrap_cg<__uint128_t>(
|
||||
polynomial_size);
|
||||
uint64_t partial_sm =
|
||||
get_buffer_size_partial_sm_programmable_bootstrap_cg<__uint128_t>(
|
||||
polynomial_size);
|
||||
|
||||
uint64_t partial_dm = full_sm - partial_sm;
|
||||
uint64_t full_dm = full_sm;
|
||||
uint64_t device_mem = 0;
|
||||
|
||||
if (max_shared_memory < partial_sm) {
|
||||
device_mem = full_dm * input_lwe_ciphertext_count * level_count *
|
||||
(glwe_dimension + 1);
|
||||
} else if (max_shared_memory < full_sm) {
|
||||
device_mem = partial_dm * input_lwe_ciphertext_count * level_count *
|
||||
(glwe_dimension + 1);
|
||||
}
|
||||
|
||||
// Otherwise, both kernels run all in shared memory
|
||||
d_mem = (int8_t *)cuda_malloc_async(device_mem, stream, gpu_index);
|
||||
|
||||
global_join_buffer = (double *)cuda_malloc_async(
|
||||
global_join_buffer_size, stream, gpu_index);
|
||||
} break;
|
||||
#if CUDA_ARCH >= 900
|
||||
case PBS_VARIANT::TBC: {
|
||||
|
||||
bool supports_dsm =
|
||||
supports_distributed_shared_memory_on_classic_programmable_bootstrap<
|
||||
__uint128_t>(polynomial_size, max_shared_memory);
|
||||
|
||||
uint64_t full_sm =
|
||||
get_buffer_size_full_sm_programmable_bootstrap_tbc<__uint128_t>(
|
||||
polynomial_size);
|
||||
uint64_t partial_sm =
|
||||
get_buffer_size_partial_sm_programmable_bootstrap_tbc<__uint128_t>(
|
||||
polynomial_size);
|
||||
uint64_t minimum_sm_tbc = 0;
|
||||
if (supports_dsm)
|
||||
minimum_sm_tbc =
|
||||
get_buffer_size_sm_dsm_plus_tbc_classic_programmable_bootstrap<
|
||||
__uint128_t>(polynomial_size);
|
||||
|
||||
uint64_t partial_dm = full_sm - partial_sm;
|
||||
uint64_t full_dm = full_sm;
|
||||
uint64_t device_mem = 0;
|
||||
|
||||
// There is a minimum amount of memory we need to run the TBC PBS, which
|
||||
// is minimum_sm_tbc. We know that minimum_sm_tbc bytes are available
|
||||
// because otherwise the previous check would have redirected
|
||||
// computation to some other variant. If over that we don't have more
|
||||
// partial_sm bytes, TBC PBS will run on NOSM. If we have partial_sm but
|
||||
// not full_sm bytes, it will run on PARTIALSM. Otherwise, FULLSM.
|
||||
//
|
||||
// NOSM mode actually requires minimum_sm_tbc shared memory bytes.
|
||||
if (max_shared_memory < partial_sm + minimum_sm_tbc) {
|
||||
device_mem = full_dm * input_lwe_ciphertext_count * level_count *
|
||||
(glwe_dimension + 1);
|
||||
} else if (max_shared_memory < full_sm + minimum_sm_tbc) {
|
||||
device_mem = partial_dm * input_lwe_ciphertext_count * level_count *
|
||||
(glwe_dimension + 1);
|
||||
}
|
||||
|
||||
// Otherwise, both kernels run all in shared memory
|
||||
d_mem = (int8_t *)cuda_malloc_async(device_mem, stream, gpu_index);
|
||||
|
||||
global_join_buffer = (double *)cuda_malloc_async(
|
||||
global_join_buffer_size, stream, gpu_index);
|
||||
} break;
|
||||
#endif
|
||||
default:
|
||||
PANIC("Cuda error (PBS): unsupported implementation variant.")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void release(cudaStream_t stream, uint32_t gpu_index) {
|
||||
cuda_drop_async(d_mem, stream, gpu_index);
|
||||
cuda_drop_async(global_join_buffer, stream, gpu_index);
|
||||
|
||||
if (pbs_variant == DEFAULT)
|
||||
cuda_drop_async(global_accumulator, stream, gpu_index);
|
||||
|
||||
if (uses_noise_reduction)
|
||||
cuda_drop_async(temp_lwe_array_in, stream, gpu_index);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -245,7 +423,7 @@ bool has_support_to_cuda_programmable_bootstrap_cg(uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t level_count,
|
||||
uint32_t num_samples,
|
||||
int max_shared_memory);
|
||||
uint32_t max_shared_memory);
|
||||
|
||||
template <typename Torus>
|
||||
void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
@@ -284,21 +462,24 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
template <typename Torus>
|
||||
void scratch_cuda_programmable_bootstrap_tbc(
|
||||
void *stream, uint32_t gpu_index, pbs_buffer<Torus, CLASSICAL> **pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
#endif
|
||||
|
||||
template <typename Torus>
|
||||
void scratch_cuda_programmable_bootstrap_cg(
|
||||
void *stream, uint32_t gpu_index, pbs_buffer<Torus, CLASSICAL> **pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
template <typename Torus>
|
||||
void scratch_cuda_programmable_bootstrap(
|
||||
void *stream, uint32_t gpu_index, pbs_buffer<Torus, CLASSICAL> **buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
|
||||
bool allocate_gpu_memory, bool allocate_ms_array);
|
||||
|
||||
template <typename Torus>
|
||||
bool has_support_to_cuda_programmable_bootstrap_tbc(uint32_t num_samples,
|
||||
|
||||
@@ -20,6 +20,11 @@ void cuda_convert_lwe_programmable_bootstrap_key_64(
|
||||
uint32_t input_lwe_dim, uint32_t glwe_dim, uint32_t level_count,
|
||||
uint32_t polynomial_size);
|
||||
|
||||
void cuda_convert_lwe_programmable_bootstrap_key_128(
|
||||
void *stream, uint32_t gpu_index, void *dest, void const *src,
|
||||
uint32_t input_lwe_dim, uint32_t glwe_dim, uint32_t level_count,
|
||||
uint32_t polynomial_size);
|
||||
|
||||
void scratch_cuda_programmable_bootstrap_amortized_32(
|
||||
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
@@ -53,14 +58,22 @@ void cleanup_cuda_programmable_bootstrap_amortized(void *stream,
|
||||
int8_t **pbs_buffer);
|
||||
|
||||
void scratch_cuda_programmable_bootstrap_32(
|
||||
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
|
||||
bool allocate_ms_array);
|
||||
|
||||
void scratch_cuda_programmable_bootstrap_64(
|
||||
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
|
||||
bool allocate_ms_array);
|
||||
|
||||
void scratch_cuda_programmable_bootstrap_128(
|
||||
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory,
|
||||
bool allocate_ms_array);
|
||||
|
||||
void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
@@ -76,11 +89,24 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
void const *lwe_output_indexes, void const *lut_vector,
|
||||
void const *lut_vector_indexes, void const *lwe_array_in,
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride);
|
||||
|
||||
void cuda_programmable_bootstrap_lwe_ciphertext_vector_128(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lut_vector, void const *lwe_array_in,
|
||||
void const *bootstrapping_key,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples);
|
||||
|
||||
void cleanup_cuda_programmable_bootstrap(void *stream, uint32_t gpu_index,
|
||||
int8_t **pbs_buffer);
|
||||
|
||||
void cleanup_cuda_programmable_bootstrap_128(void *stream, uint32_t gpu_index,
|
||||
int8_t **pbs_buffer);
|
||||
}
|
||||
#endif // CUDA_BOOTSTRAP_H
|
||||
|
||||
@@ -8,7 +8,7 @@ extern "C" {
|
||||
|
||||
bool has_support_to_cuda_programmable_bootstrap_cg_multi_bit(
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t num_samples, int max_shared_memory);
|
||||
uint32_t num_samples, uint32_t max_shared_memory);
|
||||
|
||||
void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_64(
|
||||
void *stream, uint32_t gpu_index, void *dest, void const *src,
|
||||
|
||||
@@ -24,7 +24,7 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *stream,
|
||||
void cuda_glwe_sample_extract_64(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out, void const *glwe_array_in,
|
||||
uint32_t const *nth_array, uint32_t num_nths,
|
||||
uint32_t glwe_dimension,
|
||||
uint32_t lwe_per_glwe, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
@@ -32,46 +32,67 @@ void cuda_glwe_sample_extract_64(void *stream, uint32_t gpu_index,
|
||||
host_sample_extract<uint64_t, AmortizedDegree<256>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
|
||||
glwe_dimension);
|
||||
lwe_per_glwe, glwe_dimension);
|
||||
break;
|
||||
case 512:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<512>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
|
||||
glwe_dimension);
|
||||
lwe_per_glwe, glwe_dimension);
|
||||
break;
|
||||
case 1024:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<1024>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
|
||||
glwe_dimension);
|
||||
lwe_per_glwe, glwe_dimension);
|
||||
break;
|
||||
case 2048:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<2048>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
|
||||
glwe_dimension);
|
||||
lwe_per_glwe, glwe_dimension);
|
||||
break;
|
||||
case 4096:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<4096>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
|
||||
glwe_dimension);
|
||||
lwe_per_glwe, glwe_dimension);
|
||||
break;
|
||||
case 8192:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<8192>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
|
||||
glwe_dimension);
|
||||
lwe_per_glwe, glwe_dimension);
|
||||
break;
|
||||
case 16384:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<16384>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t const *)glwe_array_in, (uint32_t const *)nth_array, num_nths,
|
||||
glwe_dimension);
|
||||
lwe_per_glwe, glwe_dimension);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error: unsupported polynomial size. Supported "
|
||||
"N's are powers of two in the interval [256..16384].")
|
||||
}
|
||||
}
|
||||
|
||||
void cuda_modulus_switch_inplace_64(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out, uint32_t size,
|
||||
uint32_t log_modulus) {
|
||||
host_modulus_switch_inplace<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(lwe_array_out), size, log_modulus);
|
||||
}
|
||||
|
||||
void cuda_improve_noise_modulus_switch_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *encrypted_zeros, uint32_t lwe_size,
|
||||
uint32_t num_lwes, uint32_t num_zeros, double input_variance,
|
||||
double r_sigma, double bound, uint32_t log_modulus) {
|
||||
host_improve_noise_modulus_switch<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<uint64_t const *>(lwe_array_in),
|
||||
static_cast<const uint64_t *>(encrypted_zeros), lwe_size, num_lwes,
|
||||
num_zeros, input_variance, r_sigma, bound, log_modulus);
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ void cuda_convert_lwe_ciphertext_vector_to_cpu(cudaStream_t stream,
|
||||
|
||||
template <typename Torus, class params>
|
||||
__global__ void sample_extract(Torus *lwe_array_out, Torus const *glwe_array_in,
|
||||
uint32_t const *nth_array,
|
||||
uint32_t const *nth_array, uint32_t lwe_per_glwe,
|
||||
uint32_t glwe_dimension) {
|
||||
|
||||
const int input_id = blockIdx.x;
|
||||
@@ -39,28 +39,28 @@ __global__ void sample_extract(Torus *lwe_array_out, Torus const *glwe_array_in,
|
||||
auto lwe_out = lwe_array_out + input_id * lwe_output_size;
|
||||
|
||||
// We assume each GLWE will store the first polynomial_size inputs
|
||||
uint32_t lwe_per_glwe = params::degree;
|
||||
auto glwe_in = glwe_array_in + (input_id / lwe_per_glwe) * glwe_input_size;
|
||||
|
||||
// nth is ensured to be in [0, lwe_per_glwe)
|
||||
auto nth = nth_array[input_id] % lwe_per_glwe;
|
||||
// nth is ensured to be in [0, params::degree)
|
||||
auto nth = nth_array[input_id] % params::degree;
|
||||
|
||||
sample_extract_mask<Torus, params>(lwe_out, glwe_in, glwe_dimension, nth);
|
||||
sample_extract_body<Torus, params>(lwe_out, glwe_in, glwe_dimension, nth);
|
||||
}
|
||||
|
||||
// lwe_per_glwe LWEs will be extracted per GLWE ciphertext, thus we need to have
|
||||
// enough indexes
|
||||
template <typename Torus, class params>
|
||||
__host__ void host_sample_extract(cudaStream_t stream, uint32_t gpu_index,
|
||||
Torus *lwe_array_out,
|
||||
Torus const *glwe_array_in,
|
||||
uint32_t const *nth_array, uint32_t num_nths,
|
||||
uint32_t glwe_dimension) {
|
||||
__host__ void
|
||||
host_sample_extract(cudaStream_t stream, uint32_t gpu_index,
|
||||
Torus *lwe_array_out, Torus const *glwe_array_in,
|
||||
uint32_t const *nth_array, uint32_t num_nths,
|
||||
uint32_t lwe_per_glwe, uint32_t glwe_dimension) {
|
||||
cuda_set_device(gpu_index);
|
||||
|
||||
dim3 grid(num_nths);
|
||||
dim3 thds(params::degree / params::opt);
|
||||
sample_extract<Torus, params><<<grid, thds, 0, stream>>>(
|
||||
lwe_array_out, glwe_array_in, nth_array, glwe_dimension);
|
||||
lwe_array_out, glwe_array_in, nth_array, lwe_per_glwe, glwe_dimension);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
#define CNCRT_FAST_KS_CUH
|
||||
|
||||
#undef NDEBUG
|
||||
#include <assert.h>
|
||||
|
||||
#include "device.h"
|
||||
#include "gadget.cuh"
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
#include "crypto/torus.cuh"
|
||||
#include "device.h"
|
||||
#include "fft128/f128.cuh"
|
||||
#include <cstdint>
|
||||
|
||||
/**
|
||||
@@ -42,6 +43,13 @@ public:
|
||||
}
|
||||
}
|
||||
|
||||
__device__ void decompose_and_compress_next_128(double *result) {
|
||||
for (int j = 0; j < num_poly; j++) {
|
||||
auto result_slice = result + j * params::degree / 2 * 4;
|
||||
decompose_and_compress_next_polynomial_128(result_slice, j);
|
||||
}
|
||||
}
|
||||
|
||||
// Decomposes a single polynomial
|
||||
__device__ void decompose_and_compress_next_polynomial(double2 *result,
|
||||
int j) {
|
||||
@@ -75,10 +83,58 @@ public:
|
||||
synchronize_threads_in_block();
|
||||
}
|
||||
|
||||
// Decomposes a single polynomial
|
||||
__device__ void decompose_and_compress_next_polynomial_128(double *result,
|
||||
int j) {
|
||||
uint32_t tid = threadIdx.x;
|
||||
auto state_slice = &state[j * params::degree];
|
||||
for (int i = 0; i < params::opt / 2; i++) {
|
||||
auto input1 = &state_slice[tid];
|
||||
auto input2 = &state_slice[tid + params::degree / 2];
|
||||
T res_re = *input1 & mask_mod_b;
|
||||
T res_im = *input2 & mask_mod_b;
|
||||
|
||||
*input1 >>= base_log; // Update state
|
||||
*input2 >>= base_log; // Update state
|
||||
|
||||
T carry_re = ((res_re - 1ll) | *input1) & res_re;
|
||||
T carry_im = ((res_im - 1ll) | *input2) & res_im;
|
||||
carry_re >>= (base_log - 1);
|
||||
carry_im >>= (base_log - 1);
|
||||
|
||||
*input1 += carry_re; // Update state
|
||||
*input2 += carry_im; // Update state
|
||||
|
||||
res_re -= carry_re << base_log;
|
||||
res_im -= carry_im << base_log;
|
||||
|
||||
auto out_re = u128_to_signed_to_f128(res_re);
|
||||
auto out_im = u128_to_signed_to_f128(res_im);
|
||||
|
||||
auto out_re_hi = result + 0 * params::degree / 2;
|
||||
auto out_re_lo = result + 1 * params::degree / 2;
|
||||
auto out_im_hi = result + 2 * params::degree / 2;
|
||||
auto out_im_lo = result + 3 * params::degree / 2;
|
||||
|
||||
out_re_hi[tid] = out_re.hi;
|
||||
out_re_lo[tid] = out_re.lo;
|
||||
out_im_hi[tid] = out_im.hi;
|
||||
out_im_lo[tid] = out_im.lo;
|
||||
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
synchronize_threads_in_block();
|
||||
}
|
||||
|
||||
__device__ void decompose_and_compress_level(double2 *result, int level) {
|
||||
for (int i = 0; i < level_count - level; i++)
|
||||
decompose_and_compress_next(result);
|
||||
}
|
||||
|
||||
__device__ void decompose_and_compress_level_128(double *result, int level) {
|
||||
for (int i = 0; i < level_count - level; i++)
|
||||
decompose_and_compress_next_128(result);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Torus>
|
||||
|
||||
@@ -1,7 +1,9 @@
|
||||
#ifndef CNCRT_TORUS_CUH
|
||||
#define CNCRT_TORUS_CUH
|
||||
|
||||
#include "ciphertext.h"
|
||||
#include "device.h"
|
||||
#include "helper_multi_gpu.h"
|
||||
#include "polynomial/parameters.cuh"
|
||||
#include "types/int128.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
@@ -115,8 +117,180 @@ __host__ void host_modulus_switch_inplace(cudaStream_t stream,
|
||||
int num_threads = 0, num_blocks = 0;
|
||||
getNumBlocksAndThreads(size, 1024, num_blocks, num_threads);
|
||||
|
||||
modulus_switch_inplace<<<num_blocks, num_threads, 0, stream>>>(array, size,
|
||||
log_modulus);
|
||||
modulus_switch_inplace<Torus>
|
||||
<<<num_blocks, num_threads, 0, stream>>>(array, size, log_modulus);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ double round_error_double(T input,
|
||||
uint32_t log_modulus) {
|
||||
T rounded;
|
||||
constexpr uint32_t BITS = sizeof(T) * 8;
|
||||
modulus_switch<T>(input, rounded, log_modulus);
|
||||
rounded <<= (BITS - log_modulus);
|
||||
rounded -= input;
|
||||
return __ll2double_rn((int64_t)rounded);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ double measure_modulus_switch_noise(
|
||||
T input1, T input2, uint32_t log_modulus, uint32_t lwe_size,
|
||||
double *sum_mask_errors, double *sum_squared_mask_errors, double *body,
|
||||
double input_variance, double r_sigma, double bound) {
|
||||
|
||||
double input_double1 = round_error_double<T>(input1, log_modulus);
|
||||
double input_double2 = round_error_double<T>(input2, log_modulus);
|
||||
|
||||
if (threadIdx.x + blockDim.x == lwe_size - 1) {
|
||||
body[0] = input_double2;
|
||||
}
|
||||
// Here we are assuming that lwe is at least 512 so all threads will work
|
||||
sum_mask_errors[threadIdx.x] = input_double1;
|
||||
sum_squared_mask_errors[threadIdx.x] = input_double1 * input_double1;
|
||||
|
||||
if (threadIdx.x + blockDim.x < lwe_size - 1) {
|
||||
sum_mask_errors[threadIdx.x] += input_double2;
|
||||
sum_squared_mask_errors[threadIdx.x] += input_double2 * input_double2;
|
||||
}
|
||||
|
||||
// We need to perform a reduction to get the expectancy and variance
|
||||
for (int offset = blockDim.x / 2; offset > 0; offset /= 2) {
|
||||
__syncthreads();
|
||||
if (threadIdx.x < offset) {
|
||||
sum_mask_errors[threadIdx.x] += sum_mask_errors[threadIdx.x + offset];
|
||||
sum_squared_mask_errors[threadIdx.x] +=
|
||||
sum_squared_mask_errors[threadIdx.x + offset];
|
||||
}
|
||||
}
|
||||
|
||||
// Thread 0 has the sum of the mask errors and calculates the noise
|
||||
double noise = 0;
|
||||
if (threadIdx.x == 0) {
|
||||
double expectancy = body[threadIdx.x] - sum_mask_errors[threadIdx.x] / 2.0f;
|
||||
double variance = sum_squared_mask_errors[threadIdx.x] / 4.0f;
|
||||
double std_dev = sqrt(variance + input_variance);
|
||||
noise = abs(expectancy) + std_dev * r_sigma;
|
||||
}
|
||||
__syncthreads();
|
||||
return noise; // only thread 0 will return the correct noise
|
||||
}
|
||||
|
||||
// Each thread processes two elements of the lwe array
|
||||
template <typename Torus>
|
||||
__global__ void
|
||||
improve_noise_modulus_switch(Torus *array_out, const Torus *array_in,
|
||||
const Torus *zeros, int lwe_size, int num_zeros,
|
||||
double input_variance, double r_sigma,
|
||||
double bound, uint32_t log_modulus) {
|
||||
|
||||
// First we will assume size is less than the number of threads per block
|
||||
// I should switch this to dynamic shared memory
|
||||
__shared__ double sum_mask_errors[512];
|
||||
__shared__ double sum_squared_mask_errors[512];
|
||||
__shared__ double body[1];
|
||||
__shared__ bool found;
|
||||
|
||||
// We need to initialize the shared memory
|
||||
if (threadIdx.x == 0)
|
||||
found = false;
|
||||
__syncthreads();
|
||||
// This probably are not needed cause we are setting the values
|
||||
sum_mask_errors[threadIdx.x] = 0.f;
|
||||
sum_squared_mask_errors[threadIdx.x] = 0.f;
|
||||
|
||||
Torus input_element1 = array_in[threadIdx.x + blockIdx.x * lwe_size];
|
||||
|
||||
Torus input_element2 =
|
||||
threadIdx.x + blockDim.x < lwe_size
|
||||
? array_in[threadIdx.x + blockDim.x + blockIdx.x * lwe_size]
|
||||
: 0;
|
||||
|
||||
// Base noise is only handled by thread 0
|
||||
double base_noise = measure_modulus_switch_noise<Torus>(
|
||||
input_element1, input_element2, log_modulus, lwe_size, sum_mask_errors,
|
||||
sum_squared_mask_errors, body, input_variance, r_sigma, bound);
|
||||
|
||||
// If the noise is less than the bound we can just copy the input
|
||||
if (base_noise <= bound && threadIdx.x == 0) {
|
||||
found = true;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
if (found)
|
||||
array_out[threadIdx.x + blockIdx.x * lwe_size] = input_element1;
|
||||
|
||||
if (found && (threadIdx.x + blockDim.x) < lwe_size)
|
||||
array_out[threadIdx.x + blockDim.x + blockIdx.x * lwe_size] =
|
||||
input_element2;
|
||||
|
||||
__syncthreads();
|
||||
// If we found a zero element we stop iterating (in avg 20 times are
|
||||
// required)
|
||||
if (found)
|
||||
return;
|
||||
|
||||
// Now we need to start testing the other zero_elements
|
||||
for (int index = 0; index < num_zeros; index++) {
|
||||
|
||||
Torus zero_element1 =
|
||||
zeros[threadIdx.x + index * lwe_size] + input_element1;
|
||||
Torus zero_element2 =
|
||||
threadIdx.x + blockDim.x < lwe_size
|
||||
? zeros[threadIdx.x + blockDim.x + index * lwe_size] +
|
||||
input_element2
|
||||
: 0;
|
||||
// Index noise is only handled by thread 0
|
||||
// Measuring the potential noise is costly cause requires a reduction
|
||||
double index_noise = measure_modulus_switch_noise<Torus>(
|
||||
zero_element1, zero_element2, log_modulus, lwe_size, sum_mask_errors,
|
||||
sum_squared_mask_errors, body, input_variance, r_sigma, bound);
|
||||
|
||||
if (index_noise <= bound && threadIdx.x == 0) {
|
||||
found = true;
|
||||
}
|
||||
__syncthreads();
|
||||
// Assumption we always have at least 512 elements
|
||||
// If we find a useful zero encryption we replace the lwe by lwe + zero
|
||||
if (found)
|
||||
array_out[threadIdx.x + blockIdx.x * lwe_size] = zero_element1;
|
||||
|
||||
if (found && (threadIdx.x + blockDim.x) < lwe_size)
|
||||
array_out[threadIdx.x + blockDim.x + blockIdx.x * lwe_size] =
|
||||
zero_element2;
|
||||
|
||||
__syncthreads();
|
||||
// If we found a zero element we stop iterating (in avg 20 times are
|
||||
// required)
|
||||
if (found)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_improve_noise_modulus_switch(
|
||||
cudaStream_t stream, uint32_t gpu_index, Torus *array_out,
|
||||
Torus const *array_in, const Torus *zeros, uint32_t lwe_size,
|
||||
uint32_t num_lwes, const uint32_t num_zeros, const double input_variance,
|
||||
const double r_sigma, const double bound, uint32_t log_modulus) {
|
||||
|
||||
if (lwe_size < 512) {
|
||||
PANIC("The lwe_size is less than 512, this is not supported\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (lwe_size > 1024) {
|
||||
PANIC("The lwe_size is greater than 1024, this is not supported\n");
|
||||
return;
|
||||
}
|
||||
cuda_set_device(gpu_index);
|
||||
|
||||
// This reduction requires a power of two num of threads
|
||||
int num_threads = 512, num_blocks = num_lwes;
|
||||
|
||||
improve_noise_modulus_switch<Torus><<<num_blocks, num_threads, 0, stream>>>(
|
||||
array_out, array_in, zeros, lwe_size, num_zeros, input_variance, r_sigma,
|
||||
bound, log_modulus);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
|
||||
@@ -135,7 +135,7 @@ bool cuda_check_support_thread_block_clusters() {
|
||||
}
|
||||
|
||||
/// Copy memory to the GPU asynchronously
|
||||
void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
|
||||
void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
if (size == 0)
|
||||
return;
|
||||
@@ -176,7 +176,7 @@ void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
}
|
||||
|
||||
/// Copy memory within a GPU
|
||||
void cuda_memcpy_gpu_to_gpu(void *dest, void *src, uint64_t size,
|
||||
void cuda_memcpy_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
uint32_t gpu_index) {
|
||||
if (size == 0)
|
||||
return;
|
||||
@@ -303,9 +303,9 @@ void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Get the maximum size for the shared memory
|
||||
int cuda_get_max_shared_memory(uint32_t gpu_index) {
|
||||
int max_shared_memory = 0;
|
||||
/// Get the maximum size for the shared memory per streaming multiprocessors
|
||||
uint32_t cuda_get_max_shared_memory(uint32_t gpu_index) {
|
||||
auto max_shared_memory = 0;
|
||||
#if CUDA_ARCH == 900
|
||||
max_shared_memory = 226000;
|
||||
#elif CUDA_ARCH == 890
|
||||
@@ -317,9 +317,10 @@ int cuda_get_max_shared_memory(uint32_t gpu_index) {
|
||||
#elif CUDA_ARCH == 700
|
||||
max_shared_memory = 95000;
|
||||
#else
|
||||
cudaDeviceGetAttribute(&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlock,
|
||||
cudaDeviceGetAttribute(&max_shared_memory,
|
||||
cudaDevAttrMaxSharedMemoryPerMultiprocessor,
|
||||
gpu_index);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
#endif
|
||||
return max_shared_memory;
|
||||
return (uint32_t)(max_shared_memory);
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
410
backends/tfhe-cuda-backend/cuda/src/fft128/f128.cuh
Normal file
410
backends/tfhe-cuda-backend/cuda/src/fft128/f128.cuh
Normal file
@@ -0,0 +1,410 @@
|
||||
|
||||
#ifndef CUDA_FFT128_F128_CUH
|
||||
#define CUDA_FFT128_F128_CUH
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
struct alignas(16) f128 {
|
||||
double hi;
|
||||
double lo;
|
||||
|
||||
// Default and parameterized constructors
|
||||
__host__ __device__ f128() : hi(0.0), lo(0.0) {}
|
||||
__host__ __device__ f128(double high, double low) : hi(high), lo(low) {}
|
||||
|
||||
// Quick two-sum
|
||||
__host__ __device__ __forceinline__ static f128 quick_two_sum(double a,
|
||||
double b) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
double s = __dadd_rn(a, b);
|
||||
return f128(s, __dsub_rn(b, __dsub_rn(s, a)));
|
||||
#else
|
||||
double s = a + b;
|
||||
return f128(s, b - (s - a));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Two-sum
|
||||
__host__ __device__ __forceinline__ static f128 two_sum(double a, double b) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
double s = __dadd_rn(a, b);
|
||||
double bb = __dsub_rn(s, a);
|
||||
return f128(s, __dadd_rn(__dsub_rn(a, __dsub_rn(s, bb)), __dsub_rn(b, bb)));
|
||||
#else
|
||||
double s = a + b;
|
||||
double bb = s - a;
|
||||
return f128(s, (a - (s - bb)) + (b - bb));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Two-product
|
||||
__host__ __device__ __forceinline__ static f128 two_prod(double a, double b) {
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
double p = __dmul_rn(a, b);
|
||||
double p2 = __fma_rn(a, b, -p);
|
||||
#else
|
||||
double p = a * b;
|
||||
double p2 = fma(a, b, -p);
|
||||
#endif
|
||||
return f128(p, p2);
|
||||
}
|
||||
|
||||
__host__ __device__ __forceinline__ static f128 two_diff(double a, double b) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
double s = __dsub_rn(a, b);
|
||||
double bb = __dsub_rn(s, a);
|
||||
return f128(s, __dsub_rn(__dsub_rn(a, __dsub_rn(s, bb)), __dadd_rn(b, bb)));
|
||||
#else
|
||||
double s = a - b;
|
||||
double bb = s - a;
|
||||
return f128(s, (a - (s - bb)) - (b + bb));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Addition
|
||||
__host__ __device__ static f128 add(const f128 &a, const f128 &b) {
|
||||
auto s = two_sum(a.hi, b.hi);
|
||||
auto t = two_sum(a.lo, b.lo);
|
||||
|
||||
double hi = s.hi;
|
||||
double lo = s.lo + t.hi;
|
||||
hi = hi + lo;
|
||||
lo = lo - (hi - s.hi);
|
||||
|
||||
return f128(hi, lo + t.lo);
|
||||
}
|
||||
|
||||
// Addition with estimate
|
||||
__host__ __device__ static f128 add_estimate(const f128 &a, const f128 &b) {
|
||||
auto se = two_sum(a.hi, b.hi);
|
||||
#ifdef __CUDA_ARCH__
|
||||
se.lo = __dadd_rn(se.lo, __dadd_rn(a.lo, b.lo));
|
||||
#else
|
||||
se.lo += (a.lo + b.lo);
|
||||
#endif
|
||||
|
||||
return quick_two_sum(se.hi, se.lo);
|
||||
}
|
||||
|
||||
// Subtraction with estimate
|
||||
__host__ __device__ static f128 sub_estimate(const f128 &a, const f128 &b) {
|
||||
f128 se = two_diff(a.hi, b.hi);
|
||||
#ifdef __CUDA_ARCH__
|
||||
se.lo = __dadd_rn(se.lo, a.lo);
|
||||
se.lo = __dsub_rn(se.lo, b.lo);
|
||||
#else
|
||||
se.lo += a.lo;
|
||||
se.lo -= b.lo;
|
||||
#endif
|
||||
return quick_two_sum(se.hi, se.lo);
|
||||
}
|
||||
|
||||
// Subtraction
|
||||
__host__ __device__ static f128 sub(const f128 &a, const f128 &b) {
|
||||
auto s = two_diff(a.hi, b.hi);
|
||||
auto t = two_diff(a.lo, b.lo);
|
||||
s = quick_two_sum(s.hi, s.lo + t.hi);
|
||||
return quick_two_sum(s.hi, s.lo + t.lo);
|
||||
}
|
||||
|
||||
// Multiplication
|
||||
__host__ __device__ static f128 mul(const f128 &a, const f128 &b) {
|
||||
auto p = two_prod(a.hi, b.hi);
|
||||
#ifdef __CUDA_ARCH__
|
||||
double a_0_x_b_1 = __dmul_rn(a.hi, b.lo);
|
||||
double a_1_x_b_0 = __dmul_rn(a.lo, b.hi);
|
||||
p.lo = __dadd_rn(p.lo, __dadd_rn(a_0_x_b_1, a_1_x_b_0));
|
||||
#else
|
||||
p.lo += (a.hi * b.lo + a.lo * b.hi);
|
||||
#endif
|
||||
p = quick_two_sum(p.hi, p.lo);
|
||||
return p;
|
||||
}
|
||||
|
||||
__host__ __device__ static f128 add_f64_f64(const double a, const double b) {
|
||||
return two_sum(a, b);
|
||||
}
|
||||
|
||||
__host__ __device__ static f128 f128_floor(const f128 &x) {
|
||||
double x0_floor = floor(x.hi);
|
||||
if (x0_floor == x.hi) {
|
||||
return add_f64_f64(x0_floor, floor(x.lo));
|
||||
}
|
||||
|
||||
return f128(x0_floor, 0.0);
|
||||
}
|
||||
|
||||
__host__ __device__ static void
|
||||
cplx_f128_mul_assign(f128 &c_re, f128 &c_im, const f128 &a_re,
|
||||
const f128 &a_im, const f128 &b_re, const f128 &b_im) {
|
||||
auto a_re_x_b_re = mul(a_re, b_re);
|
||||
auto a_re_x_b_im = mul(a_re, b_im);
|
||||
auto a_im_x_b_re = mul(a_im, b_re);
|
||||
auto a_im_x_b_im = mul(a_im, b_im);
|
||||
|
||||
c_re = sub_estimate(a_re_x_b_re, a_im_x_b_im);
|
||||
c_im = add_estimate(a_im_x_b_re, a_re_x_b_im);
|
||||
}
|
||||
|
||||
__host__ __device__ static void
|
||||
cplx_f128_sub_assign(f128 &c_re, f128 &c_im, const f128 &a_re,
|
||||
const f128 &a_im, const f128 &b_re, const f128 &b_im) {
|
||||
c_re = sub_estimate(a_re, b_re);
|
||||
c_im = sub_estimate(a_im, b_im);
|
||||
}
|
||||
__host__ __device__ static void
|
||||
cplx_f128_add_assign(f128 &c_re, f128 &c_im, const f128 &a_re,
|
||||
const f128 &a_im, const f128 &b_re, const f128 &b_im) {
|
||||
c_re = add_estimate(a_re, b_re);
|
||||
c_im = add_estimate(a_im, b_im);
|
||||
}
|
||||
};
|
||||
|
||||
struct f128x2 {
|
||||
f128 re;
|
||||
f128 im;
|
||||
|
||||
__host__ __device__ f128x2() : re(), im() {}
|
||||
|
||||
__host__ __device__ f128x2(const f128 &real, const f128 &imag)
|
||||
: re(real), im(imag) {}
|
||||
|
||||
__host__ __device__ f128x2(double real, double imag)
|
||||
: re(real, 0.0), im(imag, 0.0) {}
|
||||
|
||||
__host__ __device__ explicit f128x2(double real)
|
||||
: re(real, 0.0), im(0.0, 0.0) {}
|
||||
|
||||
__host__ __device__ f128x2(const f128x2 &other)
|
||||
: re(other.re), im(other.im) {}
|
||||
|
||||
__host__ __device__ f128x2(f128x2 &&other) noexcept
|
||||
: re(std::move(other.re)), im(std::move(other.im)) {}
|
||||
|
||||
__host__ __device__ f128x2 &operator=(const f128x2 &other) {
|
||||
if (this != &other) {
|
||||
re = other.re;
|
||||
im = other.im;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
__host__ __device__ f128x2 &operator=(f128x2 &&other) noexcept {
|
||||
if (this != &other) {
|
||||
re = std::move(other.re);
|
||||
im = std::move(other.im);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
__host__ __device__ f128x2 conjugate() const {
|
||||
return f128x2(re, f128(-im.hi, -im.lo));
|
||||
}
|
||||
|
||||
__host__ __device__ f128 norm_squared() const {
|
||||
return f128::add(f128::mul(re, re), f128::mul(im, im));
|
||||
}
|
||||
|
||||
__host__ __device__ void zero() {
|
||||
re = f128(0.0, 0.0);
|
||||
im = f128(0.0, 0.0);
|
||||
}
|
||||
|
||||
// Addition
|
||||
__host__ __device__ friend f128x2 operator+(const f128x2 &a,
|
||||
const f128x2 &b) {
|
||||
return f128x2(f128::add(a.re, b.re), f128::add(a.im, b.im));
|
||||
}
|
||||
|
||||
// Subtraction
|
||||
__host__ __device__ friend f128x2 operator-(const f128x2 &a,
|
||||
const f128x2 &b) {
|
||||
return f128x2(f128::add(a.re, f128(-b.re.hi, -b.re.lo)),
|
||||
f128::add(a.im, f128(-b.im.hi, -b.im.lo)));
|
||||
}
|
||||
|
||||
// Multiplication (complex multiplication)
|
||||
__host__ __device__ friend f128x2 operator*(const f128x2 &a,
|
||||
const f128x2 &b) {
|
||||
f128 real_part =
|
||||
f128::add(f128::mul(a.re, b.re),
|
||||
f128(-f128::mul(a.im, b.im).hi, -f128::mul(a.im, b.im).lo));
|
||||
f128 imag_part = f128::add(f128::mul(a.re, b.im), f128::mul(a.im, b.re));
|
||||
return f128x2(real_part, imag_part);
|
||||
}
|
||||
|
||||
// Addition-assignment operator
|
||||
__host__ __device__ f128x2 &operator+=(const f128x2 &other) {
|
||||
re = f128::add(re, other.re);
|
||||
im = f128::add(im, other.im);
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Subtraction-assignment operator
|
||||
__host__ __device__ f128x2 &operator-=(const f128x2 &other) {
|
||||
re = f128::add(re, f128(-other.re.hi, -other.re.lo));
|
||||
im = f128::add(im, f128(-other.im.hi, -other.im.lo));
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Multiplication-assignment operator
|
||||
__host__ __device__ f128x2 &operator*=(const f128x2 &other) {
|
||||
f128 new_re =
|
||||
f128::add(f128::mul(re, other.re), f128(-f128::mul(im, other.im).hi,
|
||||
-f128::mul(im, other.im).lo));
|
||||
f128 new_im = f128::add(f128::mul(re, other.im), f128::mul(im, other.re));
|
||||
re = new_re;
|
||||
im = new_im;
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
__host__ __device__ inline uint64_t double_to_bits(double d) {
|
||||
uint64_t bits = *reinterpret_cast<uint64_t *>(&d);
|
||||
return bits;
|
||||
}
|
||||
|
||||
__host__ __device__ inline double bits_to_double(uint64_t bits) {
|
||||
double d = *reinterpret_cast<double *>(&bits);
|
||||
return d;
|
||||
}
|
||||
|
||||
__host__ __device__ inline double u128_to_f64(__uint128_t x) {
|
||||
const __uint128_t ONE = 1;
|
||||
const double A = ONE << 52;
|
||||
const double B = ONE << 104;
|
||||
const double C = ONE << 76;
|
||||
const double D = 340282366920938500000000000000000000000.;
|
||||
|
||||
const __uint128_t threshold = (ONE << 104);
|
||||
|
||||
if (x < threshold) {
|
||||
uint64_t A_bits = double_to_bits(A);
|
||||
|
||||
__uint128_t shifted = (x << 12);
|
||||
uint64_t lower64 = static_cast<uint64_t>(shifted);
|
||||
lower64 >>= 12;
|
||||
|
||||
uint64_t bits_l = A_bits | lower64;
|
||||
double l_temp = bits_to_double(bits_l);
|
||||
double l = l_temp - A;
|
||||
|
||||
uint64_t B_bits = double_to_bits(B);
|
||||
uint64_t top64 = static_cast<uint64_t>(x >> 52);
|
||||
uint64_t bits_h = B_bits | top64;
|
||||
double h_temp = bits_to_double(bits_h);
|
||||
double h = h_temp - B;
|
||||
|
||||
return (l + h);
|
||||
|
||||
} else {
|
||||
uint64_t C_bits = double_to_bits(C);
|
||||
|
||||
__uint128_t shifted = (x >> 12);
|
||||
uint64_t lower64 = static_cast<uint64_t>(shifted);
|
||||
lower64 >>= 12;
|
||||
|
||||
uint64_t x_lo = static_cast<uint64_t>(x);
|
||||
uint64_t mask_part = (x_lo & 0xFFFFFFULL);
|
||||
|
||||
uint64_t bits_l = C_bits | lower64 | mask_part;
|
||||
double l_temp = bits_to_double(bits_l);
|
||||
double l = l_temp - C;
|
||||
|
||||
uint64_t D_bits = double_to_bits(D);
|
||||
uint64_t top64 = static_cast<uint64_t>(x >> 76);
|
||||
uint64_t bits_h = D_bits | top64;
|
||||
double h_temp = bits_to_double(bits_h);
|
||||
double h = h_temp - D;
|
||||
|
||||
return (l + h);
|
||||
}
|
||||
}
|
||||
|
||||
__host__ __device__ inline __uint128_t f64_to_u128(const double f) {
|
||||
const __uint128_t ONE = 1;
|
||||
const uint64_t f_bits = double_to_bits(f);
|
||||
if (f_bits < 1023ull << 52) {
|
||||
return 0;
|
||||
} else {
|
||||
const __uint128_t m = ONE << 127 | (__uint128_t)f_bits << 75;
|
||||
const uint64_t s = 1150 - (f_bits >> 52);
|
||||
if (s >= 128) {
|
||||
return 0;
|
||||
} else {
|
||||
return m >> s;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
__host__ __device__ inline __uint128_t f64_to_i128(const double f) {
|
||||
// Get raw bits of the double
|
||||
const uint64_t f_bits = double_to_bits(f);
|
||||
|
||||
// Remove sign bit (equivalent to Rust's !0 >> 1 mask)
|
||||
const uint64_t a = f_bits & 0x7FFFFFFFFFFFFFFFull;
|
||||
|
||||
// Check if value is in [0, 1) range
|
||||
if (a < (1023ull << 52)) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Reconstruct mantissa with implicit leading 1
|
||||
const __uint128_t m =
|
||||
(__uint128_t{1} << 127) | (static_cast<__uint128_t>(a) << 75);
|
||||
|
||||
// Calculate shift amount based on exponent
|
||||
const uint64_t exponent = a >> 52;
|
||||
const uint64_t s = 1150 - exponent;
|
||||
|
||||
// Perform unsigned right shift
|
||||
const __uint128_t u = m >> s;
|
||||
|
||||
// Apply sign (check original sign bit)
|
||||
const __int128_t result = static_cast<__int128_t>(u);
|
||||
return (f_bits >> 63) ? -result : result;
|
||||
}
|
||||
|
||||
__host__ __device__ inline double i128_to_f64(__int128_t const x) {
|
||||
uint64_t sign = static_cast<uint64_t>(x >> 64) & (1ULL << 63);
|
||||
__uint128_t abs =
|
||||
(x < 0) ? static_cast<__uint128_t>(-x) : static_cast<__uint128_t>(x);
|
||||
|
||||
return bits_to_double(double_to_bits(u128_to_f64(abs)) | sign);
|
||||
}
|
||||
__host__ __device__ inline f128 u128_to_signed_to_f128(__uint128_t x) {
|
||||
const double first_approx = i128_to_f64(x);
|
||||
const uint64_t sign_bit = double_to_bits(first_approx) & (1ull << 63);
|
||||
const __uint128_t first_approx_roundtrip =
|
||||
f64_to_u128((first_approx < 0) ? -first_approx : first_approx);
|
||||
const __uint128_t first_approx_roundtrip_signed =
|
||||
(sign_bit == (1ull << 63)) ? -first_approx_roundtrip
|
||||
: first_approx_roundtrip;
|
||||
|
||||
double correction = i128_to_f64(x - first_approx_roundtrip_signed);
|
||||
|
||||
return f128(first_approx, correction);
|
||||
}
|
||||
|
||||
__host__ __device__ inline __uint128_t u128_from_torus_f128(const f128 &a) {
|
||||
auto x = f128::sub_estimate(a, f128::f128_floor(a));
|
||||
const double normalization = 340282366920938500000000000000000000000.;
|
||||
#ifdef __CUDA_ARCH__
|
||||
x.hi = __dmul_rn(x.hi, normalization);
|
||||
x.lo = __dmul_rn(x.lo, normalization);
|
||||
#else
|
||||
x.hi *= normalization;
|
||||
x.lo *= normalization;
|
||||
#endif
|
||||
|
||||
// TODO has to be round
|
||||
x = f128::f128_floor(x);
|
||||
|
||||
__uint128_t x0 = f64_to_u128(x.hi);
|
||||
__int128_t x1 = f64_to_i128(x.lo);
|
||||
|
||||
return x0 + x1;
|
||||
}
|
||||
|
||||
#endif
|
||||
163
backends/tfhe-cuda-backend/cuda/src/fft128/fft128.cu
Normal file
163
backends/tfhe-cuda-backend/cuda/src/fft128/fft128.cu
Normal file
@@ -0,0 +1,163 @@
|
||||
#include "fft128.cuh"
|
||||
|
||||
void cuda_fourier_transform_forward_as_integer_f128_async(
|
||||
void *stream, uint32_t gpu_index, void *re0, void *re1, void *im0,
|
||||
void *im1, void const *standard, const uint32_t N,
|
||||
const uint32_t number_of_samples) {
|
||||
switch (N) {
|
||||
case 64:
|
||||
host_fourier_transform_forward_as_integer_f128<AmortizedDegree<64>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
case 128:
|
||||
host_fourier_transform_forward_as_integer_f128<AmortizedDegree<128>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
case 256:
|
||||
host_fourier_transform_forward_as_integer_f128<AmortizedDegree<256>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
case 512:
|
||||
host_fourier_transform_forward_as_integer_f128<AmortizedDegree<512>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
case 1024:
|
||||
host_fourier_transform_forward_as_integer_f128<AmortizedDegree<1024>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
case 2048:
|
||||
host_fourier_transform_forward_as_integer_f128<AmortizedDegree<2048>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
case 4096:
|
||||
host_fourier_transform_forward_as_integer_f128<AmortizedDegree<4096>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (f128 fft): unsupported polynomial size. Supported "
|
||||
"N's are powers of two"
|
||||
" in the interval [64..4096].")
|
||||
}
|
||||
}
|
||||
|
||||
void cuda_fourier_transform_forward_as_torus_f128_async(
|
||||
void *stream, uint32_t gpu_index, void *re0, void *re1, void *im0,
|
||||
void *im1, void const *standard, const uint32_t N,
|
||||
const uint32_t number_of_samples) {
|
||||
switch (N) {
|
||||
case 64:
|
||||
host_fourier_transform_forward_as_torus_f128<AmortizedDegree<64>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
case 128:
|
||||
host_fourier_transform_forward_as_torus_f128<AmortizedDegree<128>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
case 256:
|
||||
host_fourier_transform_forward_as_torus_f128<AmortizedDegree<256>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
case 512:
|
||||
host_fourier_transform_forward_as_torus_f128<AmortizedDegree<512>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
case 1024:
|
||||
host_fourier_transform_forward_as_torus_f128<AmortizedDegree<1024>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
case 2048:
|
||||
host_fourier_transform_forward_as_torus_f128<AmortizedDegree<2048>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
case 4096:
|
||||
host_fourier_transform_forward_as_torus_f128<AmortizedDegree<4096>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (double *)re0,
|
||||
(double *)re1, (double *)im0, (double *)im1,
|
||||
(__uint128_t const *)standard, N, number_of_samples);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (f128 fft): unsupported polynomial size. Supported "
|
||||
"N's are powers of two"
|
||||
" in the interval [64..4096].")
|
||||
}
|
||||
}
|
||||
|
||||
void cuda_fourier_transform_backward_as_torus_f128_async(
|
||||
void *stream, uint32_t gpu_index, void *standard, void const *re0,
|
||||
void const *re1, void const *im0, void const *im1, const uint32_t N,
|
||||
const uint32_t number_of_samples) {
|
||||
switch (N) {
|
||||
case 64:
|
||||
host_fourier_transform_backward_as_torus_f128<AmortizedDegree<64>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
|
||||
(double const *)re0, (double const *)re1, (double const *)im0,
|
||||
(double const *)im1, N, number_of_samples);
|
||||
break;
|
||||
case 128:
|
||||
host_fourier_transform_backward_as_torus_f128<AmortizedDegree<128>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
|
||||
(double const *)re0, (double const *)re1, (double const *)im0,
|
||||
(double const *)im1, N, number_of_samples);
|
||||
break;
|
||||
case 256:
|
||||
host_fourier_transform_backward_as_torus_f128<AmortizedDegree<256>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
|
||||
(double const *)re0, (double const *)re1, (double const *)im0,
|
||||
(double const *)im1, N, number_of_samples);
|
||||
break;
|
||||
case 512:
|
||||
host_fourier_transform_backward_as_torus_f128<AmortizedDegree<512>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
|
||||
(double const *)re0, (double const *)re1, (double const *)im0,
|
||||
(double const *)im1, N, number_of_samples);
|
||||
break;
|
||||
case 1024:
|
||||
host_fourier_transform_backward_as_torus_f128<AmortizedDegree<1024>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
|
||||
(double const *)re0, (double const *)re1, (double const *)im0,
|
||||
(double const *)im1, N, number_of_samples);
|
||||
break;
|
||||
case 2048:
|
||||
host_fourier_transform_backward_as_torus_f128<AmortizedDegree<2048>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
|
||||
(double const *)re0, (double const *)re1, (double const *)im0,
|
||||
(double const *)im1, N, number_of_samples);
|
||||
break;
|
||||
case 4096:
|
||||
host_fourier_transform_backward_as_torus_f128<AmortizedDegree<4096>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (__uint128_t *)standard,
|
||||
(double const *)re0, (double const *)re1, (double const *)im0,
|
||||
(double const *)im1, N, number_of_samples);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (f128 ifft): unsupported polynomial size. Supported "
|
||||
"N's are powers of two"
|
||||
" in the interval [64..4096].")
|
||||
}
|
||||
}
|
||||
669
backends/tfhe-cuda-backend/cuda/src/fft128/fft128.cuh
Normal file
669
backends/tfhe-cuda-backend/cuda/src/fft128/fft128.cuh
Normal file
@@ -0,0 +1,669 @@
|
||||
#ifndef CUDA_FFT128_CUH
|
||||
#define CUDA_FFT128_CUH
|
||||
|
||||
#include "f128.cuh"
|
||||
#include "fft/fft128.h"
|
||||
#include "polynomial/functions.cuh"
|
||||
#include "polynomial/parameters.cuh"
|
||||
#include "twiddles.cuh"
|
||||
#include "types/complex/operations.cuh"
|
||||
#include <iostream>
|
||||
|
||||
using Index = unsigned;
|
||||
|
||||
#define NEG_TWID(i) \
|
||||
f128x2(f128(neg_twiddles_re_hi[(i)], neg_twiddles_re_lo[(i)]), \
|
||||
f128(neg_twiddles_im_hi[(i)], neg_twiddles_im_lo[(i)]))
|
||||
|
||||
#define F64x4_TO_F128x2(f128x2_reg, ind) \
|
||||
f128x2_reg.re.hi = dt_re_hi[ind]; \
|
||||
f128x2_reg.re.lo = dt_re_lo[ind]; \
|
||||
f128x2_reg.im.hi = dt_im_hi[ind]; \
|
||||
f128x2_reg.im.lo = dt_im_lo[ind]
|
||||
|
||||
#define F128x2_TO_F64x4(f128x2_reg, ind) \
|
||||
dt_re_hi[ind] = f128x2_reg.re.hi; \
|
||||
dt_re_lo[ind] = f128x2_reg.re.lo; \
|
||||
dt_im_hi[ind] = f128x2_reg.im.hi; \
|
||||
dt_im_lo[ind] = f128x2_reg.im.lo
|
||||
|
||||
template <class params>
|
||||
__device__ void negacyclic_forward_fft_f128(double *dt_re_hi, double *dt_re_lo,
|
||||
double *dt_im_hi,
|
||||
double *dt_im_lo) {
|
||||
|
||||
__syncthreads();
|
||||
constexpr Index BUTTERFLY_DEPTH = params::opt >> 1;
|
||||
constexpr Index LOG2_DEGREE = params::log2_degree;
|
||||
constexpr Index HALF_DEGREE = params::degree >> 1;
|
||||
constexpr Index STRIDE = params::degree / params::opt;
|
||||
|
||||
f128x2 u[BUTTERFLY_DEPTH], v[BUTTERFLY_DEPTH], w;
|
||||
|
||||
Index tid = threadIdx.x;
|
||||
|
||||
// load into registers
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
|
||||
F64x4_TO_F128x2(u[i], tid);
|
||||
F64x4_TO_F128x2(v[i], tid + HALF_DEGREE);
|
||||
tid += STRIDE;
|
||||
}
|
||||
|
||||
// level 1
|
||||
// we don't make actual complex multiplication on level1 since we have only
|
||||
// one twiddle, it's real and image parts are equal, so we can multiply
|
||||
// it with simpler operations
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
|
||||
auto ww = NEG_TWID(1);
|
||||
f128::cplx_f128_mul_assign(w.re, w.im, v[i].re, v[i].im, NEG_TWID(1).re,
|
||||
NEG_TWID(1).im);
|
||||
f128::cplx_f128_sub_assign(v[i].re, v[i].im, u[i].re, u[i].im, w.re, w.im);
|
||||
f128::cplx_f128_add_assign(u[i].re, u[i].im, u[i].re, u[i].im, w.re, w.im);
|
||||
}
|
||||
|
||||
Index twiddle_shift = 1;
|
||||
for (Index l = LOG2_DEGREE - 1; l >= 1; --l) {
|
||||
Index lane_mask = 1 << (l - 1);
|
||||
Index thread_mask = (1 << l) - 1;
|
||||
twiddle_shift <<= 1;
|
||||
|
||||
tid = threadIdx.x;
|
||||
__syncthreads();
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
|
||||
Index rank = tid & thread_mask;
|
||||
bool u_stays_in_register = rank < lane_mask;
|
||||
F128x2_TO_F64x4(((u_stays_in_register) ? v[i] : u[i]), tid);
|
||||
tid = tid + STRIDE;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
|
||||
Index rank = tid & thread_mask;
|
||||
bool u_stays_in_register = rank < lane_mask;
|
||||
F64x4_TO_F128x2(w, tid ^ lane_mask);
|
||||
u[i] = (u_stays_in_register) ? u[i] : w;
|
||||
v[i] = (u_stays_in_register) ? w : v[i];
|
||||
w = NEG_TWID(tid / lane_mask + twiddle_shift);
|
||||
f128::cplx_f128_mul_assign(w.re, w.im, v[i].re, v[i].im, w.re, w.im);
|
||||
f128::cplx_f128_sub_assign(v[i].re, v[i].im, u[i].re, u[i].im, w.re,
|
||||
w.im);
|
||||
f128::cplx_f128_add_assign(u[i].re, u[i].im, u[i].re, u[i].im, w.re,
|
||||
w.im);
|
||||
tid = tid + STRIDE;
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// store registers in SM
|
||||
tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
|
||||
F128x2_TO_F64x4(u[i], tid * 2);
|
||||
F128x2_TO_F64x4(v[i], (tid * 2 + 1));
|
||||
tid = tid + STRIDE;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
template <class params>
|
||||
__device__ void negacyclic_backward_fft_f128(double *dt_re_hi, double *dt_re_lo,
|
||||
double *dt_im_hi,
|
||||
double *dt_im_lo) {
|
||||
__syncthreads();
|
||||
constexpr Index BUTTERFLY_DEPTH = params::opt >> 1;
|
||||
constexpr Index LOG2_DEGREE = params::log2_degree;
|
||||
constexpr Index DEGREE = params::degree;
|
||||
constexpr Index HALF_DEGREE = params::degree >> 1;
|
||||
constexpr Index STRIDE = params::degree / params::opt;
|
||||
|
||||
size_t tid = threadIdx.x;
|
||||
f128x2 u[BUTTERFLY_DEPTH], v[BUTTERFLY_DEPTH], w;
|
||||
|
||||
// load into registers and divide by compressed polynomial size
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
|
||||
F64x4_TO_F128x2(u[i], 2 * tid);
|
||||
F64x4_TO_F128x2(v[i], 2 * tid + 1);
|
||||
tid += STRIDE;
|
||||
}
|
||||
|
||||
Index twiddle_shift = DEGREE;
|
||||
for (Index l = 1; l <= LOG2_DEGREE - 1; ++l) {
|
||||
Index lane_mask = 1 << (l - 1);
|
||||
Index thread_mask = (1 << l) - 1;
|
||||
tid = threadIdx.x;
|
||||
twiddle_shift >>= 1;
|
||||
|
||||
// at this point registers are ready for the butterfly
|
||||
tid = threadIdx.x;
|
||||
__syncthreads();
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
|
||||
w = (u[i] - v[i]);
|
||||
u[i] += v[i];
|
||||
v[i] = w * NEG_TWID(tid / lane_mask + twiddle_shift).conjugate();
|
||||
|
||||
// keep one of the register for next iteration and store another one in sm
|
||||
Index rank = tid & thread_mask;
|
||||
bool u_stays_in_register = rank < lane_mask;
|
||||
F128x2_TO_F64x4(((u_stays_in_register) ? v[i] : u[i]), tid);
|
||||
|
||||
tid = tid + STRIDE;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// prepare registers for next butterfly iteration
|
||||
tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
|
||||
Index rank = tid & thread_mask;
|
||||
bool u_stays_in_register = rank < lane_mask;
|
||||
F64x4_TO_F128x2(w, tid ^ lane_mask);
|
||||
|
||||
u[i] = (u_stays_in_register) ? u[i] : w;
|
||||
v[i] = (u_stays_in_register) ? w : v[i];
|
||||
|
||||
tid = tid + STRIDE;
|
||||
}
|
||||
}
|
||||
|
||||
// last iteration
|
||||
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
|
||||
w = (u[i] - v[i]);
|
||||
u[i] = u[i] + v[i];
|
||||
v[i] = w * NEG_TWID(1).conjugate();
|
||||
}
|
||||
__syncthreads();
|
||||
// store registers in SM
|
||||
tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
|
||||
F128x2_TO_F64x4(u[i], tid);
|
||||
F128x2_TO_F64x4(v[i], tid + HALF_DEGREE);
|
||||
|
||||
tid = tid + STRIDE;
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
// params is expected to be full degree not half degree
|
||||
template <class params>
|
||||
__device__ void convert_u128_to_f128_as_integer(
|
||||
double *out_re_hi, double *out_re_lo, double *out_im_hi, double *out_im_lo,
|
||||
const __uint128_t *in_re, const __uint128_t *in_im) {
|
||||
|
||||
Index tid = threadIdx.x;
|
||||
// #pragma unroll
|
||||
for (Index i = 0; i < params::opt / 2; i++) {
|
||||
auto out_re = u128_to_signed_to_f128(in_re[tid]);
|
||||
auto out_im = u128_to_signed_to_f128(in_im[tid]);
|
||||
|
||||
out_re_hi[tid] = out_re.hi;
|
||||
out_re_lo[tid] = out_re.lo;
|
||||
out_im_hi[tid] = out_im.hi;
|
||||
out_im_lo[tid] = out_im.lo;
|
||||
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
}
|
||||
|
||||
// params is expected to be full degree not half degree
|
||||
template <class params>
|
||||
__device__ void convert_u128_to_f128_as_torus(
|
||||
double *out_re_hi, double *out_re_lo, double *out_im_hi, double *out_im_lo,
|
||||
const __uint128_t *in_re, const __uint128_t *in_im) {
|
||||
|
||||
const double normalization = pow(2., -128.);
|
||||
Index tid = threadIdx.x;
|
||||
// #pragma unroll
|
||||
for (Index i = 0; i < params::opt / 2; i++) {
|
||||
auto out_re = u128_to_signed_to_f128(in_re[tid]);
|
||||
auto out_im = u128_to_signed_to_f128(in_im[tid]);
|
||||
|
||||
out_re_hi[tid] = out_re.hi * normalization;
|
||||
out_re_lo[tid] = out_re.lo * normalization;
|
||||
out_im_hi[tid] = out_im.hi * normalization;
|
||||
out_im_lo[tid] = out_im.lo * normalization;
|
||||
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
}
|
||||
|
||||
template <class params>
|
||||
__device__ void
|
||||
convert_f128_to_u128_as_torus(__uint128_t *out_re, __uint128_t *out_im,
|
||||
const double *in_re_hi, const double *in_re_lo,
|
||||
const double *in_im_hi, const double *in_im_lo) {
|
||||
|
||||
const double normalization = 1. / (params::degree / 2);
|
||||
Index tid = threadIdx.x;
|
||||
// #pragma unroll
|
||||
for (Index i = 0; i < params::opt / 2; i++) {
|
||||
|
||||
f128 in_re(in_re_hi[tid] * normalization, in_re_lo[tid] * normalization);
|
||||
f128 in_im(in_im_hi[tid] * normalization, in_im_lo[tid] * normalization);
|
||||
|
||||
out_re[tid] = u128_from_torus_f128(in_re);
|
||||
out_im[tid] = u128_from_torus_f128(in_im);
|
||||
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
}
|
||||
|
||||
// params is expected to be full degree not half degree
|
||||
template <class params>
|
||||
__global__ void
|
||||
batch_convert_u128_to_f128_as_integer(double *out_re_hi, double *out_re_lo,
|
||||
double *out_im_hi, double *out_im_lo,
|
||||
const __uint128_t *in) {
|
||||
|
||||
convert_u128_to_f128_as_integer<params>(
|
||||
&out_re_hi[blockIdx.x * params::degree / 2],
|
||||
&out_re_lo[blockIdx.x * params::degree / 2],
|
||||
&out_im_hi[blockIdx.x * params::degree / 2],
|
||||
&out_im_lo[blockIdx.x * params::degree / 2],
|
||||
&in[blockIdx.x * params::degree],
|
||||
&in[blockIdx.x * params::degree + params::degree / 2]);
|
||||
}
|
||||
|
||||
// params is expected to be full degree not half degree
|
||||
// converts standqard input into complex<128> represented by 4 double
|
||||
// with following pattern: [re_hi_0, re_hi_1, ... re_hi_n, re_lo_0, re_lo_1,
|
||||
// ... re_lo_n, im_hi_0, im_hi_1, ..., im_hi_n, im_lo_0, im_lo_1, ..., im_lo_n]
|
||||
template <class params>
|
||||
__global__ void
|
||||
batch_convert_u128_to_f128_as_torus(double *out_re_hi, double *out_re_lo,
|
||||
double *out_im_hi, double *out_im_lo,
|
||||
const __uint128_t *in) {
|
||||
|
||||
convert_u128_to_f128_as_torus<params>(
|
||||
&out_re_hi[blockIdx.x * params::degree / 2],
|
||||
&out_re_lo[blockIdx.x * params::degree / 2],
|
||||
&out_im_hi[blockIdx.x * params::degree / 2],
|
||||
&out_im_lo[blockIdx.x * params::degree / 2],
|
||||
&in[blockIdx.x * params::degree],
|
||||
&in[blockIdx.x * params::degree + params::degree / 2]);
|
||||
}
|
||||
|
||||
// params is expected to be full degree not half degree
|
||||
// converts standqard input into complex<128> represented by 4 double
|
||||
// with following pattern: [re_hi_0, re_lo_0, im_hi_0, im_lo_0, re_hi_1,
|
||||
// re_lo_1, im_hi_1, im_lo_1,
|
||||
// ...,re_hi_n, re_lo_n, im_hi_n, im_lo_n, ]
|
||||
template <class params>
|
||||
__global__ void
|
||||
batch_convert_u128_to_f128_strided_as_torus(double *d_out,
|
||||
const __uint128_t *d_in) {
|
||||
|
||||
constexpr size_t chunk_size = params::degree / 2 * 4;
|
||||
double *chunk = &d_out[blockIdx.x * chunk_size];
|
||||
double *out_re_hi = &chunk[0 * params::degree / 2];
|
||||
double *out_re_lo = &chunk[1 * params::degree / 2];
|
||||
double *out_im_hi = &chunk[2 * params::degree / 2];
|
||||
double *out_im_lo = &chunk[3 * params::degree / 2];
|
||||
|
||||
convert_u128_to_f128_as_torus<params>(
|
||||
out_re_hi, out_re_lo, out_im_hi, out_im_lo,
|
||||
&d_in[blockIdx.x * params::degree],
|
||||
&d_in[blockIdx.x * params::degree + params::degree / 2]);
|
||||
}
|
||||
|
||||
// params is expected to be full degree not half degree
|
||||
template <class params>
|
||||
__global__ void batch_convert_f128_to_u128_as_torus(__uint128_t *out,
|
||||
const double *in_re_hi,
|
||||
const double *in_re_lo,
|
||||
const double *in_im_hi,
|
||||
const double *in_im_lo) {
|
||||
|
||||
convert_f128_to_u128_as_torus<params>(
|
||||
&out[blockIdx.x * params::degree],
|
||||
&out[blockIdx.x * params::degree + params::degree / 2],
|
||||
&in_re_hi[blockIdx.x * params::degree / 2],
|
||||
&in_re_lo[blockIdx.x * params::degree / 2],
|
||||
&in_im_hi[blockIdx.x * params::degree / 2],
|
||||
&in_im_lo[blockIdx.x * params::degree / 2]);
|
||||
}
|
||||
|
||||
template <class params, sharedMemDegree SMD>
|
||||
__global__ void
|
||||
batch_NSMFFT_128(double *in_re_hi, double *in_re_lo, double *in_im_hi,
|
||||
double *in_im_lo, double *out_re_hi, double *out_re_lo,
|
||||
double *out_im_hi, double *out_im_lo, double *buffer) {
|
||||
extern __shared__ double sharedMemoryFFT128[];
|
||||
double *re_hi, *re_lo, *im_hi, *im_lo;
|
||||
|
||||
if (SMD == NOSM) {
|
||||
re_hi =
|
||||
&buffer[blockIdx.x * params::degree / 2 * 4 + params::degree / 2 * 0];
|
||||
re_lo =
|
||||
&buffer[blockIdx.x * params::degree / 2 * 4 + params::degree / 2 * 1];
|
||||
im_hi =
|
||||
&buffer[blockIdx.x * params::degree / 2 * 4 + params::degree / 2 * 2];
|
||||
im_lo =
|
||||
&buffer[blockIdx.x * params::degree / 2 * 4 + params::degree / 2 * 3];
|
||||
} else {
|
||||
re_hi = &sharedMemoryFFT128[params::degree / 2 * 0];
|
||||
re_lo = &sharedMemoryFFT128[params::degree / 2 * 1];
|
||||
im_hi = &sharedMemoryFFT128[params::degree / 2 * 2];
|
||||
im_lo = &sharedMemoryFFT128[params::degree / 2 * 3];
|
||||
}
|
||||
|
||||
Index tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < params::opt / 2; ++i) {
|
||||
re_hi[tid] = in_re_hi[blockIdx.x * (params::degree / 2) + tid];
|
||||
re_lo[tid] = in_re_lo[blockIdx.x * (params::degree / 2) + tid];
|
||||
im_hi[tid] = in_im_hi[blockIdx.x * (params::degree / 2) + tid];
|
||||
im_lo[tid] = in_im_lo[blockIdx.x * (params::degree / 2) + tid];
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
if constexpr (params::fft_direction == 1) {
|
||||
negacyclic_backward_fft_f128<HalfDegree<params>>(re_hi, re_lo, im_hi,
|
||||
im_lo);
|
||||
} else {
|
||||
negacyclic_forward_fft_f128<HalfDegree<params>>(re_hi, re_lo, im_hi, im_lo);
|
||||
}
|
||||
__syncthreads();
|
||||
tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < params::opt / 2; ++i) {
|
||||
out_re_hi[blockIdx.x * (params::degree / 2) + tid] = re_hi[tid];
|
||||
out_re_lo[blockIdx.x * (params::degree / 2) + tid] = re_lo[tid];
|
||||
out_im_hi[blockIdx.x * (params::degree / 2) + tid] = im_hi[tid];
|
||||
out_im_lo[blockIdx.x * (params::degree / 2) + tid] = im_lo[tid];
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
}
|
||||
|
||||
template <class params, sharedMemDegree SMD>
|
||||
__global__ void batch_NSMFFT_strided_128(double *d_in, double *d_out,
|
||||
double *buffer) {
|
||||
extern __shared__ double sharedMemoryFFT128[];
|
||||
double *re_hi, *re_lo, *im_hi, *im_lo;
|
||||
|
||||
if (SMD == NOSM) {
|
||||
re_hi =
|
||||
&buffer[blockIdx.x * params::degree / 2 * 4 + params::degree / 2 * 0];
|
||||
re_lo =
|
||||
&buffer[blockIdx.x * params::degree / 2 * 4 + params::degree / 2 * 1];
|
||||
im_hi =
|
||||
&buffer[blockIdx.x * params::degree / 2 * 4 + params::degree / 2 * 2];
|
||||
im_lo =
|
||||
&buffer[blockIdx.x * params::degree / 2 * 4 + params::degree / 2 * 3];
|
||||
} else {
|
||||
re_hi = &sharedMemoryFFT128[params::degree / 2 * 0];
|
||||
re_lo = &sharedMemoryFFT128[params::degree / 2 * 1];
|
||||
im_hi = &sharedMemoryFFT128[params::degree / 2 * 2];
|
||||
im_lo = &sharedMemoryFFT128[params::degree / 2 * 3];
|
||||
}
|
||||
|
||||
constexpr size_t chunk_size = params::degree / 2 * 4;
|
||||
double *chunk = &d_in[blockIdx.x * chunk_size];
|
||||
double *tmp_re_hi = &chunk[0 * params::degree / 2];
|
||||
double *tmp_re_lo = &chunk[1 * params::degree / 2];
|
||||
double *tmp_im_hi = &chunk[2 * params::degree / 2];
|
||||
double *tmp_im_lo = &chunk[3 * params::degree / 2];
|
||||
|
||||
Index tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < params::opt / 2; ++i) {
|
||||
re_hi[tid] = tmp_re_hi[tid];
|
||||
re_lo[tid] = tmp_re_lo[tid];
|
||||
im_hi[tid] = tmp_im_hi[tid];
|
||||
im_lo[tid] = tmp_im_lo[tid];
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
__syncthreads();
|
||||
if constexpr (params::fft_direction == 1) {
|
||||
negacyclic_backward_fft_f128<HalfDegree<params>>(re_hi, re_lo, im_hi,
|
||||
im_lo);
|
||||
} else {
|
||||
negacyclic_forward_fft_f128<HalfDegree<params>>(re_hi, re_lo, im_hi, im_lo);
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
chunk = &d_out[blockIdx.x * chunk_size];
|
||||
tmp_re_hi = &chunk[0 * params::degree / 2];
|
||||
tmp_re_lo = &chunk[1 * params::degree / 2];
|
||||
tmp_im_hi = &chunk[2 * params::degree / 2];
|
||||
tmp_im_lo = &chunk[3 * params::degree / 2];
|
||||
|
||||
tid = threadIdx.x;
|
||||
#pragma unroll
|
||||
for (Index i = 0; i < params::opt / 2; ++i) {
|
||||
tmp_re_hi[tid] = re_hi[tid];
|
||||
tmp_re_lo[tid] = re_lo[tid];
|
||||
tmp_im_hi[tid] = im_hi[tid];
|
||||
tmp_im_lo[tid] = im_lo[tid];
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
}
|
||||
|
||||
template <class params>
|
||||
__host__ void host_fourier_transform_forward_as_integer_f128(
|
||||
cudaStream_t stream, uint32_t gpu_index, double *re0, double *re1,
|
||||
double *im0, double *im1, const __uint128_t *standard, const uint32_t N,
|
||||
const uint32_t number_of_samples) {
|
||||
|
||||
// allocate device buffers
|
||||
double *d_re0 =
|
||||
(double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
|
||||
double *d_re1 =
|
||||
(double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
|
||||
double *d_im0 =
|
||||
(double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
|
||||
double *d_im1 =
|
||||
(double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
|
||||
__uint128_t *d_standard = (__uint128_t *)cuda_malloc_async(
|
||||
N * sizeof(__uint128_t), stream, gpu_index);
|
||||
|
||||
// copy input into device
|
||||
cuda_memcpy_async_to_gpu(d_standard, standard, N * sizeof(__uint128_t),
|
||||
stream, gpu_index);
|
||||
|
||||
// setup launch parameters
|
||||
size_t required_shared_memory_size = sizeof(double) * N / 2 * 4;
|
||||
int grid_size = number_of_samples;
|
||||
int block_size = params::degree / params::opt;
|
||||
bool full_sm =
|
||||
(required_shared_memory_size <= cuda_get_max_shared_memory(gpu_index));
|
||||
size_t buffer_size = full_sm ? 0 : (size_t)number_of_samples * N / 2 * 4;
|
||||
size_t shared_memory_size = full_sm ? required_shared_memory_size : 0;
|
||||
double *buffer = (double *)cuda_malloc_async(buffer_size, stream, gpu_index);
|
||||
|
||||
// configure shared memory for batch fft kernel
|
||||
if (full_sm) {
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, FULLSM>,
|
||||
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
|
||||
check_cuda_error(cudaFuncSetCacheConfig(
|
||||
batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, FULLSM>,
|
||||
cudaFuncCachePreferShared));
|
||||
}
|
||||
|
||||
// convert u128 into 4 x double
|
||||
batch_convert_u128_to_f128_as_integer<params>
|
||||
<<<grid_size, block_size, 0, stream>>>(d_re0, d_re1, d_im0, d_im1,
|
||||
d_standard);
|
||||
|
||||
// call negacyclic 128 bit forward fft.
|
||||
if (full_sm) {
|
||||
batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, FULLSM>
|
||||
<<<grid_size, block_size, shared_memory_size, stream>>>(
|
||||
d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
|
||||
} else {
|
||||
batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, NOSM>
|
||||
<<<grid_size, block_size, shared_memory_size, stream>>>(
|
||||
d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
|
||||
}
|
||||
|
||||
cuda_memcpy_async_to_cpu(re0, d_re0, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
cuda_memcpy_async_to_cpu(re1, d_re1, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
cuda_memcpy_async_to_cpu(im0, d_im0, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
cuda_memcpy_async_to_cpu(im1, d_im1, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
|
||||
cuda_drop_async(d_standard, stream, gpu_index);
|
||||
cuda_drop_async(d_re0, stream, gpu_index);
|
||||
cuda_drop_async(d_re1, stream, gpu_index);
|
||||
cuda_drop_async(d_im0, stream, gpu_index);
|
||||
cuda_drop_async(d_im1, stream, gpu_index);
|
||||
}
|
||||
|
||||
template <class params>
|
||||
__host__ void host_fourier_transform_forward_as_torus_f128(
|
||||
cudaStream_t stream, uint32_t gpu_index, double *re0, double *re1,
|
||||
double *im0, double *im1, const __uint128_t *standard, const uint32_t N,
|
||||
const uint32_t number_of_samples) {
|
||||
|
||||
// allocate device buffers
|
||||
double *d_re0 =
|
||||
(double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
|
||||
double *d_re1 =
|
||||
(double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
|
||||
double *d_im0 =
|
||||
(double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
|
||||
double *d_im1 =
|
||||
(double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
|
||||
__uint128_t *d_standard = (__uint128_t *)cuda_malloc_async(
|
||||
N * sizeof(__uint128_t), stream, gpu_index);
|
||||
|
||||
// copy input into device
|
||||
cuda_memcpy_async_to_gpu(d_standard, standard, N * sizeof(__uint128_t),
|
||||
stream, gpu_index);
|
||||
|
||||
// setup launch parameters
|
||||
size_t required_shared_memory_size = sizeof(double) * N / 2 * 4;
|
||||
int grid_size = number_of_samples;
|
||||
int block_size = params::degree / params::opt;
|
||||
bool full_sm =
|
||||
(required_shared_memory_size <= cuda_get_max_shared_memory(gpu_index));
|
||||
size_t buffer_size = full_sm ? 0 : (size_t)number_of_samples * N / 2 * 4;
|
||||
size_t shared_memory_size = full_sm ? required_shared_memory_size : 0;
|
||||
double *buffer = (double *)cuda_malloc_async(buffer_size, stream, gpu_index);
|
||||
|
||||
// configure shared memory for batch fft kernel
|
||||
if (full_sm) {
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, FULLSM>,
|
||||
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
|
||||
check_cuda_error(cudaFuncSetCacheConfig(
|
||||
batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, FULLSM>,
|
||||
cudaFuncCachePreferShared));
|
||||
}
|
||||
|
||||
// convert u128 into 4 x double
|
||||
batch_convert_u128_to_f128_as_torus<params>
|
||||
<<<grid_size, block_size, 0, stream>>>(d_re0, d_re1, d_im0, d_im1,
|
||||
d_standard);
|
||||
|
||||
// call negacyclic 128 bit forward fft.
|
||||
if (full_sm) {
|
||||
batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, FULLSM>
|
||||
<<<grid_size, block_size, shared_memory_size, stream>>>(
|
||||
d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
|
||||
} else {
|
||||
batch_NSMFFT_128<FFTDegree<params, ForwardFFT>, NOSM>
|
||||
<<<grid_size, block_size, shared_memory_size, stream>>>(
|
||||
d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
|
||||
}
|
||||
|
||||
cuda_memcpy_async_to_cpu(re0, d_re0, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
cuda_memcpy_async_to_cpu(re1, d_re1, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
cuda_memcpy_async_to_cpu(im0, d_im0, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
cuda_memcpy_async_to_cpu(im1, d_im1, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
|
||||
cuda_drop_async(d_standard, stream, gpu_index);
|
||||
cuda_drop_async(d_re0, stream, gpu_index);
|
||||
cuda_drop_async(d_re1, stream, gpu_index);
|
||||
cuda_drop_async(d_im0, stream, gpu_index);
|
||||
cuda_drop_async(d_im1, stream, gpu_index);
|
||||
}
|
||||
|
||||
template <class params>
|
||||
__host__ void host_fourier_transform_backward_as_torus_f128(
|
||||
cudaStream_t stream, uint32_t gpu_index, __uint128_t *standard,
|
||||
double const *re0, double const *re1, double const *im0, double const *im1,
|
||||
const uint32_t N, const uint32_t number_of_samples) {
|
||||
|
||||
// allocate device buffers
|
||||
double *d_re0 =
|
||||
(double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
|
||||
double *d_re1 =
|
||||
(double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
|
||||
double *d_im0 =
|
||||
(double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
|
||||
double *d_im1 =
|
||||
(double *)cuda_malloc_async(N / 2 * sizeof(double), stream, gpu_index);
|
||||
__uint128_t *d_standard = (__uint128_t *)cuda_malloc_async(
|
||||
N * sizeof(__uint128_t), stream, gpu_index);
|
||||
|
||||
// // copy input into device
|
||||
cuda_memcpy_async_to_gpu(d_re0, re0, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
cuda_memcpy_async_to_gpu(d_re1, re1, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
cuda_memcpy_async_to_gpu(d_im0, im0, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
cuda_memcpy_async_to_gpu(d_im1, im1, N / 2 * sizeof(double), stream,
|
||||
gpu_index);
|
||||
|
||||
// setup launch parameters
|
||||
size_t required_shared_memory_size = sizeof(double) * N / 2 * 4;
|
||||
int grid_size = number_of_samples;
|
||||
int block_size = params::degree / params::opt;
|
||||
bool full_sm =
|
||||
(required_shared_memory_size <= cuda_get_max_shared_memory(gpu_index));
|
||||
size_t buffer_size = full_sm ? 0 : (size_t)number_of_samples * N / 2 * 4;
|
||||
size_t shared_memory_size = full_sm ? required_shared_memory_size : 0;
|
||||
double *buffer = (double *)cuda_malloc_async(buffer_size, stream, gpu_index);
|
||||
|
||||
// configure shared memory for batch fft kernel
|
||||
if (full_sm) {
|
||||
check_cuda_error(cudaFuncSetAttribute(
|
||||
batch_NSMFFT_128<FFTDegree<params, BackwardFFT>, FULLSM>,
|
||||
cudaFuncAttributeMaxDynamicSharedMemorySize, shared_memory_size));
|
||||
check_cuda_error(cudaFuncSetCacheConfig(
|
||||
batch_NSMFFT_128<FFTDegree<params, BackwardFFT>, FULLSM>,
|
||||
cudaFuncCachePreferShared));
|
||||
batch_NSMFFT_128<FFTDegree<params, BackwardFFT>, FULLSM>
|
||||
<<<grid_size, block_size, shared_memory_size, stream>>>(
|
||||
d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
|
||||
} else {
|
||||
batch_NSMFFT_128<FFTDegree<params, BackwardFFT>, NOSM>
|
||||
<<<grid_size, block_size, shared_memory_size, stream>>>(
|
||||
d_re0, d_re1, d_im0, d_im1, d_re0, d_re1, d_im0, d_im1, buffer);
|
||||
}
|
||||
|
||||
batch_convert_f128_to_u128_as_torus<params>
|
||||
<<<grid_size, block_size, 0, stream>>>(d_standard, d_re0, d_re1, d_im0,
|
||||
d_im1);
|
||||
|
||||
cuda_memcpy_async_to_cpu(standard, d_standard, N * sizeof(__uint128_t),
|
||||
stream, gpu_index);
|
||||
cuda_drop_async(d_standard, stream, gpu_index);
|
||||
cuda_drop_async(d_re0, stream, gpu_index);
|
||||
cuda_drop_async(d_re1, stream, gpu_index);
|
||||
cuda_drop_async(d_im0, stream, gpu_index);
|
||||
cuda_drop_async(d_im1, stream, gpu_index);
|
||||
}
|
||||
|
||||
#undef NEG_TWID
|
||||
#undef F64x4_TO_F128x2
|
||||
#undef F128x2_TO_F64x4
|
||||
|
||||
#endif // TFHE_RS_BACKENDS_TFHE_CUDA_BACKEND_CUDA_SRC_FFT128_FFT128_CUH_
|
||||
16387
backends/tfhe-cuda-backend/cuda/src/fft128/twiddles.cu
Normal file
16387
backends/tfhe-cuda-backend/cuda/src/fft128/twiddles.cu
Normal file
File diff suppressed because it is too large
Load Diff
11
backends/tfhe-cuda-backend/cuda/src/fft128/twiddles.cuh
Normal file
11
backends/tfhe-cuda-backend/cuda/src/fft128/twiddles.cuh
Normal file
@@ -0,0 +1,11 @@
|
||||
#ifndef CUDA_FFT128_TWIDDLES_CUH
|
||||
#define CUDA_FFT128_TWIDDLES_CUH
|
||||
|
||||
/*
|
||||
* 'negtwiddles' are stored in device memory to profit caching
|
||||
*/
|
||||
extern __device__ double neg_twiddles_re_hi[4096];
|
||||
extern __device__ double neg_twiddles_re_lo[4096];
|
||||
extern __device__ double neg_twiddles_im_hi[4096];
|
||||
extern __device__ double neg_twiddles_im_lo[4096];
|
||||
#endif
|
||||
@@ -7,12 +7,12 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory, bool allocate_ms_array) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
message_modulus, carry_modulus, allocate_ms_array);
|
||||
|
||||
scratch_cuda_integer_abs_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
@@ -23,13 +23,14 @@ void scratch_cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
void cuda_integer_abs_inplace_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
CudaRadixCiphertextFFI *ct, int8_t *mem_ptr, bool is_signed,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
|
||||
|
||||
auto mem = (int_abs_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_abs_kb<uint64_t>((cudaStream_t *)(streams), gpu_indexes,
|
||||
gpu_count, ct, bsks, (uint64_t **)(ksks), mem,
|
||||
is_signed);
|
||||
gpu_count, ct, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, mem, is_signed);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_abs_inplace(void *const *streams,
|
||||
|
||||
@@ -29,49 +29,12 @@ __host__ void scratch_cuda_integer_abs_kb(
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void legacy_host_integer_abs_kb_async(
|
||||
__host__ void host_integer_abs_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *ct, void *const *bsks, uint64_t *const *ksks,
|
||||
int_abs_buffer<uint64_t> *mem_ptr, bool is_signed, uint32_t num_blocks) {
|
||||
if (!is_signed)
|
||||
return;
|
||||
|
||||
auto radix_params = mem_ptr->params;
|
||||
auto mask = (Torus *)(mem_ptr->mask->ptr);
|
||||
|
||||
auto big_lwe_dimension = radix_params.big_lwe_dimension;
|
||||
auto big_lwe_size = big_lwe_dimension + 1;
|
||||
auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
uint32_t num_bits_in_ciphertext =
|
||||
(31 - __builtin_clz(radix_params.message_modulus)) * num_blocks;
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(mask, ct, num_blocks * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
legacy_host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
|
||||
mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks, num_blocks);
|
||||
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
|
||||
radix_params.big_lwe_dimension, num_blocks);
|
||||
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
legacy_host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, ct, nullptr, nullptr, mem_ptr->scp_mem,
|
||||
bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
|
||||
// legacy bitop
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, ct, mask, ct, bsks, ksks, num_blocks,
|
||||
mem_ptr->bitxor_mem->lut, mem_ptr->bitxor_mem->params.message_modulus);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *ct,
|
||||
void *const *bsks, uint64_t *const *ksks,
|
||||
int_abs_buffer<uint64_t> *mem_ptr, bool is_signed) {
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *ct, void *const *bsks,
|
||||
uint64_t *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
int_abs_buffer<uint64_t> *mem_ptr, bool is_signed) {
|
||||
if (!is_signed)
|
||||
return;
|
||||
|
||||
@@ -85,18 +48,19 @@ host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
|
||||
host_integer_radix_arithmetic_scalar_shift_kb_inplace<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
|
||||
mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks);
|
||||
mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks, ms_noise_reduction_key);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
|
||||
ct->num_radix_blocks);
|
||||
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
host_propagate_single_carry<Torus>(streams, gpu_indexes, gpu_count, ct,
|
||||
nullptr, nullptr, mem_ptr->scp_mem, bsks,
|
||||
ksks, requested_flag, uses_carry);
|
||||
host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, ct, nullptr, nullptr, mem_ptr->scp_mem,
|
||||
bsks, ksks, ms_noise_reduction_key, requested_flag, uses_carry);
|
||||
|
||||
host_integer_radix_bitop_kb<Torus>(streams, gpu_indexes, gpu_count, ct, mask,
|
||||
ct, mem_ptr->bitxor_mem, bsks, ksks);
|
||||
ct, mem_ptr->bitxor_mem, bsks, ksks,
|
||||
ms_noise_reduction_key);
|
||||
}
|
||||
|
||||
#endif // TFHE_RS_ABS_CUH
|
||||
|
||||
@@ -7,12 +7,12 @@ void scratch_cuda_integer_radix_bitop_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
BITOP_TYPE op_type, bool allocate_gpu_memory) {
|
||||
BITOP_TYPE op_type, bool allocate_gpu_memory, bool allocate_ms_array) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
message_modulus, carry_modulus, allocate_ms_array);
|
||||
|
||||
scratch_cuda_integer_radix_bitop_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
@@ -25,12 +25,13 @@ void cuda_bitop_integer_radix_ciphertext_kb_64(
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
|
||||
|
||||
host_integer_radix_bitop_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_1, lwe_array_2, (int_bitop_buffer<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks));
|
||||
(uint64_t **)(ksks), ms_noise_reduction_key);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_bitop(void *const *streams,
|
||||
|
||||
@@ -17,8 +17,15 @@ __host__ void host_integer_radix_bitop_kb(
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int_bitop_buffer<Torus> *mem_ptr,
|
||||
void *const *bsks, Torus *const *ksks) {
|
||||
void *const *bsks, Torus *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
|
||||
|
||||
if (lwe_array_out->num_radix_blocks != lwe_array_1->num_radix_blocks ||
|
||||
lwe_array_out->num_radix_blocks != lwe_array_2->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be equal")
|
||||
if (lwe_array_out->lwe_dimension != lwe_array_1->lwe_dimension ||
|
||||
lwe_array_out->lwe_dimension != lwe_array_2->lwe_dimension)
|
||||
PANIC("Cuda error: input and output lwe dimension must be equal")
|
||||
auto lut = mem_ptr->lut;
|
||||
uint64_t degrees[lwe_array_1->num_radix_blocks];
|
||||
if (mem_ptr->op == BITOP_TYPE::BITAND) {
|
||||
@@ -37,7 +44,7 @@ __host__ void host_integer_radix_bitop_kb(
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_1, lwe_array_2,
|
||||
bsks, ksks, lut, lwe_array_out->num_radix_blocks,
|
||||
bsks, ksks, ms_noise_reduction_key, lut, lwe_array_out->num_radix_blocks,
|
||||
lut->params.message_modulus);
|
||||
|
||||
memcpy(lwe_array_out->degrees, degrees,
|
||||
|
||||
@@ -7,12 +7,12 @@ void scratch_cuda_integer_radix_cmux_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
bool allocate_gpu_memory, bool allocate_ms_array) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
message_modulus, carry_modulus, allocate_ms_array);
|
||||
|
||||
std::function<uint64_t(uint64_t)> predicate_lut_f =
|
||||
[](uint64_t x) -> uint64_t { return x == 1; };
|
||||
@@ -29,12 +29,14 @@ void cuda_cmux_integer_radix_ciphertext_kb_64(
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
CudaRadixCiphertextFFI const *lwe_array_true,
|
||||
CudaRadixCiphertextFFI const *lwe_array_false, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks) {
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
|
||||
|
||||
host_integer_radix_cmux_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_condition, lwe_array_true, lwe_array_false,
|
||||
(int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks));
|
||||
(int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_radix_cmux(void *const *streams,
|
||||
|
||||
@@ -5,13 +5,24 @@
|
||||
#include "radix_ciphertext.cuh"
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void zero_out_if(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
Torus *lwe_array_out, Torus const *lwe_array_input,
|
||||
Torus const *lwe_condition,
|
||||
int_zero_out_if_buffer<Torus> *mem_ptr,
|
||||
int_radix_lut<Torus> *predicate, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
__host__ void
|
||||
zero_out_if(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_input,
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
int_zero_out_if_buffer<Torus> *mem_ptr,
|
||||
int_radix_lut<Torus> *predicate, void *const *bsks,
|
||||
Torus *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
uint32_t num_radix_blocks) {
|
||||
if (lwe_array_out->num_radix_blocks < num_radix_blocks ||
|
||||
lwe_array_input->num_radix_blocks < num_radix_blocks)
|
||||
PANIC("Cuda error: input or output radix ciphertexts does not have enough "
|
||||
"blocks")
|
||||
if (lwe_array_out->lwe_dimension != lwe_array_input->lwe_dimension ||
|
||||
lwe_array_input->lwe_dimension != lwe_condition->lwe_dimension)
|
||||
PANIC("Cuda error: input and output radix ciphertexts must have the same "
|
||||
"lwe dimension")
|
||||
cuda_set_device(gpu_indexes[0]);
|
||||
auto params = mem_ptr->params;
|
||||
|
||||
@@ -21,56 +32,11 @@ __host__ void zero_out_if(cudaStream_t const *streams,
|
||||
host_pack_bivariate_blocks_with_single_block<Torus>(
|
||||
streams, gpu_indexes, gpu_count, tmp_lwe_array_input,
|
||||
predicate->lwe_indexes_in, lwe_array_input, lwe_condition,
|
||||
predicate->lwe_indexes_in, params.big_lwe_dimension,
|
||||
params.message_modulus, num_radix_blocks);
|
||||
predicate->lwe_indexes_in, params.message_modulus, num_radix_blocks);
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, tmp_lwe_array_input, bsks,
|
||||
ksks, num_radix_blocks, predicate);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void legacy_host_integer_radix_cmux_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_condition,
|
||||
Torus const *lwe_array_true, Torus const *lwe_array_false,
|
||||
int_cmux_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks,
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
auto params = mem_ptr->params;
|
||||
Torus lwe_size = params.big_lwe_dimension + 1;
|
||||
Torus radix_lwe_size = lwe_size * num_radix_blocks;
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in->ptr, lwe_array_true,
|
||||
radix_lwe_size * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
(Torus *)(mem_ptr->buffer_in->ptr) + radix_lwe_size, lwe_array_false,
|
||||
radix_lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
for (uint i = 0; i < 2 * num_radix_blocks; i++) {
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
(Torus *)(mem_ptr->condition_array->ptr) + i * lwe_size, lwe_condition,
|
||||
lwe_size * sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
}
|
||||
legacy_integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, (Torus *)(mem_ptr->buffer_out->ptr),
|
||||
(Torus *)(mem_ptr->buffer_in->ptr),
|
||||
(Torus *)(mem_ptr->condition_array->ptr), bsks, ksks,
|
||||
2 * num_radix_blocks, mem_ptr->predicate_lut, params.message_modulus);
|
||||
|
||||
// If the condition was true, true_ct will have kept its value and false_ct
|
||||
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
|
||||
// have kept its value
|
||||
auto mem_true = (Torus *)(mem_ptr->buffer_out->ptr);
|
||||
auto ptr = (Torus *)mem_ptr->buffer_out->ptr;
|
||||
auto mem_false = &ptr[radix_lwe_size];
|
||||
auto added_cts = mem_true;
|
||||
legacy_host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
|
||||
mem_false, params.big_lwe_dimension,
|
||||
num_radix_blocks);
|
||||
|
||||
legacy_integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
|
||||
num_radix_blocks, mem_ptr->message_extract_lut);
|
||||
ksks, ms_noise_reduction_key, predicate, num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
@@ -80,7 +46,8 @@ __host__ void host_integer_radix_cmux_kb(
|
||||
CudaRadixCiphertextFFI const *lwe_condition,
|
||||
CudaRadixCiphertextFFI const *lwe_array_true,
|
||||
CudaRadixCiphertextFFI const *lwe_array_false,
|
||||
int_cmux_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks) {
|
||||
int_cmux_buffer<Torus> *mem_ptr, void *const *bsks, Torus *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
|
||||
|
||||
if (lwe_array_out->num_radix_blocks != lwe_array_true->num_radix_blocks)
|
||||
PANIC("Cuda error: input and output num radix blocks must be the same")
|
||||
@@ -103,8 +70,8 @@ __host__ void host_integer_radix_cmux_kb(
|
||||
}
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->buffer_out, mem_ptr->buffer_in,
|
||||
mem_ptr->condition_array, bsks, ksks, mem_ptr->predicate_lut,
|
||||
2 * num_radix_blocks, params.message_modulus);
|
||||
mem_ptr->condition_array, bsks, ksks, ms_noise_reduction_key,
|
||||
mem_ptr->predicate_lut, 2 * num_radix_blocks, params.message_modulus);
|
||||
|
||||
// If the condition was true, true_ct will have kept its value and false_ct
|
||||
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
|
||||
@@ -121,7 +88,7 @@ __host__ void host_integer_radix_cmux_kb(
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, &mem_true, bsks, ksks,
|
||||
mem_ptr->message_extract_lut, num_radix_blocks);
|
||||
ms_noise_reduction_key, mem_ptr->message_extract_lut, num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
|
||||
@@ -7,12 +7,13 @@ void scratch_cuda_integer_radix_comparison_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
COMPARISON_TYPE op_type, bool is_signed, bool allocate_gpu_memory) {
|
||||
COMPARISON_TYPE op_type, bool is_signed, bool allocate_gpu_memory,
|
||||
bool allocate_ms_array) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
message_modulus, carry_modulus, allocate_ms_array);
|
||||
|
||||
switch (op_type) {
|
||||
case EQ:
|
||||
@@ -38,21 +39,27 @@ void scratch_cuda_integer_radix_comparison_kb_64(
|
||||
|
||||
void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_1, void const *lwe_array_2,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_radix_blocks) {
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_1,
|
||||
CudaRadixCiphertextFFI const *lwe_array_2, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key) {
|
||||
|
||||
if (lwe_array_1->num_radix_blocks != lwe_array_1->num_radix_blocks)
|
||||
PANIC("Cuda error: input num radix blocks must be the same")
|
||||
// The output ciphertext might be a boolean block or a radix ciphertext
|
||||
// depending on the case (eq/gt vs max/min) so the amount of blocks to
|
||||
// consider for calculation is the one of the input
|
||||
auto num_radix_blocks = lwe_array_1->num_radix_blocks;
|
||||
int_comparison_buffer<uint64_t> *buffer =
|
||||
(int_comparison_buffer<uint64_t> *)mem_ptr;
|
||||
switch (buffer->op) {
|
||||
case EQ:
|
||||
case NE:
|
||||
host_integer_radix_equality_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_1),
|
||||
static_cast<const uint64_t *>(lwe_array_2), buffer, bsks,
|
||||
(uint64_t **)(ksks), num_radix_blocks);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_1, lwe_array_2, buffer, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, num_radix_blocks);
|
||||
break;
|
||||
case GT:
|
||||
case GE:
|
||||
@@ -62,23 +69,18 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
PANIC("Cuda error (comparisons): the number of radix blocks has to be "
|
||||
"even.")
|
||||
host_integer_radix_difference_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_1),
|
||||
static_cast<const uint64_t *>(lwe_array_2), buffer,
|
||||
buffer->diff_buffer->operator_f, bsks, (uint64_t **)(ksks),
|
||||
num_radix_blocks);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_1, lwe_array_2, buffer, buffer->diff_buffer->operator_f, bsks,
|
||||
(uint64_t **)(ksks), ms_noise_reduction_key, num_radix_blocks);
|
||||
break;
|
||||
case MAX:
|
||||
case MIN:
|
||||
if (num_radix_blocks % 2 != 0)
|
||||
PANIC("Cuda error (max/min): the number of radix blocks has to be even.")
|
||||
host_integer_radix_maxmin_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_1),
|
||||
static_cast<const uint64_t *>(lwe_array_2), buffer, bsks,
|
||||
(uint64_t **)(ksks), num_radix_blocks);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_1, lwe_array_2, buffer, bsks, (uint64_t **)(ksks),
|
||||
ms_noise_reduction_key, num_radix_blocks);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error: integer operation not supported")
|
||||
@@ -102,12 +104,12 @@ void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
bool allocate_gpu_memory, bool allocate_ms_array) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
message_modulus, carry_modulus, allocate_ms_array);
|
||||
|
||||
scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
@@ -117,17 +119,19 @@ void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
|
||||
void cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *buffer =
|
||||
(int_comparison_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_are_all_comparisons_block_true_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
|
||||
(uint64_t **)(ksks), num_radix_blocks);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_in, buffer, bsks, (uint64_t **)(ksks), ms_noise_reduction_key,
|
||||
num_radix_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_are_all_comparisons_block_true(
|
||||
@@ -146,12 +150,12 @@ void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
bool allocate_gpu_memory, bool allocate_ms_array) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
message_modulus, carry_modulus, allocate_ms_array);
|
||||
|
||||
scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
@@ -161,17 +165,19 @@ void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
|
||||
void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
|
||||
CudaRadixCiphertextFFI *lwe_array_out,
|
||||
CudaRadixCiphertextFFI const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks,
|
||||
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *buffer =
|
||||
(int_comparison_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_is_at_least_one_comparisons_block_true_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
|
||||
(uint64_t **)(ksks), num_radix_blocks);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count, lwe_array_out,
|
||||
lwe_array_in, buffer, bsks, (uint64_t **)(ksks), ms_noise_reduction_key,
|
||||
num_radix_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user