mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-11 07:38:08 -05:00
Compare commits
442 Commits
dt/ci/cove
...
go/feature
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
00ecb8306a | ||
|
|
2000feb87e | ||
|
|
594a5cee25 | ||
|
|
401cfc5fd0 | ||
|
|
769c725c67 | ||
|
|
07d143e032 | ||
|
|
d88bba761b | ||
|
|
eaa1d07f90 | ||
|
|
663322cfa5 | ||
|
|
ddd6a6e136 | ||
|
|
abc39f0a3e | ||
|
|
8b7556667b | ||
|
|
67b1607773 | ||
|
|
5340859003 | ||
|
|
a26e68c3bc | ||
|
|
0dd622ebb9 | ||
|
|
d69dd20079 | ||
|
|
80fe45f354 | ||
|
|
33114e3946 | ||
|
|
ede0745b7f | ||
|
|
bc4cd08e7a | ||
|
|
b03921f1ae | ||
|
|
70f7af06f5 | ||
|
|
a9bb6eac5f | ||
|
|
4fa9b243e0 | ||
|
|
b88f561358 | ||
|
|
0e71ca6c1c | ||
|
|
3ba61c0694 | ||
|
|
781f78c442 | ||
|
|
ebfc1ea8ac | ||
|
|
7fa9f33776 | ||
|
|
5547d92c79 | ||
|
|
351fc476b5 | ||
|
|
53cd3c8d0f | ||
|
|
0a2ad8ca72 | ||
|
|
eba4f6a89c | ||
|
|
4b933cf421 | ||
|
|
3303cd8568 | ||
|
|
f937524f64 | ||
|
|
e7da96271c | ||
|
|
0cc716544b | ||
|
|
f53087b5ed | ||
|
|
bcefe977c9 | ||
|
|
73ea24fd51 | ||
|
|
6f1a9bdaa5 | ||
|
|
7834f699d0 | ||
|
|
b81692b2df | ||
|
|
8748d1cc22 | ||
|
|
dbb13aa35e | ||
|
|
53f4c9bfc7 | ||
|
|
4021812248 | ||
|
|
190c5e7bb7 | ||
|
|
2004333d6e | ||
|
|
e7c06ef956 | ||
|
|
7b14fe6fee | ||
|
|
55f4df97b4 | ||
|
|
2144ec8107 | ||
|
|
fb862ddbbc | ||
|
|
ab0b01f7e1 | ||
|
|
6c4318b8bb | ||
|
|
d3f2ecd367 | ||
|
|
19dc0f02f9 | ||
|
|
95d50368fa | ||
|
|
c117798b10 | ||
|
|
da0934d4bc | ||
|
|
b522de3273 | ||
|
|
9205703454 | ||
|
|
a1b92a6db8 | ||
|
|
8d7c45bf17 | ||
|
|
91f05b00b9 | ||
|
|
ebb11b15c4 | ||
|
|
18270714d8 | ||
|
|
6c6525b1ea | ||
|
|
79f8971712 | ||
|
|
17db09bf2a | ||
|
|
fc9bfcaf61 | ||
|
|
d93c412dc5 | ||
|
|
ea222007d8 | ||
|
|
3470d6c2d8 | ||
|
|
fffdc3862e | ||
|
|
d9eca01631 | ||
|
|
95ef13f6ce | ||
|
|
230fa5a8f0 | ||
|
|
b443855b8b | ||
|
|
ba80c33328 | ||
|
|
e5dc45c084 | ||
|
|
b450f0eb30 | ||
|
|
7479cc826b | ||
|
|
b2beac2d2c | ||
|
|
b700416597 | ||
|
|
42609987a1 | ||
|
|
5b37a838ba | ||
|
|
c1fcd95d72 | ||
|
|
ffb8b4f930 | ||
|
|
3b8dace975 | ||
|
|
44f326824f | ||
|
|
f41d133fc7 | ||
|
|
52d43961b8 | ||
|
|
35b89704aa | ||
|
|
b578cf19c2 | ||
|
|
dd68ce67ad | ||
|
|
f8d8cc90fe | ||
|
|
eac37a7749 | ||
|
|
4342efecc8 | ||
|
|
a3ec84729d | ||
|
|
90d6b221d7 | ||
|
|
b1491734b2 | ||
|
|
436dd6a687 | ||
|
|
39534cb4c4 | ||
|
|
723443589d | ||
|
|
d58a1b68cb | ||
|
|
b29c477462 | ||
|
|
bed3d88426 | ||
|
|
35201b06b6 | ||
|
|
c8ddc0f008 | ||
|
|
4d934f512a | ||
|
|
52b0907c47 | ||
|
|
8ea647dc26 | ||
|
|
8f72677fa6 | ||
|
|
36a58cf16c | ||
|
|
de79f3a280 | ||
|
|
e9051419cd | ||
|
|
ac37c3883d | ||
|
|
72fb770308 | ||
|
|
34d07f5558 | ||
|
|
4176b3dcb5 | ||
|
|
abf9c3efb7 | ||
|
|
ebf1fd9e84 | ||
|
|
cef055b7f3 | ||
|
|
d65a4d8690 | ||
|
|
928bc13ed2 | ||
|
|
c81abae989 | ||
|
|
aff50fcb85 | ||
|
|
757606fdb4 | ||
|
|
7542c89679 | ||
|
|
dd74063959 | ||
|
|
f6845a988b | ||
|
|
6a3ff21de2 | ||
|
|
74cafd0e9d | ||
|
|
d8241942a6 | ||
|
|
46f0bf442a | ||
|
|
81c837c837 | ||
|
|
7b96f55900 | ||
|
|
f19e892053 | ||
|
|
2a989d64f9 | ||
|
|
eeb4accf66 | ||
|
|
0370bf6a3f | ||
|
|
a62c19b735 | ||
|
|
721a5a57ba | ||
|
|
3f101d5e8b | ||
|
|
e01f4abb65 | ||
|
|
a2ca189283 | ||
|
|
e0e9668b0b | ||
|
|
bd23d18c9d | ||
|
|
491112ffc1 | ||
|
|
83c3dadb5d | ||
|
|
7692643ca4 | ||
|
|
29cf2b83b8 | ||
|
|
1b47c74360 | ||
|
|
cd329729d7 | ||
|
|
8ec24d1bb7 | ||
|
|
13f61e4d67 | ||
|
|
72475a385e | ||
|
|
cc8f2cb4dc | ||
|
|
a153ea98ae | ||
|
|
60773497fe | ||
|
|
d632c916c2 | ||
|
|
6e4ea82db8 | ||
|
|
a7df399de3 | ||
|
|
90dc9a004e | ||
|
|
a4508f8396 | ||
|
|
c8e1998167 | ||
|
|
85d3ba6238 | ||
|
|
e9772953bf | ||
|
|
c407f3d5a6 | ||
|
|
5f0bff98dd | ||
|
|
634b7ada32 | ||
|
|
734edb3bdc | ||
|
|
ee181506c4 | ||
|
|
cf1576efbd | ||
|
|
d215359a75 | ||
|
|
1b5d5eeb94 | ||
|
|
bbaaa53656 | ||
|
|
88ad88e71c | ||
|
|
f338df0079 | ||
|
|
1e0ed88767 | ||
|
|
97ebccbb5b | ||
|
|
2dc5c8a891 | ||
|
|
22e9505380 | ||
|
|
7c80f295f7 | ||
|
|
a34ddd7b54 | ||
|
|
3deff5fbfd | ||
|
|
05a0327874 | ||
|
|
879699c072 | ||
|
|
e8b3617926 | ||
|
|
d9701d99d3 | ||
|
|
a339025b48 | ||
|
|
0e1a2ea7f6 | ||
|
|
a44be90a44 | ||
|
|
f026fa5076 | ||
|
|
b06beabfa2 | ||
|
|
773adcc26f | ||
|
|
ee1c90403c | ||
|
|
b9cedfec7f | ||
|
|
3992aa7f15 | ||
|
|
2b002f81ec | ||
|
|
2b695a9563 | ||
|
|
fd72858c4d | ||
|
|
3a2bb4470f | ||
|
|
6120fab886 | ||
|
|
53b68619b0 | ||
|
|
e854823233 | ||
|
|
19e00c484b | ||
|
|
818e480dac | ||
|
|
a7fc8a90e1 | ||
|
|
3fad6d194c | ||
|
|
23efcb8dd4 | ||
|
|
33c69d9d1f | ||
|
|
960d287e92 | ||
|
|
662e5402a3 | ||
|
|
bb7bdee25a | ||
|
|
3503d5b484 | ||
|
|
0390f1ce56 | ||
|
|
d290935de1 | ||
|
|
39dffcf742 | ||
|
|
89bb5756cc | ||
|
|
501907498f | ||
|
|
d712c0fcd0 | ||
|
|
66bee500a1 | ||
|
|
0687d12459 | ||
|
|
ecfe6e9a09 | ||
|
|
1366c33034 | ||
|
|
c2a57f15ab | ||
|
|
cb679fbbcb | ||
|
|
08ddabb3be | ||
|
|
892a6ae276 | ||
|
|
c37bac2438 | ||
|
|
e42733fb67 | ||
|
|
a45f3d7435 | ||
|
|
95a08ef0c2 | ||
|
|
1da159f0f0 | ||
|
|
7c76ce2cfb | ||
|
|
3f499c85b3 | ||
|
|
d736aa170e | ||
|
|
9aa4e0f0b5 | ||
|
|
7cf4f0219f | ||
|
|
97c10df6c2 | ||
|
|
5b530152fe | ||
|
|
49ffeba87c | ||
|
|
679d76e7a6 | ||
|
|
7613ef2ba9 | ||
|
|
6a9e959edf | ||
|
|
034da67ff2 | ||
|
|
aac136909a | ||
|
|
54cf162db5 | ||
|
|
ac211cf71f | ||
|
|
a02896b9bc | ||
|
|
4a4ad23cee | ||
|
|
fbf38a82ad | ||
|
|
444ebbde57 | ||
|
|
c227bf4a49 | ||
|
|
c1916b82ca | ||
|
|
4b8a3a15e8 | ||
|
|
cd33712b43 | ||
|
|
e20114e8e2 | ||
|
|
1f7ef064fb | ||
|
|
f16458147b | ||
|
|
82b4b63d0e | ||
|
|
34616ae4f7 | ||
|
|
26aa4e3a61 | ||
|
|
872d51f5f0 | ||
|
|
69fb7aa7ae | ||
|
|
ea73ec0832 | ||
|
|
d62e365bdc | ||
|
|
1579fb249a | ||
|
|
0624a5c5e2 | ||
|
|
ec4350edb4 | ||
|
|
904cd00076 | ||
|
|
44c64210ca | ||
|
|
9cd7aeccf5 | ||
|
|
987d68942d | ||
|
|
2ff3b75ef7 | ||
|
|
9242b2a725 | ||
|
|
bd674fe5bc | ||
|
|
4e5b9986b6 | ||
|
|
1e535c83a6 | ||
|
|
915eafac15 | ||
|
|
1c760a31e2 | ||
|
|
369d6df350 | ||
|
|
bbd12b8a30 | ||
|
|
deebe09a8c | ||
|
|
dcd8224a7e | ||
|
|
2f7ad4cdcd | ||
|
|
4c8d791a2d | ||
|
|
c60cb88367 | ||
|
|
f53c0df449 | ||
|
|
6be983db34 | ||
|
|
8caa0f780e | ||
|
|
75e2be2ca2 | ||
|
|
cd40176a56 | ||
|
|
65737e83db | ||
|
|
e4643c7919 | ||
|
|
baa3075f19 | ||
|
|
9cc97f9ab5 | ||
|
|
2bd9f7aab4 | ||
|
|
833d52c1f1 | ||
|
|
4f2de51012 | ||
|
|
134bec8f78 | ||
|
|
f2713a12c7 | ||
|
|
503fad69d2 | ||
|
|
30ccb34ef9 | ||
|
|
2ff64ccba0 | ||
|
|
aeed5b70f3 | ||
|
|
8f707611a0 | ||
|
|
2d0671cdd8 | ||
|
|
f9307754ef | ||
|
|
3af990b044 | ||
|
|
0d8b1c6509 | ||
|
|
c35cb4998d | ||
|
|
e825277219 | ||
|
|
71112231b9 | ||
|
|
b78c719816 | ||
|
|
7152f9c5c9 | ||
|
|
d3a6b4a7d8 | ||
|
|
f49684bdac | ||
|
|
cf5fd87efb | ||
|
|
179fbfc9bb | ||
|
|
ddf236ecbb | ||
|
|
e3fdb961b6 | ||
|
|
2185bcf80e | ||
|
|
418409231b | ||
|
|
ce27c7c44a | ||
|
|
ccb6f98b09 | ||
|
|
6014968655 | ||
|
|
6687695d19 | ||
|
|
c7a0493715 | ||
|
|
24aeac7843 | ||
|
|
21a749541a | ||
|
|
b3b8f3273a | ||
|
|
f2b4ebb863 | ||
|
|
919a40077c | ||
|
|
ac6c90d13f | ||
|
|
b8991229ec | ||
|
|
5f0ca54150 | ||
|
|
dddf85fb2c | ||
|
|
d000f8ddf7 | ||
|
|
70b643a1db | ||
|
|
3f9c1b0ca6 | ||
|
|
301537a81b | ||
|
|
76338de99f | ||
|
|
019efb7fef | ||
|
|
772a70d838 | ||
|
|
f024e8abae | ||
|
|
31685387ea | ||
|
|
4db77e236f | ||
|
|
bc02216470 | ||
|
|
228afe80e7 | ||
|
|
e4a21db7ee | ||
|
|
3e37759f5f | ||
|
|
dc0d72436d | ||
|
|
8a31abfca4 | ||
|
|
154c2e61b8 | ||
|
|
b3e6f8522f | ||
|
|
3d2e3b389a | ||
|
|
f6f07714cb | ||
|
|
57bc1f5abe | ||
|
|
fd88c3ead2 | ||
|
|
4bbb3570d1 | ||
|
|
e2413ff69e | ||
|
|
82043fb7e2 | ||
|
|
3097c964e3 | ||
|
|
e5d6f60a1b | ||
|
|
05105c9d9e | ||
|
|
a798e1fb52 | ||
|
|
9cf4be09fb | ||
|
|
484bddfebd | ||
|
|
57c5ef6b52 | ||
|
|
a7e84dd9cf | ||
|
|
a0346f8fea | ||
|
|
4ffadfd517 | ||
|
|
b32eafdf74 | ||
|
|
fa6b1e7966 | ||
|
|
d35acae047 | ||
|
|
71a10464e3 | ||
|
|
0112521008 | ||
|
|
f8545a72a4 | ||
|
|
e62a5232e3 | ||
|
|
cb67eeba3c | ||
|
|
3481054f49 | ||
|
|
201e38506d | ||
|
|
97260a3330 | ||
|
|
3da2678584 | ||
|
|
80aaf8a8cb | ||
|
|
e91f403f57 | ||
|
|
fedd1ca7b2 | ||
|
|
fadb48a86d | ||
|
|
bad827d4a5 | ||
|
|
4a4a53adc3 | ||
|
|
ba818117e5 | ||
|
|
520010e208 | ||
|
|
2602ed236b | ||
|
|
f605e7c2ee | ||
|
|
c7e6d76346 | ||
|
|
ce430bb3cb | ||
|
|
e2ee5fef5d | ||
|
|
73b3bf6b5b | ||
|
|
68439c7037 | ||
|
|
636cf0f766 | ||
|
|
dd917901a0 | ||
|
|
3b4dbb3cbb | ||
|
|
bc44940081 | ||
|
|
ebc22bb995 | ||
|
|
5b4833a1c2 | ||
|
|
310047a848 | ||
|
|
4166dd1c8a | ||
|
|
5a1c8a49e4 | ||
|
|
6cb68a8464 | ||
|
|
2912e4c00c | ||
|
|
1b9c61c2d1 | ||
|
|
d167e9b38d | ||
|
|
80fda99068 | ||
|
|
e09ea6c484 | ||
|
|
f214cfc474 | ||
|
|
c0733f535f | ||
|
|
08e4e4f993 | ||
|
|
b55bae1e0f | ||
|
|
c29ae73710 | ||
|
|
cc1712c67a | ||
|
|
246f4c299e | ||
|
|
29b0786fd8 | ||
|
|
89f7df0e12 | ||
|
|
51d5e27ab3 | ||
|
|
4f29db404c | ||
|
|
f20176b6f0 | ||
|
|
575ce1b2fb | ||
|
|
53a4d54a54 | ||
|
|
cf051dd65f | ||
|
|
0f331f0b88 | ||
|
|
b8bf1c286f | ||
|
|
051e993503 | ||
|
|
9420c408dd | ||
|
|
1fe1a349b7 |
2
.github/actionlint.yaml
vendored
2
.github/actionlint.yaml
vendored
@@ -3,6 +3,8 @@ self-hosted-runner:
|
||||
labels:
|
||||
- m1mac
|
||||
- 4090-desktop
|
||||
- large_windows_16_latest
|
||||
- large_ubuntu_16
|
||||
# Configuration variables in array of strings defined in your repository or
|
||||
# organization. `null` means disabling configuration variables check.
|
||||
# Empty array means no configuration variable is allowed.
|
||||
|
||||
8
.github/workflows/approve_label.yml
vendored
8
.github/workflows/approve_label.yml
vendored
@@ -1,5 +1,5 @@
|
||||
# Manage approved label in pull request
|
||||
name: PR approved label manager
|
||||
# Add labels in pull request
|
||||
name: PR label manager
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
@@ -27,7 +27,9 @@ jobs:
|
||||
# Add label only if the review is approved and if the label doesn't already exist
|
||||
- name: Add approved label
|
||||
uses: actions-ecosystem/action-add-labels@18f1af5e3544586314bbe15c0273249c770b2daf
|
||||
if: ${{ github.event_name == 'pull_request_review' && github.event.review.state == 'approved' && !contains(fromJSON(env.LABELS), 'approved') }}
|
||||
if: ${{ github.event_name == 'pull_request_review'
|
||||
&& github.event.review.state == 'approved'
|
||||
&& !contains(fromJSON(env.LABELS), 'approved') }}
|
||||
with:
|
||||
# We need to use a PAT to be able to trigger `labeled` event for the other workflow.
|
||||
github_token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
120
.github/workflows/aws_tfhe_backward_compat_tests.yml
vendored
Normal file
120
.github/workflows/aws_tfhe_backward_compat_tests.yml
vendored
Normal file
@@ -0,0 +1,120 @@
|
||||
# Run backward compatibility tests
|
||||
name: Backward compatibility Tests on CPU
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (backward-compat-tests)
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: cpu-small
|
||||
|
||||
backward-compat-tests:
|
||||
name: Backward compatibility tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Install git-lfs
|
||||
run: |
|
||||
sudo apt update && sudo apt -y install git-lfs
|
||||
|
||||
- name: Use specific data branch
|
||||
if: ${{ contains(github.event.pull_request.labels.*.name, 'data_PR') }}
|
||||
env:
|
||||
PR_BRANCH: ${{ github.head_ref || github.ref_name }}
|
||||
run: |
|
||||
echo "BACKWARD_COMPAT_DATA_BRANCH=${PR_BRANCH}" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Get backward compat branch
|
||||
id: backward_compat_branch
|
||||
run: |
|
||||
BRANCH="$(make backward_compat_branch)"
|
||||
echo "branch=${BRANCH}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Clone test data
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
repository: zama-ai/tfhe-backward-compat-data
|
||||
path: tfhe/tfhe-backward-compat-data
|
||||
lfs: 'true'
|
||||
ref: ${{ steps.backward_compat_branch.outputs.branch }}
|
||||
|
||||
- name: Run backward compatibility tests
|
||||
run: |
|
||||
make test_backward_compatibility_ci
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Backward compatibility tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (backward-compat-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, backward-compat-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (backward-compat-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
155
.github/workflows/aws_tfhe_fast_tests.yml
vendored
155
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -1,4 +1,4 @@
|
||||
# Run a small subset of shortint and integer tests to ensure quick feedback.
|
||||
# Run a small subset of tests to ensure quick feedback.
|
||||
name: Fast AWS Tests on CPU
|
||||
|
||||
env:
|
||||
@@ -11,6 +11,7 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -18,89 +19,192 @@ on:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
setup-ec2:
|
||||
name: Setup EC2 instance (fast-tests)
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: write
|
||||
outputs:
|
||||
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
|
||||
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
|
||||
core_crypto_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.core_crypto_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
boolean_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.boolean_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
shortint_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.shortint_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
integer_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.integer_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
wasm_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.wasm_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
high_level_api_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.high_level_api_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
user_docs_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.user_docs_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
dependencies:
|
||||
- tfhe/Cargo.toml
|
||||
- concrete-csprng/**
|
||||
- tfhe-zk-pok/**
|
||||
csprng:
|
||||
- concrete-csprng/**
|
||||
zk_pok:
|
||||
- tfhe-zk-pok/**
|
||||
core_crypto:
|
||||
- tfhe/src/core_crypto/**
|
||||
boolean:
|
||||
- tfhe/src/core_crypto/**
|
||||
- tfhe/src/boolean/**
|
||||
shortint:
|
||||
- tfhe/src/core_crypto/**
|
||||
- tfhe/src/shortint/**
|
||||
integer:
|
||||
- tfhe/src/core_crypto/**
|
||||
- tfhe/src/shortint/**
|
||||
- tfhe/src/integer/**
|
||||
wasm:
|
||||
- tfhe/src/**
|
||||
- tfhe/js_on_wasm_tests/**
|
||||
- tfhe/web_wasm_parallel_tests/**
|
||||
- '!tfhe/src/c_api/**'
|
||||
- '!tfhe/src/boolean/**'
|
||||
high_level_api:
|
||||
- tfhe/src/**
|
||||
- '!tfhe/src/c_api/**'
|
||||
- '!tfhe/src/boolean/**'
|
||||
- '!tfhe/src/c_api/**'
|
||||
- '!tfhe/src/js_on_wasm_api/**'
|
||||
user_docs:
|
||||
- tfhe/src/**
|
||||
- '!tfhe/src/c_api/**'
|
||||
- 'tfhe/docs/**.md'
|
||||
- README.md
|
||||
|
||||
- name: Aggregate file changes
|
||||
id: aggregated-changes
|
||||
if: ( steps.changed-files.outputs.dependencies_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.csprng_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.zk_pok_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.core_crypto_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.boolean_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.shortint_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.integer_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.wasm_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.high_level_api_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.user_docs_any_changed == 'true')
|
||||
run: |
|
||||
echo "any_changed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (fast-tests)
|
||||
if: github.event_name != 'pull_request' ||
|
||||
needs.should-run.outputs.any_file_changed == 'true'
|
||||
needs: should-run
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
|
||||
aws-region: ${{ steps.start-instance.outputs.aws-region }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: cpu-big
|
||||
|
||||
fast-tests:
|
||||
name: Fast CPU tests
|
||||
needs: setup-ec2
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
needs: [ should-run, setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Run concrete-csprng tests
|
||||
if: needs.should-run.outputs.csprng_test == 'true'
|
||||
run: |
|
||||
make test_concrete_csprng
|
||||
|
||||
- name: Run tfhe-zk-pok tests
|
||||
if: needs.should-run.outputs.zk_pok_test == 'true'
|
||||
run: |
|
||||
make test_zk_pok
|
||||
|
||||
- name: Run core tests
|
||||
if: needs.should-run.outputs.core_crypto_test == 'true'
|
||||
run: |
|
||||
AVX512_SUPPORT=ON make test_core_crypto
|
||||
|
||||
- name: Run boolean tests
|
||||
if: needs.should-run.outputs.boolean_test == 'true'
|
||||
run: |
|
||||
make test_boolean
|
||||
|
||||
- name: Run user docs tests
|
||||
if: needs.should-run.outputs.user_docs_test == 'true'
|
||||
run: |
|
||||
make test_user_doc
|
||||
|
||||
- name: Run js on wasm API tests
|
||||
if: needs.should-run.outputs.wasm_test == 'true'
|
||||
run: |
|
||||
make test_nodejs_wasm_api_in_docker
|
||||
|
||||
- name: Gen Keys if required
|
||||
if: needs.should-run.outputs.shortint_test == 'true' ||
|
||||
needs.should-run.outputs.integer_test == 'true'
|
||||
run: |
|
||||
make gen_key_cache
|
||||
|
||||
- name: Run shortint tests
|
||||
if: needs.should-run.outputs.shortint_test == 'true'
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE FAST_TESTS=TRUE make test_shortint_ci
|
||||
|
||||
- name: Run integer tests
|
||||
if: needs.should-run.outputs.integer_test == 'true'
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE FAST_TESTS=TRUE make test_integer_ci
|
||||
|
||||
- name: Run shortint multi-bit tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE FAST_TESTS=TRUE make test_shortint_multi_bit_ci
|
||||
|
||||
- name: Run integer multi-bit tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE FAST_TESTS=TRUE make test_integer_multi_bit_ci
|
||||
|
||||
- name: Run high-level API tests
|
||||
if: needs.should-run.outputs.high_level_api_test == 'true'
|
||||
run: |
|
||||
make test_high_level_api
|
||||
|
||||
@@ -116,22 +220,21 @@ jobs:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-ec2:
|
||||
name: Teardown EC2 instance (fast-tests)
|
||||
if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
|
||||
needs: [ setup-ec2, fast-tests ]
|
||||
teardown-instance:
|
||||
name: Teardown instance (fast-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, fast-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
region: ${{ needs.setup-ec2.outputs.aws-region }}
|
||||
label: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
@@ -139,4 +242,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (fast-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
202
.github/workflows/aws_tfhe_gpu_tests.yml
vendored
202
.github/workflows/aws_tfhe_gpu_tests.yml
vendored
@@ -1,202 +0,0 @@
|
||||
# Compile and test tfhe-cuda-backend on an AWS instance
|
||||
name: TFHE Cuda Backend - Full tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
setup-ec2:
|
||||
name: Setup EC2 instance (cuda-tests)
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
|
||||
aws-region: ${{ steps.start-instance.outputs.aws-region }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
profile: gpu-test
|
||||
|
||||
cuda-pcc:
|
||||
name: CUDA post-commit checks
|
||||
needs: setup-ec2
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 9
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run fmt checks
|
||||
run: |
|
||||
make check_fmt_gpu
|
||||
|
||||
- name: Run clippy checks
|
||||
run: |
|
||||
make pcc_gpu
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ always() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA tests
|
||||
needs: [ setup-ec2, cuda-pcc ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 9
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@1d96c772d19495a3b5c517cd2bc0cb401ea0529f
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run core crypto, integer and internal CUDA backend tests
|
||||
run: |
|
||||
make test_gpu
|
||||
|
||||
- name: Run user docs tests
|
||||
run: |
|
||||
make test_user_doc_gpu
|
||||
|
||||
- name: Test C API
|
||||
run: |
|
||||
make test_c_api_gpu
|
||||
|
||||
- name: Run High Level API Tests
|
||||
run: |
|
||||
make test_high_level_api_gpu
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ always() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CUDA AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-ec2:
|
||||
name: Teardown EC2 instance (cuda-tests)
|
||||
if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
|
||||
needs: [ setup-ec2, cuda-pcc, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
region: ${{ needs.setup-ec2.outputs.aws-region }}
|
||||
label: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
60
.github/workflows/aws_tfhe_integer_tests.yml
vendored
60
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -10,53 +10,72 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
# We clear the cache to reduce memory pressure because of the numerous processes of cargo
|
||||
# nextest
|
||||
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
|
||||
NO_BIG_PARAMS: FALSE
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
schedule:
|
||||
# Nightly tests @ 3AM after each work day
|
||||
- cron: "0 3 * * MON-FRI"
|
||||
|
||||
jobs:
|
||||
setup-ec2:
|
||||
name: Setup EC2 instance (unsigned-integer-tests)
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
|
||||
setup-instance:
|
||||
name: Setup instance (unsigned-integer-tests)
|
||||
if: (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
|
||||
aws-region: ${{ steps.start-instance.outputs.aws-region }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: cpu-big
|
||||
|
||||
unsigned-integer-tests:
|
||||
name: Unsigned integer tests
|
||||
needs: setup-ec2
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Should skip big parameters set
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "NO_BIG_PARAMS=TRUE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Gen Keys if required
|
||||
run: |
|
||||
make GEN_KEY_CACHE_MULTI_BIT_ONLY=TRUE gen_key_cache
|
||||
@@ -71,7 +90,7 @@ jobs:
|
||||
|
||||
- name: Run unsigned integer tests
|
||||
run: |
|
||||
AVX512_SUPPORT=ON BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_ci
|
||||
AVX512_SUPPORT=ON NO_BIG_PARAMS=${{ env.NO_BIG_PARAMS }} BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_ci
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ always() }}
|
||||
@@ -81,22 +100,21 @@ jobs:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Unsigned Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-ec2:
|
||||
name: Teardown EC2 instance (unsigned-integer-tests)
|
||||
if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
|
||||
needs: [ setup-ec2, unsigned-integer-tests ]
|
||||
teardown-instance:
|
||||
name: Teardown instance (unsigned-integer-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, unsigned-integer-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
region: ${{ needs.setup-ec2.outputs.aws-region }}
|
||||
label: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
@@ -104,4 +122,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
@@ -10,53 +10,72 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
# We clear the cache to reduce memory pressure because of the numerous processes of cargo
|
||||
# nextest
|
||||
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
|
||||
NO_BIG_PARAMS: FALSE
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
schedule:
|
||||
# Nightly tests @ 3AM after each work day
|
||||
- cron: "0 3 * * MON-FRI"
|
||||
|
||||
jobs:
|
||||
setup-ec2:
|
||||
name: Setup EC2 instance (signed-integer-tests)
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
|
||||
setup-instance:
|
||||
name: Setup instance (signed-integer-tests)
|
||||
if: (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'pull_request' && contains(github.event.label.name, 'approved')) ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
|
||||
aws-region: ${{ steps.start-instance.outputs.aws-region }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: cpu-big
|
||||
|
||||
signed-integer-tests:
|
||||
name: Signed integer tests
|
||||
needs: setup-ec2
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Should skip big parameters set
|
||||
if: github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "NO_BIG_PARAMS=TRUE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Gen Keys if required
|
||||
run: |
|
||||
make GEN_KEY_CACHE_MULTI_BIT_ONLY=TRUE gen_key_cache
|
||||
@@ -75,7 +94,7 @@ jobs:
|
||||
|
||||
- name: Run signed integer tests
|
||||
run: |
|
||||
AVX512_SUPPORT=ON BIG_TESTS_INSTANCE=TRUE make test_signed_integer_ci
|
||||
AVX512_SUPPORT=ON NO_BIG_PARAMS=${{ env.NO_BIG_PARAMS }} BIG_TESTS_INSTANCE=TRUE make test_signed_integer_ci
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ always() }}
|
||||
@@ -85,22 +104,21 @@ jobs:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Signed Integer tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-ec2:
|
||||
name: Teardown EC2 instance (signed-integer-tests)
|
||||
if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
|
||||
needs: [ setup-ec2, signed-integer-tests ]
|
||||
teardown-instance:
|
||||
name: Teardown instance (signed-integer-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, signed-integer-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
region: ${{ needs.setup-ec2.outputs.aws-region }}
|
||||
label: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
@@ -108,4 +126,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
155
.github/workflows/aws_tfhe_tests.yml
vendored
155
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -10,95 +10,213 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
|
||||
jobs:
|
||||
setup-ec2:
|
||||
name: Setup EC2 instance (cpu-tests)
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
permissions:
|
||||
pull-requests: write
|
||||
outputs:
|
||||
csprng_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.csprng_any_changed }}
|
||||
zk_pok_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.zk_pok_any_changed }}
|
||||
core_crypto_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.core_crypto_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
boolean_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.boolean_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
shortint_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.shortint_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
high_level_api_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.high_level_api_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
c_api_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.c_api_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
examples_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.examples_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
apps_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.apps_any_changed || steps.changed-files.outputs.dependencies_any_changed }}
|
||||
user_docs_test: ${{ env.IS_PULL_REQUEST == 'false' ||
|
||||
steps.changed-files.outputs.user_docs_any_changed ||
|
||||
steps.changed-files.outputs.dependencies_any_changed }}
|
||||
any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
dependencies:
|
||||
- tfhe/Cargo.toml
|
||||
- concrete-csprng/**
|
||||
- tfhe-zk-pok/**
|
||||
csprng:
|
||||
- concrete-csprng/**
|
||||
zk_pok:
|
||||
- tfhe-zk-pok/**
|
||||
core_crypto:
|
||||
- tfhe/src/core_crypto/**
|
||||
boolean:
|
||||
- tfhe/src/core_crypto/**
|
||||
- tfhe/src/boolean/**
|
||||
shortint:
|
||||
- tfhe/src/core_crypto/**
|
||||
- tfhe/src/shortint/**
|
||||
high_level_api:
|
||||
- tfhe/src/**
|
||||
- '!tfhe/src/c_api/**'
|
||||
- '!tfhe/src/boolean/**'
|
||||
- '!tfhe/src/js_on_wasm_api/**'
|
||||
c_api:
|
||||
- tfhe/src/**
|
||||
examples:
|
||||
- tfhe/src/**
|
||||
- '!tfhe/src/c_api/**'
|
||||
- tfhe/examples/**
|
||||
apps:
|
||||
- tfhe/src/**
|
||||
- '!tfhe/src/c_api/**'
|
||||
- apps/trivium/src/**
|
||||
user_docs:
|
||||
- tfhe/src/**
|
||||
- '!tfhe/src/c_api/**'
|
||||
- 'tfhe/docs/**.md'
|
||||
- README.md
|
||||
|
||||
- name: Aggregate file changes
|
||||
id: aggregated-changes
|
||||
if: ( steps.changed-files.outputs.dependencies_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.csprng_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.zk_pok_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.core_crypto_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.boolean_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.shortint_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.high_level_api_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.c_api_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.examples_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.apps_any_changed == 'true' ||
|
||||
steps.changed-files.outputs.user_docs_any_changed == 'true')
|
||||
run: |
|
||||
echo "any_changed=true" >> "$GITHUB_OUTPUT"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cpu-tests)
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.any_file_changed == 'true')
|
||||
needs: should-run
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
|
||||
aws-region: ${{ steps.start-instance.outputs.aws-region }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: cpu-big
|
||||
|
||||
cpu-tests:
|
||||
name: CPU tests
|
||||
needs: setup-ec2
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
needs: [ should-run, setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Run concrete-csprng tests
|
||||
if: needs.should-run.outputs.csprng_test == 'true'
|
||||
run: |
|
||||
make test_concrete_csprng
|
||||
|
||||
- name: Run tfhe-zk-pok tests
|
||||
if: needs.should-run.outputs.zk_pok_test == 'true'
|
||||
run: |
|
||||
make test_zk_pok
|
||||
|
||||
- name: Run core tests
|
||||
if: needs.should-run.outputs.core_crypto_test == 'true'
|
||||
run: |
|
||||
AVX512_SUPPORT=ON make test_core_crypto
|
||||
|
||||
- name: Run boolean tests
|
||||
if: needs.should-run.outputs.boolean_test == 'true'
|
||||
run: |
|
||||
make test_boolean
|
||||
|
||||
- name: Run C API tests
|
||||
if: needs.should-run.outputs.c_api_test == 'true'
|
||||
run: |
|
||||
make test_c_api
|
||||
|
||||
- name: Run user docs tests
|
||||
if: needs.should-run.outputs.user_docs_test == 'true'
|
||||
run: |
|
||||
make test_user_doc
|
||||
|
||||
- name: Gen Keys if required
|
||||
if: needs.should-run.outputs.shortint_test == 'true'
|
||||
run: |
|
||||
make gen_key_cache
|
||||
|
||||
- name: Run shortint tests
|
||||
if: needs.should-run.outputs.shortint_test == 'true'
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_shortint_ci
|
||||
|
||||
- name: Run high-level API tests
|
||||
if: needs.should-run.outputs.high_level_api_test == 'true'
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_high_level_api
|
||||
|
||||
- name: Run example tests
|
||||
if: needs.should-run.outputs.examples_test == 'true'
|
||||
run: |
|
||||
make test_examples
|
||||
make dark_market
|
||||
|
||||
- name: Run apps tests
|
||||
if: needs.should-run.outputs.apps_test == 'true'
|
||||
run: |
|
||||
make test_trivium
|
||||
make test_kreyvium
|
||||
@@ -111,22 +229,21 @@ jobs:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CPU tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-ec2:
|
||||
name: Teardown EC2 instance (cpu-tests)
|
||||
if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
|
||||
needs: [ setup-ec2, cpu-tests ]
|
||||
teardown-instance:
|
||||
name: Teardown instance (cpu-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cpu-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
region: ${{ needs.setup-ec2.outputs.aws-region }}
|
||||
label: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
@@ -134,4 +251,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (cpu-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
36
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
36
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -18,42 +18,43 @@ on:
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
setup-ec2:
|
||||
name: Setup EC2 instance (wasm-tests)
|
||||
setup-instance:
|
||||
name: Setup instance (wasm-tests)
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
|
||||
aws-region: ${{ steps.start-instance.outputs.aws-region }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: cpu-small
|
||||
|
||||
wasm-tests:
|
||||
name: WASM tests
|
||||
needs: setup-ec2
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -71,7 +72,7 @@ jobs:
|
||||
|
||||
- name: Run parallel wasm tests
|
||||
run: |
|
||||
make ci_test_web_js_api_parallel
|
||||
make test_web_js_api_parallel_ci
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ always() }}
|
||||
@@ -81,22 +82,21 @@ jobs:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "WASM tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-ec2:
|
||||
name: Teardown EC2 instance (wasm-tests)
|
||||
if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
|
||||
needs: [ setup-ec2, wasm-tests ]
|
||||
teardown-instance:
|
||||
name: Teardown instance (wasm-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, wasm-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
region: ${{ needs.setup-ec2.outputs.aws-region }}
|
||||
label: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
@@ -104,4 +104,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (wasm-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
123
.github/workflows/boolean_benchmark.yml
vendored
123
.github/workflows/boolean_benchmark.yml
vendored
@@ -3,30 +3,9 @@ name: Boolean benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
# This input is not used in this workflow but still mandatory since a calling workflow could
|
||||
# use it. If a triggering command include a user_inputs field, then the triggered workflow
|
||||
# must include this very input, otherwise the workflow won't be called.
|
||||
# See start_full_benchmarks.yml as example.
|
||||
user_inputs:
|
||||
description: "Type of benchmarks to run"
|
||||
type: string
|
||||
default: "weekly_benchmarks"
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -34,36 +13,60 @@ env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
run-boolean-benchmarks:
|
||||
name: Execute boolean benchmarks in EC2
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
setup-instance:
|
||||
name: Setup instance (boolean-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Get benchmark date
|
||||
run: |
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: bench
|
||||
|
||||
boolean-benchmarks:
|
||||
name: Execute boolean benchmarks in EC2
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -73,14 +76,12 @@ jobs:
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--hardware "hpc7a.96xlarge" \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
@@ -97,13 +98,13 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_boolean
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -129,8 +130,28 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Boolean benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (boolean-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, boolean-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (boolean-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
4
.github/workflows/cargo_build.yml
vendored
4
.github/workflows/cargo_build.yml
vendored
@@ -19,11 +19,11 @@ jobs:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest-large, windows-latest]
|
||||
os: [large_ubuntu_16, macos-latest-large, large_windows_16_latest]
|
||||
fail-fast: false
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
|
||||
- name: Install and run newline linter checks
|
||||
if: matrix.os == 'ubuntu-latest'
|
||||
|
||||
2
.github/workflows/check_commit.yml
vendored
2
.github/workflows/check_commit.yml
vendored
@@ -10,7 +10,7 @@ jobs:
|
||||
- name: Check first line
|
||||
uses: gsactions/commit-message-checker@16fa2d5de096ae0d35626443bcd24f1e756cafee
|
||||
with:
|
||||
pattern: '^((feat|fix|chore|refactor|style|test|docs|doc)(\(\w+\))?\:) .+$'
|
||||
pattern: '^((feat|fix|chore|refactor|style|test|docs|doc)(\([\w\-_]+\))?\!?\:) .+$'
|
||||
flags: "gs"
|
||||
error: 'Your first line has to contain a commit type and scope like "feat(my_feature): msg".'
|
||||
excludeDescription: "true" # optional: this excludes the description body of a pull request
|
||||
|
||||
2
.github/workflows/ci_lint.yml
vendored
2
.github/workflows/ci_lint.yml
vendored
@@ -13,7 +13,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
|
||||
- name: Get actionlint
|
||||
run: |
|
||||
|
||||
110
.github/workflows/code_coverage.yml
vendored
110
.github/workflows/code_coverage.yml
vendored
@@ -6,70 +6,58 @@ env:
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
# All the inputs are provided by Slab
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "AWS instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "AWS instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "AWS instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: 'Slab request ID'
|
||||
type: string
|
||||
fork_repo:
|
||||
description: 'Name of forked repo as user/repo'
|
||||
type: string
|
||||
fork_git_sha:
|
||||
description: 'Git SHA to checkout from fork'
|
||||
type: string
|
||||
# Code coverage workflow is only run via workflow_dispatch event since execution duration is not stabilized yet.
|
||||
|
||||
jobs:
|
||||
code-coverage:
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}_${{ inputs.instance_image_id }}_${{ inputs.instance_type }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ inputs.runner_name }}
|
||||
timeout-minutes: 11520 # 8 days
|
||||
setup-instance:
|
||||
name: Setup instance (code-coverage)
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
# Step used for log purpose.
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "ID: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
echo "Fork repo: ${{ inputs.fork_repo }}"
|
||||
echo "Fork git sha: ${{ inputs.fork_git_sha }}"
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
repository: ${{ inputs.fork_repo }}
|
||||
ref: ${{ inputs.fork_git_sha }}
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: cpu-small
|
||||
|
||||
code-coverage:
|
||||
name: Code coverage tests
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.event_name }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 5760 # 4 days
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@0874344d6ebbaa00a27da73276ae7162fadcaf69
|
||||
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
|
||||
with:
|
||||
files_yaml: |
|
||||
tfhe:
|
||||
@@ -99,7 +87,7 @@ jobs:
|
||||
make test_shortint_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@84508663e988701840491b86de86b666e8a86bed
|
||||
uses: codecov/codecov-action@e28ff129e5465c2c0dcc6f003fc735cb6ae0c673
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -113,7 +101,7 @@ jobs:
|
||||
make test_integer_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@84508663e988701840491b86de86b666e8a86bed
|
||||
uses: codecov/codecov-action@e28ff129e5465c2c0dcc6f003fc735cb6ae0c673
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -127,8 +115,28 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Code coverage finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (code-coverage)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, code-coverage ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (code-coverage) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
120
.github/workflows/core_crypto_benchmark.yml
vendored
120
.github/workflows/core_crypto_benchmark.yml
vendored
@@ -3,30 +3,6 @@ name: Core crypto benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
# This input is not used in this workflow but still mandatory since a calling workflow could
|
||||
# use it. If a triggering command include a user_inputs field, then the triggered workflow
|
||||
# must include this very input, otherwise the workflow won't be called.
|
||||
# See start_full_benchmarks.yml as example.
|
||||
user_inputs:
|
||||
description: "Type of benchmarks to run"
|
||||
type: string
|
||||
default: "weekly_benchmarks"
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -34,67 +10,89 @@ env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
run-core-crypto-benchmarks:
|
||||
name: Execute core crypto benchmarks in EC2
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
setup-instance:
|
||||
name: Setup instance (core-crypto-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Get benchmark date
|
||||
run: |
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: bench
|
||||
|
||||
core-crypto-benchmarks:
|
||||
name: Execute core crypto benchmarks in EC2
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make bench_pbs
|
||||
make bench_pbs128
|
||||
make bench_ks
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--hardware "hpc7a.96xlarge" \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--name-suffix avx512 \
|
||||
--walk-subdirs \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -120,8 +118,28 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "PBS benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (core-crypto-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, core-crypto-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (core-crypto-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
141
.github/workflows/core_crypto_gpu_benchmark.yml
vendored
141
.github/workflows/core_crypto_gpu_benchmark.yml
vendored
@@ -1,43 +1,45 @@
|
||||
# Run core crypto benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
|
||||
# Run core crypto benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Core crypto GPU benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
# This input is not used in this workflow but still mandatory since a calling workflow could
|
||||
# use it. If a triggering command include a user_inputs field, then the triggered workflow
|
||||
# must include this very input, otherwise the workflow won't be called.
|
||||
# See start_full_benchmarks.yml as example.
|
||||
user_inputs:
|
||||
description: "Type of benchmarks to run"
|
||||
type: string
|
||||
default: "weekly_benchmarks"
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
run-core-crypto-benchmarks:
|
||||
name: Execute GPU core crypto benchmarks in EC2
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-core-crypto-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
cuda-core-crypto-benchmarks:
|
||||
name: Execute GPU core crypto benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
@@ -45,33 +47,43 @@ jobs:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 9
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Get benchmark date
|
||||
run: |
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -103,28 +115,26 @@ jobs:
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--hardware "n3-H100x1" \
|
||||
--backend gpu \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--name-suffix avx512 \
|
||||
--walk-subdirs \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -144,14 +154,39 @@ jobs:
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-core-crypto-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ !success() && !cancelled() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-core-crypto-benchmarks.result }}
|
||||
SLACK_MESSAGE: "PBS GPU benchmarks finished with status: ${{ needs.cuda-core-crypto-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-integer-full-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-core-crypto-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "PBS GPU benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-core-crypto-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
35
.github/workflows/csprng_randomness_tests.yml
vendored
35
.github/workflows/csprng_randomness_tests.yml
vendored
@@ -17,44 +17,44 @@ on:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
|
||||
jobs:
|
||||
setup-ec2:
|
||||
name: Setup EC2 instance (csprng-randomness-tests)
|
||||
setup-instance:
|
||||
name: Setup instance (csprng-randomness-tests)
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, 'approved') }}
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
|
||||
aws-region: ${{ steps.start-instance.outputs.aws-region }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: cpu-small
|
||||
|
||||
csprng-randomness-tests:
|
||||
name: CSPRNG randomness tests
|
||||
needs: setup-ec2
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -70,22 +70,21 @@ jobs:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "concrete-csprng randomness check finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-ec2:
|
||||
name: Teardown EC2 instance (csprng-randomness-tests)
|
||||
if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
|
||||
needs: [ setup-ec2, csprng-randomness-tests ]
|
||||
teardown-instance:
|
||||
name: Teardown instance (csprng-randomness-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, csprng-randomness-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
region: ${{ needs.setup-ec2.outputs.aws-region }}
|
||||
label: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
@@ -93,4 +92,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (csprng-randomness-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
123
.github/workflows/data_pr_close.yml
vendored
Normal file
123
.github/workflows/data_pr_close.yml
vendored
Normal file
@@ -0,0 +1,123 @@
|
||||
name: Close or Merge corresponding PR on the data repo
|
||||
|
||||
# When a PR with the data_PR tag is closed or merged, this will close the corresponding PR in the data repo.
|
||||
|
||||
env:
|
||||
TARGET_REPO_API_URL: ${{ github.api_url }}/repos/zama-ai/tfhe-backward-compat-data
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
PR_BRANCH: ${{ github.head_ref || github.ref_name }}
|
||||
CLOSE_TYPE: ${{ github.event.pull_request.merged && 'merge' || 'close' }}
|
||||
|
||||
# only trigger on pull request closed events
|
||||
on:
|
||||
pull_request:
|
||||
types: [ closed ]
|
||||
|
||||
# The same pattern is used for jobs that use the github api:
|
||||
# - save the result of the API call in the env var "GH_API_RES". Since the var is multiline
|
||||
# we use this trick: https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#example-of-a-multiline-string
|
||||
# - "set +e" will make sure we reach the last "echo EOF" even in case of error
|
||||
# - "set -o" pipefail makes one line piped command return the error of the first failure
|
||||
# - 'RES="$?"' and 'exit $RES' are used to return the error code if a command failed. Without it, with "set +e"
|
||||
# the script will always return 0 because of the "echo EOF".
|
||||
|
||||
|
||||
jobs:
|
||||
auto_close_job:
|
||||
if: ${{ contains(github.event.pull_request.labels.*.name, 'data_PR') }}
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Find corresponding Pull Request in the data repo
|
||||
run: |
|
||||
{
|
||||
set +e
|
||||
set -o pipefail
|
||||
echo 'TARGET_REPO_PR<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X GET \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
${{ env.TARGET_REPO_API_URL }}/pulls\?head=${{ github.repository_owner }}:${{ env.PR_BRANCH }} | jq -e '.[0]' | sed 's/null/{ "message": "corresponding PR not found" }/'
|
||||
RES="$?"
|
||||
echo EOF
|
||||
} >> "${GITHUB_ENV}"
|
||||
exit $RES
|
||||
|
||||
- name: Comment on the PR to indicate the reason of the close
|
||||
run: |
|
||||
{
|
||||
set +e
|
||||
set -o pipefail
|
||||
echo 'GH_API_RES<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X POST \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
${{ fromJson(env.TARGET_REPO_PR).comments_url }} \
|
||||
-d '{ "body": "PR ${{ env.CLOSE_TYPE }}d because the corresponding PR in main repo was ${{ env.CLOSE_TYPE }}d: ${{ github.repository }}#${{ github.event.number }}" }'
|
||||
RES="$?"
|
||||
echo EOF
|
||||
} >> "${GITHUB_ENV}"
|
||||
exit $RES
|
||||
|
||||
- name: Merge the Pull Request in the data repo
|
||||
if: ${{ github.event.pull_request.merged }}
|
||||
run: |
|
||||
{
|
||||
set +e
|
||||
set -o pipefail
|
||||
echo 'GH_API_RES<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X PUT \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
${{ fromJson(env.TARGET_REPO_PR).url }}/merge \
|
||||
-d '{ "merge_method": "rebase" }'
|
||||
RES="$?"
|
||||
echo EOF
|
||||
} >> "${GITHUB_ENV}"
|
||||
exit $RES
|
||||
|
||||
- name: Close the Pull Request in the data repo
|
||||
if: ${{ !github.event.pull_request.merged }}
|
||||
run: |
|
||||
{
|
||||
set +e
|
||||
set -o pipefail
|
||||
echo 'GH_API_RES<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X PATCH \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
${{ fromJson(env.TARGET_REPO_PR).url }} \
|
||||
-d '{ "state": "closed" }'
|
||||
RES="$?"
|
||||
echo EOF
|
||||
} >> "${GITHUB_ENV}"
|
||||
exit $RES
|
||||
|
||||
- name: Delete the associated branch in the data repo
|
||||
run: |
|
||||
{
|
||||
set +e
|
||||
set -o pipefail
|
||||
echo 'GH_API_RES<<EOF'
|
||||
curl --fail-with-body --no-progress-meter -L -X DELETE \
|
||||
-H "Accept: application/vnd.github+json" \
|
||||
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
|
||||
-H "X-GitHub-Api-Version: 2022-11-28" \
|
||||
${{ env.TARGET_REPO_API_URL }}/git/refs/heads/${{ env.PR_BRANCH }}
|
||||
RES="$?"
|
||||
echo EOF
|
||||
} >> "${GITHUB_ENV}"
|
||||
exit $RES
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ always() && job.status == 'failure' }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Failed to auto-${{ env.CLOSE_TYPE }} PR on data repo: ${{ fromJson(env.GH_API_RES || env.TARGET_REPO_PR).message }}"
|
||||
@@ -1,5 +1,5 @@
|
||||
# Run all benchmarks on an RTX 4090 machine and return parsed results to Slab CI bot.
|
||||
name: TFHE Cuda Backend - 4090 full benchmarks
|
||||
# Run benchmarks on an RTX 4090 machine and return parsed results to Slab CI bot.
|
||||
name: TFHE Cuda Backend - 4090 benchmarks
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -11,6 +11,7 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
FAST_BENCH: TRUE
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
@@ -23,23 +24,22 @@ on:
|
||||
|
||||
jobs:
|
||||
cuda-integer-benchmarks:
|
||||
name: Cuda integer benchmarks for all operations flavor (RTX 4090)
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || github.event_name == 'schedule' || contains(github.event.label.name, '4090_bench') }}
|
||||
name: Cuda integer benchmarks (RTX 4090)
|
||||
if: ${{ github.event_name == 'workflow_dispatch' ||
|
||||
github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs' ||
|
||||
contains(github.event.label.name, '4090_bench') }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}_cuda_integer_bench
|
||||
cancel-in-progress: true
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ["self-hosted", "4090-desktop"]
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
command: [integer, integer_multi_bit]
|
||||
op_flavor: [default, unchecked]
|
||||
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -50,14 +50,15 @@ jobs:
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -65,7 +66,7 @@ jobs:
|
||||
|
||||
- name: Run integer benchmarks
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu
|
||||
make BENCH_OP_FLAVOR=default bench_integer_multi_bit_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
@@ -81,9 +82,9 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
name: ${{ github.sha }}_integer_multi_bit_gpu_default
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
@@ -114,13 +115,13 @@ jobs:
|
||||
needs: cuda-integer-benchmarks
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}_cuda_core_crypto_bench
|
||||
cancel-in-progress: true
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ["self-hosted", "4090-desktop"]
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -133,18 +134,18 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run integer benchmarks
|
||||
- name: Run core crypto benchmarks
|
||||
run: |
|
||||
make bench_pbs_gpu
|
||||
make bench_ks_gpu
|
||||
@@ -163,7 +164,7 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -16,12 +16,17 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
types: [ labeled ]
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
|
||||
jobs:
|
||||
cuda-tests-linux:
|
||||
name: CUDA tests (RTX 4090)
|
||||
if: ${{ github.event_name == 'workflow_dispatch' || contains(github.event.label.name, '4090_test') }}
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
contains(github.event.label.name, '4090_test') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
@@ -29,10 +34,12 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
199
.github/workflows/gpu_fast_h100_tests.yml
vendored
Normal file
199
.github/workflows/gpu_fast_h100_tests.yml
vendored
Normal file
@@ -0,0 +1,199 @@
|
||||
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
|
||||
name: TFHE Cuda Backend - Fast tests on H100
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
- tfhe/build.rs
|
||||
- backends/tfhe-cuda-backend/**
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
- tfhe/src/integer/gpu/**
|
||||
- tfhe/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**.md'
|
||||
- Makefile
|
||||
- '.github/workflows/gpu_fast_h100_tests.yml'
|
||||
- scripts/**
|
||||
- ci/**
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-h100-tests)
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run core crypto and internal CUDA backend tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu
|
||||
BIG_TESTS_INSTANCE=TRUE make test_cuda_backend
|
||||
|
||||
- name: Run user docs tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_user_doc_gpu
|
||||
|
||||
- name: Test C API
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_c_api_gpu
|
||||
|
||||
- name: Run High Level API Tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_high_level_api_gpu
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
197
.github/workflows/gpu_fast_tests.yml
vendored
Normal file
197
.github/workflows/gpu_fast_tests.yml
vendored
Normal file
@@ -0,0 +1,197 @@
|
||||
# Compile and test tfhe-cuda-backend on an AWS instance
|
||||
name: TFHE Cuda Backend - Fast tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
- tfhe/build.rs
|
||||
- backends/tfhe-cuda-backend/**
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
- tfhe/src/integer/gpu/**
|
||||
- tfhe/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**.md'
|
||||
- '.github/workflows/gpu_fast_tests.yml'
|
||||
- Makefile
|
||||
- scripts/**
|
||||
- ci/**
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-tests)
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
needs.should-run.outputs.gpu_test == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: gpu-test
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run core crypto and internal CUDA backend tests
|
||||
run: |
|
||||
make test_core_crypto_gpu
|
||||
make test_cuda_backend
|
||||
|
||||
- name: Run user docs tests
|
||||
run: |
|
||||
make test_user_doc_gpu
|
||||
|
||||
- name: Test C API
|
||||
run: |
|
||||
make test_c_api_gpu
|
||||
|
||||
- name: Run High Level API Tests
|
||||
run: |
|
||||
make test_high_level_api_gpu
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
199
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
Normal file
199
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
Normal file
@@ -0,0 +1,199 @@
|
||||
# Compile and test tfhe-cuda-backend on an AWS instance
|
||||
name: TFHE Cuda Backend - Full tests multi-GPU
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
- tfhe/build.rs
|
||||
- backends/tfhe-cuda-backend/**
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
- tfhe/src/integer/gpu/**
|
||||
- tfhe/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**.md'
|
||||
- Makefile
|
||||
- '.github/workflows/**_multi_gpu_tests.yml'
|
||||
- scripts/**
|
||||
- ci/**
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-tests-multi-gpu)
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: multi-gpu-test
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA multi-GPU tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
# No need to test core_crypto and classic PBS in integer since it's already tested on single GPU.
|
||||
- name: Run multi-bit CUDA integer tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_integer_multi_bit_gpu_ci
|
||||
|
||||
- name: Run user docs tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_user_doc_gpu
|
||||
|
||||
- name: Test C API
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_c_api_gpu
|
||||
|
||||
- name: Run High Level API Tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_high_level_api_gpu
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Multi-GPU tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests-multi-gpu)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-tests-multi-gpu) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
126
.github/workflows/gpu_pcc.yml
vendored
Normal file
126
.github/workflows/gpu_pcc.yml
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
# Perfom tfhe-cuda-backend post-commit checks on an AWS instance
|
||||
name: TFHE Cuda Backend - Post-commit Checks
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-pcc)
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: gpu-build
|
||||
|
||||
cuda-pcc:
|
||||
name: CUDA post-commit checks
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 9
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run fmt checks
|
||||
run: |
|
||||
make check_fmt_gpu
|
||||
|
||||
- name: Run clippy checks
|
||||
run: |
|
||||
make pcc_gpu
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ always() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CUDA AWS post-commit checks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-pcc)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-pcc ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-pcc) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
188
.github/workflows/gpu_signed_integer_h100_tests.yml
vendored
Normal file
188
.github/workflows/gpu_signed_integer_h100_tests.yml
vendored
Normal file
@@ -0,0 +1,188 @@
|
||||
# Signed integer GPU tests on an H100 VM on hyperstack
|
||||
name: TFHE Cuda Backend - Signed integer tests on H100
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
- tfhe/build.rs
|
||||
- backends/tfhe-cuda-backend/**
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
- tfhe/src/integer/gpu/**
|
||||
- tfhe/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**.md'
|
||||
- Makefile
|
||||
- '.github/workflows/gpu_signed_integer_h100_tests.yml'
|
||||
- scripts/**
|
||||
- ci/**
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-h100-tests)
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 signed integer tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run signed integer tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci
|
||||
|
||||
- name: Run signed integer multi-bit tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
202
.github/workflows/gpu_signed_integer_tests.yml
vendored
Normal file
202
.github/workflows/gpu_signed_integer_tests.yml
vendored
Normal file
@@ -0,0 +1,202 @@
|
||||
# Compile and test tfhe-cuda-backend signed integer on an AWS instance
|
||||
name: TFHE Cuda Backend - Signed integer tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
FAST_TESTS: TRUE
|
||||
NIGHTLY_TESTS: FALSE
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
- labeled
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
- tfhe/build.rs
|
||||
- backends/tfhe-cuda-backend/**
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
- tfhe/src/integer/gpu/**
|
||||
- tfhe/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**.md'
|
||||
- '.github/workflows/gpu_signed_integer_tests.yml'
|
||||
- Makefile
|
||||
- scripts/**
|
||||
- ci/**
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-signed-integer-tests)
|
||||
runs-on: ubuntu-latest
|
||||
needs: should-run
|
||||
if: (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: gpu-test
|
||||
|
||||
cuda-signed-integer-tests:
|
||||
name: CUDA signed integer tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run nightly tests
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
{
|
||||
echo "FAST_TESTS=FALSE";
|
||||
echo "NIGHTLY_TESTS=TRUE";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run signed integer multi-bit tests
|
||||
run: |
|
||||
make test_signed_integer_multi_bit_gpu_ci
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-signed-integer-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-signed-integer-tests.result != 'skipped' }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-signed-integer-tests.result }}
|
||||
SLACK_MESSAGE: "Base GPU tests finished with status: ${{ needs.cuda-signed-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-signed-integer-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-signed-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
188
.github/workflows/gpu_unsigned_integer_h100_tests.yml
vendored
Normal file
188
.github/workflows/gpu_unsigned_integer_h100_tests.yml
vendored
Normal file
@@ -0,0 +1,188 @@
|
||||
# Test unsigned integers on an H100 VM on hyperstack
|
||||
name: TFHE Cuda Backend - Unsigned integer tests on H100
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
- tfhe/build.rs
|
||||
- backends/tfhe-cuda-backend/**
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
- tfhe/src/integer/gpu/**
|
||||
- tfhe/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**.md'
|
||||
- Makefile
|
||||
- '.github/workflows/gpu_unsigned_integer_tests.yml'
|
||||
- scripts/**
|
||||
- ci/**
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-h100-tests)
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
cuda-tests-linux:
|
||||
name: CUDA H100 unsigned integer tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run unsigned integer tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci
|
||||
|
||||
- name: Run unsigned integer multi-bit tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Unsigned integer GPU H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-h100-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
199
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
Normal file
199
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
Normal file
@@ -0,0 +1,199 @@
|
||||
# Compile and test tfhe-cuda-backend unsigned integer on an AWS instance
|
||||
name: TFHE Cuda Backend - Unsigned integer tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
FAST_TESTS: TRUE
|
||||
NIGHTLY_TESTS: FALSE
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types:
|
||||
- opened
|
||||
- synchronize
|
||||
- labeled
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: write
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
- tfhe/build.rs
|
||||
- backends/tfhe-cuda-backend/**
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
- tfhe/src/integer/gpu/**
|
||||
- tfhe/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**.md'
|
||||
- '.github/workflows/gpu_unsigned_integer_tests.yml'
|
||||
- Makefile
|
||||
- scripts/**
|
||||
- ci/**
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-unsigned-integer-tests)
|
||||
needs: should-run
|
||||
if: (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: gpu-test
|
||||
|
||||
cuda-unsigned-integer-tests:
|
||||
name: CUDA unsigned integer tests
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run nightly tests
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
{
|
||||
echo "FAST_TESTS=FALSE";
|
||||
echo "NIGHTLY_TESTS=TRUE";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run unsigned integer multi-bit tests
|
||||
run: |
|
||||
make test_unsigned_integer_multi_bit_gpu_ci
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-unsigned-integer-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-unsigned-integer-tests.result != 'skipped' }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-unsigned-integer-tests.result }}
|
||||
SLACK_MESSAGE: "Unsigned integer GPU tests finished with status: ${{ needs.cuda-unsigned-integer-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-unsigned-integer-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-unsigned-integer-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
130
.github/workflows/integer_benchmark.yml
vendored
130
.github/workflows/integer_benchmark.yml
vendored
@@ -1,130 +0,0 @@
|
||||
# Run integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Integer benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-integer-benchmarks:
|
||||
name: Execute integer benchmarks in EC2
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Get benchmark date
|
||||
run: |
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make FAST_BENCH=TRUE bench_integer
|
||||
|
||||
- name: Parse benchmarks to csv
|
||||
run: |
|
||||
make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on results file"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Integer benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
@@ -1,28 +1,20 @@
|
||||
# Run all integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Integer full benchmarks
|
||||
name: Integer benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
user_inputs:
|
||||
description: "Type of benchmarks to run"
|
||||
type: string
|
||||
default: "weekly_benchmarks"
|
||||
all_precisions:
|
||||
description: "Run all precisions"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
# Quarterly benchmarks will be triggered right before end of quarter, the 25th of the current month at 4a.m.
|
||||
# These benchmarks are far longer to execute hence the reason to run them only four time a year.
|
||||
- cron: '0 4 25 MAR,JUN,SEP,DEC *'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -30,21 +22,29 @@ env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
FAST_BENCH: TRUE
|
||||
|
||||
jobs:
|
||||
prepare-matrix:
|
||||
name: Prepare operations matrix
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
|
||||
steps:
|
||||
- name: Weekly benchmarks
|
||||
if: ${{ github.event.inputs.user_inputs == 'weekly_benchmarks' }}
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
github.event.schedule == '0 1 * * 6'
|
||||
run: |
|
||||
echo "OP_FLAVOR=[\"default\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Quarterly benchmarks
|
||||
if: ${{ github.event.inputs.user_inputs == 'quarterly_benchmarks' }}
|
||||
if: github.event.schedule == '0 4 25 MAR,JUN,SEP,DEC *'
|
||||
run: |
|
||||
echo "OP_FLAVOR=[\"default\", \"smart\", \"unchecked\", \"misc\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
@@ -53,11 +53,31 @@ jobs:
|
||||
run: |
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (integer-benchmarks)
|
||||
needs: prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: bench
|
||||
|
||||
integer-benchmarks:
|
||||
name: Execute integer benchmarks for all operations flavor
|
||||
needs: prepare-matrix
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
continue-on-error: true
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
strategy:
|
||||
@@ -66,15 +86,8 @@ jobs:
|
||||
command: [ integer, integer_multi_bit]
|
||||
op_flavor: ${{ fromJson(needs.prepare-matrix.outputs.op_flavor) }}
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -92,17 +105,22 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
run: |
|
||||
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}
|
||||
@@ -111,7 +129,7 @@ jobs:
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--hardware "hpc7a.96xlarge" \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
@@ -121,7 +139,7 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -140,19 +158,34 @@ jobs:
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notification:
|
||||
name: Slack Notification
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ failure() }}
|
||||
needs: integer-benchmarks
|
||||
steps:
|
||||
- name: Notify
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Integer full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (integer-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, integer-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (integer-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
141
.github/workflows/integer_gpu_benchmark.yml
vendored
141
.github/workflows/integer_gpu_benchmark.yml
vendored
@@ -1,24 +1,11 @@
|
||||
# Run integer benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
|
||||
# Run integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Integer GPU benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -27,12 +14,35 @@ env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
run-integer-benchmarks:
|
||||
name: Execute integer benchmarks in EC2
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-integer-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
cuda-integer-benchmarks:
|
||||
name: Execute GPU integer benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
@@ -40,33 +50,43 @@ jobs:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 9
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Get benchmark date
|
||||
run: |
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -90,6 +110,10 @@ jobs:
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_gpu
|
||||
@@ -100,35 +124,33 @@ jobs:
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--hardware "n3-H100x1" \
|
||||
--backend gpu \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -148,14 +170,39 @@ jobs:
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-integer-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-integer-benchmarks.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-integer-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ needs.cuda-integer-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-integer-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-integer-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ !success() && !cancelled() }}
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-integer-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
130
.github/workflows/integer_gpu_full_benchmark.yml
vendored
130
.github/workflows/integer_gpu_full_benchmark.yml
vendored
@@ -1,32 +1,11 @@
|
||||
# Run all integer benchmarks on an AWS instance with CUDA and return parsed results to Slab CI bot.
|
||||
# Run all integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Integer GPU full benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
# This input is not used in this workflow but still mandatory since a calling workflow could
|
||||
# use it. If a triggering command include a user_inputs field, then the triggered workflow
|
||||
# must include this very input, otherwise the workflow won't be called.
|
||||
# See start_full_benchmarks.yml as example.
|
||||
user_inputs:
|
||||
description: "Type of benchmarks to run"
|
||||
type: string
|
||||
default: "weekly_benchmarks"
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -34,13 +13,36 @@ env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
integer-benchmarks:
|
||||
name: Execute integer benchmarks for all operations flavor
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-integer-full-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
cuda-integer-full-benchmarks:
|
||||
name: Execute GPU integer benchmarks for all operations flavor
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
if: ${{ !cancelled() }}
|
||||
continue-on-error: true
|
||||
strategy:
|
||||
fail-fast: false
|
||||
@@ -52,19 +54,25 @@ jobs:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 9
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -82,7 +90,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -107,12 +115,16 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu
|
||||
@@ -121,7 +133,7 @@ jobs:
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--hardware "n3-H100x1" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
@@ -132,7 +144,7 @@ jobs:
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -151,19 +163,39 @@ jobs:
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notification:
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
needs: [ setup-instance, cuda-integer-full-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ !success() && !cancelled() }}
|
||||
needs: integer-benchmarks
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Notify
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-integer-full-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Integer GPU full benchmarks finished with status: ${{ needs.cuda-integer-full-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-integer-full-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-integer-full-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Integer GPU full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-integer-full-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
130
.github/workflows/integer_multi_bit_benchmark.yml
vendored
130
.github/workflows/integer_multi_bit_benchmark.yml
vendored
@@ -1,130 +0,0 @@
|
||||
# Run integer benchmarks with multi-bit cryptographic parameters on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Integer Multi-bit benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-integer-benchmarks:
|
||||
name: Execute integer multi-bit benchmarks in EC2
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Get benchmark date
|
||||
run: |
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Run multi-bit benchmarks with AVX512
|
||||
run: |
|
||||
make FAST_BENCH=TRUE bench_integer_multi_bit
|
||||
|
||||
- name: Parse benchmarks to csv
|
||||
run: |
|
||||
make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on results file"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Integer benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
@@ -1,24 +1,21 @@
|
||||
# Run integer benchmarks with multi-bit cryptographic parameters on an AWS instance and return parsed results to Slab CI bot.
|
||||
# Run integer benchmarks with multi-bit cryptographic parameters on an instance and return parsed results to Slab CI bot.
|
||||
name: Integer GPU Multi-bit benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
all_precisions:
|
||||
description: "Run all precisions"
|
||||
type: boolean
|
||||
default: false
|
||||
fast_default:
|
||||
description: "Run only deduplicated default operations without scalar variants"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -27,13 +24,38 @@ env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
FAST_BENCH: TRUE
|
||||
BENCH_OP_FLAVOR: default
|
||||
|
||||
jobs:
|
||||
cuda-integer-benchmarks:
|
||||
name: Execute integer multi-bit benchmarks in EC2
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-integer-multi-bit-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
cuda-integer-multi-bit-benchmarks:
|
||||
name: Execute GPU integer multi-bit benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
if: ${{ !cancelled() }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
@@ -41,33 +63,43 @@ jobs:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 9
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Get benchmark date
|
||||
run: |
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -91,9 +123,23 @@ jobs:
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
run: |
|
||||
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run fast subset benchmarks
|
||||
if: inputs.fast_default
|
||||
run: |
|
||||
echo "BENCH_OP_FLAVOR=fast_default" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run multi-bit benchmarks with AVX512
|
||||
run: |
|
||||
make FAST_BENCH=TRUE BENCH_OP_FLAVOR=default bench_integer_multi_bit_gpu
|
||||
make bench_unsigned_integer_multi_bit_gpu
|
||||
|
||||
- name: Parse benchmarks to csv
|
||||
run: |
|
||||
@@ -101,35 +147,33 @@ jobs:
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--hardware "n3-H100x1" \
|
||||
--backend gpu \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -149,14 +193,40 @@ jobs:
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-integer-multi-bit-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ !success() && !cancelled() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-integer-multi-bit-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Integer GPU multi-bit benchmarks finished with status: ${{ needs.cuda-integer-multi-bit-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-integer-full-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-integer-multi-bit-benchmarks, slack-notify ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ !success() && !cancelled() }}
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Integer GPU benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-integer-multi-bit-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
221
.github/workflows/integer_multi_bit_multi_gpu_benchmark.yml
vendored
Normal file
221
.github/workflows/integer_multi_bit_multi_gpu_benchmark.yml
vendored
Normal file
@@ -0,0 +1,221 @@
|
||||
# Run 64-bit multi-bit integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Integer multi GPU Multi-bit benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
all_precisions:
|
||||
description: "Run all precisions"
|
||||
type: boolean
|
||||
default: false
|
||||
fast_default:
|
||||
description: "Run only deduplicated default operations without scalar variants"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
FAST_BENCH: TRUE
|
||||
BENCH_OP_FLAVOR: default
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-integer-multi-bit-multi-gpu-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch' }}
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: multi-h100
|
||||
|
||||
cuda-integer-multi-bit-multi-gpu-benchmarks:
|
||||
name: Execute multi GPU integer multi-bit benchmarks
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
continue-on-error: true
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
run: |
|
||||
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run fast subset benchmarks
|
||||
if: inputs.fast_default
|
||||
run: |
|
||||
echo "BENCH_OP_FLAVOR=fast_default" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run multi-bit benchmarks with AVX512
|
||||
run: |
|
||||
make bench_unsigned_integer_multi_bit_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "n3-H100x8" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on results file"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-integer-multi-bit-multi-gpu-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ !success() && !cancelled() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-integer-multi-bit-multi-gpu-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Integer multi GPU multi-bit benchmarks finished with status: ${{ needs.cuda-integer-multi-bit-multi-gpu-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-integer-multi-bit-multi-gpu-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-integer-multi-bit-multi-gpu-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-integer-multi-bit-multi-gpu-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
201
.github/workflows/integer_multi_gpu_full_benchmark.yml
vendored
Normal file
201
.github/workflows/integer_multi_gpu_full_benchmark.yml
vendored
Normal file
@@ -0,0 +1,201 @@
|
||||
# Run all integer benchmarks on an instance with CUDA and return parsed results to Slab CI bot.
|
||||
name: Integer multi GPU full benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cuda-integer-full-multi-gpu-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: multi-h100
|
||||
|
||||
cuda-integer-full-multi-gpu-benchmarks:
|
||||
name: Execute multi GPU integer benchmarks for all operations flavor
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
continue-on-error: true
|
||||
strategy:
|
||||
fail-fast: false
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
command: [integer, integer_multi_bit]
|
||||
op_flavor: [default, unchecked]
|
||||
# explicit include-based build matrix, of known valid options
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_${{ matrix.command }}_gpu
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "n3-H100x8" \
|
||||
--backend gpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on results file"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-integer-full-multi-gpu-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ !success() && !cancelled() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-integer-full-multi-gpu-benchmarks.result }}
|
||||
SLACK_MESSAGE: "Integer GPU full benchmarks finished with status: ${{ needs.cuda-integer-full-multi-gpu-benchmarks.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cuda-integer-full-multi-gpu-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-integer-full-multi-gpu-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-integer-full-multi-gpu-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
11
.github/workflows/m1_tests.yml
vendored
11
.github/workflows/m1_tests.yml
vendored
@@ -3,7 +3,7 @@ name: Tests on M1 CPU
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [labeled]
|
||||
types: [ labeled ]
|
||||
# Have a nightly build for M1 tests
|
||||
schedule:
|
||||
# * is a special character in YAML so you have to quote this string
|
||||
@@ -18,6 +18,9 @@ env:
|
||||
RUST_MIN_STACK: "8388608"
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
FAST_TESTS: "TRUE"
|
||||
# We clear the cache to reduce memory pressure because of the numerous processes of cargo
|
||||
# nextest
|
||||
TFHE_RS_CLEAR_IN_MEMORY_KEY_CACHE: "1"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref }}
|
||||
@@ -31,10 +34,12 @@ jobs:
|
||||
timeout-minutes: 720
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
- uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
80
.github/workflows/make_release.yml
vendored
80
.github/workflows/make_release.yml
vendored
@@ -20,20 +20,72 @@ on:
|
||||
description: "Push node js package"
|
||||
type: boolean
|
||||
default: true
|
||||
npm_latest_tag:
|
||||
description: "Set NPM tag as latest"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
NPM_TAG: ""
|
||||
|
||||
jobs:
|
||||
publish_release:
|
||||
name: Publish Release
|
||||
package:
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe
|
||||
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
- name: generate hash
|
||||
id: hash
|
||||
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
provenance:
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
|
||||
permissions:
|
||||
# Needed to detect the GitHub Actions environment
|
||||
actions: read
|
||||
# Needed to create the provenance via GitHub OIDC
|
||||
id-token: write
|
||||
# Needed to upload assets/artifacts
|
||||
contents: write
|
||||
with:
|
||||
# SHA-256 hashes of the Crate package.
|
||||
base64-subjects: ${{ needs.package.outputs.hash }}
|
||||
|
||||
publish_release:
|
||||
name: Publish Release
|
||||
needs: [package] # for comparing hashes
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: read
|
||||
id-token: write
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Create NPM version tag
|
||||
if: ${{ inputs.npm_latest_tag }}
|
||||
run: |
|
||||
echo "NPM_TAG=latest" >> "${GITHUB_ENV}"
|
||||
- name: Download artifact
|
||||
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
|
||||
with:
|
||||
name: crate
|
||||
path: target/package
|
||||
- name: Publish crate.io package
|
||||
if: ${{ inputs.push_to_crates }}
|
||||
env:
|
||||
@@ -42,10 +94,26 @@ jobs:
|
||||
run: |
|
||||
cargo publish -p tfhe --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
|
||||
|
||||
- name: Generate hash
|
||||
id: published_hash
|
||||
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Slack notification (hashes comparison)
|
||||
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: failure
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "SLSA tfhe crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
- name: Build web package
|
||||
if: ${{ inputs.push_web_package }}
|
||||
run: |
|
||||
make build_web_js_api
|
||||
make build_web_js_api_parallel
|
||||
|
||||
- name: Publish web package
|
||||
if: ${{ inputs.push_web_package }}
|
||||
@@ -54,6 +122,8 @@ jobs:
|
||||
token: ${{ secrets.NPM_TOKEN }}
|
||||
package: tfhe/pkg/package.json
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
tag: ${{ env.NPM_TAG }}
|
||||
provenance: true
|
||||
|
||||
- name: Build Node package
|
||||
if: ${{ inputs.push_node_package }}
|
||||
@@ -70,6 +140,8 @@ jobs:
|
||||
token: ${{ secrets.NPM_TOKEN }}
|
||||
package: tfhe/pkg/package.json
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
tag: ${{ env.NPM_TAG }}
|
||||
provenance: true
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
|
||||
@@ -18,7 +18,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
|
||||
32
.github/workflows/make_release_cuda.yml
vendored
32
.github/workflows/make_release_cuda.yml
vendored
@@ -21,28 +21,27 @@ env:
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
setup-ec2:
|
||||
name: Setup EC2 instance (publish-cuda-release)
|
||||
setup-instance:
|
||||
name: Setup instance (publish-cuda-release)
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
instance-id: ${{ steps.start-instance.outputs.ec2-instance-id }}
|
||||
aws-region: ${{ steps.start-instance.outputs.aws-region }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: gpu-test
|
||||
|
||||
publish-cuda-release:
|
||||
name: Publish CUDA Release
|
||||
needs: setup-ec2
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
@@ -55,7 +54,7 @@ jobs:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -64,7 +63,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -105,22 +104,21 @@ jobs:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-cuda-backend release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-ec2:
|
||||
name: Teardown EC2 instance (publish-release)
|
||||
if: ${{ always() && needs.setup-ec2.result != 'skipped' }}
|
||||
needs: [ setup-ec2, publish-cuda-release ]
|
||||
teardown-instance:
|
||||
name: Teardown instance (publish-release)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, publish-cuda-release ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@8562abbdc96b3619bd5debe1fb934db298f9a044
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
region: ${{ needs.setup-ec2.outputs.aws-region }}
|
||||
label: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
@@ -128,4 +126,4 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (publish-cuda-release) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Instance teardown (publish-cuda-release) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
42
.github/workflows/make_release_zk_pok.yml
vendored
Normal file
42
.github/workflows/make_release_zk_pok.yml
vendored
Normal file
@@ -0,0 +1,42 @@
|
||||
# Publish new release of tfhe-zk-pok on crates.io.
|
||||
name: Publish tfhe-zk-pok release
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
dry_run:
|
||||
description: "Dry-run"
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
|
||||
jobs:
|
||||
publish_release:
|
||||
name: Publish tfhe-zk-pok Release
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
|
||||
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
|
||||
run: |
|
||||
cargo publish -p tfhe-zk-pok --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "tfhe-zk-pok release failed: (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
8
.github/workflows/parameters_check.yml
vendored
8
.github/workflows/parameters_check.yml
vendored
@@ -14,17 +14,17 @@ on:
|
||||
|
||||
jobs:
|
||||
params-curves-security-check:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: large_ubuntu_16
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
|
||||
- name: Checkout lattice-estimator
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: malb/lattice-estimator
|
||||
path: lattice_estimator
|
||||
ref: '53508253629d3b5d31a2ad110e85dc69391ccb95'
|
||||
ref: 'e80ec6bbbba212428b0e92d0467c18629cf9ed67'
|
||||
|
||||
- name: Install Sage
|
||||
run: |
|
||||
|
||||
128
.github/workflows/shortint_benchmark.yml
vendored
128
.github/workflows/shortint_benchmark.yml
vendored
@@ -1,128 +0,0 @@
|
||||
# Run shortint benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Shortint benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-shortint-benchmarks:
|
||||
name: Execute shortint benchmarks in EC2
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Get benchmark date
|
||||
run: |
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make bench_shortint
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Measure key sizes
|
||||
run: |
|
||||
make measure_shortint_key_sizes
|
||||
|
||||
- name: Parse key sizes results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/shortint_key_sizes.csv ${{ env.RESULTS_FILENAME }} \
|
||||
--key-sizes \
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
with:
|
||||
name: ${{ github.sha }}_shortint
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on results file"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Shortint benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
193
.github/workflows/shortint_cpu_benchmark.yml
vendored
Normal file
193
.github/workflows/shortint_cpu_benchmark.yml
vendored
Normal file
@@ -0,0 +1,193 @@
|
||||
# Run all shortint benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Shortint full benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
# Quarterly benchmarks will be triggered right before end of quarter, the 25th of the current month at 4a.m.
|
||||
# These benchmarks are far longer to execute hence the reason to run them only four time a year.
|
||||
- cron: '0 4 25 MAR,JUN,SEP,DEC *'
|
||||
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
prepare-matrix:
|
||||
name: Prepare operations matrix
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
|
||||
steps:
|
||||
- name: Weekly benchmarks
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
github.event.schedule == '0 1 * * 6'
|
||||
run: |
|
||||
echo "OP_FLAVOR=[\"default\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Quarterly benchmarks
|
||||
if: github.event.schedule == '0 4 25 MAR,JUN,SEP,DEC *'
|
||||
run: |
|
||||
echo "OP_FLAVOR=[\"default\", \"smart\", \"unchecked\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set operation flavor output
|
||||
id: set_op_flavor
|
||||
run: |
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (shortint-benchmarks)
|
||||
needs: prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: bench
|
||||
|
||||
shortint-benchmarks:
|
||||
name: Execute shortint benchmarks for all operations flavor
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
continue-on-error: true
|
||||
strategy:
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
op_flavor: ${{ fromJson(needs.prepare-matrix.outputs.op_flavor) }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_shortint
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "hpc7a.96xlarge" \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
# This small benchmark needs to be executed only once.
|
||||
- name: Measure key sizes
|
||||
if: matrix.op_flavor == 'default'
|
||||
run: |
|
||||
make measure_shortint_key_sizes
|
||||
|
||||
- name: Parse key sizes results
|
||||
if: matrix.op_flavor == 'default'
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/shortint_key_sizes.csv ${{ env.RESULTS_FILENAME }} \
|
||||
--key-sizes \
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on results file"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Shortint full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (shortint-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, shortint-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (shortint-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
152
.github/workflows/shortint_full_benchmark.yml
vendored
152
.github/workflows/shortint_full_benchmark.yml
vendored
@@ -1,152 +0,0 @@
|
||||
# Run all shortint benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Shortint full benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
# This input is not used in this workflow but still mandatory since a calling workflow could
|
||||
# use it. If a triggering command include a user_inputs field, then the triggered workflow
|
||||
# must include this very input, otherwise the workflow won't be called.
|
||||
# See start_full_benchmarks.yml as example.
|
||||
user_inputs:
|
||||
description: "Type of benchmarks to run"
|
||||
type: string
|
||||
default: "weekly_benchmarks"
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
shortint-benchmarks:
|
||||
name: Execute shortint benchmarks for all operations flavor
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
strategy:
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
op_flavor: [ default, smart, unchecked ]
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_shortint
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
# This small benchmark needs to be executed only once.
|
||||
- name: Measure key sizes
|
||||
if: matrix.op_flavor == 'default'
|
||||
run: |
|
||||
make measure_shortint_key_sizes
|
||||
|
||||
- name: Parse key sizes results
|
||||
if: matrix.op_flavor == 'default'
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/shortint_key_sizes.csv ${{ env.RESULTS_FILENAME }} \
|
||||
--key-sizes \
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
with:
|
||||
name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on results file"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notification:
|
||||
name: Slack Notification
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ failure() }}
|
||||
needs: shortint-benchmarks
|
||||
steps:
|
||||
- name: Notify
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Shortint full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
130
.github/workflows/signed_integer_benchmark.yml
vendored
130
.github/workflows/signed_integer_benchmark.yml
vendored
@@ -1,130 +0,0 @@
|
||||
# Run signed integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Signed Integer benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-integer-benchmarks:
|
||||
name: Execute signed integer benchmarks in EC2
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Get benchmark date
|
||||
run: |
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make FAST_BENCH=TRUE bench_signed_integer
|
||||
|
||||
- name: Parse benchmarks to csv
|
||||
run: |
|
||||
make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on results file"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Signed integer benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
191
.github/workflows/signed_integer_cpu_benchmark.yml
vendored
Normal file
191
.github/workflows/signed_integer_cpu_benchmark.yml
vendored
Normal file
@@ -0,0 +1,191 @@
|
||||
# Run all signed integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Signed Integer full benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
all_precisions:
|
||||
description: "Run all precisions"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
# Quarterly benchmarks will be triggered right before end of quarter, the 25th of the current month at 4a.m.
|
||||
# These benchmarks are far longer to execute hence the reason to run them only four time a year.
|
||||
- cron: '0 4 25 MAR,JUN,SEP,DEC *'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
FAST_BENCH: TRUE
|
||||
|
||||
jobs:
|
||||
prepare-matrix:
|
||||
name: Prepare operations matrix
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
op_flavor: ${{ steps.set_op_flavor.outputs.op_flavor }}
|
||||
steps:
|
||||
- name: Weekly benchmarks
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
github.event.schedule == '0 1 * * 6'
|
||||
run: |
|
||||
echo "OP_FLAVOR=[\"default\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Quarterly benchmarks
|
||||
if: github.event.schedule == '0 4 25 MAR,JUN,SEP,DEC *'
|
||||
run: |
|
||||
echo "OP_FLAVOR=[\"default\", \"unchecked\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set operation flavor output
|
||||
id: set_op_flavor
|
||||
run: |
|
||||
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (signed-integer-benchmarks)
|
||||
needs: prepare-matrix
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: bench
|
||||
|
||||
signed-integer-benchmarks:
|
||||
name: Execute signed integer benchmarks for all operations flavor
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
continue-on-error: true
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
strategy:
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
command: [ integer, integer_multi_bit ]
|
||||
op_flavor: [ default, unchecked ]
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
run: |
|
||||
echo "FAST_BENCH=FALSE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_signed_${{ matrix.command }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "hpc7a.96xlarge" \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on results file"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Signed integer full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (integer-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, signed-integer-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (signed-integer-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
136
.github/workflows/signed_integer_full_benchmark.yml
vendored
136
.github/workflows/signed_integer_full_benchmark.yml
vendored
@@ -1,136 +0,0 @@
|
||||
# Run all signed integer benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Signed Integer full benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
user_inputs:
|
||||
description: "Type of benchmarks to run"
|
||||
type: string
|
||||
default: "weekly_benchmarks"
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
integer-benchmarks:
|
||||
name: Execute signed integer benchmarks for all operations flavor
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
continue-on-error: true
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
strategy:
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
command: [ integer, integer_multi_bit ]
|
||||
op_flavor: [ default, unchecked ]
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make BENCH_OP_FLAVOR=${{ matrix.op_flavor }} bench_signed_${{ matrix.command }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on results file"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
slack-notification:
|
||||
name: Slack Notification
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ failure() }}
|
||||
needs: integer-benchmarks
|
||||
steps:
|
||||
- name: Notify
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Signed integer full benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
@@ -1,130 +0,0 @@
|
||||
# Run signed integer benchmarks with multi-bit cryptographic parameters on an AWS instance and return parsed results to Slab CI bot.
|
||||
name: Signed Integer Multi-bit benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
|
||||
jobs:
|
||||
run-integer-benchmarks:
|
||||
name: Execute signed integer multi-bit benchmarks in EC2
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Get benchmark date
|
||||
run: |
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Run multi-bit benchmarks with AVX512
|
||||
run: |
|
||||
make FAST_BENCH=TRUE bench_signed_integer_multi_bit
|
||||
|
||||
- name: Parse benchmarks to csv
|
||||
run: |
|
||||
make PARSE_INTEGER_BENCH_CSV_FILE=${{ env.PARSE_INTEGER_BENCH_CSV_FILE }} \
|
||||
parse_integer_benches
|
||||
|
||||
- name: Upload csv results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
with:
|
||||
name: ${{ github.sha }}_csv_integer
|
||||
path: ${{ env.PARSE_INTEGER_BENCH_CSV_FILE }}
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
with:
|
||||
name: ${{ github.sha }}_integer
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on results file"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "Signed integer benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
135
.github/workflows/start_benchmarks.yml
vendored
135
.github/workflows/start_benchmarks.yml
vendored
@@ -1,135 +0,0 @@
|
||||
# Start all benchmark jobs on Slab CI bot.
|
||||
name: Start all benchmarks
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- "main"
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
# The input name must be the name of the slab command to launch
|
||||
boolean_bench:
|
||||
description: "Run Boolean benches"
|
||||
type: boolean
|
||||
default: true
|
||||
shortint_bench:
|
||||
description: "Run shortint benches"
|
||||
type: boolean
|
||||
default: true
|
||||
integer_bench:
|
||||
description: "Run integer benches"
|
||||
type: boolean
|
||||
default: true
|
||||
signed_integer_bench:
|
||||
description: "Run signed integer benches"
|
||||
type: boolean
|
||||
default: true
|
||||
integer_multi_bit_bench:
|
||||
description: "Run integer multi bit benches"
|
||||
type: boolean
|
||||
default: true
|
||||
signed_integer_multi_bit_bench:
|
||||
description: "Run signed integer multi bit benches"
|
||||
type: boolean
|
||||
default: true
|
||||
core_crypto_bench:
|
||||
description: "Run core crypto benches"
|
||||
type: boolean
|
||||
default: true
|
||||
core_crypto_gpu_bench:
|
||||
description: "Run core crypto benches on GPU"
|
||||
type: boolean
|
||||
default: true
|
||||
wasm_client_bench:
|
||||
description: "Run WASM client benches"
|
||||
type: boolean
|
||||
default: true
|
||||
|
||||
jobs:
|
||||
start-benchmarks:
|
||||
if: ${{ (github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' }}
|
||||
strategy:
|
||||
matrix:
|
||||
command: [ boolean_bench, shortint_bench,
|
||||
integer_bench, integer_multi_bit_bench,
|
||||
signed_integer_bench, signed_integer_multi_bit_bench,
|
||||
integer_gpu_bench, integer_multi_bit_gpu_bench,
|
||||
core_crypto_bench, core_crypto_gpu_bench, wasm_client_bench ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@0874344d6ebbaa00a27da73276ae7162fadcaf69
|
||||
with:
|
||||
files_yaml: |
|
||||
common_benches:
|
||||
- toolchain.txt
|
||||
- Makefile
|
||||
- ci/slab.toml
|
||||
- tfhe/Cargo.toml
|
||||
- tfhe/src/core_crypto/**
|
||||
- .github/workflows/start_benchmarks.yml
|
||||
boolean_bench:
|
||||
- tfhe/src/boolean/**
|
||||
- tfhe/benches/boolean/**
|
||||
- .github/workflows/boolean_benchmark.yml
|
||||
shortint_bench:
|
||||
- tfhe/src/shortint/**
|
||||
- tfhe/benches/shortint/**
|
||||
- .github/workflows/shortint_benchmark.yml
|
||||
integer_bench:
|
||||
- tfhe/src/shortint/**
|
||||
- tfhe/src/integer/**
|
||||
- tfhe/benches/integer/bench.rs
|
||||
- .github/workflows/integer_benchmark.yml
|
||||
integer_multi_bit_bench:
|
||||
- tfhe/src/shortint/**
|
||||
- tfhe/src/integer/**
|
||||
- tfhe/benches/integer/bench.rs
|
||||
- .github/workflows/integer_multi_bit_benchmark.yml
|
||||
signed_integer_bench:
|
||||
- tfhe/src/shortint/**
|
||||
- tfhe/src/integer/**
|
||||
- tfhe/benches/integer/signed_bench.rs
|
||||
- .github/workflows/signed_integer_benchmark.yml
|
||||
signed_integer_multi_bit_bench:
|
||||
- tfhe/src/shortint/**
|
||||
- tfhe/src/integer/**
|
||||
- tfhe/benches/integer/signed_bench.rs
|
||||
- .github/workflows/signed_integer_multi_bit_benchmark.yml
|
||||
core_crypto_bench:
|
||||
- tfhe/src/core_crypto/**
|
||||
- tfhe/benches/core_crypto/**
|
||||
- .github/workflows/core_crypto_benchmark.yml
|
||||
wasm_client_bench:
|
||||
- tfhe/web_wasm_parallel_tests/**
|
||||
- .github/workflows/wasm_client_benchmark.yml
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Start AWS job in Slab
|
||||
# If manually triggered check that the current bench has been requested
|
||||
# Otherwise if it's on push check that files relevant to benchmarks have changed
|
||||
if: (github.event_name == 'workflow_dispatch' && github.event.inputs[matrix.command] == 'true') || (github.event_name == 'push' && (steps.changed-files.outputs.common_benches_any_changed == 'true' || steps.changed-files.outputs[format('{0}_any_changed', matrix.command)] == 'true'))
|
||||
shell: bash
|
||||
run: |
|
||||
echo -n '{"command": "${{ matrix.command }}", "git_ref": "${{ github.ref }}", "sha": "${{ github.sha }}"}' > command.json
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh command.json '${{ secrets.JOB_SECRET }}')"
|
||||
curl -v -k \
|
||||
--fail-with-body \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: start_aws" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @command.json \
|
||||
${{ secrets.SLAB_URL }}
|
||||
66
.github/workflows/start_full_benchmarks.yml
vendored
66
.github/workflows/start_full_benchmarks.yml
vendored
@@ -1,66 +0,0 @@
|
||||
# Start all benchmark jobs, including full shortint and integer, on Slab CI bot.
|
||||
name: Start full suite benchmarks
|
||||
|
||||
on:
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
# Quarterly benchmarks will be triggered right before end of quarter, the 25th of the current month at 4a.m.
|
||||
# These benchmarks are far longer to execute hence the reason to run them only four time a year.
|
||||
- cron: '0 4 25 MAR,JUN,SEP,DEC *'
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
benchmark_type:
|
||||
description: 'Benchmark type'
|
||||
required: true
|
||||
default: 'weekly'
|
||||
type: choice
|
||||
options:
|
||||
- weekly
|
||||
- quarterly
|
||||
|
||||
jobs:
|
||||
start-benchmarks:
|
||||
if: ${{ (github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') || github.event_name == 'workflow_dispatch' }}
|
||||
strategy:
|
||||
matrix:
|
||||
command: [ boolean_bench, shortint_full_bench,
|
||||
integer_full_bench, signed_integer_full_bench, integer_gpu_full_bench,
|
||||
core_crypto_bench, core_crypto_gpu_bench, wasm_client_bench ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Set benchmarks type as weekly
|
||||
if: (github.event_name == 'workflow_dispatch' && inputs.benchmark_type == 'weekly') || github.event.schedule == '0 1 * * 6'
|
||||
run: |
|
||||
echo "BENCH_TYPE=weekly_benchmarks" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set benchmarks type as quarterly
|
||||
if: (github.event_name == 'workflow_dispatch' && inputs.benchmark_type == 'quarterly') || github.event.schedule == '0 4 25 MAR,JUN,SEP,DEC *'
|
||||
run: |
|
||||
echo "BENCH_TYPE=quarterly_benchmarks" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Start AWS job in Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo -n '{"command": "${{ matrix.command }}", "git_ref": "${{ github.ref }}", "sha": "${{ github.sha }}", "user_inputs": "${{ env.BENCH_TYPE }}"}' > command.json
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh command.json '${{ secrets.JOB_SECRET }}')"
|
||||
curl -v -k \
|
||||
--fail-with-body \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: start_aws" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @command.json \
|
||||
${{ secrets.SLAB_URL }}
|
||||
7
.github/workflows/sync_on_push.yml
vendored
7
.github/workflows/sync_on_push.yml
vendored
@@ -13,14 +13,9 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
- name: Save repo
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
with:
|
||||
name: repo-archive
|
||||
path: '.'
|
||||
- name: git-sync
|
||||
uses: wei/git-sync@55c6b63b4f21607da0e9877ca9b4d11a29fc6d83
|
||||
with:
|
||||
|
||||
164
.github/workflows/wasm_client_benchmark.yml
vendored
164
.github/workflows/wasm_client_benchmark.yml
vendored
@@ -1,32 +1,14 @@
|
||||
# Run WASM client benchmarks on an AWS instance and return parsed results to Slab CI bot.
|
||||
# Run WASM client benchmarks on an instance and return parsed results to Slab CI bot.
|
||||
name: WASM client benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
instance_id:
|
||||
description: "Instance ID"
|
||||
type: string
|
||||
instance_image_id:
|
||||
description: "Instance AMI ID"
|
||||
type: string
|
||||
instance_type:
|
||||
description: "Instance product type"
|
||||
type: string
|
||||
runner_name:
|
||||
description: "Action runner name"
|
||||
type: string
|
||||
request_id:
|
||||
description: "Slab request ID"
|
||||
type: string
|
||||
# This input is not used in this workflow but still mandatory since a calling workflow could
|
||||
# use it. If a triggering command include a user_inputs field, then the triggered workflow
|
||||
# must include this very input, otherwise the workflow won't be called.
|
||||
# See start_full_benchmarks.yml as example.
|
||||
user_inputs:
|
||||
description: "Type of benchmarks to run"
|
||||
type: string
|
||||
default: "weekly_benchmarks"
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 1a.m.
|
||||
- cron: '0 1 * * 6'
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -34,56 +16,106 @@ env:
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
run-wasm-client-benchmarks:
|
||||
name: Execute WASM client benchmarks in EC2
|
||||
runs-on: ${{ github.event.inputs.runner_name }}
|
||||
if: ${{ !cancelled() }}
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
|
||||
permissions:
|
||||
pull-requests: write
|
||||
outputs:
|
||||
wasm_bench: ${{ steps.changed-files.outputs.wasm_bench_any_changed }}
|
||||
steps:
|
||||
- name: Instance configuration used
|
||||
run: |
|
||||
echo "IDs: ${{ inputs.instance_id }}"
|
||||
echo "AMI: ${{ inputs.instance_image_id }}"
|
||||
echo "Type: ${{ inputs.instance_type }}"
|
||||
echo "Request ID: ${{ inputs.request_id }}"
|
||||
|
||||
- name: Get benchmark date
|
||||
run: |
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
wasm_bench:
|
||||
- tfhe/Cargo.toml
|
||||
- concrete-csprng/**
|
||||
- tfhe-zk-pok/**
|
||||
- tfhe/src/**
|
||||
- '!tfhe/src/c_api/**'
|
||||
- tfhe/web_wasm_parallel_tests/**
|
||||
- .github/workflows/wasm_client_benchmark.yml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (wasm-client-benchmarks)
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs' && needs.should-run.outputs.wasm_bench)
|
||||
needs: should-run
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: cpu-small
|
||||
|
||||
wasm-client-benchmarks:
|
||||
name: Execute WASM client benchmarks
|
||||
needs: setup-instance
|
||||
if: needs.setup-instance.result != 'skipped'
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@bb45937a053e097f8591208d8e74c90db1873d07
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make install_node
|
||||
make ci_bench_web_js_api_parallel
|
||||
make bench_web_js_api_parallel_ci
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
make parse_wasm_benchmarks
|
||||
|
||||
COMMIT_DATE="$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})"
|
||||
COMMIT_HASH="$(git describe --tags --dirty)"
|
||||
python3 ./ci/benchmark_parser.py tfhe/wasm_pk_gen.csv ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware ${{ inputs.instance_type }} \
|
||||
--project-version "${COMMIT_HASH}" \
|
||||
--hardware "m6i.4xlarge" \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${COMMIT_DATE}" \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--key-gen
|
||||
|
||||
@@ -98,13 +130,13 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_wasm
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -130,8 +162,28 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_MESSAGE: "WASM benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (wasm-client-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, wasm-client-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (wasm-client-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
197
.github/workflows/zk_pke_benchmark.yml
vendored
Normal file
197
.github/workflows/zk_pke_benchmark.yml
vendored
Normal file
@@ -0,0 +1,197 @@
|
||||
# Run PKE Zero-Knowledge benchmarks on an instance and return parsed results to Slab CI bot.
|
||||
name: PKE ZK benchmarks
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
push:
|
||||
branches:
|
||||
- main
|
||||
schedule:
|
||||
# Weekly benchmarks will be triggered each Saturday at 3a.m.
|
||||
- cron: '0 3 * * 6'
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
RESULTS_FILENAME: parsed_benchmark_results_${{ github.sha }}.json
|
||||
PARSE_INTEGER_BENCH_CSV_FILE: tfhe_rs_integer_benches_${{ github.sha }}.csv
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
((github.event_name == 'push' || github.event_name == 'schedule') && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
zk_pok_changed: ${{ steps.changed-files.outputs.zk_pok_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@c65cd883420fd2eb864698a825fc4162dd94482c
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
zk_pok:
|
||||
- tfhe/Cargo.toml
|
||||
- concrete-csprng/**
|
||||
- tfhe-zk-pok/**
|
||||
- tfhe/src/core_crypto/**
|
||||
- tfhe/src/shortint/**
|
||||
- tfhe/src/integer/**
|
||||
- tfhe/src/zk.rs
|
||||
- tfhe/benches/integer/zk_pke.rs
|
||||
- .github/workflows/zk_pke_benchmark.yml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (pke-zk-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
needs: should-run
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'push' &&
|
||||
github.repository == 'zama-ai/tfhe-rs' &&
|
||||
needs.should-run.outputs.zk_pok_changed == 'true')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: bench
|
||||
|
||||
pke-zk-benchmarks:
|
||||
name: Execute PKE ZK benchmarks
|
||||
if: needs.setup-instance.result != 'skipped'
|
||||
needs: setup-instance
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
echo "BENCH_DATE=$(date --iso-8601=seconds)";
|
||||
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
|
||||
echo "COMMIT_HASH=$(git describe --tags --dirty)";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set up home
|
||||
# "Install rust" step require root user to have a HOME directory which is not set.
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make bench_integer_zk
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py target/criterion ${{ env.RESULTS_FILENAME }} \
|
||||
--database tfhe_rs \
|
||||
--hardware "hpc7a.96xlarge" \
|
||||
--backend cpu \
|
||||
--project-version "${{ env.COMMIT_HASH }}" \
|
||||
--branch ${{ github.ref_name }} \
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
|
||||
- name: Parse CRS sizes results
|
||||
run: |
|
||||
python3 ./ci/benchmark_parser.py tfhe/pke_zk_crs_sizes.csv ${{ env.RESULTS_FILENAME }} \
|
||||
--key-sizes \
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a
|
||||
with:
|
||||
name: ${{ github.sha }}_integer_zk
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Computing HMac on results file"
|
||||
SIGNATURE="$(slab/scripts/hmac_calculator.sh ${{ env.RESULTS_FILENAME }} '${{ secrets.JOB_SECRET }}')"
|
||||
echo "Sending results to Slab..."
|
||||
curl -v -k \
|
||||
-H "Content-Type: application/json" \
|
||||
-H "X-Slab-Repository: ${{ github.repository }}" \
|
||||
-H "X-Slab-Command: store_data_v2" \
|
||||
-H "X-Hub-Signature-256: sha256=${SIGNATURE}" \
|
||||
-d @${{ env.RESULTS_FILENAME }} \
|
||||
${{ secrets.SLAB_URL }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ !success() && !cancelled() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "PKE ZK benchmarks finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (pke-zk-benchmarks)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, pke-zk-benchmarks ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@447a2d0fd2d1a9d647aa0d0723a6e9255372f261
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@4e5fb42d249be6a45a298f3c9543b111b02f7907
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (pke-zk-benchmarks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
7
.gitignore
vendored
7
.gitignore
vendored
@@ -7,6 +7,7 @@ target/
|
||||
# In case of symlinked keys
|
||||
/keys
|
||||
|
||||
**/*.rmeta
|
||||
**/Cargo.lock
|
||||
**/*.bin
|
||||
|
||||
@@ -22,3 +23,9 @@ dieharder_run.log
|
||||
|
||||
# Cuda local build
|
||||
backends/tfhe-cuda-backend/cuda/cmake-build-debug/
|
||||
|
||||
# WASM tests
|
||||
tfhe/web_wasm_parallel_tests/server.PID
|
||||
|
||||
# Dir used for backward compatibility test data
|
||||
tfhe/tfhe-backward-compat-data/
|
||||
|
||||
@@ -7,6 +7,14 @@ members = [
|
||||
"apps/trivium",
|
||||
"concrete-csprng",
|
||||
"backends/tfhe-cuda-backend",
|
||||
"utils/tfhe-versionable",
|
||||
"utils/tfhe-versionable-derive",
|
||||
]
|
||||
|
||||
exclude = [
|
||||
"tfhe/backward_compatibility_tests",
|
||||
"utils/cargo-tfhe-lints-inner",
|
||||
"utils/cargo-tfhe-lints"
|
||||
]
|
||||
|
||||
[profile.bench]
|
||||
|
||||
244
Makefile
244
Makefile
@@ -16,19 +16,15 @@ GEN_KEY_CACHE_COVERAGE_ONLY?=FALSE
|
||||
PARSE_INTEGER_BENCH_CSV_FILE?=tfhe_rs_integer_benches.csv
|
||||
FAST_TESTS?=FALSE
|
||||
FAST_BENCH?=FALSE
|
||||
NIGHTLY_TESTS?=FALSE
|
||||
BENCH_OP_FLAVOR?=DEFAULT
|
||||
NODE_VERSION=20
|
||||
NODE_VERSION=22.4
|
||||
FORWARD_COMPAT?=OFF
|
||||
# sed: -n, do not print input stream, -e means a script/expression
|
||||
# 1,/version/ indicates from the first line, to the line matching version at the start of the line
|
||||
# p indicates to print, so we keep only the start of the Cargo.toml until we hit the first version
|
||||
# entry which should be the version of tfhe
|
||||
TFHE_CURRENT_VERSION:=\
|
||||
$(shell sed -n -e '1,/^version/p' tfhe/Cargo.toml | \
|
||||
grep '^version[[:space:]]*=' | cut -d '=' -f 2 | xargs)
|
||||
# Cargo has a hard time distinguishing between our package from the workspace and a package that
|
||||
# could be a dependency, so we build an unambiguous spec here
|
||||
TFHE_SPEC:=tfhe@$(TFHE_CURRENT_VERSION)
|
||||
BACKWARD_COMPAT_DATA_URL=https://github.com/zama-ai/tfhe-backward-compat-data.git
|
||||
BACKWARD_COMPAT_DATA_BRANCH?=v0.1
|
||||
BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
|
||||
BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
|
||||
TFHE_SPEC:=tfhe
|
||||
# This is done to avoid forgetting it, we still precise the RUSTFLAGS in the commands to be able to
|
||||
# copy paste the command in the terminal and change them if required without forgetting the flags
|
||||
export RUSTFLAGS?=-C target-cpu=native
|
||||
@@ -115,7 +111,7 @@ install_cargo_nextest: install_rs_build_toolchain
|
||||
.PHONY: install_wasm_pack # Install wasm-pack to build JS packages
|
||||
install_wasm_pack: install_rs_build_toolchain
|
||||
@wasm-pack --version > /dev/null 2>&1 || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install wasm-pack || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.0 || \
|
||||
( echo "Unable to install cargo wasm-pack, unknown error." && exit 1 )
|
||||
|
||||
.PHONY: install_node # Install last version of NodeJS via nvm
|
||||
@@ -145,6 +141,11 @@ install_tarpaulin: install_rs_build_toolchain
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cargo-tarpaulin --locked || \
|
||||
( echo "Unable to install cargo tarpaulin, unknown error." && exit 1 )
|
||||
|
||||
.PHONY: install_tfhe_lints # Install custom tfhe-rs lints
|
||||
install_tfhe_lints:
|
||||
(cd utils/cargo-tfhe-lints-inner && cargo install --path .) && \
|
||||
cd utils/cargo-tfhe-lints && cargo install --path .
|
||||
|
||||
.PHONY: check_linelint_installed # Check if linelint newline linter is installed
|
||||
check_linelint_installed:
|
||||
@printf "\n" | linelint - > /dev/null 2>&1 || \
|
||||
@@ -160,6 +161,12 @@ check_nvm_installed:
|
||||
@source ~/.nvm/nvm.sh && nvm --version > /dev/null 2>&1 || \
|
||||
( echo "Unable to locate Node. Run 'make install_node'" && exit 1 )
|
||||
|
||||
.PHONY: install_mlc # Install mlc (Markup Link Checker)
|
||||
install_mlc: install_rs_build_toolchain
|
||||
@mlc --version > /dev/null 2>&1 || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install mlc --locked || \
|
||||
( echo "Unable to install mlc, unknown error." && exit 1 )
|
||||
|
||||
.PHONY: fmt # Format rust code
|
||||
fmt: install_rs_check_toolchain
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt
|
||||
@@ -258,6 +265,17 @@ clippy: install_rs_check_toolchain
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_rustdoc # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
|
||||
clippy_rustdoc: install_rs_check_toolchain
|
||||
if [[ "$(OS)" != "Linux" && "$(OS)" != "Darwin" ]]; then \
|
||||
echo "WARNING: skipped clippy_rustdoc, unsupported OS $(OS)"; \
|
||||
exit 0; \
|
||||
fi && \
|
||||
CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok,pbs-stats \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
|
||||
clippy_c_api: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
@@ -267,7 +285,7 @@ clippy_c_api: install_rs_check_toolchain
|
||||
.PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
|
||||
clippy_js_wasm_api: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api \
|
||||
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_tasks # Run clippy lints on helper tasks crate.
|
||||
@@ -283,7 +301,7 @@ clippy_trivium: install_rs_check_toolchain
|
||||
.PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
|
||||
clippy_all_targets: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok-experimental \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_concrete_csprng # Run clippy lints on concrete-csprng
|
||||
@@ -298,18 +316,23 @@ clippy_zk_pok: install_rs_check_toolchain
|
||||
-p tfhe-zk-pok -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_all # Run all clippy targets
|
||||
clippy_all: clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets clippy_c_api \
|
||||
clippy_js_wasm_api clippy_tasks clippy_core clippy_concrete_csprng clippy_zk_pok clippy_trivium
|
||||
clippy_all: clippy_rustdoc clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets \
|
||||
clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core clippy_concrete_csprng clippy_zk_pok clippy_trivium
|
||||
|
||||
.PHONY: clippy_fast # Run main clippy targets
|
||||
clippy_fast: clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core \
|
||||
clippy_concrete_csprng
|
||||
clippy_fast: clippy_rustdoc clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks \
|
||||
clippy_core clippy_concrete_csprng
|
||||
|
||||
.PHONY: clippy_cuda_backend # Run clippy lints on the tfhe-cuda-backend
|
||||
clippy_cuda_backend: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
-p tfhe-cuda-backend -- --no-deps -D warnings
|
||||
|
||||
.PHONY: tfhe_lints # Run custom tfhe-rs lints
|
||||
tfhe_lints: install_tfhe_lints
|
||||
cd tfhe && RUSTFLAGS="$(RUSTFLAGS)" cargo tfhe-lints \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -- -D warnings
|
||||
|
||||
.PHONY: build_core # Build core_crypto without experimental features
|
||||
build_core: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
@@ -362,21 +385,21 @@ symlink_c_libs_without_fingerprint:
|
||||
.PHONY: build_c_api # Build the C API for boolean, shortint and integer
|
||||
build_c_api: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok-experimental,$(FORWARD_COMPAT_FEATURE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,$(FORWARD_COMPAT_FEATURE) \
|
||||
-p $(TFHE_SPEC)
|
||||
@"$(MAKE)" symlink_c_libs_without_fingerprint
|
||||
|
||||
.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
|
||||
build_c_api_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok-experimental,gpu \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
|
||||
-p $(TFHE_SPEC)
|
||||
@"$(MAKE)" symlink_c_libs_without_fingerprint
|
||||
|
||||
.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
|
||||
build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok-experimental,experimental-force_fft_algo_dif4,$(FORWARD_COMPAT_FEATURE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4,$(FORWARD_COMPAT_FEATURE) \
|
||||
-p $(TFHE_SPEC)
|
||||
@"$(MAKE)" symlink_c_libs_without_fingerprint
|
||||
|
||||
@@ -385,7 +408,7 @@ build_web_js_api: install_rs_build_toolchain install_wasm_pack
|
||||
cd tfhe && \
|
||||
RUSTFLAGS="$(WASM_RUSTFLAGS)" rustup run "$(RS_BUILD_TOOLCHAIN)" \
|
||||
wasm-pack build --release --target=web \
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok-experimental
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok
|
||||
|
||||
.PHONY: build_web_js_api_parallel # Build the js API targeting the web browser with parallelism support
|
||||
build_web_js_api_parallel: install_rs_check_toolchain install_wasm_pack
|
||||
@@ -393,15 +416,16 @@ build_web_js_api_parallel: install_rs_check_toolchain install_wasm_pack
|
||||
rustup component add rust-src --toolchain $(RS_CHECK_TOOLCHAIN) && \
|
||||
RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory,+mutable-globals" rustup run $(RS_CHECK_TOOLCHAIN) \
|
||||
wasm-pack build --release --target=web \
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,parallel-wasm-api,zk-pok-experimental \
|
||||
-Z build-std=panic_abort,std
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,parallel-wasm-api,zk-pok \
|
||||
-Z build-std=panic_abort,std && \
|
||||
find pkg/snippets -type f -iname workerHelpers.worker.js -exec sed -i "s|from '..\/..\/..\/';|from '..\/..\/..\/tfhe.js';|" {} \;
|
||||
|
||||
.PHONY: build_node_js_api # Build the js API targeting nodejs
|
||||
build_node_js_api: install_rs_build_toolchain install_wasm_pack
|
||||
cd tfhe && \
|
||||
RUSTFLAGS="$(WASM_RUSTFLAGS)" rustup run "$(RS_BUILD_TOOLCHAIN)" \
|
||||
wasm-pack build --release --target=nodejs \
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok-experimental
|
||||
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,zk-pok
|
||||
|
||||
.PHONY: build_concrete_csprng # Build concrete_csprng
|
||||
build_concrete_csprng: install_rs_build_toolchain
|
||||
@@ -411,10 +435,10 @@ build_concrete_csprng: install_rs_build_toolchain
|
||||
.PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
|
||||
test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok-experimental -p $(TFHE_SPEC) -- core_crypto::
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok-experimental,$(AVX512_FEATURE) -p $(TFHE_SPEC) -- core_crypto::; \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok,$(AVX512_FEATURE) -p $(TFHE_SPEC) -- core_crypto::; \
|
||||
fi
|
||||
|
||||
.PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
|
||||
@@ -437,8 +461,8 @@ test_cuda_backend:
|
||||
mkdir -p "$(TFHECUDA_BUILD)" && \
|
||||
cd "$(TFHECUDA_BUILD)" && \
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Release -DTFHE_CUDA_BACKEND_BUILD_TESTS=ON && \
|
||||
make -j "$(CPU_COUNT)" && \
|
||||
make test
|
||||
"$(MAKE)" -j "$(CPU_COUNT)" && \
|
||||
"$(MAKE)" test
|
||||
|
||||
.PHONY: test_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
|
||||
test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
|
||||
@@ -453,10 +477,64 @@ test_core_crypto_gpu: install_rs_build_toolchain
|
||||
.PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
|
||||
test_integer_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
|
||||
|
||||
.PHONY: test_integer_gpu_ci # Run the tests for integer ci on gpu backend
|
||||
test_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
|
||||
--tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_unsigned_integer_gpu_ci # Run the tests for unsigned integer ci on gpu backend
|
||||
test_unsigned_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
|
||||
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_signed_integer_gpu_ci # Run the tests for signed integer ci on gpu backend
|
||||
test_signed_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
|
||||
--signed-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_integer_multi_bit_gpu_ci # Run the tests for integer ci on gpu backend running only multibit tests
|
||||
test_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
|
||||
--tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_unsigned_integer_multi_bit_gpu_ci # Run the tests for unsigned integer ci on gpu backend running only multibit tests
|
||||
test_unsigned_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
|
||||
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_signed_integer_multi_bit_gpu_ci # Run the tests for signed integer ci on gpu backend running only multibit tests
|
||||
test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
|
||||
--signed-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_boolean # Run the tests of the boolean module
|
||||
test_boolean: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
@@ -519,6 +597,7 @@ test_shortint_cov: install_rs_check_toolchain install_tarpaulin
|
||||
test_integer_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--tfhe-package "$(TFHE_SPEC)"
|
||||
@@ -527,6 +606,7 @@ test_integer_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
test_unsigned_integer_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
|
||||
@@ -535,6 +615,7 @@ test_unsigned_integer_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
test_signed_integer_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--signed-only --tfhe-package "$(TFHE_SPEC)"
|
||||
@@ -543,22 +624,25 @@ test_signed_integer_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
test_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_unsigned_integer_multi_bit_ci # Run the tests for nsigned integer ci running only multibit tests
|
||||
.PHONY: test_unsigned_integer_multi_bit_ci # Run the tests for unsigned integer ci running only multibit tests
|
||||
test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_signed_integer_multi_bit_ci # Run the tests for nsigned integer ci running only multibit tests
|
||||
.PHONY: test_signed_integer_multi_bit_ci # Run the tests for signed integer ci running only multibit tests
|
||||
test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
FAST_TESTS="$(FAST_TESTS)" \
|
||||
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--signed-only --tfhe-package "$(TFHE_SPEC)"
|
||||
@@ -585,7 +669,7 @@ test_integer_cov: install_rs_check_toolchain install_tarpaulin
|
||||
.PHONY: test_high_level_api # Run all the tests for high_level_api
|
||||
test_high_level_api: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok-experimental -p $(TFHE_SPEC) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok -p $(TFHE_SPEC) \
|
||||
-- high_level_api::
|
||||
|
||||
test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
|
||||
@@ -596,14 +680,14 @@ test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
|
||||
.PHONY: test_user_doc # Run tests from the .md documentation
|
||||
test_user_doc: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok-experimental \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
|
||||
-p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
|
||||
.PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
|
||||
test_user_doc_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu,zk-pok-experimental -p $(TFHE_SPEC) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
|
||||
.PHONY: test_fhe_strings # Run tests for fhe_strings example
|
||||
@@ -642,18 +726,38 @@ test_concrete_csprng: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE) -p concrete-csprng
|
||||
|
||||
.PHONY: test_zk_pok # Run tfhe-zk-pok-experimental tests
|
||||
.PHONY: test_zk_pok # Run tfhe-zk-pok tests
|
||||
test_zk_pok: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-zk-pok
|
||||
|
||||
.PHONY: test_versionable # Run tests for tfhe-versionable subcrate
|
||||
test_versionable: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-versionable
|
||||
|
||||
# The backward compat data repo holds historical binary data but also rust code to generate and load them.
|
||||
# Here we use the "patch" functionality of Cargo to make sure the repo used for the data is the same as the one used for the code.
|
||||
.PHONY: test_backward_compatibility_ci
|
||||
test_backward_compatibility_ci: install_rs_build_toolchain
|
||||
TFHE_BACKWARD_COMPAT_DATA_DIR="$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tfhe/$(BACKWARD_COMPAT_DATA_DIR)\"" \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,integer -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture
|
||||
|
||||
.PHONY: test_backward_compatibility # Same as test_backward_compatibility_ci but tries to clone the data repo first if needed
|
||||
test_backward_compatibility: tfhe/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci
|
||||
|
||||
.PHONY: backward_compat_branch # Prints the required backward compatibility branch
|
||||
backward_compat_branch:
|
||||
@echo "$(BACKWARD_COMPAT_DATA_BRANCH)"
|
||||
|
||||
.PHONY: doc # Build rust doc
|
||||
doc: install_rs_check_toolchain
|
||||
@# Even though we are not in docs.rs, this allows to "just" build the doc
|
||||
DOCS_RS=1 \
|
||||
RUSTDOCFLAGS="--html-in-header katex-header.html" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental --no-deps -p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: docs # Build rust doc alias for doc
|
||||
docs: doc
|
||||
@@ -664,7 +768,7 @@ lint_doc: install_rs_check_toolchain
|
||||
DOCS_RS=1 \
|
||||
RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental -p $(TFHE_SPEC) --no-deps
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
|
||||
|
||||
.PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
|
||||
lint_docs: lint_doc
|
||||
@@ -682,6 +786,14 @@ format_doc_latex:
|
||||
check_md_docs_are_tested:
|
||||
RUSTFLAGS="" cargo xtask check_tfhe_docs_are_tested
|
||||
|
||||
.PHONY: check_intra_md_links # Checks broken internal links in Markdown docs
|
||||
check_intra_md_links: install_mlc
|
||||
mlc --offline --match-file-extension tfhe/docs
|
||||
|
||||
.PHONY: check_md_links # Checks all broken links in Markdown docs
|
||||
check_md_links: install_mlc
|
||||
mlc --match-file-extension tfhe/docs
|
||||
|
||||
.PHONY: check_compile_tests # Build tests in debug without running them
|
||||
check_compile_tests: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
|
||||
@@ -701,7 +813,7 @@ check_compile_tests_benches_gpu: install_rs_build_toolchain
|
||||
mkdir -p "$(TFHECUDA_BUILD)" && \
|
||||
cd "$(TFHECUDA_BUILD)" && \
|
||||
cmake .. -DCMAKE_BUILD_TYPE=Debug -DTFHE_CUDA_BACKEND_BUILD_TESTS=ON -DTFHE_CUDA_BACKEND_BUILD_BENCHMARKS=ON && \
|
||||
make -j "$(CPU_COUNT)"
|
||||
"$(MAKE)" -j "$(CPU_COUNT)"
|
||||
|
||||
.PHONY: build_nodejs_test_docker # Build a docker image with tools to run nodejs tests for wasm API
|
||||
build_nodejs_test_docker:
|
||||
@@ -721,14 +833,14 @@ test_nodejs_wasm_api_in_docker: build_nodejs_test_docker
|
||||
|
||||
.PHONY: test_nodejs_wasm_api # Run tests for the nodejs on wasm API
|
||||
test_nodejs_wasm_api: build_node_js_api
|
||||
cd tfhe && node --test js_on_wasm_tests
|
||||
cd tfhe/js_on_wasm_tests && npm run test
|
||||
|
||||
.PHONY: test_web_js_api_parallel # Run tests for the web wasm api
|
||||
test_web_js_api_parallel: build_web_js_api_parallel
|
||||
$(MAKE) -C tfhe/web_wasm_parallel_tests test
|
||||
|
||||
.PHONY: ci_test_web_js_api_parallel # Run tests for the web wasm api
|
||||
ci_test_web_js_api_parallel: build_web_js_api_parallel
|
||||
.PHONY: test_web_js_api_parallel_ci # Run tests for the web wasm api
|
||||
test_web_js_api_parallel_ci: build_web_js_api_parallel
|
||||
source ~/.nvm/nvm.sh && \
|
||||
nvm install $(NODE_VERSION) && \
|
||||
nvm use $(NODE_VERSION) && \
|
||||
@@ -795,6 +907,22 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
--bench integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
|
||||
bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=MULTI_BIT \
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
|
||||
|
||||
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
|
||||
bench_integer_zk: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench zk-pke-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,zk-pok,nightly-avx512 \
|
||||
-p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_shortint # Run benchmarks for shortint
|
||||
bench_shortint: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
@@ -802,16 +930,12 @@ bench_shortint: install_rs_check_toolchain
|
||||
--bench shortint-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_oprf # Run benchmarks for shortint
|
||||
bench_oprf: install_rs_check_toolchain
|
||||
.PHONY: bench_shortint_oprf # Run benchmarks for shortint
|
||||
bench_shortint_oprf: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench oprf-shortint-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench oprf-integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_shortint_multi_bit # Run benchmarks for shortint using multi-bit parameters
|
||||
bench_shortint_multi_bit: install_rs_check_toolchain
|
||||
@@ -833,9 +957,15 @@ bench_pbs: install_rs_check_toolchain
|
||||
--bench pbs-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs128 # Run benchmarks for PBS using FFT 128 bits
|
||||
bench_pbs128: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs128-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
|
||||
bench_pbs_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_FAST_BENCH=$(FAST_BENCH) cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
@@ -855,10 +985,10 @@ bench_ks_gpu: install_rs_check_toolchain
|
||||
bench_web_js_api_parallel: build_web_js_api_parallel
|
||||
$(MAKE) -C tfhe/web_wasm_parallel_tests bench
|
||||
|
||||
.PHONY: ci_bench_web_js_api_parallel # Run benchmarks for the web wasm api
|
||||
ci_bench_web_js_api_parallel: build_web_js_api_parallel
|
||||
.PHONY: bench_web_js_api_parallel_ci # Run benchmarks for the web wasm api
|
||||
bench_web_js_api_parallel_ci: build_web_js_api_parallel
|
||||
source ~/.nvm/nvm.sh && \
|
||||
nvm use node && \
|
||||
nvm use $(NODE_VERSION) && \
|
||||
$(MAKE) -C tfhe/web_wasm_parallel_tests bench-ci
|
||||
|
||||
#
|
||||
@@ -915,6 +1045,12 @@ write_params_to_file: install_rs_check_toolchain
|
||||
--example write_params_to_file \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache
|
||||
|
||||
.PHONY: clone_backward_compat_data # Clone the data repo needed for backward compatibility tests
|
||||
clone_backward_compat_data:
|
||||
./scripts/clone_backward_compat_data.sh $(BACKWARD_COMPAT_DATA_URL) $(BACKWARD_COMPAT_DATA_BRANCH) tfhe/$(BACKWARD_COMPAT_DATA_DIR)
|
||||
|
||||
tfhe/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
|
||||
|
||||
#
|
||||
# Real use case examples
|
||||
#
|
||||
@@ -940,8 +1076,8 @@ sha256_bool: install_rs_check_toolchain
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean
|
||||
|
||||
.PHONY: pcc # pcc stands for pre commit checks (except GPU)
|
||||
pcc: no_tfhe_typo no_dbg_log check_fmt lint_doc check_md_docs_are_tested clippy_all \
|
||||
check_compile_tests
|
||||
pcc: no_tfhe_typo no_dbg_log check_fmt lint_doc check_md_docs_are_tested check_intra_md_links \
|
||||
clippy_all tfhe_lints check_compile_tests
|
||||
|
||||
.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
|
||||
pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu
|
||||
|
||||
30
README.md
30
README.md
@@ -50,7 +50,7 @@ production-ready library for all the advanced features of TFHE.
|
||||
<br></br>
|
||||
|
||||
## Table of Contents
|
||||
- **[Getting Started](#getting-started)**
|
||||
- **[Getting started](#getting-started)**
|
||||
- [Cargo.toml configuration](#cargotoml-configuration)
|
||||
- [A simple example](#a-simple-example)
|
||||
- **[Resources](#resources)**
|
||||
@@ -65,7 +65,7 @@ production-ready library for all the advanced features of TFHE.
|
||||
- **[Support](#support)**
|
||||
<br></br>
|
||||
|
||||
## Getting Started
|
||||
## Getting started
|
||||
|
||||
### Cargo.toml configuration
|
||||
To use the latest version of `TFHE-rs` in your project, you first need to add it as a dependency in your `Cargo.toml`:
|
||||
@@ -198,7 +198,7 @@ Full, comprehensive documentation is available here: [https://docs.zama.ai/tfhe-
|
||||
|
||||
### Disclaimers
|
||||
|
||||
#### Security Estimation
|
||||
#### Security estimation
|
||||
|
||||
Security estimations are done using the
|
||||
[Lattice Estimator](https://github.com/malb/lattice-estimator)
|
||||
@@ -206,13 +206,13 @@ with `red_cost_model = reduction.RC.BDGL16`.
|
||||
|
||||
When a new update is published in the Lattice Estimator, we update parameters accordingly.
|
||||
|
||||
### Security Model
|
||||
### Security model
|
||||
|
||||
The default parameters for the TFHE-rs library are chosen considering the IND-CPA security model, and are selected with a bootstrapping failure probability fixed at p_error = $2^{-40}$. In particular, it is assumed that the results of decrypted computations are not shared by the secret key owner with any third parties, as such an action can lead to leakage of the secret encryption key. If you are designing an application where decryptions must be shared, you will need to craft custom encryption parameters which are chosen in consideration of the IND-CPA^D security model [1].
|
||||
The default parameters for the TFHE-rs library are chosen considering the IND-CPA security model, and are selected with a bootstrapping failure probability fixed at p_error = $2^{-64}$. In particular, it is assumed that the results of decrypted computations are not shared by the secret key owner with any third parties, as such an action can lead to leakage of the secret encryption key. If you are designing an application where decryptions must be shared, you will need to craft custom encryption parameters which are chosen in consideration of the IND-CPA^D security model [1].
|
||||
|
||||
[1] Li, Baiyu, et al. "Securing approximate homomorphic encryption using differential privacy." Annual International Cryptology Conference. Cham: Springer Nature Switzerland, 2022. https://eprint.iacr.org/2022/816.pdf
|
||||
|
||||
#### Side-Channel Attacks
|
||||
#### Side-channel attacks
|
||||
|
||||
Mitigation for side-channel attacks has not yet been implemented in TFHE-rs,
|
||||
and will be released in upcoming versions.
|
||||
@@ -241,7 +241,23 @@ Becoming an approved contributor involves signing our Contributor License Agreem
|
||||
<br></br>
|
||||
|
||||
### License
|
||||
This software is distributed under the **BSD-3-Clause-Clear** license. If you have any questions, please contact us at hello@zama.ai.
|
||||
This software is distributed under the **BSD-3-Clause-Clear** license. Read [this](LICENSE) for more details.
|
||||
|
||||
#### FAQ
|
||||
**Is Zama’s technology free to use?**
|
||||
>Zama’s libraries are free to use under the BSD 3-Clause Clear license only for development, research, prototyping, and experimentation purposes. However, for any commercial use of Zama's open source code, companies must purchase Zama’s commercial patent license.
|
||||
>
|
||||
>Everything we do is open source and we are very transparent on what it means for our users, you can read more about how we monetize our open source products at Zama in [this blogpost](https://www.zama.ai/post/open-source).
|
||||
|
||||
**What do I need to do if I want to use Zama’s technology for commercial purposes?**
|
||||
>To commercially use Zama’s technology you need to be granted Zama’s patent license. Please contact us hello@zama.ai for more information.
|
||||
|
||||
**Do you file IP on your technology?**
|
||||
>Yes, all Zama’s technologies are patented.
|
||||
|
||||
**Can you customize a solution for my specific use case?**
|
||||
>We are open to collaborating and advancing the FHE space with our partners. If you have specific needs, please email us at hello@zama.ai.
|
||||
|
||||
<p align="right">
|
||||
<a href="#about" > ↑ Back to top </a>
|
||||
</p>
|
||||
|
||||
@@ -4,9 +4,8 @@ use tfhe::{generate_keys, ConfigBuilder, FheUint64, FheUint8};
|
||||
use tfhe_trivium::{KreyviumStreamByte, TransCiphering};
|
||||
|
||||
pub fn kreyvium_byte_gen(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.enable_function_evaluation()
|
||||
.build();
|
||||
let config = ConfigBuilder::default().build();
|
||||
|
||||
let (client_key, server_key) = generate_keys(config);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
@@ -33,9 +32,8 @@ pub fn kreyvium_byte_gen(c: &mut Criterion) {
|
||||
}
|
||||
|
||||
pub fn kreyvium_byte_trans(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.enable_function_evaluation()
|
||||
.build();
|
||||
let config = ConfigBuilder::default().build();
|
||||
|
||||
let (client_key, server_key) = generate_keys(config);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
@@ -63,9 +61,8 @@ pub fn kreyvium_byte_trans(c: &mut Criterion) {
|
||||
}
|
||||
|
||||
pub fn kreyvium_byte_warmup(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.enable_function_evaluation()
|
||||
.build();
|
||||
let config = ConfigBuilder::default().build();
|
||||
|
||||
let (client_key, server_key) = generate_keys(config);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
|
||||
@@ -13,7 +13,7 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, &server_key),
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
@@ -63,7 +63,7 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, &server_key),
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
@@ -108,7 +108,7 @@ pub fn kreyvium_shortint_trans(c: &mut Criterion) {
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, &server_key),
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
@@ -13,7 +13,7 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, &server_key),
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
@@ -63,7 +63,7 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, &server_key),
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
@@ -108,7 +108,7 @@ pub fn trivium_shortint_trans(c: &mut Criterion) {
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, &server_key),
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
@@ -119,7 +119,7 @@ impl KreyviumStreamByte<FheUint8> {
|
||||
}
|
||||
|
||||
// Key and iv are stored in reverse in their shift registers
|
||||
let mut key = key_bytes.map(|b| b.map(|x| (x as u8).reverse_bits() as u64));
|
||||
let mut key = key_bytes.map(|b| b.reverse_bits());
|
||||
let mut iv = iv_bytes.map(|x| FheUint8::encrypt_trivial(x.reverse_bits()));
|
||||
key.reverse();
|
||||
iv.reverse();
|
||||
|
||||
@@ -224,7 +224,7 @@ fn kreyvium_test_shortint_long() {
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, &server_key),
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
@@ -299,9 +299,8 @@ fn kreyvium_test_clear_byte() {
|
||||
|
||||
#[test]
|
||||
fn kreyvium_test_byte_long() {
|
||||
let config = ConfigBuilder::default()
|
||||
.enable_function_evaluation()
|
||||
.build();
|
||||
let config = ConfigBuilder::default().build();
|
||||
|
||||
let (client_key, server_key) = generate_keys(config);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
@@ -338,9 +337,8 @@ fn kreyvium_test_byte_long() {
|
||||
|
||||
#[test]
|
||||
fn kreyvium_test_fhe_byte_transciphering_long() {
|
||||
let config = ConfigBuilder::default()
|
||||
.enable_function_evaluation()
|
||||
.build();
|
||||
let config = ConfigBuilder::default().build();
|
||||
|
||||
let (client_key, server_key) = generate_keys(config);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
|
||||
@@ -360,7 +360,7 @@ fn trivium_test_shortint_long() {
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, &server_key),
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tfhe-cuda-backend"
|
||||
version = "0.2.0"
|
||||
version = "0.4.0-alpha.0"
|
||||
edition = "2021"
|
||||
authors = ["Zama team"]
|
||||
license = "BSD-3-Clause-Clear"
|
||||
@@ -14,6 +14,3 @@ keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
|
||||
[build-dependencies]
|
||||
cmake = { version = "0.1" }
|
||||
pkg-config = { version = "0.3" }
|
||||
|
||||
[dependencies]
|
||||
thiserror = "1.0"
|
||||
|
||||
@@ -8,7 +8,24 @@ fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
// This is a workaround to the current nightly toolchain (2024-06-27 which started with
|
||||
// toolchain 2024-05-05) build issue
|
||||
// Essentially if cbindgen is running, a wrong argument ends up forwarded to the cuda backend
|
||||
// "make" command during macro expansions for TFHE-rs C API, crashing make for make < 4.4 and
|
||||
// thus crashing the build
|
||||
// On the other hand, this speeds up C API build greatly given we don't have macro expansions
|
||||
// in the CUDA backend so this skips the second compilation of TFHE-rs for macro inspection by
|
||||
// cbindgen
|
||||
if std::env::var("_CBINDGEN_IS_RUNNING").is_ok() {
|
||||
return;
|
||||
}
|
||||
|
||||
println!("Build tfhe-cuda-backend");
|
||||
println!("cargo::rerun-if-changed=cuda/include");
|
||||
println!("cargo::rerun-if-changed=cuda/src");
|
||||
println!("cargo::rerun-if-changed=cuda/tests_and_benchmarks");
|
||||
println!("cargo::rerun-if-changed=cuda/CMakeLists.txt");
|
||||
println!("cargo::rerun-if-changed=src");
|
||||
if env::consts::OS == "linux" {
|
||||
let output = Command::new("./get_os_name.sh").output().unwrap();
|
||||
let distribution = String::from_utf8(output.stdout).unwrap();
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
|
||||
@@ -1,18 +1,25 @@
|
||||
#ifndef CUDA_CIPHERTEXT_H
|
||||
#define CUDA_CIPHERTEXT_H
|
||||
|
||||
#include "device.h"
|
||||
#include <cstdint>
|
||||
|
||||
extern "C" {
|
||||
void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *dest, void *src,
|
||||
void *v_stream,
|
||||
void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *stream,
|
||||
uint32_t gpu_index,
|
||||
void *dest, void *src,
|
||||
uint32_t number_of_cts,
|
||||
uint32_t lwe_dimension);
|
||||
void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *dest, void *src,
|
||||
void *v_stream,
|
||||
void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *stream,
|
||||
uint32_t gpu_index,
|
||||
void *dest, void *src,
|
||||
uint32_t number_of_cts,
|
||||
uint32_t lwe_dimension);
|
||||
|
||||
void cuda_glwe_sample_extract_64(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out, void *glwe_array_in,
|
||||
uint32_t *nth_array, uint32_t num_glwes,
|
||||
uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size);
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <cuda_runtime.h>
|
||||
#include <vector>
|
||||
|
||||
#define synchronize_threads_in_block() __syncthreads()
|
||||
extern "C" {
|
||||
@@ -26,31 +27,15 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
|
||||
std::abort(); \
|
||||
}
|
||||
|
||||
struct cuda_stream_t {
|
||||
cudaStream_t stream;
|
||||
uint32_t gpu_index;
|
||||
cudaStream_t cuda_create_stream(uint32_t gpu_index);
|
||||
|
||||
cuda_stream_t(uint32_t gpu_index) {
|
||||
this->gpu_index = gpu_index;
|
||||
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
check_cuda_error(cudaStreamCreate(&stream));
|
||||
}
|
||||
|
||||
void release() {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaStreamDestroy(stream));
|
||||
}
|
||||
|
||||
void synchronize() { check_cuda_error(cudaStreamSynchronize(stream)); }
|
||||
};
|
||||
|
||||
cuda_stream_t *cuda_create_stream(uint32_t gpu_index);
|
||||
|
||||
void cuda_destroy_stream(cuda_stream_t *stream);
|
||||
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void *cuda_malloc(uint64_t size, uint32_t gpu_index);
|
||||
|
||||
void *cuda_malloc_async(uint64_t size, cuda_stream_t *stream);
|
||||
void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
|
||||
|
||||
@@ -59,16 +44,16 @@ bool cuda_check_support_cooperative_groups();
|
||||
bool cuda_check_support_thread_block_clusters();
|
||||
|
||||
void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
|
||||
cuda_stream_t *stream);
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_async_gpu_to_gpu(void *dest, void *src, uint64_t size,
|
||||
cuda_stream_t *stream);
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
|
||||
cuda_stream_t *stream);
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
|
||||
cuda_stream_t *stream);
|
||||
cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
int cuda_get_number_of_gpus();
|
||||
|
||||
@@ -76,20 +61,12 @@ void cuda_synchronize_device(uint32_t gpu_index);
|
||||
|
||||
void cuda_drop(void *ptr, uint32_t gpu_index);
|
||||
|
||||
void cuda_drop_async(void *ptr, cuda_stream_t *stream);
|
||||
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
int cuda_get_max_shared_memory(uint32_t gpu_index);
|
||||
|
||||
void cuda_synchronize_stream(cuda_stream_t *stream);
|
||||
|
||||
void cuda_stream_add_callback(cuda_stream_t *stream,
|
||||
cudaStreamCallback_t callback, void *user_data);
|
||||
|
||||
void host_free_on_stream_callback(cudaStream_t stream, cudaError_t status,
|
||||
void *host_pointer);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void cuda_set_value_async(cudaStream_t *stream, Torus *d_array, Torus value,
|
||||
Torus n);
|
||||
void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
Torus *d_array, Torus value, Torus n);
|
||||
#endif
|
||||
|
||||
34
backends/tfhe-cuda-backend/cuda/include/helper_multi_gpu.h
Normal file
34
backends/tfhe-cuda-backend/cuda/include/helper_multi_gpu.h
Normal file
@@ -0,0 +1,34 @@
|
||||
#ifndef HELPER_MULTI_GPU_H
|
||||
#define HELPER_MULTI_GPU_H
|
||||
#include <mutex>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
extern std::mutex m;
|
||||
extern bool p2p_enabled;
|
||||
|
||||
extern "C" {
|
||||
int cuda_setup_multi_gpu();
|
||||
}
|
||||
|
||||
// Define a variant type that can be either a vector or a single pointer
|
||||
template <typename Torus>
|
||||
using LweArrayVariant = std::variant<std::vector<Torus *>, Torus *>;
|
||||
|
||||
// Macro to define the visitor logic using std::holds_alternative for vectors
|
||||
#define GET_VARIANT_ELEMENT(variant, index) \
|
||||
[&] { \
|
||||
if (std::holds_alternative<std::vector<Torus *>>(variant)) { \
|
||||
return std::get<std::vector<Torus *>>(variant)[index]; \
|
||||
} else { \
|
||||
return std::get<Torus *>(variant); \
|
||||
} \
|
||||
}()
|
||||
|
||||
int get_active_gpu_count(int num_inputs, int gpu_count);
|
||||
|
||||
int get_num_inputs_on_gpu(int total_num_inputs, int gpu_index, int gpu_count);
|
||||
|
||||
int get_gpu_offset(int total_num_inputs, int gpu_index, int gpu_count);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
@@ -6,16 +6,16 @@
|
||||
extern "C" {
|
||||
|
||||
void cuda_keyswitch_lwe_ciphertext_vector_32(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
|
||||
void *lwe_array_in, void *lwe_input_indexes, void *ksk,
|
||||
uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples);
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void *lwe_output_indexes, void *lwe_array_in, void *lwe_input_indexes,
|
||||
void *ksk, uint32_t lwe_dimension_in, uint32_t lwe_dimension_out,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples);
|
||||
|
||||
void cuda_keyswitch_lwe_ciphertext_vector_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
|
||||
void *lwe_array_in, void *lwe_input_indexes, void *ksk,
|
||||
uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples);
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void *lwe_output_indexes, void *lwe_array_in, void *lwe_input_indexes,
|
||||
void *ksk, uint32_t lwe_dimension_in, uint32_t lwe_dimension_out,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples);
|
||||
}
|
||||
|
||||
#endif // CNCRT_KS_H_
|
||||
|
||||
@@ -7,42 +7,42 @@
|
||||
|
||||
extern "C" {
|
||||
|
||||
void cuda_negate_lwe_ciphertext_vector_32(cuda_stream_t *stream,
|
||||
void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void *lwe_array_in,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_negate_lwe_ciphertext_vector_64(cuda_stream_t *stream,
|
||||
void cuda_negate_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void *lwe_array_in,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_32(cuda_stream_t *stream,
|
||||
void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void *lwe_array_in_1,
|
||||
void *lwe_array_in_2,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_64(cuda_stream_t *stream,
|
||||
void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out,
|
||||
void *lwe_array_in_1,
|
||||
void *lwe_array_in_2,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in,
|
||||
void *plaintext_array_in, uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in,
|
||||
void *plaintext_array_in, uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in,
|
||||
void *cleartext_array_in, uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out, void *lwe_array_in,
|
||||
void *cleartext_array_in, uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
@@ -8,111 +8,110 @@ enum PBS_TYPE { MULTI_BIT = 0, CLASSICAL = 1 };
|
||||
enum PBS_VARIANT { DEFAULT = 0, CG = 1, TBC = 2 };
|
||||
|
||||
extern "C" {
|
||||
void cuda_fourier_polynomial_mul(void *input1, void *input2, void *output,
|
||||
cuda_stream_t *stream,
|
||||
void cuda_fourier_polynomial_mul(cudaStream_t stream, uint32_t gpu_index,
|
||||
void *input1, void *input2, void *output,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t total_polynomials);
|
||||
|
||||
void cuda_convert_lwe_programmable_bootstrap_key_32(
|
||||
void *dest, void *src, cuda_stream_t *stream, uint32_t input_lwe_dim,
|
||||
uint32_t glwe_dim, uint32_t level_count, uint32_t polynomial_size);
|
||||
void *stream, uint32_t gpu_index, void *dest, void *src,
|
||||
uint32_t input_lwe_dim, uint32_t glwe_dim, uint32_t level_count,
|
||||
uint32_t polynomial_size);
|
||||
|
||||
void cuda_convert_lwe_programmable_bootstrap_key_64(
|
||||
void *dest, void *src, cuda_stream_t *stream, uint32_t input_lwe_dim,
|
||||
uint32_t glwe_dim, uint32_t level_count, uint32_t polynomial_size);
|
||||
void *stream, uint32_t gpu_index, void *dest, void *src,
|
||||
uint32_t input_lwe_dim, uint32_t glwe_dim, uint32_t level_count,
|
||||
uint32_t polynomial_size);
|
||||
|
||||
void scratch_cuda_programmable_bootstrap_amortized_32(
|
||||
cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t max_shared_memory, bool allocate_gpu_memory);
|
||||
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
|
||||
void scratch_cuda_programmable_bootstrap_amortized_64(
|
||||
cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t max_shared_memory, bool allocate_gpu_memory);
|
||||
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_programmable_bootstrap_amortized_lwe_ciphertext_vector_32(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
|
||||
void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void *lwe_output_indexes, void *lut_vector, void *lut_vector_indexes,
|
||||
void *lwe_array_in, void *lwe_input_indexes, void *bootstrapping_key,
|
||||
int8_t *pbs_buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples);
|
||||
|
||||
void cuda_programmable_bootstrap_amortized_lwe_ciphertext_vector_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
|
||||
void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void *lwe_output_indexes, void *lut_vector, void *lut_vector_indexes,
|
||||
void *lwe_array_in, void *lwe_input_indexes, void *bootstrapping_key,
|
||||
int8_t *pbs_buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples);
|
||||
|
||||
void cleanup_cuda_programmable_bootstrap_amortized(cuda_stream_t *stream,
|
||||
void cleanup_cuda_programmable_bootstrap_amortized(void *stream,
|
||||
uint32_t gpu_index,
|
||||
int8_t **pbs_buffer);
|
||||
|
||||
void scratch_cuda_programmable_bootstrap_32(
|
||||
cuda_stream_t *stream, int8_t **buffer, uint32_t glwe_dimension,
|
||||
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
|
||||
bool allocate_gpu_memory);
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
|
||||
void scratch_cuda_programmable_bootstrap_64(
|
||||
cuda_stream_t *stream, int8_t **buffer, uint32_t glwe_dimension,
|
||||
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
|
||||
bool allocate_gpu_memory);
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
|
||||
void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void *lwe_output_indexes, void *lut_vector, void *lut_vector_indexes,
|
||||
void *lwe_array_in, void *lwe_input_indexes, void *bootstrapping_key,
|
||||
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples);
|
||||
|
||||
void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
|
||||
void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void *lwe_output_indexes, void *lut_vector, void *lut_vector_indexes,
|
||||
void *lwe_array_in, void *lwe_input_indexes, void *bootstrapping_key,
|
||||
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples);
|
||||
|
||||
void cleanup_cuda_programmable_bootstrap(cuda_stream_t *stream,
|
||||
void cleanup_cuda_programmable_bootstrap(void *stream, uint32_t gpu_index,
|
||||
int8_t **pbs_buffer);
|
||||
|
||||
uint64_t get_buffer_size_programmable_bootstrap_amortized_64(
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory);
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
|
||||
uint64_t get_buffer_size_programmable_bootstrap_64(
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory);
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_full_sm_programmable_bootstrap_step_one(
|
||||
uint64_t get_buffer_size_full_sm_programmable_bootstrap_step_one(
|
||||
uint32_t polynomial_size) {
|
||||
return sizeof(Torus) * polynomial_size + // accumulator_rotated
|
||||
sizeof(double2) * polynomial_size / 2; // accumulator fft
|
||||
}
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_full_sm_programmable_bootstrap_step_two(
|
||||
uint64_t get_buffer_size_full_sm_programmable_bootstrap_step_two(
|
||||
uint32_t polynomial_size) {
|
||||
return sizeof(Torus) * polynomial_size + // accumulator
|
||||
sizeof(double2) * polynomial_size / 2; // accumulator fft
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
uint64_t
|
||||
get_buffer_size_partial_sm_programmable_bootstrap(uint32_t polynomial_size) {
|
||||
return sizeof(double2) * polynomial_size / 2; // accumulator fft
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
uint64_t
|
||||
get_buffer_size_full_sm_programmable_bootstrap_tbc(uint32_t polynomial_size) {
|
||||
return sizeof(Torus) * polynomial_size + // accumulator_rotated
|
||||
sizeof(Torus) * polynomial_size + // accumulator
|
||||
@@ -120,21 +119,19 @@ get_buffer_size_full_sm_programmable_bootstrap_tbc(uint32_t polynomial_size) {
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_partial_sm_programmable_bootstrap_tbc(
|
||||
uint64_t get_buffer_size_partial_sm_programmable_bootstrap_tbc(
|
||||
uint32_t polynomial_size) {
|
||||
return sizeof(double2) * polynomial_size / 2; // accumulator fft mask & body
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_sm_dsm_plus_tbc_classic_programmable_bootstrap(
|
||||
uint64_t get_buffer_size_sm_dsm_plus_tbc_classic_programmable_bootstrap(
|
||||
uint32_t polynomial_size) {
|
||||
return sizeof(double2) * polynomial_size / 2; // tbc
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
uint64_t
|
||||
get_buffer_size_full_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {
|
||||
return sizeof(Torus) * polynomial_size + // accumulator_rotated
|
||||
sizeof(Torus) * polynomial_size + // accumulator
|
||||
@@ -142,15 +139,14 @@ get_buffer_size_full_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
uint64_t
|
||||
get_buffer_size_partial_sm_programmable_bootstrap_cg(uint32_t polynomial_size) {
|
||||
return sizeof(double2) * polynomial_size / 2; // accumulator fft mask & body
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ bool
|
||||
supports_distributed_shared_memory_on_classic_programmable_bootstrap(
|
||||
uint32_t polynomial_size, uint32_t max_shared_memory);
|
||||
bool supports_distributed_shared_memory_on_classic_programmable_bootstrap(
|
||||
uint32_t polynomial_size);
|
||||
|
||||
template <typename Torus, PBS_TYPE pbs_type> struct pbs_buffer;
|
||||
|
||||
@@ -162,13 +158,14 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
|
||||
PBS_VARIANT pbs_variant;
|
||||
|
||||
pbs_buffer(cuda_stream_t *stream, uint32_t glwe_dimension,
|
||||
pbs_buffer(cudaStream_t stream, uint32_t gpu_index, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, PBS_VARIANT pbs_variant,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
this->pbs_variant = pbs_variant;
|
||||
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(stream->gpu_index);
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(0);
|
||||
|
||||
if (allocate_gpu_memory) {
|
||||
switch (pbs_variant) {
|
||||
@@ -200,17 +197,17 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
level_count * (glwe_dimension + 1);
|
||||
}
|
||||
// Otherwise, both kernels run all in shared memory
|
||||
d_mem = (int8_t *)cuda_malloc_async(device_mem, stream);
|
||||
d_mem = (int8_t *)cuda_malloc_async(device_mem, stream, gpu_index);
|
||||
|
||||
global_accumulator_fft = (double2 *)cuda_malloc_async(
|
||||
(glwe_dimension + 1) * level_count * input_lwe_ciphertext_count *
|
||||
(polynomial_size / 2) * sizeof(double2),
|
||||
stream);
|
||||
stream, gpu_index);
|
||||
|
||||
global_accumulator = (Torus *)cuda_malloc_async(
|
||||
(glwe_dimension + 1) * input_lwe_ciphertext_count *
|
||||
polynomial_size * sizeof(Torus),
|
||||
stream);
|
||||
stream, gpu_index);
|
||||
} break;
|
||||
case PBS_VARIANT::CG: {
|
||||
uint64_t full_sm =
|
||||
@@ -233,19 +230,19 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
}
|
||||
|
||||
// Otherwise, both kernels run all in shared memory
|
||||
d_mem = (int8_t *)cuda_malloc_async(device_mem, stream);
|
||||
d_mem = (int8_t *)cuda_malloc_async(device_mem, stream, gpu_index);
|
||||
|
||||
global_accumulator_fft = (double2 *)cuda_malloc_async(
|
||||
(glwe_dimension + 1) * level_count * input_lwe_ciphertext_count *
|
||||
polynomial_size / 2 * sizeof(double2),
|
||||
stream);
|
||||
stream, gpu_index);
|
||||
} break;
|
||||
#if CUDA_ARCH >= 900
|
||||
case PBS_VARIANT::TBC: {
|
||||
|
||||
bool supports_dsm =
|
||||
supports_distributed_shared_memory_on_classic_programmable_bootstrap<
|
||||
Torus>(polynomial_size, max_shared_memory);
|
||||
Torus>(polynomial_size);
|
||||
|
||||
uint64_t full_sm =
|
||||
get_buffer_size_full_sm_programmable_bootstrap_tbc<Torus>(
|
||||
@@ -280,12 +277,12 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
}
|
||||
|
||||
// Otherwise, both kernels run all in shared memory
|
||||
d_mem = (int8_t *)cuda_malloc_async(device_mem, stream);
|
||||
d_mem = (int8_t *)cuda_malloc_async(device_mem, stream, gpu_index);
|
||||
|
||||
global_accumulator_fft = (double2 *)cuda_malloc_async(
|
||||
(glwe_dimension + 1) * level_count * input_lwe_ciphertext_count *
|
||||
polynomial_size / 2 * sizeof(double2),
|
||||
stream);
|
||||
stream, gpu_index);
|
||||
} break;
|
||||
#endif
|
||||
default:
|
||||
@@ -294,20 +291,20 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
|
||||
}
|
||||
}
|
||||
|
||||
void release(cuda_stream_t *stream) {
|
||||
cuda_drop_async(d_mem, stream);
|
||||
cuda_drop_async(global_accumulator_fft, stream);
|
||||
void release(cudaStream_t stream, uint32_t gpu_index) {
|
||||
cuda_drop_async(d_mem, stream, gpu_index);
|
||||
cuda_drop_async(global_accumulator_fft, stream, gpu_index);
|
||||
|
||||
if (pbs_variant == DEFAULT)
|
||||
cuda_drop_async(global_accumulator, stream);
|
||||
cuda_drop_async(global_accumulator, stream, gpu_index);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t get_buffer_size_programmable_bootstrap_cg(
|
||||
uint64_t get_buffer_size_programmable_bootstrap_cg(
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory) {
|
||||
|
||||
uint32_t input_lwe_ciphertext_count) {
|
||||
int max_shared_memory = cuda_get_max_shared_memory(0);
|
||||
uint64_t full_sm =
|
||||
get_buffer_size_full_sm_programmable_bootstrap_cg<Torus>(polynomial_size);
|
||||
uint64_t partial_sm =
|
||||
@@ -333,74 +330,72 @@ template <typename Torus>
|
||||
bool has_support_to_cuda_programmable_bootstrap_cg(uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t level_count,
|
||||
uint32_t num_samples,
|
||||
uint32_t max_shared_memory);
|
||||
uint32_t num_samples);
|
||||
|
||||
template <typename Torus>
|
||||
void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
|
||||
Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in,
|
||||
Torus *lwe_input_indexes, double2 *bootstrapping_key,
|
||||
void *stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
Torus *lwe_output_indexes, Torus *lut_vector, Torus *lut_vector_indexes,
|
||||
Torus *lwe_array_in, Torus *lwe_input_indexes, double2 *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_luts,
|
||||
uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
uint32_t level_count, uint32_t num_samples);
|
||||
|
||||
template <typename Torus>
|
||||
void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
|
||||
Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in,
|
||||
Torus *lwe_input_indexes, double2 *bootstrapping_key,
|
||||
void *stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
Torus *lwe_output_indexes, Torus *lut_vector, Torus *lut_vector_indexes,
|
||||
Torus *lwe_array_in, Torus *lwe_input_indexes, double2 *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_luts,
|
||||
uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
uint32_t level_count, uint32_t num_samples);
|
||||
|
||||
#if (CUDA_ARCH >= 900)
|
||||
template <typename Torus>
|
||||
void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
|
||||
Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in,
|
||||
Torus *lwe_input_indexes, double2 *bootstrapping_key,
|
||||
void *stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
Torus *lwe_output_indexes, Torus *lut_vector, Torus *lut_vector_indexes,
|
||||
Torus *lwe_array_in, Torus *lwe_input_indexes, double2 *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_luts,
|
||||
uint32_t lwe_idx, uint32_t max_shared_memory);
|
||||
uint32_t level_count, uint32_t num_samples);
|
||||
|
||||
template <typename Torus, typename STorus>
|
||||
template <typename Torus>
|
||||
void scratch_cuda_programmable_bootstrap_tbc(
|
||||
cuda_stream_t *stream, pbs_buffer<Torus, CLASSICAL> **pbs_buffer,
|
||||
void *stream, uint32_t gpu_index, pbs_buffer<Torus, CLASSICAL> **pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
|
||||
bool allocate_gpu_memory);
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
#endif
|
||||
|
||||
template <typename Torus, typename STorus>
|
||||
template <typename Torus>
|
||||
void scratch_cuda_programmable_bootstrap_cg(
|
||||
cuda_stream_t *stream, pbs_buffer<Torus, CLASSICAL> **pbs_buffer,
|
||||
void *stream, uint32_t gpu_index, pbs_buffer<Torus, CLASSICAL> **pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
|
||||
bool allocate_gpu_memory);
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
|
||||
template <typename Torus, typename STorus>
|
||||
template <typename Torus>
|
||||
void scratch_cuda_programmable_bootstrap(
|
||||
cuda_stream_t *stream, pbs_buffer<Torus, CLASSICAL> **buffer,
|
||||
void *stream, uint32_t gpu_index, pbs_buffer<Torus, CLASSICAL> **buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
|
||||
bool allocate_gpu_memory);
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
|
||||
template <typename Torus>
|
||||
bool has_support_to_cuda_programmable_bootstrap_tbc(uint32_t num_samples,
|
||||
uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t level_count,
|
||||
uint32_t max_shared_memory);
|
||||
uint32_t level_count);
|
||||
|
||||
#ifdef __CUDACC__
|
||||
__device__ inline int get_start_ith_ggsw(int i, uint32_t polynomial_size,
|
||||
int glwe_dimension,
|
||||
uint32_t level_count);
|
||||
|
||||
template <typename T>
|
||||
__device__ const T *get_ith_mask_kth_block(const T *ptr, int i, int k,
|
||||
int level, uint32_t polynomial_size,
|
||||
int glwe_dimension,
|
||||
uint32_t level_count);
|
||||
|
||||
template <typename T>
|
||||
__device__ T *get_ith_mask_kth_block(T *ptr, int i, int k, int level,
|
||||
uint32_t polynomial_size,
|
||||
@@ -412,8 +407,8 @@ __device__ T *get_ith_body_kth_block(T *ptr, int i, int k, int level,
|
||||
int glwe_dimension, uint32_t level_count);
|
||||
|
||||
template <typename T>
|
||||
__device__ T *get_multi_bit_ith_lwe_gth_group_kth_block(
|
||||
T *ptr, int g, int i, int k, int level, uint32_t grouping_factor,
|
||||
__device__ const T *get_multi_bit_ith_lwe_gth_group_kth_block(
|
||||
const T *ptr, int g, int i, int k, int level, uint32_t grouping_factor,
|
||||
uint32_t polynomial_size, uint32_t glwe_dimension, uint32_t level_count);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -8,153 +8,116 @@ extern "C" {
|
||||
|
||||
bool has_support_to_cuda_programmable_bootstrap_cg_multi_bit(
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t max_shared_memory);
|
||||
uint32_t num_samples);
|
||||
|
||||
void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_64(
|
||||
void *dest, void *src, cuda_stream_t *stream, uint32_t input_lwe_dim,
|
||||
uint32_t glwe_dim, uint32_t level_count, uint32_t polynomial_size,
|
||||
uint32_t grouping_factor);
|
||||
void *stream, uint32_t gpu_index, void *dest, void *src,
|
||||
uint32_t input_lwe_dim, uint32_t glwe_dim, uint32_t level_count,
|
||||
uint32_t polynomial_size, uint32_t grouping_factor);
|
||||
|
||||
void scratch_cuda_multi_bit_programmable_bootstrap_64(
|
||||
cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t max_shared_memory, bool allocate_gpu_memory,
|
||||
uint32_t chunk_size = 0);
|
||||
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t level_count, uint32_t grouping_factor,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
|
||||
void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t grouping_factor, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t num_luts, uint32_t lwe_idx,
|
||||
uint32_t max_shared_memory, uint32_t lwe_chunk_size = 0);
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void *lwe_output_indexes, void *lut_vector, void *lut_vector_indexes,
|
||||
void *lwe_array_in, void *lwe_input_indexes, void *bootstrapping_key,
|
||||
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples);
|
||||
|
||||
void scratch_cuda_generic_multi_bit_programmable_bootstrap_64(
|
||||
cuda_stream_t *stream, int8_t **pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t max_shared_memory, bool allocate_gpu_memory,
|
||||
uint32_t lwe_chunk_size = 0);
|
||||
|
||||
void cuda_generic_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
|
||||
void *lut_vector, void *lut_vector_indexes, void *lwe_array_in,
|
||||
void *lwe_input_indexes, void *bootstrapping_key, int8_t *pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t grouping_factor, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t num_luts, uint32_t lwe_idx,
|
||||
uint32_t max_shared_memory, uint32_t lwe_chunk_size = 0);
|
||||
|
||||
void cleanup_cuda_multi_bit_programmable_bootstrap(cuda_stream_t *stream,
|
||||
void cleanup_cuda_multi_bit_programmable_bootstrap(void *stream,
|
||||
uint32_t gpu_index,
|
||||
int8_t **pbs_buffer);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ bool
|
||||
supports_distributed_shared_memory_on_multibit_programmable_bootstrap(
|
||||
uint32_t polynomial_size, uint32_t max_shared_memory);
|
||||
bool supports_distributed_shared_memory_on_multibit_programmable_bootstrap(
|
||||
uint32_t polynomial_size);
|
||||
|
||||
template <typename Torus>
|
||||
bool has_support_to_cuda_programmable_bootstrap_tbc_multi_bit(
|
||||
uint32_t num_samples, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t level_count, uint32_t max_shared_memory);
|
||||
uint32_t level_count);
|
||||
|
||||
#if CUDA_ARCH >= 900
|
||||
template <typename Torus, typename STorus>
|
||||
template <typename Torus>
|
||||
void scratch_cuda_tbc_multi_bit_programmable_bootstrap(
|
||||
cuda_stream_t *stream, pbs_buffer<Torus, MULTI_BIT> **buffer,
|
||||
void *stream, uint32_t gpu_index, pbs_buffer<Torus, MULTI_BIT> **buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t level_count, uint32_t grouping_factor,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
|
||||
bool allocate_gpu_memory, uint32_t lwe_chunk_size);
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
|
||||
template <typename Torus>
|
||||
void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
|
||||
Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in,
|
||||
Torus *lwe_input_indexes, Torus *bootstrapping_key,
|
||||
void *stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
Torus *lwe_output_indexes, Torus *lut_vector, Torus *lut_vector_indexes,
|
||||
Torus *lwe_array_in, Torus *lwe_input_indexes, Torus *bootstrapping_key,
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory,
|
||||
uint32_t lwe_chunk_size);
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples);
|
||||
#endif
|
||||
|
||||
template <typename Torus, typename STorus>
|
||||
template <typename Torus>
|
||||
void scratch_cuda_cg_multi_bit_programmable_bootstrap(
|
||||
cuda_stream_t *stream, pbs_buffer<Torus, MULTI_BIT> **pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t level_count, uint32_t grouping_factor,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
|
||||
bool allocate_gpu_memory, uint32_t lwe_chunk_size = 0);
|
||||
void *stream, uint32_t gpu_index, pbs_buffer<Torus, MULTI_BIT> **pbs_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
|
||||
template <typename Torus>
|
||||
void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
|
||||
Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in,
|
||||
Torus *lwe_input_indexes, Torus *bootstrapping_key,
|
||||
void *stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
Torus *lwe_output_indexes, Torus *lut_vector, Torus *lut_vector_indexes,
|
||||
Torus *lwe_array_in, Torus *lwe_input_indexes, Torus *bootstrapping_key,
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory,
|
||||
uint32_t lwe_chunk_size = 0);
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples);
|
||||
|
||||
template <typename Torus, typename STorus>
|
||||
template <typename Torus>
|
||||
void scratch_cuda_multi_bit_programmable_bootstrap(
|
||||
cuda_stream_t *stream, pbs_buffer<Torus, MULTI_BIT> **pbs_buffer,
|
||||
void *stream, uint32_t gpu_index, pbs_buffer<Torus, MULTI_BIT> **pbs_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t level_count, uint32_t grouping_factor,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t max_shared_memory,
|
||||
bool allocate_gpu_memory, uint32_t lwe_chunk_size = 0);
|
||||
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
|
||||
|
||||
template <typename Torus>
|
||||
void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
|
||||
Torus *lut_vector, Torus *lut_vector_indexes, Torus *lwe_array_in,
|
||||
Torus *lwe_input_indexes, Torus *bootstrapping_key,
|
||||
void *stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
Torus *lwe_output_indexes, Torus *lut_vector, Torus *lut_vector_indexes,
|
||||
Torus *lwe_array_in, Torus *lwe_input_indexes, Torus *bootstrapping_key,
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t num_luts, uint32_t lwe_idx, uint32_t max_shared_memory,
|
||||
uint32_t lwe_chunk_size = 0);
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples);
|
||||
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_full_sm_multibit_programmable_bootstrap_keybundle(
|
||||
uint64_t get_buffer_size_full_sm_multibit_programmable_bootstrap_keybundle(
|
||||
uint32_t polynomial_size);
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_full_sm_multibit_programmable_bootstrap_step_one(
|
||||
uint64_t get_buffer_size_full_sm_multibit_programmable_bootstrap_step_one(
|
||||
uint32_t polynomial_size);
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_full_sm_multibit_programmable_bootstrap_step_two(
|
||||
uint64_t get_buffer_size_full_sm_multibit_programmable_bootstrap_step_two(
|
||||
uint32_t polynomial_size);
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_partial_sm_multibit_programmable_bootstrap_step_one(
|
||||
uint64_t get_buffer_size_partial_sm_multibit_programmable_bootstrap_step_one(
|
||||
uint32_t polynomial_size);
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_full_sm_cg_multibit_programmable_bootstrap(
|
||||
uint64_t get_buffer_size_full_sm_cg_multibit_programmable_bootstrap(
|
||||
uint32_t polynomial_size);
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_partial_sm_cg_multibit_programmable_bootstrap(
|
||||
uint64_t get_buffer_size_partial_sm_cg_multibit_programmable_bootstrap(
|
||||
uint32_t polynomial_size);
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_sm_dsm_plus_tbc_multibit_programmable_bootstrap(
|
||||
uint64_t get_buffer_size_sm_dsm_plus_tbc_multibit_programmable_bootstrap(
|
||||
uint32_t polynomial_size);
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_partial_sm_tbc_multibit_programmable_bootstrap(
|
||||
uint64_t get_buffer_size_partial_sm_tbc_multibit_programmable_bootstrap(
|
||||
uint32_t polynomial_size);
|
||||
template <typename Torus>
|
||||
__host__ __device__ uint64_t
|
||||
get_buffer_size_full_sm_tbc_multibit_programmable_bootstrap(
|
||||
uint64_t get_buffer_size_full_sm_tbc_multibit_programmable_bootstrap(
|
||||
uint32_t polynomial_size);
|
||||
|
||||
template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
|
||||
@@ -170,12 +133,12 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
|
||||
|
||||
PBS_VARIANT pbs_variant;
|
||||
|
||||
pbs_buffer(cuda_stream_t *stream, uint32_t glwe_dimension,
|
||||
pbs_buffer(cudaStream_t stream, uint32_t gpu_index, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t lwe_chunk_size,
|
||||
PBS_VARIANT pbs_variant, bool allocate_gpu_memory) {
|
||||
this->pbs_variant = pbs_variant;
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(stream->gpu_index);
|
||||
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
|
||||
|
||||
// default
|
||||
uint64_t full_sm_keybundle =
|
||||
@@ -225,31 +188,34 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
|
||||
// Keybundle
|
||||
if (max_shared_memory < full_sm_keybundle)
|
||||
d_mem_keybundle = (int8_t *)cuda_malloc_async(
|
||||
num_blocks_keybundle * full_sm_keybundle, stream);
|
||||
num_blocks_keybundle * full_sm_keybundle, stream, gpu_index);
|
||||
|
||||
switch (pbs_variant) {
|
||||
case PBS_VARIANT::CG:
|
||||
// Accumulator CG
|
||||
if (max_shared_memory < partial_sm_cg_accumulate)
|
||||
d_mem_acc_cg = (int8_t *)cuda_malloc_async(
|
||||
num_blocks_acc_cg * full_sm_cg_accumulate, stream);
|
||||
num_blocks_acc_cg * full_sm_cg_accumulate, stream, gpu_index);
|
||||
else if (max_shared_memory < full_sm_cg_accumulate)
|
||||
d_mem_acc_cg = (int8_t *)cuda_malloc_async(
|
||||
num_blocks_acc_cg * partial_sm_cg_accumulate, stream);
|
||||
num_blocks_acc_cg * partial_sm_cg_accumulate, stream, gpu_index);
|
||||
break;
|
||||
case PBS_VARIANT::DEFAULT:
|
||||
// Accumulator step one
|
||||
if (max_shared_memory < partial_sm_accumulate_step_one)
|
||||
d_mem_acc_step_one = (int8_t *)cuda_malloc_async(
|
||||
num_blocks_acc_step_one * full_sm_accumulate_step_one, stream);
|
||||
num_blocks_acc_step_one * full_sm_accumulate_step_one, stream,
|
||||
gpu_index);
|
||||
else if (max_shared_memory < full_sm_accumulate_step_one)
|
||||
d_mem_acc_step_one = (int8_t *)cuda_malloc_async(
|
||||
num_blocks_acc_step_one * partial_sm_accumulate_step_one, stream);
|
||||
num_blocks_acc_step_one * partial_sm_accumulate_step_one, stream,
|
||||
gpu_index);
|
||||
|
||||
// Accumulator step two
|
||||
if (max_shared_memory < full_sm_accumulate_step_two)
|
||||
d_mem_acc_step_two = (int8_t *)cuda_malloc_async(
|
||||
num_blocks_acc_step_two * full_sm_accumulate_step_two, stream);
|
||||
num_blocks_acc_step_two * full_sm_accumulate_step_two, stream,
|
||||
gpu_index);
|
||||
break;
|
||||
#if CUDA_ARCH >= 900
|
||||
case TBC:
|
||||
@@ -266,10 +232,11 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
|
||||
// Accumulator TBC
|
||||
if (max_shared_memory < partial_sm_tbc_accumulate + minimum_sm_tbc)
|
||||
d_mem_acc_tbc = (int8_t *)cuda_malloc_async(
|
||||
num_blocks_acc_tbc * full_sm_tbc_accumulate, stream);
|
||||
num_blocks_acc_tbc * full_sm_tbc_accumulate, stream, gpu_index);
|
||||
else if (max_shared_memory < full_sm_tbc_accumulate + minimum_sm_tbc)
|
||||
d_mem_acc_tbc = (int8_t *)cuda_malloc_async(
|
||||
num_blocks_acc_tbc * partial_sm_tbc_accumulate, stream);
|
||||
num_blocks_acc_tbc * partial_sm_tbc_accumulate, stream,
|
||||
gpu_index);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
@@ -278,49 +245,49 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
|
||||
|
||||
keybundle_fft = (double2 *)cuda_malloc_async(
|
||||
num_blocks_keybundle * (polynomial_size / 2) * sizeof(double2),
|
||||
stream);
|
||||
stream, gpu_index);
|
||||
global_accumulator = (Torus *)cuda_malloc_async(
|
||||
num_blocks_acc_step_one * polynomial_size * sizeof(Torus), stream);
|
||||
num_blocks_acc_step_one * polynomial_size * sizeof(Torus), stream,
|
||||
gpu_index);
|
||||
global_accumulator_fft = (double2 *)cuda_malloc_async(
|
||||
num_blocks_acc_step_one * (polynomial_size / 2) * sizeof(double2),
|
||||
stream);
|
||||
stream, gpu_index);
|
||||
}
|
||||
}
|
||||
|
||||
void release(cuda_stream_t *stream) {
|
||||
void release(cudaStream_t stream, uint32_t gpu_index) {
|
||||
|
||||
if (d_mem_keybundle)
|
||||
cuda_drop_async(d_mem_keybundle, stream);
|
||||
cuda_drop_async(d_mem_keybundle, stream, gpu_index);
|
||||
switch (pbs_variant) {
|
||||
case DEFAULT:
|
||||
if (d_mem_acc_step_one)
|
||||
cuda_drop_async(d_mem_acc_step_one, stream);
|
||||
cuda_drop_async(d_mem_acc_step_one, stream, gpu_index);
|
||||
if (d_mem_acc_step_two)
|
||||
cuda_drop_async(d_mem_acc_step_two, stream);
|
||||
cuda_drop_async(d_mem_acc_step_two, stream, gpu_index);
|
||||
break;
|
||||
case CG:
|
||||
if (d_mem_acc_cg)
|
||||
cuda_drop_async(d_mem_acc_cg, stream);
|
||||
cuda_drop_async(d_mem_acc_cg, stream, gpu_index);
|
||||
break;
|
||||
#if CUDA_ARCH >= 900
|
||||
case TBC:
|
||||
if (d_mem_acc_tbc)
|
||||
cuda_drop_async(d_mem_acc_tbc, stream);
|
||||
cuda_drop_async(d_mem_acc_tbc, stream, gpu_index);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
PANIC("Cuda error (PBS): unsupported implementation variant.")
|
||||
}
|
||||
|
||||
cuda_drop_async(keybundle_fft, stream);
|
||||
cuda_drop_async(global_accumulator, stream);
|
||||
cuda_drop_async(global_accumulator_fft, stream);
|
||||
cuda_drop_async(keybundle_fft, stream, gpu_index);
|
||||
cuda_drop_async(global_accumulator, stream, gpu_index);
|
||||
cuda_drop_async(global_accumulator_fft, stream, gpu_index);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Torus, class params>
|
||||
__host__ uint32_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t max_shared_memory);
|
||||
uint32_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
|
||||
uint32_t polynomial_size);
|
||||
|
||||
#endif // CUDA_MULTI_BIT_H
|
||||
|
||||
@@ -10,7 +10,8 @@ set(SOURCES
|
||||
${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/keyswitch.h
|
||||
${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/linear_algebra.h
|
||||
${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/shifts.h
|
||||
${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/vertical_packing.h)
|
||||
${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/vertical_packing.h
|
||||
${CMAKE_SOURCE_DIR}/${INCLUDE_DIR}/helper_multi_gpu.h)
|
||||
file(GLOB_RECURSE SOURCES "*.cu")
|
||||
add_library(tfhe_cuda_backend STATIC ${SOURCES})
|
||||
set_target_properties(tfhe_cuda_backend PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON)
|
||||
|
||||
@@ -1 +1,77 @@
|
||||
#include "ciphertext.cuh"
|
||||
#include "polynomial/parameters.cuh"
|
||||
|
||||
void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *stream,
|
||||
uint32_t gpu_index,
|
||||
void *dest, void *src,
|
||||
uint32_t number_of_cts,
|
||||
uint32_t lwe_dimension) {
|
||||
cuda_convert_lwe_ciphertext_vector_to_gpu<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)dest,
|
||||
(uint64_t *)src, number_of_cts, lwe_dimension);
|
||||
}
|
||||
|
||||
void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *stream,
|
||||
uint32_t gpu_index,
|
||||
void *dest, void *src,
|
||||
uint32_t number_of_cts,
|
||||
uint32_t lwe_dimension) {
|
||||
cuda_convert_lwe_ciphertext_vector_to_cpu<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)dest,
|
||||
(uint64_t *)src, number_of_cts, lwe_dimension);
|
||||
}
|
||||
|
||||
void cuda_glwe_sample_extract_64(void *stream, uint32_t gpu_index,
|
||||
void *lwe_array_out, void *glwe_array_in,
|
||||
uint32_t *nth_array, uint32_t num_glwes,
|
||||
uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<256>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t *)glwe_array_in, (uint32_t *)nth_array, num_glwes,
|
||||
glwe_dimension);
|
||||
break;
|
||||
case 512:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<512>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t *)glwe_array_in, (uint32_t *)nth_array, num_glwes,
|
||||
glwe_dimension);
|
||||
break;
|
||||
case 1024:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<1024>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t *)glwe_array_in, (uint32_t *)nth_array, num_glwes,
|
||||
glwe_dimension);
|
||||
break;
|
||||
case 2048:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<2048>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t *)glwe_array_in, (uint32_t *)nth_array, num_glwes,
|
||||
glwe_dimension);
|
||||
break;
|
||||
case 4096:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<4096>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t *)glwe_array_in, (uint32_t *)nth_array, num_glwes,
|
||||
glwe_dimension);
|
||||
break;
|
||||
case 8192:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<8192>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t *)glwe_array_in, (uint32_t *)nth_array, num_glwes,
|
||||
glwe_dimension);
|
||||
break;
|
||||
case 16384:
|
||||
host_sample_extract<uint64_t, AmortizedDegree<16384>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, (uint64_t *)lwe_array_out,
|
||||
(uint64_t *)glwe_array_in, (uint32_t *)nth_array, num_glwes,
|
||||
glwe_dimension);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error: unsupported polynomial size. Supported "
|
||||
"N's are powers of two in the interval [256..16384].")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,42 +3,62 @@
|
||||
|
||||
#include "ciphertext.h"
|
||||
#include "device.h"
|
||||
#include "polynomial/functions.cuh"
|
||||
#include <cstdint>
|
||||
|
||||
template <typename T>
|
||||
void cuda_convert_lwe_ciphertext_vector_to_gpu(T *dest, T *src,
|
||||
cuda_stream_t *stream,
|
||||
uint32_t number_of_cts,
|
||||
void cuda_convert_lwe_ciphertext_vector_to_gpu(cudaStream_t stream,
|
||||
uint32_t gpu_index, T *dest,
|
||||
T *src, uint32_t number_of_cts,
|
||||
uint32_t lwe_dimension) {
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
|
||||
cuda_memcpy_async_to_gpu(dest, src, size, stream);
|
||||
}
|
||||
|
||||
void cuda_convert_lwe_ciphertext_vector_to_gpu_64(void *dest, void *src,
|
||||
cuda_stream_t *stream,
|
||||
uint32_t number_of_cts,
|
||||
uint32_t lwe_dimension) {
|
||||
cuda_convert_lwe_ciphertext_vector_to_gpu<uint64_t>(
|
||||
(uint64_t *)dest, (uint64_t *)src, stream, number_of_cts, lwe_dimension);
|
||||
cuda_memcpy_async_to_gpu(dest, src, size, stream, gpu_index);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void cuda_convert_lwe_ciphertext_vector_to_cpu(T *dest, T *src,
|
||||
cuda_stream_t *stream,
|
||||
uint32_t number_of_cts,
|
||||
void cuda_convert_lwe_ciphertext_vector_to_cpu(cudaStream_t stream,
|
||||
uint32_t gpu_index, T *dest,
|
||||
T *src, uint32_t number_of_cts,
|
||||
uint32_t lwe_dimension) {
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
uint64_t size = number_of_cts * (lwe_dimension + 1) * sizeof(T);
|
||||
cuda_memcpy_async_to_cpu(dest, src, size, stream);
|
||||
cuda_memcpy_async_to_cpu(dest, src, size, stream, gpu_index);
|
||||
}
|
||||
|
||||
void cuda_convert_lwe_ciphertext_vector_to_cpu_64(void *dest, void *src,
|
||||
cuda_stream_t *stream,
|
||||
uint32_t number_of_cts,
|
||||
uint32_t lwe_dimension) {
|
||||
cuda_convert_lwe_ciphertext_vector_to_cpu<uint64_t>(
|
||||
(uint64_t *)dest, (uint64_t *)src, stream, number_of_cts, lwe_dimension);
|
||||
template <typename Torus, class params>
|
||||
__global__ void sample_extract(Torus *lwe_array_out, Torus *glwe_array_in,
|
||||
uint32_t *nth_array, uint32_t glwe_dimension) {
|
||||
|
||||
const int input_id = blockIdx.x;
|
||||
|
||||
const int glwe_input_size = (glwe_dimension + 1) * params::degree;
|
||||
const int lwe_output_size = glwe_dimension * params::degree + 1;
|
||||
|
||||
auto lwe_out = lwe_array_out + input_id * lwe_output_size;
|
||||
|
||||
// We assume each GLWE will store the first polynomial_size inputs
|
||||
uint32_t nth_per_glwe = params::degree;
|
||||
auto glwe_in = glwe_array_in + (input_id / nth_per_glwe) * glwe_input_size;
|
||||
|
||||
auto nth = nth_array[input_id];
|
||||
|
||||
sample_extract_mask<Torus, params>(lwe_out, glwe_in, glwe_dimension, nth);
|
||||
sample_extract_body<Torus, params>(lwe_out, glwe_in, glwe_dimension, nth);
|
||||
}
|
||||
|
||||
template <typename Torus, class params>
|
||||
__host__ void host_sample_extract(cudaStream_t stream, uint32_t gpu_index,
|
||||
Torus *lwe_array_out, Torus *glwe_array_in,
|
||||
uint32_t *nth_array, uint32_t num_glwes,
|
||||
uint32_t glwe_dimension) {
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
dim3 grid(num_glwes);
|
||||
dim3 thds(params::degree / params::opt);
|
||||
sample_extract<Torus, params><<<grid, thds, 0, stream>>>(
|
||||
lwe_array_out, glwe_array_in, nth_array, glwe_dimension);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -20,9 +20,7 @@ private:
|
||||
uint32_t level_count;
|
||||
uint32_t base_log;
|
||||
uint32_t mask;
|
||||
uint32_t halfbg;
|
||||
uint32_t num_poly;
|
||||
T offset;
|
||||
int current_level;
|
||||
T mask_mod_b;
|
||||
T *state;
|
||||
@@ -82,72 +80,12 @@ public:
|
||||
synchronize_threads_in_block();
|
||||
}
|
||||
|
||||
// Decomposes a single polynomial
|
||||
__device__ void
|
||||
decompose_and_compress_next_polynomial_elements(double2 *result, int j) {
|
||||
if (j == 0)
|
||||
current_level -= 1;
|
||||
|
||||
int tid = threadIdx.x;
|
||||
auto state_slice = state + j * params::degree;
|
||||
for (int i = 0; i < params::opt / 2; i++) {
|
||||
T res_re = state_slice[tid] & mask_mod_b;
|
||||
T res_im = state_slice[tid + params::degree / 2] & mask_mod_b;
|
||||
state_slice[tid] >>= base_log;
|
||||
state_slice[tid + params::degree / 2] >>= base_log;
|
||||
T carry_re = ((res_re - 1ll) | state_slice[tid]) & res_re;
|
||||
T carry_im =
|
||||
((res_im - 1ll) | state_slice[tid + params::degree / 2]) & res_im;
|
||||
carry_re >>= (base_log - 1);
|
||||
carry_im >>= (base_log - 1);
|
||||
state_slice[tid] += carry_re;
|
||||
state_slice[tid + params::degree / 2] += carry_im;
|
||||
res_re -= carry_re << base_log;
|
||||
res_im -= carry_im << base_log;
|
||||
|
||||
result[i].x = (int32_t)res_re;
|
||||
result[i].y = (int32_t)res_im;
|
||||
|
||||
tid += params::degree / params::opt;
|
||||
}
|
||||
synchronize_threads_in_block();
|
||||
}
|
||||
|
||||
__device__ void decompose_and_compress_level(double2 *result, int level) {
|
||||
for (int i = 0; i < level_count - level; i++)
|
||||
decompose_and_compress_next(result);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> class GadgetMatrixSingle {
|
||||
private:
|
||||
uint32_t level_count;
|
||||
uint32_t base_log;
|
||||
uint32_t mask;
|
||||
uint32_t halfbg;
|
||||
T offset;
|
||||
|
||||
public:
|
||||
__device__ GadgetMatrixSingle(uint32_t base_log, uint32_t level_count)
|
||||
: base_log(base_log), level_count(level_count) {
|
||||
uint32_t bg = 1 << base_log;
|
||||
this->halfbg = bg / 2;
|
||||
this->mask = bg - 1;
|
||||
T temp = 0;
|
||||
for (int i = 0; i < this->level_count; i++) {
|
||||
temp += 1ULL << (sizeof(T) * 8 - (i + 1) * this->base_log);
|
||||
}
|
||||
this->offset = temp * this->halfbg;
|
||||
}
|
||||
|
||||
__device__ T decompose_one_level_single(T element, uint32_t level) {
|
||||
T s = element + this->offset;
|
||||
uint32_t decal = (sizeof(T) * 8 - (level + 1) * this->base_log);
|
||||
T temp1 = (s >> decal) & this->mask;
|
||||
return (T)(temp1 - this->halfbg);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Torus>
|
||||
__device__ Torus decompose_one(Torus &state, Torus mask_mod_b, int base_log) {
|
||||
Torus res = state & mask_mod_b;
|
||||
|
||||
@@ -49,11 +49,15 @@ __global__ void device_batch_fft_ggsw_vector(double2 *dest, T *src,
|
||||
* global memory
|
||||
*/
|
||||
template <typename T, typename ST, class params>
|
||||
void batch_fft_ggsw_vector(cuda_stream_t *stream, double2 *dest, T *src,
|
||||
void batch_fft_ggsw_vector(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, double2 *dest, T *src,
|
||||
int8_t *d_mem, uint32_t r, uint32_t glwe_dim,
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t gpu_index, uint32_t max_shared_memory) {
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
uint32_t max_shared_memory) {
|
||||
if (gpu_count != 1)
|
||||
PANIC("GPU error (batch_fft_ggsw_vector): multi-GPU execution is not "
|
||||
"supported yet.")
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
|
||||
int shared_memory_size = sizeof(double) * polynomial_size;
|
||||
|
||||
@@ -62,11 +66,11 @@ void batch_fft_ggsw_vector(cuda_stream_t *stream, double2 *dest, T *src,
|
||||
|
||||
if (max_shared_memory < shared_memory_size) {
|
||||
device_batch_fft_ggsw_vector<T, ST, params, NOSM>
|
||||
<<<gridSize, blockSize, 0, stream->stream>>>(dest, src, d_mem);
|
||||
<<<gridSize, blockSize, 0, streams[0]>>>(dest, src, d_mem);
|
||||
} else {
|
||||
device_batch_fft_ggsw_vector<T, ST, params, FULLSM>
|
||||
<<<gridSize, blockSize, shared_memory_size, stream->stream>>>(dest, src,
|
||||
d_mem);
|
||||
<<<gridSize, blockSize, shared_memory_size, streams[0]>>>(dest, src,
|
||||
d_mem);
|
||||
}
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
@@ -6,12 +6,13 @@
|
||||
* Head out to the equivalent operation on 64 bits for more details.
|
||||
*/
|
||||
void cuda_keyswitch_lwe_ciphertext_vector_32(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
|
||||
void *lwe_array_in, void *lwe_input_indexes, void *ksk,
|
||||
uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples) {
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void *lwe_output_indexes, void *lwe_array_in, void *lwe_input_indexes,
|
||||
void *ksk, uint32_t lwe_dimension_in, uint32_t lwe_dimension_out,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples) {
|
||||
cuda_keyswitch_lwe_ciphertext_vector(
|
||||
stream, static_cast<uint32_t *>(lwe_array_out),
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint32_t *>(lwe_array_out),
|
||||
static_cast<uint32_t *>(lwe_output_indexes),
|
||||
static_cast<uint32_t *>(lwe_array_in),
|
||||
static_cast<uint32_t *>(lwe_input_indexes), static_cast<uint32_t *>(ksk),
|
||||
@@ -35,12 +36,13 @@ void cuda_keyswitch_lwe_ciphertext_vector_32(
|
||||
* - num_samples blocks of threads are launched
|
||||
*/
|
||||
void cuda_keyswitch_lwe_ciphertext_vector_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_output_indexes,
|
||||
void *lwe_array_in, void *lwe_input_indexes, void *ksk,
|
||||
uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples) {
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void *lwe_output_indexes, void *lwe_array_in, void *lwe_input_indexes,
|
||||
void *ksk, uint32_t lwe_dimension_in, uint32_t lwe_dimension_out,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples) {
|
||||
cuda_keyswitch_lwe_ciphertext_vector(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<uint64_t *>(lwe_output_indexes),
|
||||
static_cast<uint64_t *>(lwe_array_in),
|
||||
static_cast<uint64_t *>(lwe_input_indexes), static_cast<uint64_t *>(ksk),
|
||||
|
||||
@@ -3,8 +3,11 @@
|
||||
|
||||
#include "device.h"
|
||||
#include "gadget.cuh"
|
||||
#include "helper_multi_gpu.h"
|
||||
#include "polynomial/functions.cuh"
|
||||
#include "polynomial/polynomial_math.cuh"
|
||||
#include "torus.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
@@ -31,110 +34,124 @@ __device__ Torus *get_ith_block(Torus *ksk, int i, int level,
|
||||
* scaling factor) under key s2 instead of s1, with an increased noise
|
||||
*
|
||||
*/
|
||||
// Each thread in x are used to calculate one output.
|
||||
// threads in y are used to paralelize the lwe_dimension_in loop.
|
||||
// shared memory is used to store intermediate results of the reduction.
|
||||
template <typename Torus>
|
||||
__global__ void
|
||||
keyswitch(Torus *lwe_array_out, Torus *lwe_output_indexes, Torus *lwe_array_in,
|
||||
Torus *lwe_input_indexes, Torus *ksk, uint32_t lwe_dimension_in,
|
||||
uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count,
|
||||
int lwe_lower, int lwe_upper, int cutoff) {
|
||||
int tid = threadIdx.x;
|
||||
keyswitch(Torus *lwe_array_out, const Torus *__restrict__ lwe_output_indexes,
|
||||
const Torus *__restrict__ lwe_array_in,
|
||||
const Torus *__restrict__ lwe_input_indexes,
|
||||
const Torus *__restrict__ ksk, uint32_t lwe_dimension_in,
|
||||
uint32_t lwe_dimension_out, uint32_t base_log, uint32_t level_count) {
|
||||
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
|
||||
const int shmem_index = threadIdx.x + threadIdx.y * blockDim.x;
|
||||
|
||||
extern __shared__ int8_t sharedmem[];
|
||||
|
||||
Torus *local_lwe_array_out = (Torus *)sharedmem;
|
||||
|
||||
auto block_lwe_array_in = get_chunk(
|
||||
lwe_array_in, lwe_input_indexes[blockIdx.x], lwe_dimension_in + 1);
|
||||
Torus *lwe_acc_out = (Torus *)sharedmem;
|
||||
auto block_lwe_array_out = get_chunk(
|
||||
lwe_array_out, lwe_output_indexes[blockIdx.x], lwe_dimension_out + 1);
|
||||
lwe_array_out, lwe_output_indexes[blockIdx.y], lwe_dimension_out + 1);
|
||||
|
||||
auto gadget = GadgetMatrixSingle<Torus>(base_log, level_count);
|
||||
if (tid <= lwe_dimension_out) {
|
||||
|
||||
int lwe_part_per_thd;
|
||||
if (tid < cutoff) {
|
||||
lwe_part_per_thd = lwe_upper;
|
||||
} else {
|
||||
lwe_part_per_thd = lwe_lower;
|
||||
}
|
||||
__syncthreads();
|
||||
Torus local_lwe_out = 0;
|
||||
auto block_lwe_array_in = get_chunk(
|
||||
lwe_array_in, lwe_input_indexes[blockIdx.y], lwe_dimension_in + 1);
|
||||
|
||||
for (int k = 0; k < lwe_part_per_thd; k++) {
|
||||
int idx = tid + k * blockDim.x;
|
||||
local_lwe_array_out[idx] = 0;
|
||||
}
|
||||
__syncthreads();
|
||||
if (tid == lwe_dimension_out && threadIdx.y == 0) {
|
||||
local_lwe_out = block_lwe_array_in[lwe_dimension_in];
|
||||
}
|
||||
const Torus mask_mod_b = (1ll << base_log) - 1ll;
|
||||
|
||||
if (tid == 0) {
|
||||
local_lwe_array_out[lwe_dimension_out] =
|
||||
block_lwe_array_in[lwe_dimension_in];
|
||||
}
|
||||
const int pack_size = (lwe_dimension_in + blockDim.y - 1) / blockDim.y;
|
||||
const int start_i = pack_size * threadIdx.y;
|
||||
const int end_i = SEL(lwe_dimension_in, pack_size * (threadIdx.y + 1),
|
||||
pack_size * (threadIdx.y + 1) <= lwe_dimension_in);
|
||||
|
||||
for (int i = 0; i < lwe_dimension_in; i++) {
|
||||
// This loop distribution seems to benefit the global mem reads
|
||||
for (int i = start_i; i < end_i; i++) {
|
||||
Torus a_i = round_to_closest_multiple(block_lwe_array_in[i], base_log,
|
||||
level_count);
|
||||
Torus state = a_i >> (sizeof(Torus) * 8 - base_log * level_count);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
Torus a_i =
|
||||
round_to_closest_multiple(block_lwe_array_in[i], base_log, level_count);
|
||||
|
||||
Torus state = a_i >> (sizeof(Torus) * 8 - base_log * level_count);
|
||||
Torus mask_mod_b = (1ll << base_log) - 1ll;
|
||||
|
||||
for (int j = 0; j < level_count; j++) {
|
||||
auto ksk_block = get_ith_block(ksk, i, j, lwe_dimension_out, level_count);
|
||||
Torus decomposed = decompose_one<Torus>(state, mask_mod_b, base_log);
|
||||
for (int k = 0; k < lwe_part_per_thd; k++) {
|
||||
int idx = tid + k * blockDim.x;
|
||||
local_lwe_array_out[idx] -= (Torus)ksk_block[idx] * decomposed;
|
||||
for (int j = 0; j < level_count; j++) {
|
||||
auto ksk_block =
|
||||
get_ith_block(ksk, i, j, lwe_dimension_out, level_count);
|
||||
Torus decomposed = decompose_one<Torus>(state, mask_mod_b, base_log);
|
||||
local_lwe_out -= (Torus)ksk_block[tid] * decomposed;
|
||||
}
|
||||
}
|
||||
|
||||
lwe_acc_out[shmem_index] = local_lwe_out;
|
||||
}
|
||||
|
||||
for (int k = 0; k < lwe_part_per_thd; k++) {
|
||||
int idx = tid + k * blockDim.x;
|
||||
block_lwe_array_out[idx] = local_lwe_array_out[idx];
|
||||
if (tid <= lwe_dimension_out) {
|
||||
for (int offset = blockDim.y / 2; offset > 0 && threadIdx.y < offset;
|
||||
offset /= 2) {
|
||||
__syncthreads();
|
||||
lwe_acc_out[shmem_index] +=
|
||||
lwe_acc_out[shmem_index + offset * blockDim.x];
|
||||
}
|
||||
if (threadIdx.y == 0)
|
||||
block_lwe_array_out[tid] = lwe_acc_out[shmem_index];
|
||||
}
|
||||
}
|
||||
|
||||
/// assume lwe_array_in in the gpu
|
||||
template <typename Torus>
|
||||
__host__ void cuda_keyswitch_lwe_ciphertext_vector(
|
||||
cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_output_indexes,
|
||||
Torus *lwe_array_in, Torus *lwe_input_indexes, Torus *ksk,
|
||||
uint32_t lwe_dimension_in, uint32_t lwe_dimension_out, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples) {
|
||||
cudaStream_t stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
Torus *lwe_output_indexes, Torus *lwe_array_in, Torus *lwe_input_indexes,
|
||||
Torus *ksk, uint32_t lwe_dimension_in, uint32_t lwe_dimension_out,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
constexpr int ideal_threads = 128;
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
int lwe_size = lwe_dimension_out + 1;
|
||||
int lwe_lower, lwe_upper, cutoff;
|
||||
if (lwe_size % ideal_threads == 0) {
|
||||
lwe_lower = lwe_size / ideal_threads;
|
||||
lwe_upper = lwe_size / ideal_threads;
|
||||
cutoff = 0;
|
||||
} else {
|
||||
int y = ceil((double)lwe_size / (double)ideal_threads) * ideal_threads -
|
||||
lwe_size;
|
||||
cutoff = ideal_threads - y;
|
||||
lwe_lower = lwe_size / ideal_threads;
|
||||
lwe_upper = (int)ceil((double)lwe_size / (double)ideal_threads);
|
||||
}
|
||||
constexpr int num_threads_y = 32;
|
||||
int num_blocks, num_threads_x;
|
||||
|
||||
int lwe_size_after = lwe_size * num_samples;
|
||||
getNumBlocksAndThreads2D(lwe_dimension_out + 1, 512, num_threads_y,
|
||||
num_blocks, num_threads_x);
|
||||
|
||||
int shared_mem = sizeof(Torus) * lwe_size;
|
||||
int shared_mem = sizeof(Torus) * num_threads_y * num_threads_x;
|
||||
dim3 grid(num_blocks, num_samples, 1);
|
||||
dim3 threads(num_threads_x, num_threads_y, 1);
|
||||
|
||||
cuda_memset_async(lwe_array_out, 0, sizeof(Torus) * lwe_size_after, stream);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
dim3 grid(num_samples, 1, 1);
|
||||
dim3 threads(ideal_threads, 1, 1);
|
||||
|
||||
keyswitch<Torus><<<grid, threads, shared_mem, stream->stream>>>(
|
||||
keyswitch<Torus><<<grid, threads, shared_mem, stream>>>(
|
||||
lwe_array_out, lwe_output_indexes, lwe_array_in, lwe_input_indexes, ksk,
|
||||
lwe_dimension_in, lwe_dimension_out, base_log, level_count, lwe_lower,
|
||||
lwe_upper, cutoff);
|
||||
lwe_dimension_in, lwe_dimension_out, base_log, level_count);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void execute_keyswitch_async(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
const LweArrayVariant<Torus> &lwe_array_out,
|
||||
const LweArrayVariant<Torus> &lwe_output_indexes,
|
||||
const LweArrayVariant<Torus> &lwe_array_in,
|
||||
const LweArrayVariant<Torus> &lwe_input_indexes,
|
||||
Torus **ksks, uint32_t lwe_dimension_in,
|
||||
uint32_t lwe_dimension_out, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples) {
|
||||
|
||||
/// If the number of radix blocks is lower than the number of GPUs, not all
|
||||
/// GPUs will be active and there will be 1 input per GPU
|
||||
for (uint i = 0; i < gpu_count; i++) {
|
||||
int num_samples_on_gpu = get_num_inputs_on_gpu(num_samples, i, gpu_count);
|
||||
|
||||
Torus *current_lwe_array_out = GET_VARIANT_ELEMENT(lwe_array_out, i);
|
||||
Torus *current_lwe_output_indexes =
|
||||
GET_VARIANT_ELEMENT(lwe_output_indexes, i);
|
||||
Torus *current_lwe_array_in = GET_VARIANT_ELEMENT(lwe_array_in, i);
|
||||
Torus *current_lwe_input_indexes =
|
||||
GET_VARIANT_ELEMENT(lwe_input_indexes, i);
|
||||
|
||||
// Compute Keyswitch
|
||||
cuda_keyswitch_lwe_ciphertext_vector<Torus>(
|
||||
streams[i], gpu_indexes[i], current_lwe_array_out,
|
||||
current_lwe_output_indexes, current_lwe_array_in,
|
||||
current_lwe_input_indexes, ksks[i], lwe_dimension_in, lwe_dimension_out,
|
||||
base_log, level_count, num_samples_on_gpu);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -39,36 +39,19 @@ __device__ inline T round_to_closest_multiple(T x, uint32_t base_log,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ void rescale_torus_element(T element, T &output,
|
||||
uint32_t log_shift) {
|
||||
output =
|
||||
round((double)element / (double(std::numeric_limits<T>::max()) + 1.0) *
|
||||
(double)log_shift);
|
||||
__device__ __forceinline__ void modulus_switch(T input, T &output,
|
||||
uint32_t log_modulus) {
|
||||
constexpr uint32_t BITS = sizeof(T) * 8;
|
||||
|
||||
output = input + (((T)1) << (BITS - log_modulus - 1));
|
||||
output >>= (BITS - log_modulus);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ __forceinline__ T rescale_torus_element(T element,
|
||||
uint32_t log_shift) {
|
||||
return round((double)element / (double(std::numeric_limits<T>::max()) + 1.0) *
|
||||
(double)log_shift);
|
||||
__device__ __forceinline__ T modulus_switch(T input, uint32_t log_modulus) {
|
||||
T output;
|
||||
modulus_switch(input, output, log_modulus);
|
||||
return output;
|
||||
}
|
||||
|
||||
template <>
|
||||
__device__ __forceinline__ void
|
||||
rescale_torus_element<uint32_t>(uint32_t element, uint32_t &output,
|
||||
uint32_t log_shift) {
|
||||
output =
|
||||
round(__uint2double_rn(element) /
|
||||
(__uint2double_rn(std::numeric_limits<uint32_t>::max()) + 1.0) *
|
||||
__uint2double_rn(log_shift));
|
||||
}
|
||||
|
||||
template <>
|
||||
__device__ __forceinline__ void
|
||||
rescale_torus_element<uint64_t>(uint64_t element, uint64_t &output,
|
||||
uint32_t log_shift) {
|
||||
output = round(__ull2double_rn(element) /
|
||||
(__ull2double_rn(std::numeric_limits<uint64_t>::max()) + 1.0) *
|
||||
__uint2double_rn(log_shift));
|
||||
}
|
||||
#endif // CNCRT_TORUS_H
|
||||
|
||||
@@ -3,14 +3,23 @@
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
/// Unsafe function to create a CUDA stream, must check first that GPU exists
|
||||
cuda_stream_t *cuda_create_stream(uint32_t gpu_index) {
|
||||
cudaStream_t cuda_create_stream(uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
cuda_stream_t *stream = new cuda_stream_t(gpu_index);
|
||||
cudaStream_t stream;
|
||||
check_cuda_error(cudaStreamCreateWithFlags(&stream, cudaStreamNonBlocking));
|
||||
return stream;
|
||||
}
|
||||
|
||||
/// Unsafe function to destroy CUDA stream, must check first the GPU exists
|
||||
void cuda_destroy_stream(cuda_stream_t *stream) { stream->release(); }
|
||||
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaStreamDestroy(stream));
|
||||
}
|
||||
|
||||
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaStreamSynchronize(stream));
|
||||
}
|
||||
|
||||
/// Unsafe function that will try to allocate even if gpu_index is invalid
|
||||
/// or if there's not enough memory. A safe wrapper around it must call
|
||||
@@ -25,20 +34,20 @@ void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
|
||||
|
||||
/// Allocates a size-byte array at the device memory. Tries to do it
|
||||
/// asynchronously.
|
||||
void *cuda_malloc_async(uint64_t size, cuda_stream_t *stream) {
|
||||
check_cuda_error(cudaSetDevice(stream->gpu_index));
|
||||
void *cuda_malloc_async(uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
void *ptr;
|
||||
|
||||
#ifndef CUDART_VERSION
|
||||
#error CUDART_VERSION Undefined!
|
||||
#elif (CUDART_VERSION >= 11020)
|
||||
int support_async_alloc;
|
||||
check_cuda_error(cudaDeviceGetAttribute(&support_async_alloc,
|
||||
cudaDevAttrMemoryPoolsSupported,
|
||||
stream->gpu_index));
|
||||
check_cuda_error(cudaDeviceGetAttribute(
|
||||
&support_async_alloc, cudaDevAttrMemoryPoolsSupported, gpu_index));
|
||||
|
||||
if (support_async_alloc) {
|
||||
check_cuda_error(cudaMallocAsync((void **)&ptr, size, stream->stream));
|
||||
check_cuda_error(cudaMallocAsync((void **)&ptr, size, stream));
|
||||
} else {
|
||||
check_cuda_error(cudaMalloc((void **)&ptr, size));
|
||||
}
|
||||
@@ -75,54 +84,57 @@ bool cuda_check_support_cooperative_groups() {
|
||||
/// false if Thread Block Cluster is not supported.
|
||||
/// true otherwise
|
||||
bool cuda_check_support_thread_block_clusters() {
|
||||
#if CUDA_ARCH >= 900
|
||||
// To-do: Is this really the best way to check support?
|
||||
int tbc_supported = 0;
|
||||
check_cuda_error(
|
||||
cudaDeviceGetAttribute(&tbc_supported, cudaDevAttrClusterLaunch, 0));
|
||||
|
||||
return tbc_supported > 0;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Copy memory to the GPU asynchronously
|
||||
void cuda_memcpy_async_to_gpu(void *dest, void *src, uint64_t size,
|
||||
cuda_stream_t *stream) {
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
if (size == 0)
|
||||
return;
|
||||
cudaPointerAttributes attr;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr, dest));
|
||||
if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) {
|
||||
if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid device pointer in async copy to GPU.")
|
||||
}
|
||||
|
||||
check_cuda_error(cudaSetDevice(stream->gpu_index));
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream->stream));
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
/// Copy memory within a GPU asynchronously
|
||||
void cuda_memcpy_async_gpu_to_gpu(void *dest, void *src, uint64_t size,
|
||||
cuda_stream_t *stream) {
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
if (size == 0)
|
||||
return;
|
||||
cudaPointerAttributes attr_dest;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr_dest, dest));
|
||||
if (attr_dest.device != stream->gpu_index &&
|
||||
attr_dest.type != cudaMemoryTypeDevice) {
|
||||
if (attr_dest.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid dest device pointer in copy from GPU to GPU.")
|
||||
}
|
||||
cudaPointerAttributes attr_src;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr_src, src));
|
||||
if (attr_src.device != stream->gpu_index &&
|
||||
attr_src.type != cudaMemoryTypeDevice) {
|
||||
if (attr_src.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid src device pointer in copy from GPU to GPU.")
|
||||
}
|
||||
if (attr_src.device != attr_dest.device) {
|
||||
PANIC("Cuda error: different devices specified in copy from GPU to GPU.")
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
if (attr_src.device == attr_dest.device) {
|
||||
check_cuda_error(
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice, stream));
|
||||
} else {
|
||||
check_cuda_error(cudaMemcpyPeerAsync(dest, attr_dest.device, src,
|
||||
attr_src.device, size, stream));
|
||||
}
|
||||
|
||||
check_cuda_error(cudaSetDevice(stream->gpu_index));
|
||||
check_cuda_error(cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice,
|
||||
stream->stream));
|
||||
}
|
||||
|
||||
/// Synchronizes device
|
||||
@@ -132,16 +144,16 @@ void cuda_synchronize_device(uint32_t gpu_index) {
|
||||
}
|
||||
|
||||
void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
|
||||
cuda_stream_t *stream) {
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
if (size == 0)
|
||||
return;
|
||||
cudaPointerAttributes attr;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr, dest));
|
||||
if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) {
|
||||
if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid dest device pointer in cuda memset.")
|
||||
}
|
||||
check_cuda_error(cudaSetDevice(stream->gpu_index));
|
||||
check_cuda_error(cudaMemsetAsync(dest, val, size, stream->stream));
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaMemsetAsync(dest, val, size, stream));
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
@@ -152,42 +164,47 @@ __global__ void cuda_set_value_kernel(Torus *array, Torus value, Torus n) {
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void cuda_set_value_async(cudaStream_t *stream, Torus *d_array, Torus value,
|
||||
Torus n) {
|
||||
cudaPointerAttributes attr;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr, d_array));
|
||||
if (attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid dest device pointer in cuda set value.")
|
||||
}
|
||||
int block_size = 256;
|
||||
int num_blocks = (n + block_size - 1) / block_size;
|
||||
void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
Torus *d_array, Torus value, Torus n) {
|
||||
if (n > 0) {
|
||||
cudaPointerAttributes attr;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr, d_array));
|
||||
if (attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid dest device pointer in cuda set value.")
|
||||
}
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
int block_size = 256;
|
||||
int num_blocks = (n + block_size - 1) / block_size;
|
||||
|
||||
// Launch the kernel
|
||||
cuda_set_value_kernel<<<num_blocks, block_size, 0, *stream>>>(d_array, value,
|
||||
n);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
// Launch the kernel
|
||||
cuda_set_value_kernel<<<num_blocks, block_size, 0, stream>>>(d_array, value,
|
||||
n);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
}
|
||||
|
||||
/// Explicitly instantiate cuda_set_value_async for 32 and 64 bits
|
||||
template void cuda_set_value_async(cudaStream_t *stream, uint64_t *d_array,
|
||||
uint64_t value, uint64_t n);
|
||||
template void cuda_set_value_async(cudaStream_t *stream, uint32_t *d_array,
|
||||
uint32_t value, uint32_t n);
|
||||
template void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
uint64_t *d_array, uint64_t value,
|
||||
uint64_t n);
|
||||
template void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
uint32_t *d_array, uint32_t value,
|
||||
uint32_t n);
|
||||
|
||||
/// Copy memory to the CPU asynchronously
|
||||
void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
|
||||
cuda_stream_t *stream) {
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
if (size == 0)
|
||||
return;
|
||||
cudaPointerAttributes attr;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr, src));
|
||||
if (attr.device != stream->gpu_index && attr.type != cudaMemoryTypeDevice) {
|
||||
if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid src device pointer in copy to CPU async.")
|
||||
}
|
||||
|
||||
check_cuda_error(cudaSetDevice(stream->gpu_index));
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream->stream));
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToHost, stream));
|
||||
}
|
||||
|
||||
/// Return number of GPUs available
|
||||
@@ -204,19 +221,18 @@ void cuda_drop(void *ptr, uint32_t gpu_index) {
|
||||
}
|
||||
|
||||
/// Drop a cuda array asynchronously, if supported on the device
|
||||
void cuda_drop_async(void *ptr, cuda_stream_t *stream) {
|
||||
void cuda_drop_async(void *ptr, cudaStream_t stream, uint32_t gpu_index) {
|
||||
|
||||
check_cuda_error(cudaSetDevice(stream->gpu_index));
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
#ifndef CUDART_VERSION
|
||||
#error CUDART_VERSION Undefined!
|
||||
#elif (CUDART_VERSION >= 11020)
|
||||
int support_async_alloc;
|
||||
check_cuda_error(cudaDeviceGetAttribute(&support_async_alloc,
|
||||
cudaDevAttrMemoryPoolsSupported,
|
||||
stream->gpu_index));
|
||||
check_cuda_error(cudaDeviceGetAttribute(
|
||||
&support_async_alloc, cudaDevAttrMemoryPoolsSupported, gpu_index));
|
||||
|
||||
if (support_async_alloc) {
|
||||
check_cuda_error(cudaFreeAsync(ptr, stream->stream));
|
||||
check_cuda_error(cudaFreeAsync(ptr, stream));
|
||||
} else {
|
||||
check_cuda_error(cudaFree(ptr));
|
||||
}
|
||||
@@ -227,24 +243,18 @@ void cuda_drop_async(void *ptr, cuda_stream_t *stream) {
|
||||
|
||||
/// Get the maximum size for the shared memory
|
||||
int cuda_get_max_shared_memory(uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
int max_shared_memory = 0;
|
||||
cudaDeviceGetAttribute(&max_shared_memory, cudaDevAttrMaxSharedMemoryPerBlock,
|
||||
gpu_index);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
#if CUDA_ARCH == 900
|
||||
max_shared_memory = 226000;
|
||||
#elif CUDA_ARCH == 890
|
||||
max_shared_memory = 127000;
|
||||
#elif CUDA_ARCH == 800
|
||||
max_shared_memory = 163000;
|
||||
#elif CUDA_ARCH == 700
|
||||
max_shared_memory = 95000;
|
||||
#endif
|
||||
return max_shared_memory;
|
||||
}
|
||||
|
||||
void cuda_synchronize_stream(cuda_stream_t *stream) { stream->synchronize(); }
|
||||
|
||||
void cuda_stream_add_callback(cuda_stream_t *stream,
|
||||
cudaStreamCallback_t callback, void *user_data) {
|
||||
|
||||
check_cuda_error(
|
||||
cudaStreamAddCallback(stream->stream, callback, user_data, 0));
|
||||
}
|
||||
|
||||
void host_free_on_stream_callback(cudaStream_t stream, cudaError_t status,
|
||||
void *host_pointer) {
|
||||
free(host_pointer);
|
||||
}
|
||||
|
||||
@@ -294,9 +294,6 @@ template <class params> __device__ void NSMFFT_direct(double2 *A) {
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
// compressed size = 8192 is actual polynomial size = 16384.
|
||||
// from this size, twiddles can't fit in constant memory,
|
||||
// so from here, butterfly operation access device memory.
|
||||
if constexpr (params::degree >= 8192) {
|
||||
// level 13
|
||||
tid = threadIdx.x;
|
||||
@@ -307,7 +304,7 @@ template <class params> __device__ void NSMFFT_direct(double2 *A) {
|
||||
(tid & (params::degree / 8192 - 1));
|
||||
i2 = i1 + params::degree / 8192;
|
||||
|
||||
w = negtwiddles13[twid_id];
|
||||
w = negtwiddles[twid_id + 4096];
|
||||
u = A[i1];
|
||||
v = A[i2] * w;
|
||||
|
||||
@@ -351,10 +348,6 @@ template <class params> __device__ void NSMFFT_inverse(double2 *A) {
|
||||
// mapping in backward fft is reversed
|
||||
// butterfly operation is started from last level
|
||||
|
||||
// compressed size = 8192 is actual polynomial size = 16384.
|
||||
// twiddles for this size can't fit in constant memory so
|
||||
// butterfly operation for this level access device memory to fetch
|
||||
// twiddles
|
||||
if constexpr (params::degree >= 8192) {
|
||||
// level 13
|
||||
tid = threadIdx.x;
|
||||
@@ -365,7 +358,7 @@ template <class params> __device__ void NSMFFT_inverse(double2 *A) {
|
||||
(tid & (params::degree / 8192 - 1));
|
||||
i2 = i1 + params::degree / 8192;
|
||||
|
||||
w = negtwiddles13[twid_id];
|
||||
w = negtwiddles[twid_id + 4096];
|
||||
u = A[i1] - A[i2];
|
||||
|
||||
A[i1] += A[i2];
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#include "cuComplex.h"
|
||||
|
||||
__constant__ double2 negtwiddles[4096] = {
|
||||
__device__ double2 negtwiddles[8192] = {
|
||||
{0, 0},
|
||||
{0.707106781186547461715008466854, 0.707106781186547572737310929369},
|
||||
{0.92387953251128673848313610506, 0.382683432365089781779232680492},
|
||||
@@ -4096,9 +4096,7 @@ __constant__ double2 negtwiddles[4096] = {
|
||||
{0.70791982920081630847874976098, 0.706292797233758484765075991163},
|
||||
{-0.706292797233758484765075991163, 0.70791982920081630847874976098},
|
||||
{0.00115048533711384847431913325266, 0.99999933819152553304832053982},
|
||||
{-0.99999933819152553304832053982, 0.00115048533711384847431913325266}};
|
||||
|
||||
__device__ double2 negtwiddles13[4096] = {
|
||||
{-0.99999933819152553304832053982, 0.00115048533711384847431913325266},
|
||||
{0.999999981616429334252416083473, 0.000191747597310703291528452552051},
|
||||
{-0.000191747597310703291528452552051, 0.999999981616429334252416083473},
|
||||
{0.706971182161065359039753275283, 0.707242354213734603085583785287},
|
||||
|
||||
@@ -2,12 +2,7 @@
|
||||
#define GPU_BOOTSTRAP_TWIDDLES_CUH
|
||||
|
||||
/*
|
||||
* 'negtwiddles' are stored in constant memory for faster access times
|
||||
* because of it's limited size, only twiddles for up to 2^12 polynomial size
|
||||
* can be stored there, twiddles for 2^13 are stored in device memory
|
||||
* 'negtwiddles13'
|
||||
* 'negtwiddles' are stored in device memory to profit caching
|
||||
*/
|
||||
|
||||
extern __constant__ double2 negtwiddles[4096];
|
||||
extern __device__ double2 negtwiddles13[4096];
|
||||
extern __device__ double2 negtwiddles[8192];
|
||||
#endif
|
||||
|
||||
49
backends/tfhe-cuda-backend/cuda/src/integer/addition.cu
Normal file
49
backends/tfhe-cuda-backend/cuda/src/integer/addition.cu
Normal file
@@ -0,0 +1,49 @@
|
||||
#include "integer/addition.cuh"
|
||||
|
||||
void scratch_cuda_signed_overflowing_add_or_sub_radix_ciphertext_kb_64(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, int8_t **mem_ptr,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, int8_t signed_operation,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
SIGNED_OPERATION op = (signed_operation == 1) ? SIGNED_OPERATION::ADDITION
|
||||
: SIGNED_OPERATION::SUBTRACTION;
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_signed_overflowing_add_or_sub_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_signed_overflowing_add_or_sub_memory<uint64_t> **)mem_ptr,
|
||||
num_blocks, op, params, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_signed_overflowing_add_or_sub_radix_ciphertext_kb_64(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, void *lhs,
|
||||
void *rhs, void *overflowed, int8_t signed_operation, int8_t *mem_ptr,
|
||||
void **bsks, void **ksks, uint32_t num_blocks) {
|
||||
|
||||
auto mem = (int_signed_overflowing_add_or_sub_memory<uint64_t> *)mem_ptr;
|
||||
SIGNED_OPERATION op = (signed_operation == 1) ? SIGNED_OPERATION::ADDITION
|
||||
: SIGNED_OPERATION::SUBTRACTION;
|
||||
|
||||
host_integer_signed_overflowing_add_or_sub_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lhs), static_cast<uint64_t *>(rhs),
|
||||
static_cast<uint64_t *>(overflowed), op, bsks, (uint64_t **)(ksks), mem,
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_signed_overflowing_add_or_sub(void **streams,
|
||||
uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_signed_overflowing_add_or_sub_memory<uint64_t> *mem_ptr =
|
||||
(int_signed_overflowing_add_or_sub_memory<uint64_t> *)(*mem_ptr_void);
|
||||
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
137
backends/tfhe-cuda-backend/cuda/src/integer/addition.cuh
Normal file
137
backends/tfhe-cuda-backend/cuda/src/integer/addition.cuh
Normal file
@@ -0,0 +1,137 @@
|
||||
#ifndef TFHE_RS_ADDITION_CUH
|
||||
#define TFHE_RS_ADDITION_CUH
|
||||
|
||||
#include "crypto/keyswitch.cuh"
|
||||
#include "device.h"
|
||||
#include "integer.h"
|
||||
#include "integer/comparison.cuh"
|
||||
#include "integer/integer.cuh"
|
||||
#include "integer/negation.cuh"
|
||||
#include "integer/scalar_shifts.cuh"
|
||||
#include "linear_algebra.h"
|
||||
#include "programmable_bootstrap.h"
|
||||
#include "utils/helper.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
template <typename Torus>
|
||||
void host_resolve_signed_overflow(
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
Torus *result, Torus *last_block_inner_propagation,
|
||||
Torus *last_block_input_carry, Torus *last_block_output_carry,
|
||||
int_resolve_signed_overflow_memory<Torus> *mem, void **bsks, Torus **ksks) {
|
||||
|
||||
auto x = mem->x;
|
||||
|
||||
Torus *d_clears =
|
||||
(Torus *)cuda_malloc_async(sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
|
||||
cuda_set_value_async<Torus>(streams[0], gpu_indexes[0], d_clears, 2, 1);
|
||||
|
||||
// replace with host function call
|
||||
cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
|
||||
streams[0], gpu_indexes[0], x, last_block_output_carry, d_clears,
|
||||
mem->params.big_lwe_dimension, 1);
|
||||
|
||||
host_addition(streams[0], gpu_indexes[0], last_block_inner_propagation,
|
||||
last_block_inner_propagation, x, mem->params.big_lwe_dimension,
|
||||
1);
|
||||
host_addition(streams[0], gpu_indexes[0], last_block_inner_propagation,
|
||||
last_block_inner_propagation, last_block_input_carry,
|
||||
mem->params.big_lwe_dimension, 1);
|
||||
|
||||
host_apply_univariate_lut_kb<Torus>(streams, gpu_indexes, gpu_count, result,
|
||||
last_block_inner_propagation,
|
||||
mem->resolve_overflow_lut, ksks, bsks, 1);
|
||||
|
||||
cuda_drop_async(d_clears, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void scratch_cuda_integer_signed_overflowing_add_or_sub_kb(
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
int_signed_overflowing_add_or_sub_memory<Torus> **mem_ptr,
|
||||
uint32_t num_blocks, SIGNED_OPERATION op, int_radix_params params,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
*mem_ptr = new int_signed_overflowing_add_or_sub_memory<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_blocks, op,
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
/*
|
||||
* Addition - signed_operation = 1
|
||||
* Subtraction - signed_operation = -1
|
||||
*/
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_signed_overflowing_add_or_sub_kb(
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
Torus *lhs, Torus *rhs, Torus *overflowed, SIGNED_OPERATION op, void **bsks,
|
||||
uint64_t **ksks,
|
||||
int_signed_overflowing_add_or_sub_memory<uint64_t> *mem_ptr,
|
||||
uint32_t num_blocks) {
|
||||
|
||||
auto radix_params = mem_ptr->params;
|
||||
|
||||
uint32_t big_lwe_dimension = radix_params.big_lwe_dimension;
|
||||
uint32_t big_lwe_size = big_lwe_dimension + 1;
|
||||
uint32_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
|
||||
assert(radix_params.message_modulus >= 4 && radix_params.carry_modulus >= 4);
|
||||
|
||||
auto result = mem_ptr->result;
|
||||
auto neg_rhs = mem_ptr->neg_rhs;
|
||||
auto input_carries = mem_ptr->input_carries;
|
||||
auto output_carry = mem_ptr->output_carry;
|
||||
auto last_block_inner_propagation = mem_ptr->last_block_inner_propagation;
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(result, lhs, num_blocks * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
// phase 1
|
||||
if (op == SIGNED_OPERATION::ADDITION) {
|
||||
host_addition(streams[0], gpu_indexes[0], result, lhs, rhs,
|
||||
big_lwe_dimension, num_blocks);
|
||||
} else {
|
||||
host_integer_radix_negation(
|
||||
streams, gpu_indexes, gpu_count, neg_rhs, rhs, big_lwe_dimension,
|
||||
num_blocks, radix_params.message_modulus, radix_params.carry_modulus);
|
||||
host_addition(streams[0], gpu_indexes[0], result, lhs, neg_rhs,
|
||||
big_lwe_dimension, num_blocks);
|
||||
}
|
||||
|
||||
// phase 2
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
host_propagate_single_carry(mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
|
||||
result, output_carry, input_carries,
|
||||
mem_ptr->scp_mem, bsks, ksks, num_blocks);
|
||||
host_generate_last_block_inner_propagation(
|
||||
mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
last_block_inner_propagation, &lhs[(num_blocks - 1) * big_lwe_size],
|
||||
&rhs[(num_blocks - 1) * big_lwe_size], mem_ptr->las_block_prop_mem, bsks,
|
||||
ksks);
|
||||
|
||||
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
// phase 3
|
||||
auto input_carry = &input_carries[(num_blocks - 1) * big_lwe_size];
|
||||
|
||||
host_resolve_signed_overflow(
|
||||
streams, gpu_indexes, gpu_count, overflowed, last_block_inner_propagation,
|
||||
input_carry, output_carry, mem_ptr->resolve_overflow_mem, bsks, ksks);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(lhs, result, num_blocks * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
|
||||
#endif // TFHE_RS_ADDITION_CUH
|
||||
@@ -1,13 +1,13 @@
|
||||
#include "integer/bitwise_ops.cuh"
|
||||
|
||||
void scratch_cuda_integer_radix_bitop_kb_64(
|
||||
cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, BITOP_TYPE op_type,
|
||||
bool allocate_gpu_memory) {
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, int8_t **mem_ptr,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
BITOP_TYPE op_type, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -15,37 +15,29 @@ void scratch_cuda_integer_radix_bitop_kb_64(
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_radix_bitop_kb<uint64_t>(
|
||||
stream, (int_bitop_buffer<uint64_t> **)mem_ptr, lwe_ciphertext_count,
|
||||
params, op_type, allocate_gpu_memory);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_bitop_buffer<uint64_t> **)mem_ptr, lwe_ciphertext_count, params,
|
||||
op_type, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_bitop_integer_radix_ciphertext_kb_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_1,
|
||||
void *lwe_array_2, int8_t *mem_ptr, void *bsk, void *ksk,
|
||||
uint32_t lwe_ciphertext_count) {
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void *lwe_array_1, void *lwe_array_2, int8_t *mem_ptr,
|
||||
void **bsks, void **ksks, uint32_t lwe_ciphertext_count) {
|
||||
|
||||
host_integer_radix_bitop_kb<uint64_t>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<uint64_t *>(lwe_array_1),
|
||||
static_cast<uint64_t *>(lwe_array_2),
|
||||
(int_bitop_buffer<uint64_t> *)mem_ptr, bsk, static_cast<uint64_t *>(ksk),
|
||||
(int_bitop_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
void cuda_bitnot_integer_radix_ciphertext_kb_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_in,
|
||||
int8_t *mem_ptr, void *bsk, void *ksk, uint32_t lwe_ciphertext_count) {
|
||||
|
||||
host_integer_radix_bitnot_kb<uint64_t>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<uint64_t *>(lwe_array_in),
|
||||
(int_bitop_buffer<uint64_t> *)mem_ptr, bsk, static_cast<uint64_t *>(ksk),
|
||||
lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_bitop(cuda_stream_t *stream, int8_t **mem_ptr_void) {
|
||||
void cleanup_cuda_integer_bitop(void **streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, int8_t **mem_ptr_void) {
|
||||
|
||||
int_bitop_buffer<uint64_t> *mem_ptr =
|
||||
(int_bitop_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release(stream);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
@@ -13,40 +13,28 @@
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
host_integer_radix_bitop_kb(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
host_integer_radix_bitop_kb(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out,
|
||||
Torus *lwe_array_1, Torus *lwe_array_2,
|
||||
int_bitop_buffer<Torus> *mem_ptr, void *bsk,
|
||||
Torus *ksk, uint32_t num_radix_blocks) {
|
||||
int_bitop_buffer<Torus> *mem_ptr, void **bsks,
|
||||
Torus **ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
auto lut = mem_ptr->lut;
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
stream, lwe_array_out, lwe_array_1, lwe_array_2, bsk, ksk,
|
||||
num_radix_blocks, lut);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
host_integer_radix_bitnot_kb(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
Torus *lwe_array_in,
|
||||
int_bitop_buffer<Torus> *mem_ptr, void *bsk,
|
||||
Torus *ksk, uint32_t num_radix_blocks) {
|
||||
|
||||
auto lut = mem_ptr->lut;
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
stream, lwe_array_out, lwe_array_in, bsk, ksk, num_radix_blocks, lut);
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_1, lwe_array_2,
|
||||
bsks, ksks, num_radix_blocks, lut, lut->params.message_modulus);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void scratch_cuda_integer_radix_bitop_kb(
|
||||
cuda_stream_t *stream, int_bitop_buffer<Torus> **mem_ptr,
|
||||
uint32_t num_radix_blocks, int_radix_params params, BITOP_TYPE op,
|
||||
bool allocate_gpu_memory) {
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
int_bitop_buffer<Torus> **mem_ptr, uint32_t num_radix_blocks,
|
||||
int_radix_params params, BITOP_TYPE op, bool allocate_gpu_memory) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
*mem_ptr = new int_bitop_buffer<Torus>(stream, op, params, num_radix_blocks,
|
||||
allocate_gpu_memory);
|
||||
*mem_ptr =
|
||||
new int_bitop_buffer<Torus>(streams, gpu_indexes, gpu_count, op, params,
|
||||
num_radix_blocks, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
#include "integer/cmux.cuh"
|
||||
|
||||
void scratch_cuda_integer_radix_cmux_kb_64(
|
||||
cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t lwe_ciphertext_count, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, int8_t **mem_ptr,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -17,29 +18,33 @@ void scratch_cuda_integer_radix_cmux_kb_64(
|
||||
[](uint64_t x) -> uint64_t { return x == 1; };
|
||||
|
||||
scratch_cuda_integer_radix_cmux_kb(
|
||||
stream, (int_cmux_buffer<uint64_t> **)mem_ptr, predicate_lut_f,
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_cmux_buffer<uint64_t> **)mem_ptr, predicate_lut_f,
|
||||
lwe_ciphertext_count, params, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_cmux_integer_radix_ciphertext_kb_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_condition,
|
||||
void *lwe_array_true, void *lwe_array_false, int8_t *mem_ptr, void *bsk,
|
||||
void *ksk, uint32_t lwe_ciphertext_count) {
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void *lwe_condition, void *lwe_array_true,
|
||||
void *lwe_array_false, int8_t *mem_ptr, void **bsks, void **ksks,
|
||||
uint32_t lwe_ciphertext_count) {
|
||||
|
||||
host_integer_radix_cmux_kb<uint64_t>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<uint64_t *>(lwe_condition),
|
||||
static_cast<uint64_t *>(lwe_array_true),
|
||||
static_cast<uint64_t *>(lwe_array_false),
|
||||
(int_cmux_buffer<uint64_t> *)mem_ptr, bsk, static_cast<uint64_t *>(ksk),
|
||||
(int_cmux_buffer<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
|
||||
lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_radix_cmux(cuda_stream_t *stream,
|
||||
void cleanup_cuda_integer_radix_cmux(void **streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
|
||||
int_cmux_buffer<uint64_t> *mem_ptr =
|
||||
(int_cmux_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release(stream);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
@@ -2,15 +2,15 @@
|
||||
#define CUDA_INTEGER_CMUX_CUH
|
||||
|
||||
#include "integer.cuh"
|
||||
#include <omp.h>
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void zero_out_if(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
__host__ void zero_out_if(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out,
|
||||
Torus *lwe_array_input, Torus *lwe_condition,
|
||||
int_zero_out_if_buffer<Torus> *mem_ptr,
|
||||
int_radix_lut<Torus> *predicate, void *bsk,
|
||||
Torus *ksk, uint32_t num_radix_blocks) {
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
int_radix_lut<Torus> *predicate, void **bsks,
|
||||
Torus **ksks, uint32_t num_radix_blocks) {
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
auto params = mem_ptr->params;
|
||||
|
||||
int big_lwe_size = params.big_lwe_dimension + 1;
|
||||
@@ -27,8 +27,7 @@ __host__ void zero_out_if(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
auto lwe_array_out_block = tmp_lwe_array_input + i * big_lwe_size;
|
||||
auto lwe_array_input_block = lwe_array_input + i * big_lwe_size;
|
||||
|
||||
device_pack_bivariate_blocks<<<num_blocks, num_threads, 0,
|
||||
stream->stream>>>(
|
||||
device_pack_bivariate_blocks<<<num_blocks, num_threads, 0, streams[0]>>>(
|
||||
lwe_array_out_block, predicate->lwe_indexes_in, lwe_array_input_block,
|
||||
lwe_condition, predicate->lwe_indexes_in, params.big_lwe_dimension,
|
||||
params.message_modulus, 1);
|
||||
@@ -36,67 +35,64 @@ __host__ void zero_out_if(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
}
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
stream, lwe_array_out, tmp_lwe_array_input, bsk, ksk, num_radix_blocks,
|
||||
predicate);
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, tmp_lwe_array_input, bsks,
|
||||
ksks, num_radix_blocks, predicate);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
host_integer_radix_cmux_kb(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
Torus *lwe_condition, Torus *lwe_array_true,
|
||||
Torus *lwe_array_false,
|
||||
int_cmux_buffer<Torus> *mem_ptr, void *bsk,
|
||||
Torus *ksk, uint32_t num_radix_blocks) {
|
||||
__host__ void host_integer_radix_cmux_kb(
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
Torus *lwe_array_out, Torus *lwe_condition, Torus *lwe_array_true,
|
||||
Torus *lwe_array_false, int_cmux_buffer<Torus> *mem_ptr, void **bsks,
|
||||
Torus **ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
auto params = mem_ptr->params;
|
||||
|
||||
// Since our CPU threads will be working on different streams we shall assert
|
||||
// the work in the main stream is completed
|
||||
stream->synchronize();
|
||||
auto true_stream = mem_ptr->zero_if_true_buffer->local_stream;
|
||||
auto false_stream = mem_ptr->zero_if_false_buffer->local_stream;
|
||||
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
// Both sections may be executed in parallel
|
||||
#pragma omp section
|
||||
{
|
||||
auto mem_true = mem_ptr->zero_if_true_buffer;
|
||||
zero_out_if(true_stream, mem_ptr->tmp_true_ct, lwe_array_true,
|
||||
lwe_condition, mem_true, mem_ptr->inverted_predicate_lut, bsk,
|
||||
ksk, num_radix_blocks);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
auto mem_false = mem_ptr->zero_if_false_buffer;
|
||||
zero_out_if(false_stream, mem_ptr->tmp_false_ct, lwe_array_false,
|
||||
lwe_condition, mem_false, mem_ptr->predicate_lut, bsk, ksk,
|
||||
num_radix_blocks);
|
||||
}
|
||||
auto true_streams = mem_ptr->zero_if_true_buffer->true_streams;
|
||||
auto false_streams = mem_ptr->zero_if_false_buffer->false_streams;
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
auto mem_true = mem_ptr->zero_if_true_buffer;
|
||||
zero_out_if(true_streams, gpu_indexes, gpu_count, mem_ptr->tmp_true_ct,
|
||||
lwe_array_true, lwe_condition, mem_true,
|
||||
mem_ptr->inverted_predicate_lut, bsks, ksks, num_radix_blocks);
|
||||
auto mem_false = mem_ptr->zero_if_false_buffer;
|
||||
zero_out_if(false_streams, gpu_indexes, gpu_count, mem_ptr->tmp_false_ct,
|
||||
lwe_array_false, lwe_condition, mem_false, mem_ptr->predicate_lut,
|
||||
bsks, ksks, num_radix_blocks);
|
||||
for (uint j = 0; j < mem_ptr->zero_if_true_buffer->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(true_streams[j], gpu_indexes[j]);
|
||||
}
|
||||
for (uint j = 0; j < mem_ptr->zero_if_false_buffer->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(false_streams[j], gpu_indexes[j]);
|
||||
}
|
||||
cuda_synchronize_stream(true_stream);
|
||||
cuda_synchronize_stream(false_stream);
|
||||
|
||||
// If the condition was true, true_ct will have kept its value and false_ct
|
||||
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
|
||||
// have kept its value
|
||||
auto added_cts = mem_ptr->tmp_true_ct;
|
||||
host_addition(stream, added_cts, mem_ptr->tmp_true_ct, mem_ptr->tmp_false_ct,
|
||||
params.big_lwe_dimension, num_radix_blocks);
|
||||
host_addition(streams[0], gpu_indexes[0], added_cts, mem_ptr->tmp_true_ct,
|
||||
mem_ptr->tmp_false_ct, params.big_lwe_dimension,
|
||||
num_radix_blocks);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
stream, lwe_array_out, added_cts, bsk, ksk, num_radix_blocks,
|
||||
mem_ptr->message_extract_lut);
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
|
||||
num_radix_blocks, mem_ptr->message_extract_lut);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void scratch_cuda_integer_radix_cmux_kb(
|
||||
cuda_stream_t *stream, int_cmux_buffer<Torus> **mem_ptr,
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
int_cmux_buffer<Torus> **mem_ptr,
|
||||
std::function<Torus(Torus)> predicate_lut_f, uint32_t num_radix_blocks,
|
||||
int_radix_params params, bool allocate_gpu_memory) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
*mem_ptr = new int_cmux_buffer<Torus>(stream, predicate_lut_f, params,
|
||||
*mem_ptr = new int_cmux_buffer<Torus>(streams, gpu_indexes, gpu_count,
|
||||
predicate_lut_f, params,
|
||||
num_radix_blocks, allocate_gpu_memory);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
#include "integer/comparison.cuh"
|
||||
|
||||
void scratch_cuda_integer_radix_comparison_kb_64(
|
||||
cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, COMPARISON_TYPE op_type, bool is_signed,
|
||||
bool allocate_gpu_memory) {
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, int8_t **mem_ptr,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
COMPARISON_TYPE op_type, bool is_signed, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -18,8 +18,9 @@ void scratch_cuda_integer_radix_comparison_kb_64(
|
||||
case EQ:
|
||||
case NE:
|
||||
scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
|
||||
stream, (int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks,
|
||||
params, op_type, false, allocate_gpu_memory);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params,
|
||||
op_type, false, allocate_gpu_memory);
|
||||
break;
|
||||
case GT:
|
||||
case GE:
|
||||
@@ -28,16 +29,17 @@ void scratch_cuda_integer_radix_comparison_kb_64(
|
||||
case MAX:
|
||||
case MIN:
|
||||
scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
|
||||
stream, (int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks,
|
||||
params, op_type, is_signed, allocate_gpu_memory);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params,
|
||||
op_type, is_signed, allocate_gpu_memory);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
cuda_stream_t *stream, void *lwe_array_out, void *lwe_array_1,
|
||||
void *lwe_array_2, int8_t *mem_ptr, void *bsk, void *ksk,
|
||||
uint32_t num_radix_blocks) {
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void *lwe_array_1, void *lwe_array_2, int8_t *mem_ptr,
|
||||
void **bsks, void **ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *buffer =
|
||||
(int_comparison_buffer<uint64_t> *)mem_ptr;
|
||||
@@ -45,39 +47,43 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
case EQ:
|
||||
case NE:
|
||||
host_integer_radix_equality_check_kb<uint64_t>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<uint64_t *>(lwe_array_1),
|
||||
static_cast<uint64_t *>(lwe_array_2), buffer, bsk,
|
||||
static_cast<uint64_t *>(ksk), num_radix_blocks);
|
||||
static_cast<uint64_t *>(lwe_array_2), buffer, bsks, (uint64_t **)(ksks),
|
||||
num_radix_blocks);
|
||||
break;
|
||||
case GT:
|
||||
case GE:
|
||||
case LT:
|
||||
case LE:
|
||||
host_integer_radix_difference_check_kb<uint64_t>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<uint64_t *>(lwe_array_1),
|
||||
static_cast<uint64_t *>(lwe_array_2), buffer,
|
||||
buffer->diff_buffer->operator_f, bsk, static_cast<uint64_t *>(ksk),
|
||||
buffer->diff_buffer->operator_f, bsks, (uint64_t **)(ksks),
|
||||
num_radix_blocks);
|
||||
break;
|
||||
case MAX:
|
||||
case MIN:
|
||||
host_integer_radix_maxmin_kb<uint64_t>(
|
||||
stream, static_cast<uint64_t *>(lwe_array_out),
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<uint64_t *>(lwe_array_1),
|
||||
static_cast<uint64_t *>(lwe_array_2), buffer, bsk,
|
||||
static_cast<uint64_t *>(ksk), num_radix_blocks);
|
||||
static_cast<uint64_t *>(lwe_array_2), buffer, bsks, (uint64_t **)(ksks),
|
||||
num_radix_blocks);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error: integer operation not supported")
|
||||
}
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_comparison(cuda_stream_t *stream,
|
||||
void cleanup_cuda_integer_comparison(void **streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *mem_ptr =
|
||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release(stream);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
@@ -33,16 +33,17 @@ __global__ void device_accumulate_all_blocks(Torus *output, Torus *input_block,
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void accumulate_all_blocks(cuda_stream_t *stream, Torus *output,
|
||||
Torus *input, uint32_t lwe_dimension,
|
||||
__host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
|
||||
Torus *output, Torus *input,
|
||||
uint32_t lwe_dimension,
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
cudaSetDevice(gpu_index);
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = (lwe_dimension + 1);
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
// Add all blocks and store in sum
|
||||
device_accumulate_all_blocks<<<num_blocks, num_threads, 0, stream->stream>>>(
|
||||
device_accumulate_all_blocks<<<num_blocks, num_threads, 0, stream>>>(
|
||||
output, input, lwe_dimension, num_radix_blocks);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
@@ -55,13 +56,13 @@ __host__ void accumulate_all_blocks(cuda_stream_t *stream, Torus *output,
|
||||
*
|
||||
*/
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
are_all_comparisons_block_true(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
Torus *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *bsk,
|
||||
Torus *ksk, uint32_t num_radix_blocks) {
|
||||
__host__ void are_all_comparisons_block_true(
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
Torus *lwe_array_out, Torus *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void **bsks, Torus **ksks,
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
auto params = mem_ptr->params;
|
||||
auto big_lwe_dimension = params.big_lwe_dimension;
|
||||
auto glwe_dimension = params.glwe_dimension;
|
||||
@@ -76,9 +77,10 @@ are_all_comparisons_block_true(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
uint32_t total_modulus = message_modulus * carry_modulus;
|
||||
uint32_t max_value = total_modulus - 1;
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
tmp_out, lwe_array_in,
|
||||
num_radix_blocks * (big_lwe_dimension + 1) * sizeof(Torus), stream);
|
||||
cuda_memcpy_async_gpu_to_gpu(tmp_out, lwe_array_in,
|
||||
num_radix_blocks * (big_lwe_dimension + 1) *
|
||||
sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
uint32_t remaining_blocks = num_radix_blocks;
|
||||
|
||||
@@ -91,17 +93,17 @@ are_all_comparisons_block_true(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
// as in the worst case we will be adding `max_value` ones
|
||||
auto input_blocks = tmp_out;
|
||||
auto accumulator = are_all_block_true_buffer->tmp_block_accumulated;
|
||||
auto is_equal_to_num_blocks_map =
|
||||
&are_all_block_true_buffer->is_equal_to_lut_map;
|
||||
for (int i = 0; i < num_chunks; i++) {
|
||||
accumulate_all_blocks(stream, accumulator, input_blocks,
|
||||
big_lwe_dimension, chunk_length);
|
||||
accumulate_all_blocks(streams[0], gpu_indexes[0], accumulator,
|
||||
input_blocks, big_lwe_dimension, chunk_length);
|
||||
|
||||
accumulator += (big_lwe_dimension + 1);
|
||||
remaining_blocks -= (chunk_length - 1);
|
||||
input_blocks += (big_lwe_dimension + 1) * chunk_length;
|
||||
}
|
||||
accumulator = are_all_block_true_buffer->tmp_block_accumulated;
|
||||
auto is_equal_to_num_blocks_map =
|
||||
&are_all_block_true_buffer->is_equal_to_lut_map;
|
||||
|
||||
// Selects a LUT
|
||||
int_radix_lut<Torus> *lut;
|
||||
@@ -115,16 +117,20 @@ are_all_comparisons_block_true(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
lut = (*is_equal_to_num_blocks_map)[chunk_length];
|
||||
} else {
|
||||
// LUT needs to be computed
|
||||
auto new_lut = new int_radix_lut<Torus>(stream, params, max_value,
|
||||
num_radix_blocks, true);
|
||||
auto new_lut =
|
||||
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
|
||||
max_value, num_radix_blocks, true);
|
||||
|
||||
auto is_equal_to_num_blocks_lut_f = [max_value,
|
||||
chunk_length](Torus x) -> Torus {
|
||||
return (x & max_value) == chunk_length;
|
||||
};
|
||||
generate_device_accumulator<Torus>(
|
||||
stream, new_lut->lut, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, is_equal_to_num_blocks_lut_f);
|
||||
streams[0], gpu_indexes[0], new_lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
is_equal_to_num_blocks_lut_f);
|
||||
|
||||
new_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
(*is_equal_to_num_blocks_map)[chunk_length] = new_lut;
|
||||
lut = new_lut;
|
||||
@@ -135,11 +141,13 @@ are_all_comparisons_block_true(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
if (remaining_blocks == 1) {
|
||||
// In the last iteration we copy the output to the final address
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
stream, lwe_array_out, accumulator, bsk, ksk, 1, lut);
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
|
||||
ksks, 1, lut);
|
||||
return;
|
||||
} else {
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
stream, tmp_out, accumulator, bsk, ksk, num_chunks, lut);
|
||||
streams, gpu_indexes, gpu_count, tmp_out, accumulator, bsks, ksks,
|
||||
num_chunks, lut);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -152,9 +160,12 @@ are_all_comparisons_block_true(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
*/
|
||||
template <typename Torus>
|
||||
__host__ void is_at_least_one_comparisons_block_true(
|
||||
cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *bsk, Torus *ksk,
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
Torus *lwe_array_out, Torus *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void **bsks, Torus **ksks,
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
cudaSetDevice(gpu_indexes[0]);
|
||||
auto params = mem_ptr->params;
|
||||
auto big_lwe_dimension = params.big_lwe_dimension;
|
||||
auto message_modulus = params.message_modulus;
|
||||
@@ -165,9 +176,10 @@ __host__ void is_at_least_one_comparisons_block_true(
|
||||
uint32_t total_modulus = message_modulus * carry_modulus;
|
||||
uint32_t max_value = total_modulus - 1;
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
mem_ptr->tmp_lwe_array_out, lwe_array_in,
|
||||
num_radix_blocks * (big_lwe_dimension + 1) * sizeof(Torus), stream);
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->tmp_lwe_array_out, lwe_array_in,
|
||||
num_radix_blocks * (big_lwe_dimension + 1) *
|
||||
sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
uint32_t remaining_blocks = num_radix_blocks;
|
||||
while (remaining_blocks > 0) {
|
||||
@@ -180,8 +192,8 @@ __host__ void is_at_least_one_comparisons_block_true(
|
||||
auto input_blocks = mem_ptr->tmp_lwe_array_out;
|
||||
auto accumulator = buffer->tmp_block_accumulated;
|
||||
for (int i = 0; i < num_chunks; i++) {
|
||||
accumulate_all_blocks(stream, accumulator, input_blocks,
|
||||
big_lwe_dimension, chunk_length);
|
||||
accumulate_all_blocks(streams[0], gpu_indexes[0], accumulator,
|
||||
input_blocks, big_lwe_dimension, chunk_length);
|
||||
|
||||
accumulator += (big_lwe_dimension + 1);
|
||||
remaining_blocks -= (chunk_length - 1);
|
||||
@@ -196,12 +208,13 @@ __host__ void is_at_least_one_comparisons_block_true(
|
||||
if (remaining_blocks == 1) {
|
||||
// In the last iteration we copy the output to the final address
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
stream, lwe_array_out, accumulator, bsk, ksk, 1, lut);
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
|
||||
ksks, 1, lut);
|
||||
return;
|
||||
} else {
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
stream, mem_ptr->tmp_lwe_array_out, accumulator, bsk, ksk, num_chunks,
|
||||
lut);
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->tmp_lwe_array_out,
|
||||
accumulator, bsks, ksks, num_chunks, lut);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -227,11 +240,11 @@ __host__ void is_at_least_one_comparisons_block_true(
|
||||
// are_all_comparisons_block_true
|
||||
template <typename Torus>
|
||||
__host__ void host_compare_with_zero_equality(
|
||||
cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *bsk, Torus *ksk,
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
Torus *lwe_array_out, Torus *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void **bsks, Torus **ksks,
|
||||
int32_t num_radix_blocks, int_radix_lut<Torus> *zero_comparison) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
auto params = mem_ptr->params;
|
||||
auto big_lwe_dimension = params.big_lwe_dimension;
|
||||
auto message_modulus = params.message_modulus;
|
||||
@@ -256,7 +269,8 @@ __host__ void host_compare_with_zero_equality(
|
||||
|
||||
if (num_radix_blocks == 1) {
|
||||
// Just copy
|
||||
cuda_memcpy_async_gpu_to_gpu(sum, lwe_array_in, big_lwe_size_bytes, stream);
|
||||
cuda_memcpy_async_gpu_to_gpu(sum, lwe_array_in, big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
num_sum_blocks = 1;
|
||||
} else {
|
||||
uint32_t remainder_blocks = num_radix_blocks;
|
||||
@@ -266,8 +280,8 @@ __host__ void host_compare_with_zero_equality(
|
||||
uint32_t chunk_size =
|
||||
std::min(remainder_blocks, num_elements_to_fill_carry);
|
||||
|
||||
accumulate_all_blocks(stream, sum_i, chunk, big_lwe_dimension,
|
||||
chunk_size);
|
||||
accumulate_all_blocks(streams[0], gpu_indexes[0], sum_i, chunk,
|
||||
big_lwe_dimension, chunk_size);
|
||||
|
||||
num_sum_blocks++;
|
||||
remainder_blocks -= (chunk_size - 1);
|
||||
@@ -279,40 +293,44 @@ __host__ void host_compare_with_zero_equality(
|
||||
}
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
stream, sum, sum, bsk, ksk, num_sum_blocks, zero_comparison);
|
||||
are_all_comparisons_block_true(stream, lwe_array_out, sum, mem_ptr, bsk, ksk,
|
||||
num_sum_blocks);
|
||||
streams, gpu_indexes, gpu_count, sum, sum, bsks, ksks, num_sum_blocks,
|
||||
zero_comparison);
|
||||
are_all_comparisons_block_true(streams, gpu_indexes, gpu_count, lwe_array_out,
|
||||
sum, mem_ptr, bsks, ksks, num_sum_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_equality_check_kb(
|
||||
cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_1,
|
||||
Torus *lwe_array_2, int_comparison_buffer<Torus> *mem_ptr, void *bsk,
|
||||
Torus *ksk, uint32_t num_radix_blocks) {
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
Torus *lwe_array_out, Torus *lwe_array_1, Torus *lwe_array_2,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void **bsks, Torus **ksks,
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
auto eq_buffer = mem_ptr->eq_buffer;
|
||||
|
||||
// Applies the LUT for the comparison operation
|
||||
auto comparisons = mem_ptr->tmp_block_comparisons;
|
||||
integer_radix_apply_bivariate_lookup_table_kb(
|
||||
stream, comparisons, lwe_array_1, lwe_array_2, bsk, ksk, num_radix_blocks,
|
||||
eq_buffer->operator_lut);
|
||||
streams, gpu_indexes, gpu_count, comparisons, lwe_array_1, lwe_array_2,
|
||||
bsks, ksks, num_radix_blocks, eq_buffer->operator_lut,
|
||||
eq_buffer->operator_lut->params.message_modulus);
|
||||
|
||||
// This takes a Vec of blocks, where each block is either 0 or 1.
|
||||
//
|
||||
// It returns a block encrypting 1 if all input blocks are 1
|
||||
// otherwise the block encrypts 0
|
||||
are_all_comparisons_block_true(stream, lwe_array_out, comparisons, mem_ptr,
|
||||
bsk, ksk, num_radix_blocks);
|
||||
are_all_comparisons_block_true(streams, gpu_indexes, gpu_count, lwe_array_out,
|
||||
comparisons, mem_ptr, bsks, ksks,
|
||||
num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
compare_radix_blocks_kb(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
compare_radix_blocks_kb(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out,
|
||||
Torus *lwe_array_left, Torus *lwe_array_right,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *bsk,
|
||||
Torus *ksk, uint32_t num_radix_blocks) {
|
||||
int_comparison_buffer<Torus> *mem_ptr, void **bsks,
|
||||
Torus **ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
auto params = mem_ptr->params;
|
||||
auto big_lwe_dimension = params.big_lwe_dimension;
|
||||
@@ -334,21 +352,21 @@ compare_radix_blocks_kb(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
|
||||
// Subtract
|
||||
// Here we need the true lwe sub, not the one that comes from shortint.
|
||||
host_subtraction(stream, lwe_array_out, lwe_array_left, lwe_array_right,
|
||||
big_lwe_dimension, num_radix_blocks);
|
||||
host_subtraction(streams[0], gpu_indexes[0], lwe_array_out, lwe_array_left,
|
||||
lwe_array_right, big_lwe_dimension, num_radix_blocks);
|
||||
|
||||
// Apply LUT to compare to 0
|
||||
auto is_non_zero_lut = mem_ptr->eq_buffer->is_non_zero_lut;
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
stream, lwe_array_out, lwe_array_out, bsk, ksk, num_radix_blocks,
|
||||
is_non_zero_lut);
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_out, bsks, ksks,
|
||||
num_radix_blocks, is_non_zero_lut);
|
||||
|
||||
// Add one
|
||||
// Here Lhs can have the following values: (-1) % (message modulus * carry
|
||||
// modulus), 0, 1 So the output values after the addition will be: 0, 1, 2
|
||||
host_integer_radix_add_scalar_one_inplace(stream, lwe_array_out,
|
||||
big_lwe_dimension, num_radix_blocks,
|
||||
message_modulus, carry_modulus);
|
||||
host_integer_radix_add_scalar_one_inplace(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, big_lwe_dimension,
|
||||
num_radix_blocks, message_modulus, carry_modulus);
|
||||
}
|
||||
|
||||
// Reduces a vec containing shortint blocks that encrypts a sign
|
||||
@@ -356,13 +374,13 @@ compare_radix_blocks_kb(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
// final sign
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
tree_sign_reduction(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
tree_sign_reduction(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out,
|
||||
Torus *lwe_block_comparisons,
|
||||
int_tree_sign_reduction_buffer<Torus> *tree_buffer,
|
||||
std::function<Torus(Torus)> sign_handler_f, void *bsk,
|
||||
Torus *ksk, uint32_t num_radix_blocks) {
|
||||
std::function<Torus(Torus)> sign_handler_f, void **bsks,
|
||||
Torus **ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
auto params = tree_buffer->params;
|
||||
auto big_lwe_dimension = params.big_lwe_dimension;
|
||||
auto glwe_dimension = params.glwe_dimension;
|
||||
@@ -381,16 +399,19 @@ tree_sign_reduction(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
auto y = tree_buffer->tmp_y;
|
||||
if (x != lwe_block_comparisons)
|
||||
cuda_memcpy_async_gpu_to_gpu(x, lwe_block_comparisons,
|
||||
big_lwe_size_bytes * num_radix_blocks, stream);
|
||||
big_lwe_size_bytes * num_radix_blocks,
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
uint32_t partial_block_count = num_radix_blocks;
|
||||
|
||||
auto inner_tree_leaf = tree_buffer->tree_inner_leaf_lut;
|
||||
while (partial_block_count > 2) {
|
||||
pack_blocks(stream, y, x, big_lwe_dimension, partial_block_count, 4);
|
||||
pack_blocks(streams[0], gpu_indexes[0], y, x, big_lwe_dimension,
|
||||
partial_block_count, 4);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
stream, x, y, bsk, ksk, partial_block_count >> 1, inner_tree_leaf);
|
||||
streams, gpu_indexes, gpu_count, x, y, bsks, ksks,
|
||||
partial_block_count >> 1, inner_tree_leaf);
|
||||
|
||||
if ((partial_block_count % 2) != 0) {
|
||||
partial_block_count >>= 1;
|
||||
@@ -400,7 +421,8 @@ tree_sign_reduction(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
auto last_x_block = x + (partial_block_count - 1) * big_lwe_size;
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(last_x_block, last_y_block,
|
||||
big_lwe_size_bytes, stream);
|
||||
big_lwe_size_bytes, streams[0],
|
||||
gpu_indexes[0]);
|
||||
} else {
|
||||
partial_block_count >>= 1;
|
||||
}
|
||||
@@ -411,7 +433,8 @@ tree_sign_reduction(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
std::function<Torus(Torus)> f;
|
||||
|
||||
if (partial_block_count == 2) {
|
||||
pack_blocks(stream, y, x, big_lwe_dimension, partial_block_count, 4);
|
||||
pack_blocks(streams[0], gpu_indexes[0], y, x, big_lwe_dimension,
|
||||
partial_block_count, 4);
|
||||
|
||||
f = [block_selector_f, sign_handler_f](Torus x) -> Torus {
|
||||
int msb = (x >> 2) & 3;
|
||||
@@ -425,23 +448,25 @@ tree_sign_reduction(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
y = x;
|
||||
f = sign_handler_f;
|
||||
}
|
||||
generate_device_accumulator<Torus>(stream, last_lut->lut, glwe_dimension,
|
||||
polynomial_size, message_modulus,
|
||||
carry_modulus, f);
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], last_lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus, f);
|
||||
last_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
// Last leaf
|
||||
integer_radix_apply_univariate_lookup_table_kb(stream, lwe_array_out, y, bsk,
|
||||
ksk, 1, last_lut);
|
||||
integer_radix_apply_univariate_lookup_table_kb(streams, gpu_indexes,
|
||||
gpu_count, lwe_array_out, y,
|
||||
bsks, ksks, 1, last_lut);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_radix_difference_check_kb(
|
||||
cuda_stream_t *stream, Torus *lwe_array_out, Torus *lwe_array_left,
|
||||
Torus *lwe_array_right, int_comparison_buffer<Torus> *mem_ptr,
|
||||
std::function<Torus(Torus)> reduction_lut_f, void *bsk, Torus *ksk,
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
Torus *lwe_array_out, Torus *lwe_array_left, Torus *lwe_array_right,
|
||||
int_comparison_buffer<Torus> *mem_ptr,
|
||||
std::function<Torus(Torus)> reduction_lut_f, void **bsks, Torus **ksks,
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
auto diff_buffer = mem_ptr->diff_buffer;
|
||||
|
||||
auto params = mem_ptr->params;
|
||||
@@ -463,21 +488,21 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
if (mem_ptr->is_signed) {
|
||||
packed_num_radix_blocks -= 2;
|
||||
}
|
||||
pack_blocks(stream, packed_left, lwe_array_left, big_lwe_dimension,
|
||||
packed_num_radix_blocks, message_modulus);
|
||||
pack_blocks(stream, packed_right, lwe_array_right, big_lwe_dimension,
|
||||
packed_num_radix_blocks, message_modulus);
|
||||
pack_blocks(streams[0], gpu_indexes[0], packed_left, lwe_array_left,
|
||||
big_lwe_dimension, packed_num_radix_blocks, message_modulus);
|
||||
pack_blocks(streams[0], gpu_indexes[0], packed_right, lwe_array_right,
|
||||
big_lwe_dimension, packed_num_radix_blocks, message_modulus);
|
||||
// From this point we have half number of blocks
|
||||
packed_num_radix_blocks /= 2;
|
||||
|
||||
// Clean noise
|
||||
auto identity_lut = mem_ptr->identity_lut;
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
stream, packed_left, packed_left, bsk, ksk, packed_num_radix_blocks,
|
||||
identity_lut);
|
||||
streams, gpu_indexes, gpu_count, packed_left, packed_left, bsks, ksks,
|
||||
packed_num_radix_blocks, identity_lut);
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
stream, packed_right, packed_right, bsk, ksk, packed_num_radix_blocks,
|
||||
identity_lut);
|
||||
streams, gpu_indexes, gpu_count, packed_right, packed_right, bsks, ksks,
|
||||
packed_num_radix_blocks, identity_lut);
|
||||
|
||||
lhs = packed_left;
|
||||
rhs = packed_right;
|
||||
@@ -492,14 +517,15 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
if (!mem_ptr->is_signed) {
|
||||
// Compare packed blocks, or simply the total number of radix blocks in the
|
||||
// inputs
|
||||
compare_radix_blocks_kb(stream, comparisons, lhs, rhs, mem_ptr, bsk, ksk,
|
||||
packed_num_radix_blocks);
|
||||
compare_radix_blocks_kb(streams, gpu_indexes, gpu_count, comparisons, lhs,
|
||||
rhs, mem_ptr, bsks, ksks, packed_num_radix_blocks);
|
||||
num_comparisons = packed_num_radix_blocks;
|
||||
} else {
|
||||
// Packing is possible
|
||||
if (carry_modulus >= message_modulus) {
|
||||
// Compare (num_radix_blocks - 2) / 2 packed blocks
|
||||
compare_radix_blocks_kb(stream, comparisons, lhs, rhs, mem_ptr, bsk, ksk,
|
||||
compare_radix_blocks_kb(streams, gpu_indexes, gpu_count, comparisons, lhs,
|
||||
rhs, mem_ptr, bsks, ksks,
|
||||
packed_num_radix_blocks);
|
||||
|
||||
// Compare the last block before the sign block separately
|
||||
@@ -510,35 +536,38 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
diff_buffer->tmp_packed_right +
|
||||
packed_num_radix_blocks * big_lwe_size;
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
stream, last_left_block_before_sign_block,
|
||||
lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsk, ksk, 1,
|
||||
streams, gpu_indexes, gpu_count, last_left_block_before_sign_block,
|
||||
lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks, 1,
|
||||
identity_lut);
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
stream, last_right_block_before_sign_block,
|
||||
lwe_array_right + (num_radix_blocks - 2) * big_lwe_size, bsk, ksk, 1,
|
||||
identity_lut);
|
||||
streams, gpu_indexes, gpu_count, last_right_block_before_sign_block,
|
||||
lwe_array_right + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks,
|
||||
1, identity_lut);
|
||||
compare_radix_blocks_kb(
|
||||
stream, comparisons + packed_num_radix_blocks * big_lwe_size,
|
||||
streams, gpu_indexes, gpu_count,
|
||||
comparisons + packed_num_radix_blocks * big_lwe_size,
|
||||
last_left_block_before_sign_block, last_right_block_before_sign_block,
|
||||
mem_ptr, bsk, ksk, 1);
|
||||
mem_ptr, bsks, ksks, 1);
|
||||
// Compare the sign block separately
|
||||
integer_radix_apply_bivariate_lookup_table_kb(
|
||||
stream, comparisons + (packed_num_radix_blocks + 1) * big_lwe_size,
|
||||
streams, gpu_indexes, gpu_count,
|
||||
comparisons + (packed_num_radix_blocks + 1) * big_lwe_size,
|
||||
lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
|
||||
lwe_array_right + (num_radix_blocks - 1) * big_lwe_size, bsk, ksk, 1,
|
||||
mem_ptr->signed_lut);
|
||||
lwe_array_right + (num_radix_blocks - 1) * big_lwe_size, bsks, ksks,
|
||||
1, mem_ptr->signed_lut, mem_ptr->signed_lut->params.message_modulus);
|
||||
num_comparisons = packed_num_radix_blocks + 2;
|
||||
|
||||
} else {
|
||||
compare_radix_blocks_kb(stream, comparisons, lwe_array_left,
|
||||
lwe_array_right, mem_ptr, bsk, ksk,
|
||||
num_radix_blocks - 1);
|
||||
compare_radix_blocks_kb(streams, gpu_indexes, gpu_count, comparisons,
|
||||
lwe_array_left, lwe_array_right, mem_ptr, bsks,
|
||||
ksks, num_radix_blocks - 1);
|
||||
// Compare the sign block separately
|
||||
integer_radix_apply_bivariate_lookup_table_kb(
|
||||
stream, comparisons + (num_radix_blocks - 1) * big_lwe_size,
|
||||
streams, gpu_indexes, gpu_count,
|
||||
comparisons + (num_radix_blocks - 1) * big_lwe_size,
|
||||
lwe_array_left + (num_radix_blocks - 1) * big_lwe_size,
|
||||
lwe_array_right + (num_radix_blocks - 1) * big_lwe_size, bsk, ksk, 1,
|
||||
mem_ptr->signed_lut);
|
||||
lwe_array_right + (num_radix_blocks - 1) * big_lwe_size, bsks, ksks,
|
||||
1, mem_ptr->signed_lut, mem_ptr->signed_lut->params.message_modulus);
|
||||
num_comparisons = num_radix_blocks;
|
||||
}
|
||||
}
|
||||
@@ -546,39 +575,42 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
// Reduces a vec containing radix blocks that encrypts a sign
|
||||
// (inferior, equal, superior) to one single radix block containing the
|
||||
// final sign
|
||||
tree_sign_reduction(stream, lwe_array_out, comparisons,
|
||||
mem_ptr->diff_buffer->tree_buffer, reduction_lut_f, bsk,
|
||||
ksk, num_comparisons);
|
||||
tree_sign_reduction(streams, gpu_indexes, gpu_count, lwe_array_out,
|
||||
comparisons, mem_ptr->diff_buffer->tree_buffer,
|
||||
reduction_lut_f, bsks, ksks, num_comparisons);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void scratch_cuda_integer_radix_comparison_check_kb(
|
||||
cuda_stream_t *stream, int_comparison_buffer<Torus> **mem_ptr,
|
||||
uint32_t num_radix_blocks, int_radix_params params, COMPARISON_TYPE op,
|
||||
bool is_signed, bool allocate_gpu_memory) {
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
int_comparison_buffer<Torus> **mem_ptr, uint32_t num_radix_blocks,
|
||||
int_radix_params params, COMPARISON_TYPE op, bool is_signed,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
*mem_ptr = new int_comparison_buffer<Torus>(
|
||||
stream, op, params, num_radix_blocks, is_signed, allocate_gpu_memory);
|
||||
*mem_ptr = new int_comparison_buffer<Torus>(streams, gpu_indexes, gpu_count,
|
||||
op, params, num_radix_blocks,
|
||||
is_signed, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
host_integer_radix_maxmin_kb(cuda_stream_t *stream, Torus *lwe_array_out,
|
||||
host_integer_radix_maxmin_kb(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out,
|
||||
Torus *lwe_array_left, Torus *lwe_array_right,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *bsk,
|
||||
Torus *ksk, uint32_t total_num_radix_blocks) {
|
||||
int_comparison_buffer<Torus> *mem_ptr, void **bsks,
|
||||
Torus **ksks, uint32_t total_num_radix_blocks) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
// Compute the sign
|
||||
host_integer_radix_difference_check_kb(
|
||||
stream, mem_ptr->tmp_lwe_array_out, lwe_array_left, lwe_array_right,
|
||||
mem_ptr, mem_ptr->identity_lut_f, bsk, ksk, total_num_radix_blocks);
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->tmp_lwe_array_out,
|
||||
lwe_array_left, lwe_array_right, mem_ptr, mem_ptr->identity_lut_f, bsks,
|
||||
ksks, total_num_radix_blocks);
|
||||
|
||||
// Selector
|
||||
host_integer_radix_cmux_kb(
|
||||
stream, lwe_array_out, mem_ptr->tmp_lwe_array_out, lwe_array_left,
|
||||
lwe_array_right, mem_ptr->cmux_buffer, bsk, ksk, total_num_radix_blocks);
|
||||
host_integer_radix_cmux_kb(streams, gpu_indexes, gpu_count, lwe_array_out,
|
||||
mem_ptr->tmp_lwe_array_out, lwe_array_left,
|
||||
lwe_array_right, mem_ptr->cmux_buffer, bsks, ksks,
|
||||
total_num_radix_blocks);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
#include "integer/div_rem.cuh"
|
||||
|
||||
void scratch_cuda_integer_div_rem_radix_ciphertext_kb_64(
|
||||
cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, int8_t **mem_ptr,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -14,70 +14,29 @@ void scratch_cuda_integer_div_rem_radix_ciphertext_kb_64(
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_div_rem_kb<uint64_t>(
|
||||
stream, (int_div_rem_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_div_rem_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_div_rem_radix_ciphertext_kb_64(
|
||||
cuda_stream_t *stream, void *quotient, void *remainder, void *numerator,
|
||||
void *divisor, int8_t *mem_ptr, void *bsk, void *ksk, uint32_t num_blocks) {
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, void *quotient,
|
||||
void *remainder, void *numerator, void *divisor, int8_t *mem_ptr,
|
||||
void **bsks, void **ksks, uint32_t num_blocks) {
|
||||
|
||||
auto mem = (int_div_rem_memory<uint64_t> *)mem_ptr;
|
||||
|
||||
switch (mem->params.polynomial_size) {
|
||||
case 512:
|
||||
host_integer_div_rem_kb<uint64_t, Degree<512>>(
|
||||
stream, static_cast<uint64_t *>(quotient),
|
||||
static_cast<uint64_t *>(remainder), static_cast<uint64_t *>(numerator),
|
||||
static_cast<uint64_t *>(divisor), bsk, static_cast<uint64_t *>(ksk),
|
||||
mem, num_blocks);
|
||||
break;
|
||||
case 1024:
|
||||
|
||||
host_integer_div_rem_kb<uint64_t, Degree<1024>>(
|
||||
stream, static_cast<uint64_t *>(quotient),
|
||||
static_cast<uint64_t *>(remainder), static_cast<uint64_t *>(numerator),
|
||||
static_cast<uint64_t *>(divisor), bsk, static_cast<uint64_t *>(ksk),
|
||||
mem, num_blocks);
|
||||
break;
|
||||
case 2048:
|
||||
host_integer_div_rem_kb<uint64_t, Degree<2048>>(
|
||||
stream, static_cast<uint64_t *>(quotient),
|
||||
static_cast<uint64_t *>(remainder), static_cast<uint64_t *>(numerator),
|
||||
static_cast<uint64_t *>(divisor), bsk, static_cast<uint64_t *>(ksk),
|
||||
mem, num_blocks);
|
||||
break;
|
||||
case 4096:
|
||||
host_integer_div_rem_kb<uint64_t, Degree<4096>>(
|
||||
stream, static_cast<uint64_t *>(quotient),
|
||||
static_cast<uint64_t *>(remainder), static_cast<uint64_t *>(numerator),
|
||||
static_cast<uint64_t *>(divisor), bsk, static_cast<uint64_t *>(ksk),
|
||||
mem, num_blocks);
|
||||
break;
|
||||
case 8192:
|
||||
host_integer_div_rem_kb<uint64_t, Degree<8192>>(
|
||||
stream, static_cast<uint64_t *>(quotient),
|
||||
static_cast<uint64_t *>(remainder), static_cast<uint64_t *>(numerator),
|
||||
static_cast<uint64_t *>(divisor), bsk, static_cast<uint64_t *>(ksk),
|
||||
mem, num_blocks);
|
||||
break;
|
||||
case 16384:
|
||||
host_integer_div_rem_kb<uint64_t, Degree<16384>>(
|
||||
stream, static_cast<uint64_t *>(quotient),
|
||||
static_cast<uint64_t *>(remainder), static_cast<uint64_t *>(numerator),
|
||||
static_cast<uint64_t *>(divisor), bsk, static_cast<uint64_t *>(ksk),
|
||||
mem, num_blocks);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (integer div_rem): unsupported polynomial size. "
|
||||
"Only N = 512, 1024, 2048, 4096, 8192, 16384 is supported")
|
||||
}
|
||||
host_integer_div_rem_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(quotient), static_cast<uint64_t *>(remainder),
|
||||
static_cast<uint64_t *>(numerator), static_cast<uint64_t *>(divisor),
|
||||
bsks, (uint64_t **)(ksks), mem, num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_div_rem(cuda_stream_t *stream,
|
||||
int8_t **mem_ptr_void) {
|
||||
void cleanup_cuda_integer_div_rem(void **streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, int8_t **mem_ptr_void) {
|
||||
int_div_rem_memory<uint64_t> *mem_ptr =
|
||||
(int_div_rem_memory<uint64_t> *)(*mem_ptr_void);
|
||||
|
||||
mem_ptr->release(stream);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <omp.h>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
@@ -31,17 +30,13 @@ template <typename Torus> struct lwe_ciphertext_list {
|
||||
int_radix_params params;
|
||||
|
||||
size_t big_lwe_size;
|
||||
size_t radix_size;
|
||||
size_t big_lwe_size_bytes;
|
||||
size_t radix_size_bytes;
|
||||
size_t big_lwe_dimension;
|
||||
|
||||
lwe_ciphertext_list(Torus *src, int_radix_params params, size_t max_blocks)
|
||||
: data(src), params(params), max_blocks(max_blocks) {
|
||||
big_lwe_size = params.big_lwe_dimension + 1;
|
||||
big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
radix_size = max_blocks * big_lwe_size;
|
||||
radix_size_bytes = radix_size * sizeof(Torus);
|
||||
big_lwe_dimension = params.big_lwe_dimension;
|
||||
len = max_blocks;
|
||||
}
|
||||
@@ -49,42 +44,44 @@ template <typename Torus> struct lwe_ciphertext_list {
|
||||
// copies ciphertexts from Torus*, starting from `starting_block` including
|
||||
// `finish_block`, does not change the value of self len
|
||||
void copy_from(Torus *src, size_t start_block, size_t finish_block,
|
||||
cuda_stream_t *stream) {
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
size_t tmp_len = finish_block - start_block + 1;
|
||||
cuda_memcpy_async_gpu_to_gpu(data, &src[start_block * big_lwe_size],
|
||||
tmp_len * big_lwe_size_bytes, stream);
|
||||
tmp_len * big_lwe_size_bytes, stream,
|
||||
gpu_index);
|
||||
}
|
||||
|
||||
// copies ciphertexts from lwe_ciphertext_list, starting from `starting_block`
|
||||
// including `finish_block`, does not change the value of self len
|
||||
void copy_from(const lwe_ciphertext_list &src, size_t start_block,
|
||||
size_t finish_block, cuda_stream_t *stream) {
|
||||
copy_from(src.data, start_block, finish_block, stream);
|
||||
size_t finish_block, cudaStream_t stream, uint32_t gpu_index) {
|
||||
copy_from(src.data, start_block, finish_block, stream, gpu_index);
|
||||
}
|
||||
|
||||
// copies ciphertexts from Torus*, starting from `starting_block`
|
||||
// including `finish_block`, updating the value of self len
|
||||
void clone_from(Torus *src, size_t start_block, size_t finish_block,
|
||||
cuda_stream_t *stream) {
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
len = finish_block - start_block + 1;
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(data, &src[start_block * big_lwe_size],
|
||||
len * big_lwe_size_bytes, stream);
|
||||
len * big_lwe_size_bytes, stream, gpu_index);
|
||||
}
|
||||
|
||||
// copies ciphertexts from ciphertexts_list, starting from `starting_block`
|
||||
// including `finish_block`, updating the value of self len
|
||||
void clone_from(const lwe_ciphertext_list &src, size_t start_block,
|
||||
size_t finish_block, cuda_stream_t *stream) {
|
||||
clone_from(src.data, start_block, finish_block, stream);
|
||||
size_t finish_block, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
clone_from(src.data, start_block, finish_block, stream, gpu_index);
|
||||
}
|
||||
|
||||
// assign zero to blocks starting from `start_block` including `finish_block`
|
||||
void assign_zero(size_t start_block, size_t finish_block,
|
||||
cuda_stream_t *stream) {
|
||||
void assign_zero(size_t start_block, size_t finish_block, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
auto size = finish_block - start_block + 1;
|
||||
cuda_memset_async(&data[start_block * big_lwe_size], 0,
|
||||
size * big_lwe_size_bytes, stream);
|
||||
size * big_lwe_size_bytes, stream, gpu_index);
|
||||
}
|
||||
|
||||
// return pointer to last block
|
||||
@@ -110,7 +107,8 @@ template <typename Torus> struct lwe_ciphertext_list {
|
||||
}
|
||||
|
||||
// insert ciphertext at index `ind`
|
||||
void insert(size_t ind, Torus *ciphertext_block, cuda_stream_t *stream) {
|
||||
void insert(size_t ind, Torus *ciphertext_block, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
assert(ind <= len);
|
||||
assert(len < max_blocks);
|
||||
|
||||
@@ -119,33 +117,34 @@ template <typename Torus> struct lwe_ciphertext_list {
|
||||
for (size_t i = len; i > ind; i--) {
|
||||
Torus *src = &data[(i - 1) * big_lwe_size];
|
||||
Torus *dst = &data[i * big_lwe_size];
|
||||
cuda_memcpy_async_gpu_to_gpu(dst, src, big_lwe_size_bytes, stream);
|
||||
cuda_memcpy_async_gpu_to_gpu(dst, src, big_lwe_size_bytes, stream,
|
||||
gpu_index);
|
||||
}
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(&data[insert_offset], ciphertext_block,
|
||||
big_lwe_size_bytes, stream);
|
||||
big_lwe_size_bytes, stream, gpu_index);
|
||||
len++;
|
||||
}
|
||||
|
||||
// push ciphertext at the end of `data`
|
||||
void push(Torus *ciphertext_block, cuda_stream_t *stream) {
|
||||
void push(Torus *ciphertext_block, cudaStream_t stream, uint32_t gpu_index) {
|
||||
assert(len < max_blocks);
|
||||
|
||||
size_t offset = len * big_lwe_size;
|
||||
cuda_memcpy_async_gpu_to_gpu(&data[offset], ciphertext_block,
|
||||
big_lwe_size_bytes, stream);
|
||||
big_lwe_size_bytes, stream, gpu_index);
|
||||
len++;
|
||||
}
|
||||
|
||||
// duplicate ciphertext into `number_of_blocks` ciphertexts
|
||||
void fill_with_same_ciphertext(Torus *ciphertext, size_t number_of_blocks,
|
||||
cuda_stream_t *stream) {
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
assert(number_of_blocks <= max_blocks);
|
||||
|
||||
for (size_t i = 0; i < number_of_blocks; i++) {
|
||||
Torus *dest = &data[i * big_lwe_size];
|
||||
cuda_memcpy_async_gpu_to_gpu(dest, ciphertext, big_lwe_size_bytes,
|
||||
stream);
|
||||
cuda_memcpy_async_gpu_to_gpu(dest, ciphertext, big_lwe_size_bytes, stream,
|
||||
gpu_index);
|
||||
}
|
||||
|
||||
len = number_of_blocks;
|
||||
@@ -161,20 +160,21 @@ template <typename Torus> struct lwe_ciphertext_list {
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void scratch_cuda_integer_div_rem_kb(
|
||||
cuda_stream_t *stream, int_div_rem_memory<Torus> **mem_ptr,
|
||||
uint32_t num_blocks, int_radix_params params, bool allocate_gpu_memory) {
|
||||
cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
int_div_rem_memory<Torus> **mem_ptr, uint32_t num_blocks,
|
||||
int_radix_params params, bool allocate_gpu_memory) {
|
||||
|
||||
cudaSetDevice(stream->gpu_index);
|
||||
*mem_ptr = new int_div_rem_memory<Torus>(stream, params, num_blocks,
|
||||
allocate_gpu_memory);
|
||||
*mem_ptr = new int_div_rem_memory<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_blocks, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
template <typename Torus, class params>
|
||||
__host__ void host_integer_div_rem_kb(cuda_stream_t *stream, Torus *quotient,
|
||||
Torus *remainder, Torus *numerator,
|
||||
Torus *divisor, void *bsk, uint64_t *ksk,
|
||||
int_div_rem_memory<uint64_t> *mem_ptr,
|
||||
uint32_t num_blocks) {
|
||||
template <typename Torus>
|
||||
__host__ void
|
||||
host_integer_div_rem_kb(cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *quotient, Torus *remainder,
|
||||
Torus *numerator, Torus *divisor, void **bsks,
|
||||
uint64_t **ksks, int_div_rem_memory<uint64_t> *mem_ptr,
|
||||
uint32_t num_blocks) {
|
||||
|
||||
auto radix_params = mem_ptr->params;
|
||||
|
||||
@@ -222,11 +222,13 @@ __host__ void host_integer_div_rem_kb(cuda_stream_t *stream, Torus *quotient,
|
||||
lwe_ciphertext_list<Torus> cleaned_merged_interesting_remainder(
|
||||
mem_ptr->cleaned_merged_interesting_remainder, radix_params, num_blocks);
|
||||
|
||||
numerator_block_stack.clone_from(numerator, 0, num_blocks - 1, stream);
|
||||
remainder1.assign_zero(0, num_blocks - 1, stream);
|
||||
remainder2.assign_zero(0, num_blocks - 1, stream);
|
||||
numerator_block_stack.clone_from(numerator, 0, num_blocks - 1, streams[0],
|
||||
gpu_indexes[0]);
|
||||
remainder1.assign_zero(0, num_blocks - 1, streams[0], gpu_indexes[0]);
|
||||
remainder2.assign_zero(0, num_blocks - 1, streams[0], gpu_indexes[0]);
|
||||
|
||||
cuda_memset_async(quotient, 0, big_lwe_size_bytes * num_blocks, stream);
|
||||
cuda_memset_async(quotient, 0, big_lwe_size_bytes * num_blocks, streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
for (int i = total_bits - 1; i >= 0; i--) {
|
||||
uint32_t block_of_bit = i / num_bits_in_message;
|
||||
@@ -240,76 +242,79 @@ __host__ void host_integer_div_rem_kb(cuda_stream_t *stream, Torus *quotient,
|
||||
uint32_t first_trivial_block = last_non_trivial_block + 1;
|
||||
|
||||
interesting_remainder1.clone_from(remainder1, 0, last_non_trivial_block,
|
||||
stream);
|
||||
streams[0], gpu_indexes[0]);
|
||||
interesting_remainder2.clone_from(remainder2, 0, last_non_trivial_block,
|
||||
stream);
|
||||
interesting_divisor.clone_from(divisor, 0, last_non_trivial_block, stream);
|
||||
streams[0], gpu_indexes[0]);
|
||||
interesting_divisor.clone_from(divisor, 0, last_non_trivial_block,
|
||||
streams[0], gpu_indexes[0]);
|
||||
divisor_ms_blocks.clone_from(divisor,
|
||||
(msb_bit_set + 1) / num_bits_in_message,
|
||||
num_blocks - 1, stream);
|
||||
num_blocks - 1, streams[0], gpu_indexes[0]);
|
||||
|
||||
// We split the divisor at a block position, when in reality the split
|
||||
// should be at a bit position meaning that potentially (depending on
|
||||
// msb_bit_set) the split versions share some bits they should not. So we do
|
||||
// one PBS on the last block of the interesting_divisor, and first block of
|
||||
// divisor_ms_blocks to trim out bits which should not be there
|
||||
auto trim_last_interesting_divisor_bits = [&](cuda_stream_t *stream) {
|
||||
if ((msb_bit_set + 1) % num_bits_in_message == 0) {
|
||||
return;
|
||||
}
|
||||
// The last block of the interesting part of the remainder
|
||||
// can contain bits which we should not account for
|
||||
// we have to zero them out.
|
||||
auto trim_last_interesting_divisor_bits =
|
||||
[&](cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count) {
|
||||
if ((msb_bit_set + 1) % num_bits_in_message == 0) {
|
||||
return;
|
||||
}
|
||||
// The last block of the interesting part of the remainder
|
||||
// can contain bits which we should not account for
|
||||
// we have to zero them out.
|
||||
|
||||
// Where the msb is set in the block
|
||||
uint32_t pos_in_block = msb_bit_set % num_bits_in_message;
|
||||
// Where the msb is set in the block
|
||||
uint32_t pos_in_block = msb_bit_set % num_bits_in_message;
|
||||
|
||||
// e.g 2 bits in message:
|
||||
// if pos_in_block is 0, then we want to keep only first bit (right
|
||||
// shift
|
||||
// mask by 1) if pos_in_block is 1, then we want to keep the two
|
||||
// bits
|
||||
// (right shift mask by 0)
|
||||
uint32_t shift_amount = num_bits_in_message - (pos_in_block + 1);
|
||||
// e.g 2 bits in message:
|
||||
// if pos_in_block is 0, then we want to keep only first bit (right
|
||||
// shift
|
||||
// mask by 1) if pos_in_block is 1, then we want to keep the two
|
||||
// bits
|
||||
// (right shift mask by 0)
|
||||
uint32_t shift_amount = num_bits_in_message - (pos_in_block + 1);
|
||||
|
||||
// Create mask of 1s on the message part, 0s in the carries
|
||||
uint32_t full_message_mask = message_modulus - 1;
|
||||
// Create mask of 1s on the message part, 0s in the carries
|
||||
uint32_t full_message_mask = message_modulus - 1;
|
||||
|
||||
// Shift the mask so that we will only keep bits we should
|
||||
uint32_t shifted_mask = full_message_mask >> shift_amount;
|
||||
// Shift the mask so that we will only keep bits we should
|
||||
uint32_t shifted_mask = full_message_mask >> shift_amount;
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
stream, interesting_divisor.last_block(),
|
||||
interesting_divisor.last_block(), bsk, ksk, 1,
|
||||
mem_ptr->masking_luts_1[shifted_mask]);
|
||||
}; // trim_last_interesting_divisor_bits
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
streams, gpu_indexes, gpu_count, interesting_divisor.last_block(),
|
||||
interesting_divisor.last_block(), bsks, ksks, 1,
|
||||
mem_ptr->masking_luts_1[shifted_mask]);
|
||||
}; // trim_last_interesting_divisor_bits
|
||||
|
||||
auto trim_first_divisor_ms_bits = [&](cuda_stream_t *stream) {
|
||||
if (divisor_ms_blocks.is_empty() ||
|
||||
((msb_bit_set + 1) % num_bits_in_message) == 0) {
|
||||
return;
|
||||
}
|
||||
// Where the msb is set in the block
|
||||
uint32_t pos_in_block = msb_bit_set % num_bits_in_message;
|
||||
auto trim_first_divisor_ms_bits =
|
||||
[&](cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count) {
|
||||
if (divisor_ms_blocks.is_empty() ||
|
||||
((msb_bit_set + 1) % num_bits_in_message) == 0) {
|
||||
return;
|
||||
}
|
||||
// Where the msb is set in the block
|
||||
uint32_t pos_in_block = msb_bit_set % num_bits_in_message;
|
||||
|
||||
// e.g 2 bits in message:
|
||||
// if pos_in_block is 0, then we want to discard the first bit (left shift
|
||||
// mask by 1) if pos_in_block is 1, then we want to discard the two bits
|
||||
// (left shift mask by 2) let shift_amount = num_bits_in_message -
|
||||
// pos_in_block
|
||||
uint32_t shift_amount = pos_in_block + 1;
|
||||
uint32_t full_message_mask = message_modulus - 1;
|
||||
uint32_t shifted_mask = full_message_mask << shift_amount;
|
||||
// e.g 2 bits in message:
|
||||
// if pos_in_block is 0, then we want to discard the first bit (left
|
||||
// shift mask by 1) if pos_in_block is 1, then we want to discard the
|
||||
// two bits (left shift mask by 2) let shift_amount =
|
||||
// num_bits_in_message - pos_in_block
|
||||
uint32_t shift_amount = pos_in_block + 1;
|
||||
uint32_t full_message_mask = message_modulus - 1;
|
||||
uint32_t shifted_mask = full_message_mask << shift_amount;
|
||||
|
||||
// Keep the mask within the range of message bits, so that
|
||||
// the estimated degree of the output is < msg_modulus
|
||||
shifted_mask = shifted_mask & full_message_mask;
|
||||
// Keep the mask within the range of message bits, so that
|
||||
// the estimated degree of the output is < msg_modulus
|
||||
shifted_mask = shifted_mask & full_message_mask;
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
stream, divisor_ms_blocks.first_block(),
|
||||
divisor_ms_blocks.first_block(), bsk, ksk, 1,
|
||||
mem_ptr->masking_luts_2[shifted_mask]);
|
||||
}; // trim_first_divisor_ms_bits
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
streams, gpu_indexes, gpu_count, divisor_ms_blocks.first_block(),
|
||||
divisor_ms_blocks.first_block(), bsks, ksks, 1,
|
||||
mem_ptr->masking_luts_2[shifted_mask]);
|
||||
}; // trim_first_divisor_ms_bits
|
||||
|
||||
// This does
|
||||
// R := R << 1; R(0) := N(i)
|
||||
@@ -320,74 +325,70 @@ __host__ void host_integer_div_rem_kb(cuda_stream_t *stream, Torus *quotient,
|
||||
// However, to keep the remainder clean (noise wise), what we do is that we
|
||||
// put the remainder block from which we need to extract the bit, as the LSB
|
||||
// of the Remainder, so that left shifting will pull the bit we need.
|
||||
auto left_shift_interesting_remainder1 = [&](cuda_stream_t *stream3) {
|
||||
numerator_block_1.clone_from(numerator_block_stack,
|
||||
numerator_block_stack.len - 1,
|
||||
numerator_block_stack.len - 1, stream3);
|
||||
numerator_block_stack.pop();
|
||||
interesting_remainder1.insert(0, numerator_block_1.first_block(),
|
||||
stream3);
|
||||
auto left_shift_interesting_remainder1 =
|
||||
[&](cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count) {
|
||||
numerator_block_1.clone_from(
|
||||
numerator_block_stack, numerator_block_stack.len - 1,
|
||||
numerator_block_stack.len - 1, streams[0], gpu_indexes[0]);
|
||||
numerator_block_stack.pop();
|
||||
interesting_remainder1.insert(0, numerator_block_1.first_block(),
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
host_integer_radix_logical_scalar_shift_kb_inplace(
|
||||
stream3, interesting_remainder1.data, 1, mem_ptr->shift_mem_1, bsk,
|
||||
ksk, interesting_remainder1.len);
|
||||
host_integer_radix_logical_scalar_shift_kb_inplace(
|
||||
streams, gpu_indexes, gpu_count, interesting_remainder1.data, 1,
|
||||
mem_ptr->shift_mem_1, bsks, ksks, interesting_remainder1.len);
|
||||
|
||||
tmp_radix.clone_from(interesting_remainder1, 0,
|
||||
interesting_remainder1.len - 1, stream3);
|
||||
tmp_radix.clone_from(interesting_remainder1, 0,
|
||||
interesting_remainder1.len - 1, streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
radix_blocks_rotate_left<<<interesting_remainder1.len, 256, 0,
|
||||
stream3->stream>>>(
|
||||
interesting_remainder1.data, tmp_radix.data, 1,
|
||||
interesting_remainder1.len, big_lwe_size);
|
||||
host_radix_blocks_rotate_left(
|
||||
streams, gpu_indexes, gpu_count, interesting_remainder1.data,
|
||||
tmp_radix.data, 1, interesting_remainder1.len, big_lwe_size);
|
||||
|
||||
numerator_block_1.clone_from(interesting_remainder1,
|
||||
interesting_remainder1.len - 1,
|
||||
interesting_remainder1.len - 1, stream3);
|
||||
numerator_block_1.clone_from(
|
||||
interesting_remainder1, interesting_remainder1.len - 1,
|
||||
interesting_remainder1.len - 1, streams[0], gpu_indexes[0]);
|
||||
|
||||
interesting_remainder1.pop();
|
||||
interesting_remainder1.pop();
|
||||
|
||||
if (pos_in_block != 0) {
|
||||
// We have not yet extracted all the bits from this numerator
|
||||
// so, we put it back on the front so that it gets taken next iteration
|
||||
numerator_block_stack.push(numerator_block_1.first_block(), stream3);
|
||||
}
|
||||
}; // left_shift_interesting_remainder1
|
||||
if (pos_in_block != 0) {
|
||||
// We have not yet extracted all the bits from this numerator
|
||||
// so, we put it back on the front so that it gets taken next
|
||||
// iteration
|
||||
numerator_block_stack.push(numerator_block_1.first_block(),
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
}; // left_shift_interesting_remainder1
|
||||
|
||||
auto left_shift_interesting_remainder2 = [&](cuda_stream_t *stream4) {
|
||||
host_integer_radix_logical_scalar_shift_kb_inplace(
|
||||
stream4, interesting_remainder2.data, 1, mem_ptr->shift_mem_2, bsk,
|
||||
ksk, interesting_remainder2.len);
|
||||
}; // left_shift_interesting_remainder2
|
||||
auto left_shift_interesting_remainder2 =
|
||||
[&](cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count) {
|
||||
host_integer_radix_logical_scalar_shift_kb_inplace(
|
||||
streams, gpu_indexes, gpu_count, interesting_remainder2.data, 1,
|
||||
mem_ptr->shift_mem_2, bsks, ksks, interesting_remainder2.len);
|
||||
}; // left_shift_interesting_remainder2
|
||||
|
||||
stream->synchronize();
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
// interesting_divisor
|
||||
trim_last_interesting_divisor_bits(mem_ptr->sub_stream_1);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
// divisor_ms_blocks
|
||||
trim_first_divisor_ms_bits(mem_ptr->sub_stream_2);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
// interesting_remainder1
|
||||
// numerator_block_stack
|
||||
left_shift_interesting_remainder1(mem_ptr->sub_stream_3);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
// interesting_remainder2
|
||||
left_shift_interesting_remainder2(mem_ptr->sub_stream_4);
|
||||
}
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
// interesting_divisor
|
||||
trim_last_interesting_divisor_bits(mem_ptr->sub_streams_1, gpu_indexes,
|
||||
gpu_count);
|
||||
// divisor_ms_blocks
|
||||
trim_first_divisor_ms_bits(mem_ptr->sub_streams_2, gpu_indexes, gpu_count);
|
||||
// interesting_remainder1
|
||||
// numerator_block_stack
|
||||
left_shift_interesting_remainder1(mem_ptr->sub_streams_3, gpu_indexes,
|
||||
gpu_count);
|
||||
// interesting_remainder2
|
||||
left_shift_interesting_remainder2(mem_ptr->sub_streams_4, gpu_indexes,
|
||||
gpu_count);
|
||||
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_3[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_4[j], gpu_indexes[j]);
|
||||
}
|
||||
cuda_synchronize_stream(mem_ptr->sub_stream_1);
|
||||
cuda_synchronize_stream(mem_ptr->sub_stream_2);
|
||||
cuda_synchronize_stream(mem_ptr->sub_stream_3);
|
||||
cuda_synchronize_stream(mem_ptr->sub_stream_4);
|
||||
|
||||
// if interesting_remainder1 != 0 -> interesting_remainder2 == 0
|
||||
// if interesting_remainder1 == 0 -> interesting_remainder2 != 0
|
||||
@@ -395,7 +396,7 @@ __host__ void host_integer_div_rem_kb(cuda_stream_t *stream, Torus *quotient,
|
||||
// but in that position, interesting_remainder2 always has a 0
|
||||
auto &merged_interesting_remainder = interesting_remainder1;
|
||||
|
||||
host_addition(stream, merged_interesting_remainder.data,
|
||||
host_addition(streams[0], gpu_indexes[0], merged_interesting_remainder.data,
|
||||
merged_interesting_remainder.data,
|
||||
interesting_remainder2.data, radix_params.big_lwe_dimension,
|
||||
merged_interesting_remainder.len);
|
||||
@@ -405,7 +406,7 @@ __host__ void host_integer_div_rem_kb(cuda_stream_t *stream, Torus *quotient,
|
||||
// `cleaned_merged_interesting_remainder`
|
||||
cleaned_merged_interesting_remainder.clone_from(
|
||||
merged_interesting_remainder, 0, merged_interesting_remainder.len - 1,
|
||||
stream);
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
assert(merged_interesting_remainder.len == interesting_divisor.len);
|
||||
|
||||
@@ -415,36 +416,41 @@ __host__ void host_integer_div_rem_kb(cuda_stream_t *stream, Torus *quotient,
|
||||
// fills:
|
||||
// `new_remainder` - radix ciphertext
|
||||
// `subtraction_overflowed` - single ciphertext
|
||||
auto do_overflowing_sub = [&](cuda_stream_t *stream) {
|
||||
host_integer_overflowing_sub_kb<Torus, params>(
|
||||
stream, new_remainder.data, subtraction_overflowed.data,
|
||||
merged_interesting_remainder.data, interesting_divisor.data, bsk, ksk,
|
||||
mem_ptr->overflow_sub_mem, merged_interesting_remainder.len);
|
||||
auto do_overflowing_sub = [&](cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
host_integer_overflowing_sub_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, new_remainder.data,
|
||||
subtraction_overflowed.data, merged_interesting_remainder.data,
|
||||
interesting_divisor.data, bsks, ksks, mem_ptr->overflow_sub_mem,
|
||||
merged_interesting_remainder.len);
|
||||
};
|
||||
|
||||
// fills:
|
||||
// `at_least_one_upper_block_is_non_zero` - single ciphertext
|
||||
auto check_divisor_upper_blocks = [&](cuda_stream_t *stream) {
|
||||
auto check_divisor_upper_blocks = [&](cudaStream_t *streams,
|
||||
uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
auto &trivial_blocks = divisor_ms_blocks;
|
||||
if (trivial_blocks.is_empty()) {
|
||||
cuda_memset_async(at_least_one_upper_block_is_non_zero.first_block(), 0,
|
||||
big_lwe_size_bytes, stream);
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
} else {
|
||||
|
||||
// We could call unchecked_scalar_ne
|
||||
// But we are in the special case where scalar == 0
|
||||
// So we can skip some stuff
|
||||
host_compare_with_zero_equality(
|
||||
stream, tmp_1.data, trivial_blocks.data, mem_ptr->comparison_buffer,
|
||||
bsk, ksk, trivial_blocks.len,
|
||||
streams, gpu_indexes, gpu_count, tmp_1.data, trivial_blocks.data,
|
||||
mem_ptr->comparison_buffer, bsks, ksks, trivial_blocks.len,
|
||||
mem_ptr->comparison_buffer->eq_buffer->is_non_zero_lut);
|
||||
|
||||
tmp_1.len =
|
||||
ceil_div(trivial_blocks.len, message_modulus * carry_modulus - 1);
|
||||
|
||||
is_at_least_one_comparisons_block_true(
|
||||
stream, at_least_one_upper_block_is_non_zero.data, tmp_1.data,
|
||||
mem_ptr->comparison_buffer, bsk, ksk, tmp_1.len);
|
||||
streams, gpu_indexes, gpu_count,
|
||||
at_least_one_upper_block_is_non_zero.data, tmp_1.data,
|
||||
mem_ptr->comparison_buffer, bsks, ksks, tmp_1.len);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -452,40 +458,36 @@ __host__ void host_integer_div_rem_kb(cuda_stream_t *stream, Torus *quotient,
|
||||
// so that it can be safely used in bivariate PBSes
|
||||
// fills:
|
||||
// `cleaned_merged_interesting_remainder` - radix ciphertext
|
||||
auto create_clean_version_of_merged_remainder = [&](cuda_stream_t *stream) {
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
stream, cleaned_merged_interesting_remainder.data,
|
||||
cleaned_merged_interesting_remainder.data, bsk, ksk,
|
||||
cleaned_merged_interesting_remainder.len,
|
||||
mem_ptr->message_extract_lut_1);
|
||||
};
|
||||
auto create_clean_version_of_merged_remainder =
|
||||
[&](cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count) {
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
streams, gpu_indexes, gpu_count,
|
||||
cleaned_merged_interesting_remainder.data,
|
||||
cleaned_merged_interesting_remainder.data, bsks, ksks,
|
||||
cleaned_merged_interesting_remainder.len,
|
||||
mem_ptr->message_extract_lut_1);
|
||||
};
|
||||
|
||||
// phase 2
|
||||
stream->synchronize();
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
// new_remainder
|
||||
// subtraction_overflowed
|
||||
do_overflowing_sub(mem_ptr->sub_stream_1);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
// at_least_one_upper_block_is_non_zero
|
||||
check_divisor_upper_blocks(mem_ptr->sub_stream_2);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
// cleaned_merged_interesting_remainder
|
||||
create_clean_version_of_merged_remainder(mem_ptr->sub_stream_3);
|
||||
}
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
// new_remainder
|
||||
// subtraction_overflowed
|
||||
do_overflowing_sub(mem_ptr->sub_streams_1, gpu_indexes, gpu_count);
|
||||
// at_least_one_upper_block_is_non_zero
|
||||
check_divisor_upper_blocks(mem_ptr->sub_streams_2, gpu_indexes, gpu_count);
|
||||
// cleaned_merged_interesting_remainder
|
||||
create_clean_version_of_merged_remainder(mem_ptr->sub_streams_3,
|
||||
gpu_indexes, gpu_count);
|
||||
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_3[j], gpu_indexes[j]);
|
||||
}
|
||||
cuda_synchronize_stream(mem_ptr->sub_stream_1);
|
||||
cuda_synchronize_stream(mem_ptr->sub_stream_2);
|
||||
cuda_synchronize_stream(mem_ptr->sub_stream_3);
|
||||
|
||||
host_addition(stream, overflow_sum.data, subtraction_overflowed.data,
|
||||
host_addition(streams[0], gpu_indexes[0], overflow_sum.data,
|
||||
subtraction_overflowed.data,
|
||||
at_least_one_upper_block_is_non_zero.data,
|
||||
radix_params.big_lwe_dimension, 1);
|
||||
|
||||
@@ -493,95 +495,92 @@ __host__ void host_integer_div_rem_kb(cuda_stream_t *stream, Torus *quotient,
|
||||
int factor_lut_id = factor - 2;
|
||||
overflow_sum_radix.fill_with_same_ciphertext(
|
||||
overflow_sum.first_block(), cleaned_merged_interesting_remainder.len,
|
||||
stream);
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
auto conditionally_zero_out_merged_interesting_remainder =
|
||||
[&](cuda_stream_t *stream) {
|
||||
integer_radix_apply_bivariate_lookup_table_kb_factor<Torus>(
|
||||
stream, cleaned_merged_interesting_remainder.data,
|
||||
[&](cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count) {
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count,
|
||||
cleaned_merged_interesting_remainder.data,
|
||||
overflow_sum_radix.data, bsk, ksk,
|
||||
cleaned_merged_interesting_remainder.data,
|
||||
overflow_sum_radix.data, bsks, ksks,
|
||||
cleaned_merged_interesting_remainder.len,
|
||||
mem_ptr->zero_out_if_overflow_did_not_happen[factor_lut_id],
|
||||
factor);
|
||||
};
|
||||
|
||||
auto conditionally_zero_out_merged_new_remainder =
|
||||
[&](cuda_stream_t *stream) {
|
||||
integer_radix_apply_bivariate_lookup_table_kb_factor<Torus>(
|
||||
stream, new_remainder.data, new_remainder.data,
|
||||
overflow_sum_radix.data, bsk, ksk, new_remainder.len,
|
||||
[&](cudaStream_t *streams, uint32_t *gpu_indexes, uint32_t gpu_count) {
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, new_remainder.data,
|
||||
new_remainder.data, overflow_sum_radix.data, bsks, ksks,
|
||||
new_remainder.len,
|
||||
mem_ptr->zero_out_if_overflow_happened[factor_lut_id], factor);
|
||||
};
|
||||
|
||||
auto set_quotient_bit = [&](cuda_stream_t *stream) {
|
||||
auto set_quotient_bit = [&](cudaStream_t *streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
stream, did_not_overflow.data, subtraction_overflowed.data,
|
||||
at_least_one_upper_block_is_non_zero.data, bsk, ksk, 1,
|
||||
mem_ptr->merge_overflow_flags_luts[pos_in_block]);
|
||||
streams, gpu_indexes, gpu_count, did_not_overflow.data,
|
||||
subtraction_overflowed.data,
|
||||
at_least_one_upper_block_is_non_zero.data, bsks, ksks, 1,
|
||||
mem_ptr->merge_overflow_flags_luts[pos_in_block],
|
||||
mem_ptr->merge_overflow_flags_luts[pos_in_block]
|
||||
->params.message_modulus);
|
||||
|
||||
host_addition(stream, "ient[block_of_bit * big_lwe_size],
|
||||
host_addition(streams[0], gpu_indexes[0],
|
||||
"ient[block_of_bit * big_lwe_size],
|
||||
"ient[block_of_bit * big_lwe_size],
|
||||
did_not_overflow.data, radix_params.big_lwe_dimension, 1);
|
||||
};
|
||||
|
||||
stream->synchronize();
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
// cleaned_merged_interesting_remainder
|
||||
conditionally_zero_out_merged_interesting_remainder(
|
||||
mem_ptr->sub_stream_1);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
// new_remainder
|
||||
conditionally_zero_out_merged_new_remainder(mem_ptr->sub_stream_2);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
// quotient
|
||||
set_quotient_bit(mem_ptr->sub_stream_3);
|
||||
}
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
// cleaned_merged_interesting_remainder
|
||||
conditionally_zero_out_merged_interesting_remainder(mem_ptr->sub_streams_1,
|
||||
gpu_indexes, gpu_count);
|
||||
// new_remainder
|
||||
conditionally_zero_out_merged_new_remainder(mem_ptr->sub_streams_2,
|
||||
gpu_indexes, gpu_count);
|
||||
// quotient
|
||||
set_quotient_bit(mem_ptr->sub_streams_3, gpu_indexes, gpu_count);
|
||||
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_3[j], gpu_indexes[j]);
|
||||
}
|
||||
cuda_synchronize_stream(mem_ptr->sub_stream_1);
|
||||
cuda_synchronize_stream(mem_ptr->sub_stream_2);
|
||||
cuda_synchronize_stream(mem_ptr->sub_stream_3);
|
||||
|
||||
assert(first_trivial_block - 1 == cleaned_merged_interesting_remainder.len);
|
||||
assert(first_trivial_block - 1 == new_remainder.len);
|
||||
|
||||
remainder1.copy_from(cleaned_merged_interesting_remainder, 0,
|
||||
first_trivial_block - 1, stream);
|
||||
remainder2.copy_from(new_remainder, 0, first_trivial_block - 1, stream);
|
||||
first_trivial_block - 1, streams[0], gpu_indexes[0]);
|
||||
remainder2.copy_from(new_remainder, 0, first_trivial_block - 1, streams[0],
|
||||
gpu_indexes[0]);
|
||||
}
|
||||
|
||||
assert(remainder1.len == remainder2.len);
|
||||
|
||||
// Clean the quotient and remainder
|
||||
// as even though they have no carries, they are not at nominal noise level
|
||||
host_addition(stream, remainder, remainder1.data, remainder2.data,
|
||||
radix_params.big_lwe_dimension, remainder1.len);
|
||||
host_addition(streams[0], gpu_indexes[0], remainder, remainder1.data,
|
||||
remainder2.data, radix_params.big_lwe_dimension,
|
||||
remainder1.len);
|
||||
|
||||
stream->synchronize();
|
||||
#pragma omp parallel sections
|
||||
{
|
||||
#pragma omp section
|
||||
{
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
mem_ptr->sub_stream_1, remainder, remainder, bsk, ksk, num_blocks,
|
||||
mem_ptr->message_extract_lut_1);
|
||||
}
|
||||
#pragma omp section
|
||||
{
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
mem_ptr->sub_stream_2, quotient, quotient, bsk, ksk, num_blocks,
|
||||
mem_ptr->message_extract_lut_2);
|
||||
}
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
mem_ptr->sub_streams_1, gpu_indexes, gpu_count, remainder, remainder,
|
||||
bsks, ksks, num_blocks, mem_ptr->message_extract_lut_1);
|
||||
integer_radix_apply_univariate_lookup_table_kb(
|
||||
mem_ptr->sub_streams_2, gpu_indexes, gpu_count, quotient, quotient, bsks,
|
||||
ksks, num_blocks, mem_ptr->message_extract_lut_2);
|
||||
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
}
|
||||
mem_ptr->sub_stream_1->synchronize();
|
||||
mem_ptr->sub_stream_2->synchronize();
|
||||
}
|
||||
|
||||
#endif // TFHE_RS_DIV_REM_CUH
|
||||
|
||||
@@ -1,119 +1,52 @@
|
||||
#include "integer/integer.cuh"
|
||||
#include <linear_algebra.h>
|
||||
|
||||
void cuda_full_propagation_64_inplace(
|
||||
cuda_stream_t *stream, void *input_blocks, int8_t *mem_ptr, void *ksk,
|
||||
void *bsk, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t ks_base_log, uint32_t ks_level,
|
||||
uint32_t pbs_base_log, uint32_t pbs_level, uint32_t grouping_factor,
|
||||
uint32_t num_blocks) {
|
||||
void cuda_full_propagation_64_inplace(void **streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, void *input_blocks,
|
||||
int8_t *mem_ptr, void **ksks, void **bsks,
|
||||
uint32_t num_blocks) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
host_full_propagate_inplace<uint64_t, int64_t, AmortizedDegree<256>>(
|
||||
stream, static_cast<uint64_t *>(input_blocks),
|
||||
(int_fullprop_buffer<uint64_t> *)mem_ptr, static_cast<uint64_t *>(ksk),
|
||||
bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log,
|
||||
ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks);
|
||||
break;
|
||||
case 512:
|
||||
host_full_propagate_inplace<uint64_t, int64_t, AmortizedDegree<512>>(
|
||||
stream, static_cast<uint64_t *>(input_blocks),
|
||||
(int_fullprop_buffer<uint64_t> *)mem_ptr, static_cast<uint64_t *>(ksk),
|
||||
bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log,
|
||||
ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks);
|
||||
break;
|
||||
case 1024:
|
||||
host_full_propagate_inplace<uint64_t, int64_t, AmortizedDegree<1024>>(
|
||||
stream, static_cast<uint64_t *>(input_blocks),
|
||||
(int_fullprop_buffer<uint64_t> *)mem_ptr, static_cast<uint64_t *>(ksk),
|
||||
bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log,
|
||||
ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks);
|
||||
break;
|
||||
case 2048:
|
||||
host_full_propagate_inplace<uint64_t, int64_t, AmortizedDegree<2048>>(
|
||||
stream, static_cast<uint64_t *>(input_blocks),
|
||||
(int_fullprop_buffer<uint64_t> *)mem_ptr, static_cast<uint64_t *>(ksk),
|
||||
bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log,
|
||||
ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks);
|
||||
break;
|
||||
case 4096:
|
||||
host_full_propagate_inplace<uint64_t, int64_t, AmortizedDegree<4096>>(
|
||||
stream, static_cast<uint64_t *>(input_blocks),
|
||||
(int_fullprop_buffer<uint64_t> *)mem_ptr, static_cast<uint64_t *>(ksk),
|
||||
bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log,
|
||||
ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks);
|
||||
break;
|
||||
case 8192:
|
||||
host_full_propagate_inplace<uint64_t, int64_t, AmortizedDegree<8192>>(
|
||||
stream, static_cast<uint64_t *>(input_blocks),
|
||||
(int_fullprop_buffer<uint64_t> *)mem_ptr, static_cast<uint64_t *>(ksk),
|
||||
bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log,
|
||||
ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks);
|
||||
break;
|
||||
case 16384:
|
||||
host_full_propagate_inplace<uint64_t, int64_t, AmortizedDegree<16384>>(
|
||||
stream, static_cast<uint64_t *>(input_blocks),
|
||||
(int_fullprop_buffer<uint64_t> *)mem_ptr, static_cast<uint64_t *>(ksk),
|
||||
bsk, lwe_dimension, glwe_dimension, polynomial_size, ks_base_log,
|
||||
ks_level, pbs_base_log, pbs_level, grouping_factor, num_blocks);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (full propagation inplace): unsupported polynomial size. "
|
||||
"Supported N's are powers of two"
|
||||
" in the interval [256..16384].")
|
||||
}
|
||||
int_fullprop_buffer<uint64_t> *buffer =
|
||||
(int_fullprop_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_full_propagate_inplace<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(input_blocks), buffer, (uint64_t **)(ksks), bsks,
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
void scratch_cuda_full_propagation_64(
|
||||
cuda_stream_t *stream, int8_t **mem_ptr, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, int8_t **mem_ptr,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
|
||||
uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_full_propagation<uint64_t>(
|
||||
stream, (int_fullprop_buffer<uint64_t> **)mem_ptr, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, level_count, grouping_factor,
|
||||
input_lwe_ciphertext_count, message_modulus, carry_modulus, pbs_type,
|
||||
allocate_gpu_memory);
|
||||
(cudaStream_t *)streams, gpu_indexes, gpu_count,
|
||||
(int_fullprop_buffer<uint64_t> **)mem_ptr, params, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cleanup_cuda_full_propagation(cuda_stream_t *stream,
|
||||
int8_t **mem_ptr_void) {
|
||||
void cleanup_cuda_full_propagation(void **streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, int8_t **mem_ptr_void) {
|
||||
|
||||
int_fullprop_buffer<uint64_t> *mem_ptr =
|
||||
(int_fullprop_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
|
||||
cuda_drop_async(mem_ptr->lut_buffer, stream);
|
||||
cuda_drop_async(mem_ptr->lut_indexes, stream);
|
||||
|
||||
cuda_drop_async(mem_ptr->lwe_indexes, stream);
|
||||
|
||||
cuda_drop_async(mem_ptr->tmp_small_lwe_vector, stream);
|
||||
cuda_drop_async(mem_ptr->tmp_big_lwe_vector, stream);
|
||||
|
||||
switch (mem_ptr->pbs_type) {
|
||||
case CLASSICAL: {
|
||||
auto x = (pbs_buffer<uint64_t, CLASSICAL> *)(mem_ptr->pbs_buffer);
|
||||
x->release(stream);
|
||||
} break;
|
||||
case MULTI_BIT: {
|
||||
auto x = (pbs_buffer<uint64_t, MULTI_BIT> *)(mem_ptr->pbs_buffer);
|
||||
x->release(stream);
|
||||
} break;
|
||||
default:
|
||||
PANIC("Cuda error (PBS): unsupported implementation variant.")
|
||||
}
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t big_lwe_dimension,
|
||||
uint32_t small_lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, int8_t **mem_ptr,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -121,34 +54,49 @@ void scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_propagate_single_carry_kb_inplace(
|
||||
stream, (int_sc_prop_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_sc_prop_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_propagate_single_carry_kb_64_inplace(cuda_stream_t *stream,
|
||||
void *lwe_array, int8_t *mem_ptr,
|
||||
void *bsk, void *ksk,
|
||||
uint32_t num_blocks) {
|
||||
void cuda_propagate_single_carry_kb_64_inplace(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, void *lwe_array,
|
||||
void *carry_out, int8_t *mem_ptr, void **bsks, void **ksks,
|
||||
uint32_t num_blocks) {
|
||||
host_propagate_single_carry<uint64_t>(
|
||||
stream, static_cast<uint64_t *>(lwe_array),
|
||||
(int_sc_prop_memory<uint64_t> *)mem_ptr, bsk,
|
||||
static_cast<uint64_t *>(ksk), num_blocks);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array), static_cast<uint64_t *>(carry_out),
|
||||
nullptr, (int_sc_prop_memory<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks), num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_propagate_single_carry(cuda_stream_t *stream,
|
||||
void cuda_propagate_single_carry_get_input_carries_kb_64_inplace(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, void *lwe_array,
|
||||
void *carry_out, void *input_carries, int8_t *mem_ptr, void **bsks,
|
||||
void **ksks, uint32_t num_blocks) {
|
||||
host_propagate_single_carry<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array), static_cast<uint64_t *>(carry_out),
|
||||
static_cast<uint64_t *>(input_carries),
|
||||
(int_sc_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_propagate_single_carry(void **streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_sc_prop_memory<uint64_t> *mem_ptr =
|
||||
(int_sc_prop_memory<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release(stream);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
cuda_stream_t *stream, int8_t **mem_ptr, void *input_lut,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
|
||||
uint32_t pbs_base_log, uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, int8_t **mem_ptr,
|
||||
void *input_lut, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
@@ -156,26 +104,124 @@ void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
grouping_factor, message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_apply_univariate_lut_kb<uint64_t>(
|
||||
stream, (int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
allocate_gpu_memory);
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr, static_cast<uint64_t *>(input_lut),
|
||||
num_radix_blocks, params, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_apply_univariate_lut_kb_64(cuda_stream_t *stream,
|
||||
void *output_radix_lwe,
|
||||
void cuda_apply_univariate_lut_kb_64(void **streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, void *output_radix_lwe,
|
||||
void *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *ksk, void *bsk,
|
||||
void **ksks, void **bsks,
|
||||
uint32_t num_blocks) {
|
||||
|
||||
host_apply_univariate_lut_kb<uint64_t>(
|
||||
stream, static_cast<uint64_t *>(output_radix_lwe),
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<uint64_t *>(input_radix_lwe),
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, static_cast<uint64_t *>(ksk), bsk,
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks,
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_apply_univariate_lut_kb_64(cuda_stream_t *stream,
|
||||
void cleanup_cuda_apply_univariate_lut_kb_64(void **streams,
|
||||
uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_radix_lut<uint64_t> *mem_ptr = (int_radix_lut<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release(stream);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, int8_t **mem_ptr,
|
||||
void *input_lut, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_apply_bivariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr, static_cast<uint64_t *>(input_lut),
|
||||
num_radix_blocks, params, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_apply_bivariate_lut_kb_64(void **streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, void *output_radix_lwe,
|
||||
void *input_radix_lwe_1,
|
||||
void *input_radix_lwe_2, int8_t *mem_ptr,
|
||||
void **ksks, void **bsks,
|
||||
uint32_t num_blocks, uint32_t shift) {
|
||||
|
||||
host_apply_bivariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<uint64_t *>(input_radix_lwe_1),
|
||||
static_cast<uint64_t *>(input_radix_lwe_2),
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks, num_blocks,
|
||||
shift);
|
||||
}
|
||||
|
||||
void cleanup_cuda_apply_bivariate_lut_kb_64(void **streams,
|
||||
uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_radix_lut<uint64_t> *mem_ptr = (int_radix_lut<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, int8_t **mem_ptr,
|
||||
void *input_lut, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t ks_level, uint32_t ks_base_log,
|
||||
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_apply_bivariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr, static_cast<uint64_t *>(input_lut),
|
||||
num_radix_blocks, params, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
void *output_radix_lwe, void *input_radix_lwe, int8_t *mem_ptr, void **ksks,
|
||||
void **bsks, uint32_t num_blocks, uint32_t shift) {
|
||||
|
||||
int_radix_params params = ((int_radix_lut<uint64_t> *)mem_ptr)->params;
|
||||
|
||||
host_compute_prefix_sum_hillis_steele<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<uint64_t *>(input_radix_lwe), params,
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_radix_lut<uint64_t> *mem_ptr = (int_radix_lut<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void cuda_integer_reverse_blocks_64_inplace(void **streams,
|
||||
uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, void *lwe_array,
|
||||
uint32_t num_blocks,
|
||||
uint32_t lwe_size) {
|
||||
|
||||
host_radix_blocks_reverse_inplace<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes,
|
||||
static_cast<uint64_t *>(lwe_array), num_blocks, lwe_size);
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -66,11 +66,11 @@ void generate_ids_update_degrees(int *terms_degree, size_t *h_lwe_idx_in,
|
||||
* the integer radix multiplication in keyswitch->bootstrap order.
|
||||
*/
|
||||
void scratch_cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
cuda_stream_t *stream, int8_t **mem_ptr, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, uint32_t glwe_dimension, uint32_t lwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t pbs_base_log, uint32_t pbs_level,
|
||||
uint32_t ks_base_log, uint32_t ks_level, uint32_t grouping_factor,
|
||||
uint32_t num_radix_blocks, PBS_TYPE pbs_type, uint32_t max_shared_memory,
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, int8_t **mem_ptr,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t pbs_base_log,
|
||||
uint32_t pbs_level, uint32_t ks_base_log, uint32_t ks_level,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
@@ -87,7 +87,8 @@ void scratch_cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
case 8192:
|
||||
case 16384:
|
||||
scratch_cuda_integer_mult_radix_ciphertext_kb<uint64_t>(
|
||||
stream, (int_mul_memory<uint64_t> **)mem_ptr, num_radix_blocks, params,
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_mul_memory<uint64_t> **)mem_ptr, num_radix_blocks, params,
|
||||
allocate_gpu_memory);
|
||||
break;
|
||||
default:
|
||||
@@ -122,73 +123,69 @@ void scratch_cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
* - 'num_blocks' is the number of big lwe ciphertext blocks inside radix
|
||||
* ciphertext
|
||||
* - 'pbs_type' selects which PBS implementation should be used
|
||||
* - 'max_shared_memory' maximum shared memory per cuda block
|
||||
*/
|
||||
void cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
cuda_stream_t *stream, void *radix_lwe_out, void *radix_lwe_left,
|
||||
void *radix_lwe_right, void *bsk, void *ksk, int8_t *mem_ptr,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t pbs_base_log,
|
||||
uint32_t pbs_level, uint32_t ks_base_log, uint32_t ks_level,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, PBS_TYPE pbs_type,
|
||||
uint32_t max_shared_memory) {
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
void *radix_lwe_out, void *radix_lwe_left, void *radix_lwe_right,
|
||||
void **bsks, void **ksks, int8_t *mem_ptr, uint32_t polynomial_size,
|
||||
uint32_t num_blocks) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
host_integer_mult_radix_kb<uint64_t, int64_t, AmortizedDegree<256>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<256>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_left),
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsk,
|
||||
static_cast<uint64_t *>(ksk), (int_mul_memory<uint64_t> *)mem_ptr,
|
||||
num_blocks);
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 512:
|
||||
host_integer_mult_radix_kb<uint64_t, int64_t, AmortizedDegree<512>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<512>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_left),
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsk,
|
||||
static_cast<uint64_t *>(ksk), (int_mul_memory<uint64_t> *)mem_ptr,
|
||||
num_blocks);
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 1024:
|
||||
host_integer_mult_radix_kb<uint64_t, int64_t, AmortizedDegree<1024>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<1024>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_left),
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsk,
|
||||
static_cast<uint64_t *>(ksk), (int_mul_memory<uint64_t> *)mem_ptr,
|
||||
num_blocks);
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 2048:
|
||||
host_integer_mult_radix_kb<uint64_t, int64_t, AmortizedDegree<2048>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<2048>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_left),
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsk,
|
||||
static_cast<uint64_t *>(ksk), (int_mul_memory<uint64_t> *)mem_ptr,
|
||||
num_blocks);
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 4096:
|
||||
host_integer_mult_radix_kb<uint64_t, int64_t, AmortizedDegree<4096>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<4096>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_left),
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsk,
|
||||
static_cast<uint64_t *>(ksk), (int_mul_memory<uint64_t> *)mem_ptr,
|
||||
num_blocks);
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 8192:
|
||||
host_integer_mult_radix_kb<uint64_t, int64_t, AmortizedDegree<8192>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<8192>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_left),
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsk,
|
||||
static_cast<uint64_t *>(ksk), (int_mul_memory<uint64_t> *)mem_ptr,
|
||||
num_blocks);
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
case 16384:
|
||||
host_integer_mult_radix_kb<uint64_t, int64_t, AmortizedDegree<16384>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
host_integer_mult_radix_kb<uint64_t, AmortizedDegree<16384>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_left),
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsk,
|
||||
static_cast<uint64_t *>(ksk), (int_mul_memory<uint64_t> *)mem_ptr,
|
||||
num_blocks);
|
||||
static_cast<uint64_t *>(radix_lwe_right), bsks, (uint64_t **)(ksks),
|
||||
(int_mul_memory<uint64_t> *)mem_ptr, num_blocks);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (integer multiplication): unsupported polynomial size. "
|
||||
@@ -196,35 +193,38 @@ void cuda_integer_mult_radix_ciphertext_kb_64(
|
||||
}
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_mult(cuda_stream_t *stream, int8_t **mem_ptr_void) {
|
||||
void cleanup_cuda_integer_mult(void **streams, uint32_t *gpu_indexes,
|
||||
uint32_t gpu_count, int8_t **mem_ptr_void) {
|
||||
|
||||
int_mul_memory<uint64_t> *mem_ptr =
|
||||
(int_mul_memory<uint64_t> *)(*mem_ptr_void);
|
||||
|
||||
mem_ptr->release(stream);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void scratch_cuda_integer_radix_sum_ciphertexts_vec_kb_64(
|
||||
cuda_stream_t *stream, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks_in_radix,
|
||||
uint32_t max_num_radix_in_vec, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
void scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count, int8_t **mem_ptr,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t lwe_dimension,
|
||||
uint32_t ks_level, uint32_t ks_base_log, uint32_t pbs_level,
|
||||
uint32_t pbs_base_log, uint32_t grouping_factor,
|
||||
uint32_t num_blocks_in_radix, uint32_t max_num_radix_in_vec,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus);
|
||||
scratch_cuda_integer_sum_ciphertexts_vec_kb<uint64_t>(
|
||||
stream, (int_sum_ciphertexts_vec_memory<uint64_t> **)mem_ptr,
|
||||
num_blocks_in_radix, max_num_radix_in_vec, params, allocate_gpu_memory);
|
||||
scratch_cuda_integer_partial_sum_ciphertexts_vec_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_sum_ciphertexts_vec_memory<uint64_t> **)mem_ptr, num_blocks_in_radix,
|
||||
max_num_radix_in_vec, params, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_radix_sum_ciphertexts_vec_kb_64(
|
||||
cuda_stream_t *stream, void *radix_lwe_out, void *radix_lwe_vec,
|
||||
uint32_t num_radix_in_vec, int8_t *mem_ptr, void *bsk, void *ksk,
|
||||
uint32_t num_blocks_in_radix) {
|
||||
void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
void *radix_lwe_out, void *radix_lwe_vec, uint32_t num_radix_in_vec,
|
||||
int8_t *mem_ptr, void **bsks, void **ksks, uint32_t num_blocks_in_radix) {
|
||||
|
||||
auto mem = (int_sum_ciphertexts_vec_memory<uint64_t> *)mem_ptr;
|
||||
|
||||
@@ -237,46 +237,51 @@ void cuda_integer_radix_sum_ciphertexts_vec_kb_64(
|
||||
|
||||
switch (mem->params.polynomial_size) {
|
||||
case 512:
|
||||
host_integer_sum_ciphertexts_vec_kb<uint64_t, AmortizedDegree<512>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_vec), terms_degree, bsk,
|
||||
static_cast<uint64_t *>(ksk), mem, num_blocks_in_radix,
|
||||
num_radix_in_vec);
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t, AmortizedDegree<512>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_vec), terms_degree, bsks,
|
||||
(uint64_t **)(ksks), mem, num_blocks_in_radix, num_radix_in_vec);
|
||||
break;
|
||||
case 1024:
|
||||
host_integer_sum_ciphertexts_vec_kb<uint64_t, AmortizedDegree<1024>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_vec), terms_degree, bsk,
|
||||
static_cast<uint64_t *>(ksk), mem, num_blocks_in_radix,
|
||||
num_radix_in_vec);
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<1024>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_vec), terms_degree, bsks,
|
||||
(uint64_t **)(ksks), mem, num_blocks_in_radix, num_radix_in_vec);
|
||||
break;
|
||||
case 2048:
|
||||
host_integer_sum_ciphertexts_vec_kb<uint64_t, AmortizedDegree<2048>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_vec), terms_degree, bsk,
|
||||
static_cast<uint64_t *>(ksk), mem, num_blocks_in_radix,
|
||||
num_radix_in_vec);
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<2048>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_vec), terms_degree, bsks,
|
||||
(uint64_t **)(ksks), mem, num_blocks_in_radix, num_radix_in_vec);
|
||||
break;
|
||||
case 4096:
|
||||
host_integer_sum_ciphertexts_vec_kb<uint64_t, AmortizedDegree<4096>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_vec), terms_degree, bsk,
|
||||
static_cast<uint64_t *>(ksk), mem, num_blocks_in_radix,
|
||||
num_radix_in_vec);
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<4096>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_vec), terms_degree, bsks,
|
||||
(uint64_t **)(ksks), mem, num_blocks_in_radix, num_radix_in_vec);
|
||||
break;
|
||||
case 8192:
|
||||
host_integer_sum_ciphertexts_vec_kb<uint64_t, AmortizedDegree<8192>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_vec), terms_degree, bsk,
|
||||
static_cast<uint64_t *>(ksk), mem, num_blocks_in_radix,
|
||||
num_radix_in_vec);
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<8192>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_vec), terms_degree, bsks,
|
||||
(uint64_t **)(ksks), mem, num_blocks_in_radix, num_radix_in_vec);
|
||||
break;
|
||||
case 16384:
|
||||
host_integer_sum_ciphertexts_vec_kb<uint64_t, AmortizedDegree<16384>>(
|
||||
stream, static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_vec), terms_degree, bsk,
|
||||
static_cast<uint64_t *>(ksk), mem, num_blocks_in_radix,
|
||||
num_radix_in_vec);
|
||||
host_integer_partial_sum_ciphertexts_vec_kb<uint64_t,
|
||||
AmortizedDegree<16384>>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_vec), terms_degree, bsks,
|
||||
(uint64_t **)(ksks), mem, num_blocks_in_radix, num_radix_in_vec);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (integer multiplication): unsupported polynomial size. "
|
||||
@@ -286,10 +291,11 @@ void cuda_integer_radix_sum_ciphertexts_vec_kb_64(
|
||||
free(terms_degree);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_radix_sum_ciphertexts_vec(cuda_stream_t *stream,
|
||||
int8_t **mem_ptr_void) {
|
||||
void cleanup_cuda_integer_radix_partial_sum_ciphertexts_vec(
|
||||
void **streams, uint32_t *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_sum_ciphertexts_vec_memory<uint64_t> *mem_ptr =
|
||||
(int_sum_ciphertexts_vec_memory<uint64_t> *)(*mem_ptr_void);
|
||||
|
||||
mem_ptr->release(stream);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user