mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-04-28 03:01:21 -04:00
Compare commits
122 Commits
as/lut_cac
...
ns/wip/ver
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6b8190344e | ||
|
|
21ffe8e1fb | ||
|
|
6b8c0decda | ||
|
|
7593093d8f | ||
|
|
344ea55426 | ||
|
|
53de52c9fd | ||
|
|
14870536e4 | ||
|
|
35cc35bd96 | ||
|
|
20403d6325 | ||
|
|
71f52b2853 | ||
|
|
d83e57f29a | ||
|
|
9592445bd8 | ||
|
|
035285fcbe | ||
|
|
650f8a400f | ||
|
|
7fac0bf3b2 | ||
|
|
91a927e09e | ||
|
|
87cf5dd8a0 | ||
|
|
52d90e3c62 | ||
|
|
4abaf92dbd | ||
|
|
140d27f11d | ||
|
|
cd4f677248 | ||
|
|
6ad8f30e3f | ||
|
|
58f075b669 | ||
|
|
4145497a47 | ||
|
|
c939687351 | ||
|
|
316c345d0a | ||
|
|
dcb0f892ef | ||
|
|
4e1ab7f769 | ||
|
|
b8843352a5 | ||
|
|
8f9571dc64 | ||
|
|
1d7c7dfa98 | ||
|
|
b3029d7296 | ||
|
|
b4c8f782c4 | ||
|
|
94fb1c61fe | ||
|
|
9a870652dd | ||
|
|
b71799de2f | ||
|
|
869af08f1e | ||
|
|
4a2eac2990 | ||
|
|
bbe62324fa | ||
|
|
bdc5d8597e | ||
|
|
cba7f17c12 | ||
|
|
74bbdf9038 | ||
|
|
7da02520dd | ||
|
|
0398dccf29 | ||
|
|
e3ad38d077 | ||
|
|
c471c3f687 | ||
|
|
e40e127393 | ||
|
|
9ad43d62c3 | ||
|
|
8daccd1d85 | ||
|
|
ed117630e8 | ||
|
|
267ad280f2 | ||
|
|
9a556bc517 | ||
|
|
5d651c0639 | ||
|
|
96b7c6ee60 | ||
|
|
0340869347 | ||
|
|
daff533de4 | ||
|
|
155c575bd9 | ||
|
|
e942c22bc1 | ||
|
|
e620768e3c | ||
|
|
ebd6c06d7b | ||
|
|
fb630d5cd2 | ||
|
|
c2bcb10702 | ||
|
|
a1f17f4a00 | ||
|
|
8f35a3a3d6 | ||
|
|
2f111bc413 | ||
|
|
ee9a95f8dd | ||
|
|
16bdbeb0ea | ||
|
|
75667b79ca | ||
|
|
8b7f8ead23 | ||
|
|
03aa3ddec2 | ||
|
|
40a439620f | ||
|
|
0fca1796f3 | ||
|
|
3fb5eb7a21 | ||
|
|
d1f77b23f3 | ||
|
|
e75bca1b1d | ||
|
|
aa1e9ffdfa | ||
|
|
45d76cf079 | ||
|
|
f4713ceeaa | ||
|
|
600532e8f7 | ||
|
|
7bfb236543 | ||
|
|
7c0ecf48f2 | ||
|
|
66b357d869 | ||
|
|
b7786afaf5 | ||
|
|
84931c420f | ||
|
|
93201d6afb | ||
|
|
1f6681ec39 | ||
|
|
31a1e977b2 | ||
|
|
2cfa0f74d1 | ||
|
|
17fd3e9db5 | ||
|
|
59646a76e4 | ||
|
|
406055671b | ||
|
|
87bb4d99d3 | ||
|
|
9f9b54dcb8 | ||
|
|
a8a796de6c | ||
|
|
7b4093b572 | ||
|
|
f52eb16581 | ||
|
|
96622506c5 | ||
|
|
ce73b934b2 | ||
|
|
e4f6cf7b43 | ||
|
|
e23455e0df | ||
|
|
98d2139761 | ||
|
|
e659de7d16 | ||
|
|
b56989a491 | ||
|
|
dc30ae092b | ||
|
|
0c65e957fc | ||
|
|
9144fe4de6 | ||
|
|
c5c16782ff | ||
|
|
bb571712bf | ||
|
|
222a7e93c4 | ||
|
|
8c96762f79 | ||
|
|
d23e879a87 | ||
|
|
ae856dcce2 | ||
|
|
4c786562ba | ||
|
|
9d0a772089 | ||
|
|
cfd8672b0f | ||
|
|
3c2c40b058 | ||
|
|
bb7e94423b | ||
|
|
e90944d213 | ||
|
|
e55c339c46 | ||
|
|
c1f82f633a | ||
|
|
61550c6405 | ||
|
|
f93b872551 |
4
.github/actions/gpu_setup/action.yml
vendored
4
.github/actions/gpu_setup/action.yml
vendored
@@ -23,6 +23,10 @@ runs:
|
||||
echo "${CMAKE_SCRIPT_SHA} cmake-${CMAKE_VERSION}-linux-x86_64.sh" > checksum
|
||||
sha256sum -c checksum
|
||||
sudo bash cmake-"${CMAKE_VERSION}"-linux-x86_64.sh --skip-license --prefix=/usr/ --exclude-subdir
|
||||
|
||||
# Disable unattended-upgrades to avoid lock issues
|
||||
sudo systemctl disable --now unattended-upgrades
|
||||
|
||||
sudo apt-get clean
|
||||
sudo rm -rf /var/lib/apt/lists/*
|
||||
sudo apt update
|
||||
|
||||
15
.github/runs-on.yml
vendored
Normal file
15
.github/runs-on.yml
vendored
Normal file
@@ -0,0 +1,15 @@
|
||||
runners:
|
||||
cpu-big:
|
||||
family: m6i.32xlarge
|
||||
image: cpu-tests-eu-west-3
|
||||
volume: 200gb
|
||||
spot: false
|
||||
cpu-small:
|
||||
family: m6i.4xlarge
|
||||
image: cpu-tests-eu-west-3
|
||||
volume: 200gb
|
||||
spot: false
|
||||
|
||||
images:
|
||||
cpu-tests-eu-west-3:
|
||||
ami: "ami-0a786ffdb1411fac4" # Ubuntu 24.04
|
||||
@@ -34,6 +34,9 @@ permissions:
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: aws_tfhe_backward_compat_tests/setup-instance
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name != 'push'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
@@ -41,7 +44,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -66,7 +69,7 @@ jobs:
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'true' # Needed to pull lfs data
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -80,7 +83,7 @@ jobs:
|
||||
|
||||
- name: Retrieve data from cache
|
||||
id: retrieve-data-cache
|
||||
uses: actions/cache/restore@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
|
||||
uses: actions/cache/restore@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
|
||||
with:
|
||||
path: |
|
||||
utils/tfhe-backward-compat-data/**/*.cbor
|
||||
@@ -109,7 +112,7 @@ jobs:
|
||||
- name: Store data in cache
|
||||
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
|
||||
continue-on-error: true
|
||||
uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
|
||||
uses: actions/cache/save@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
|
||||
with:
|
||||
path: |
|
||||
utils/tfhe-backward-compat-data/**/*.cbor
|
||||
@@ -141,7 +144,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
12
.github/workflows/aws_tfhe_fast_tests.yml
vendored
12
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -63,7 +63,7 @@ jobs:
|
||||
any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -146,7 +146,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -171,7 +171,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -219,7 +219,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
|
||||
uses: actions/cache/restore@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -232,7 +232,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
|
||||
uses: actions/cache/save@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -299,7 +299,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/aws_tfhe_integer_tests.yml
vendored
8
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -50,7 +50,7 @@ jobs:
|
||||
steps.changed-files.outputs.integer_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -86,7 +86,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -112,7 +112,7 @@ jobs:
|
||||
timeout-minutes: 480 # 8 hours
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -168,7 +168,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/aws_tfhe_noise_checks.yml
vendored
6
.github/workflows/aws_tfhe_noise_checks.yml
vendored
@@ -35,7 +35,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -60,7 +60,7 @@ jobs:
|
||||
timeout-minutes: 1440
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
steps.changed-files.outputs.integer_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -87,7 +87,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -112,7 +112,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -172,7 +172,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/aws_tfhe_tests.yml
vendored
8
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -72,7 +72,7 @@ jobs:
|
||||
any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -155,7 +155,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -182,7 +182,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -279,7 +279,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
10
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
10
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -64,7 +64,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -80,7 +80,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
|
||||
uses: actions/cache/restore@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
|
||||
uses: actions/cache/save@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -147,7 +147,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/benchmark_cpu.yml
vendored
4
.github/workflows/benchmark_cpu.yml
vendored
@@ -16,10 +16,12 @@ on:
|
||||
- integer_zk
|
||||
- shortint
|
||||
- shortint_oprf
|
||||
- hlapi
|
||||
- hlapi_unsigned
|
||||
- hlapi_signed
|
||||
- hlapi_erc20
|
||||
- hlapi_dex
|
||||
- hlapi_noise_squash
|
||||
- hlapi_kvstore
|
||||
- tfhe_zk_pok
|
||||
- boolean
|
||||
- pbs
|
||||
|
||||
8
.github/workflows/benchmark_cpu_common.yml
vendored
8
.github/workflows/benchmark_cpu_common.yml
vendored
@@ -126,7 +126,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -149,7 +149,7 @@ jobs:
|
||||
params_type: ${{ fromJSON(needs.prepare-matrix.outputs.params_type) }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -229,7 +229,7 @@ jobs:
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -261,7 +261,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
25
.github/workflows/benchmark_cpu_weekly.yml
vendored
25
.github/workflows/benchmark_cpu_weekly.yml
vendored
@@ -24,6 +24,7 @@ permissions: {}
|
||||
jobs:
|
||||
prepare-inputs:
|
||||
name: benchmark_cpu_weekly/prepare-inputs
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
is_weekly_bench_group_1: ${{ steps.check_bench_group_1.outputs.is_weekly_bench_group_1 }}
|
||||
@@ -72,8 +73,7 @@ jobs:
|
||||
|
||||
run-benchmarks-integer:
|
||||
name: benchmark_cpu_weekly/run-benchmarks-integer
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
&& (needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true' || needs.prepare-inputs.outputs.is_quarterly_bench == 'true')
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true' || needs.prepare-inputs.outputs.is_quarterly_bench == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_cpu_common.yml
|
||||
with:
|
||||
@@ -92,8 +92,7 @@ jobs:
|
||||
|
||||
run-benchmarks-integer-zk-pke:
|
||||
name: benchmark_cpu_weekly/run-benchmarks-integer-zk-pke
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
&& needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_cpu_common.yml
|
||||
with:
|
||||
@@ -111,8 +110,7 @@ jobs:
|
||||
|
||||
run-benchmarks-hlapi-erc20:
|
||||
name: benchmark_cpu_weekly/run-benchmarks-hlapi-erc20
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
&& needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_cpu_common.yml
|
||||
with:
|
||||
@@ -130,8 +128,7 @@ jobs:
|
||||
|
||||
run-benchmarks-hlapi-dex:
|
||||
name: benchmark_cpu_weekly/run-benchmarks-hlapi-dex
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
&& needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_cpu_common.yml
|
||||
with:
|
||||
@@ -149,8 +146,7 @@ jobs:
|
||||
|
||||
run-benchmarks-core-crypto:
|
||||
name: benchmark_cpu_weekly/run-benchmarks-core-crypto
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
&& needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_cpu_common.yml
|
||||
with:
|
||||
@@ -167,8 +163,7 @@ jobs:
|
||||
|
||||
run-benchmarks-shortint:
|
||||
name: benchmark_cpu_weekly/run-benchmarks-shortint
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
&& (needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true' || needs.prepare-inputs.outputs.is_quarterly_bench == 'true')
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true' || needs.prepare-inputs.outputs.is_quarterly_bench == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_cpu_common.yml
|
||||
with:
|
||||
@@ -186,8 +181,7 @@ jobs:
|
||||
|
||||
run-benchmarks-boolean:
|
||||
name: benchmark_cpu_weekly/run-benchmarks-boolean
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
&& needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_cpu_common.yml
|
||||
with:
|
||||
@@ -206,8 +200,7 @@ jobs:
|
||||
|
||||
run-benchmarks-tfhe-zk-pok:
|
||||
name: benchmark_cpu_weekly/run-benchmarks-tfhe-zk-pok
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
&& needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_cpu_common.yml
|
||||
with:
|
||||
|
||||
8
.github/workflows/benchmark_ct_key_sizes.yml
vendored
8
.github/workflows/benchmark_ct_key_sizes.yml
vendored
@@ -33,7 +33,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -49,7 +49,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -105,7 +105,7 @@ jobs:
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
55
.github/workflows/benchmark_documentation.yml
vendored
55
.github/workflows/benchmark_documentation.yml
vendored
@@ -25,10 +25,6 @@ on:
|
||||
description: "Generate SVG tables"
|
||||
type: boolean
|
||||
default: true
|
||||
open-pr:
|
||||
description: "Open a PR with the benchmark results"
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
permissions: {}
|
||||
|
||||
@@ -166,54 +162,3 @@ jobs:
|
||||
DATA_EXTRACTOR_DATABASE_USER: ${{ secrets.DATA_EXTRACTOR_DATABASE_USER }}
|
||||
DATA_EXTRACTOR_DATABASE_HOST: ${{ secrets.DATA_EXTRACTOR_DATABASE_HOST }}
|
||||
DATA_EXTRACTOR_DATABASE_PASSWORD: ${{ secrets.DATA_EXTRACTOR_DATABASE_PASSWORD }}
|
||||
|
||||
open-pr:
|
||||
name: benchmark-documentation/open-pr
|
||||
needs: [ generate-svgs-with-benchmarks-run, generate-svgs-without-benchmarks-run ]
|
||||
if: ${{ always() && inputs.open-pr &&
|
||||
(needs.generate-svgs-with-benchmarks-run.result == 'success' || needs.generate-svgs-without-benchmarks-run.result == 'success') }}
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: write # Needed to create a commit
|
||||
pull-requests: write # Needed to open a pull-request
|
||||
env:
|
||||
PATH_TO_DOC_ASSETS: tfhe/docs/.gitbook/assets
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
- name: Download SVG tables
|
||||
uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
|
||||
with:
|
||||
path: svg_tables
|
||||
merge-multiple: 'true'
|
||||
|
||||
# Perform best effort to copy SVG tables. If the copy fails or files don't exist, the PR will still be created.
|
||||
- name: Copy SVG tables to documentation location
|
||||
run: |
|
||||
cp -f svg_tables/*integer-benchmark*.svg "${PATH_TO_DOC_ASSETS}" 2>/dev/null
|
||||
cp -f svg_tables/*pbs-benchmark-tuniform*.svg "${PATH_TO_DOC_ASSETS}" 2>/dev/null
|
||||
cp -f svg_tables/cpu-gpu-hpu-integer-benchmark-fheuint64-tuniform-2m128-ciphertext.svg "${PATH_TO_DOC_ASSETS}" 2>/dev/null
|
||||
|
||||
- name: Get current date
|
||||
id: get-date
|
||||
run: |
|
||||
echo "date=$(date '+%g_%m_%d_%Hh%Mm%Ss')" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Create pull-request
|
||||
uses: peter-evans/create-pull-request@98357b18bf14b5342f975ff684046ec3b2a07725 # v8.0.0
|
||||
with:
|
||||
sign-commits: true # Commit will be signed by github-actions bot
|
||||
add-paths: ${{ env.PATH_TO_DOC_ASSETS }}/*.svg
|
||||
branch: gh-bot/docs/update-svg-tables-${{ steps.get-date.outputs.date }}
|
||||
commit-message: |
|
||||
chore(docs): update benchmark results for all backends
|
||||
|
||||
Automated documentation update from tfhe-rs CI pipeline.
|
||||
title: |
|
||||
[CI] chore(docs): update benchmark results for all backends
|
||||
body: |
|
||||
Documentation update triggered by GitHub workflow.
|
||||
labels: documentation
|
||||
|
||||
8
.github/workflows/benchmark_gpu_4090.yml
vendored
8
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -123,7 +123,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -146,7 +146,7 @@ jobs:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
|
||||
10
.github/workflows/benchmark_gpu_common.yml
vendored
10
.github/workflows/benchmark_gpu_common.yml
vendored
@@ -134,7 +134,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -175,7 +175,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -209,7 +209,7 @@ jobs:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -287,7 +287,7 @@ jobs:
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -324,7 +324,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
18
.github/workflows/benchmark_gpu_coprocessor.yml
vendored
18
.github/workflows/benchmark_gpu_coprocessor.yml
vendored
@@ -50,6 +50,8 @@ env:
|
||||
jobs:
|
||||
parse-inputs:
|
||||
name: benchmark_gpu_coprocessor/parse-inputs
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
contents: 'read'
|
||||
@@ -92,7 +94,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -130,7 +132,7 @@ jobs:
|
||||
git lfs install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
path: tfhe-rs
|
||||
persist-credentials: false
|
||||
@@ -141,7 +143,7 @@ jobs:
|
||||
ls
|
||||
|
||||
- name: Checkout fhevm
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
repository: zama-ai/fhevm
|
||||
persist-credentials: 'false'
|
||||
@@ -192,10 +194,10 @@ jobs:
|
||||
cargo install sqlx-cli
|
||||
|
||||
- name: Install foundry
|
||||
uses: foundry-rs/foundry-toolchain@8b0419c685ef46cb79ec93fbdc131174afceb730
|
||||
uses: foundry-rs/foundry-toolchain@8789b3e21e6c11b2697f5eb56eddae542f746c10
|
||||
|
||||
- name: Cache cargo
|
||||
uses: actions/cache@9255dc7a253b0ccc959486e2bca901246202afeb # v5.0.1
|
||||
uses: actions/cache@8b402f58fbc84540c8b491a91e594a4576fec3d7 # v5.0.2
|
||||
with:
|
||||
path: |
|
||||
~/.cargo/registry
|
||||
@@ -223,7 +225,7 @@ jobs:
|
||||
working-directory: fhevm/coprocessor/fhevm-engine/tfhe-worker
|
||||
|
||||
- name: Use Node.js
|
||||
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
|
||||
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
|
||||
with:
|
||||
node-version: 20.x
|
||||
|
||||
@@ -299,7 +301,7 @@ jobs:
|
||||
path: fhevm/$${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -324,7 +326,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
34
.github/workflows/benchmark_gpu_weekly.yml
vendored
34
.github/workflows/benchmark_gpu_weekly.yml
vendored
@@ -25,6 +25,7 @@ permissions: {}
|
||||
jobs:
|
||||
prepare-inputs:
|
||||
name: benchmark_cpu_weekly/prepare-inputs
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
is_weekly_bench_group_1: ${{ steps.check_bench_group_1.outputs.is_weekly_bench_group_1 }}
|
||||
@@ -49,8 +50,7 @@ jobs:
|
||||
|
||||
run-benchmarks-8-h100-sxm5-integer:
|
||||
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer
|
||||
if: github.repository == 'zama-ai/tfhe-rs' &&
|
||||
needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -72,8 +72,7 @@ jobs:
|
||||
|
||||
run-benchmarks-8-h100-sxm5-integer-compression:
|
||||
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer-compression
|
||||
if: github.repository == 'zama-ai/tfhe-rs' &&
|
||||
needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -95,8 +94,7 @@ jobs:
|
||||
|
||||
run-benchmarks-8-h100-sxm5-integer-zk-aes:
|
||||
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-integer-zk-aes
|
||||
if: github.repository == 'zama-ai/tfhe-rs' &&
|
||||
needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -118,8 +116,7 @@ jobs:
|
||||
|
||||
run-benchmarks-8-h100-sxm5-noise-squash:
|
||||
name: benchmark_gpu_weekly/run-benchmarks-8-h100-sxm5-noise-squash
|
||||
if: github.repository == 'zama-ai/tfhe-rs' &&
|
||||
needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -141,8 +138,7 @@ jobs:
|
||||
|
||||
run-benchmarks-1-h100-core-crypto:
|
||||
name: benchmark_gpu_weekly/run-benchmarks-1-h100-core-crypto (1xH100)
|
||||
if: github.repository == 'zama-ai/tfhe-rs' &&
|
||||
needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_1 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -166,8 +162,7 @@ jobs:
|
||||
|
||||
run-benchmarks-1-h100-erc20:
|
||||
name: benchmark_gpu_weekly/run-benchmarks-1-h100-erc20
|
||||
if: github.repository == 'zama-ai/tfhe-rs' &&
|
||||
needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -187,8 +182,7 @@ jobs:
|
||||
|
||||
run-benchmarks-2-h100-erc20:
|
||||
name: benchmark_gpu_weekly/run-benchmarks-2-h100-erc20
|
||||
if: github.repository == 'zama-ai/tfhe-rs' &&
|
||||
needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -208,8 +202,7 @@ jobs:
|
||||
|
||||
run-benchmarks-8-h100-erc20:
|
||||
name: benchmark_gpu_weekly/run-benchmarks-8-h100-erc20
|
||||
if: github.repository == 'zama-ai/tfhe-rs' &&
|
||||
needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -233,8 +226,7 @@ jobs:
|
||||
|
||||
run-benchmarks-1-h100-dex:
|
||||
name: benchmark_gpu_weekly/run-benchmarks-1-h100-dex
|
||||
if: github.repository == 'zama-ai/tfhe-rs' &&
|
||||
needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -254,8 +246,7 @@ jobs:
|
||||
|
||||
run-benchmarks-2-h100-dex:
|
||||
name: benchmark_gpu_weekly/run-benchmarks-2-h100-dex
|
||||
if: github.repository == 'zama-ai/tfhe-rs' &&
|
||||
needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
@@ -275,8 +266,7 @@ jobs:
|
||||
|
||||
run-benchmarks-8-h100-dex:
|
||||
name: benchmark_gpu_weekly/run-benchmarks-8-h100-dex
|
||||
if: github.repository == 'zama-ai/tfhe-rs' &&
|
||||
needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
if: needs.prepare-inputs.outputs.is_weekly_bench_group_2 == 'true'
|
||||
needs: prepare-inputs
|
||||
uses: ./.github/workflows/benchmark_gpu_common.yml
|
||||
with:
|
||||
|
||||
3
.github/workflows/benchmark_hpu.yml
vendored
3
.github/workflows/benchmark_hpu.yml
vendored
@@ -12,7 +12,8 @@ on:
|
||||
default: integer
|
||||
options:
|
||||
- integer
|
||||
- hlapi
|
||||
- hlapi_unsigned
|
||||
- hlapi_signed
|
||||
- hlapi_erc20
|
||||
op_flavor:
|
||||
description: "Operations set to run"
|
||||
|
||||
4
.github/workflows/benchmark_hpu_common.yml
vendored
4
.github/workflows/benchmark_hpu_common.yml
vendored
@@ -126,7 +126,7 @@ jobs:
|
||||
ssh-private-key: ${{ secrets.SSH_PRIVATE_KEY }}
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -191,7 +191,7 @@ jobs:
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
|
||||
16
.github/workflows/benchmark_perf_regression.yml
vendored
16
.github/workflows/benchmark_perf_regression.yml
vendored
@@ -50,7 +50,7 @@ jobs:
|
||||
pull-requests: write # Needed to write a comment in a pull-request
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
@@ -143,7 +143,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -164,7 +164,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
@@ -191,7 +191,7 @@ jobs:
|
||||
command: ${{ fromJson(needs.prepare-benchmarks.outputs.commands) }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0 # Needed to get commit hash
|
||||
persist-credentials: 'false'
|
||||
@@ -245,7 +245,7 @@ jobs:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -305,13 +305,13 @@ jobs:
|
||||
REF_NAME: ${{ github.head_ref || github.ref_name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install recent Python
|
||||
uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
|
||||
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
|
||||
with:
|
||||
python-version: '3.12'
|
||||
pip-install: -r ci/data_extractor/requirements.txt -r ci/perf_regression/requirements.txt
|
||||
@@ -383,7 +383,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
13
.github/workflows/benchmark_tfhe_fft.yml
vendored
13
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -31,13 +31,16 @@ permissions: {}
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: benchmark_tfhe_fft/setup-instance
|
||||
if:
|
||||
(github.event_name != 'workflow_dispatch' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -55,7 +58,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -73,7 +76,7 @@ jobs:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
@@ -102,7 +105,7 @@ jobs:
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -134,7 +137,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
13
.github/workflows/benchmark_tfhe_ntt.yml
vendored
13
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -31,13 +31,16 @@ permissions: {}
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: benchmark_tfhe_ntt/setup-instance
|
||||
if:
|
||||
(github.event_name != 'workflow_dispatch' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -55,7 +58,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -73,7 +76,7 @@ jobs:
|
||||
SHA: ${{ github.sha }}
|
||||
|
||||
- name: Install rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: nightly
|
||||
override: true
|
||||
@@ -102,7 +105,7 @@ jobs:
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -134,7 +137,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
17
.github/workflows/benchmark_wasm_client.yml
vendored
17
.github/workflows/benchmark_wasm_client.yml
vendored
@@ -31,15 +31,14 @@ jobs:
|
||||
name: benchmark_wasm_client/should-run
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
|
||||
(github.event_name != 'workflow_dispatch' && github.repository == 'zama-ai/tfhe-rs')
|
||||
permissions:
|
||||
pull-requests: read # Needed to check for file change
|
||||
outputs:
|
||||
wasm_bench: ${{ steps.changed-files.outputs.wasm_bench_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -71,7 +70,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -91,7 +90,7 @@ jobs:
|
||||
browser: [ chrome, firefox ]
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -119,7 +118,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
|
||||
uses: actions/cache/restore@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -132,7 +131,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@9255dc7a253b0ccc959486e2bca901246202afeb #v5.0.1
|
||||
uses: actions/cache/save@8b402f58fbc84540c8b491a91e594a4576fec3d7 #v5.0.2
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -181,7 +180,7 @@ jobs:
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -213,7 +212,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/cargo_audit.yml
vendored
4
.github/workflows/cargo_audit.yml
vendored
@@ -24,9 +24,11 @@ permissions: {}
|
||||
jobs:
|
||||
audit:
|
||||
name: cargo_audit/audit
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
2
.github/workflows/cargo_build.yml
vendored
2
.github/workflows/cargo_build.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
outputs:
|
||||
matrix_command: ${{ steps.set-pcc-commands-matrix.outputs.commands }}
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
6
.github/workflows/cargo_build_common.yml
vendored
6
.github/workflows/cargo_build_common.yml
vendored
@@ -80,7 +80,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -140,7 +140,7 @@ jobs:
|
||||
result: ${{ steps.set_builds_result.outputs.result }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -242,7 +242,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/cargo_build_tfhe_fft.yml
vendored
4
.github/workflows/cargo_build_tfhe_fft.yml
vendored
@@ -26,13 +26,13 @@ jobs:
|
||||
fail-fast: false
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
4
.github/workflows/cargo_build_tfhe_ntt.yml
vendored
4
.github/workflows/cargo_build_tfhe_ntt.yml
vendored
@@ -24,13 +24,13 @@ jobs:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
|
||||
11
.github/workflows/cargo_test_fft.yml
vendored
11
.github/workflows/cargo_test_fft.yml
vendored
@@ -2,6 +2,7 @@
|
||||
name: cargo_test_fft
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
@@ -22,6 +23,8 @@ permissions:
|
||||
jobs:
|
||||
should-run:
|
||||
name: cargo_test_fft/should-run
|
||||
if: github.event_name != 'push' ||
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read # Needed to check for file change
|
||||
@@ -29,7 +32,7 @@ jobs:
|
||||
fft_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.fft_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -56,13 +59,13 @@ jobs:
|
||||
runner_type: [ ubuntu-latest, macos-latest, windows-latest ]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
@@ -92,7 +95,7 @@ jobs:
|
||||
if: needs.should-run.outputs.fft_test == 'true'
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
13
.github/workflows/cargo_test_ntt.yml
vendored
13
.github/workflows/cargo_test_ntt.yml
vendored
@@ -2,6 +2,7 @@
|
||||
name: cargo_test_ntt
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
@@ -24,6 +25,8 @@ permissions:
|
||||
jobs:
|
||||
should-run:
|
||||
name: cargo_test_ntt/should-run
|
||||
if: github.event_name != 'push' ||
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read # Needed to check for file change
|
||||
@@ -31,7 +34,7 @@ jobs:
|
||||
ntt_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.ntt_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: "false"
|
||||
@@ -60,7 +63,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -87,13 +90,13 @@ jobs:
|
||||
os: ${{fromJson(needs.setup-instance.outputs.matrix_os)}}
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
@@ -143,7 +146,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/ci_lint.yml
vendored
4
.github/workflows/ci_lint.yml
vendored
@@ -20,7 +20,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -43,7 +43,7 @@ jobs:
|
||||
echo "version=$(make zizmor_version)" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
- name: Check workflows security
|
||||
uses: zizmorcore/zizmor-action@e639db99335bc9038abc0e066dfcd72e23d26fb4 # v0.3.0
|
||||
uses: zizmorcore/zizmor-action@135698455da5c3b3e55f73f4419e481ab68cdd95 # v0.4.1
|
||||
with:
|
||||
advanced-security: 'false' # Print results directly in logs
|
||||
persona: pedantic
|
||||
|
||||
6
.github/workflows/code_coverage.yml
vendored
6
.github/workflows/code_coverage.yml
vendored
@@ -31,7 +31,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -50,7 +50,7 @@ jobs:
|
||||
timeout-minutes: 5760 # 4 days
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -130,7 +130,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -37,7 +37,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -62,7 +62,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/generate_svg_common.yml
vendored
2
.github/workflows/generate_svg_common.yml
vendored
@@ -43,7 +43,7 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
|
||||
|
||||
8
.github/workflows/gpu_4090_tests.yml
vendored
8
.github/workflows/gpu_4090_tests.yml
vendored
@@ -19,8 +19,8 @@ on:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
schedule:
|
||||
# Nightly tests @ 1AM after each work day
|
||||
- cron: "0 1 * * MON-FRI"
|
||||
# Every other day at 1AM
|
||||
- cron: "0 1 */2 * *"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -37,11 +37,11 @@ jobs:
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ["self-hosted", "4090-desktop"]
|
||||
timeout-minutes: 1440 # 24 hours
|
||||
timeout-minutes: 2880 # 48 hours
|
||||
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
14
.github/workflows/gpu_code_validation_tests.yml
vendored
14
.github/workflows/gpu_code_validation_tests.yml
vendored
@@ -23,8 +23,8 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# every month
|
||||
- cron: "0 0 1 * *"
|
||||
# every friday noon
|
||||
- cron: "0 12 * * 5"
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
@@ -35,15 +35,15 @@ jobs:
|
||||
setup-instance:
|
||||
name: gpu_code_validation_tests/setup-instance
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved')
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -79,7 +79,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
|
||||
name: gpu_fast_h100_tests
|
||||
name: gpu_core_h100_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -32,7 +32,7 @@ permissions:
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: gpu_fast_h100_tests/should-run
|
||||
name: gpu_core_h100_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read # Needed to check for file change
|
||||
@@ -40,7 +40,7 @@ jobs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -61,15 +61,14 @@ jobs:
|
||||
- tfhe/src/integer/server_key/radix_parallel/tests_signed/**
|
||||
- tfhe/src/integer/server_key/radix_parallel/tests_cases_unsigned.rs
|
||||
- tfhe/src/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**/**.md'
|
||||
- '.github/workflows/gpu_fast_h100_tests.yml'
|
||||
- '.github/workflows/gpu_core_h100_tests.yml'
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: gpu_fast_h100_tests/setup-instance
|
||||
name: gpu_core_h100_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
@@ -87,7 +86,7 @@ jobs:
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -111,7 +110,7 @@ jobs:
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: gpu_fast_h100_tests/cuda-tests-linux
|
||||
name: gpu_core_h100_tests/cuda-tests-linux
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
@@ -129,7 +128,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -155,20 +154,8 @@ jobs:
|
||||
BIG_TESTS_INSTANCE=TRUE make test_integer_compression_gpu
|
||||
BIG_TESTS_INSTANCE=TRUE make test_cuda_backend
|
||||
|
||||
- name: Run user docs tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_user_doc_gpu
|
||||
|
||||
- name: Test C API
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_c_api_gpu
|
||||
|
||||
- name: Run High Level API Tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_high_level_api_gpu
|
||||
|
||||
slack-notify:
|
||||
name: gpu_fast_h100_tests/slack-notify
|
||||
name: gpu_core_h100_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
@@ -187,10 +174,10 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "Fast H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
SLACK_MESSAGE: "Core H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: gpu_fast_h100_tests/teardown-instance
|
||||
name: gpu_core_h100_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
@@ -198,7 +185,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
10
.github/workflows/gpu_fast_tests.yml
vendored
10
.github/workflows/gpu_fast_tests.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -79,7 +79,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -114,7 +114,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -151,7 +151,7 @@ jobs:
|
||||
|
||||
- name: Run High Level API Tests
|
||||
run: |
|
||||
make test_high_level_api_gpu
|
||||
make test_high_level_api_gpu_fast
|
||||
|
||||
slack-notify:
|
||||
name: gpu_fast_tests/slack-notify
|
||||
@@ -184,7 +184,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/gpu_full_h100_tests.yml
vendored
6
.github/workflows/gpu_full_h100_tests.yml
vendored
@@ -35,7 +35,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
@@ -124,7 +124,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
10
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
10
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -116,7 +116,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -154,7 +154,7 @@ jobs:
|
||||
|
||||
- name: Run High Level API Tests
|
||||
run: |
|
||||
make test_high_level_api_gpu
|
||||
make test_high_level_api_gpu_fast
|
||||
|
||||
slack-notify:
|
||||
name: gpu_full_multi_gpu_tests/slack-notify
|
||||
@@ -187,7 +187,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
209
.github/workflows/gpu_hlapi_h100_tests.yml
vendored
Normal file
209
.github/workflows/gpu_hlapi_h100_tests.yml
vendored
Normal file
@@ -0,0 +1,209 @@
|
||||
# Compile and test tfhe-cuda-backend on an H100 VM on hyperstack
|
||||
name: gpu_hlapi_h100_tests
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
SLACKIFY_MARKDOWN: true
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
PULL_REQUEST_MD_LINK: ""
|
||||
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
|
||||
# Secrets will be available only to zama-ai organization members
|
||||
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
|
||||
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
types: [ labeled ]
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
# zizmor: ignore[concurrency-limits] concurrency is managed after instance setup to ensure safe provisioning
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
name: gpu_hlapi_h100_tests/should-run
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read # Needed to check for file change
|
||||
outputs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@e0021407031f5be11a464abee9a0776171c79891 # v47.0.1
|
||||
with:
|
||||
files_yaml: |
|
||||
gpu:
|
||||
- tfhe/Cargo.toml
|
||||
- tfhe/build.rs
|
||||
- backends/tfhe-cuda-backend/**
|
||||
- tfhe/src/core_crypto/gpu/**
|
||||
- tfhe/src/integer/gpu/**
|
||||
- tfhe/src/integer/server_key/radix_parallel/tests_unsigned/**
|
||||
- tfhe/src/integer/server_key/radix_parallel/tests_signed/**
|
||||
- tfhe/src/integer/server_key/radix_parallel/tests_cases_unsigned.rs
|
||||
- tfhe/src/shortint/parameters/**
|
||||
- tfhe/src/high_level_api/**
|
||||
- tfhe/src/c_api/**
|
||||
- 'tfhe/docs/**/**.md'
|
||||
- '.github/workflows/gpu_hlapi_h100_tests.yml'
|
||||
- scripts/integer-tests.sh
|
||||
- ci/slab.toml
|
||||
|
||||
setup-instance:
|
||||
name: gpu_hlapi_h100_tests/setup-instance
|
||||
needs: should-run
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event.action != 'labeled' && needs.should-run.outputs.gpu_test == 'true') ||
|
||||
(github.event.action == 'labeled' && github.event.label.name == 'approved' && needs.should-run.outputs.gpu_test == 'true')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
# Use permanent remote instance label first as on-demand remote instance label output is set before the end of start-remote-instance step.
|
||||
# If the latter fails due to a failed GitHub action runner setup, we have to fall back on the permanent instance.
|
||||
# Since the on-demand remote label is set before failure, we have to do the logical OR in this order,
|
||||
# otherwise we'll try to run the next job on a non-existing on-demand instance.
|
||||
runner-name: ${{ steps.use-permanent-instance.outputs.runner_group || steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
remote-instance-outcome: ${{ steps.start-remote-instance.outcome }}
|
||||
steps:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: single-h100
|
||||
|
||||
# This allows falling back on permanent instances running on Hyperstack.
|
||||
- name: Use permanent remote instance
|
||||
id: use-permanent-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true' && steps.start-remote-instance.outcome == 'failure'
|
||||
run: |
|
||||
echo "runner_group=h100x1" >> "$GITHUB_OUTPUT"
|
||||
|
||||
# This instance is spawned specifically for pull requests from forked repositories.
|
||||
- name: Start GitHub instance
|
||||
id: start-github-instance
|
||||
if: env.SECRETS_AVAILABLE == 'false'
|
||||
run: |
|
||||
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
|
||||
|
||||
cuda-tests-linux:
|
||||
name: gpu_hlapi_h100_tests/cuda-tests-linux
|
||||
needs: [ should-run, setup-instance ]
|
||||
if: github.event_name != 'pull_request' ||
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
concurrency:
|
||||
group: ${{ github.workflow_ref }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.8"
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
if: needs.setup-instance.outputs.remote-instance-outcome == 'success'
|
||||
uses: ./.github/actions/gpu_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
- name: Enable nvidia multi-process service
|
||||
run: |
|
||||
nvidia-cuda-mps-control -d
|
||||
|
||||
- name: Run user docs tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_user_doc_gpu
|
||||
|
||||
- name: Test C API
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_c_api_gpu
|
||||
|
||||
- name: Run High Level API Tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_high_level_api_gpu
|
||||
|
||||
slack-notify:
|
||||
name: gpu_hlapi_h100_tests/slack-notify
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Set pull-request URL
|
||||
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
|
||||
run: |
|
||||
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
|
||||
env:
|
||||
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
|
||||
PR_NUMBER: ${{ github.event.pull_request.number }}
|
||||
|
||||
- name: Send message
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
|
||||
SLACK_MESSAGE: "HL API H100 tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
|
||||
|
||||
teardown-instance:
|
||||
name: gpu_hlapi_h100_tests/teardown-instance
|
||||
if: ${{ always() && needs.setup-instance.outputs.remote-instance-outcome == 'success' }}
|
||||
needs: [ setup-instance, cuda-tests-linux ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cuda-h100-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
@@ -38,7 +38,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -65,7 +65,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72 hours
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -112,7 +112,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/gpu_memory_sanitizer.yml
vendored
6
.github/workflows/gpu_memory_sanitizer.yml
vendored
@@ -42,7 +42,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -78,7 +78,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -134,7 +134,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -42,7 +42,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -78,7 +78,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -134,7 +134,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/gpu_pcc.yml
vendored
6
.github/workflows/gpu_pcc.yml
vendored
@@ -38,7 +38,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -159,7 +159,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -40,7 +40,7 @@ jobs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -116,7 +116,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -170,7 +170,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -40,7 +40,7 @@ jobs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -87,7 +87,7 @@ jobs:
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -129,7 +129,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -184,7 +184,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -41,7 +41,7 @@ jobs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -82,7 +82,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -117,7 +117,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -179,7 +179,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -40,7 +40,7 @@ jobs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -116,7 +116,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -170,7 +170,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -40,7 +40,7 @@ jobs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -87,7 +87,7 @@ jobs:
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
continue-on-error: true
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -129,7 +129,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -184,7 +184,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -41,7 +41,7 @@ jobs:
|
||||
gpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.gpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -82,7 +82,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -117,7 +117,7 @@ jobs:
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
@@ -179,7 +179,7 @@ jobs:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
13
.github/workflows/hpu_hlapi_tests.yml
vendored
13
.github/workflows/hpu_hlapi_tests.yml
vendored
@@ -2,6 +2,7 @@
|
||||
name: hpu_hlapi_tests
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
@@ -25,6 +26,8 @@ permissions: {}
|
||||
jobs:
|
||||
should-run:
|
||||
name: hpu_hlapi_tests/should-run
|
||||
if: github.event_name != 'push' ||
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
pull-requests: read # Needed to check for file change
|
||||
@@ -32,7 +35,7 @@ jobs:
|
||||
hpu_test: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.hpu_any_changed }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -62,7 +65,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -83,13 +86,13 @@ jobs:
|
||||
needs: setup-instance
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
uses: dtolnay/rust-toolchain@e97e2d8cc328f1b50210efc529dca0028893a2d9 # zizmor: ignore[stale-action-refs] this action doesn't create releases
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
@@ -114,7 +117,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/integer_long_run_tests.yml
vendored
6
.github/workflows/integer_long_run_tests.yml
vendored
@@ -34,7 +34,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -53,7 +53,7 @@ jobs:
|
||||
timeout-minutes: 4320 # 72 hours
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
@@ -83,7 +83,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/m1_tests.yml
vendored
2
.github/workflows/m1_tests.yml
vendored
@@ -41,7 +41,7 @@ jobs:
|
||||
timeout-minutes: 720
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: "false"
|
||||
token: ${{ env.CHECKOUT_TOKEN }}
|
||||
|
||||
5
.github/workflows/make_release_common.yml
vendored
5
.github/workflows/make_release_common.yml
vendored
@@ -52,7 +52,7 @@ jobs:
|
||||
hash: ${{ steps.hash.outputs.hash }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -75,6 +75,7 @@ jobs:
|
||||
name: make_release_common/provenance
|
||||
if: ${{ !inputs.dry-run }}
|
||||
needs: package
|
||||
# This action cannot be pinned to a specific commit (see https://github.com/slsa-framework/slsa-github-generator/blob/main/README.md#referencing-slsa-builders-and-generators)
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
actions: read # Needed to detect the GitHub Actions environment
|
||||
@@ -93,7 +94,7 @@ jobs:
|
||||
id-token: write # Needed for OIDC token exchange on crates.io
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
|
||||
7
.github/workflows/make_release_cuda.yml
vendored
7
.github/workflows/make_release_cuda.yml
vendored
@@ -37,7 +37,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -64,7 +64,7 @@ jobs:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: "false"
|
||||
@@ -117,6 +117,7 @@ jobs:
|
||||
name: make_release_cuda/provenance
|
||||
if: ${{ !inputs.dry_run }}
|
||||
needs: [package]
|
||||
# This action cannot be pinned to a specific commit (see https://github.com/slsa-framework/slsa-github-generator/blob/main/README.md#referencing-slsa-builders-and-generators)
|
||||
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0
|
||||
permissions:
|
||||
actions: read # Needed to detect the GitHub Actions environment
|
||||
@@ -221,7 +222,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/make_release_tfhe.yml
vendored
8
.github/workflows/make_release_tfhe.yml
vendored
@@ -68,7 +68,7 @@ jobs:
|
||||
id-token: write # also needed for OIDC token exchange on crates.io and npmjs.com
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
with:
|
||||
fetch-depth: 0
|
||||
persist-credentials: 'false'
|
||||
@@ -85,14 +85,14 @@ jobs:
|
||||
make build_web_js_api_parallel
|
||||
|
||||
- name: Authenticate on NPM
|
||||
uses: actions/setup-node@395ad3262231945c25e8478fd5baf05154b1d79f # v6.1.0
|
||||
uses: actions/setup-node@6044e13b5dc448c55e2357c09f80417699197238 # v6.2.0
|
||||
with:
|
||||
node-version: '24'
|
||||
registry-url: 'https://registry.npmjs.org'
|
||||
|
||||
- name: Publish web package
|
||||
if: ${{ inputs.push_web_package }}
|
||||
uses: JS-DevTools/npm-publish@7f8fe47b3bea1be0c3aec2b717c5ec1f3e03410b
|
||||
uses: JS-DevTools/npm-publish@4ce4bd0f334d5316473155078da1955d42148494
|
||||
with:
|
||||
package: tfhe/pkg/package.json
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
@@ -109,7 +109,7 @@ jobs:
|
||||
|
||||
- name: Publish Node package
|
||||
if: ${{ inputs.push_node_package }}
|
||||
uses: JS-DevTools/npm-publish@7f8fe47b3bea1be0c3aec2b717c5ec1f3e03410b
|
||||
uses: JS-DevTools/npm-publish@4ce4bd0f334d5316473155078da1955d42148494
|
||||
with:
|
||||
package: tfhe/pkg/package.json
|
||||
dry-run: ${{ inputs.dry_run }}
|
||||
|
||||
10
.github/workflows/parameters_check.yml
vendored
10
.github/workflows/parameters_check.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
||||
name: parameters_check/setup-instance
|
||||
if:
|
||||
(github.event_name == 'push' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
github.event_name == 'workflow_dispatch'
|
||||
github.event_name != 'push'
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
|
||||
@@ -38,7 +38,7 @@ jobs:
|
||||
- name: Start remote instance
|
||||
id: start-remote-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -60,7 +60,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
|
||||
@@ -71,7 +71,7 @@ jobs:
|
||||
toolchain: stable
|
||||
|
||||
- name: Checkout lattice-estimator
|
||||
uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd
|
||||
with:
|
||||
repository: malb/lattice-estimator
|
||||
path: lattice_estimator
|
||||
@@ -137,7 +137,7 @@ jobs:
|
||||
- name: Stop remote instance
|
||||
id: stop-instance
|
||||
if: env.SECRETS_AVAILABLE == 'true'
|
||||
uses: zama-ai/slab-github-runner@973c1d22702de8d0acd2b34e83404c96ed92c264 # v1.4.2
|
||||
uses: zama-ai/slab-github-runner@0a812986560d3f10dc65728b1ccb9ae4c48a8a16 # v1.5.1
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
67
.github/workflows/pr_milestone_check.yml
vendored
67
.github/workflows/pr_milestone_check.yml
vendored
@@ -1,67 +0,0 @@
|
||||
name: pr_milestone_check
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
types: [opened, edited, synchronize, reopened, milestoned, demilestoned]
|
||||
|
||||
permissions: {}
|
||||
|
||||
# zizmor: ignore[concurrency-limits] only Zama organization members can trigger this workflow
|
||||
# external contributors workflows are manually approved
|
||||
|
||||
jobs:
|
||||
check-empty-milestone:
|
||||
name: pr_milestone_check/check-empty-milestone
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event.pull_request.milestone == null
|
||||
permissions:
|
||||
pull-requests: write # Need write access on pull requests to post comment
|
||||
|
||||
steps:
|
||||
- name: Post Reminder Comment
|
||||
uses: octokit/request-action@dad4362715b7fb2ddedf9772c8670824af564f0d # v2.4.0
|
||||
with:
|
||||
route: POST /repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments
|
||||
body: |
|
||||
'### ❌ Milestone Missing
|
||||
|
||||
Please assign a milestone to this pull request. If your PR targets the next version of
|
||||
TFHE-rs please use the current quarter milestone, e.g. "Q1 26".
|
||||
|
||||
If your PR targets a patch version for previous releases: consider creating a dedicated
|
||||
milestone e.g. v1.5.1 if it does not exist yet.'
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Check Final Status
|
||||
run: |
|
||||
echo "::error::Milestone is missing. This check is failing."
|
||||
exit 1
|
||||
|
||||
check-milestone-open:
|
||||
name: pr_milestone_check/check-milestone-open
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event.pull_request.milestone != null && github.event.pull_request.milestone.state == 'closed'
|
||||
permissions:
|
||||
pull-requests: write # Need write access on pull requests to post comment
|
||||
|
||||
steps:
|
||||
- name: Post Reminder Comment
|
||||
uses: octokit/request-action@dad4362715b7fb2ddedf9772c8670824af564f0d # v2.4.0
|
||||
with:
|
||||
route: POST /repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments
|
||||
body: |
|
||||
'### ❌ Milestone is closed
|
||||
|
||||
Please assign an open milestone to this pull request. If your PR targets the next version of
|
||||
TFHE-rs please use the current quarter milestone, e.g. "Q1 26".
|
||||
|
||||
If your PR targets a patch version for previous releases: consider creating a dedicated
|
||||
milestone e.g. v1.5.1 if it does not exist yet.'
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Check Final Status
|
||||
run: |
|
||||
echo "::error::Milestone is closed. This check is failing."
|
||||
exit 1
|
||||
8
.github/workflows/sync_on_push.yml
vendored
8
.github/workflows/sync_on_push.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
||||
run: |
|
||||
echo ">>> Cloning source repo..."
|
||||
git lfs install
|
||||
git clone "https://${USERNAME}:${TOKEN}@github.com/${SOURCE_REPO}.git" ./tfhe-rs --origin source && cd ./tfhe-rs
|
||||
git clone --quiet "https://${USERNAME}:${TOKEN}@github.com/${SOURCE_REPO}.git" ./tfhe-rs --origin source && cd ./tfhe-rs
|
||||
git remote add destination "https://${USERNAME}:${TOKEN}@github.com/${DEST_REPO}.git"
|
||||
|
||||
echo ">>> Fetching all branches references down locally so subsequent commands can see them..."
|
||||
@@ -47,6 +47,8 @@ jobs:
|
||||
|
||||
echo ">>> Pushing all LFS items..."
|
||||
git lfs push --all destination "${DESTINATION_BRANCH}"
|
||||
|
||||
shred --remove .git/config
|
||||
|
||||
- name: git-sync-tags
|
||||
env:
|
||||
@@ -59,7 +61,7 @@ jobs:
|
||||
run: |
|
||||
echo ">>> Cloning source repo..."
|
||||
git lfs install
|
||||
git clone "https://${USERNAME}:${TOKEN}@github.com/${SOURCE_REPO}.git" ./tfhe-rs-tag --origin source && cd ./tfhe-rs-tag
|
||||
git clone --quiet "https://${USERNAME}:${TOKEN}@github.com/${SOURCE_REPO}.git" ./tfhe-rs-tag --origin source && cd ./tfhe-rs-tag
|
||||
git remote add destination "https://${USERNAME}:${TOKEN}@github.com/${DEST_REPO}.git"
|
||||
|
||||
echo ">>> Fetching all branches references down locally so subsequent commands can see them..."
|
||||
@@ -70,3 +72,5 @@ jobs:
|
||||
|
||||
echo ">>> Pushing git changes..."
|
||||
git push destination "${SOURCE_BRANCH}:${DESTINATION_BRANCH}" -f
|
||||
|
||||
shred --remove .git/config
|
||||
|
||||
1
.github/workflows/unverified_prs.yml
vendored
1
.github/workflows/unverified_prs.yml
vendored
@@ -12,6 +12,7 @@ permissions: {}
|
||||
jobs:
|
||||
stale:
|
||||
name: unverified_prs/stale
|
||||
if: github.repository == 'zama-ai/tfhe-rs'
|
||||
runs-on: ubuntu-latest
|
||||
permissions:
|
||||
issues: read # Needed to fetch all issues
|
||||
|
||||
81
Makefile
81
Makefile
@@ -29,8 +29,9 @@ WASM_PACK_VERSION="0.13.1"
|
||||
WASM_BINDGEN_VERSION:=$(shell cargo tree --target wasm32-unknown-unknown -e all --prefix none | grep "wasm-bindgen v" | head -n 1 | cut -d 'v' -f2)
|
||||
WEB_RUNNER_DIR=web-test-runner
|
||||
WEB_SERVER_DIR=tfhe/web_wasm_parallel_tests
|
||||
TYPOS_VERSION=1.39.0
|
||||
ZIZMOR_VERSION=1.16.2
|
||||
TAPLO_VERSION=0.10.0
|
||||
TYPOS_VERSION=1.42.0
|
||||
ZIZMOR_VERSION=1.20.0
|
||||
# This is done to avoid forgetting it, we still specify the RUSTFLAGS in the commands to be able to
|
||||
# copy paste the command in the terminal and change them if required without forgetting the flags
|
||||
export RUSTFLAGS?=-C target-cpu=native
|
||||
@@ -171,6 +172,10 @@ install_cargo_dylint:
|
||||
install_cargo_audit:
|
||||
cargo install --locked cargo-audit
|
||||
|
||||
.PHONY: install_taplo # Install taplo (TOML formatter)
|
||||
install_taplo:
|
||||
@./scripts/install_taplo.sh --taplo-version $(TAPLO_VERSION)
|
||||
|
||||
.PHONY: install_typos_checker # Install typos checker
|
||||
install_typos_checker:
|
||||
@./scripts/install_typos.sh --typos-version $(TYPOS_VERSION)
|
||||
@@ -283,6 +288,10 @@ fmt_gpu: install_rs_check_toolchain
|
||||
fmt_c_tests:
|
||||
find tfhe/c_api_tests/ -regex '.*\.\(cpp\|hpp\|cu\|c\|h\)' -exec clang-format -style=file -i {} \;
|
||||
|
||||
.PHONY: fmt_toml # Format TOML files
|
||||
fmt_toml: install_taplo
|
||||
taplo fmt
|
||||
|
||||
.PHONY: check_fmt # Check rust code format
|
||||
check_fmt: install_rs_check_toolchain
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt --check
|
||||
@@ -307,6 +316,11 @@ check_fmt_js: check_nvm_installed
|
||||
$(MAKE) -C tfhe/web_wasm_parallel_tests check_fmt && \
|
||||
$(MAKE) -C tfhe/js_on_wasm_tests check_fmt
|
||||
|
||||
.PHONY: check_fmt_toml # Check TOML files format
|
||||
check_fmt_toml: install_taplo
|
||||
@RUST_LOG=warn taplo fmt --check || \
|
||||
echo "TOML files format check failed. Please run 'make fmt_toml'"
|
||||
|
||||
.PHONY: check_typos # Check for typos in codebase
|
||||
check_typos: install_typos_checker
|
||||
@typos && echo "No typos found"
|
||||
@@ -719,11 +733,12 @@ test_core_crypto_gpu:
|
||||
--features=gpu -p tfhe -- core_crypto::gpu::
|
||||
|
||||
.PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
|
||||
test_integer_gpu:
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo test --profile $(CARGO_PROFILE) \
|
||||
--features=integer,gpu -p tfhe -- integer::gpu::server_key:: --test-threads=2
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=integer,gpu -p tfhe -- integer::gpu::server_key:: --test-threads=4
|
||||
test_integer_gpu: install_cargo_nextest
|
||||
TEST_THREADS=2 \
|
||||
DOCTEST_THREADS=4 \
|
||||
./scripts/integer-tests.sh \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
|
||||
--tfhe-package "tfhe" --all-but-noise
|
||||
|
||||
.PHONY: test_integer_gpu_debug # Run the tests of the integer module with Debug flags for CUDA
|
||||
test_integer_gpu_debug:
|
||||
@@ -1035,10 +1050,16 @@ test_high_level_api:
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings -p tfhe \
|
||||
-- high_level_api::
|
||||
|
||||
test_high_level_api_gpu: install_cargo_nextest
|
||||
test_high_level_api_gpu_fast: install_cargo_nextest # Run all the GPU tests for high_level_api except test_uniformity for oprf which is too long
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
--test-threads=4 --features=integer,internal-keycache,gpu,zk-pok -p tfhe \
|
||||
-E "test(/high_level_api::.*gpu.*/)"
|
||||
-E "test(/high_level_api::.*gpu.*/) and not test(/uniformity/)"
|
||||
|
||||
|
||||
test_high_level_api_gpu: install_cargo_nextest # Run all the GPU tests for high_level_api
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
--test-threads=4 --features=integer,internal-keycache,gpu,zk-pok -p tfhe \
|
||||
-E "test(/high_level_api::.*gpu.*/)"
|
||||
|
||||
test_list_gpu: install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo nextest list --cargo-profile $(CARGO_PROFILE) \
|
||||
@@ -1357,6 +1378,9 @@ clippy_bench: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
|
||||
-p tfhe-benchmark -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=shortint,internal-keycache \
|
||||
-p tfhe-benchmark -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_bench_gpu # Run clippy lints on tfhe-benchmark
|
||||
clippy_bench_gpu: install_rs_check_toolchain
|
||||
@@ -1391,14 +1415,14 @@ bench_signed_integer: install_rs_check_toolchain
|
||||
|
||||
.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
|
||||
bench_integer_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_signed_integer_gpu # Run benchmarks for signed integer on GPU backend
|
||||
bench_signed_integer_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=$(BENCH_PARAM_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
@@ -1435,7 +1459,7 @@ bench_integer_compression_128b_gpu: install_rs_check_toolchain
|
||||
|
||||
.PHONY: bench_integer_zk_gpu
|
||||
bench_integer_zk_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-zk-pke \
|
||||
--features=integer,internal-keycache,gpu,pbs-stats,zk-pok -p tfhe-benchmark --profile release_lto_off --
|
||||
@@ -1461,6 +1485,13 @@ bench_integer_trivium_gpu: install_rs_check_toolchain
|
||||
--bench integer-trivium \
|
||||
--features=integer,internal-keycache,gpu, -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_integer_kreyvium_gpu # Run benchmarks for kreyvium on GPU backend
|
||||
bench_integer_kreyvium_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-kreyvium \
|
||||
--features=integer,internal-keycache,gpu, -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
|
||||
bench_integer_multi_bit: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_PARAM_TYPE=MULTI_BIT __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
@@ -1495,7 +1526,7 @@ bench_signed_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
|
||||
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
|
||||
bench_integer_zk: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-zk-pke \
|
||||
--features=integer,internal-keycache,zk-pok,pbs-stats \
|
||||
@@ -1641,11 +1672,18 @@ bench_web_js_api_unsafe_coop_firefox_ci: setup_venv
|
||||
nvm use $(NODE_VERSION) && \
|
||||
$(MAKE) bench_web_js_api_unsafe_coop_firefox
|
||||
|
||||
.PHONY: bench_hlapi # Run benchmarks for integer operations
|
||||
bench_hlapi: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) \
|
||||
.PHONY: bench_hlapi_unsigned # Run benchmarks for unsigned integer operations
|
||||
bench_hlapi_unsigned: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi \
|
||||
--bench hlapi_unsigned \
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_hlapi_signed # Run benchmarks for signed integer operations
|
||||
bench_hlapi_signed: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_BIT_SIZES_SET=$(BIT_SIZES_SET) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi_signed \
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
.PHONY: bench_hlapi_gpu # Run benchmarks for integer operations on GPU
|
||||
@@ -1735,6 +1773,13 @@ bench_hlapi_noise_squash_gpu: install_rs_check_toolchain
|
||||
--bench hlapi-noise-squash \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats -p tfhe-benchmark --profile release_lto_off --
|
||||
|
||||
.PHONY: bench_hlapi_kvstore # Run benchmarks for Key-Value Store operations
|
||||
bench_hlapi_kvstore: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-kvstore \
|
||||
--features=integer,internal-keycache,pbs-stats -p tfhe-benchmark --
|
||||
|
||||
|
||||
.PHONY: bench_custom # Run benchmarks with a user-defined command
|
||||
bench_custom: install_rs_check_toolchain
|
||||
@@ -1846,6 +1891,7 @@ pcc_batch_1:
|
||||
$(call run_recipe_with_details,no_dbg_log)
|
||||
$(call run_recipe_with_details,check_parameter_export_ok)
|
||||
$(call run_recipe_with_details,check_fmt)
|
||||
$(call run_recipe_with_details,check_fmt_toml)
|
||||
$(call run_recipe_with_details,check_typos)
|
||||
$(call run_recipe_with_details,lint_doc)
|
||||
$(call run_recipe_with_details,check_md_docs_are_tested)
|
||||
@@ -1921,6 +1967,7 @@ fpcc:
|
||||
$(call run_recipe_with_details,no_dbg_log)
|
||||
$(call run_recipe_with_details,check_parameter_export_ok)
|
||||
$(call run_recipe_with_details,check_fmt)
|
||||
$(call run_recipe_with_details,check_fmt_toml)
|
||||
$(call run_recipe_with_details,check_typos)
|
||||
$(call run_recipe_with_details,lint_doc)
|
||||
$(call run_recipe_with_details,check_md_docs_are_tested)
|
||||
|
||||
@@ -129,7 +129,7 @@ Other sizes than 64 bit are expected to be available in the future.
|
||||
|
||||
# FHE shortint Trivium implementation
|
||||
|
||||
The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128`).
|
||||
The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128`).
|
||||
It uses a lower level API of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run
|
||||
on the same cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes
|
||||
its setup a little more intricate.
|
||||
@@ -138,9 +138,9 @@ Example code:
|
||||
```rust
|
||||
use tfhe::shortint::prelude::*;
|
||||
use tfhe::shortint::parameters::current_params::{
|
||||
V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
};
|
||||
use tfhe::{ConfigBuilder, generate_keys, FheUint64};
|
||||
use tfhe::prelude::*;
|
||||
@@ -148,17 +148,17 @@ use tfhe_trivium::TriviumStreamShortint;
|
||||
|
||||
fn test_shortint() {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use criterion::Criterion;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::current_params::{
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
};
|
||||
use tfhe::shortint::prelude::*;
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
|
||||
@@ -11,19 +11,19 @@ use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};
|
||||
|
||||
pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
@@ -64,19 +64,19 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
|
||||
|
||||
pub fn kreyvium_shortint_gen(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
@@ -112,19 +112,19 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
|
||||
|
||||
pub fn kreyvium_shortint_trans(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use criterion::Criterion;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::current_params::{
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
};
|
||||
use tfhe::shortint::prelude::*;
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
|
||||
@@ -11,19 +11,19 @@ use tfhe_trivium::{TransCiphering, TriviumStreamShortint};
|
||||
|
||||
pub fn trivium_shortint_warmup(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
@@ -64,19 +64,19 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
|
||||
|
||||
pub fn trivium_shortint_gen(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
@@ -112,19 +112,19 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
|
||||
|
||||
pub fn trivium_shortint_trans(c: &mut Criterion) {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::current_params::{
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
};
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
|
||||
// Values for these tests come from the github repo renaud1239/Kreyvium,
|
||||
@@ -221,19 +221,19 @@ use tfhe::shortint::prelude::*;
|
||||
#[test]
|
||||
fn kreyvium_test_shortint_long() {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
|
||||
@@ -7,7 +7,7 @@ use tfhe::prelude::*;
|
||||
use tfhe::shortint::Ciphertext;
|
||||
use tfhe::{set_server_key, unset_server_key, FheUint64, FheUint8, ServerKey};
|
||||
|
||||
/// Triat specifying the interface for trans ciphering a FheUint64 object. Since it is meant
|
||||
/// Trait specifying the interface for trans ciphering a FheUint64 object. Since it is meant
|
||||
/// to be used with stream ciphers, encryption and decryption are by default the same.
|
||||
pub trait TransCiphering {
|
||||
fn trans_encrypt_64(&mut self, cipher: FheUint64) -> FheUint64;
|
||||
|
||||
@@ -1,9 +1,9 @@
|
||||
use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::current_params::{
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
|
||||
};
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
|
||||
// Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
|
||||
@@ -357,19 +357,19 @@ use tfhe::shortint::prelude::*;
|
||||
#[test]
|
||||
fn trivium_test_shortint_long() {
|
||||
let config = ConfigBuilder::default()
|
||||
.use_custom_parameters(V1_5_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.use_custom_parameters(V1_6_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
|
||||
.build();
|
||||
let (hl_client_key, hl_server_key) = generate_keys(config);
|
||||
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V1_5_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
gen_keys(V1_6_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V1_5_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
V1_6_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
|
||||
@@ -87,6 +87,7 @@ fn main() {
|
||||
"cuda/include/integer/rerand.h",
|
||||
"cuda/include/aes/aes.h",
|
||||
"cuda/include/trivium/trivium.h",
|
||||
"cuda/include/kreyvium/kreyvium.h",
|
||||
"cuda/include/zk/zk.h",
|
||||
"cuda/include/keyswitch/keyswitch.h",
|
||||
"cuda/include/keyswitch/ks_enums.h",
|
||||
|
||||
@@ -34,7 +34,7 @@ template <typename Torus> struct int_aes_lut_buffers {
|
||||
SBOX_MAX_AND_GATES * num_aes_inputs * sbox_parallelism,
|
||||
params.pbs_type);
|
||||
this->and_lut->generate_and_broadcast_bivariate_lut(
|
||||
active_streams_and_lut, {0}, {and_lambda}, allocate_gpu_memory);
|
||||
active_streams_and_lut, {0}, {and_lambda}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
this->and_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
|
||||
|
||||
@@ -48,7 +48,7 @@ template <typename Torus> struct int_aes_lut_buffers {
|
||||
auto active_streams_flush_lut = streams.active_gpu_subset(
|
||||
AES_STATE_BITS * num_aes_inputs, params.pbs_type);
|
||||
this->flush_lut->generate_and_broadcast_lut(
|
||||
active_streams_flush_lut, {0}, {flush_lambda}, allocate_gpu_memory);
|
||||
active_streams_flush_lut, {0}, {flush_lambda}, LUT_0_FOR_ALL_BLOCKS);
|
||||
this->flush_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
|
||||
|
||||
this->carry_lut = new int_radix_lut<Torus>(
|
||||
@@ -60,7 +60,7 @@ template <typename Torus> struct int_aes_lut_buffers {
|
||||
auto active_streams_carry_lut =
|
||||
streams.active_gpu_subset(num_aes_inputs, params.pbs_type);
|
||||
this->carry_lut->generate_and_broadcast_lut(
|
||||
active_streams_carry_lut, {0}, {carry_lambda}, allocate_gpu_memory);
|
||||
active_streams_carry_lut, {0}, {carry_lambda}, LUT_0_FOR_ALL_BLOCKS);
|
||||
this->carry_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
|
||||
}
|
||||
|
||||
|
||||
@@ -10,11 +10,7 @@ extern std::mutex m;
|
||||
extern bool p2p_enabled;
|
||||
extern const int THRESHOLD_MULTI_GPU_WITH_MULTI_BIT_PARAMS;
|
||||
extern const int THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS;
|
||||
|
||||
extern "C" {
|
||||
int32_t cuda_setup_multi_gpu(int device_0_id);
|
||||
}
|
||||
|
||||
extern const int THRESHOLD_MULTI_GPU_WITH_CLASSICAL_PARAMS_U128;
|
||||
// Define a variant type that can be either a vector or a single pointer
|
||||
template <typename Torus>
|
||||
using LweArrayVariant = std::variant<std::vector<Torus *>, Torus *>;
|
||||
@@ -42,6 +38,8 @@ get_variant_element(const std::variant<std::vector<Torus>, Torus> &variant,
|
||||
|
||||
uint32_t get_active_gpu_count(uint32_t num_inputs, uint32_t gpu_count,
|
||||
PBS_TYPE pbs_type);
|
||||
uint32_t get_active_gpu_count_u128(uint32_t num_inputs, uint32_t gpu_count,
|
||||
PBS_TYPE pbs_type);
|
||||
|
||||
int get_num_inputs_on_gpu(int total_num_inputs, int gpu_index, int gpu_count);
|
||||
|
||||
@@ -70,7 +68,7 @@ public:
|
||||
// Construct an empty set. Invalid use of an empty set should raise an error
|
||||
// right away through asserts or because of a nullptr dereference
|
||||
CudaStreams()
|
||||
: _streams(nullptr), _gpu_indexes(nullptr), _gpu_count((uint32_t)-1),
|
||||
: _streams(nullptr), _gpu_indexes(nullptr), _gpu_count(0),
|
||||
_owns_streams(false) {}
|
||||
|
||||
// Returns a subset of this set as an active subset. An active subset is one
|
||||
@@ -80,7 +78,15 @@ public:
|
||||
_streams, _gpu_indexes,
|
||||
get_active_gpu_count(num_radix_blocks, _gpu_count, pbs_type));
|
||||
}
|
||||
|
||||
// Returns a subset of this set as an active subset for pbs128. An active
|
||||
// subset is one that is temporarily used to perform some computation. For
|
||||
// pbs128, the threshold is different, because the original threshold was
|
||||
// designed for 2_2 params.
|
||||
CudaStreams active_gpu_subset_u128(int num_radix_blocks, PBS_TYPE pbs_type) {
|
||||
return CudaStreams(
|
||||
_streams, _gpu_indexes,
|
||||
get_active_gpu_count_u128(num_radix_blocks, _gpu_count, pbs_type));
|
||||
}
|
||||
// Returns a CudaStreams struct containing only the ith stream
|
||||
CudaStreams get_ith(int i) const {
|
||||
return CudaStreams(&_streams[i], &_gpu_indexes[i], 1);
|
||||
@@ -114,11 +120,13 @@ public:
|
||||
// streams on the same GPU
|
||||
void create_on_same_gpus(const CudaStreams &other) {
|
||||
PANIC_IF_FALSE(_streams == nullptr,
|
||||
"Assign clone to non-empty cudastreams");
|
||||
"Cuda error: Assign clone to non-empty CudaStreams");
|
||||
PANIC_IF_FALSE(_gpu_count <= 8,
|
||||
"Cuda error: GPU count should be in the interval [0, 8]");
|
||||
|
||||
cudaStream_t *new_streams = new cudaStream_t[other._gpu_count];
|
||||
|
||||
uint32_t *gpu_indexes_clone = new uint32_t[_gpu_count];
|
||||
uint32_t *gpu_indexes_clone = new uint32_t[other._gpu_count];
|
||||
for (uint32_t i = 0; i < other._gpu_count; ++i) {
|
||||
new_streams[i] = cuda_create_stream(other._gpu_indexes[i]);
|
||||
gpu_indexes_clone[i] = other._gpu_indexes[i];
|
||||
@@ -142,9 +150,9 @@ public:
|
||||
_gpu_count(src._gpu_count), _owns_streams(false) {}
|
||||
|
||||
CudaStreams &operator=(CudaStreams const &other) {
|
||||
PANIC_IF_FALSE(this->_streams == nullptr ||
|
||||
this->_streams == other._streams,
|
||||
"Assigning an already initialized CudaStreams");
|
||||
/* PANIC_IF_FALSE(this->_streams == nullptr ||
|
||||
this->_streams == other._streams,
|
||||
"Assigning an already initialized CudaStreams");*/
|
||||
this->_streams = other._streams;
|
||||
this->_gpu_indexes = other._gpu_indexes;
|
||||
this->_gpu_count = other._gpu_count;
|
||||
@@ -170,6 +178,7 @@ public:
|
||||
_streams = nullptr;
|
||||
delete[] _gpu_indexes;
|
||||
_gpu_indexes = nullptr;
|
||||
_gpu_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -483,4 +492,38 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
// Event pool for managing temporary CUDA events in scatter/gather operations.
|
||||
struct CudaEventPool {
|
||||
private:
|
||||
std::vector<cudaEvent_t> _events; // events handed out by request_event()
|
||||
std::vector<uint32_t> _gpu_indices; // _gpu_indices[i]: gpu_index _events[i] was created with
|
||||
|
||||
public:
|
||||
CudaEventPool() {}
|
||||
|
||||
// Creates a new event for `gpu_index`, records it (and its GPU index) in the pool, and returns it
|
||||
cudaEvent_t request_event(uint32_t gpu_index) {
|
||||
cudaEvent_t event = cuda_create_event(gpu_index);
|
||||
_events.push_back(event);
|
||||
_gpu_indices.push_back(gpu_index);
|
||||
return event;
|
||||
}
|
||||
|
||||
// Destroys every pooled event with the GPU index it was requested for, then empties the pool.
|
||||
// This should always be called in the release of the LUT, so streams
|
||||
// are already synchronized
|
||||
void release() {
|
||||
for (size_t i = 0; i < _events.size(); i++) {
|
||||
cuda_event_destroy(_events[i], _gpu_indices[i]);
|
||||
}
|
||||
_events.clear();
|
||||
_gpu_indices.clear();
|
||||
}
|
||||
|
||||
~CudaEventPool() { // does not destroy events itself; only checks the pool was emptied by release()
|
||||
GPU_ASSERT(_events.empty(),
|
||||
"CudaEventPool: must call release before destruction");
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@@ -45,12 +45,9 @@ template <typename Torus> struct boolean_bitop_buffer {
|
||||
|
||||
// BooleanBlock can have degree 0 or 1. When ct is 0 the path is hardcoded,
|
||||
// only the lut for degree = 1 is generated
|
||||
generate_device_accumulator_bivariate_with_factor<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), lut->get_lut(0, 0),
|
||||
lut->get_degree(0), lut->get_max_degree(0), params.glwe_dimension,
|
||||
params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, lut_bivariate_f, 2, gpu_memory_allocated);
|
||||
lut->broadcast_lut(active_streams);
|
||||
lut->generate_and_broadcast_bivariate_lut(active_streams, {0},
|
||||
{lut_bivariate_f},
|
||||
LUT_0_FOR_ALL_BLOCKS, {}, 2);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -66,7 +63,7 @@ template <typename Torus> struct boolean_bitop_buffer {
|
||||
};
|
||||
|
||||
message_extract_lut->generate_and_broadcast_lut(
|
||||
active_streams, {0}, {lut_f_message_extract}, gpu_memory_allocated);
|
||||
active_streams, {0}, {lut_f_message_extract}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
tmp_lwe_left = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
@@ -137,7 +134,7 @@ template <typename Torus> struct int_bitop_buffer {
|
||||
};
|
||||
|
||||
lut->generate_and_broadcast_bivariate_lut(
|
||||
active_streams, {0}, {lut_bivariate_f}, gpu_memory_allocated);
|
||||
active_streams, {0}, {lut_bivariate_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -169,7 +166,7 @@ template <typename Torus> struct int_bitop_buffer {
|
||||
}
|
||||
|
||||
lut->generate_and_broadcast_lut(active_streams, lut_indices, lut_funcs,
|
||||
gpu_memory_allocated);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -206,7 +203,7 @@ template <typename Torus> struct boolean_bitnot_buffer {
|
||||
streams.active_gpu_subset(lwe_ciphertext_count, params.pbs_type);
|
||||
|
||||
message_extract_lut->generate_and_broadcast_lut(
|
||||
active_streams, {0}, {lut_f_message_extract}, gpu_memory_allocated);
|
||||
active_streams, {0}, {lut_f_message_extract}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ template <typename Torus> struct int_extend_radix_with_sign_msb_buffer {
|
||||
const auto sign_bit = (xm >> (bits_per_block - 1)) & 1;
|
||||
return (Torus)((msg_modulus - 1) * sign_bit);
|
||||
}},
|
||||
allocate_gpu_memory);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
this->last_block = new CudaRadixCiphertextFFI;
|
||||
|
||||
|
||||
@@ -85,30 +85,28 @@ template <typename Torus> struct int_cmux_buffer {
|
||||
new int_radix_lut<Torus>(streams, params, 1, num_radix_blocks,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
Torus *h_lut_indexes = predicate_lut->h_lut_indexes;
|
||||
for (int index = 0; index < 2 * num_radix_blocks; index++) {
|
||||
if (index < num_radix_blocks) {
|
||||
h_lut_indexes[index] = 0;
|
||||
} else {
|
||||
h_lut_indexes[index] = 1;
|
||||
}
|
||||
}
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
predicate_lut->get_lut_indexes(0, 0), h_lut_indexes,
|
||||
2 * num_radix_blocks * sizeof(Torus), streams.stream(0),
|
||||
streams.gpu_index(0), allocate_gpu_memory);
|
||||
|
||||
auto active_streams_pred =
|
||||
streams.active_gpu_subset(2 * num_radix_blocks, params.pbs_type);
|
||||
auto lut_index_generator = [num_radix_blocks](Torus *h_lut_indexes,
|
||||
uint32_t num_indexes) {
|
||||
for (int index = 0; index < 2 * num_radix_blocks; index++) {
|
||||
if (index < num_radix_blocks) {
|
||||
h_lut_indexes[index] = 0;
|
||||
} else {
|
||||
h_lut_indexes[index] = 1;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
predicate_lut->generate_and_broadcast_bivariate_lut(
|
||||
active_streams_pred, {0, 1}, {inverted_lut_f, lut_f},
|
||||
gpu_memory_allocated);
|
||||
lut_index_generator);
|
||||
|
||||
auto active_streams_msg =
|
||||
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
|
||||
|
||||
message_extract_lut->generate_and_broadcast_lut(
|
||||
active_streams_msg, {0}, {message_extract_lut_f}, gpu_memory_allocated);
|
||||
active_streams_msg, {0}, {message_extract_lut_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
|
||||
@@ -28,7 +28,7 @@ template <typename Torus> struct int_are_all_block_true_buffer {
|
||||
Torus total_modulus = params.message_modulus * params.carry_modulus;
|
||||
uint32_t max_value = (total_modulus - 1) / (params.message_modulus - 1);
|
||||
|
||||
int max_chunks = (num_radix_blocks + max_value - 1) / max_value;
|
||||
int max_chunks = CEIL_DIV(num_radix_blocks, max_value);
|
||||
tmp_out = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), tmp_out, num_radix_blocks,
|
||||
@@ -53,7 +53,7 @@ template <typename Torus> struct int_are_all_block_true_buffer {
|
||||
};
|
||||
|
||||
is_max_value->generate_and_broadcast_lut(
|
||||
active_streams, {0}, {is_max_value_f}, gpu_memory_allocated);
|
||||
active_streams, {0}, {is_max_value_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
@@ -105,7 +105,7 @@ template <typename Torus> struct int_comparison_eq_buffer {
|
||||
auto active_streams =
|
||||
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
|
||||
is_non_zero_lut->generate_and_broadcast_lut(
|
||||
active_streams, {0}, {is_non_zero_lut_f}, gpu_memory_allocated);
|
||||
active_streams, {0}, {is_non_zero_lut_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
// Scalar may have up to num_radix_blocks blocks
|
||||
scalar_comparison_luts = new int_radix_lut<Torus>(
|
||||
@@ -135,7 +135,7 @@ template <typename Torus> struct int_comparison_eq_buffer {
|
||||
}
|
||||
|
||||
scalar_comparison_luts->generate_and_broadcast_lut(
|
||||
active_streams, lut_indices, lut_funcs, gpu_memory_allocated);
|
||||
active_streams, lut_indices, lut_funcs, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
if (op == COMPARISON_TYPE::EQ || op == COMPARISON_TYPE::NE) {
|
||||
operator_lut =
|
||||
@@ -143,8 +143,7 @@ template <typename Torus> struct int_comparison_eq_buffer {
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
operator_lut->generate_and_broadcast_bivariate_lut(
|
||||
active_streams, {0}, {operator_f}, gpu_memory_allocated);
|
||||
// operator_lut->broadcast_lut(active_streams);
|
||||
active_streams, {0}, {operator_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
} else {
|
||||
operator_lut = nullptr;
|
||||
}
|
||||
@@ -228,7 +227,7 @@ template <typename Torus> struct int_tree_sign_reduction_buffer {
|
||||
auto active_streams =
|
||||
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
|
||||
tree_inner_leaf_lut->generate_and_broadcast_bivariate_lut(
|
||||
active_streams, {0}, {block_selector_f}, allocate_gpu_memory);
|
||||
active_streams, {0}, {block_selector_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
@@ -413,7 +412,7 @@ template <typename Torus> struct int_comparison_buffer {
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
identity_lut->generate_and_broadcast_lut(
|
||||
active_streams, {0}, {identity_lut_f}, gpu_memory_allocated);
|
||||
active_streams, {0}, {identity_lut_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
uint32_t total_modulus = params.message_modulus * params.carry_modulus;
|
||||
auto is_zero_f = [total_modulus](Torus x) -> Torus {
|
||||
@@ -424,7 +423,7 @@ template <typename Torus> struct int_comparison_buffer {
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
is_zero_lut->generate_and_broadcast_lut(active_streams, {0}, {is_zero_f},
|
||||
gpu_memory_allocated);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
switch (op) {
|
||||
case COMPARISON_TYPE::MAX:
|
||||
@@ -501,7 +500,7 @@ template <typename Torus> struct int_comparison_buffer {
|
||||
|
||||
auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
|
||||
signed_lut->generate_and_broadcast_bivariate_lut(
|
||||
active_streams, {0}, {signed_lut_f}, gpu_memory_allocated);
|
||||
active_streams, {0}, {signed_lut_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
preallocated_h_lut = (Torus *)malloc(
|
||||
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus));
|
||||
|
||||
@@ -11,16 +11,26 @@ template <typename Torus> struct int_compression {
|
||||
Torus *tmp_glwe_array_out;
|
||||
bool gpu_memory_allocated;
|
||||
uint32_t lwe_per_glwe;
|
||||
uint32_t max_num_glwes;
|
||||
|
||||
// num_radix_blocks: total number of LWE ciphertexts (radix blocks) to compress.
|
||||
// lwe_per_glwe: max LWEs packed per GLWE (= polynomial_size),
|
||||
// defined by the chosen parameter set.
|
||||
int_compression(CudaStreams streams, int_radix_params compression_params,
|
||||
uint32_t num_radix_blocks, uint32_t lwe_per_glwe,
|
||||
bool allocate_gpu_memory, uint64_t &size_tracker) {
|
||||
gpu_memory_allocated = allocate_gpu_memory;
|
||||
this->compression_params = compression_params;
|
||||
this->lwe_per_glwe = lwe_per_glwe;
|
||||
|
||||
uint64_t glwe_accumulator_size = (compression_params.glwe_dimension + 1) *
|
||||
compression_params.polynomial_size;
|
||||
|
||||
// Calculate the actual number of GLWEs needed based on total radix blocks.
|
||||
// This ensures we allocate enough memory when num_radix_blocks >
|
||||
// lwe_per_glwe.
|
||||
max_num_glwes = CEIL_DIV(num_radix_blocks, lwe_per_glwe);
|
||||
|
||||
tmp_lwe = static_cast<Torus *>(cuda_malloc_with_size_tracking_async(
|
||||
num_radix_blocks * (compression_params.small_lwe_dimension + 1) *
|
||||
sizeof(Torus),
|
||||
@@ -28,7 +38,7 @@ template <typename Torus> struct int_compression {
|
||||
allocate_gpu_memory));
|
||||
tmp_glwe_array_out =
|
||||
static_cast<Torus *>(cuda_malloc_with_size_tracking_async(
|
||||
lwe_per_glwe * glwe_accumulator_size * sizeof(Torus),
|
||||
max_num_glwes * glwe_accumulator_size * sizeof(Torus),
|
||||
streams.stream(0), streams.gpu_index(0), size_tracker,
|
||||
allocate_gpu_memory));
|
||||
|
||||
@@ -106,19 +116,13 @@ template <typename Torus> struct int_decompression {
|
||||
encryption_params.carry_modulus;
|
||||
auto effective_compression_carry_modulus = 1;
|
||||
|
||||
generate_device_accumulator_with_encoding<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
decompression_rescale_lut->get_lut(0, 0),
|
||||
decompression_rescale_lut->get_degree(0),
|
||||
decompression_rescale_lut->get_max_degree(0),
|
||||
encryption_params.glwe_dimension, encryption_params.polynomial_size,
|
||||
effective_compression_message_modulus,
|
||||
effective_compression_carry_modulus,
|
||||
encryption_params.message_modulus, encryption_params.carry_modulus,
|
||||
decompression_rescale_f, gpu_memory_allocated);
|
||||
auto active_streams = streams.active_gpu_subset(
|
||||
num_blocks_to_decompress, decompression_rescale_lut->params.pbs_type);
|
||||
decompression_rescale_lut->broadcast_lut(active_streams);
|
||||
decompression_rescale_lut->generate_and_broadcast_lut_with_encoding(
|
||||
active_streams, {0}, {decompression_rescale_f},
|
||||
effective_compression_message_modulus,
|
||||
effective_compression_carry_modulus,
|
||||
encryption_params.message_modulus, encryption_params.carry_modulus);
|
||||
}
|
||||
}
|
||||
void release(CudaStreams streams) {
|
||||
|
||||
@@ -285,7 +285,7 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
|
||||
for (int j = 0; j < 2; j++) {
|
||||
luts[j]->generate_and_broadcast_lut(streams.get_ith(lut_gpu_indexes[j]),
|
||||
{0}, {zero_out_if_not_1_lut_f},
|
||||
gpu_memory_allocated);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
|
||||
luts[0] = zero_out_if_not_2_lut_1;
|
||||
@@ -295,7 +295,7 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
|
||||
for (int j = 0; j < 2; j++) {
|
||||
luts[j]->generate_and_broadcast_lut(streams.get_ith(lut_gpu_indexes[j]),
|
||||
{0}, {zero_out_if_not_2_lut_f},
|
||||
gpu_memory_allocated);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
|
||||
quotient_lut_1 =
|
||||
@@ -316,11 +316,11 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
|
||||
auto quotient_lut_3_f = [](Torus cond) -> Torus { return cond * 3; };
|
||||
|
||||
quotient_lut_1->generate_and_broadcast_lut(
|
||||
streams.get_ith(2), {0}, {quotient_lut_1_f}, gpu_memory_allocated);
|
||||
streams.get_ith(2), {0}, {quotient_lut_1_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
quotient_lut_2->generate_and_broadcast_lut(
|
||||
streams.get_ith(1), {0}, {quotient_lut_2_f}, gpu_memory_allocated);
|
||||
streams.get_ith(1), {0}, {quotient_lut_2_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
quotient_lut_3->generate_and_broadcast_lut(
|
||||
streams.get_ith(0), {0}, {quotient_lut_3_f}, gpu_memory_allocated);
|
||||
streams.get_ith(0), {0}, {quotient_lut_3_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
message_extract_lut_1 = new int_radix_lut<Torus>(
|
||||
streams, params, 1, num_blocks, allocate_gpu_memory, size_tracker);
|
||||
@@ -340,7 +340,7 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
|
||||
|
||||
for (int j = 0; j < 2; j++) {
|
||||
luts[j]->generate_and_broadcast_lut(
|
||||
active_streams, {0}, {lut_f_message_extract}, gpu_memory_allocated);
|
||||
active_streams, {0}, {lut_f_message_extract}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -485,29 +485,35 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
|
||||
(Torus *)cuda_malloc_with_size_tracking_async(
|
||||
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
|
||||
size_tracker, allocate_gpu_memory);
|
||||
for (int index = 0; index < nb; index++) {
|
||||
uint32_t grouping_index = index / group_size;
|
||||
bool is_in_first_grouping = (grouping_index == 0);
|
||||
uint32_t index_in_grouping = index % group_size;
|
||||
bool is_last_index = (index == (nb - 1));
|
||||
if (is_last_index) {
|
||||
if (nb == 1) {
|
||||
h_lut_indexes[index] = 2 * group_size;
|
||||
|
||||
auto index_generator = [nb, group_size](Torus *h_lut_indexes, uint32_t) {
|
||||
for (int index = 0; index < nb; index++) {
|
||||
uint32_t grouping_index = index / group_size;
|
||||
bool is_in_first_grouping = (grouping_index == 0);
|
||||
uint32_t index_in_grouping = index % group_size;
|
||||
bool is_last_index = (index == (nb - 1));
|
||||
if (is_last_index) {
|
||||
if (nb == 1) {
|
||||
h_lut_indexes[index] = 2 * group_size;
|
||||
} else {
|
||||
h_lut_indexes[index] = 2;
|
||||
}
|
||||
} else if (is_in_first_grouping) {
|
||||
h_lut_indexes[index] = index_in_grouping;
|
||||
} else {
|
||||
h_lut_indexes[index] = 2;
|
||||
h_lut_indexes[index] = index_in_grouping + group_size;
|
||||
}
|
||||
} else if (is_in_first_grouping) {
|
||||
h_lut_indexes[index] = index_in_grouping;
|
||||
} else {
|
||||
h_lut_indexes[index] = index_in_grouping + group_size;
|
||||
}
|
||||
}
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
first_indexes_for_overflow_sub_gpu_0[nb - 1], h_lut_indexes,
|
||||
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
|
||||
allocate_gpu_memory);
|
||||
};
|
||||
|
||||
generate_lut_indexes<Torus>(streams, index_generator,
|
||||
first_indexes_for_overflow_sub_gpu_0[nb - 1],
|
||||
nb, 2 * group_size + 1, h_lut_indexes,
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
// Extra indexes for the luts in second step
|
||||
uint32_t num_extra_luts = use_seq ? (group_size - 1) : 1;
|
||||
uint32_t num_luts_second_step = 2 * group_size + num_extra_luts;
|
||||
for (int nb = 1; nb <= num_blocks; nb++) {
|
||||
second_indexes_for_overflow_sub_gpu_0[nb - 1] =
|
||||
(Torus *)cuda_malloc_with_size_tracking_async(
|
||||
@@ -518,24 +524,37 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
|
||||
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
|
||||
size_tracker, allocate_gpu_memory);
|
||||
|
||||
auto index_generator = [nb, group_size, use_seq](Torus *h_lut_indexes,
|
||||
uint32_t) {
|
||||
for (int index = 0; index < nb; index++) {
|
||||
uint32_t grouping_index = index / group_size;
|
||||
bool is_in_first_grouping = (grouping_index == 0);
|
||||
uint32_t index_in_grouping = index % group_size;
|
||||
|
||||
if (is_in_first_grouping) {
|
||||
h_lut_indexes[index] = index_in_grouping;
|
||||
} else if (index_in_grouping == (group_size - 1)) {
|
||||
if (use_seq) {
|
||||
int inner_index = (grouping_index - 1) % (group_size - 1);
|
||||
h_lut_indexes[index] = inner_index + 2 * group_size;
|
||||
} else {
|
||||
h_lut_indexes[index] = 2 * group_size;
|
||||
}
|
||||
} else {
|
||||
h_lut_indexes[index] = index_in_grouping + group_size;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
generate_lut_indexes<Torus>(streams, index_generator,
|
||||
second_indexes_for_overflow_sub_gpu_0[nb - 1],
|
||||
nb, num_luts_second_step, h_lut_indexes,
|
||||
allocate_gpu_memory);
|
||||
|
||||
for (int index = 0; index < nb; index++) {
|
||||
uint32_t grouping_index = index / group_size;
|
||||
bool is_in_first_grouping = (grouping_index == 0);
|
||||
uint32_t index_in_grouping = index % group_size;
|
||||
|
||||
if (is_in_first_grouping) {
|
||||
h_lut_indexes[index] = index_in_grouping;
|
||||
} else if (index_in_grouping == (group_size - 1)) {
|
||||
if (use_seq) {
|
||||
int inner_index = (grouping_index - 1) % (group_size - 1);
|
||||
h_lut_indexes[index] = inner_index + 2 * group_size;
|
||||
} else {
|
||||
h_lut_indexes[index] = 2 * group_size;
|
||||
}
|
||||
} else {
|
||||
h_lut_indexes[index] = index_in_grouping + group_size;
|
||||
}
|
||||
|
||||
bool may_have_its_padding_bit_set =
|
||||
!is_in_first_grouping && (index_in_grouping == group_size - 1);
|
||||
|
||||
@@ -549,10 +568,6 @@ template <typename Torus> struct unsigned_int_div_rem_2_2_memory {
|
||||
h_scalar[index] = 0;
|
||||
}
|
||||
}
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
second_indexes_for_overflow_sub_gpu_0[nb - 1], h_lut_indexes,
|
||||
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
|
||||
allocate_gpu_memory);
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
scalars_for_overflow_sub_gpu_0[nb - 1], h_scalar, nb * sizeof(Torus),
|
||||
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
|
||||
@@ -991,12 +1006,12 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
|
||||
auto active_streams_1 = streams.active_gpu_subset(1, params.pbs_type);
|
||||
masking_luts_1[i]->generate_and_broadcast_lut(
|
||||
active_streams_1, {0}, {lut_f_masking}, gpu_memory_allocated);
|
||||
active_streams_1, {0}, {lut_f_masking}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
auto active_streams_2 =
|
||||
streams.active_gpu_subset(num_blocks, params.pbs_type);
|
||||
masking_luts_2[i]->generate_and_broadcast_lut(
|
||||
active_streams_2, {0}, {lut_f_masking}, gpu_memory_allocated);
|
||||
active_streams_2, {0}, {lut_f_masking}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
|
||||
// create and generate message_extract_lut_1 and message_extract_lut_2
|
||||
@@ -1019,7 +1034,7 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
streams.active_gpu_subset(num_blocks, params.pbs_type);
|
||||
for (int j = 0; j < 2; j++) {
|
||||
luts[j]->generate_and_broadcast_lut(
|
||||
active_streams, {0}, {lut_f_message_extract}, gpu_memory_allocated);
|
||||
active_streams, {0}, {lut_f_message_extract}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
|
||||
// Give name to closures to improve readability
|
||||
@@ -1045,24 +1060,14 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
}
|
||||
};
|
||||
|
||||
generate_device_accumulator_bivariate_with_factor<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
zero_out_if_overflow_did_not_happen[0]->get_lut(0, 0),
|
||||
zero_out_if_overflow_did_not_happen[0]->get_degree(0),
|
||||
zero_out_if_overflow_did_not_happen[0]->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, cur_lut_f, params.message_modulus - 2,
|
||||
gpu_memory_allocated);
|
||||
zero_out_if_overflow_did_not_happen[0]->broadcast_lut(active_streams);
|
||||
generate_device_accumulator_bivariate_with_factor<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
zero_out_if_overflow_did_not_happen[1]->get_lut(0, 0),
|
||||
zero_out_if_overflow_did_not_happen[1]->get_degree(0),
|
||||
zero_out_if_overflow_did_not_happen[1]->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, cur_lut_f, params.message_modulus - 1,
|
||||
gpu_memory_allocated);
|
||||
zero_out_if_overflow_did_not_happen[1]->broadcast_lut(active_streams);
|
||||
zero_out_if_overflow_did_not_happen[0]
|
||||
->generate_and_broadcast_bivariate_lut(active_streams, {0}, {cur_lut_f},
|
||||
LUT_0_FOR_ALL_BLOCKS, {},
|
||||
params.message_modulus - 2);
|
||||
zero_out_if_overflow_did_not_happen[1]
|
||||
->generate_and_broadcast_bivariate_lut(active_streams, {0}, {cur_lut_f},
|
||||
LUT_0_FOR_ALL_BLOCKS, {},
|
||||
params.message_modulus - 1);
|
||||
|
||||
// create and generate zero_out_if_overflow_happened
|
||||
zero_out_if_overflow_happened = new int_radix_lut<Torus> *[2];
|
||||
@@ -1079,24 +1084,12 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
}
|
||||
};
|
||||
|
||||
generate_device_accumulator_bivariate_with_factor<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
zero_out_if_overflow_happened[0]->get_lut(0, 0),
|
||||
zero_out_if_overflow_happened[0]->get_degree(0),
|
||||
zero_out_if_overflow_happened[0]->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, overflow_happened_f, params.message_modulus - 2,
|
||||
gpu_memory_allocated);
|
||||
zero_out_if_overflow_happened[0]->broadcast_lut(active_streams);
|
||||
generate_device_accumulator_bivariate_with_factor<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
zero_out_if_overflow_happened[1]->get_lut(0, 0),
|
||||
zero_out_if_overflow_happened[1]->get_degree(0),
|
||||
zero_out_if_overflow_happened[1]->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, overflow_happened_f, params.message_modulus - 1,
|
||||
gpu_memory_allocated);
|
||||
zero_out_if_overflow_happened[1]->broadcast_lut(active_streams);
|
||||
zero_out_if_overflow_happened[0]->generate_and_broadcast_bivariate_lut(
|
||||
active_streams, {0}, {overflow_happened_f}, LUT_0_FOR_ALL_BLOCKS, {},
|
||||
params.message_modulus - 2);
|
||||
zero_out_if_overflow_happened[1]->generate_and_broadcast_bivariate_lut(
|
||||
active_streams, {0}, {overflow_happened_f}, LUT_0_FOR_ALL_BLOCKS, {},
|
||||
params.message_modulus - 1);
|
||||
|
||||
// merge_overflow_flags_luts
|
||||
merge_overflow_flags_luts = new int_radix_lut<Torus> *[num_bits_in_message];
|
||||
@@ -1111,7 +1104,7 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
streams, params, 1, 1, allocate_gpu_memory, size_tracker);
|
||||
|
||||
merge_overflow_flags_luts[i]->generate_and_broadcast_bivariate_lut(
|
||||
active_gpu_count_for_bits, {0}, {lut_f_bit}, gpu_memory_allocated);
|
||||
active_gpu_count_for_bits, {0}, {lut_f_bit}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1183,29 +1176,34 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
(Torus *)cuda_malloc_with_size_tracking_async(
|
||||
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
|
||||
size_tracker, allocate_gpu_memory);
|
||||
for (int index = 0; index < nb; index++) {
|
||||
uint32_t grouping_index = index / group_size;
|
||||
bool is_in_first_grouping = (grouping_index == 0);
|
||||
uint32_t index_in_grouping = index % group_size;
|
||||
bool is_last_index = (index == (nb - 1));
|
||||
if (is_last_index) {
|
||||
if (nb == 1) {
|
||||
h_lut_indexes[index] = 2 * group_size;
|
||||
|
||||
auto index_generator = [nb, group_size](Torus *h_lut_indexes, uint32_t) {
|
||||
for (int index = 0; index < nb; index++) {
|
||||
uint32_t grouping_index = index / group_size;
|
||||
bool is_in_first_grouping = (grouping_index == 0);
|
||||
uint32_t index_in_grouping = index % group_size;
|
||||
bool is_last_index = (index == (nb - 1));
|
||||
if (is_last_index) {
|
||||
if (nb == 1) {
|
||||
h_lut_indexes[index] = 2 * group_size;
|
||||
} else {
|
||||
h_lut_indexes[index] = 2;
|
||||
}
|
||||
} else if (is_in_first_grouping) {
|
||||
h_lut_indexes[index] = index_in_grouping;
|
||||
} else {
|
||||
h_lut_indexes[index] = 2;
|
||||
h_lut_indexes[index] = index_in_grouping + group_size;
|
||||
}
|
||||
} else if (is_in_first_grouping) {
|
||||
h_lut_indexes[index] = index_in_grouping;
|
||||
} else {
|
||||
h_lut_indexes[index] = index_in_grouping + group_size;
|
||||
}
|
||||
}
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
first_indexes_for_overflow_sub[nb - 1], h_lut_indexes,
|
||||
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
|
||||
allocate_gpu_memory);
|
||||
};
|
||||
|
||||
generate_lut_indexes<Torus>(
|
||||
streams, index_generator, first_indexes_for_overflow_sub[nb - 1], nb,
|
||||
2 * group_size + 1, h_lut_indexes, allocate_gpu_memory);
|
||||
}
|
||||
// Extra indexes for the luts in second step
|
||||
uint32_t num_extra_luts = use_seq ? (group_size - 1) : 1;
|
||||
uint32_t num_luts_second_step = 2 * group_size + num_extra_luts;
|
||||
for (int nb = 1; nb <= num_blocks; nb++) {
|
||||
second_indexes_for_overflow_sub[nb - 1] =
|
||||
(Torus *)cuda_malloc_with_size_tracking_async(
|
||||
@@ -1216,24 +1214,36 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
|
||||
size_tracker, allocate_gpu_memory);
|
||||
|
||||
auto index_generator = [nb, group_size, use_seq](Torus *h_lut_indexes,
|
||||
uint32_t) {
|
||||
for (int index = 0; index < nb; index++) {
|
||||
uint32_t grouping_index = index / group_size;
|
||||
bool is_in_first_grouping = (grouping_index == 0);
|
||||
uint32_t index_in_grouping = index % group_size;
|
||||
|
||||
if (is_in_first_grouping) {
|
||||
h_lut_indexes[index] = index_in_grouping;
|
||||
} else if (index_in_grouping == (group_size - 1)) {
|
||||
if (use_seq) {
|
||||
int inner_index = (grouping_index - 1) % (group_size - 1);
|
||||
h_lut_indexes[index] = inner_index + 2 * group_size;
|
||||
} else {
|
||||
h_lut_indexes[index] = 2 * group_size;
|
||||
}
|
||||
} else {
|
||||
h_lut_indexes[index] = index_in_grouping + group_size;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
generate_lut_indexes<Torus>(
|
||||
streams, index_generator, second_indexes_for_overflow_sub[nb - 1], nb,
|
||||
num_luts_second_step, h_lut_indexes, allocate_gpu_memory);
|
||||
|
||||
for (int index = 0; index < nb; index++) {
|
||||
uint32_t grouping_index = index / group_size;
|
||||
bool is_in_first_grouping = (grouping_index == 0);
|
||||
uint32_t index_in_grouping = index % group_size;
|
||||
|
||||
if (is_in_first_grouping) {
|
||||
h_lut_indexes[index] = index_in_grouping;
|
||||
} else if (index_in_grouping == (group_size - 1)) {
|
||||
if (use_seq) {
|
||||
int inner_index = (grouping_index - 1) % (group_size - 1);
|
||||
h_lut_indexes[index] = inner_index + 2 * group_size;
|
||||
} else {
|
||||
h_lut_indexes[index] = 2 * group_size;
|
||||
}
|
||||
} else {
|
||||
h_lut_indexes[index] = index_in_grouping + group_size;
|
||||
}
|
||||
|
||||
bool may_have_its_padding_bit_set =
|
||||
!is_in_first_grouping && (index_in_grouping == group_size - 1);
|
||||
|
||||
@@ -1247,10 +1257,6 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
h_scalar[index] = 0;
|
||||
}
|
||||
}
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
second_indexes_for_overflow_sub[nb - 1], h_lut_indexes,
|
||||
nb * sizeof(Torus), streams.stream(0), streams.gpu_index(0),
|
||||
allocate_gpu_memory);
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
scalars_for_overflow_sub[nb - 1], h_scalar, nb * sizeof(Torus),
|
||||
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
|
||||
@@ -1525,7 +1531,7 @@ template <typename Torus> struct int_div_rem_memory {
|
||||
|
||||
compare_signed_bits_lut->generate_and_broadcast_bivariate_lut(
|
||||
active_gpu_count_cmp, {0}, {f_compare_extracted_signed_bits},
|
||||
gpu_memory_allocated);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ template <typename Torus> struct int_prepare_count_of_consecutive_bits_buffer {
|
||||
};
|
||||
|
||||
univ_lut_mem->generate_and_broadcast_lut(
|
||||
active_streams, {0}, {generate_uni_lut_lambda}, allocate_gpu_memory);
|
||||
active_streams, {0}, {generate_uni_lut_lambda}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
auto generate_bi_lut_lambda =
|
||||
[num_bits](Torus block_num_bit_count,
|
||||
@@ -66,7 +66,7 @@ template <typename Torus> struct int_prepare_count_of_consecutive_bits_buffer {
|
||||
};
|
||||
|
||||
biv_lut_mem->generate_and_broadcast_bivariate_lut(
|
||||
active_streams, {0}, {generate_bi_lut_lambda}, allocate_gpu_memory);
|
||||
active_streams, {0}, {generate_bi_lut_lambda}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
this->tmp_ct = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
@@ -234,7 +234,7 @@ template <typename Torus> struct int_ilog2_buffer {
|
||||
auto active_streams =
|
||||
streams.active_gpu_subset(counter_num_blocks, params.pbs_type);
|
||||
lut_message_not->generate_and_broadcast_lut(
|
||||
active_streams, {0}, {lut_message_lambda}, allocate_gpu_memory);
|
||||
active_streams, {0}, {lut_message_lambda}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
this->lut_carry_not =
|
||||
new int_radix_lut<Torus>(streams, params, 1, counter_num_blocks,
|
||||
@@ -245,7 +245,7 @@ template <typename Torus> struct int_ilog2_buffer {
|
||||
return (~carry) % this->params.message_modulus;
|
||||
};
|
||||
lut_carry_not->generate_and_broadcast_lut(
|
||||
active_streams, {0}, {lut_carry_lambda}, allocate_gpu_memory);
|
||||
active_streams, {0}, {lut_carry_lambda}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
this->message_blocks_not = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -42,9 +42,7 @@ template <typename Torus> struct int_mul_memory {
|
||||
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
|
||||
zero_out_predicate_lut->generate_and_broadcast_bivariate_lut(
|
||||
active_streams, {0}, {zero_out_predicate_lut_f},
|
||||
gpu_memory_allocated);
|
||||
|
||||
// zero_out_predicate_lut->broadcast_lut(active_streams);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
zero_out_mem = new int_zero_out_if_buffer<Torus>(
|
||||
streams, params, num_radix_blocks, allocate_gpu_memory, size_tracker);
|
||||
@@ -64,6 +62,10 @@ template <typename Torus> struct int_mul_memory {
|
||||
|
||||
int total_block_count = num_radix_blocks * num_radix_blocks;
|
||||
|
||||
GPU_ASSERT(lsb_vector_block_count + msb_vector_block_count ==
|
||||
total_block_count,
|
||||
"MSB and LSB vector block counts don't match");
|
||||
|
||||
// allocate memory for intermediate buffers
|
||||
vector_result_sb = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
@@ -85,8 +87,6 @@ template <typename Torus> struct int_mul_memory {
|
||||
// luts_array -> lut = {lsb_acc, msb_acc}
|
||||
luts_array = new int_radix_lut<Torus>(streams, params, 2, total_block_count,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
auto lsb_acc = luts_array->get_lut(0, 0);
|
||||
auto msb_acc = luts_array->get_lut(0, 1);
|
||||
|
||||
// define functions for each accumulator
|
||||
auto lut_f_lsb = [message_modulus](Torus x, Torus y) -> Torus {
|
||||
@@ -100,16 +100,16 @@ template <typename Torus> struct int_mul_memory {
|
||||
// first lsb_vector_block_count value should reference to lsb_acc
|
||||
// last msb_vector_block_count values should reference to msb_acc
|
||||
// for message and carry default lut_indexes_vec is fine
|
||||
if (allocate_gpu_memory)
|
||||
cuda_set_value_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
luts_array->get_lut_indexes(0, lsb_vector_block_count), 1,
|
||||
msb_vector_block_count);
|
||||
|
||||
auto active_streams =
|
||||
streams.active_gpu_subset(total_block_count, params.pbs_type);
|
||||
auto lut_index_generator = [lsb_vector_block_count](Torus *h_lut_indexes,
|
||||
uint32_t num_indexes) {
|
||||
for (uint32_t i = 0; i < num_indexes; i++) {
|
||||
h_lut_indexes[i] = (i < lsb_vector_block_count) ? 0 : 1;
|
||||
}
|
||||
};
|
||||
luts_array->generate_and_broadcast_bivariate_lut(
|
||||
active_streams, {0, 1}, {lut_f_lsb, lut_f_msb}, gpu_memory_allocated);
|
||||
active_streams, {0, 1}, {lut_f_lsb, lut_f_msb}, lut_index_generator);
|
||||
|
||||
// create memory object for sum ciphertexts
|
||||
sum_ciphertexts_mem = new int_sum_ciphertexts_vec_memory<Torus>(
|
||||
|
||||
@@ -22,8 +22,7 @@ template <typename Torus> struct int_grouped_oprf_memory {
|
||||
uint32_t calculated_active_blocks =
|
||||
total_random_bits == 0
|
||||
? 0
|
||||
: (total_random_bits + message_bits_per_block - 1) /
|
||||
message_bits_per_block;
|
||||
: CEIL_DIV(total_random_bits, message_bits_per_block);
|
||||
if (num_blocks_to_process != calculated_active_blocks) {
|
||||
PANIC(
|
||||
"num_blocks_to_process should be equal to calculated_active_blocks");
|
||||
@@ -53,6 +52,10 @@ template <typename Torus> struct int_grouped_oprf_memory {
|
||||
|
||||
// Pre-generate all possible LUTs.
|
||||
//
|
||||
std::vector<std::function<Torus(Torus)>> lut_funcs;
|
||||
std::vector<uint32_t> lut_indices;
|
||||
std::vector<uint64_t> lut_degrees;
|
||||
|
||||
for (uint32_t random_bit = 1; random_bit <= message_bits_per_block;
|
||||
++random_bit) {
|
||||
uint64_t p = 1ULL << random_bit;
|
||||
@@ -70,14 +73,13 @@ template <typename Torus> struct int_grouped_oprf_memory {
|
||||
|
||||
uint64_t degree = 0;
|
||||
uint32_t lut_index = random_bit - 1;
|
||||
generate_device_accumulator_no_encoding<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), luts->get_lut(0, lut_index),
|
||||
degree, params.message_modulus, params.carry_modulus,
|
||||
params.glwe_dimension, params.polynomial_size, lut_f,
|
||||
allocate_gpu_memory);
|
||||
|
||||
lut_funcs.push_back(lut_f);
|
||||
lut_indices.push_back(lut_index);
|
||||
|
||||
// In OPRF the degree is hard set to p - 1 instead of the LUT degree
|
||||
degree = p - 1;
|
||||
*luts->get_degree(lut_index) = degree;
|
||||
lut_degrees.push_back(degree);
|
||||
}
|
||||
|
||||
// For each block, this loop determines the exact number of bits to generate
|
||||
@@ -102,10 +104,6 @@ template <typename Torus> struct int_grouped_oprf_memory {
|
||||
Torus plaintext_to_add = (p - 1) * delta / 2;
|
||||
|
||||
h_corrections[i * lwe_size + params.big_lwe_dimension] = plaintext_to_add;
|
||||
if (bits_for_this_block < 1) {
|
||||
PANIC("bits_for_this_block should be greater than 1");
|
||||
}
|
||||
this->h_lut_indexes[i] = bits_for_this_block - 1;
|
||||
|
||||
bits_processed += bits_for_this_block;
|
||||
}
|
||||
@@ -122,13 +120,35 @@ template <typename Torus> struct int_grouped_oprf_memory {
|
||||
|
||||
// Copy the prepared LUT indexes to the GPU 0, before broadcast to all other
|
||||
// GPUs.
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
luts->get_lut_indexes(0, 0), this->h_lut_indexes,
|
||||
num_blocks_to_process * sizeof(Torus), streams.stream(0),
|
||||
streams.gpu_index(0), allocate_gpu_memory);
|
||||
auto active_streams =
|
||||
streams.active_gpu_subset(num_blocks_to_process, params.pbs_type);
|
||||
luts->broadcast_lut(active_streams);
|
||||
// No encoding for these LUTS. Generate LUT also sets LUT degrees to default
|
||||
// values
|
||||
auto luts_index_generator = [total_random_bits, message_bits_per_block](
|
||||
Torus *h_lut_indexes, uint32_t num_blocks) {
|
||||
uint64_t bits_processed = 0;
|
||||
for (uint32_t i = 0; i < num_blocks; ++i) {
|
||||
if (total_random_bits <= bits_processed) {
|
||||
PANIC("total_random_bits should be greater than bits_processed");
|
||||
}
|
||||
uint64_t bits_remaining = total_random_bits - bits_processed;
|
||||
uint32_t bits_for_this_block =
|
||||
std::min((uint64_t)message_bits_per_block, bits_remaining);
|
||||
if (bits_for_this_block < 1) {
|
||||
PANIC("bits_for_this_block should be greater than 1");
|
||||
}
|
||||
h_lut_indexes[i] = bits_for_this_block - 1;
|
||||
bits_processed += bits_for_this_block;
|
||||
}
|
||||
};
|
||||
luts->generate_and_broadcast_lut(active_streams, lut_indices, lut_funcs,
|
||||
luts_index_generator, false, {},
|
||||
this->h_lut_indexes);
|
||||
|
||||
// OPRF requires custom LUT degrees
|
||||
for (uint32_t i = 0; i < lut_degrees.size(); ++i) {
|
||||
*luts->get_degree(i) = lut_degrees[i];
|
||||
}
|
||||
|
||||
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
|
||||
free(h_corrections);
|
||||
@@ -170,8 +190,7 @@ template <typename Torus> struct int_grouped_oprf_custom_range_memory {
|
||||
this->allocate_gpu_memory = allocate_gpu_memory;
|
||||
|
||||
this->num_random_input_blocks =
|
||||
(num_input_random_bits + message_bits_per_block - 1) /
|
||||
message_bits_per_block;
|
||||
CEIL_DIV(num_input_random_bits, message_bits_per_block);
|
||||
|
||||
this->grouped_oprf_memory = new int_grouped_oprf_memory<Torus>(
|
||||
streams, params, this->num_random_input_blocks, message_bits_per_block,
|
||||
|
||||
@@ -89,7 +89,7 @@ template <typename Torus> struct int_logical_scalar_shift_buffer {
|
||||
auto active_streams =
|
||||
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
|
||||
cur_lut_bivariate->generate_and_broadcast_bivariate_lut(
|
||||
active_streams, {0}, {shift_lut_f}, gpu_memory_allocated);
|
||||
active_streams, {0}, {shift_lut_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
lut_buffers_bivariate.push_back(cur_lut_bivariate);
|
||||
}
|
||||
@@ -171,7 +171,7 @@ template <typename Torus> struct int_logical_scalar_shift_buffer {
|
||||
auto active_streams =
|
||||
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
|
||||
cur_lut_bivariate->generate_and_broadcast_bivariate_lut(
|
||||
active_streams, {0}, {shift_lut_f}, gpu_memory_allocated);
|
||||
active_streams, {0}, {shift_lut_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
lut_buffers_bivariate.push_back(cur_lut_bivariate);
|
||||
}
|
||||
}
|
||||
@@ -265,7 +265,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
|
||||
streams.active_gpu_subset(1, params.pbs_type);
|
||||
shift_last_block_lut_univariate->generate_and_broadcast_lut(
|
||||
active_streams_shift_last, {0}, {last_block_lut_f},
|
||||
gpu_memory_allocated);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
lut_buffers_univariate.push_back(shift_last_block_lut_univariate);
|
||||
}
|
||||
@@ -284,7 +284,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
|
||||
};
|
||||
|
||||
padding_block_lut_univariate->generate_and_broadcast_lut(
|
||||
active_streams, {0}, {padding_block_lut_f}, gpu_memory_allocated);
|
||||
active_streams, {0}, {padding_block_lut_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
lut_buffers_univariate.push_back(padding_block_lut_univariate);
|
||||
|
||||
@@ -321,7 +321,7 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
|
||||
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
|
||||
shift_blocks_lut_bivariate->generate_and_broadcast_bivariate_lut(
|
||||
active_streams_shift_blocks, {0}, {blocks_lut_f},
|
||||
gpu_memory_allocated);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
lut_buffers_bivariate.push_back(shift_blocks_lut_bivariate);
|
||||
}
|
||||
|
||||
@@ -113,12 +113,11 @@ template <typename Torus> struct int_shift_and_rotate_buffer {
|
||||
else
|
||||
return current_bit;
|
||||
};
|
||||
;
|
||||
auto active_gpu_count_mux = streams.active_gpu_subset(
|
||||
bits_per_block * num_radix_blocks, params.pbs_type);
|
||||
|
||||
mux_lut->generate_and_broadcast_lut(active_gpu_count_mux, {0}, {mux_lut_f},
|
||||
gpu_memory_allocated);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
auto cleaning_lut_f = [params](Torus x) -> Torus {
|
||||
return x % params.message_modulus;
|
||||
@@ -127,7 +126,7 @@ template <typename Torus> struct int_shift_and_rotate_buffer {
|
||||
auto active_gpu_count_cleaning =
|
||||
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
|
||||
cleaning_lut->generate_and_broadcast_lut(
|
||||
active_gpu_count_cleaning, {0}, {cleaning_lut_f}, gpu_memory_allocated);
|
||||
active_gpu_count_cleaning, {0}, {cleaning_lut_f}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
|
||||
@@ -74,26 +74,27 @@ template <typename Torus> struct int_overflowing_sub_memory {
|
||||
luts_array, size_tracker,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
if (allocate_gpu_memory)
|
||||
cuda_set_value_async<Torus>(streams.stream(0), streams.gpu_index(0),
|
||||
luts_array->get_lut_indexes(0, 1), 1,
|
||||
num_radix_blocks - 1);
|
||||
|
||||
auto active_streams =
|
||||
streams.active_gpu_subset(num_radix_blocks, params.pbs_type);
|
||||
luts_borrow_propagation_sum->generate_and_broadcast_bivariate_lut(
|
||||
active_streams, {0}, {f_luts_borrow_propagation_sum},
|
||||
gpu_memory_allocated);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
auto luts_array_index_generator = [](Torus *h_lut_indexes,
|
||||
uint32_t num_indexes) {
|
||||
for (uint32_t i = 0; i < num_indexes; i++) {
|
||||
h_lut_indexes[i] = (i == 0) ? 0 : 1;
|
||||
}
|
||||
};
|
||||
luts_array->generate_and_broadcast_lut(
|
||||
active_streams, {0, 1},
|
||||
{f_lut_does_block_generate_carry,
|
||||
f_lut_does_block_generate_or_propagate},
|
||||
gpu_memory_allocated);
|
||||
luts_array_index_generator);
|
||||
// generate luts (aka accumulators)
|
||||
|
||||
message_acc->generate_and_broadcast_lut(
|
||||
active_streams, {0}, {f_message_acc}, gpu_memory_allocated);
|
||||
active_streams, {0}, {f_message_acc}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
|
||||
@@ -7,7 +7,8 @@
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
||||
const uint32_t MAX_STREAMS_FOR_VECTOR_FIND = 10;
|
||||
// If we use more than 5 streams the result is incorrect
|
||||
const uint32_t MAX_STREAMS_FOR_VECTOR_FIND = 5;
|
||||
|
||||
template <typename Torus> struct int_equality_selectors_buffer {
|
||||
int_radix_params params;
|
||||
@@ -60,18 +61,10 @@ template <typename Torus> struct int_equality_selectors_buffer {
|
||||
fns.push_back([i](Torus x) -> Torus { return (x == i); });
|
||||
}
|
||||
|
||||
generate_many_lut_device_accumulator<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
this->comparison_luts->get_lut(0, 0),
|
||||
this->comparison_luts->get_degree(0),
|
||||
this->comparison_luts->get_max_degree(0), params.glwe_dimension,
|
||||
params.polynomial_size, params.message_modulus, params.carry_modulus,
|
||||
fns, allocate_gpu_memory);
|
||||
|
||||
this->comparison_luts->generate_and_broadcast_many_lut(
|
||||
active_streams, {0}, {fns}, LUT_0_FOR_ALL_BLOCKS);
|
||||
fns.clear();
|
||||
|
||||
this->comparison_luts->broadcast_lut(active_streams);
|
||||
|
||||
this->tmp_many_luts_output = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->tmp_many_luts_output,
|
||||
@@ -175,8 +168,7 @@ template <typename Torus> struct int_possible_results_buffer {
|
||||
this->lut_stride =
|
||||
(ciphertext_modulus / this->max_luts_per_call) * box_size;
|
||||
|
||||
this->num_lut_accumulators =
|
||||
(total_luts_needed + max_luts_per_call - 1) / max_luts_per_call;
|
||||
this->num_lut_accumulators = CEIL_DIV(total_luts_needed, max_luts_per_call);
|
||||
|
||||
stream_luts =
|
||||
new int_radix_lut<Torus> *[num_streams * num_lut_accumulators];
|
||||
@@ -202,15 +194,10 @@ template <typename Torus> struct int_possible_results_buffer {
|
||||
fns.push_back([c](Torus x) -> Torus { return (x == 1) * c; });
|
||||
}
|
||||
|
||||
generate_many_lut_device_accumulator<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), current_lut->get_lut(0, 0),
|
||||
current_lut->get_degree(0), current_lut->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size,
|
||||
params.message_modulus, params.carry_modulus, fns,
|
||||
allocate_gpu_memory);
|
||||
current_lut->generate_and_broadcast_many_lut(
|
||||
streams.active_gpu_subset(1, params.pbs_type), {0}, {fns},
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
current_lut->broadcast_lut(
|
||||
streams.active_gpu_subset(1, params.pbs_type));
|
||||
stream_luts[lut_count++] = current_lut;
|
||||
lut_value_start += luts_in_this_call;
|
||||
}
|
||||
@@ -300,7 +287,7 @@ template <typename Torus> struct int_aggregate_one_hot_buffer {
|
||||
|
||||
lut->generate_and_broadcast_lut(
|
||||
streams.active_gpu_subset(num_blocks, params.pbs_type), {0}, {id_fn},
|
||||
allocate_gpu_memory);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
this->stream_identity_luts[i] = lut;
|
||||
}
|
||||
@@ -317,14 +304,14 @@ template <typename Torus> struct int_aggregate_one_hot_buffer {
|
||||
|
||||
this->message_extract_lut->generate_and_broadcast_lut(
|
||||
streams.active_gpu_subset(num_blocks, params.pbs_type), {0}, {msg_fn},
|
||||
allocate_gpu_memory);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
this->carry_extract_lut = new int_radix_lut<Torus>(
|
||||
streams, params, 1, num_blocks, allocate_gpu_memory, size_tracker);
|
||||
|
||||
this->carry_extract_lut->generate_and_broadcast_lut(
|
||||
streams.active_gpu_subset(num_blocks, params.pbs_type), {0}, {carry_fn},
|
||||
allocate_gpu_memory);
|
||||
LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
this->partial_aggregated_vectors =
|
||||
new CudaRadixCiphertextFFI *[num_streams];
|
||||
@@ -1173,7 +1160,7 @@ template <typename Torus> struct int_unchecked_first_index_of_clear_buffer {
|
||||
|
||||
this->prefix_sum_lut->generate_and_broadcast_bivariate_lut(
|
||||
streams.active_gpu_subset(num_inputs, params.pbs_type), {0},
|
||||
{prefix_sum_fn}, allocate_gpu_memory);
|
||||
{prefix_sum_fn}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
auto cleanup_fn = [ALREADY_SEEN, params](Torus x) -> Torus {
|
||||
Torus val = x % params.message_modulus;
|
||||
@@ -1185,7 +1172,7 @@ template <typename Torus> struct int_unchecked_first_index_of_clear_buffer {
|
||||
streams, params, 1, num_inputs, allocate_gpu_memory, size_tracker);
|
||||
this->cleanup_lut->generate_and_broadcast_lut(
|
||||
streams.active_gpu_subset(num_inputs, params.pbs_type), {0},
|
||||
{cleanup_fn}, allocate_gpu_memory);
|
||||
{cleanup_fn}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
@@ -1353,7 +1340,7 @@ template <typename Torus> struct int_unchecked_first_index_of_buffer {
|
||||
|
||||
this->prefix_sum_lut->generate_and_broadcast_bivariate_lut(
|
||||
streams.active_gpu_subset(num_inputs, params.pbs_type), {0},
|
||||
{prefix_sum_fn}, allocate_gpu_memory);
|
||||
{prefix_sum_fn}, LUT_0_FOR_ALL_BLOCKS);
|
||||
|
||||
auto cleanup_fn = [ALREADY_SEEN, params](Torus x) -> Torus {
|
||||
Torus val = x % params.message_modulus;
|
||||
@@ -1365,7 +1352,7 @@ template <typename Torus> struct int_unchecked_first_index_of_buffer {
|
||||
streams, params, 1, num_inputs, allocate_gpu_memory, size_tracker);
|
||||
this->cleanup_lut->generate_and_broadcast_lut(
|
||||
streams.active_gpu_subset(num_inputs, params.pbs_type), {0},
|
||||
{cleanup_fn}, allocate_gpu_memory);
|
||||
{cleanup_fn}, LUT_0_FOR_ALL_BLOCKS);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
|
||||
@@ -73,9 +73,10 @@ void cleanup_packing_keyswitch_lwe_list_to_glwe(void *stream,
|
||||
int8_t **fp_ks_buffer,
|
||||
bool gpu_memory_allocated);
|
||||
|
||||
void cuda_closest_representable_64(void *stream, uint32_t gpu_index,
|
||||
void const *input, void *output,
|
||||
uint32_t base_log, uint32_t level_count);
|
||||
void cuda_closest_representable_64_async(void *stream, uint32_t gpu_index,
|
||||
void const *input, void *output,
|
||||
uint32_t base_log,
|
||||
uint32_t level_count);
|
||||
}
|
||||
|
||||
#endif // CNCRT_KS_H_
|
||||
|
||||
24
backends/tfhe-cuda-backend/cuda/include/kreyvium/kreyvium.h
Normal file
24
backends/tfhe-cuda-backend/cuda/include/kreyvium/kreyvium.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef KREYVIUM_H
|
||||
#define KREYVIUM_H
|
||||
|
||||
#include "../integer/integer.h"
|
||||
|
||||
extern "C" {
|
||||
// C entry point (64-bit variant) taking the GLWE/LWE dimensions, the
// keyswitch and PBS decomposition parameters, the message/carry moduli and
// the number of Kreyvium inputs. The result is returned as a uint64_t and a
// handle is passed back through `mem_ptr`.
// NOTE(review): presumably this allocates the Kreyvium scratch buffer into
// *mem_ptr and returns the tracked GPU memory size - confirm against the
// definition, which is not part of this header.
uint64_t scratch_cuda_kreyvium_64(
|
||||
CudaStreamsFFI streams, int8_t **mem_ptr, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t message_modulus, uint32_t carry_modulus,
|
||||
PBS_TYPE pbs_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type, uint32_t num_inputs);
|
||||
|
||||
// C entry point taking an output ciphertext (`keystream_output`), read-only
// `key` and `iv` ciphertexts, the scratch handle `mem_ptr` and the
// bootstrapping / keyswitching key arrays (`bsks`, `ksks`).
// NOTE(review): presumably `num_steps` is the number of keystream bits to
// produce per input - confirm against the definition.
void cuda_kreyvium_generate_keystream_64(
|
||||
CudaStreamsFFI streams, CudaRadixCiphertextFFI *keystream_output,
|
||||
const CudaRadixCiphertextFFI *key, const CudaRadixCiphertextFFI *iv,
|
||||
uint32_t num_inputs, uint32_t num_steps, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks);
|
||||
|
||||
// C entry point taking the address of the scratch handle.
// NOTE(review): presumably releases what scratch_cuda_kreyvium_64 created -
// confirm against the definition.
void cleanup_cuda_kreyvium_64(CudaStreamsFFI streams, int8_t **mem_ptr_void);
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -0,0 +1,320 @@
|
||||
#ifndef KREYVIUM_UTILITIES_H
|
||||
#define KREYVIUM_UTILITIES_H
|
||||
#include "../integer/integer_utilities.h"
|
||||
|
||||
// Kreyvium specific constants
|
||||
// The batch size is set to 64 to allow efficient parallel processing of 64
|
||||
// steps at once.
|
||||
constexpr uint32_t KREYVIUM_BATCH_SIZE = 64;
|
||||
|
||||
// In each Kreyvium step, there are exactly 3 non-linear AND operations:
|
||||
// 1. (c109 & c108)
|
||||
// 2. (a91 & a90)
|
||||
// 3. (b82 & b81)
|
||||
constexpr uint32_t KREYVIUM_NUM_AND_GATES = 3;
|
||||
|
||||
// In each Kreyvium step, there are 4 paths that require a "flush"
|
||||
// to noise-cancel and extract the bit:
|
||||
// 1. New bit for Register A
|
||||
// 2. New bit for Register B
|
||||
// 3. New bit for Register C
|
||||
// 4. The Output Keystream bit
|
||||
constexpr uint32_t KREYVIUM_NUM_FLUSH_PATHS = 4;
|
||||
|
||||
/// Struct to hold the LUTs.
|
||||
template <typename Torus> struct int_kreyvium_lut_buffers {
|
||||
// Bivariate AND Gate LUT:
|
||||
// AND operation: f(a, b) = (a & 1) & (b & 1).
|
||||
// This is a Bivariate PBS used for the non-linear parts of Kreyvium.
|
||||
int_radix_lut<Torus> *and_lut;
|
||||
|
||||
// Univariate Flush/Identity LUT:
|
||||
// MESSAGE EXTRACTION operation: f(x) = x & 1.
|
||||
// This is a Univariate PBS used to "flush" the state (reset noise/carries).
|
||||
int_radix_lut<Torus> *flush_lut;
|
||||
|
||||
int_kreyvium_lut_buffers(CudaStreams streams, const int_radix_params ¶ms,
|
||||
bool allocate_gpu_memory, uint32_t num_inputs,
|
||||
uint64_t &size_tracker) {
|
||||
|
||||
uint32_t and_ops =
|
||||
num_inputs * KREYVIUM_BATCH_SIZE * KREYVIUM_NUM_AND_GATES;
|
||||
uint32_t flush_ops =
|
||||
num_inputs * KREYVIUM_BATCH_SIZE * KREYVIUM_NUM_FLUSH_PATHS;
|
||||
|
||||
this->and_lut = new int_radix_lut<Torus>(streams, params, 1, and_ops,
|
||||
allocate_gpu_memory, size_tracker);
|
||||
|
||||
std::function<Torus(Torus, Torus)> and_lambda =
|
||||
[](Torus lhs, Torus rhs) -> Torus { return (lhs & 1) & (rhs & 1); };
|
||||
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->and_lut->get_lut(0, 0),
|
||||
this->and_lut->get_degree(0), this->and_lut->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, and_lambda, allocate_gpu_memory);
|
||||
|
||||
auto active_streams_and =
|
||||
streams.active_gpu_subset(and_ops, params.pbs_type);
|
||||
this->and_lut->broadcast_lut(active_streams_and);
|
||||
this->and_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
|
||||
|
||||
this->flush_lut = new int_radix_lut<Torus>(
|
||||
streams, params, 1, flush_ops, allocate_gpu_memory, size_tracker);
|
||||
|
||||
std::function<Torus(Torus)> flush_lambda = [](Torus x) -> Torus {
|
||||
return x & 1;
|
||||
};
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->flush_lut->get_lut(0, 0),
|
||||
this->flush_lut->get_degree(0), this->flush_lut->get_max_degree(0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, flush_lambda, allocate_gpu_memory);
|
||||
|
||||
auto active_streams_flush =
|
||||
streams.active_gpu_subset(flush_ops, params.pbs_type);
|
||||
this->flush_lut->broadcast_lut(active_streams_flush);
|
||||
this->flush_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
this->and_lut->release(streams);
|
||||
delete this->and_lut;
|
||||
this->and_lut = nullptr;
|
||||
|
||||
this->flush_lut->release(streams);
|
||||
delete this->flush_lut;
|
||||
this->flush_lut = nullptr;
|
||||
|
||||
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
|
||||
}
|
||||
};
|
||||
|
||||
/// Struct to hold the Kreyvium internal state and temporary workspaces.
|
||||
template <typename Torus> struct int_kreyvium_state_workspaces {
|
||||
|
||||
CudaRadixCiphertextFFI *a_reg;
|
||||
CudaRadixCiphertextFFI *b_reg;
|
||||
CudaRadixCiphertextFFI *c_reg;
|
||||
CudaRadixCiphertextFFI *k_reg;
|
||||
CudaRadixCiphertextFFI *iv_reg;
|
||||
|
||||
// Shift Workspace
|
||||
CudaRadixCiphertextFFI *shift_workspace;
|
||||
|
||||
// Temporary Update Buffers
|
||||
CudaRadixCiphertextFFI *temp_a;
|
||||
CudaRadixCiphertextFFI *temp_b;
|
||||
CudaRadixCiphertextFFI *temp_c;
|
||||
|
||||
CudaRadixCiphertextFFI *packed_and_lhs;
|
||||
CudaRadixCiphertextFFI *packed_and_rhs;
|
||||
CudaRadixCiphertextFFI *packed_and_out;
|
||||
|
||||
// Flush/Cleanup Packing Buffers
|
||||
CudaRadixCiphertextFFI *packed_flush_in;
|
||||
CudaRadixCiphertextFFI *packed_flush_out;
|
||||
|
||||
uint32_t max_batch_blocks;
|
||||
uint32_t k_offset;
|
||||
uint32_t iv_offset;
|
||||
|
||||
int_kreyvium_state_workspaces(CudaStreams streams,
|
||||
const int_radix_params ¶ms,
|
||||
bool allocate_gpu_memory, uint32_t num_inputs,
|
||||
uint64_t &size_tracker) {
|
||||
|
||||
uint32_t batch_blocks = KREYVIUM_BATCH_SIZE * num_inputs;
|
||||
this->max_batch_blocks = batch_blocks;
|
||||
this->k_offset = 0;
|
||||
this->iv_offset = 0;
|
||||
|
||||
this->a_reg = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->a_reg, 93 * num_inputs,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
this->b_reg = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->b_reg, 84 * num_inputs,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
this->c_reg = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->c_reg, 111 * num_inputs,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
this->k_reg = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->k_reg, 128 * num_inputs,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
this->iv_reg = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->iv_reg, 128 * num_inputs,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
this->shift_workspace = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->shift_workspace,
|
||||
128 * num_inputs, params.big_lwe_dimension, size_tracker,
|
||||
allocate_gpu_memory);
|
||||
|
||||
this->temp_a = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->temp_a, batch_blocks,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
this->temp_b = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->temp_b, batch_blocks,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
this->temp_c = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->temp_c, batch_blocks,
|
||||
params.big_lwe_dimension, size_tracker, allocate_gpu_memory);
|
||||
|
||||
this->packed_and_lhs = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->packed_and_lhs,
|
||||
KREYVIUM_NUM_AND_GATES * batch_blocks, params.big_lwe_dimension,
|
||||
size_tracker, allocate_gpu_memory);
|
||||
|
||||
this->packed_and_rhs = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->packed_and_rhs,
|
||||
KREYVIUM_NUM_AND_GATES * batch_blocks, params.big_lwe_dimension,
|
||||
size_tracker, allocate_gpu_memory);
|
||||
|
||||
this->packed_and_out = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->packed_and_out,
|
||||
KREYVIUM_NUM_AND_GATES * batch_blocks, params.big_lwe_dimension,
|
||||
size_tracker, allocate_gpu_memory);
|
||||
|
||||
this->packed_flush_in = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->packed_flush_in,
|
||||
KREYVIUM_NUM_FLUSH_PATHS * batch_blocks, params.big_lwe_dimension,
|
||||
size_tracker, allocate_gpu_memory);
|
||||
|
||||
this->packed_flush_out = new CudaRadixCiphertextFFI;
|
||||
create_zero_radix_ciphertext_async<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), this->packed_flush_out,
|
||||
KREYVIUM_NUM_FLUSH_PATHS * batch_blocks, params.big_lwe_dimension,
|
||||
size_tracker, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams, bool allocate_gpu_memory) {
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->a_reg, allocate_gpu_memory);
|
||||
delete this->a_reg;
|
||||
this->a_reg = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->b_reg, allocate_gpu_memory);
|
||||
delete this->b_reg;
|
||||
this->b_reg = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->c_reg, allocate_gpu_memory);
|
||||
delete this->c_reg;
|
||||
this->c_reg = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->k_reg, allocate_gpu_memory);
|
||||
delete this->k_reg;
|
||||
this->k_reg = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->iv_reg, allocate_gpu_memory);
|
||||
delete this->iv_reg;
|
||||
this->iv_reg = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->shift_workspace, allocate_gpu_memory);
|
||||
delete this->shift_workspace;
|
||||
this->shift_workspace = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->temp_a, allocate_gpu_memory);
|
||||
delete this->temp_a;
|
||||
this->temp_a = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->temp_b, allocate_gpu_memory);
|
||||
delete this->temp_b;
|
||||
this->temp_b = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->temp_c, allocate_gpu_memory);
|
||||
delete this->temp_c;
|
||||
this->temp_c = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->packed_and_lhs, allocate_gpu_memory);
|
||||
delete this->packed_and_lhs;
|
||||
this->packed_and_lhs = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->packed_and_rhs, allocate_gpu_memory);
|
||||
delete this->packed_and_rhs;
|
||||
this->packed_and_rhs = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->packed_and_out, allocate_gpu_memory);
|
||||
delete this->packed_and_out;
|
||||
this->packed_and_out = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->packed_flush_in, allocate_gpu_memory);
|
||||
delete this->packed_flush_in;
|
||||
this->packed_flush_in = nullptr;
|
||||
|
||||
release_radix_ciphertext_async(streams.stream(0), streams.gpu_index(0),
|
||||
this->packed_flush_out, allocate_gpu_memory);
|
||||
delete this->packed_flush_out;
|
||||
this->packed_flush_out = nullptr;
|
||||
|
||||
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
|
||||
}
|
||||
};
|
||||
|
||||
template <typename Torus> struct int_kreyvium_buffer {
|
||||
int_radix_params params;
|
||||
bool allocate_gpu_memory;
|
||||
uint32_t num_inputs;
|
||||
|
||||
int_kreyvium_lut_buffers<Torus> *luts;
|
||||
int_kreyvium_state_workspaces<Torus> *state;
|
||||
|
||||
int_kreyvium_buffer(CudaStreams streams, const int_radix_params ¶ms,
|
||||
bool allocate_gpu_memory, uint32_t num_inputs,
|
||||
uint64_t &size_tracker) {
|
||||
this->params = params;
|
||||
this->allocate_gpu_memory = allocate_gpu_memory;
|
||||
this->num_inputs = num_inputs;
|
||||
|
||||
this->luts = new int_kreyvium_lut_buffers<Torus>(
|
||||
streams, params, allocate_gpu_memory, num_inputs, size_tracker);
|
||||
|
||||
this->state = new int_kreyvium_state_workspaces<Torus>(
|
||||
streams, params, allocate_gpu_memory, num_inputs, size_tracker);
|
||||
}
|
||||
|
||||
void release(CudaStreams streams) {
|
||||
luts->release(streams);
|
||||
delete luts;
|
||||
luts = nullptr;
|
||||
|
||||
state->release(streams, allocate_gpu_memory);
|
||||
delete state;
|
||||
state = nullptr;
|
||||
|
||||
cuda_synchronize_stream(streams.stream(0), streams.gpu_index(0));
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
@@ -33,7 +33,7 @@ template <typename Torus> struct int_trivium_lut_buffers {
|
||||
auto active_streams_and =
|
||||
streams.active_gpu_subset(total_lut_ops, params.pbs_type);
|
||||
this->and_lut->generate_and_broadcast_bivariate_lut(
|
||||
active_streams_and, {0}, {and_lambda}, allocate_gpu_memory);
|
||||
active_streams_and, {0}, {and_lambda}, LUT_0_FOR_ALL_BLOCKS);
|
||||
this->and_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
|
||||
|
||||
uint32_t total_flush_ops = num_trivium_inputs * BATCH_SIZE * 4;
|
||||
@@ -48,7 +48,7 @@ template <typename Torus> struct int_trivium_lut_buffers {
|
||||
auto active_streams_flush =
|
||||
streams.active_gpu_subset(total_flush_ops, params.pbs_type);
|
||||
this->flush_lut->generate_and_broadcast_lut(
|
||||
active_streams_flush, {0}, {flush_lambda}, allocate_gpu_memory);
|
||||
active_streams_flush, {0}, {flush_lambda}, LUT_0_FOR_ALL_BLOCKS);
|
||||
this->flush_lut->setup_gemm_batch_ks_temp_buffers(size_tracker);
|
||||
}
|
||||
|
||||
|
||||
@@ -14,10 +14,10 @@ uint64_t scratch_cuda_expand_without_verification_64(
|
||||
uint32_t casting_output_dimension, uint32_t casting_ks_level,
|
||||
uint32_t casting_ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, const uint32_t *num_lwes_per_compact_list,
|
||||
const bool *is_boolean_array, uint32_t num_compact_lists,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
KS_TYPE casting_key_type, bool allocate_gpu_memory,
|
||||
PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
const bool *is_boolean_array, const uint32_t is_boolean_array_len,
|
||||
uint32_t num_compact_lists, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, KS_TYPE casting_key_type,
|
||||
bool allocate_gpu_memory, PBS_MS_REDUCTION_T noise_reduction_type);
|
||||
|
||||
void cuda_expand_without_verification_64(
|
||||
CudaStreamsFFI streams, void *lwe_array_out,
|
||||
|
||||
@@ -118,7 +118,8 @@ template <typename Torus> struct zk_expand_mem {
|
||||
zk_expand_mem(CudaStreams streams, int_radix_params computing_params,
|
||||
int_radix_params casting_params, KS_TYPE casting_key_type,
|
||||
const uint32_t *num_lwes_per_compact_list,
|
||||
const bool *is_boolean_array, uint32_t num_compact_lists,
|
||||
const bool *is_boolean_array,
|
||||
const uint32_t is_boolean_array_len, uint32_t num_compact_lists,
|
||||
bool allocate_gpu_memory, uint64_t &size_tracker)
|
||||
: computing_params(computing_params), casting_params(casting_params),
|
||||
num_compact_lists(num_compact_lists),
|
||||
@@ -236,35 +237,65 @@ template <typename Torus> struct zk_expand_mem {
|
||||
for (int i = 0; i < num_packed_msgs * num_lwes_in_kth; i++) {
|
||||
auto lwe_index = i + num_packed_msgs * offset;
|
||||
auto lwe_index_in_list = i % num_lwes_in_kth;
|
||||
PANIC_IF_FALSE(lwe_index < num_packed_msgs * num_lwes,
|
||||
"Cuda error: index %d is beyond the max value %d",
|
||||
lwe_index, num_packed_msgs * num_lwes);
|
||||
h_indexes_in[lwe_index] = lwe_index_in_list + offset;
|
||||
h_indexes_out[lwe_index] =
|
||||
num_packed_msgs * h_indexes_in[lwe_index] + i / num_lwes_in_kth;
|
||||
// If the input relates to a boolean, shift the LUT so the correct one
|
||||
// with sanitization is used
|
||||
auto boolean_offset =
|
||||
is_boolean_array[h_indexes_out[lwe_index]] ? num_packed_msgs : 0;
|
||||
h_lut_indexes[lwe_index] = i / num_lwes_in_kth + boolean_offset;
|
||||
PANIC_IF_FALSE(h_indexes_in[lwe_index] < num_packed_msgs * num_lwes,
|
||||
"Cuda error: index %d is beyond the max value %d",
|
||||
h_indexes_in[lwe_index], num_packed_msgs * num_lwes);
|
||||
PANIC_IF_FALSE(h_indexes_out[lwe_index] < num_packed_msgs * num_lwes,
|
||||
"Cuda error: index %d is beyond the max value %d",
|
||||
h_indexes_out[lwe_index], num_packed_msgs * num_lwes);
|
||||
// is_boolean_array tells us which input is a boolean and thus the
|
||||
// related output needs boolean sanitization. It naturally has
|
||||
// total_blocks entries, but h_indexes_out reaches
|
||||
// message_modulus * ceil(total_blocks/2) - 1. When total_blocks is odd,
|
||||
// the ceiling causes out-of-bounds access. Reading garbage "true" would
|
||||
// set h_lut_indexes to an invalid index pointing to uninitialized
|
||||
// memory instead of a real LUT. Rust pads is_boolean_array with FALSE
|
||||
// to match.
|
||||
PANIC_IF_FALSE(h_indexes_out[lwe_index] < is_boolean_array_len,
|
||||
"Cuda error: index %d for is_boolean_array is out of "
|
||||
"bounds (len is %d)",
|
||||
h_indexes_out[lwe_index], is_boolean_array_len);
|
||||
}
|
||||
offset += num_lwes_in_kth;
|
||||
}
|
||||
|
||||
message_and_carry_extract_luts->set_lwe_indexes(
|
||||
streams.stream(0), streams.gpu_index(0), h_indexes_in, h_indexes_out);
|
||||
auto lut_indexes = message_and_carry_extract_luts->get_lut_indexes(0, 0);
|
||||
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(
|
||||
lut_indexes, h_lut_indexes, num_packed_msgs * num_lwes * sizeof(Torus),
|
||||
streams.stream(0), streams.gpu_index(0), allocate_gpu_memory);
|
||||
|
||||
auto active_streams =
|
||||
streams.active_gpu_subset(2 * num_lwes, params.pbs_type);
|
||||
|
||||
// Index generator for message/carry extraction LUTs
|
||||
auto index_gen = [num_compact_lists,
|
||||
num_lwes_per_compact_list =
|
||||
this->num_lwes_per_compact_list,
|
||||
num_packed_msgs, is_boolean_array,
|
||||
h_indexes_out](Torus *h_lut_indexes, uint32_t) {
|
||||
auto offset = 0;
|
||||
for (int k = 0; k < num_compact_lists; k++) {
|
||||
auto num_lwes_in_kth = num_lwes_per_compact_list[k];
|
||||
for (int i = 0; i < num_packed_msgs * num_lwes_in_kth; i++) {
|
||||
auto lwe_index = i + num_packed_msgs * offset;
|
||||
auto boolean_offset =
|
||||
is_boolean_array[h_indexes_out[lwe_index]] ? num_packed_msgs : 0;
|
||||
h_lut_indexes[lwe_index] = i / num_lwes_in_kth + boolean_offset;
|
||||
}
|
||||
offset += num_lwes_in_kth;
|
||||
}
|
||||
};
|
||||
|
||||
message_and_carry_extract_luts->generate_and_broadcast_lut(
|
||||
active_streams, {0, 1, 2, 3},
|
||||
{message_extract_lut_f, carry_extract_lut_f,
|
||||
message_extract_and_sanitize_bool_lut_f,
|
||||
carry_extract_and_sanitize_bool_lut_f},
|
||||
gpu_memory_allocated);
|
||||
index_gen, true, {}, h_lut_indexes);
|
||||
|
||||
message_and_carry_extract_luts->allocate_lwe_vector_for_non_trivial_indexes(
|
||||
active_streams, 2 * num_lwes, size_tracker, allocate_gpu_memory);
|
||||
|
||||
@@ -183,9 +183,10 @@ void cuda_packing_keyswitch_lwe_list_to_glwe_128(
|
||||
base_log, level_count, num_lwes);
|
||||
}
|
||||
|
||||
void cuda_closest_representable_64(void *stream, uint32_t gpu_index,
|
||||
void const *input, void *output,
|
||||
uint32_t base_log, uint32_t level_count) {
|
||||
void cuda_closest_representable_64_async(void *stream, uint32_t gpu_index,
|
||||
void const *input, void *output,
|
||||
uint32_t base_log,
|
||||
uint32_t level_count) {
|
||||
host_cuda_closest_representable(static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<const uint64_t *>(input),
|
||||
static_cast<uint64_t *>(output), base_log,
|
||||
|
||||
@@ -10,7 +10,6 @@
|
||||
#include "polynomial/polynomial_math.cuh"
|
||||
#include "torus.cuh"
|
||||
#include "utils/helper.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
@@ -351,6 +350,7 @@ keyswitch(KSTorus *lwe_array_out, const Torus *__restrict__ lwe_output_indexes,
|
||||
Torus state =
|
||||
init_decomposer_state(block_lwe_array_in[i], base_log, level_count);
|
||||
uint32_t offset = i * level_count * (lwe_dimension_out + 1);
|
||||
#pragma unroll 1
|
||||
for (int j = 0; j < level_count; j++) {
|
||||
|
||||
KSTorus decomposed = decompose_one<Torus>(state, mask_mod_b, base_log);
|
||||
@@ -363,16 +363,15 @@ keyswitch(KSTorus *lwe_array_out, const Torus *__restrict__ lwe_output_indexes,
|
||||
lwe_acc_out[shmem_index] = local_lwe_out;
|
||||
}
|
||||
|
||||
if (tid <= lwe_dimension_out) {
|
||||
for (int offset = blockDim.y / 2; offset > 0 && threadIdx.y < offset;
|
||||
offset /= 2) {
|
||||
__syncthreads();
|
||||
for (int offset = blockDim.y / 2; offset > 0; offset /= 2) {
|
||||
__syncthreads();
|
||||
if (tid <= lwe_dimension_out && threadIdx.y < offset) {
|
||||
lwe_acc_out[shmem_index] +=
|
||||
lwe_acc_out[shmem_index + offset * blockDim.x];
|
||||
}
|
||||
if (threadIdx.y == 0)
|
||||
block_lwe_array_out[tid] = -lwe_acc_out[shmem_index];
|
||||
}
|
||||
if (tid <= lwe_dimension_out && threadIdx.y == 0)
|
||||
block_lwe_array_out[tid] = -lwe_acc_out[shmem_index];
|
||||
}
|
||||
|
||||
template <typename Torus, typename KSTorus>
|
||||
|
||||
@@ -12,12 +12,9 @@
|
||||
#include "polynomial/polynomial_math.cuh"
|
||||
#include "torus.cuh"
|
||||
#include "utils/helper.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#define CEIL_DIV(M, N) ((M) + (N)-1) / (N)
|
||||
|
||||
// Finish the keyswitching operation and prepare GLWEs for accumulation.
|
||||
// 1. Finish the keyswitching computation partially performed with a GEMM:
|
||||
// - negate the dot product between the GLWE and KSK polynomial
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
#include "helper_multi_gpu.h"
|
||||
#include "polynomial/parameters.cuh"
|
||||
#include "types/int128.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include "utils/helper.cuh"
|
||||
#include <limits>
|
||||
|
||||
template <typename T>
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "device.h"
|
||||
#include "utils/helper.cuh"
|
||||
#include <cstdint>
|
||||
#include <cuda_runtime.h>
|
||||
#include <mutex>
|
||||
@@ -6,6 +7,27 @@
|
||||
#include <cuda_profiler_api.h>
|
||||
#endif
|
||||
|
||||
void validate_device_ptr_and_gpu_index(const void *ptr, uint32_t gpu_index) {
|
||||
GPU_ASSERT(ptr != nullptr, "Cuda error: null device ptr");
|
||||
|
||||
cudaPointerAttributes attr;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr, ptr));
|
||||
if (attr.device != gpu_index || attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid device pointer.")
|
||||
}
|
||||
}
|
||||
|
||||
int validate_device_ptr(const void *ptr) {
|
||||
GPU_ASSERT(ptr != nullptr, "Cuda error: null device ptr");
|
||||
|
||||
cudaPointerAttributes attr;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr, ptr));
|
||||
if (attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid device pointer.")
|
||||
}
|
||||
return attr.device;
|
||||
}
|
||||
|
||||
uint32_t cuda_get_device() {
|
||||
int device;
|
||||
check_cuda_error(cudaGetDevice(&device));
|
||||
@@ -30,8 +52,9 @@ bool mem_pools_enabled = false;
|
||||
// better results.
|
||||
void cuda_setup_mempool(uint32_t caller_gpu_index) {
|
||||
if (!mem_pools_enabled) {
|
||||
pool_mutex.lock();
|
||||
if (mem_pools_enabled)
|
||||
std::lock_guard lock(pool_mutex);
|
||||
if (mem_pools_enabled) // double-check - mem_pools_enabled might have been
|
||||
// changed in a different thread
|
||||
return; // If mem pools are already enabled, we don't need to do anything
|
||||
|
||||
// We do it only once for all GPUs
|
||||
@@ -78,7 +101,6 @@ void cuda_setup_mempool(uint32_t caller_gpu_index) {
|
||||
}
|
||||
// We return to the original gpu_index
|
||||
cuda_set_device(caller_gpu_index);
|
||||
pool_mutex.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -234,60 +256,61 @@ bool cuda_check_support_thread_block_clusters() {
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Copy memory to the GPU asynchronously
|
||||
/// Copy memory from the CPU to a GPU with size tracking.
|
||||
/// This copy is asynchronous only if the CPU memory was pinned, i.e.
|
||||
/// allocated using cudaMallocHost. This was shown to come with a performance
|
||||
/// penalty if we allocate all CPU data in this way in the backend, so
|
||||
/// cudaMallocHost is only used in specific places where we need an
|
||||
/// asynchronous data copy from the CPU to all the GPUs simultaneously (for
|
||||
/// example to copy the bootstrapping key).
|
||||
/// The copy only happens if gpu_memory_allocated is true.
|
||||
void cuda_memcpy_with_size_tracking_async_to_gpu(void *dest, const void *src,
|
||||
uint64_t size,
|
||||
cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
bool gpu_memory_allocated) {
|
||||
|
||||
GPU_ASSERT(src != nullptr, "Cuda error: null device ptr");
|
||||
|
||||
if (size == 0 || !gpu_memory_allocated)
|
||||
return;
|
||||
cudaPointerAttributes attr;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr, dest));
|
||||
if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid device pointer in async copy to GPU.")
|
||||
}
|
||||
validate_device_ptr_and_gpu_index(dest, gpu_index);
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyHostToDevice, stream));
|
||||
}
|
||||
|
||||
/// Copy memory to the GPU asynchronously
|
||||
/// Copy memory from the CPU to a GPU.
|
||||
/// This copy is asynchronous only if the CPU memory was pinned, i.e.
|
||||
/// allocated using cudaMallocHost. This was shown to come with a performance
|
||||
/// penalty if we allocate all CPU data in this way in the backend, so
|
||||
/// cudaMallocHost is only used in specific places where we need an
|
||||
/// asynchronous data copy from the CPU to all the GPUs simultaneously (for
|
||||
/// example to copy the bootstrapping key).
|
||||
void cuda_memcpy_async_to_gpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
cuda_memcpy_with_size_tracking_async_to_gpu(dest, src, size, stream,
|
||||
gpu_index, true);
|
||||
}
|
||||
|
||||
/// Copy memory within a GPU asynchronously
|
||||
/// Copy memory within a GPU asynchronously.
|
||||
/// The copy only happens if gpu_memory_allocated is true
|
||||
void cuda_memcpy_with_size_tracking_async_gpu_to_gpu(
|
||||
void *dest, void const *src, uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index, bool gpu_memory_allocated) {
|
||||
if (size == 0 || !gpu_memory_allocated)
|
||||
return;
|
||||
GPU_ASSERT(dest != nullptr,
|
||||
"Cuda error: trying to copy gpu->gpu to null ptr");
|
||||
GPU_ASSERT(src != nullptr,
|
||||
"Cuda error: trying to copy gpu->gpu from null ptr");
|
||||
|
||||
cudaPointerAttributes attr_dest;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr_dest, dest));
|
||||
PANIC_IF_FALSE(
|
||||
attr_dest.type == cudaMemoryTypeDevice,
|
||||
"Cuda error: invalid dest device pointer in copy from GPU to GPU.");
|
||||
cudaPointerAttributes attr_src;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr_src, src));
|
||||
PANIC_IF_FALSE(
|
||||
attr_src.type == cudaMemoryTypeDevice,
|
||||
"Cuda error: invalid src device pointer in copy from GPU to GPU.");
|
||||
int src_gpu_index = validate_device_ptr(src);
|
||||
int dest_gpu_index = validate_device_ptr(dest);
|
||||
cuda_set_device(gpu_index);
|
||||
if (attr_src.device == attr_dest.device) {
|
||||
if (src_gpu_index == dest_gpu_index) {
|
||||
check_cuda_error(
|
||||
cudaMemcpyAsync(dest, src, size, cudaMemcpyDeviceToDevice, stream));
|
||||
} else {
|
||||
check_cuda_error(cudaMemcpyPeerAsync(dest, attr_dest.device, src,
|
||||
attr_src.device, size, stream));
|
||||
check_cuda_error(cudaMemcpyPeerAsync(dest, dest_gpu_index, src,
|
||||
src_gpu_index, size, stream));
|
||||
}
|
||||
}
|
||||
void cuda_memcpy_async_gpu_to_gpu(void *dest, void const *src, uint64_t size,
|
||||
@@ -327,21 +350,20 @@ void cuda_synchronize_device(uint32_t gpu_index) {
|
||||
check_cuda_error(cudaDeviceSynchronize());
|
||||
}
|
||||
|
||||
/// cuda_memset sets bytes, we basically only use it to initialize data to 0
|
||||
/// The memset only happens if gpu_memory_allocated is true
|
||||
void cuda_memset_with_size_tracking_async(void *dest, uint64_t val,
|
||||
uint64_t size, cudaStream_t stream,
|
||||
uint32_t gpu_index,
|
||||
bool gpu_memory_allocated) {
|
||||
if (size == 0 || !gpu_memory_allocated)
|
||||
return;
|
||||
cudaPointerAttributes attr;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr, dest));
|
||||
if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid dest device pointer in cuda memset.")
|
||||
}
|
||||
validate_device_ptr_and_gpu_index(dest, gpu_index);
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(cudaMemsetAsync(dest, val, size, stream));
|
||||
}
|
||||
|
||||
/// cuda_memset sets bytes, we basically only use it to initialize data to 0
|
||||
void cuda_memset_async(void *dest, uint64_t val, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
cuda_memset_with_size_tracking_async(dest, val, size, stream, gpu_index,
|
||||
@@ -366,7 +388,7 @@ void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
}
|
||||
cuda_set_device(gpu_index);
|
||||
int block_size = 256;
|
||||
int num_blocks = (n + block_size - 1) / block_size;
|
||||
int num_blocks = CEIL_DIV(n, block_size);
|
||||
|
||||
// Launch the kernel
|
||||
cuda_set_value_kernel<Torus>
|
||||
@@ -384,15 +406,15 @@ template void cuda_set_value_async(cudaStream_t stream, uint32_t gpu_index,
|
||||
uint32_t n);
|
||||
|
||||
/// Copy memory to the CPU asynchronously
|
||||
/// This comes with a big penalty on performance even if the CPU
|
||||
/// memory is pinned (using cudaMallocHost for the CPU allocation),
|
||||
/// so it should be avoided at all costs
|
||||
void cuda_memcpy_async_to_cpu(void *dest, const void *src, uint64_t size,
|
||||
cudaStream_t stream, uint32_t gpu_index) {
|
||||
GPU_ASSERT(dest != nullptr, "Cuda error: null host ptr");
|
||||
if (size == 0)
|
||||
return;
|
||||
cudaPointerAttributes attr;
|
||||
check_cuda_error(cudaPointerGetAttributes(&attr, src));
|
||||
if (attr.device != gpu_index && attr.type != cudaMemoryTypeDevice) {
|
||||
PANIC("Cuda error: invalid src device pointer in copy to CPU async.")
|
||||
}
|
||||
validate_device_ptr_and_gpu_index(src, gpu_index);
|
||||
|
||||
cuda_set_device(gpu_index);
|
||||
check_cuda_error(
|
||||
|
||||
@@ -68,9 +68,15 @@ struct alignas(16) f128 {
|
||||
auto t = two_sum(a.lo, b.lo);
|
||||
|
||||
double hi = s.hi;
|
||||
#ifdef __CUDA_ARCH__
|
||||
double lo = __dadd_rn(s.lo, t.hi);
|
||||
hi = __dadd_rn(hi, lo);
|
||||
lo = __dsub_rn(lo, __dsub_rn(hi, s.hi));
|
||||
#else
|
||||
double lo = s.lo + t.hi;
|
||||
hi = hi + lo;
|
||||
lo = lo - (hi - s.hi);
|
||||
#endif
|
||||
|
||||
return f128(hi, lo + t.lo);
|
||||
}
|
||||
@@ -104,8 +110,13 @@ struct alignas(16) f128 {
|
||||
__host__ __device__ static f128 sub(const f128 &a, const f128 &b) {
|
||||
auto s = two_diff(a.hi, b.hi);
|
||||
auto t = two_diff(a.lo, b.lo);
|
||||
#ifdef __CUDA_ARCH__
|
||||
s = quick_two_sum(s.hi, __dadd_rn(s.lo, t.hi));
|
||||
return quick_two_sum(s.hi, __dadd_rn(s.lo, t.lo));
|
||||
#else
|
||||
s = quick_two_sum(s.hi, s.lo + t.hi);
|
||||
return quick_two_sum(s.hi, s.lo + t.lo);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Multiplication
|
||||
@@ -220,16 +231,16 @@ struct f128x2 {
|
||||
// Subtraction
|
||||
__host__ __device__ friend f128x2 operator-(const f128x2 &a,
|
||||
const f128x2 &b) {
|
||||
return f128x2(f128::add(a.re, f128(-b.re.hi, -b.re.lo)),
|
||||
f128::add(a.im, f128(-b.im.hi, -b.im.lo)));
|
||||
return f128x2(f128::sub_estimate(a.re, b.re),
|
||||
f128::sub_estimate(a.im, b.im));
|
||||
}
|
||||
|
||||
// Multiplication (complex multiplication)
|
||||
__host__ __device__ friend f128x2 operator*(const f128x2 &a,
|
||||
const f128x2 &b) {
|
||||
const f128 a_im_b_im = f128::mul(a.im, b.im);
|
||||
f128 real_part =
|
||||
f128::add(f128::mul(a.re, b.re),
|
||||
f128(-f128::mul(a.im, b.im).hi, -f128::mul(a.im, b.im).lo));
|
||||
f128::add(f128::mul(a.re, b.re), f128(-a_im_b_im.hi, -a_im_b_im.lo));
|
||||
f128 imag_part = f128::add(f128::mul(a.re, b.im), f128::mul(a.im, b.re));
|
||||
return f128x2(real_part, imag_part);
|
||||
}
|
||||
@@ -243,8 +254,8 @@ struct f128x2 {
|
||||
|
||||
// Subtraction-assignment operator
|
||||
__host__ __device__ f128x2 &operator-=(const f128x2 &other) {
|
||||
re = f128::add(re, f128(-other.re.hi, -other.re.lo));
|
||||
im = f128::add(im, f128(-other.im.hi, -other.im.lo));
|
||||
re = f128::sub_estimate(re, other.re);
|
||||
im = f128::sub_estimate(im, other.im);
|
||||
return *this;
|
||||
}
|
||||
|
||||
@@ -261,12 +272,20 @@ struct f128x2 {
|
||||
};
|
||||
|
||||
__host__ __device__ inline uint64_t double_to_bits(double d) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
uint64_t bits = __double_as_longlong(d);
|
||||
#else
|
||||
uint64_t bits = *reinterpret_cast<uint64_t *>(&d);
|
||||
#endif
|
||||
return bits;
|
||||
}
|
||||
|
||||
__host__ __device__ inline double bits_to_double(uint64_t bits) {
|
||||
#ifdef __CUDA_ARCH__
|
||||
double d = __longlong_as_double(bits);
|
||||
#else
|
||||
double d = *reinterpret_cast<double *>(&bits);
|
||||
#endif
|
||||
return d;
|
||||
}
|
||||
|
||||
@@ -275,6 +294,8 @@ __host__ __device__ inline double u128_to_f64(__uint128_t x) {
|
||||
const double A = ONE << 52;
|
||||
const double B = ONE << 104;
|
||||
const double C = ONE << 76;
|
||||
// NOTE: for some reason __longlong_as_double(0x37f0000000000000ULL)
|
||||
// does not work here
|
||||
const double D = 340282366920938500000000000000000000000.;
|
||||
|
||||
const __uint128_t threshold = (ONE << 104);
|
||||
@@ -288,15 +309,20 @@ __host__ __device__ inline double u128_to_f64(__uint128_t x) {
|
||||
|
||||
uint64_t bits_l = A_bits | lower64;
|
||||
double l_temp = bits_to_double(bits_l);
|
||||
double l = l_temp - A;
|
||||
|
||||
uint64_t B_bits = double_to_bits(B);
|
||||
uint64_t top64 = static_cast<uint64_t>(x >> 52);
|
||||
uint64_t bits_h = B_bits | top64;
|
||||
double h_temp = bits_to_double(bits_h);
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
return __dadd_rn(__dsub_rn(l_temp, A), __dsub_rn(h_temp, B));
|
||||
#else
|
||||
double l = l_temp - A;
|
||||
double h = h_temp - B;
|
||||
|
||||
return (l + h);
|
||||
#endif
|
||||
|
||||
} else {
|
||||
uint64_t C_bits = double_to_bits(C);
|
||||
@@ -310,15 +336,20 @@ __host__ __device__ inline double u128_to_f64(__uint128_t x) {
|
||||
|
||||
uint64_t bits_l = C_bits | lower64 | mask_part;
|
||||
double l_temp = bits_to_double(bits_l);
|
||||
double l = l_temp - C;
|
||||
|
||||
uint64_t D_bits = double_to_bits(D);
|
||||
uint64_t top64 = static_cast<uint64_t>(x >> 76);
|
||||
uint64_t bits_h = D_bits | top64;
|
||||
double h_temp = bits_to_double(bits_h);
|
||||
|
||||
#ifdef __CUDA_ARCH__
|
||||
return __dadd_rn(__dsub_rn(l_temp, C), __dsub_rn(h_temp, D));
|
||||
#else
|
||||
double l = l_temp - C;
|
||||
double h = h_temp - D;
|
||||
|
||||
return (l + h);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@@ -389,6 +420,8 @@ __host__ __device__ inline f128 u128_to_signed_to_f128(__uint128_t x) {
|
||||
|
||||
__host__ __device__ inline __uint128_t u128_from_torus_f128(const f128 &a) {
|
||||
auto x = f128::sub_estimate(a, f128::f128_floor(a));
|
||||
// NOTE: for some reason __longlong_as_double(0x37f0000000000000ULL)
|
||||
// does not work here
|
||||
const double normalization = 340282366920938500000000000000000000000.;
|
||||
#ifdef __CUDA_ARCH__
|
||||
x.hi = __dmul_rn(x.hi, normalization);
|
||||
@@ -398,7 +431,7 @@ __host__ __device__ inline __uint128_t u128_from_torus_f128(const f128 &a) {
|
||||
x.lo *= normalization;
|
||||
#endif
|
||||
|
||||
// TODO has to be round
|
||||
x = f128::add_estimate(x, f128(0.5, 0.0));
|
||||
x = f128::f128_floor(x);
|
||||
|
||||
__uint128_t x0 = f64_to_u128(x.hi);
|
||||
|
||||
@@ -12,8 +12,9 @@
|
||||
using Index = unsigned;
|
||||
|
||||
#define NEG_TWID(i) \
|
||||
f128x2(f128(neg_twiddles_re_hi[(i)], neg_twiddles_re_lo[(i)]), \
|
||||
f128(neg_twiddles_im_hi[(i)], neg_twiddles_im_lo[(i)]))
|
||||
f128x2( \
|
||||
f128(__ldg(&neg_twiddles_re_hi[(i)]), __ldg(&neg_twiddles_re_lo[(i)])), \
|
||||
f128(__ldg(&neg_twiddles_im_hi[(i)]), __ldg(&neg_twiddles_im_lo[(i)])))
|
||||
|
||||
#define F64x4_TO_F128x2(f128x2_reg, ind) \
|
||||
f128x2_reg.re.hi = dt_re_hi[ind]; \
|
||||
@@ -75,7 +76,11 @@ __device__ void negacyclic_forward_fft_f128(double *dt_re_hi, double *dt_re_lo,
|
||||
for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
|
||||
Index rank = tid & thread_mask;
|
||||
bool u_stays_in_register = rank < lane_mask;
|
||||
F128x2_TO_F64x4(((u_stays_in_register) ? v[i] : u[i]), tid);
|
||||
if (u_stays_in_register) {
|
||||
F128x2_TO_F64x4(v[i], tid);
|
||||
} else {
|
||||
F128x2_TO_F64x4(u[i], tid);
|
||||
}
|
||||
tid = tid + STRIDE;
|
||||
}
|
||||
__syncthreads();
|
||||
@@ -86,8 +91,11 @@ __device__ void negacyclic_forward_fft_f128(double *dt_re_hi, double *dt_re_lo,
|
||||
Index rank = tid & thread_mask;
|
||||
bool u_stays_in_register = rank < lane_mask;
|
||||
F64x4_TO_F128x2(w, tid ^ lane_mask);
|
||||
u[i] = (u_stays_in_register) ? u[i] : w;
|
||||
v[i] = (u_stays_in_register) ? w : v[i];
|
||||
if (u_stays_in_register) {
|
||||
v[i] = w;
|
||||
} else {
|
||||
u[i] = w;
|
||||
}
|
||||
w = NEG_TWID(tid / lane_mask + twiddle_shift);
|
||||
f128::cplx_f128_mul_assign(w.re, w.im, v[i].re, v[i].im, w.re, w.im);
|
||||
f128::cplx_f128_sub_assign(v[i].re, v[i].im, u[i].re, u[i].im, w.re,
|
||||
@@ -151,7 +159,11 @@ __device__ void negacyclic_backward_fft_f128(double *dt_re_hi, double *dt_re_lo,
|
||||
// keep one of the register for next iteration and store another one in sm
|
||||
Index rank = tid & thread_mask;
|
||||
bool u_stays_in_register = rank < lane_mask;
|
||||
F128x2_TO_F64x4(((u_stays_in_register) ? v[i] : u[i]), tid);
|
||||
if (u_stays_in_register) {
|
||||
F128x2_TO_F64x4(v[i], tid);
|
||||
} else {
|
||||
F128x2_TO_F64x4(u[i], tid);
|
||||
}
|
||||
|
||||
tid = tid + STRIDE;
|
||||
}
|
||||
@@ -165,8 +177,11 @@ __device__ void negacyclic_backward_fft_f128(double *dt_re_hi, double *dt_re_lo,
|
||||
bool u_stays_in_register = rank < lane_mask;
|
||||
F64x4_TO_F128x2(w, tid ^ lane_mask);
|
||||
|
||||
u[i] = (u_stays_in_register) ? u[i] : w;
|
||||
v[i] = (u_stays_in_register) ? w : v[i];
|
||||
if (u_stays_in_register) {
|
||||
v[i] = w;
|
||||
} else {
|
||||
u[i] = w;
|
||||
}
|
||||
|
||||
tid = tid + STRIDE;
|
||||
}
|
||||
@@ -218,7 +233,7 @@ __device__ void convert_u128_to_f128_as_torus(
|
||||
double *out_re_hi, double *out_re_lo, double *out_im_hi, double *out_im_lo,
|
||||
const __uint128_t *in_re, const __uint128_t *in_im) {
|
||||
|
||||
const double normalization = pow(2., -128.);
|
||||
const double normalization = __longlong_as_double(0x37f0000000000000ULL);
|
||||
Index tid = threadIdx.x;
|
||||
// #pragma unroll
|
||||
for (Index i = 0; i < params::opt / 2; i++) {
|
||||
@@ -241,7 +256,7 @@ __device__ void convert_u128_on_regs_to_f128_as_torus(
|
||||
double *out_re_hi, double *out_re_lo, double *out_im_hi, double *out_im_lo,
|
||||
const __uint128_t *in_re_on_regs, const __uint128_t *in_im_on_regs) {
|
||||
|
||||
const double normalization = pow(2., -128.);
|
||||
const double normalization = __longlong_as_double(0x37f0000000000000ULL);
|
||||
Index tid = threadIdx.x;
|
||||
// #pragma unroll
|
||||
for (Index i = 0; i < params::opt / 2; i++) {
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
#include "integer/subtraction.cuh"
|
||||
#include "pbs/programmable_bootstrap_classic.cuh"
|
||||
#include "pbs/programmable_bootstrap_multibit.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include "utils/helper.cuh"
|
||||
|
||||
// lwe_dimension + 1 threads
|
||||
// todo: This kernel MUST be refactored to a binary reduction
|
||||
@@ -98,7 +98,7 @@ __host__ void are_all_comparisons_block_true(
|
||||
|
||||
while (remaining_blocks > 0) {
|
||||
// Split in max_value chunks
|
||||
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
|
||||
int num_chunks = CEIL_DIV(remaining_blocks, max_value);
|
||||
|
||||
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
|
||||
// as in the worst case we will be adding `max_value` ones
|
||||
@@ -134,28 +134,26 @@ __host__ void are_all_comparisons_block_true(
|
||||
auto is_equal_to_num_blocks_lut_f = [chunk_length](Torus x) -> Torus {
|
||||
return x == chunk_length;
|
||||
};
|
||||
generate_device_accumulator_with_cpu_prealloc<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0),
|
||||
is_max_value_lut->get_lut(0, 1), is_max_value_lut->get_degree(1),
|
||||
is_max_value_lut->get_max_degree(1), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
is_equal_to_num_blocks_lut_f, true,
|
||||
are_all_block_true_buffer->preallocated_h_lut);
|
||||
|
||||
Torus *h_lut_indexes = is_max_value_lut->h_lut_indexes;
|
||||
for (int index = 0; index < num_chunks; index++) {
|
||||
if (index == num_chunks - 1) {
|
||||
h_lut_indexes[index] = 1;
|
||||
} else {
|
||||
h_lut_indexes[index] = 0;
|
||||
}
|
||||
}
|
||||
cuda_memcpy_async_to_gpu(is_max_value_lut->get_lut_indexes(0, 0),
|
||||
h_lut_indexes, num_chunks * sizeof(Torus),
|
||||
streams.stream(0), streams.gpu_index(0));
|
||||
auto num_blocks = is_max_value_lut->num_blocks;
|
||||
auto active_streams =
|
||||
streams.active_gpu_subset(num_chunks, params.pbs_type);
|
||||
is_max_value_lut->broadcast_lut(active_streams);
|
||||
|
||||
// Index generator: last chunk uses LUT 1, others use LUT 0
|
||||
auto index_gen = [num_chunks, num_blocks](Torus *h_lut_indexes,
|
||||
uint32_t) {
|
||||
for (uint32_t index = 0; index < num_blocks; index++) {
|
||||
if (index == num_chunks - 1) {
|
||||
h_lut_indexes[index] = 1;
|
||||
} else if (index < num_chunks - 1 || index >= num_chunks) {
|
||||
h_lut_indexes[index] = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
is_max_value_lut->generate_and_broadcast_lut(
|
||||
active_streams, {1}, {is_equal_to_num_blocks_lut_f}, index_gen,
|
||||
true, {are_all_block_true_buffer->preallocated_h_lut});
|
||||
}
|
||||
lut = is_max_value_lut;
|
||||
}
|
||||
@@ -167,15 +165,10 @@ __host__ void are_all_comparisons_block_true(
|
||||
streams, lwe_array_out, accumulator, bsks, ksks, lut, 1);
|
||||
// Reset max_value_lut_indexes before returning, otherwise if the lut is
|
||||
// reused the lut indexes will be wrong
|
||||
memset(is_max_value_lut->h_lut_indexes, 0,
|
||||
is_max_value_lut->num_blocks * sizeof(Torus));
|
||||
cuda_memcpy_async_to_gpu(is_max_value_lut->get_lut_indexes(0, 0),
|
||||
is_max_value_lut->h_lut_indexes,
|
||||
is_max_value_lut->num_blocks * sizeof(Torus),
|
||||
streams.stream(0), streams.gpu_index(0));
|
||||
auto active_gpu_count_is_max = streams.active_gpu_subset(
|
||||
is_max_value_lut->num_blocks, params.pbs_type);
|
||||
is_max_value_lut->broadcast_lut(active_gpu_count_is_max, false);
|
||||
is_max_value_lut->set_lut_indexes_and_broadcast_constant(
|
||||
active_gpu_count_is_max, 0);
|
||||
|
||||
reset_radix_ciphertext_blocks(lwe_array_out, 1);
|
||||
return;
|
||||
@@ -222,7 +215,7 @@ __host__ void is_at_least_one_comparisons_block_true(
|
||||
uint32_t remaining_blocks = num_radix_blocks;
|
||||
while (remaining_blocks > 0) {
|
||||
// Split in max_value chunks
|
||||
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
|
||||
int num_chunks = CEIL_DIV(remaining_blocks, max_value);
|
||||
|
||||
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
|
||||
// as in the worst case we will be adding `max_value` ones
|
||||
@@ -483,14 +476,11 @@ tree_sign_reduction(CudaStreams streams, CudaRadixCiphertextFFI *lwe_array_out,
|
||||
y = x;
|
||||
f = sign_handler_f;
|
||||
}
|
||||
generate_device_accumulator_with_cpu_prealloc<Torus>(
|
||||
streams.stream(0), streams.gpu_index(0), last_lut->get_lut(0, 0),
|
||||
last_lut->get_degree(0), last_lut->get_max_degree(0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus, f, true,
|
||||
tree_buffer->preallocated_h_lut);
|
||||
|
||||
auto active_streams = streams.active_gpu_subset(1, params.pbs_type);
|
||||
last_lut->broadcast_lut(active_streams);
|
||||
last_lut->generate_and_broadcast_lut(active_streams, {0}, {f},
|
||||
LUT_0_FOR_ALL_BLOCKS, true,
|
||||
{tree_buffer->preallocated_h_lut});
|
||||
|
||||
// Last leaf
|
||||
integer_radix_apply_univariate_lookup_table<Torus>(streams, lwe_array_out, y,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user