mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-01-11 15:48:20 -05:00
Compare commits
80 Commits
al/tunifor
...
al/bench_m
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
155e810376 | ||
|
|
aa16aaf3a0 | ||
|
|
3a99ee9718 | ||
|
|
86f07045fe | ||
|
|
b1ce34f8a7 | ||
|
|
4388a3dc99 | ||
|
|
805436839d | ||
|
|
bdbec55e84 | ||
|
|
33131c664a | ||
|
|
1151bb267e | ||
|
|
ce9679f1ee | ||
|
|
23b43c33c7 | ||
|
|
6feaf49906 | ||
|
|
25f4e5f279 | ||
|
|
c1f05cbf85 | ||
|
|
382f44766c | ||
|
|
c47b37c0e1 | ||
|
|
76d1f05e6a | ||
|
|
8b460072d3 | ||
|
|
7c2bcaee15 | ||
|
|
ef86669069 | ||
|
|
76c23d9c1f | ||
|
|
ca18eb3cb0 | ||
|
|
86505a1467 | ||
|
|
e363b76f17 | ||
|
|
3dcf7f2492 | ||
|
|
f24fa62331 | ||
|
|
fd31694608 | ||
|
|
b2fc479b32 | ||
|
|
430061d9dd | ||
|
|
abdbd4b45c | ||
|
|
c34cf6cdb1 | ||
|
|
38a7e4feef | ||
|
|
5465e0f79b | ||
|
|
3a7e186513 | ||
|
|
ef1a85b0c8 | ||
|
|
ee3afe4935 | ||
|
|
8dd419fe3f | ||
|
|
a0ad0c735c | ||
|
|
f034ca8ddc | ||
|
|
d344e70ca9 | ||
|
|
7d5d9dac0b | ||
|
|
d6caecb9d8 | ||
|
|
95772b58e4 | ||
|
|
9d5edfa8a1 | ||
|
|
45717275f6 | ||
|
|
2b17f37506 | ||
|
|
89d24d992e | ||
|
|
564ef4aff6 | ||
|
|
966f940c08 | ||
|
|
b669ba1976 | ||
|
|
04917d3b47 | ||
|
|
6b5f1813c6 | ||
|
|
0898cdd05b | ||
|
|
9584f57dca | ||
|
|
ade9a663c5 | ||
|
|
0ff895861e | ||
|
|
1746811b74 | ||
|
|
7075f45084 | ||
|
|
a1f681e3ff | ||
|
|
24e859dd33 | ||
|
|
8cfe540647 | ||
|
|
baf161e1f6 | ||
|
|
c07fb7cbb4 | ||
|
|
81f071c30e | ||
|
|
530b18063a | ||
|
|
c5caacf56e | ||
|
|
68cfd1008a | ||
|
|
87dbfdcd5e | ||
|
|
770ae22bb6 | ||
|
|
1e19bae29a | ||
|
|
811ae3c551 | ||
|
|
832703a46a | ||
|
|
81e11a6d70 | ||
|
|
100c3ae77a | ||
|
|
db61b0bb9b | ||
|
|
dc8091ad0f | ||
|
|
3ccfb9616a | ||
|
|
83dc9b9453 | ||
|
|
4fe72a15c0 |
53
.github/actions/hyperstack_setup/action.yml
vendored
Normal file
53
.github/actions/hyperstack_setup/action.yml
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
name: Setup Cuda
|
||||
description: Setup Cuda on Hyperstack instance
|
||||
|
||||
inputs:
|
||||
cuda-version:
|
||||
description: Version of Cuda to use
|
||||
required: true
|
||||
gcc-version:
|
||||
description: Version of GCC to use
|
||||
required: true
|
||||
cmake-version:
|
||||
description: Version of cmake to use
|
||||
default: 3.29.6
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ inputs.cmake-version }}/cmake-${{ inputs.cmake-version }}.tar.gz
|
||||
tar -zxvf cmake-${{ inputs.cmake-version }}.tar.gz
|
||||
cd cmake-${{ inputs.cmake-version }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Export CUDA variables
|
||||
shell: bash
|
||||
run: |
|
||||
CUDA_PATH=/usr/local/cuda-${{ inputs.cuda-version }}
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ inputs.cuda-version }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
shell: bash
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ inputs.gcc-version }}";
|
||||
echo "CXX=/usr/bin/g++-${{ inputs.gcc-version }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ inputs.gcc-version }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
shell: bash
|
||||
run: nvidia-smi
|
||||
@@ -26,7 +26,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -50,7 +50,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
13
.github/workflows/aws_tfhe_fast_tests.yml
vendored
13
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -54,10 +54,11 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -132,7 +133,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -158,7 +159,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -198,7 +199,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
|
||||
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -211,7 +212,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
|
||||
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -269,7 +270,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
10
.github/workflows/aws_tfhe_integer_tests.yml
vendored
10
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -42,11 +42,12 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
persist-credentials: "false"
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -58,6 +59,7 @@ jobs:
|
||||
- tfhe/src/core_crypto/**
|
||||
- tfhe/src/shortint/**
|
||||
- tfhe/src/integer/**
|
||||
- .github/workflows/aws_tfhe_integer_tests.yml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (unsigned-integer-tests)
|
||||
@@ -73,7 +75,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -97,7 +99,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -138,7 +140,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -42,11 +42,12 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
persist-credentials: "false"
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -58,6 +59,7 @@ jobs:
|
||||
- tfhe/src/core_crypto/**
|
||||
- tfhe/src/shortint/**
|
||||
- tfhe/src/integer/**
|
||||
- .github/workflows/aws_tfhe_signed_integer_tests.yml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (unsigned-integer-tests)
|
||||
@@ -73,7 +75,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -97,7 +99,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -142,7 +144,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
9
.github/workflows/aws_tfhe_tests.yml
vendored
9
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -63,10 +63,11 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -141,7 +142,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -167,7 +168,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -249,7 +250,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
10
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
10
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -61,7 +61,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
|
||||
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
|
||||
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -119,7 +119,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_boolean.yml
vendored
6
.github/workflows/benchmark_boolean.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -62,7 +62,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -127,7 +127,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_core_crypto.yml
vendored
6
.github/workflows/benchmark_core_crypto.yml
vendored
@@ -26,7 +26,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -58,7 +58,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -115,7 +115,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_erc20.yml
vendored
6
.github/workflows/benchmark_erc20.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -124,7 +124,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
5
.github/workflows/benchmark_gpu_4090.yml
vendored
5
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -54,7 +54,7 @@ jobs:
|
||||
echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -116,6 +116,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
@@ -126,7 +127,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
|
||||
48
.github/workflows/benchmark_gpu_core_crypto.yml
vendored
48
.github/workflows/benchmark_gpu_core_crypto.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -48,28 +48,19 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
@@ -84,31 +75,10 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make bench_pbs_gpu
|
||||
@@ -167,7 +137,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/benchmark_gpu_erc20.yml
vendored
2
.github/workflows/benchmark_gpu_erc20.yml
vendored
@@ -13,6 +13,8 @@ on:
|
||||
- "single-h100 (n3-H100x1)"
|
||||
- "2-h100 (n3-H100x2)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
- "multi-h100-nvlink (n3-H100x8-NVLink)"
|
||||
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
|
||||
jobs:
|
||||
parse-inputs:
|
||||
|
||||
51
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
51
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
@@ -54,7 +54,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -75,28 +75,19 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
@@ -111,34 +102,10 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make bench_hlapi_erc20_gpu
|
||||
@@ -196,7 +163,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
1
.github/workflows/benchmark_gpu_integer.yml
vendored
1
.github/workflows/benchmark_gpu_integer.yml
vendored
@@ -15,6 +15,7 @@ on:
|
||||
- "4-h100 (n3-H100x4)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
- "multi-h100-nvlink (n3-H100x8-NVLink)"
|
||||
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
- "multi-a100-nvlink (n3-A100x8-NVLink)"
|
||||
command:
|
||||
description: "Benchmark command to run"
|
||||
|
||||
@@ -83,7 +83,7 @@ jobs:
|
||||
- name: Set multiple operations flavors
|
||||
if: ${{ contains(inputs.op_flavor, ',')}}
|
||||
run: |
|
||||
PARSED_OP_FLAVOR=$(echo "${{ inputs.op_flavor }}" | sed 's/[[:space:]]*,[[:space:]]*/\\", \\"/g')
|
||||
PARSED_OP_FLAVOR=$(echo "${{ inputs.op_flavor }}" | sed 's/[[:space:]]*,[[:space:]]*/", "/g')
|
||||
echo "OP_FLAVOR=[\"${PARSED_OP_FLAVOR}\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set benchmark types
|
||||
@@ -118,7 +118,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -145,28 +145,19 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
@@ -181,41 +172,10 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
run: |
|
||||
@@ -236,7 +196,8 @@ jobs:
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
@@ -244,6 +205,13 @@ jobs:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -271,7 +239,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
9
.github/workflows/benchmark_integer.yml
vendored
9
.github/workflows/benchmark_integer.yml
vendored
@@ -35,7 +35,6 @@ env:
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
FAST_BENCH: TRUE
|
||||
BENCH_TYPE: latency
|
||||
|
||||
jobs:
|
||||
prepare-matrix:
|
||||
@@ -91,7 +90,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -131,7 +130,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -168,7 +167,7 @@ jobs:
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ env.BENCH_TYPE }}
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
@@ -198,7 +197,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_shortint.yml
vendored
6
.github/workflows/benchmark_shortint.yml
vendored
@@ -56,7 +56,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -163,7 +163,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -130,7 +130,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -191,7 +191,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
17
.github/workflows/benchmark_tfhe_fft.yml
vendored
17
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -16,6 +16,9 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- "main"
|
||||
paths:
|
||||
- tfhe-fft/**
|
||||
- .github/workflows/benchmark_tfhe_fft.yml
|
||||
schedule:
|
||||
# Job will be triggered each Thursday at 11p.m.
|
||||
- cron: '0 23 * * 4'
|
||||
@@ -29,7 +32,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -47,7 +50,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -81,13 +84,13 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_fft
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -110,7 +113,7 @@ jobs:
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-fft benchmarks failed. (${{ env.ACTION_RUN_URL }})"
|
||||
@@ -123,7 +126,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -134,7 +137,7 @@ jobs:
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (fft-benchmarks) failed. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
17
.github/workflows/benchmark_tfhe_ntt.yml
vendored
17
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -16,6 +16,9 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- "main"
|
||||
paths:
|
||||
- tfhe-ntt/**
|
||||
- .github/workflows/benchmark_tfhe_ntt.yml
|
||||
schedule:
|
||||
# Job will be triggered each Friday at 11p.m.
|
||||
- cron: "0 23 * * 5"
|
||||
@@ -29,7 +32,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -47,7 +50,7 @@ jobs:
|
||||
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -81,13 +84,13 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@0b7f8abb1508181956e8e162db84b466c27e18ce
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_ntt
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
@@ -110,7 +113,7 @@ jobs:
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "tfhe-ntt benchmarks failed. (${{ env.ACTION_RUN_URL }})"
|
||||
@@ -123,7 +126,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@1dced74825027fe3d481392163ed8fc56813fb5d
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -134,7 +137,7 @@ jobs:
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "EC2 teardown (ntt-benchmarks) failed. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
11
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
11
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@c3a1bb2c992d77180ae65be6ae6c166cf40f857c
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -58,7 +58,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -91,7 +91,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -118,8 +118,7 @@ jobs:
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--throughput
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
@@ -156,7 +155,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
13
.github/workflows/benchmark_wasm_client.yml
vendored
13
.github/workflows/benchmark_wasm_client.yml
vendored
@@ -36,10 +36,11 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -64,7 +65,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -98,7 +99,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -108,7 +109,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
|
||||
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -121,7 +122,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
|
||||
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -199,7 +200,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
66
.github/workflows/benchmark_zk_pke.yml
vendored
66
.github/workflows/benchmark_zk_pke.yml
vendored
@@ -4,10 +4,14 @@ name: PKE ZK benchmarks
|
||||
on:
|
||||
workflow_dispatch:
|
||||
inputs:
|
||||
run_throughput:
|
||||
description: "Run throughput benchmarks"
|
||||
type: boolean
|
||||
default: false
|
||||
bench_type:
|
||||
description: "Benchmarks type"
|
||||
type: choice
|
||||
default: latency
|
||||
options:
|
||||
- latency
|
||||
- throughput
|
||||
- both
|
||||
|
||||
push:
|
||||
branches:
|
||||
@@ -26,7 +30,6 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
BENCH_TYPE: latency
|
||||
|
||||
jobs:
|
||||
should-run:
|
||||
@@ -40,10 +43,11 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -59,10 +63,37 @@ jobs:
|
||||
- tfhe/benches/integer/zk_pke.rs
|
||||
- .github/workflows/zk_pke_benchmark.yml
|
||||
|
||||
prepare-matrix:
|
||||
name: Prepare operations matrix
|
||||
runs-on: ubuntu-latest
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
outputs:
|
||||
bench_type: ${{ steps.set_bench_type.outputs.bench_type }}
|
||||
steps:
|
||||
- name: Set benchmark types
|
||||
if: github.event_name == 'workflow_dispatch'
|
||||
run: |
|
||||
if [[ "${{ inputs.bench_type }}" == "both" ]]; then
|
||||
echo "BENCH_TYPE=[\"latency\", \"throughput\"]" >> "${GITHUB_ENV}"
|
||||
else
|
||||
echo "BENCH_TYPE=[\"${{ inputs.bench_type }}\"]" >> "${GITHUB_ENV}"
|
||||
fi
|
||||
|
||||
- name: Default benchmark type
|
||||
if: github.event_name != 'workflow_dispatch'
|
||||
run: |
|
||||
echo "BENCH_TYPE=[\"latency\"]" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Set benchmark types output
|
||||
id: set_bench_type
|
||||
run: |
|
||||
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (pke-zk-benchmarks)
|
||||
runs-on: ubuntu-latest
|
||||
needs: should-run
|
||||
needs: [ should-run, prepare-matrix ]
|
||||
if: github.event_name == 'workflow_dispatch' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs') ||
|
||||
(github.event_name == 'push' &&
|
||||
@@ -73,7 +104,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -85,11 +116,15 @@ jobs:
|
||||
pke-zk-benchmarks:
|
||||
name: Execute PKE ZK benchmarks
|
||||
if: needs.setup-instance.result != 'skipped'
|
||||
needs: setup-instance
|
||||
needs: [ prepare-matrix, setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
|
||||
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
max-parallel: 1
|
||||
matrix:
|
||||
bench_type: ${{ fromJSON(needs.prepare-matrix.outputs.bench_type) }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
@@ -106,7 +141,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -117,14 +152,9 @@ jobs:
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Should run throughput benchmarks
|
||||
if: inputs.run_throughput
|
||||
run: |
|
||||
echo "BENCH_TYPE=throughput" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make bench_integer_zk
|
||||
make BENCH_TYPE=${{ matrix.bench_type }} bench_integer_zk
|
||||
|
||||
- name: Parse results
|
||||
run: |
|
||||
@@ -138,7 +168,7 @@ jobs:
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ env.BENCH_TYPE }}
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
|
||||
- name: Parse CRS sizes results
|
||||
run: |
|
||||
@@ -181,7 +211,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/cargo_build.yml
vendored
2
.github/workflows/cargo_build.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
2
.github/workflows/cargo_build_tfhe_fft.yml
vendored
2
.github/workflows/cargo_build_tfhe_fft.yml
vendored
@@ -21,7 +21,7 @@ jobs:
|
||||
fail-fast: false
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
|
||||
2
.github/workflows/cargo_build_tfhe_ntt.yml
vendored
2
.github/workflows/cargo_build_tfhe_ntt.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
|
||||
6
.github/workflows/cargo_test_fft.yml
vendored
6
.github/workflows/cargo_test_fft.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
runner_type: [ubuntu-latest, macos-latest, windows-latest]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
@@ -44,7 +44,7 @@ jobs:
|
||||
matrix:
|
||||
runner_type: [ubuntu-latest, macos-latest, windows-latest]
|
||||
steps:
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
cargo-tests-node-js:
|
||||
runs-on: "ubuntu-latest"
|
||||
steps:
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Test node js
|
||||
run: |
|
||||
|
||||
4
.github/workflows/cargo_test_ntt.yml
vendored
4
.github/workflows/cargo_test_ntt.yml
vendored
@@ -19,7 +19,7 @@ jobs:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
fail-fast: false
|
||||
steps:
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
@@ -39,7 +39,7 @@ jobs:
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||
steps:
|
||||
- uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
|
||||
|
||||
2
.github/workflows/ci_lint.yml
vendored
2
.github/workflows/ci_lint.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
make lint_workflow
|
||||
|
||||
- name: Ensure SHA pinned actions
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@38608ef4fb69adae7f1eac6eeb88e67b7d083bfd # v3.0.16
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@5d6ac37a4cef8b8df67f482a8e384987766f0213 # v3.0.17
|
||||
with:
|
||||
allowlist: |
|
||||
slsa-framework/slsa-github-generator
|
||||
|
||||
12
.github/workflows/code_coverage.yml
vendored
12
.github/workflows/code_coverage.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -47,13 +47,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
files_yaml: |
|
||||
tfhe:
|
||||
@@ -83,7 +83,7 @@ jobs:
|
||||
make test_shortint_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@5c47607acb93fed5485fdbf7232e8a31425f672a
|
||||
uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -97,7 +97,7 @@ jobs:
|
||||
make test_integer_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@5c47607acb93fed5485fdbf7232e8a31425f672a
|
||||
uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -121,7 +121,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/gpu_4090_tests.yml
vendored
2
.github/workflows/gpu_4090_tests.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
53
.github/workflows/gpu_fast_h100_tests.yml
vendored
53
.github/workflows/gpu_fast_h100_tests.yml
vendored
@@ -31,10 +31,11 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -67,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -93,60 +94,28 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run core crypto and internal CUDA backend tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu
|
||||
@@ -186,7 +155,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
53
.github/workflows/gpu_fast_tests.yml
vendored
53
.github/workflows/gpu_fast_tests.yml
vendored
@@ -30,10 +30,11 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -65,7 +66,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -91,60 +92,28 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run core crypto and internal CUDA backend tests
|
||||
run: |
|
||||
make test_core_crypto_gpu
|
||||
@@ -184,7 +153,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
38
.github/workflows/gpu_full_h100_tests.yml
vendored
38
.github/workflows/gpu_full_h100_tests.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -49,9 +49,6 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
@@ -71,38 +68,21 @@ jobs:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run core crypto, integer and internal CUDA backend tests
|
||||
run: |
|
||||
make test_gpu
|
||||
@@ -139,7 +119,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
53
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
53
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
@@ -31,10 +31,11 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -67,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -93,60 +94,28 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run multi-bit CUDA integer compression tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_integer_compression_gpu
|
||||
@@ -189,7 +158,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
114
.github/workflows/gpu_integer_long_run_tests.yml
vendored
Normal file
114
.github/workflows/gpu_integer_long_run_tests.yml
vendored
Normal file
@@ -0,0 +1,114 @@
|
||||
name: AWS Long Run Tests on GPU
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly tests will be triggered each Friday at 1a.m.
|
||||
- cron: '0 1 * * FRI'
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (gpu-tests)
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: 2-h100
|
||||
|
||||
cuda-tests:
|
||||
name: Long run GPU H100 tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
strategy:
|
||||
fail-fast: false
|
||||
# explicit include-based build matrix, of known valid options
|
||||
matrix:
|
||||
include:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
make test_integer_long_run_gpu
|
||||
|
||||
slack-notify:
|
||||
name: Slack Notification
|
||||
needs: [ setup-instance, cuda-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
if: ${{ always() && needs.cuda-tests.result != 'skipped' && failure() }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Send message
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests.result }}
|
||||
SLACK_MESSAGE: "Integer GPU H100 long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (gpu-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cuda-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (gpu-long-run-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
6
.github/workflows/gpu_pcc.yml
vendored
6
.github/workflows/gpu_pcc.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -110,7 +110,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,58 +94,25 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run signed integer tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci
|
||||
@@ -171,7 +138,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,58 +94,25 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run signed integer multi-bit tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci
|
||||
@@ -171,7 +138,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
54
.github/workflows/gpu_signed_integer_tests.yml
vendored
54
.github/workflows/gpu_signed_integer_tests.yml
vendored
@@ -38,10 +38,11 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -74,7 +75,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -100,57 +101,28 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run nightly tests
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
@@ -159,10 +131,6 @@ jobs:
|
||||
echo "NIGHTLY_TESTS=TRUE";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run signed integer multi-bit tests
|
||||
run: |
|
||||
make test_signed_integer_multi_bit_gpu_ci
|
||||
@@ -188,7 +156,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,58 +94,25 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run unsigned integer tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci
|
||||
@@ -171,7 +138,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,58 +94,25 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run unsigned integer multi-bit tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci
|
||||
@@ -171,7 +138,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
53
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
53
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
@@ -41,7 +41,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -100,54 +100,25 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run nightly tests
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
@@ -156,10 +127,6 @@ jobs:
|
||||
echo "NIGHTLY_TESTS=TRUE";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run unsigned integer multi-bit tests
|
||||
run: |
|
||||
make test_unsigned_integer_multi_bit_gpu_ci
|
||||
@@ -185,7 +152,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
94
.github/workflows/integer_long_run_tests.yml
vendored
Normal file
94
.github/workflows/integer_long_run_tests.yml
vendored
Normal file
@@ -0,0 +1,94 @@
|
||||
name: AWS Long Run Tests on CPU
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
|
||||
RUSTFLAGS: "-C target-cpu=native"
|
||||
RUST_BACKTRACE: "full"
|
||||
RUST_MIN_STACK: "8388608"
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly tests will be triggered each Friday at 1a.m.
|
||||
- cron: '0 1 * * FRI'
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
name: Setup instance (cpu-tests)
|
||||
if: github.event_name != 'schedule' ||
|
||||
(github.event_name == 'schedule' && github.repository == 'zama-ai/tfhe-rs')
|
||||
runs-on: ubuntu-latest
|
||||
outputs:
|
||||
runner-name: ${{ steps.start-instance.outputs.label }}
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: aws
|
||||
profile: cpu-big
|
||||
|
||||
cpu-tests:
|
||||
name: Long run CPU tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
make test_integer_long_run
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "CPU long run tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (cpu-tests)
|
||||
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
|
||||
needs: [ setup-instance, cpu-tests ]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
label: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_MESSAGE: "Instance teardown (cpu-long-run-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
|
||||
2
.github/workflows/m1_tests.yml
vendored
2
.github/workflows/m1_tests.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
persist-credentials: "false"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
2
.github/workflows/make_release.yml
vendored
2
.github/workflows/make_release.yml
vendored
@@ -46,6 +46,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe
|
||||
@@ -84,6 +85,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: Create NPM version tag
|
||||
if: ${{ inputs.npm_latest_tag }}
|
||||
run: |
|
||||
|
||||
@@ -27,6 +27,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
|
||||
7
.github/workflows/make_release_cuda.yml
vendored
7
.github/workflows/make_release_cuda.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -64,13 +64,14 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@7b1c307e0dcbda6122208f10795a713336a9b35a
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -119,7 +120,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@801df0b8db5ea2b06128b7476c652f5ed5f193a8
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
4
.github/workflows/make_release_tfhe_fft.yml
vendored
4
.github/workflows/make_release_tfhe_fft.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
needs: verify_tag
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -39,7 +39,7 @@ jobs:
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
|
||||
4
.github/workflows/make_release_tfhe_ntt.yml
vendored
4
.github/workflows/make_release_tfhe_ntt.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
needs: verify_tag
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
@@ -39,7 +39,7 @@ jobs:
|
||||
- name: Slack Notification
|
||||
if: ${{ failure() }}
|
||||
continue-on-error: true
|
||||
uses: rtCamp/action-slack-notify@b24d75fe0e728a4bf9fc42ee217caa686d141ee8
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ job.status }}
|
||||
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
|
||||
|
||||
@@ -27,6 +27,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Publish proc-macro crate
|
||||
env:
|
||||
|
||||
1
.github/workflows/make_release_zk_pok.yml
vendored
1
.github/workflows/make_release_zk_pok.yml
vendored
@@ -28,6 +28,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Publish crate.io package
|
||||
env:
|
||||
|
||||
1
.github/workflows/sync_on_push.yml
vendored
1
.github/workflows/sync_on_push.yml
vendored
@@ -16,6 +16,7 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
- name: git-sync
|
||||
uses: wei/git-sync@55c6b63b4f21607da0e9877ca9b4d11a29fc6d83
|
||||
with:
|
||||
|
||||
@@ -19,11 +19,14 @@ exclude = [
|
||||
"utils/cargo-tfhe-lints"
|
||||
]
|
||||
[workspace.dependencies]
|
||||
aligned-vec = { version = "0.5", default-features = false }
|
||||
aligned-vec = { version = "0.6", default-features = false }
|
||||
bytemuck = "1.14.3"
|
||||
dyn-stack = { version = "0.10", default-features = false }
|
||||
dyn-stack = { version = "0.11", default-features = false }
|
||||
itertools = "0.13"
|
||||
num-complex = "0.4"
|
||||
pulp = { version = "0.18.22", default-features = false }
|
||||
pulp = { version = "0.20.0", default-features = false }
|
||||
rand = "0.8"
|
||||
rayon = "1"
|
||||
serde = { version = "1.0", default-features = false }
|
||||
wasm-bindgen = ">=0.2.86,<0.2.94"
|
||||
|
||||
|
||||
24
Makefile
24
Makefile
@@ -25,6 +25,7 @@ BACKWARD_COMPAT_DATA_BRANCH?=v0.4
|
||||
BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
|
||||
BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
|
||||
TFHE_SPEC:=tfhe
|
||||
WASM_PACK_VERSION="0.13.1"
|
||||
# We are kind of hacking the cut here, the version cannot contain a quote '"'
|
||||
WASM_BINDGEN_VERSION:=$(shell grep '^wasm-bindgen[[:space:]]*=' Cargo.toml | cut -d '"' -f 2 | xargs)
|
||||
WEB_RUNNER_DIR=web-test-runner
|
||||
@@ -116,8 +117,8 @@ install_wasm_bindgen_cli: install_rs_build_toolchain
|
||||
|
||||
.PHONY: install_wasm_pack # Install wasm-pack to build JS packages
|
||||
install_wasm_pack: install_rs_build_toolchain
|
||||
@wasm-pack --version > /dev/null 2>&1 || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.0 || \
|
||||
@wasm-pack --version | grep "$(WASM_PACK_VERSION)" > /dev/null 2>&1 || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.1 || \
|
||||
( echo "Unable to install cargo wasm-pack, unknown error." && exit 1 )
|
||||
|
||||
.PHONY: install_node # Install last version of NodeJS via nvm
|
||||
@@ -585,6 +586,11 @@ test_integer_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
|
||||
|
||||
.PHONY: test_integer_long_run_gpu # Run the tests of the integer module including experimental on the gpu backend
|
||||
test_integer_long_run_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,__long_run_tests -p $(TFHE_SPEC) -- integer::gpu::server_key::radix::tests_long_run --test-threads=6
|
||||
|
||||
.PHONY: test_integer_compression
|
||||
test_integer_compression: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
@@ -765,6 +771,12 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--signed-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_integer_long_run # Run the long run tests for integer
|
||||
test_integer_long_run: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,__long_run_tests -p $(TFHE_SPEC) -- integer::server_key::radix_parallel::tests_long_run
|
||||
|
||||
|
||||
.PHONY: test_safe_serialization # Run the tests for safe serialization
|
||||
test_safe_serialization: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
@@ -896,7 +908,7 @@ doc: install_rs_check_toolchain
|
||||
DOCS_RS=1 \
|
||||
RUSTDOCFLAGS="--html-in-header katex-header.html" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: docs # Build rust doc alias for doc
|
||||
docs: doc
|
||||
@@ -907,7 +919,7 @@ lint_doc: install_rs_check_toolchain
|
||||
DOCS_RS=1 \
|
||||
RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
|
||||
|
||||
.PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
|
||||
lint_docs: lint_doc
|
||||
@@ -1294,7 +1306,9 @@ sha256_bool: install_rs_check_toolchain
|
||||
|
||||
.PHONY: pcc # pcc stands for pre commit checks (except GPU)
|
||||
pcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested check_intra_md_links \
|
||||
clippy_all tfhe_lints check_compile_tests
|
||||
clippy_all check_compile_tests
|
||||
# TFHE lints deactivated as it's incompatible with 1.83 - temporary
|
||||
# tfhe_lints
|
||||
|
||||
.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
|
||||
pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu check_rust_bindings_did_not_change
|
||||
|
||||
@@ -6,7 +6,7 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
rayon = { version = "1.7.0"}
|
||||
rayon = { workspace = true }
|
||||
|
||||
[target.'cfg(target_arch = "x86_64")'.dependencies.tfhe]
|
||||
path = "../../tfhe"
|
||||
|
||||
@@ -27,12 +27,23 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
|
||||
std::abort(); \
|
||||
}
|
||||
|
||||
cudaEvent_t cuda_create_event(uint32_t gpu_index);
|
||||
|
||||
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
|
||||
uint32_t gpu_index);
|
||||
void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
|
||||
uint32_t gpu_index);
|
||||
|
||||
void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index);
|
||||
|
||||
cudaStream_t cuda_create_stream(uint32_t gpu_index);
|
||||
|
||||
void cuda_destroy_stream(cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
uint32_t cuda_is_available();
|
||||
|
||||
void *cuda_malloc(uint64_t size, uint32_t gpu_index);
|
||||
|
||||
void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);
|
||||
|
||||
@@ -102,13 +102,12 @@ template <typename Torus> struct int_decompression {
|
||||
};
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
carry_extract_lut->get_lut(gpu_indexes[0], 0),
|
||||
streams[0], gpu_indexes[0], carry_extract_lut->get_lut(0, 0),
|
||||
encryption_params.glwe_dimension, encryption_params.polynomial_size,
|
||||
encryption_params.message_modulus, encryption_params.carry_modulus,
|
||||
carry_extract_f);
|
||||
|
||||
carry_extract_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
carry_extract_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
}
|
||||
}
|
||||
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
|
||||
@@ -35,6 +35,8 @@ enum CMP_ORDERING { IS_INFERIOR = 0, IS_EQUAL = 1, IS_SUPERIOR = 2 };
|
||||
|
||||
enum SIGNED_OPERATION { ADDITION = 1, SUBTRACTION = -1 };
|
||||
|
||||
enum outputFlag { FLAG_NONE = 0, FLAG_OVERFLOW = 1, FLAG_CARRY = 2 };
|
||||
|
||||
extern "C" {
|
||||
void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -282,23 +284,61 @@ void scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory);
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
|
||||
uint32_t uses_carry, bool allocate_gpu_memory);
|
||||
|
||||
void scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
|
||||
uint32_t uses_carry, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array, void *carry_out, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks);
|
||||
void *lwe_array, void *carry_out, const void *carry_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_blocks,
|
||||
uint32_t requested_flag, uint32_t uses_carry);
|
||||
|
||||
void cuda_propagate_single_carry_get_input_carries_kb_64_inplace(
|
||||
void cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array, void *carry_out, void *input_carries, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_blocks);
|
||||
void *lhs_array, const void *rhs_array, void *carry_out,
|
||||
const void *carry_in, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks, uint32_t requested_flag, uint32_t uses_carry);
|
||||
|
||||
void cleanup_cuda_propagate_single_carry(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void cleanup_cuda_add_and_propagate_single_carry(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t compute_overflow,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lhs_array, const void *rhs_array, void *overflow_block,
|
||||
const void *input_borrow, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks, uint32_t compute_overflow,
|
||||
uint32_t uses_input_borrow);
|
||||
|
||||
void cleanup_cuda_integer_overflowing_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
@@ -318,25 +358,6 @@ void cleanup_cuda_integer_radix_partial_sum_ciphertexts_vec(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_radix_overflowing_sub_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_radix_overflowing_sub_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *radix_lwe_out, void *radix_lwe_overflowed, void const *radix_lwe_left,
|
||||
void const *radix_lwe_right, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks_in_radix);
|
||||
|
||||
void cleanup_cuda_integer_radix_overflowing_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_scalar_mul_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
@@ -376,26 +397,6 @@ void cleanup_cuda_integer_div_rem(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_signed_overflowing_add_or_sub_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, int8_t signed_operation,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_signed_overflowing_add_or_sub_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lhs, void const *rhs, void *overflowed, int8_t signed_operation,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks_in_radix);
|
||||
|
||||
void cleanup_signed_overflowing_add_or_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_compute_prefix_sum_hillis_steele_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -27,6 +27,7 @@ void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
void const *lwe_array_in_2,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count);
|
||||
|
||||
void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
void const *lwe_array_in, void const *plaintext_array_in,
|
||||
|
||||
@@ -28,7 +28,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride);
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
#endif
|
||||
|
||||
template <typename Torus>
|
||||
@@ -46,7 +46,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride);
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
|
||||
template <typename Torus>
|
||||
void scratch_cuda_multi_bit_programmable_bootstrap(
|
||||
@@ -63,7 +63,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride);
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
|
||||
template <typename Torus>
|
||||
uint64_t get_buffer_size_full_sm_multibit_programmable_bootstrap_keybundle(
|
||||
|
||||
@@ -255,7 +255,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
Torus const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride);
|
||||
|
||||
template <typename Torus>
|
||||
@@ -266,7 +266,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
Torus const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride);
|
||||
|
||||
#if (CUDA_ARCH >= 900)
|
||||
@@ -278,7 +278,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
Torus const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride);
|
||||
|
||||
template <typename Torus>
|
||||
|
||||
@@ -69,7 +69,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t lut_count, uint32_t lut_stride);
|
||||
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride);
|
||||
|
||||
void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
void *stream, uint32_t gpu_index, void *lwe_array_out,
|
||||
@@ -78,7 +78,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t lut_count, uint32_t lut_stride);
|
||||
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride);
|
||||
|
||||
void cleanup_cuda_programmable_bootstrap(void *stream, uint32_t gpu_index,
|
||||
int8_t **pbs_buffer);
|
||||
|
||||
@@ -27,7 +27,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *buffer, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride);
|
||||
|
||||
void cleanup_cuda_multi_bit_programmable_bootstrap(void *stream,
|
||||
|
||||
@@ -2,6 +2,30 @@
|
||||
#include <cstdint>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
cudaEvent_t cuda_create_event(uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
cudaEvent_t event;
|
||||
check_cuda_error(cudaEventCreate(&event));
|
||||
return event;
|
||||
}
|
||||
|
||||
void cuda_event_record(cudaEvent_t event, cudaStream_t stream,
|
||||
uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaEventRecord(event, stream));
|
||||
}
|
||||
|
||||
void cuda_stream_wait_event(cudaStream_t stream, cudaEvent_t event,
|
||||
uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaStreamWaitEvent(stream, event, 0));
|
||||
}
|
||||
|
||||
void cuda_event_destroy(cudaEvent_t event, uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
check_cuda_error(cudaEventDestroy(event));
|
||||
}
|
||||
|
||||
/// Unsafe function to create a CUDA stream, must check first that GPU exists
|
||||
cudaStream_t cuda_create_stream(uint32_t gpu_index) {
|
||||
check_cuda_error(cudaSetDevice(gpu_index));
|
||||
@@ -21,6 +45,9 @@ void cuda_synchronize_stream(cudaStream_t stream, uint32_t gpu_index) {
|
||||
check_cuda_error(cudaStreamSynchronize(stream));
|
||||
}
|
||||
|
||||
// Determine if a CUDA device is available at runtime
|
||||
uint32_t cuda_is_available() { return cudaSetDevice(0) == cudaSuccess; }
|
||||
|
||||
/// Unsafe function that will try to allocate even if gpu_index is invalid
|
||||
/// or if there's not enough memory. A safe wrapper around it must call
|
||||
/// cuda_check_valid_malloc() first
|
||||
|
||||
@@ -58,9 +58,11 @@ host_integer_abs_kb(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
|
||||
radix_params.big_lwe_dimension, num_blocks);
|
||||
|
||||
host_propagate_single_carry<Torus>(streams, gpu_indexes, gpu_count, ct,
|
||||
nullptr, nullptr, mem_ptr->scp_mem, bsks,
|
||||
ksks, num_blocks);
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, ct, nullptr, nullptr, mem_ptr->scp_mem,
|
||||
bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
|
||||
host_integer_radix_bitop_kb(streams, gpu_indexes, gpu_count, ct, mask, ct,
|
||||
mem_ptr->bitxor_mem, bsks, ksks, num_blocks);
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
#include "integer/addition.cuh"
|
||||
|
||||
void scratch_cuda_signed_overflowing_add_or_sub_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, int8_t signed_operation,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
SIGNED_OPERATION op = (signed_operation == 1) ? SIGNED_OPERATION::ADDITION
|
||||
: SIGNED_OPERATION::SUBTRACTION;
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_signed_overflowing_add_or_sub_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_signed_overflowing_add_or_sub_memory<uint64_t> **)mem_ptr,
|
||||
num_blocks, op, params, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_signed_overflowing_add_or_sub_radix_ciphertext_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lhs, void const *rhs, void *overflowed, int8_t signed_operation,
|
||||
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks) {
|
||||
|
||||
auto mem = (int_signed_overflowing_add_or_sub_memory<uint64_t> *)mem_ptr;
|
||||
SIGNED_OPERATION op = (signed_operation == 1) ? SIGNED_OPERATION::ADDITION
|
||||
: SIGNED_OPERATION::SUBTRACTION;
|
||||
|
||||
host_integer_signed_overflowing_add_or_sub_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lhs), static_cast<uint64_t const *>(rhs),
|
||||
static_cast<uint64_t *>(overflowed), op, bsks, (uint64_t *const *)(ksks),
|
||||
mem, num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_signed_overflowing_add_or_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_signed_overflowing_add_or_sub_memory<uint64_t> *mem_ptr =
|
||||
(int_signed_overflowing_add_or_sub_memory<uint64_t> *)(*mem_ptr_void);
|
||||
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
@@ -1,149 +0,0 @@
|
||||
#ifndef TFHE_RS_ADDITION_CUH
|
||||
#define TFHE_RS_ADDITION_CUH
|
||||
|
||||
#include "crypto/keyswitch.cuh"
|
||||
#include "device.h"
|
||||
#include "integer/comparison.cuh"
|
||||
#include "integer/integer.cuh"
|
||||
#include "integer/integer_utilities.h"
|
||||
#include "integer/negation.cuh"
|
||||
#include "integer/scalar_shifts.cuh"
|
||||
#include "linear_algebra.h"
|
||||
#include "pbs/programmable_bootstrap.h"
|
||||
#include "utils/helper.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
template <typename Torus>
|
||||
void host_resolve_signed_overflow(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *result, Torus *last_block_inner_propagation,
|
||||
Torus const *last_block_input_carry, Torus *last_block_output_carry,
|
||||
int_resolve_signed_overflow_memory<Torus> *mem, void *const *bsks,
|
||||
Torus *const *ksks) {
|
||||
|
||||
auto x = mem->x;
|
||||
|
||||
Torus *d_clears =
|
||||
(Torus *)cuda_malloc_async(sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
|
||||
cuda_set_value_async<Torus>(streams[0], gpu_indexes[0], d_clears, 2, 1);
|
||||
|
||||
// replace with host function call
|
||||
cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
|
||||
streams[0], gpu_indexes[0], x, last_block_output_carry, d_clears,
|
||||
mem->params.big_lwe_dimension, 1);
|
||||
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], last_block_inner_propagation,
|
||||
last_block_inner_propagation, x,
|
||||
mem->params.big_lwe_dimension, 1);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], last_block_inner_propagation,
|
||||
last_block_inner_propagation, last_block_input_carry,
|
||||
mem->params.big_lwe_dimension, 1);
|
||||
|
||||
host_apply_univariate_lut_kb<Torus>(streams, gpu_indexes, gpu_count, result,
|
||||
last_block_inner_propagation,
|
||||
mem->resolve_overflow_lut, ksks, bsks, 1);
|
||||
|
||||
cuda_drop_async(d_clears, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void scratch_cuda_integer_signed_overflowing_add_or_sub_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int_signed_overflowing_add_or_sub_memory<Torus> **mem_ptr,
|
||||
uint32_t num_blocks, SIGNED_OPERATION op, int_radix_params params,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
*mem_ptr = new int_signed_overflowing_add_or_sub_memory<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_blocks, op,
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
/*
|
||||
* Addition - signed_operation = 1
|
||||
* Subtraction - signed_operation = -1
|
||||
*/
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_signed_overflowing_add_or_sub_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lhs, Torus const *rhs, Torus *overflowed,
|
||||
SIGNED_OPERATION op, void *const *bsks, uint64_t *const *ksks,
|
||||
int_signed_overflowing_add_or_sub_memory<uint64_t> *mem_ptr,
|
||||
uint32_t num_blocks) {
|
||||
|
||||
auto radix_params = mem_ptr->params;
|
||||
|
||||
uint32_t big_lwe_dimension = radix_params.big_lwe_dimension;
|
||||
uint32_t big_lwe_size = big_lwe_dimension + 1;
|
||||
uint32_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
|
||||
assert(radix_params.message_modulus >= 4 && radix_params.carry_modulus >= 4);
|
||||
|
||||
auto result = mem_ptr->result;
|
||||
auto neg_rhs = mem_ptr->neg_rhs;
|
||||
auto input_carries = mem_ptr->input_carries;
|
||||
auto output_carry = mem_ptr->output_carry;
|
||||
auto last_block_inner_propagation = mem_ptr->last_block_inner_propagation;
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(result, lhs, num_blocks * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
// phase 1
|
||||
if (op == SIGNED_OPERATION::ADDITION) {
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], result, lhs, rhs,
|
||||
big_lwe_dimension, num_blocks);
|
||||
} else {
|
||||
host_integer_radix_negation<Torus>(
|
||||
streams, gpu_indexes, gpu_count, neg_rhs, rhs, big_lwe_dimension,
|
||||
num_blocks, radix_params.message_modulus, radix_params.carry_modulus);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], result, lhs, neg_rhs,
|
||||
big_lwe_dimension, num_blocks);
|
||||
}
|
||||
|
||||
// phase 2
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
host_propagate_single_carry<Torus>(
|
||||
mem_ptr->sub_streams_1, gpu_indexes, gpu_count, result, output_carry,
|
||||
input_carries, mem_ptr->scp_mem, bsks, ksks, num_blocks);
|
||||
host_generate_last_block_inner_propagation<Torus>(
|
||||
mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
last_block_inner_propagation, &lhs[(num_blocks - 1) * big_lwe_size],
|
||||
&rhs[(num_blocks - 1) * big_lwe_size], mem_ptr->las_block_prop_mem, bsks,
|
||||
ksks);
|
||||
|
||||
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
// phase 3
|
||||
auto input_carry = &input_carries[(num_blocks - 1) * big_lwe_size];
|
||||
if (op == SIGNED_OPERATION::SUBTRACTION && num_blocks == 1) {
|
||||
// Quick fix for the case where the subtraction is done on a single block
|
||||
Torus *one_scalar =
|
||||
(Torus *)cuda_malloc_async(sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
cuda_set_value_async<Torus>(streams[0], gpu_indexes[0], one_scalar, 1, 1);
|
||||
create_trivial_radix<Torus>(
|
||||
streams[0], gpu_indexes[0], input_carry, one_scalar, big_lwe_dimension,
|
||||
1, 1, radix_params.message_modulus, radix_params.carry_modulus);
|
||||
cuda_drop_async(one_scalar, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
|
||||
host_resolve_signed_overflow<Torus>(
|
||||
streams, gpu_indexes, gpu_count, overflowed, last_block_inner_propagation,
|
||||
input_carry, output_carry, mem_ptr->resolve_overflow_mem, bsks, ksks);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(lhs, result, num_blocks * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
|
||||
#endif // TFHE_RS_ADDITION_CUH
|
||||
@@ -58,6 +58,9 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
case GE:
|
||||
case LT:
|
||||
case LE:
|
||||
if (num_radix_blocks % 2 != 0)
|
||||
PANIC("Cuda error (comparisons): the number of radix blocks has to be "
|
||||
"even.")
|
||||
host_integer_radix_difference_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -68,6 +71,8 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
break;
|
||||
case MAX:
|
||||
case MIN:
|
||||
if (num_radix_blocks % 2 != 0)
|
||||
PANIC("Cuda error (max/min): the number of radix blocks has to be even.")
|
||||
host_integer_radix_maxmin_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
|
||||
@@ -85,16 +85,19 @@ __host__ void are_all_comparisons_block_true(
|
||||
|
||||
while (remaining_blocks > 0) {
|
||||
// Split in max_value chunks
|
||||
uint32_t chunk_length = std::min(max_value, remaining_blocks);
|
||||
int num_chunks = remaining_blocks / chunk_length;
|
||||
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
|
||||
|
||||
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
|
||||
// as in the worst case we will be adding `max_value` ones
|
||||
auto input_blocks = tmp_out;
|
||||
auto accumulator = are_all_block_true_buffer->tmp_block_accumulated;
|
||||
auto is_equal_to_num_blocks_map =
|
||||
&are_all_block_true_buffer->is_equal_to_lut_map;
|
||||
auto is_max_value_lut = are_all_block_true_buffer->is_max_value;
|
||||
uint32_t chunk_lengths[num_chunks];
|
||||
auto begin_remaining_blocks = remaining_blocks;
|
||||
for (int i = 0; i < num_chunks; i++) {
|
||||
uint32_t chunk_length =
|
||||
std::min(max_value, begin_remaining_blocks - i * max_value);
|
||||
chunk_lengths[i] = chunk_length;
|
||||
accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
|
||||
input_blocks, big_lwe_dimension,
|
||||
chunk_length);
|
||||
@@ -111,29 +114,31 @@ __host__ void are_all_comparisons_block_true(
|
||||
// is_non_zero_lut_buffer LUT
|
||||
lut = mem_ptr->eq_buffer->is_non_zero_lut;
|
||||
} else {
|
||||
if ((*is_equal_to_num_blocks_map).find(chunk_length) !=
|
||||
(*is_equal_to_num_blocks_map).end()) {
|
||||
// The LUT is already computed
|
||||
lut = (*is_equal_to_num_blocks_map)[chunk_length];
|
||||
} else {
|
||||
if (chunk_lengths[num_chunks - 1] != max_value) {
|
||||
// LUT needs to be computed
|
||||
auto new_lut =
|
||||
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
|
||||
max_value, num_radix_blocks, true);
|
||||
|
||||
uint32_t chunk_length = chunk_lengths[num_chunks - 1];
|
||||
auto is_equal_to_num_blocks_lut_f = [chunk_length](Torus x) -> Torus {
|
||||
return x == chunk_length;
|
||||
};
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], new_lut->get_lut(gpu_indexes[0], 0),
|
||||
streams[0], gpu_indexes[0], is_max_value_lut->get_lut(0, 1),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
is_equal_to_num_blocks_lut_f);
|
||||
|
||||
new_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
(*is_equal_to_num_blocks_map)[chunk_length] = new_lut;
|
||||
lut = new_lut;
|
||||
Torus *h_lut_indexes = (Torus *)malloc(num_chunks * sizeof(Torus));
|
||||
for (int index = 0; index < num_chunks; index++) {
|
||||
if (index == num_chunks - 1) {
|
||||
h_lut_indexes[index] = 1;
|
||||
} else {
|
||||
h_lut_indexes[index] = 0;
|
||||
}
|
||||
}
|
||||
cuda_memcpy_async_to_gpu(is_max_value_lut->get_lut_indexes(0, 0),
|
||||
h_lut_indexes, num_chunks * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
is_max_value_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
}
|
||||
lut = is_max_value_lut;
|
||||
}
|
||||
|
||||
// Applies the LUT
|
||||
@@ -182,14 +187,18 @@ __host__ void is_at_least_one_comparisons_block_true(
|
||||
uint32_t remaining_blocks = num_radix_blocks;
|
||||
while (remaining_blocks > 0) {
|
||||
// Split in max_value chunks
|
||||
uint32_t chunk_length = std::min(max_value, remaining_blocks);
|
||||
int num_chunks = remaining_blocks / chunk_length;
|
||||
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
|
||||
|
||||
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
|
||||
// as in the worst case we will be adding `max_value` ones
|
||||
auto input_blocks = mem_ptr->tmp_lwe_array_out;
|
||||
auto accumulator = buffer->tmp_block_accumulated;
|
||||
uint32_t chunk_lengths[num_chunks];
|
||||
auto begin_remaining_blocks = remaining_blocks;
|
||||
for (int i = 0; i < num_chunks; i++) {
|
||||
uint32_t chunk_length =
|
||||
std::min(max_value, begin_remaining_blocks - i * max_value);
|
||||
chunk_lengths[i] = chunk_length;
|
||||
accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
|
||||
input_blocks, big_lwe_dimension,
|
||||
chunk_length);
|
||||
@@ -449,9 +458,9 @@ __host__ void tree_sign_reduction(
|
||||
f = sign_handler_f;
|
||||
}
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], last_lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus, f);
|
||||
last_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
streams[0], gpu_indexes[0], last_lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus, f);
|
||||
last_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
// Last leaf
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
@@ -481,8 +490,9 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
if (carry_modulus >= message_modulus) {
|
||||
// Packing is possible
|
||||
// Pack inputs
|
||||
Torus *packed_left = diff_buffer->tmp_packed_left;
|
||||
Torus *packed_right = diff_buffer->tmp_packed_right;
|
||||
Torus *packed_left = diff_buffer->tmp_packed;
|
||||
Torus *packed_right =
|
||||
diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
|
||||
// In case the ciphertext is signed, the sign block and the one before it
|
||||
// are handled separately
|
||||
if (mem_ptr->is_signed) {
|
||||
@@ -501,10 +511,7 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
auto identity_lut = mem_ptr->identity_lut;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, packed_left, packed_left, bsks, ksks,
|
||||
packed_num_radix_blocks, identity_lut);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, packed_right, packed_right, bsks, ksks,
|
||||
packed_num_radix_blocks, identity_lut);
|
||||
2 * packed_num_radix_blocks, identity_lut);
|
||||
|
||||
lhs = packed_left;
|
||||
rhs = packed_right;
|
||||
@@ -533,11 +540,13 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
|
||||
// Compare the last block before the sign block separately
|
||||
auto identity_lut = mem_ptr->identity_lut;
|
||||
Torus *packed_left = diff_buffer->tmp_packed;
|
||||
Torus *packed_right =
|
||||
diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
|
||||
Torus *last_left_block_before_sign_block =
|
||||
diff_buffer->tmp_packed_left + packed_num_radix_blocks * big_lwe_size;
|
||||
packed_left + packed_num_radix_blocks * big_lwe_size;
|
||||
Torus *last_right_block_before_sign_block =
|
||||
diff_buffer->tmp_packed_right +
|
||||
packed_num_radix_blocks * big_lwe_size;
|
||||
packed_right + packed_num_radix_blocks * big_lwe_size;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, last_left_block_before_sign_block,
|
||||
lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks, 1,
|
||||
|
||||
@@ -295,7 +295,7 @@ __host__ void host_integer_decompress(
|
||||
extracted_lwe = h_mem_ptr->tmp_extracted_lwe;
|
||||
|
||||
// In the case of extracting a single LWE these parameters are dummy
|
||||
uint32_t lut_count = 1;
|
||||
uint32_t num_many_lut = 1;
|
||||
uint32_t lut_stride = 0;
|
||||
/// Apply PBS to apply a LUT, reduce the noise and go from a small LWE
|
||||
/// dimension to a big LWE dimension
|
||||
@@ -311,7 +311,7 @@ __host__ void host_integer_decompress(
|
||||
compression_params.small_lwe_dimension,
|
||||
encryption_params.polynomial_size, encryption_params.pbs_base_log,
|
||||
encryption_params.pbs_level, encryption_params.grouping_factor,
|
||||
num_radix_blocks, encryption_params.pbs_type, lut_count, lut_stride);
|
||||
num_radix_blocks, encryption_params.pbs_type, num_many_lut, lut_stride);
|
||||
} else {
|
||||
/// For multi GPU execution we create vectors of pointers for inputs and
|
||||
/// outputs
|
||||
@@ -338,7 +338,7 @@ __host__ void host_integer_decompress(
|
||||
compression_params.small_lwe_dimension,
|
||||
encryption_params.polynomial_size, encryption_params.pbs_base_log,
|
||||
encryption_params.pbs_level, encryption_params.grouping_factor,
|
||||
num_radix_blocks, encryption_params.pbs_type, lut_count, lut_stride);
|
||||
num_radix_blocks, encryption_params.pbs_type, num_many_lut, lut_stride);
|
||||
|
||||
/// Copy data back to GPU 0 and release vecs
|
||||
multi_gpu_gather_lwe_async<Torus>(
|
||||
|
||||
@@ -425,11 +425,24 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
auto do_overflowing_sub = [&](cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
host_integer_overflowing_sub_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, new_remainder.data,
|
||||
subtraction_overflowed.data, merged_interesting_remainder.data,
|
||||
interesting_divisor.data, bsks, ksks, mem_ptr->overflow_sub_mem,
|
||||
uint32_t compute_borrow = 1;
|
||||
uint32_t uses_input_borrow = 0;
|
||||
auto first_indexes = mem_ptr->first_indexes_for_overflow_sub
|
||||
[merged_interesting_remainder.len - 1];
|
||||
auto second_indexes = mem_ptr->second_indexes_for_overflow_sub
|
||||
[merged_interesting_remainder.len - 1];
|
||||
auto scalar_indexes =
|
||||
mem_ptr
|
||||
->scalars_for_overflow_sub[merged_interesting_remainder.len - 1];
|
||||
mem_ptr->overflow_sub_mem->update_lut_indexes(
|
||||
streams, gpu_indexes, first_indexes, second_indexes, scalar_indexes,
|
||||
merged_interesting_remainder.len);
|
||||
host_integer_overflowing_sub<uint64_t>(
|
||||
streams, gpu_indexes, gpu_count, new_remainder.data,
|
||||
(uint64_t *)merged_interesting_remainder.data,
|
||||
interesting_divisor.data, subtraction_overflowed.data,
|
||||
(const Torus *)nullptr, mem_ptr->overflow_sub_mem, bsks, ksks,
|
||||
merged_interesting_remainder.len, compute_borrow, uses_input_borrow);
|
||||
};
|
||||
|
||||
// fills:
|
||||
@@ -657,10 +670,12 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
int_mem_ptr->negated_quotient, quotient, radix_params.big_lwe_dimension,
|
||||
num_blocks, radix_params.message_modulus, radix_params.carry_modulus);
|
||||
|
||||
host_propagate_single_carry<Torus>(int_mem_ptr->sub_streams_1, gpu_indexes,
|
||||
gpu_count, int_mem_ptr->negated_quotient,
|
||||
nullptr, nullptr, int_mem_ptr->scp_mem_1,
|
||||
bsks, ksks, num_blocks);
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
host_propagate_single_carry<Torus>(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_quotient, nullptr, nullptr, int_mem_ptr->scp_mem_1,
|
||||
bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
|
||||
host_integer_radix_negation(int_mem_ptr->sub_streams_2, gpu_indexes,
|
||||
gpu_count, int_mem_ptr->negated_remainder,
|
||||
@@ -671,7 +686,8 @@ __host__ void host_integer_div_rem_kb(cudaStream_t const *streams,
|
||||
host_propagate_single_carry<Torus>(
|
||||
int_mem_ptr->sub_streams_2, gpu_indexes, gpu_count,
|
||||
int_mem_ptr->negated_remainder, nullptr, nullptr,
|
||||
int_mem_ptr->scp_mem_2, bsks, ksks, num_blocks);
|
||||
int_mem_ptr->scp_mem_2, bsks, ksks, num_blocks, requested_flag,
|
||||
uses_carry);
|
||||
|
||||
host_integer_radix_cmux_kb<Torus>(
|
||||
int_mem_ptr->sub_streams_1, gpu_indexes, gpu_count, quotient,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
#include "integer/integer.cuh"
|
||||
#include "integer/negation.cuh"
|
||||
#include <linear_algebra.h>
|
||||
|
||||
void cuda_full_propagation_64_inplace(void *const *streams,
|
||||
@@ -49,7 +50,8 @@ void scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
|
||||
uint32_t uses_carry, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
@@ -59,30 +61,94 @@ void scratch_cuda_propagate_single_carry_kb_64_inplace(
|
||||
scratch_cuda_propagate_single_carry_kb_inplace<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_sc_prop_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
allocate_gpu_memory);
|
||||
requested_flag, uses_carry, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void scratch_cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
|
||||
uint32_t uses_carry, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_propagate_single_carry_kb_inplace<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_sc_prop_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
requested_flag, uses_carry, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void scratch_cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t compute_overflow,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_overflowing_sub<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_borrow_prop_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
compute_overflow, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array, void *carry_out, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks) {
|
||||
void *lwe_array, void *carry_out, const void *carry_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_blocks,
|
||||
uint32_t requested_flag, uint32_t uses_carry) {
|
||||
|
||||
host_propagate_single_carry<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array), static_cast<uint64_t *>(carry_out),
|
||||
nullptr, (int_sc_prop_memory<uint64_t> *)mem_ptr, bsks,
|
||||
(uint64_t **)(ksks), num_blocks);
|
||||
static_cast<const uint64_t *>(carry_in),
|
||||
(int_sc_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
|
||||
void cuda_propagate_single_carry_get_input_carries_kb_64_inplace(
|
||||
void cuda_add_and_propagate_single_carry_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array, void *carry_out, void *input_carries, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_blocks) {
|
||||
host_propagate_single_carry<uint64_t>(
|
||||
void *lhs_array, const void *rhs_array, void *carry_out,
|
||||
const void *carry_in, int8_t *mem_ptr, void *const *bsks, void *const *ksks,
|
||||
uint32_t num_blocks, uint32_t requested_flag, uint32_t uses_carry) {
|
||||
|
||||
host_add_and_propagate_single_carry<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array), static_cast<uint64_t *>(carry_out),
|
||||
static_cast<uint64_t *>(input_carries),
|
||||
static_cast<uint64_t *>(lhs_array),
|
||||
static_cast<const uint64_t *>(rhs_array),
|
||||
static_cast<uint64_t *>(carry_out),
|
||||
static_cast<const uint64_t *>(carry_in),
|
||||
(int_sc_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)(ksks),
|
||||
num_blocks);
|
||||
num_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
|
||||
void cuda_integer_overflowing_sub_kb_64_inplace(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lhs_array, const void *rhs_array, void *overflow_block,
|
||||
const void *input_borrow, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks, uint32_t compute_overflow,
|
||||
uint32_t uses_input_borrow) {
|
||||
|
||||
host_integer_overflowing_sub<uint64_t>(
|
||||
(cudaStream_t const *)streams, gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lhs_array), static_cast<uint64_t *>(lhs_array),
|
||||
static_cast<const uint64_t *>(rhs_array),
|
||||
static_cast<uint64_t *>(overflow_block),
|
||||
static_cast<const uint64_t *>(input_borrow),
|
||||
(int_borrow_prop_memory<uint64_t> *)mem_ptr, bsks, (uint64_t **)ksks,
|
||||
num_blocks, compute_overflow, uses_input_borrow);
|
||||
}
|
||||
|
||||
void cleanup_cuda_propagate_single_carry(void *const *streams,
|
||||
@@ -94,6 +160,23 @@ void cleanup_cuda_propagate_single_carry(void *const *streams,
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void cleanup_cuda_add_and_propagate_single_carry(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_sc_prop_memory<uint64_t> *mem_ptr =
|
||||
(int_sc_prop_memory<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
void cleanup_cuda_integer_overflowing_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_borrow_prop_memory<uint64_t> *mem_ptr =
|
||||
(int_borrow_prop_memory<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
@@ -142,14 +225,14 @@ void cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *output_radix_lwe, void const *input_radix_lwe, int8_t *mem_ptr,
|
||||
void *const *ksks, void *const *bsks, uint32_t num_blocks,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
host_apply_many_univariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(output_radix_lwe),
|
||||
static_cast<const uint64_t *>(input_radix_lwe),
|
||||
(int_radix_lut<uint64_t> *)mem_ptr, (uint64_t **)(ksks), bsks, num_blocks,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
}
|
||||
|
||||
void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -209,7 +209,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
auto small_lwe_size = small_lwe_dimension + 1;
|
||||
|
||||
// In the case of extracting a single LWE this parameters are dummy
|
||||
uint32_t lut_count = 1;
|
||||
uint32_t num_many_lut = 1;
|
||||
uint32_t lut_stride = 0;
|
||||
|
||||
if (num_radix_in_vec == 0)
|
||||
@@ -267,8 +267,8 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->params, 2,
|
||||
2 * ch_amount * num_blocks, reused_lut);
|
||||
}
|
||||
auto message_acc = luts_message_carry->get_lut(gpu_indexes[0], 0);
|
||||
auto carry_acc = luts_message_carry->get_lut(gpu_indexes[0], 1);
|
||||
auto message_acc = luts_message_carry->get_lut(0, 0);
|
||||
auto carry_acc = luts_message_carry->get_lut(0, 1);
|
||||
|
||||
// define functions for each accumulator
|
||||
auto lut_f_message = [message_modulus](Torus x) -> Torus {
|
||||
@@ -285,7 +285,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], carry_acc, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, lut_f_carry);
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
while (r > 2) {
|
||||
size_t cur_total_blocks = r * num_blocks;
|
||||
@@ -334,10 +334,10 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
if (carry_count > 0)
|
||||
cuda_set_value_async<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
luts_message_carry->get_lut_indexes(gpu_indexes[0], message_count), 1,
|
||||
luts_message_carry->get_lut_indexes(0, message_count), 1,
|
||||
carry_count);
|
||||
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
/// For multi GPU execution we create vectors of pointers for inputs and
|
||||
/// outputs
|
||||
@@ -370,7 +370,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
glwe_dimension, small_lwe_dimension, polynomial_size,
|
||||
mem_ptr->params.pbs_base_log, mem_ptr->params.pbs_level,
|
||||
mem_ptr->params.grouping_factor, total_count,
|
||||
mem_ptr->params.pbs_type, lut_count, lut_stride);
|
||||
mem_ptr->params.pbs_type, num_many_lut, lut_stride);
|
||||
} else {
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
@@ -418,7 +418,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
glwe_dimension, small_lwe_dimension, polynomial_size,
|
||||
mem_ptr->params.pbs_base_log, mem_ptr->params.pbs_level,
|
||||
mem_ptr->params.grouping_factor, total_count,
|
||||
mem_ptr->params.pbs_type, lut_count, lut_stride);
|
||||
mem_ptr->params.pbs_type, num_many_lut, lut_stride);
|
||||
|
||||
multi_gpu_gather_lwe_async<Torus>(
|
||||
streams, gpu_indexes, active_gpu_count, new_blocks, lwe_after_pbs_vec,
|
||||
@@ -578,10 +578,15 @@ __host__ void host_integer_mult_radix_kb(
|
||||
terms_degree, bsks, ksks, mem_ptr->sum_ciphertexts_mem, num_blocks,
|
||||
2 * num_blocks, mem_ptr->luts_array);
|
||||
|
||||
auto scp_mem_ptr = mem_ptr->sum_ciphertexts_mem->scp_mem;
|
||||
host_propagate_single_carry<Torus>(streams, gpu_indexes, gpu_count,
|
||||
radix_lwe_out, nullptr, nullptr,
|
||||
scp_mem_ptr, bsks, ksks, num_blocks);
|
||||
uint32_t block_modulus = message_modulus * carry_modulus;
|
||||
uint32_t num_bits_in_block = log2_int(block_modulus);
|
||||
|
||||
auto scp_mem_ptr = mem_ptr->sc_prop_mem;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
host_propagate_single_carry<Torus>(
|
||||
streams, gpu_indexes, gpu_count, radix_lwe_out, nullptr, nullptr,
|
||||
scp_mem_ptr, bsks, ksks, num_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
|
||||
@@ -12,49 +12,3 @@ void cuda_negate_integer_radix_ciphertext_64(
|
||||
static_cast<const uint64_t *>(lwe_array_in), lwe_dimension,
|
||||
lwe_ciphertext_count, message_modulus, carry_modulus);
|
||||
}
|
||||
|
||||
void scratch_cuda_integer_radix_overflowing_sub_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
|
||||
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_overflowing_sub_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_overflowing_sub_memory<uint64_t> **)mem_ptr, num_blocks, params,
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_radix_overflowing_sub_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *radix_lwe_out, void *radix_lwe_overflowed, void const *radix_lwe_left,
|
||||
void const *radix_lwe_right, int8_t *mem_ptr, void *const *bsks,
|
||||
void *const *ksks, uint32_t num_blocks) {
|
||||
|
||||
auto mem = (int_overflowing_sub_memory<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_overflowing_sub_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(radix_lwe_out),
|
||||
static_cast<uint64_t *>(radix_lwe_overflowed),
|
||||
static_cast<const uint64_t *>(radix_lwe_left),
|
||||
static_cast<const uint64_t *>(radix_lwe_right), bsks, (uint64_t **)(ksks),
|
||||
mem, num_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_radix_overflowing_sub(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
int_overflowing_sub_memory<uint64_t> *mem_ptr =
|
||||
(int_overflowing_sub_memory<uint64_t> *)(*mem_ptr_void);
|
||||
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
@@ -91,7 +91,7 @@ __host__ void scratch_cuda_integer_overflowing_sub_kb(
|
||||
*mem_ptr = new int_overflowing_sub_memory<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_blocks, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
/*
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_overflowing_sub_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
@@ -113,4 +113,39 @@ __host__ void host_integer_overflowing_sub_kb(
|
||||
mem_ptr, bsks, ksks, num_blocks);
|
||||
}
|
||||
|
||||
*/
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_overflowing_sub(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_out_array, Torus *lhs_array,
|
||||
const Torus *rhs_array, Torus *overflow_block, const Torus *input_borrow,
|
||||
int_borrow_prop_memory<uint64_t> *mem_ptr, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_blocks, uint32_t compute_overflow,
|
||||
uint32_t uses_input_borrow) {
|
||||
|
||||
auto radix_params = mem_ptr->params;
|
||||
|
||||
// We need to recalculate the num_groups, because on the division the number
|
||||
// of num_blocks changes
|
||||
uint32_t block_modulus =
|
||||
radix_params.message_modulus * radix_params.carry_modulus;
|
||||
uint32_t num_bits_in_block = log2_int(block_modulus);
|
||||
uint32_t grouping_size = num_bits_in_block;
|
||||
uint32_t num_groups = (num_blocks + grouping_size - 1) / grouping_size;
|
||||
|
||||
auto stream = (cudaStream_t *)streams;
|
||||
host_unchecked_sub_with_correcting_term<Torus>(
|
||||
stream[0], gpu_indexes[0], static_cast<Torus *>(lwe_out_array),
|
||||
static_cast<Torus *>(lhs_array), static_cast<const Torus *>(rhs_array),
|
||||
radix_params.big_lwe_dimension, num_blocks, radix_params.message_modulus,
|
||||
radix_params.carry_modulus, radix_params.message_modulus - 1);
|
||||
|
||||
host_single_borrow_propagate<Torus>(
|
||||
streams, gpu_indexes, gpu_count, static_cast<Torus *>(lwe_out_array),
|
||||
static_cast<Torus *>(overflow_block),
|
||||
static_cast<const Torus *>(input_borrow),
|
||||
(int_borrow_prop_memory<Torus> *)mem_ptr, bsks, (Torus **)(ksks),
|
||||
num_blocks, num_groups, compute_overflow, uses_input_borrow);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -31,10 +31,10 @@ __host__ void host_integer_radix_scalar_bitop_kb(
|
||||
} else {
|
||||
// We have all possible LUTs pre-computed and we use the decomposed scalar
|
||||
// as index to recover the right one
|
||||
cuda_memcpy_async_gpu_to_gpu(lut->get_lut_indexes(gpu_indexes[0], 0),
|
||||
clear_blocks, num_clear_blocks * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(lut->get_lut_indexes(0, 0), clear_blocks,
|
||||
num_clear_blocks * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_input, bsks,
|
||||
|
||||
@@ -22,6 +22,9 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
|
||||
case GE:
|
||||
case LT:
|
||||
case LE:
|
||||
if (lwe_ciphertext_count % 2 != 0)
|
||||
PANIC("Cuda error (scalar comparisons): the number of radix blocks has "
|
||||
"to be even.")
|
||||
host_integer_radix_scalar_difference_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -32,6 +35,9 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
|
||||
break;
|
||||
case MAX:
|
||||
case MIN:
|
||||
if (lwe_ciphertext_count % 2 != 0)
|
||||
PANIC("Cuda error (scalar max/min): the number of radix blocks has to be "
|
||||
"even.")
|
||||
host_integer_radix_scalar_maxmin_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
|
||||
@@ -110,11 +110,11 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
};
|
||||
|
||||
auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
scalar_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
generate_device_accumulator<Torus>(streams[0], gpu_indexes[0],
|
||||
lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus,
|
||||
carry_modulus, scalar_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out,
|
||||
@@ -141,8 +141,9 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
|
||||
//////////////
|
||||
// lsb
|
||||
Torus *lhs = diff_buffer->tmp_packed_left;
|
||||
Torus *rhs = diff_buffer->tmp_packed_right;
|
||||
Torus *lhs = diff_buffer->tmp_packed;
|
||||
Torus *rhs =
|
||||
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
|
||||
|
||||
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
|
||||
big_lwe_dimension, num_lsb_radix_blocks,
|
||||
@@ -194,10 +195,10 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
|
||||
auto lut = diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
scalar_bivariate_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_lsb_out,
|
||||
@@ -210,8 +211,9 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
uint32_t num_lsb_radix_blocks = total_num_radix_blocks;
|
||||
uint32_t num_scalar_blocks = total_num_scalar_blocks;
|
||||
|
||||
Torus *lhs = diff_buffer->tmp_packed_left;
|
||||
Torus *rhs = diff_buffer->tmp_packed_right;
|
||||
Torus *lhs = diff_buffer->tmp_packed;
|
||||
Torus *rhs =
|
||||
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
|
||||
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], lhs, lwe_array_in,
|
||||
big_lwe_dimension, num_lsb_radix_blocks,
|
||||
@@ -292,7 +294,7 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
Torus const *sign_block =
|
||||
lwe_array_in + (total_num_radix_blocks - 1) * big_lwe_size;
|
||||
|
||||
auto sign_bit_pos = (int)std::log2(message_modulus) - 1;
|
||||
auto sign_bit_pos = (int)log2_int(message_modulus) - 1;
|
||||
|
||||
auto scalar_last_leaf_with_respect_to_zero_lut_f =
|
||||
[sign_handler_f, sign_bit_pos,
|
||||
@@ -329,10 +331,10 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
|
||||
auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
scalar_bivariate_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, are_all_msb_zeros,
|
||||
@@ -358,8 +360,9 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
|
||||
//////////////
|
||||
// lsb
|
||||
Torus *lhs = diff_buffer->tmp_packed_left;
|
||||
Torus *rhs = diff_buffer->tmp_packed_right;
|
||||
Torus *lhs = diff_buffer->tmp_packed;
|
||||
Torus *rhs =
|
||||
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
|
||||
|
||||
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
|
||||
big_lwe_dimension, num_lsb_radix_blocks,
|
||||
@@ -422,11 +425,10 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
|
||||
auto signed_msb_lut = mem_ptr->signed_msb_lut;
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
msb_streams[0], gpu_indexes[0],
|
||||
signed_msb_lut->get_lut(gpu_indexes[0], 0), params.glwe_dimension,
|
||||
params.polynomial_size, params.message_modulus, params.carry_modulus,
|
||||
lut_f);
|
||||
signed_msb_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
msb_streams[0], gpu_indexes[0], signed_msb_lut->get_lut(0, 0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, lut_f);
|
||||
signed_msb_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
Torus const *sign_block = msb + (num_msb_radix_blocks - 1) * big_lwe_size;
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
@@ -459,8 +461,9 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
auto lwe_array_ct_out = mem_ptr->tmp_lwe_array_out;
|
||||
auto lwe_array_sign_out =
|
||||
lwe_array_ct_out + (num_lsb_radix_blocks / 2) * big_lwe_size;
|
||||
Torus *lhs = diff_buffer->tmp_packed_left;
|
||||
Torus *rhs = diff_buffer->tmp_packed_right;
|
||||
Torus *lhs = diff_buffer->tmp_packed;
|
||||
Torus *rhs =
|
||||
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
|
||||
|
||||
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
|
||||
big_lwe_dimension, num_lsb_radix_blocks - 1,
|
||||
@@ -676,10 +679,10 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
|
||||
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], packed_scalar,
|
||||
scalar_blocks, 0, num_scalar_blocks, message_modulus);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
scalar_comparison_luts->get_lut_indexes(gpu_indexes[0], 0),
|
||||
packed_scalar, num_halved_scalar_blocks * sizeof(Torus), lsb_streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(scalar_comparison_luts->get_lut_indexes(0, 0),
|
||||
packed_scalar,
|
||||
num_halved_scalar_blocks * sizeof(Torus),
|
||||
lsb_streams[0], gpu_indexes[0]);
|
||||
scalar_comparison_luts->broadcast_lut(lsb_streams, gpu_indexes, 0);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
|
||||
@@ -54,7 +54,7 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
uint32_t lwe_size = input_lwe_dimension + 1;
|
||||
uint32_t lwe_size_bytes = lwe_size * sizeof(T);
|
||||
uint32_t msg_bits = (uint32_t)std::log2(message_modulus);
|
||||
uint32_t msg_bits = log2_int(message_modulus);
|
||||
uint32_t num_ciphertext_bits = msg_bits * num_radix_blocks;
|
||||
|
||||
T *preshifted_buffer = mem->preshifted_buffer;
|
||||
@@ -112,10 +112,12 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
terms_degree, bsks, ksks, mem->sum_ciphertexts_vec_mem,
|
||||
num_radix_blocks, j, nullptr);
|
||||
|
||||
auto scp_mem_ptr = mem->sum_ciphertexts_vec_mem->scp_mem;
|
||||
host_propagate_single_carry<T>(streams, gpu_indexes, gpu_count, lwe_array,
|
||||
nullptr, nullptr, scp_mem_ptr, bsks, ksks,
|
||||
num_radix_blocks);
|
||||
auto scp_mem_ptr = mem->sc_prop_mem;
|
||||
uint32_t requested_flag = outputFlag::FLAG_NONE;
|
||||
uint32_t uses_carry = 0;
|
||||
host_propagate_single_carry<T>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, nullptr, nullptr,
|
||||
scp_mem_ptr, bsks, ksks, num_radix_blocks, requested_flag, uses_carry);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ __host__ void host_integer_radix_scalar_rotate_kb_inplace(
|
||||
size_t big_lwe_size = glwe_dimension * polynomial_size + 1;
|
||||
size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
|
||||
size_t num_bits_in_message = (size_t)log2(message_modulus);
|
||||
size_t num_bits_in_message = (size_t)log2_int(message_modulus);
|
||||
size_t total_num_bits = num_bits_in_message * num_blocks;
|
||||
n = n % total_num_bits;
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ __host__ void host_integer_radix_logical_scalar_shift_kb_inplace(
|
||||
size_t big_lwe_size = glwe_dimension * polynomial_size + 1;
|
||||
size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
|
||||
size_t num_bits_in_block = (size_t)log2(message_modulus);
|
||||
size_t num_bits_in_block = (size_t)log2_int(message_modulus);
|
||||
size_t total_num_bits = num_bits_in_block * num_blocks;
|
||||
shift = shift % total_num_bits;
|
||||
|
||||
@@ -141,7 +141,7 @@ __host__ void host_integer_radix_arithmetic_scalar_shift_kb_inplace(
|
||||
size_t big_lwe_size = glwe_dimension * polynomial_size + 1;
|
||||
size_t big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
|
||||
size_t num_bits_in_block = (size_t)log2(message_modulus);
|
||||
size_t num_bits_in_block = (size_t)log2_int(message_modulus);
|
||||
size_t total_num_bits = num_bits_in_block * num_blocks;
|
||||
shift = shift % total_num_bits;
|
||||
|
||||
|
||||
@@ -29,7 +29,7 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
|
||||
uint32_t gpu_count, Torus *lwe_array, Torus const *lwe_shift,
|
||||
int_shift_and_rotate_buffer<Torus> *mem, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
uint32_t bits_per_block = std::log2(mem->params.message_modulus);
|
||||
uint32_t bits_per_block = log2_int(mem->params.message_modulus);
|
||||
uint32_t total_nb_bits = bits_per_block * num_radix_blocks;
|
||||
if (total_nb_bits == 0)
|
||||
return;
|
||||
@@ -55,7 +55,7 @@ __host__ void host_integer_radix_shift_and_rotate_kb_inplace(
|
||||
// then the behaviour of shifting won't be the same
|
||||
// if shift >= total_nb_bits compared to when total_nb_bits
|
||||
// is a power of two, as will 'capture' more bits in `shift_bits`
|
||||
uint32_t max_num_bits_that_tell_shift = std::log2(total_nb_bits);
|
||||
uint32_t max_num_bits_that_tell_shift = log2_int(total_nb_bits);
|
||||
if (!is_power_of_two(total_nb_bits))
|
||||
max_num_bits_that_tell_shift += 1;
|
||||
// Extracts bits and put them in the bit index 2 (=> bit number 3)
|
||||
|
||||
@@ -57,6 +57,7 @@ void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
|
||||
static_cast<const uint64_t *>(lwe_array_in_2),
|
||||
input_lwe_dimension, input_lwe_ciphertext_count);
|
||||
}
|
||||
|
||||
/*
|
||||
* Perform the addition of a u32 input LWE ciphertext vector with a u32
|
||||
* plaintext vector. See the equivalent operation on u64 data for more details.
|
||||
|
||||
@@ -82,6 +82,46 @@ __host__ void host_addition(cudaStream_t stream, uint32_t gpu_index, T *output,
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void pack_for_overflowing_ops(T *output, T const *input_1,
|
||||
T const *input_2, uint32_t num_entries,
|
||||
uint32_t message_modulus) {
|
||||
|
||||
int tid = threadIdx.x;
|
||||
int index = blockIdx.x * blockDim.x + tid;
|
||||
if (index < num_entries) {
|
||||
// Here we take advantage of the wrapping behaviour of uint
|
||||
output[index] = input_1[index] * message_modulus + input_2[index];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__host__ void host_pack_for_overflowing_ops(cudaStream_t stream,
|
||||
uint32_t gpu_index, T *output,
|
||||
T const *input_1, T const *input_2,
|
||||
uint32_t input_lwe_dimension,
|
||||
uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus) {
|
||||
|
||||
cudaSetDevice(gpu_index);
|
||||
// lwe_size includes the presence of the body
|
||||
// whereas lwe_dimension is the number of elements in the mask
|
||||
int lwe_size = input_lwe_dimension + 1;
|
||||
// Create a 1-dimensional grid of threads
|
||||
int num_blocks = 0, num_threads = 0;
|
||||
int num_entries = lwe_size;
|
||||
getNumBlocksAndThreads(num_entries, 512, num_blocks, num_threads);
|
||||
dim3 grid(num_blocks, 1, 1);
|
||||
dim3 thds(num_threads, 1, 1);
|
||||
|
||||
pack_for_overflowing_ops<T><<<grid, thds, 0, stream>>>(
|
||||
&output[(input_lwe_ciphertext_count - 1) * lwe_size],
|
||||
&input_1[(input_lwe_ciphertext_count - 1) * lwe_size],
|
||||
&input_2[(input_lwe_ciphertext_count - 1) * lwe_size], lwe_size,
|
||||
message_modulus);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void subtraction(T *output, T const *input_1, T const *input_2,
|
||||
uint32_t num_entries) {
|
||||
|
||||
@@ -92,7 +92,7 @@ void execute_pbs_async(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t grouping_factor,
|
||||
uint32_t input_lwe_ciphertext_count, PBS_TYPE pbs_type,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
switch (sizeof(Torus)) {
|
||||
case sizeof(uint32_t):
|
||||
@@ -126,7 +126,7 @@ void execute_pbs_async(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
current_lwe_array_in, current_lwe_input_indexes,
|
||||
bootstrapping_keys[i], pbs_buffer[i], lwe_dimension, glwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_inputs_on_gpu,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -165,7 +165,7 @@ void execute_pbs_async(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
current_lwe_array_in, current_lwe_input_indexes,
|
||||
bootstrapping_keys[i], pbs_buffer[i], lwe_dimension, glwe_dimension,
|
||||
polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_inputs_on_gpu, lut_count, lut_stride);
|
||||
num_inputs_on_gpu, num_many_lut, lut_stride);
|
||||
}
|
||||
break;
|
||||
case CLASSICAL:
|
||||
@@ -194,7 +194,7 @@ void execute_pbs_async(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
current_lwe_array_in, current_lwe_input_indexes,
|
||||
bootstrapping_keys[i], pbs_buffer[i], lwe_dimension, glwe_dimension,
|
||||
polynomial_size, base_log, level_count, num_inputs_on_gpu,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
|
||||
@@ -45,7 +45,7 @@ __global__ void device_programmable_bootstrap_cg(
|
||||
const double2 *__restrict__ bootstrapping_key, double2 *join_buffer,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, int8_t *device_mem,
|
||||
uint64_t device_memory_size_per_block, uint32_t lut_count,
|
||||
uint64_t device_memory_size_per_block, uint32_t num_many_lut,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
grid_group grid = this_grid();
|
||||
@@ -152,8 +152,8 @@ __global__ void device_programmable_bootstrap_cg(
|
||||
// but we do the computation at block 0 to avoid waiting for extra blocks,
|
||||
// in case they're not synchronized
|
||||
sample_extract_mask<Torus, params>(block_lwe_array_out, accumulator);
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
(i * gridDim.z * (glwe_dimension * polynomial_size + 1));
|
||||
@@ -168,8 +168,8 @@ __global__ void device_programmable_bootstrap_cg(
|
||||
}
|
||||
} else if (blockIdx.y == glwe_dimension) {
|
||||
sample_extract_body<Torus, params>(block_lwe_array_out, accumulator, 0);
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
@@ -235,7 +235,7 @@ __host__ void host_programmable_bootstrap_cg(
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
// With SM each block corresponds to either the mask or body, no need to
|
||||
// duplicate data for each
|
||||
@@ -273,7 +273,7 @@ __host__ void host_programmable_bootstrap_cg(
|
||||
kernel_args[10] = &base_log;
|
||||
kernel_args[11] = &level_count;
|
||||
kernel_args[12] = &d_mem;
|
||||
kernel_args[14] = &lut_count;
|
||||
kernel_args[14] = &num_many_lut;
|
||||
kernel_args[15] = &lut_stride;
|
||||
|
||||
if (max_shared_memory < partial_sm) {
|
||||
|
||||
@@ -32,7 +32,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
uint32_t level_count, uint32_t grouping_factor, uint32_t lwe_offset,
|
||||
uint32_t lwe_chunk_size, uint32_t keybundle_size_per_input,
|
||||
int8_t *device_mem, uint64_t device_memory_size_per_block,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
grid_group grid = this_grid();
|
||||
|
||||
// We use shared memory for the polynomials that are used often during the
|
||||
@@ -134,8 +135,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
// default
|
||||
sample_extract_mask<Torus, params>(block_lwe_array_out, accumulator);
|
||||
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
(i * gridDim.z * (glwe_dimension * polynomial_size + 1));
|
||||
@@ -153,8 +154,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
|
||||
sample_extract_body<Torus, params>(block_lwe_array_out, accumulator, 0);
|
||||
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
@@ -293,7 +294,7 @@ __host__ void execute_cg_external_product_loop(
|
||||
Torus const *lwe_output_indexes, pbs_buffer<Torus, MULTI_BIT> *buffer,
|
||||
uint32_t num_samples, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t lwe_offset, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t lwe_offset, uint32_t num_many_lut,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
uint64_t full_sm =
|
||||
@@ -343,7 +344,7 @@ __host__ void execute_cg_external_product_loop(
|
||||
kernel_args[16] = &chunk_size;
|
||||
kernel_args[17] = &keybundle_size_per_input;
|
||||
kernel_args[18] = &d_mem;
|
||||
kernel_args[20] = &lut_count;
|
||||
kernel_args[20] = &num_many_lut;
|
||||
kernel_args[21] = &lut_stride;
|
||||
|
||||
dim3 grid_accumulate(level_count, glwe_dimension + 1, num_samples);
|
||||
@@ -379,7 +380,7 @@ __host__ void host_cg_multi_bit_programmable_bootstrap(
|
||||
pbs_buffer<Torus, MULTI_BIT> *buffer, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
auto lwe_chunk_size = buffer->lwe_chunk_size;
|
||||
|
||||
@@ -397,7 +398,7 @@ __host__ void host_cg_multi_bit_programmable_bootstrap(
|
||||
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, lwe_array_out, lwe_output_indexes, buffer,
|
||||
num_samples, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
grouping_factor, base_log, level_count, lwe_offset, lut_count,
|
||||
grouping_factor, base_log, level_count, lwe_offset, num_many_lut,
|
||||
lut_stride);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -123,7 +123,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
Torus const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
@@ -133,7 +133,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_programmable_bootstrap_tbc<Torus, Degree<512>>(
|
||||
@@ -141,7 +141,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_programmable_bootstrap_tbc<Torus, Degree<1024>>(
|
||||
@@ -149,7 +149,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 2048:
|
||||
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<2048>>(
|
||||
@@ -157,7 +157,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 4096:
|
||||
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<4096>>(
|
||||
@@ -165,7 +165,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 8192:
|
||||
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<8192>>(
|
||||
@@ -173,7 +173,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 16384:
|
||||
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<16384>>(
|
||||
@@ -181,7 +181,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (classical PBS): unsupported polynomial size. "
|
||||
@@ -380,7 +380,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
Torus const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
@@ -390,7 +390,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_programmable_bootstrap_cg<Torus, Degree<512>>(
|
||||
@@ -398,7 +398,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_programmable_bootstrap_cg<Torus, Degree<1024>>(
|
||||
@@ -406,7 +406,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 2048:
|
||||
host_programmable_bootstrap_cg<Torus, AmortizedDegree<2048>>(
|
||||
@@ -414,7 +414,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 4096:
|
||||
host_programmable_bootstrap_cg<Torus, AmortizedDegree<4096>>(
|
||||
@@ -422,7 +422,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 8192:
|
||||
host_programmable_bootstrap_cg<Torus, AmortizedDegree<8192>>(
|
||||
@@ -430,7 +430,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 16384:
|
||||
host_programmable_bootstrap_cg<Torus, AmortizedDegree<16384>>(
|
||||
@@ -438,7 +438,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (classical PBS): unsupported polynomial size. "
|
||||
@@ -455,7 +455,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
Torus const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
@@ -465,7 +465,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_programmable_bootstrap<Torus, Degree<512>>(
|
||||
@@ -473,7 +473,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_programmable_bootstrap<Torus, Degree<1024>>(
|
||||
@@ -481,7 +481,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 2048:
|
||||
host_programmable_bootstrap<Torus, AmortizedDegree<2048>>(
|
||||
@@ -489,7 +489,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 4096:
|
||||
host_programmable_bootstrap<Torus, AmortizedDegree<4096>>(
|
||||
@@ -497,7 +497,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 8192:
|
||||
host_programmable_bootstrap<Torus, AmortizedDegree<8192>>(
|
||||
@@ -505,7 +505,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 16384:
|
||||
host_programmable_bootstrap<Torus, AmortizedDegree<16384>>(
|
||||
@@ -513,7 +513,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (classical PBS): unsupported polynomial size. "
|
||||
@@ -531,7 +531,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
if (base_log > 32)
|
||||
PANIC("Cuda error (classical PBS): base log should be <= 32")
|
||||
@@ -551,7 +551,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
|
||||
static_cast<const uint32_t *>(lwe_input_indexes),
|
||||
static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
#else
|
||||
PANIC("Cuda error (PBS): TBC pbs is not supported.")
|
||||
@@ -566,7 +566,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
|
||||
static_cast<const uint32_t *>(lwe_input_indexes),
|
||||
static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case DEFAULT:
|
||||
cuda_programmable_bootstrap_lwe_ciphertext_vector<uint32_t>(
|
||||
@@ -578,7 +578,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_32(
|
||||
static_cast<const uint32_t *>(lwe_input_indexes),
|
||||
static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (PBS): unknown pbs variant.")
|
||||
@@ -653,9 +653,11 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_samples, uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_samples, uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
if (base_log > 64)
|
||||
PANIC("Cuda error (classical PBS): base log should be <= 64")
|
||||
if ((glwe_dimension + 1) * level_count > 8)
|
||||
PANIC("Cuda error (multi-bit PBS): (k + 1)*l should be <= 8")
|
||||
|
||||
pbs_buffer<uint64_t, CLASSICAL> *buffer =
|
||||
(pbs_buffer<uint64_t, CLASSICAL> *)mem_ptr;
|
||||
@@ -672,7 +674,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
static_cast<const uint64_t *>(lwe_input_indexes),
|
||||
static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
#else
|
||||
PANIC("Cuda error (PBS): TBC pbs is not supported.")
|
||||
@@ -687,7 +689,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
static_cast<const uint64_t *>(lwe_input_indexes),
|
||||
static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case PBS_VARIANT::DEFAULT:
|
||||
cuda_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
@@ -699,7 +701,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
static_cast<const uint64_t *>(lwe_input_indexes),
|
||||
static_cast<const double2 *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, num_samples,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (PBS): unknown pbs variant.")
|
||||
@@ -727,7 +729,7 @@ template void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector<uint64_t>(
|
||||
uint64_t const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<uint64_t, CLASSICAL> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride);
|
||||
|
||||
template void cuda_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
@@ -737,7 +739,7 @@ template void cuda_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
uint64_t const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<uint64_t, CLASSICAL> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride);
|
||||
|
||||
template void scratch_cuda_programmable_bootstrap_cg<uint64_t>(
|
||||
@@ -758,7 +760,7 @@ template void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector<uint32_t>(
|
||||
uint32_t const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<uint32_t, CLASSICAL> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride);
|
||||
|
||||
template void cuda_programmable_bootstrap_lwe_ciphertext_vector<uint32_t>(
|
||||
@@ -768,7 +770,7 @@ template void cuda_programmable_bootstrap_lwe_ciphertext_vector<uint32_t>(
|
||||
uint32_t const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<uint32_t, CLASSICAL> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride);
|
||||
|
||||
template void scratch_cuda_programmable_bootstrap_cg<uint32_t>(
|
||||
@@ -797,7 +799,7 @@ template void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector<uint32_t>(
|
||||
uint32_t const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<uint32_t, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride);
|
||||
template void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector<uint64_t>(
|
||||
void *stream, uint32_t gpu_index, uint64_t *lwe_array_out,
|
||||
@@ -806,7 +808,7 @@ template void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector<uint64_t>(
|
||||
uint64_t const *lwe_input_indexes, double2 const *bootstrapping_key,
|
||||
pbs_buffer<uint64_t, CLASSICAL> *buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride);
|
||||
template void scratch_cuda_programmable_bootstrap_tbc<uint32_t>(
|
||||
void *stream, uint32_t gpu_index,
|
||||
|
||||
@@ -142,7 +142,7 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
uint32_t lwe_iteration, uint32_t lwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
int8_t *device_mem, uint64_t device_memory_size_per_block,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
// We use shared memory for the polynomials that are used often during the
|
||||
// bootstrap, since shared memory is kept in L1 cache and accessing it is
|
||||
@@ -217,8 +217,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
// but we do the computation at block 0 to avoid waiting for extra blocks,
|
||||
// in case they're not synchronized
|
||||
sample_extract_mask<Torus, params>(block_lwe_array_out, accumulator);
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
(i * gridDim.x * (glwe_dimension * polynomial_size + 1));
|
||||
@@ -233,8 +233,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
}
|
||||
} else if (blockIdx.y == glwe_dimension) {
|
||||
sample_extract_body<Torus, params>(block_lwe_array_out, accumulator, 0);
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
@@ -412,8 +412,8 @@ __host__ void execute_step_two(
|
||||
uint32_t input_lwe_ciphertext_count, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, int8_t *d_mem, int lwe_iteration, uint64_t partial_sm,
|
||||
uint64_t partial_dm, uint64_t full_sm, uint64_t full_dm, uint32_t lut_count,
|
||||
uint32_t lut_stride) {
|
||||
uint64_t partial_dm, uint64_t full_sm, uint64_t full_dm,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
int max_shared_memory = cuda_get_max_shared_memory(0);
|
||||
cudaSetDevice(gpu_index);
|
||||
@@ -426,21 +426,21 @@ __host__ void execute_step_two(
|
||||
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
|
||||
bootstrapping_key, global_accumulator, global_join_buffer,
|
||||
lwe_iteration, lwe_dimension, polynomial_size, base_log,
|
||||
level_count, d_mem, full_dm, lut_count, lut_stride);
|
||||
level_count, d_mem, full_dm, num_many_lut, lut_stride);
|
||||
} else if (max_shared_memory < full_sm) {
|
||||
device_programmable_bootstrap_step_two<Torus, params, PARTIALSM>
|
||||
<<<grid, thds, partial_sm, stream>>>(
|
||||
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
|
||||
bootstrapping_key, global_accumulator, global_join_buffer,
|
||||
lwe_iteration, lwe_dimension, polynomial_size, base_log,
|
||||
level_count, d_mem, partial_dm, lut_count, lut_stride);
|
||||
level_count, d_mem, partial_dm, num_many_lut, lut_stride);
|
||||
} else {
|
||||
device_programmable_bootstrap_step_two<Torus, params, FULLSM>
|
||||
<<<grid, thds, full_sm, stream>>>(
|
||||
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
|
||||
bootstrapping_key, global_accumulator, global_join_buffer,
|
||||
lwe_iteration, lwe_dimension, polynomial_size, base_log,
|
||||
level_count, d_mem, 0, lut_count, lut_stride);
|
||||
level_count, d_mem, 0, num_many_lut, lut_stride);
|
||||
}
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
@@ -456,7 +456,7 @@ __host__ void host_programmable_bootstrap(
|
||||
pbs_buffer<Torus, CLASSICAL> *pbs_buffer, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
// With SM each block corresponds to either the mask or body, no need to
|
||||
@@ -493,7 +493,7 @@ __host__ void host_programmable_bootstrap(
|
||||
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
|
||||
partial_sm, partial_dm_step_two, full_sm_step_two, full_dm_step_two,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -67,7 +67,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
@@ -76,7 +76,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<512>>(
|
||||
@@ -84,7 +84,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<1024>>(
|
||||
@@ -92,7 +92,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 2048:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<2048>>(
|
||||
@@ -100,7 +100,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 4096:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<4096>>(
|
||||
@@ -108,7 +108,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 8192:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<8192>>(
|
||||
@@ -116,7 +116,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 16384:
|
||||
host_cg_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<16384>>(
|
||||
@@ -124,7 +124,7 @@ void cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (multi-bit PBS): unsupported polynomial size. Supported "
|
||||
@@ -142,7 +142,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
switch (polynomial_size) {
|
||||
case 256:
|
||||
@@ -151,7 +151,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<512>>(
|
||||
@@ -159,7 +159,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<1024>>(
|
||||
@@ -167,7 +167,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 2048:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<2048>>(
|
||||
@@ -175,7 +175,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 4096:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<4096>>(
|
||||
@@ -183,7 +183,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 8192:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<8192>>(
|
||||
@@ -191,7 +191,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 16384:
|
||||
host_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<16384>>(
|
||||
@@ -199,7 +199,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (multi-bit PBS): unsupported polynomial size. Supported "
|
||||
@@ -215,11 +215,13 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
void const *lwe_input_indexes, void const *bootstrapping_key,
|
||||
int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
if (base_log > 64)
|
||||
PANIC("Cuda error (multi-bit PBS): base log should be <= 64")
|
||||
if ((glwe_dimension + 1) * level_count > 8)
|
||||
PANIC("Cuda error (multi-bit PBS): (k + 1)*l should be <= 8")
|
||||
|
||||
pbs_buffer<uint64_t, MULTI_BIT> *buffer =
|
||||
(pbs_buffer<uint64_t, MULTI_BIT> *)mem_ptr;
|
||||
@@ -236,7 +238,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
static_cast<const uint64_t *>(lwe_input_indexes),
|
||||
static_cast<const uint64_t *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
#else
|
||||
PANIC("Cuda error (multi-bit PBS): TBC pbs is not supported.")
|
||||
@@ -251,7 +253,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
static_cast<const uint64_t *>(lwe_input_indexes),
|
||||
static_cast<const uint64_t *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case PBS_VARIANT::DEFAULT:
|
||||
cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
@@ -263,7 +265,7 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
|
||||
static_cast<const uint64_t *>(lwe_input_indexes),
|
||||
static_cast<const uint64_t *>(bootstrapping_key), buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (multi-bit PBS): unsupported implementation variant.")
|
||||
@@ -465,7 +467,7 @@ uint32_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
|
||||
#if CUDA_ARCH < 900
|
||||
// We pick a smaller divisor on GPUs other than H100, so 256-bit integer
|
||||
// multiplication can run
|
||||
int log2_max_num_pbs = std::log2(max_num_pbs);
|
||||
int log2_max_num_pbs = log2_int(max_num_pbs);
|
||||
if (log2_max_num_pbs > 13)
|
||||
ith_divisor = log2_max_num_pbs - 11;
|
||||
#endif
|
||||
@@ -499,7 +501,7 @@ cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
pbs_buffer<uint64_t, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride);
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
|
||||
template void scratch_cuda_cg_multi_bit_programmable_bootstrap<uint64_t>(
|
||||
void *stream, uint32_t gpu_index,
|
||||
@@ -516,7 +518,7 @@ cuda_cg_multi_bit_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
pbs_buffer<uint64_t, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride);
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
|
||||
template bool
|
||||
has_support_to_cuda_programmable_bootstrap_tbc_multi_bit<uint64_t>(
|
||||
@@ -588,7 +590,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
pbs_buffer<Torus, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
if (base_log > 32)
|
||||
PANIC("Cuda error (multi-bit PBS): base log should be <= 32")
|
||||
@@ -600,7 +602,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_tbc_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<512>>(
|
||||
@@ -608,7 +610,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_tbc_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<1024>>(
|
||||
@@ -616,7 +618,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 2048: {
|
||||
int num_sms = 0;
|
||||
@@ -629,14 +631,14 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log,
|
||||
level_count, num_samples, lut_count, lut_stride);
|
||||
level_count, num_samples, num_many_lut, lut_stride);
|
||||
else
|
||||
host_tbc_multi_bit_programmable_bootstrap<Torus, Degree<2048>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log,
|
||||
level_count, num_samples, lut_count, lut_stride);
|
||||
level_count, num_samples, num_many_lut, lut_stride);
|
||||
|
||||
break;
|
||||
}
|
||||
@@ -646,7 +648,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 8192:
|
||||
host_tbc_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<8192>>(
|
||||
@@ -654,7 +656,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
case 16384:
|
||||
host_tbc_multi_bit_programmable_bootstrap<Torus, AmortizedDegree<16384>>(
|
||||
@@ -662,7 +664,7 @@ void cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, pbs_buffer, glwe_dimension,
|
||||
lwe_dimension, polynomial_size, grouping_factor, base_log, level_count,
|
||||
num_samples, lut_count, lut_stride);
|
||||
num_samples, num_many_lut, lut_stride);
|
||||
break;
|
||||
default:
|
||||
PANIC("Cuda error (multi-bit PBS): unsupported polynomial size. Supported "
|
||||
@@ -685,5 +687,5 @@ cuda_tbc_multi_bit_programmable_bootstrap_lwe_ciphertext_vector<uint64_t>(
|
||||
pbs_buffer<uint64_t, MULTI_BIT> *pbs_buffer, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride);
|
||||
uint32_t num_many_lut, uint32_t lut_stride);
|
||||
#endif
|
||||
|
||||
@@ -253,7 +253,7 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
uint32_t polynomial_size, uint32_t level_count,
|
||||
uint32_t grouping_factor, uint32_t iteration, uint32_t lwe_offset,
|
||||
uint32_t lwe_chunk_size, int8_t *device_mem,
|
||||
uint64_t device_memory_size_per_block, uint32_t lut_count,
|
||||
uint64_t device_memory_size_per_block, uint32_t num_many_lut,
|
||||
uint32_t lut_stride) {
|
||||
// We use shared memory for the polynomials that are used often during the
|
||||
// bootstrap, since shared memory is kept in L1 cache and accessing it is
|
||||
@@ -326,8 +326,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
// but we do the computation at block 0 to avoid waiting for extra blocks,
|
||||
// in case they're not synchronized
|
||||
sample_extract_mask<Torus, params>(block_lwe_array_out, global_slice);
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
(i * gridDim.x * (glwe_dimension * polynomial_size + 1));
|
||||
@@ -342,8 +342,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
}
|
||||
} else if (blockIdx.y == glwe_dimension) {
|
||||
sample_extract_body<Torus, params>(block_lwe_array_out, global_slice, 0);
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
@@ -591,12 +591,14 @@ execute_step_one(cudaStream_t stream, uint32_t gpu_index,
|
||||
}
|
||||
|
||||
template <typename Torus, class params>
|
||||
__host__ void execute_step_two(
|
||||
cudaStream_t stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
Torus const *lwe_output_indexes, pbs_buffer<Torus, MULTI_BIT> *buffer,
|
||||
uint32_t num_samples, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, int32_t grouping_factor, uint32_t level_count,
|
||||
uint32_t j, uint32_t lwe_offset, uint32_t lut_count, uint32_t lut_stride) {
|
||||
__host__ void
|
||||
execute_step_two(cudaStream_t stream, uint32_t gpu_index, Torus *lwe_array_out,
|
||||
Torus const *lwe_output_indexes,
|
||||
pbs_buffer<Torus, MULTI_BIT> *buffer, uint32_t num_samples,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, int32_t grouping_factor,
|
||||
uint32_t level_count, uint32_t j, uint32_t lwe_offset,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
auto lwe_chunk_size = buffer->lwe_chunk_size;
|
||||
uint64_t full_sm_accumulate_step_two =
|
||||
@@ -621,7 +623,7 @@ __host__ void execute_step_two(
|
||||
global_accumulator, global_accumulator_fft, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, level_count, grouping_factor, j,
|
||||
lwe_offset, lwe_chunk_size, d_mem, full_sm_accumulate_step_two,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
else
|
||||
device_multi_bit_programmable_bootstrap_accumulate_step_two<Torus, params,
|
||||
FULLSM>
|
||||
@@ -630,7 +632,7 @@ __host__ void execute_step_two(
|
||||
global_accumulator, global_accumulator_fft, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, level_count,
|
||||
grouping_factor, j, lwe_offset, lwe_chunk_size, d_mem, 0,
|
||||
lut_count, lut_stride);
|
||||
num_many_lut, lut_stride);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
@@ -643,7 +645,7 @@ __host__ void host_multi_bit_programmable_bootstrap(
|
||||
pbs_buffer<Torus, MULTI_BIT> *buffer, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
auto lwe_chunk_size = buffer->lwe_chunk_size;
|
||||
|
||||
@@ -667,7 +669,8 @@ __host__ void host_multi_bit_programmable_bootstrap(
|
||||
execute_step_two<Torus, params>(
|
||||
stream, gpu_index, lwe_array_out, lwe_output_indexes, buffer,
|
||||
num_samples, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
grouping_factor, level_count, j, lwe_offset, lut_count, lut_stride);
|
||||
grouping_factor, level_count, j, lwe_offset, num_many_lut,
|
||||
lut_stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -45,8 +45,8 @@ __global__ void device_programmable_bootstrap_tbc(
|
||||
const double2 *__restrict__ bootstrapping_key, double2 *join_buffer,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, int8_t *device_mem,
|
||||
uint64_t device_memory_size_per_block, bool support_dsm, uint32_t lut_count,
|
||||
uint32_t lut_stride) {
|
||||
uint64_t device_memory_size_per_block, bool support_dsm,
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
cluster_group cluster = this_cluster();
|
||||
|
||||
@@ -158,8 +158,8 @@ __global__ void device_programmable_bootstrap_tbc(
|
||||
// in case they're not synchronized
|
||||
sample_extract_mask<Torus, params>(block_lwe_array_out, accumulator);
|
||||
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
(i * gridDim.z * (glwe_dimension * polynomial_size + 1));
|
||||
@@ -175,8 +175,8 @@ __global__ void device_programmable_bootstrap_tbc(
|
||||
} else if (blockIdx.y == glwe_dimension) {
|
||||
sample_extract_body<Torus, params>(block_lwe_array_out, accumulator, 0);
|
||||
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
@@ -261,7 +261,7 @@ __host__ void host_programmable_bootstrap_tbc(
|
||||
pbs_buffer<Torus, CLASSICAL> *buffer, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
auto supports_dsm =
|
||||
supports_distributed_shared_memory_on_classic_programmable_bootstrap<
|
||||
@@ -317,7 +317,7 @@ __host__ void host_programmable_bootstrap_tbc(
|
||||
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
|
||||
lwe_array_in, lwe_input_indexes, bootstrapping_key, buffer_fft,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, d_mem, full_dm,
|
||||
supports_dsm, lut_count, lut_stride));
|
||||
supports_dsm, num_many_lut, lut_stride));
|
||||
} else if (max_shared_memory < full_sm + minimum_sm_tbc) {
|
||||
config.dynamicSmemBytes = partial_sm + minimum_sm_tbc;
|
||||
|
||||
@@ -326,7 +326,7 @@ __host__ void host_programmable_bootstrap_tbc(
|
||||
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
|
||||
lwe_array_in, lwe_input_indexes, bootstrapping_key, buffer_fft,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, d_mem,
|
||||
partial_dm, supports_dsm, lut_count, lut_stride));
|
||||
partial_dm, supports_dsm, num_many_lut, lut_stride));
|
||||
} else {
|
||||
config.dynamicSmemBytes = full_sm + minimum_sm_tbc;
|
||||
|
||||
@@ -335,7 +335,7 @@ __host__ void host_programmable_bootstrap_tbc(
|
||||
lwe_array_out, lwe_output_indexes, lut_vector, lut_vector_indexes,
|
||||
lwe_array_in, lwe_input_indexes, bootstrapping_key, buffer_fft,
|
||||
lwe_dimension, polynomial_size, base_log, level_count, d_mem, 0,
|
||||
supports_dsm, lut_count, lut_stride));
|
||||
supports_dsm, num_many_lut, lut_stride));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
uint32_t level_count, uint32_t grouping_factor, uint32_t lwe_offset,
|
||||
uint32_t lwe_chunk_size, uint32_t keybundle_size_per_input,
|
||||
int8_t *device_mem, uint64_t device_memory_size_per_block,
|
||||
bool support_dsm, uint32_t lut_count, uint32_t lut_stride) {
|
||||
bool support_dsm, uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
|
||||
cluster_group cluster = this_cluster();
|
||||
|
||||
@@ -141,8 +141,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
// blocks, in case they're not synchronized
|
||||
sample_extract_mask<Torus, params>(block_lwe_array_out, accumulator);
|
||||
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
(i * gridDim.z * (glwe_dimension * polynomial_size + 1));
|
||||
@@ -157,8 +157,8 @@ __global__ void __launch_bounds__(params::degree / params::opt)
|
||||
}
|
||||
} else if (blockIdx.y == glwe_dimension) {
|
||||
sample_extract_body<Torus, params>(block_lwe_array_out, accumulator, 0);
|
||||
if (lut_count > 1) {
|
||||
for (int i = 1; i < lut_count; i++) {
|
||||
if (num_many_lut > 1) {
|
||||
for (int i = 1; i < num_many_lut; i++) {
|
||||
|
||||
auto next_lwe_array_out =
|
||||
lwe_array_out +
|
||||
@@ -299,7 +299,7 @@ __host__ void execute_tbc_external_product_loop(
|
||||
Torus const *lwe_output_indexes, pbs_buffer<Torus, MULTI_BIT> *buffer,
|
||||
uint32_t num_samples, uint32_t lwe_dimension, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
|
||||
uint32_t level_count, uint32_t lwe_offset, uint32_t lut_count,
|
||||
uint32_t level_count, uint32_t lwe_offset, uint32_t num_many_lut,
|
||||
uint32_t lut_stride) {
|
||||
|
||||
auto lwe_chunk_size = buffer->lwe_chunk_size;
|
||||
@@ -363,7 +363,7 @@ __host__ void execute_tbc_external_product_loop(
|
||||
lwe_array_in, lwe_input_indexes, keybundle_fft, buffer_fft,
|
||||
global_accumulator, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
base_log, level_count, grouping_factor, lwe_offset, chunk_size,
|
||||
keybundle_size_per_input, d_mem, full_dm, supports_dsm, lut_count,
|
||||
keybundle_size_per_input, d_mem, full_dm, supports_dsm, num_many_lut,
|
||||
lut_stride));
|
||||
} else if (max_shared_memory < full_dm + minimum_dm) {
|
||||
config.dynamicSmemBytes = partial_dm + minimum_dm;
|
||||
@@ -375,7 +375,7 @@ __host__ void execute_tbc_external_product_loop(
|
||||
lwe_array_in, lwe_input_indexes, keybundle_fft, buffer_fft,
|
||||
global_accumulator, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
base_log, level_count, grouping_factor, lwe_offset, chunk_size,
|
||||
keybundle_size_per_input, d_mem, partial_dm, supports_dsm, lut_count,
|
||||
keybundle_size_per_input, d_mem, partial_dm, supports_dsm, num_many_lut,
|
||||
lut_stride));
|
||||
} else {
|
||||
config.dynamicSmemBytes = full_dm + minimum_dm;
|
||||
@@ -387,7 +387,7 @@ __host__ void execute_tbc_external_product_loop(
|
||||
lwe_array_in, lwe_input_indexes, keybundle_fft, buffer_fft,
|
||||
global_accumulator, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
base_log, level_count, grouping_factor, lwe_offset, chunk_size,
|
||||
keybundle_size_per_input, d_mem, 0, supports_dsm, lut_count,
|
||||
keybundle_size_per_input, d_mem, 0, supports_dsm, num_many_lut,
|
||||
lut_stride));
|
||||
}
|
||||
}
|
||||
@@ -401,7 +401,7 @@ __host__ void host_tbc_multi_bit_programmable_bootstrap(
|
||||
pbs_buffer<Torus, MULTI_BIT> *buffer, uint32_t glwe_dimension,
|
||||
uint32_t lwe_dimension, uint32_t polynomial_size, uint32_t grouping_factor,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_samples,
|
||||
uint32_t lut_count, uint32_t lut_stride) {
|
||||
uint32_t num_many_lut, uint32_t lut_stride) {
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
auto lwe_chunk_size = buffer->lwe_chunk_size;
|
||||
@@ -419,7 +419,7 @@ __host__ void host_tbc_multi_bit_programmable_bootstrap(
|
||||
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, lwe_array_out, lwe_output_indexes, buffer,
|
||||
num_samples, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
grouping_factor, base_log, level_count, lwe_offset, lut_count,
|
||||
grouping_factor, base_log, level_count, lwe_offset, num_many_lut,
|
||||
lut_stride);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,7 +1,24 @@
|
||||
#ifndef CUDA_PARAMETERS_CUH
|
||||
#define CUDA_PARAMETERS_CUH
|
||||
|
||||
constexpr int log2(int n) { return (n <= 2) ? 1 : 1 + log2(n / 2); }
|
||||
#include "device.h"
|
||||
#include <cstdint>
|
||||
|
||||
// If decide to support something else than 32 and 64 bits, this method will
|
||||
// need to be adjusted
|
||||
template <typename T> constexpr unsigned log2_int(T n) {
|
||||
if (n == 0) {
|
||||
PANIC("Cuda error (log2): log2 is undefined for 0");
|
||||
}
|
||||
|
||||
if constexpr (sizeof(T) == 4) { // uint32_t
|
||||
return (unsigned)(8 * sizeof(uint32_t) - __builtin_clz(n) - 1);
|
||||
} else if constexpr (sizeof(T) == 8) { // uint64_t
|
||||
return (unsigned)(8 * sizeof(uint64_t) - __builtin_clzll(n) - 1);
|
||||
} else {
|
||||
return (n <= 2) ? 1 : 1 + log2_int(n / 2);
|
||||
}
|
||||
}
|
||||
|
||||
constexpr int choose_opt_amortized(int degree) {
|
||||
if (degree <= 1024)
|
||||
@@ -41,14 +58,14 @@ template <int N> class Degree {
|
||||
public:
|
||||
constexpr static int degree = N;
|
||||
constexpr static int opt = choose_opt(N);
|
||||
constexpr static int log2_degree = log2(N);
|
||||
constexpr static int log2_degree = log2_int(N);
|
||||
};
|
||||
|
||||
template <int N> class AmortizedDegree {
|
||||
public:
|
||||
constexpr static int degree = N;
|
||||
constexpr static int opt = choose_opt_amortized(N);
|
||||
constexpr static int log2_degree = log2(N);
|
||||
constexpr static int log2_degree = log2_int(N);
|
||||
};
|
||||
enum sharedMemDegree { NOSM = 0, PARTIALSM = 1, FULLSM = 2 };
|
||||
|
||||
|
||||
@@ -38,7 +38,7 @@ int32_t cuda_setup_multi_gpu() {
|
||||
int get_active_gpu_count(int num_inputs, int gpu_count) {
|
||||
int active_gpu_count = gpu_count;
|
||||
if (gpu_count > num_inputs) {
|
||||
active_gpu_count = num_inputs;
|
||||
active_gpu_count = 1;
|
||||
}
|
||||
return active_gpu_count;
|
||||
}
|
||||
@@ -56,8 +56,8 @@ int get_num_inputs_on_gpu(int total_num_inputs, int gpu_index, int gpu_count) {
|
||||
// If there are fewer inputs than GPUs, not all GPUs are active and GPU 0
|
||||
// handles everything
|
||||
if (gpu_count > total_num_inputs) {
|
||||
if (gpu_index < total_num_inputs) {
|
||||
num_inputs = 1;
|
||||
if (gpu_index == 0) {
|
||||
num_inputs = total_num_inputs;
|
||||
}
|
||||
} else {
|
||||
// If there are more inputs than GPUs, all GPUs are active and compute over
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user