mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-04-28 03:01:21 -04:00
Compare commits
67 Commits
al/div_mul
...
pa/feat/cu
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f9e94d9b6b | ||
|
|
d66e36b529 | ||
|
|
33ca2c2fab | ||
|
|
b22e369166 | ||
|
|
90edfdbbe7 | ||
|
|
f998f00580 | ||
|
|
57a31d19a7 | ||
|
|
9c43c30e66 | ||
|
|
a9d48c7e35 | ||
|
|
863d51feaf | ||
|
|
ae2aeb3b6b | ||
|
|
5c44ffad27 | ||
|
|
e42d203fc5 | ||
|
|
37f25c0ce5 | ||
|
|
cd03b7eef7 | ||
|
|
2c8f0ce7de | ||
|
|
e3a93c7d87 | ||
|
|
9b43a9459a | ||
|
|
33d5091025 | ||
|
|
70ff0f726c | ||
|
|
13d55f31ac | ||
|
|
7e871e54e1 | ||
|
|
012585204a | ||
|
|
d6e45858c1 | ||
|
|
ae832c158f | ||
|
|
8504d79180 | ||
|
|
c306e63430 | ||
|
|
9195753273 | ||
|
|
bda8ab028e | ||
|
|
9e8db2179e | ||
|
|
950cece2a9 | ||
|
|
aee53d3fae | ||
|
|
4e2db929da | ||
|
|
d2c13e4593 | ||
|
|
c41b76f892 | ||
|
|
1ede004e9a | ||
|
|
1df331d246 | ||
|
|
0f2451e3b7 | ||
|
|
3de23d14a2 | ||
|
|
e0ee8af1ac | ||
|
|
072005d521 | ||
|
|
241b73704c | ||
|
|
8687b69769 | ||
|
|
cdb81dd262 | ||
|
|
03956a9a24 | ||
|
|
ef684649f9 | ||
|
|
fc642c6f26 | ||
|
|
c2a999d300 | ||
|
|
ae3e5f1a32 | ||
|
|
3dcb982a0b | ||
|
|
e9c901b3a9 | ||
|
|
2d8907dfed | ||
|
|
06f8fc8962 | ||
|
|
381aeb572f | ||
|
|
3a99ee9718 | ||
|
|
86f07045fe | ||
|
|
b1ce34f8a7 | ||
|
|
4388a3dc99 | ||
|
|
805436839d | ||
|
|
bdbec55e84 | ||
|
|
33131c664a | ||
|
|
1151bb267e | ||
|
|
ce9679f1ee | ||
|
|
23b43c33c7 | ||
|
|
6feaf49906 | ||
|
|
25f4e5f279 | ||
|
|
c1f05cbf85 |
53
.github/actions/hyperstack_setup/action.yml
vendored
Normal file
53
.github/actions/hyperstack_setup/action.yml
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
name: Setup Cuda
|
||||
description: Setup Cuda on Hyperstack instance
|
||||
|
||||
inputs:
|
||||
cuda-version:
|
||||
description: Version of Cuda to use
|
||||
required: true
|
||||
gcc-version:
|
||||
description: Version of GCC to use
|
||||
required: true
|
||||
cmake-version:
|
||||
description: Version of cmake to use
|
||||
default: 3.29.6
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ inputs.cmake-version }}/cmake-${{ inputs.cmake-version }}.tar.gz
|
||||
tar -zxvf cmake-${{ inputs.cmake-version }}.tar.gz
|
||||
cd cmake-${{ inputs.cmake-version }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Export CUDA variables
|
||||
shell: bash
|
||||
run: |
|
||||
CUDA_PATH=/usr/local/cuda-${{ inputs.cuda-version }}
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ inputs.cuda-version }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
shell: bash
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ inputs.gcc-version }}";
|
||||
echo "CXX=/usr/bin/g++-${{ inputs.gcc-version }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ inputs.gcc-version }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
shell: bash
|
||||
run: nvidia-smi
|
||||
@@ -26,7 +26,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -50,7 +50,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
24
.github/workflows/aws_tfhe_fast_tests.yml
vendored
24
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -11,16 +11,26 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
pull_request_target:
|
||||
|
||||
jobs:
|
||||
check-user-permission:
|
||||
if: github.event_name == 'pull_request_target'
|
||||
uses: ./.github/workflows/check_triggering_actor.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-user-permission
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
needs.check-user-permission.result == 'success'
|
||||
permissions:
|
||||
pull-requests: write
|
||||
outputs:
|
||||
@@ -55,10 +65,11 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -133,7 +144,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -148,7 +159,7 @@ jobs:
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
needs: [ should-run, setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -157,9 +168,10 @@ jobs:
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -270,7 +282,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/aws_tfhe_integer_tests.yml
vendored
8
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -47,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -140,7 +140,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -47,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -144,7 +144,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/aws_tfhe_tests.yml
vendored
8
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -67,7 +67,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -142,7 +142,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -168,7 +168,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -250,7 +250,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
6
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -119,7 +119,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_boolean.yml
vendored
8
.github/workflows/benchmark_boolean.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -62,7 +62,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_boolean
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -127,7 +127,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_core_crypto.yml
vendored
8
.github/workflows/benchmark_core_crypto.yml
vendored
@@ -26,7 +26,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -58,7 +58,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
--walk-subdirs
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -115,7 +115,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_erc20.yml
vendored
8
.github/workflows/benchmark_erc20.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -97,7 +97,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_erc20
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -124,7 +124,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_gpu_4090.yml
vendored
8
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -54,7 +54,7 @@ jobs:
|
||||
echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -82,7 +82,7 @@ jobs:
|
||||
--walk-subdirs
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_integer_multi_bit_gpu_default
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -127,7 +127,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -157,7 +157,7 @@ jobs:
|
||||
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
50
.github/workflows/benchmark_gpu_core_crypto.yml
vendored
50
.github/workflows/benchmark_gpu_core_crypto.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -48,28 +48,19 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
@@ -84,31 +75,10 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make bench_pbs_gpu
|
||||
@@ -128,7 +98,7 @@ jobs:
|
||||
--walk-subdirs
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -167,7 +137,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
3
.github/workflows/benchmark_gpu_erc20.yml
vendored
3
.github/workflows/benchmark_gpu_erc20.yml
vendored
@@ -12,7 +12,10 @@ on:
|
||||
- "l40 (n3-L40x1)"
|
||||
- "single-h100 (n3-H100x1)"
|
||||
- "2-h100 (n3-H100x2)"
|
||||
- "4-h100 (n3-H100x4)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
- "multi-h100-nvlink (n3-H100x8-NVLink)"
|
||||
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
|
||||
jobs:
|
||||
parse-inputs:
|
||||
|
||||
55
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
55
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
@@ -54,7 +54,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -75,28 +75,19 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
@@ -111,34 +102,10 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make bench_hlapi_erc20_gpu
|
||||
@@ -157,9 +124,9 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_erc20
|
||||
name: ${{ github.sha }}_erc20_${{ inputs.profile }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
@@ -196,7 +163,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
1
.github/workflows/benchmark_gpu_integer.yml
vendored
1
.github/workflows/benchmark_gpu_integer.yml
vendored
@@ -15,6 +15,7 @@ on:
|
||||
- "4-h100 (n3-H100x4)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
- "multi-h100-nvlink (n3-H100x8-NVLink)"
|
||||
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
- "multi-a100-nvlink (n3-A100x8-NVLink)"
|
||||
command:
|
||||
description: "Benchmark command to run"
|
||||
|
||||
@@ -118,7 +118,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -145,28 +145,19 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
@@ -181,41 +172,10 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
run: |
|
||||
@@ -240,11 +200,18 @@ jobs:
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -272,7 +239,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_integer.yml
vendored
8
.github/workflows/benchmark_integer.yml
vendored
@@ -90,7 +90,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -130,7 +130,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -170,7 +170,7 @@ jobs:
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -197,7 +197,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_shortint.yml
vendored
8
.github/workflows/benchmark_shortint.yml
vendored
@@ -56,7 +56,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -136,7 +136,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -163,7 +163,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -130,7 +130,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -164,7 +164,7 @@ jobs:
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -191,7 +191,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_tfhe_fft.yml
vendored
6
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -84,7 +84,7 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_fft
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -126,7 +126,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_tfhe_ntt.yml
vendored
6
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -84,7 +84,7 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_ntt
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -126,7 +126,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
10
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
10
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -58,7 +58,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -91,7 +91,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -121,7 +121,7 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_tfhe_zk_pok
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -155,7 +155,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
10
.github/workflows/benchmark_wasm_client.yml
vendored
10
.github/workflows/benchmark_wasm_client.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -65,7 +65,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -166,7 +166,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -200,7 +200,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
10
.github/workflows/benchmark_zk_pke.yml
vendored
10
.github/workflows/benchmark_zk_pke.yml
vendored
@@ -47,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -104,7 +104,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -141,7 +141,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -177,7 +177,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
with:
|
||||
name: ${{ github.sha }}_integer_zk
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -211,7 +211,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/cargo_build.yml
vendored
2
.github/workflows/cargo_build.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
29
.github/workflows/check_triggering_actor.yml
vendored
Normal file
29
.github/workflows/check_triggering_actor.yml
vendored
Normal file
@@ -0,0 +1,29 @@
|
||||
# Check if triggering actor is a collaborator and has write access
|
||||
name: Check Triggering Actor
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
secrets:
|
||||
TOKEN:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
check-actor-permission:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Get User Permission
|
||||
id: check-access
|
||||
uses: actions-cool/check-user-permission@956b2e73cdfe3bcb819bb7225e490cb3b18fd76e # v2.2.1
|
||||
with:
|
||||
require: write
|
||||
username: ${{ github.triggering_actor }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.TOKEN }}
|
||||
|
||||
- name: Check User Permission
|
||||
if: steps.check-access.outputs.require-result == 'false'
|
||||
run: |
|
||||
echo "${{ github.triggering_actor }} does not have permissions on this repo."
|
||||
echo "Current permission level is ${{ steps.check-access.outputs.user-permission }}"
|
||||
echo "Job originally triggered by ${{ github.actor }}"
|
||||
exit 1
|
||||
2
.github/workflows/ci_lint.yml
vendored
2
.github/workflows/ci_lint.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
make lint_workflow
|
||||
|
||||
- name: Ensure SHA pinned actions
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@5d6ac37a4cef8b8df67f482a8e384987766f0213 # v3.0.17
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@6ae615f6475d2ede5ad88bea6baa7a1d5e93ffaa # v3.0.19
|
||||
with:
|
||||
allowlist: |
|
||||
slsa-framework/slsa-github-generator
|
||||
|
||||
12
.github/workflows/code_coverage.yml
vendored
12
.github/workflows/code_coverage.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -47,13 +47,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
files_yaml: |
|
||||
tfhe:
|
||||
@@ -83,7 +83,7 @@ jobs:
|
||||
make test_shortint_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e
|
||||
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -97,7 +97,7 @@ jobs:
|
||||
make test_integer_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e
|
||||
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -121,7 +121,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/gpu_4090_tests.yml
vendored
2
.github/workflows/gpu_4090_tests.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
52
.github/workflows/gpu_fast_h100_tests.yml
vendored
52
.github/workflows/gpu_fast_h100_tests.yml
vendored
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,60 +94,28 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run core crypto and internal CUDA backend tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu
|
||||
@@ -187,7 +155,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
52
.github/workflows/gpu_fast_tests.yml
vendored
52
.github/workflows/gpu_fast_tests.yml
vendored
@@ -34,7 +34,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -66,7 +66,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -92,60 +92,28 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run core crypto and internal CUDA backend tests
|
||||
run: |
|
||||
make test_core_crypto_gpu
|
||||
@@ -185,7 +153,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
38
.github/workflows/gpu_full_h100_tests.yml
vendored
38
.github/workflows/gpu_full_h100_tests.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -49,9 +49,6 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
@@ -71,38 +68,21 @@ jobs:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run core crypto, integer and internal CUDA backend tests
|
||||
run: |
|
||||
make test_gpu
|
||||
@@ -139,7 +119,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
52
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
52
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,60 +94,28 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run multi-bit CUDA integer compression tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_integer_compression_gpu
|
||||
@@ -190,7 +158,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
63
.github/workflows/gpu_integer_long_run_tests.yml
vendored
63
.github/workflows/gpu_integer_long_run_tests.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: AWS Long Run Tests on GPU
|
||||
name: Long Run Tests on GPU
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -15,8 +15,8 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly tests will be triggered each Friday at 1a.m.
|
||||
- cron: '0 1 * * FRI'
|
||||
# Weekly tests will be triggered each Friday at 9p.m.
|
||||
- cron: "0 21 * * 5"
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
@@ -29,17 +29,17 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: 2-h100
|
||||
profile: multi-gpu-test
|
||||
|
||||
cuda-tests:
|
||||
name: Long run GPU H100 tests
|
||||
name: Long run GPU tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
@@ -53,57 +53,26 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
timeout-minutes: 4320 # 72 hours
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
make test_integer_long_run_gpu
|
||||
@@ -119,7 +88,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests.result }}
|
||||
SLACK_MESSAGE: "Integer GPU H100 long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (gpu-tests)
|
||||
@@ -129,7 +98,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/gpu_pcc.yml
vendored
6
.github/workflows/gpu_pcc.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -110,7 +110,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,58 +94,25 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run signed integer tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci
|
||||
@@ -171,7 +138,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,58 +94,25 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run signed integer multi-bit tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci
|
||||
@@ -171,7 +138,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
53
.github/workflows/gpu_signed_integer_tests.yml
vendored
53
.github/workflows/gpu_signed_integer_tests.yml
vendored
@@ -42,7 +42,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -101,57 +101,28 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run nightly tests
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
@@ -160,10 +131,6 @@ jobs:
|
||||
echo "NIGHTLY_TESTS=TRUE";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run signed integer multi-bit tests
|
||||
run: |
|
||||
make test_signed_integer_multi_bit_gpu_ci
|
||||
@@ -189,7 +156,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,58 +94,25 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run unsigned integer tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci
|
||||
@@ -171,7 +138,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,58 +94,25 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run unsigned integer multi-bit tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci
|
||||
@@ -171,7 +138,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
53
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
53
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
@@ -41,7 +41,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -100,54 +100,25 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run nightly tests
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
@@ -156,10 +127,6 @@ jobs:
|
||||
echo "NIGHTLY_TESTS=TRUE";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run unsigned integer multi-bit tests
|
||||
run: |
|
||||
make test_unsigned_integer_multi_bit_gpu_ci
|
||||
@@ -185,7 +152,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
11
.github/workflows/integer_long_run_tests.yml
vendored
11
.github/workflows/integer_long_run_tests.yml
vendored
@@ -15,8 +15,8 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly tests will be triggered each Friday at 1a.m.
|
||||
- cron: '0 1 * * FRI'
|
||||
# Weekly tests will be triggered each Friday at 9p.m.
|
||||
- cron: "0 21 * * 5"
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -45,6 +45,7 @@ jobs:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 4320 # 72 hours
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
@@ -53,7 +54,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -77,7 +78,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/m1_tests.yml
vendored
2
.github/workflows/m1_tests.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
persist-credentials: "false"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
2
.github/workflows/make_release.yml
vendored
2
.github/workflows/make_release.yml
vendored
@@ -50,7 +50,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe
|
||||
- uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
|
||||
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
|
||||
6
.github/workflows/make_release_cuda.yml
vendored
6
.github/workflows/make_release_cuda.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -71,7 +71,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -120,7 +120,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -19,11 +19,14 @@ exclude = [
|
||||
"utils/cargo-tfhe-lints"
|
||||
]
|
||||
[workspace.dependencies]
|
||||
aligned-vec = { version = "0.5", default-features = false }
|
||||
aligned-vec = { version = "0.6", default-features = false }
|
||||
bytemuck = "1.14.3"
|
||||
dyn-stack = { version = "0.10", default-features = false }
|
||||
dyn-stack = { version = "0.11", default-features = false }
|
||||
itertools = "0.13"
|
||||
num-complex = "0.4"
|
||||
pulp = { version = "0.18.22", default-features = false }
|
||||
pulp = { version = "0.20.0", default-features = false }
|
||||
rand = "0.8"
|
||||
rayon = "1"
|
||||
serde = { version = "1.0", default-features = false }
|
||||
wasm-bindgen = ">=0.2.86,<0.2.94"
|
||||
|
||||
|
||||
238
Makefile
238
Makefile
@@ -2,7 +2,6 @@ SHELL:=$(shell /usr/bin/env which bash)
|
||||
OS:=$(shell uname)
|
||||
RS_CHECK_TOOLCHAIN:=$(shell cat toolchain.txt | tr -d '\n')
|
||||
CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
|
||||
TARGET_ARCH_FEATURE:=$(shell ./scripts/get_arch_feature.sh)
|
||||
CPU_COUNT=$(shell ./scripts/cpu_count.sh)
|
||||
RS_BUILD_TOOLCHAIN:=stable
|
||||
CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
|
||||
@@ -25,6 +24,7 @@ BACKWARD_COMPAT_DATA_BRANCH?=v0.4
|
||||
BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
|
||||
BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
|
||||
TFHE_SPEC:=tfhe
|
||||
WASM_PACK_VERSION="0.13.1"
|
||||
# We are kind of hacking the cut here, the version cannot contain a quote '"'
|
||||
WASM_BINDGEN_VERSION:=$(shell grep '^wasm-bindgen[[:space:]]*=' Cargo.toml | cut -d '"' -f 2 | xargs)
|
||||
WEB_RUNNER_DIR=web-test-runner
|
||||
@@ -116,8 +116,8 @@ install_wasm_bindgen_cli: install_rs_build_toolchain
|
||||
|
||||
.PHONY: install_wasm_pack # Install wasm-pack to build JS packages
|
||||
install_wasm_pack: install_rs_build_toolchain
|
||||
@wasm-pack --version > /dev/null 2>&1 || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.0 || \
|
||||
@wasm-pack --version | grep "$(WASM_PACK_VERSION)" > /dev/null 2>&1 || \
|
||||
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.1 || \
|
||||
( echo "Unable to install cargo wasm-pack, unknown error." && exit 1 )
|
||||
|
||||
.PHONY: install_node # Install last version of NodeJS via nvm
|
||||
@@ -281,14 +281,14 @@ check_typos: install_typos_checker
|
||||
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
|
||||
clippy_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
|
||||
check_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
@@ -307,52 +307,51 @@ lint_workflow: check_actionlint_installed
|
||||
.PHONY: clippy_core # Run clippy lints on core_crypto with and without experimental features
|
||||
clippy_core: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE) \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental \
|
||||
--features=experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),nightly-avx512 \
|
||||
--features=nightly-avx512 \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 \
|
||||
--features=experimental,nightly-avx512 \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),zk-pok \
|
||||
--features=zk-pok \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_boolean # Run clippy lints enabling the boolean features
|
||||
clippy_boolean: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean \
|
||||
--features=boolean \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_shortint # Run clippy lints enabling the shortint features
|
||||
clippy_shortint: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint \
|
||||
--features=shortint \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,experimental \
|
||||
--features=shortint,experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),zk-pok,shortint \
|
||||
--features=zk-pok,shortint \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_integer # Run clippy lints enabling the integer features
|
||||
clippy_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer \
|
||||
--features=integer \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,experimental \
|
||||
--features=integer,experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy # Run clippy lints enabling the boolean, shortint, integer
|
||||
clippy: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
|
||||
--features=boolean,shortint,integer \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_rustdoc # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
|
||||
@@ -363,13 +362,13 @@ clippy_rustdoc: install_rs_check_toolchain
|
||||
fi && \
|
||||
CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok,pbs-stats,strings \
|
||||
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
|
||||
clippy_c_api: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
|
||||
@@ -394,17 +393,16 @@ clippy_trivium: install_rs_check_toolchain
|
||||
.PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
|
||||
clippy_all_targets: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok,strings \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
|
||||
clippy_tfhe_csprng: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=$(TARGET_ARCH_FEATURE) \
|
||||
-p tfhe-csprng -- --no-deps -D warnings
|
||||
--features=parallel,software-prng -p tfhe-csprng -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_zk_pok # Run clippy lints on tfhe-zk-pok
|
||||
clippy_zk_pok: install_rs_check_toolchain
|
||||
@@ -443,67 +441,67 @@ check_rust_bindings_did_not_change:
|
||||
.PHONY: tfhe_lints # Run custom tfhe-rs lints
|
||||
tfhe_lints: install_tfhe_lints
|
||||
cd tfhe && RUSTFLAGS="$(RUSTFLAGS)" cargo tfhe-lints \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok -- -D warnings
|
||||
--features=boolean,shortint,integer,zk-pok -- -D warnings
|
||||
|
||||
.PHONY: build_core # Build core_crypto without experimental features
|
||||
build_core: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE) -p $(TFHE_SPEC)
|
||||
-p $(TFHE_SPEC)
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),nightly-avx512 -p $(TFHE_SPEC); \
|
||||
--features=nightly-avx512 -p $(TFHE_SPEC); \
|
||||
fi
|
||||
|
||||
.PHONY: build_core_experimental # Build core_crypto with experimental features
|
||||
build_core_experimental: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental -p $(TFHE_SPEC)
|
||||
--features=experimental -p $(TFHE_SPEC)
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 -p $(TFHE_SPEC); \
|
||||
--features=experimental,nightly-avx512 -p $(TFHE_SPEC); \
|
||||
fi
|
||||
|
||||
.PHONY: build_boolean # Build with boolean enabled
|
||||
build_boolean: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) --all-targets
|
||||
--features=boolean -p $(TFHE_SPEC) --all-targets
|
||||
|
||||
.PHONY: build_shortint # Build with shortint enabled
|
||||
build_shortint: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint -p $(TFHE_SPEC) --all-targets
|
||||
--features=shortint -p $(TFHE_SPEC) --all-targets
|
||||
|
||||
.PHONY: build_integer # Build with integer enabled
|
||||
build_integer: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) --all-targets
|
||||
--features=integer -p $(TFHE_SPEC) --all-targets
|
||||
|
||||
.PHONY: build_tfhe_full # Build with boolean, shortint and integer enabled
|
||||
build_tfhe_full: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
|
||||
--features=boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
|
||||
|
||||
.PHONY: build_tfhe_coverage # Build with test coverage enabled
|
||||
build_tfhe_coverage: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
|
||||
--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
|
||||
|
||||
.PHONY: build_c_api # Build the C API for boolean, shortint and integer
|
||||
build_c_api: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
|
||||
build_c_api_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
|
||||
build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: build_web_js_api # Build the js API targeting the web browser
|
||||
@@ -534,15 +532,15 @@ build_node_js_api: install_rs_build_toolchain install_wasm_pack
|
||||
.PHONY: build_tfhe_csprng # Build tfhe_csprng
|
||||
build_tfhe_csprng: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE) -p tfhe-csprng --all-targets
|
||||
-p tfhe-csprng --all-targets
|
||||
|
||||
.PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
|
||||
test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
|
||||
--features=experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
|
||||
--features=experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
|
||||
fi
|
||||
|
||||
.PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
|
||||
@@ -550,13 +548,13 @@ test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain inst
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
|
||||
--out xml --output-dir coverage/core_crypto --line --engine llvm --timeout 500 \
|
||||
--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache \
|
||||
--features=experimental,internal-keycache \
|
||||
-p $(TFHE_SPEC) -- core_crypto::
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
|
||||
--out xml --output-dir coverage/core_crypto_avx512 --line --engine llvm --timeout 500 \
|
||||
--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,nightly-avx512 \
|
||||
--features=experimental,internal-keycache,nightly-avx512 \
|
||||
-p $(TFHE_SPEC) -- -Z unstable-options --report-time core_crypto::; \
|
||||
fi
|
||||
|
||||
@@ -574,35 +572,38 @@ test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
|
||||
.PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
|
||||
test_core_crypto_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
|
||||
--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
|
||||
--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
|
||||
|
||||
.PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
|
||||
test_integer_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
|
||||
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
|
||||
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
|
||||
|
||||
.PHONY: test_integer_long_run_gpu # Run the tests of the integer module including experimental on the gpu backend
|
||||
test_integer_long_run_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,__long_run_tests -p $(TFHE_SPEC) -- integer::gpu::server_key::radix::tests_long_run --test-threads=6
|
||||
.PHONY: test_integer_long_run_gpu # Run the long run integer tests on the gpu backend
|
||||
test_integer_long_run_gpu: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
LONG_TESTS=TRUE \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--tfhe-package "$(TFHE_SPEC)" --backend "gpu"
|
||||
|
||||
.PHONY: test_integer_compression
|
||||
test_integer_compression: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
|
||||
--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) -- integer::ciphertext::compress
|
||||
--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compress
|
||||
|
||||
.PHONY: test_integer_compression_gpu
|
||||
test_integer_compression_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
|
||||
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress
|
||||
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress
|
||||
|
||||
.PHONY: test_integer_gpu_ci # Run the tests for integer ci on gpu backend
|
||||
test_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
@@ -661,20 +662,20 @@ test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_n
|
||||
.PHONY: test_boolean # Run the tests of the boolean module
|
||||
test_boolean: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) -- boolean::
|
||||
--features=boolean -p $(TFHE_SPEC) -- boolean::
|
||||
|
||||
.PHONY: test_boolean_cov # Run the tests of the boolean module with code coverage
|
||||
test_boolean_cov: install_rs_check_toolchain install_tarpaulin
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
|
||||
--out xml --output-dir coverage/boolean --line --engine llvm --timeout 500 \
|
||||
$(COVERAGE_EXCLUDED_FILES) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache \
|
||||
--features=boolean,internal-keycache \
|
||||
-p $(TFHE_SPEC) -- -Z unstable-options --report-time boolean::
|
||||
|
||||
.PHONY: test_c_api_rs # Run the rust tests for the C API
|
||||
test_c_api_rs: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api \
|
||||
-p $(TFHE_SPEC) \
|
||||
c_api
|
||||
|
||||
@@ -706,14 +707,14 @@ test_shortint_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
|
||||
.PHONY: test_shortint # Run all the tests for shortint
|
||||
test_shortint: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
|
||||
--features=shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
|
||||
|
||||
.PHONY: test_shortint_cov # Run the tests of the shortint module with code coverage
|
||||
test_shortint_cov: install_rs_check_toolchain install_tarpaulin
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
|
||||
--out xml --output-dir coverage/shortint --line --engine llvm --timeout 500 \
|
||||
$(COVERAGE_EXCLUDED_FILES) \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
|
||||
--features=shortint,internal-keycache \
|
||||
-p $(TFHE_SPEC) -- -Z unstable-options --report-time shortint::
|
||||
|
||||
.PHONY: test_integer_ci # Run the tests for integer ci
|
||||
@@ -770,26 +771,28 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--signed-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_integer_long_run # Run the long run tests for integer
|
||||
test_integer_long_run: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,__long_run_tests -p $(TFHE_SPEC) -- integer::server_key::radix_parallel::tests_long_run
|
||||
|
||||
.PHONY: test_integer_long_run # Run the long run integer tests
|
||||
test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
LONG_TESTS=TRUE \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_safe_serialization # Run the tests for safe serialization
|
||||
test_safe_serialization: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::
|
||||
--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::
|
||||
|
||||
.PHONY: test_zk # Run the tests for the zk module of the TFHE-rs crate
|
||||
test_zk: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,zk-pok -p $(TFHE_SPEC) -- zk::
|
||||
--features=shortint,zk-pok -p $(TFHE_SPEC) -- zk::
|
||||
|
||||
.PHONY: test_integer # Run all the tests for integer
|
||||
test_integer: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache -p $(TFHE_SPEC) -- integer::
|
||||
--features=integer,internal-keycache -p $(TFHE_SPEC) -- integer::
|
||||
|
||||
.PHONY: test_integer_cov # Run the tests of the integer module with code coverage
|
||||
test_integer_cov: install_rs_check_toolchain install_tarpaulin
|
||||
@@ -797,38 +800,38 @@ test_integer_cov: install_rs_check_toolchain install_tarpaulin
|
||||
--out xml --output-dir coverage/integer --line --engine llvm --timeout 500 \
|
||||
--implicit-test-threads \
|
||||
--exclude-files $(COVERAGE_EXCLUDED_FILES) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
|
||||
--features=integer,internal-keycache \
|
||||
-p $(TFHE_SPEC) -- -Z unstable-options --report-time integer::
|
||||
|
||||
.PHONY: test_high_level_api # Run all the tests for high_level_api
|
||||
test_high_level_api: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok -p $(TFHE_SPEC) \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok -p $(TFHE_SPEC) \
|
||||
-- high_level_api::
|
||||
|
||||
test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) \
|
||||
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) \
|
||||
-E "test(/high_level_api::.*gpu.*/)"
|
||||
|
||||
.PHONY: test_strings # Run the tests for strings ci
|
||||
test_strings: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,integer,strings -p $(TFHE_SPEC) \
|
||||
--features=shortint,integer,strings -p $(TFHE_SPEC) \
|
||||
-- strings::
|
||||
|
||||
|
||||
.PHONY: test_user_doc # Run tests from the .md documentation
|
||||
test_user_doc: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
|
||||
--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
|
||||
-p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
|
||||
.PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
|
||||
test_user_doc_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
|
||||
|
||||
@@ -836,14 +839,12 @@ test_user_doc_gpu: install_rs_build_toolchain
|
||||
.PHONY: test_regex_engine # Run tests for regex_engine example
|
||||
test_regex_engine: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--example regex_engine \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer
|
||||
--example regex_engine --features=integer
|
||||
|
||||
.PHONY: test_sha256_bool # Run tests for sha256_bool example
|
||||
test_sha256_bool: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--example sha256_bool \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean
|
||||
--example sha256_bool --features=boolean
|
||||
|
||||
.PHONY: test_examples # Run tests for examples
|
||||
test_examples: test_sha256_bool test_regex_engine
|
||||
@@ -861,7 +862,7 @@ test_kreyvium: install_rs_build_toolchain
|
||||
.PHONY: test_tfhe_csprng # Run tfhe-csprng tests
|
||||
test_tfhe_csprng: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE) -p tfhe-csprng
|
||||
-p tfhe-csprng
|
||||
|
||||
.PHONY: test_zk_pok # Run tfhe-zk-pok tests
|
||||
test_zk_pok: install_rs_build_toolchain
|
||||
@@ -879,7 +880,7 @@ test_zk_wasm_x86_compat_ci: check_nvm_installed
|
||||
test_zk_wasm_x86_compat: install_rs_build_toolchain build_node_js_api
|
||||
cd tfhe/tests/zk_wasm_x86_test && npm install
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe --test zk_wasm_x86_test --features=$(TARGET_ARCH_FEATURE),integer,zk-pok
|
||||
-p tfhe --test zk_wasm_x86_test --features=integer,zk-pok
|
||||
|
||||
.PHONY: test_versionable # Run tests for tfhe-versionable subcrate
|
||||
test_versionable: install_rs_build_toolchain
|
||||
@@ -892,7 +893,7 @@ test_versionable: install_rs_build_toolchain
|
||||
test_backward_compatibility_ci: install_rs_build_toolchain
|
||||
TFHE_BACKWARD_COMPAT_DATA_DIR="$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tfhe/$(BACKWARD_COMPAT_DATA_DIR)\"" \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture
|
||||
--features=shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture
|
||||
|
||||
.PHONY: test_backward_compatibility # Same as test_backward_compatibility_ci but tries to clone the data repo first if needed
|
||||
test_backward_compatibility: tfhe/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci
|
||||
@@ -907,7 +908,7 @@ doc: install_rs_check_toolchain
|
||||
DOCS_RS=1 \
|
||||
RUSTDOCFLAGS="--html-in-header katex-header.html" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
|
||||
--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: docs # Build rust doc alias for doc
|
||||
docs: doc
|
||||
@@ -918,7 +919,7 @@ lint_doc: install_rs_check_toolchain
|
||||
DOCS_RS=1 \
|
||||
RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
|
||||
--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
|
||||
|
||||
.PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
|
||||
lint_docs: lint_doc
|
||||
@@ -947,7 +948,7 @@ check_md_links: install_mlc
|
||||
.PHONY: check_compile_tests # Build tests in debug without running them
|
||||
check_compile_tests: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache \
|
||||
--features=experimental,boolean,shortint,integer,internal-keycache \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
|
||||
@@ -958,7 +959,7 @@ check_compile_tests: install_rs_build_toolchain
|
||||
.PHONY: check_compile_tests_benches_gpu # Build tests in debug without running them
|
||||
check_compile_tests_benches_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=experimental,boolean,shortint,integer,internal-keycache,gpu \
|
||||
-p $(TFHE_SPEC)
|
||||
mkdir -p "$(TFHECUDA_BUILD)" && \
|
||||
cd "$(TFHECUDA_BUILD)" && \
|
||||
@@ -1037,42 +1038,42 @@ dieharder_csprng: install_dieharder build_tfhe_csprng
|
||||
.PHONY: print_doc_bench_parameters # Print parameters used in doc benchmarks
|
||||
print_doc_bench_parameters:
|
||||
RUSTFLAGS="" cargo run --example print_doc_bench_parameters \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p tfhe
|
||||
--features=shortint,internal-keycache -p tfhe
|
||||
|
||||
.PHONY: bench_integer # Run benchmarks for unsigned integer
|
||||
bench_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_signed_integer # Run benchmarks for signed integer
|
||||
bench_signed_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
|
||||
bench_integer_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
|
||||
bench_integer_compression: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench glwe_packing_compression-integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_compression_gpu
|
||||
bench_integer_compression_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench glwe_packing_compression-integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
|
||||
bench_integer_multi_bit: install_rs_check_toolchain
|
||||
@@ -1080,7 +1081,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
|
||||
bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
@@ -1088,7 +1089,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
|
||||
bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
@@ -1096,7 +1097,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
|
||||
bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
@@ -1104,14 +1105,14 @@ bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
|
||||
|
||||
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
|
||||
bench_integer_zk: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench zk-pke-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,zk-pok,nightly-avx512 \
|
||||
--features=integer,internal-keycache,zk-pok,nightly-avx512 \
|
||||
-p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_shortint # Run benchmarks for shortint
|
||||
@@ -1119,14 +1120,14 @@ bench_shortint: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench shortint-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_shortint_oprf # Run benchmarks for shortint
|
||||
bench_shortint_oprf: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench oprf-shortint-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_shortint_multi_bit # Run benchmarks for shortint using multi-bit parameters
|
||||
bench_shortint_multi_bit: install_rs_check_toolchain
|
||||
@@ -1134,43 +1135,43 @@ bench_shortint_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench shortint-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_boolean # Run benchmarks for boolean
|
||||
bench_boolean: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench boolean-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs # Run benchmarks for PBS
|
||||
bench_pbs: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs128 # Run benchmarks for PBS using FFT 128 bits
|
||||
bench_pbs128: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs128-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
|
||||
bench_pbs_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_FAST_BENCH=$(FAST_BENCH) cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_ks # Run benchmarks for keyswitch
|
||||
bench_ks: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench ks-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_ks_gpu # Run benchmarks for PBS on GPU backend
|
||||
bench_ks_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench ks-bench \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
bench_web_js_api_parallel_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
|
||||
bench_web_js_api_parallel_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
|
||||
@@ -1206,13 +1207,13 @@ bench_web_js_api_parallel_firefox_ci: setup_venv
|
||||
bench_hlapi_erc20: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc20 \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ECR20 operations on GPU
|
||||
bench_hlapi_erc20_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc20 \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
|
||||
bench_tfhe_zk_pok: install_rs_check_toolchain
|
||||
@@ -1227,32 +1228,32 @@ bench_tfhe_zk_pok: install_rs_check_toolchain
|
||||
gen_key_cache: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example generates_test_keys \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
|
||||
--features=boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
|
||||
-- $(MULTI_BIT_ONLY) $(COVERAGE_ONLY)
|
||||
|
||||
.PHONY: gen_key_cache_core_crypto # Run function to generate keys and cache them for core_crypto tests
|
||||
gen_key_cache_core_crypto: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --tests --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
|
||||
--features=experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
|
||||
core_crypto::keycache::generate_keys
|
||||
|
||||
.PHONY: measure_hlapi_compact_pk_ct_sizes # Measure sizes of public keys and ciphertext for high-level API
|
||||
measure_hlapi_compact_pk_ct_sizes: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example hlapi_compact_pk_ct_sizes \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache
|
||||
--features=integer,internal-keycache
|
||||
|
||||
.PHONY: measure_shortint_key_sizes # Measure sizes of bootstrapping and key switching keys for shortint
|
||||
measure_shortint_key_sizes: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example shortint_key_sizes \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache
|
||||
--features=shortint,internal-keycache
|
||||
|
||||
.PHONY: measure_boolean_key_sizes # Measure sizes of bootstrapping and key switching keys for boolean
|
||||
measure_boolean_key_sizes: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example boolean_key_sizes \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache
|
||||
--features=boolean,internal-keycache
|
||||
|
||||
.PHONY: parse_integer_benches # Run python parser to output a csv containing integer benches data
|
||||
parse_integer_benches:
|
||||
@@ -1264,14 +1265,13 @@ parse_integer_benches:
|
||||
parse_wasm_benchmarks: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example wasm_benchmarks_parser \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
|
||||
--features=shortint,internal-keycache \
|
||||
-- wasm_benchmark_results.json
|
||||
|
||||
.PHONY: write_params_to_file # Gather all crypto parameters into a file with a Sage readable format.
|
||||
write_params_to_file: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example write_params_to_file \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache
|
||||
--example write_params_to_file --features=boolean,shortint,internal-keycache
|
||||
|
||||
.PHONY: clone_backward_compat_data # Clone the data repo needed for backward compatibility tests
|
||||
clone_backward_compat_data:
|
||||
@@ -1286,26 +1286,26 @@ tfhe/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
|
||||
.PHONY: regex_engine # Run regex_engine example
|
||||
regex_engine: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example regex_engine \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer \
|
||||
--example regex_engine --features=integer \
|
||||
-- $(REGEX_STRING) $(REGEX_PATTERN)
|
||||
|
||||
.PHONY: dark_market # Run dark market example
|
||||
dark_market: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example dark_market \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
|
||||
--features=integer,internal-keycache \
|
||||
-- fhe-modified fhe-parallel plain fhe
|
||||
|
||||
.PHONY: sha256_bool # Run sha256_bool example
|
||||
sha256_bool: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example sha256_bool \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean
|
||||
--example sha256_bool --features=boolean
|
||||
|
||||
.PHONY: pcc # pcc stands for pre commit checks (except GPU)
|
||||
pcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested check_intra_md_links \
|
||||
clippy_all tfhe_lints check_compile_tests
|
||||
clippy_all check_compile_tests
|
||||
# TFHE lints deactivated as it's incompatible with 1.83 - temporary
|
||||
# tfhe_lints
|
||||
|
||||
.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
|
||||
pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu check_rust_bindings_did_not_change
|
||||
|
||||
16
README.md
16
README.md
@@ -70,22 +70,8 @@ production-ready library for all the advanced features of TFHE.
|
||||
### Cargo.toml configuration
|
||||
To use the latest version of `TFHE-rs` in your project, you first need to add it as a dependency in your `Cargo.toml`:
|
||||
|
||||
+ For x86_64-based machines running Unix-like OSes:
|
||||
|
||||
```toml
|
||||
tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64-unix"] }
|
||||
```
|
||||
|
||||
+ For Apple Silicon or aarch64-based machines running Unix-like OSes:
|
||||
|
||||
```toml
|
||||
tfhe = { version = "*", features = ["boolean", "shortint", "integer", "aarch64-unix"] }
|
||||
```
|
||||
|
||||
+ For x86_64-based machines with the [`rdseed instruction`](https://en.wikipedia.org/wiki/RDRAND) running Windows:
|
||||
|
||||
```toml
|
||||
tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64"] }
|
||||
tfhe = { version = "*", features = ["boolean", "shortint", "integer"] }
|
||||
```
|
||||
|
||||
> [!Note]
|
||||
|
||||
@@ -6,15 +6,8 @@ edition = "2021"
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
rayon = { version = "1.7.0"}
|
||||
|
||||
[target.'cfg(target_arch = "x86_64")'.dependencies.tfhe]
|
||||
path = "../../tfhe"
|
||||
features = [ "boolean", "shortint", "integer", "x86_64" ]
|
||||
|
||||
[target.'cfg(target_arch = "aarch64")'.dependencies.tfhe]
|
||||
path = "../../tfhe"
|
||||
features = [ "boolean", "shortint", "integer", "aarch64-unix" ]
|
||||
rayon = { workspace = true }
|
||||
tfhe = { path = "../../tfhe", features = [ "boolean", "shortint", "integer" ] }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = [ "html_reports" ]}
|
||||
|
||||
@@ -38,6 +38,7 @@ template <typename Torus> struct int_compression {
|
||||
|
||||
scratch_packing_keyswitch_lwe_list_to_glwe_64(
|
||||
streams[0], gpu_indexes[0], &fp_ks_buffer,
|
||||
compression_params.small_lwe_dimension,
|
||||
compression_params.glwe_dimension, compression_params.polynomial_size,
|
||||
num_radix_blocks, true);
|
||||
}
|
||||
|
||||
@@ -46,7 +46,14 @@ void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t num_many_lut, bool allocate_gpu_memory);
|
||||
void cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, void *output_radix_lwe,
|
||||
@@ -440,5 +447,41 @@ void cleanup_cuda_integer_abs_inplace(void *const *streams,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
|
||||
|
||||
void cleanup_cuda_integer_are_all_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
|
||||
|
||||
void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
} // extern C
|
||||
#endif // CUDA_INTEGER_H
|
||||
|
||||
@@ -479,7 +479,6 @@ template <typename Torus> struct int_radix_lut {
|
||||
cuda_memcpy_async_gpu_to_gpu(dst_lut_indexes, src_lut_indexes,
|
||||
num_blocks * sizeof(Torus), streams[i],
|
||||
gpu_indexes[i]);
|
||||
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1540,10 +1539,12 @@ template <typename Torus> struct int_prop_simu_group_carries_memory {
|
||||
cuda_memset_async(grouping_pgns, 0, num_groups * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
prepared_blocks = (Torus *)cuda_malloc_async(
|
||||
num_radix_blocks * big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
cuda_memset_async(prepared_blocks, 0, num_radix_blocks * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
prepared_blocks =
|
||||
(Torus *)cuda_malloc_async((num_radix_blocks + 1) * big_lwe_size_bytes,
|
||||
streams[0], gpu_indexes[0]);
|
||||
cuda_memset_async(prepared_blocks, 0,
|
||||
(num_radix_blocks + 1) * big_lwe_size_bytes, streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
resolved_carries = (Torus *)cuda_malloc_async(
|
||||
(num_groups + 1) * big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
@@ -1773,7 +1774,6 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
uint32_t num_many_lut;
|
||||
uint32_t lut_stride;
|
||||
|
||||
uint32_t group_size;
|
||||
uint32_t num_groups;
|
||||
Torus *output_flag;
|
||||
Torus *last_lhs;
|
||||
@@ -1781,8 +1781,6 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
int_radix_lut<Torus> *lut_message_extract;
|
||||
|
||||
int_radix_lut<Torus> *lut_overflow_flag_prep;
|
||||
int_radix_lut<Torus> *lut_overflow_flag_last;
|
||||
int_radix_lut<Torus> *lut_carry_flag_last;
|
||||
|
||||
int_shifted_blocks_and_states_memory<Torus> *shifted_blocks_state_mem;
|
||||
int_prop_simu_group_carries_memory<Torus> *prop_simu_group_carries_mem;
|
||||
@@ -1792,8 +1790,6 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
uint32_t requested_flag;
|
||||
|
||||
uint32_t active_gpu_count;
|
||||
cudaStream_t *sub_streams_1;
|
||||
cudaStream_t *sub_streams_2;
|
||||
|
||||
cudaEvent_t *incoming_events1;
|
||||
cudaEvent_t *incoming_events2;
|
||||
@@ -1818,7 +1814,6 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
uint32_t block_modulus = message_modulus * carry_modulus;
|
||||
uint32_t num_bits_in_block = std::log2(block_modulus);
|
||||
uint32_t grouping_size = num_bits_in_block;
|
||||
group_size = grouping_size;
|
||||
num_groups = (num_radix_blocks + grouping_size - 1) / grouping_size;
|
||||
|
||||
num_many_lut = 2; // many luts apply 2 luts
|
||||
@@ -1835,8 +1830,8 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
|
||||
// Step 3 elements
|
||||
lut_message_extract =
|
||||
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 1,
|
||||
num_radix_blocks, allocate_gpu_memory);
|
||||
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 2,
|
||||
num_radix_blocks + 1, allocate_gpu_memory);
|
||||
// lut for the first block in the first grouping
|
||||
auto f_message_extract = [message_modulus](Torus block) -> Torus {
|
||||
return (block >> 1) % message_modulus;
|
||||
@@ -1852,8 +1847,9 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
|
||||
// This store a single block that with be used to store the overflow or
|
||||
// carry results
|
||||
output_flag = (Torus *)cuda_malloc_async(big_lwe_size_bytes, streams[0],
|
||||
gpu_indexes[0]);
|
||||
output_flag =
|
||||
(Torus *)cuda_malloc_async(big_lwe_size_bytes * (num_radix_blocks + 1),
|
||||
streams[0], gpu_indexes[0]);
|
||||
cuda_memset_async(output_flag, 0, big_lwe_size_bytes, streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
@@ -1912,9 +1908,6 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
// It seems that this lut could be apply together with the other one but for
|
||||
// now we won't do it
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW) { // Overflow case
|
||||
lut_overflow_flag_last = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, 1, 1, allocate_gpu_memory);
|
||||
|
||||
auto f_overflow_last = [num_radix_blocks,
|
||||
requested_flag_in](Torus block) -> Torus {
|
||||
uint32_t position = (num_radix_blocks == 1 &&
|
||||
@@ -1930,39 +1923,57 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
return does_overflow_if_carry_is_0;
|
||||
}
|
||||
};
|
||||
auto overflow_flag_last = lut_overflow_flag_last->get_lut(0, 0);
|
||||
auto overflow_flag_last = lut_message_extract->get_lut(0, 1);
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], overflow_flag_last, glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus, f_overflow_last);
|
||||
|
||||
lut_overflow_flag_last->broadcast_lut(streams, gpu_indexes, 0);
|
||||
Torus *h_lut_indexes =
|
||||
(Torus *)malloc((num_radix_blocks + 1) * sizeof(Torus));
|
||||
for (int index = 0; index < num_radix_blocks + 1; index++) {
|
||||
if (index < num_radix_blocks) {
|
||||
h_lut_indexes[index] = 0;
|
||||
} else {
|
||||
h_lut_indexes[index] = 1;
|
||||
}
|
||||
}
|
||||
cuda_memcpy_async_to_gpu(
|
||||
lut_message_extract->get_lut_indexes(0, 0), h_lut_indexes,
|
||||
(num_radix_blocks + 1) * sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
|
||||
lut_message_extract->broadcast_lut(streams, gpu_indexes, 0);
|
||||
free(h_lut_indexes);
|
||||
}
|
||||
if (requested_flag == outputFlag::FLAG_CARRY) { // Carry case
|
||||
lut_carry_flag_last = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, 1, 1, allocate_gpu_memory);
|
||||
|
||||
auto f_carry_last = [](Torus block) -> Torus {
|
||||
return ((block >> 2) & 1);
|
||||
};
|
||||
auto carry_flag_last = lut_carry_flag_last->get_lut(0, 0);
|
||||
auto carry_flag_last = lut_message_extract->get_lut(0, 1);
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], carry_flag_last, glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus, f_carry_last);
|
||||
|
||||
lut_carry_flag_last->broadcast_lut(streams, gpu_indexes, 0);
|
||||
Torus *h_lut_indexes =
|
||||
(Torus *)malloc((num_radix_blocks + 1) * sizeof(Torus));
|
||||
for (int index = 0; index < num_radix_blocks + 1; index++) {
|
||||
if (index < num_radix_blocks) {
|
||||
h_lut_indexes[index] = 0;
|
||||
} else {
|
||||
h_lut_indexes[index] = 1;
|
||||
}
|
||||
}
|
||||
cuda_memcpy_async_to_gpu(
|
||||
lut_message_extract->get_lut_indexes(0, 0), h_lut_indexes,
|
||||
(num_radix_blocks + 1) * sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
|
||||
lut_message_extract->broadcast_lut(streams, gpu_indexes, 0);
|
||||
free(h_lut_indexes);
|
||||
}
|
||||
|
||||
active_gpu_count = get_active_gpu_count(num_radix_blocks, gpu_count);
|
||||
sub_streams_1 =
|
||||
(cudaStream_t *)malloc(active_gpu_count * sizeof(cudaStream_t));
|
||||
sub_streams_2 =
|
||||
(cudaStream_t *)malloc(active_gpu_count * sizeof(cudaStream_t));
|
||||
for (uint j = 0; j < active_gpu_count; j++) {
|
||||
sub_streams_1[j] = cuda_create_stream(gpu_indexes[j]);
|
||||
sub_streams_2[j] = cuda_create_stream(gpu_indexes[j]);
|
||||
}
|
||||
|
||||
incoming_events1 =
|
||||
(cudaEvent_t *)malloc(active_gpu_count * sizeof(cudaEvent_t));
|
||||
@@ -1998,24 +2009,10 @@ template <typename Torus> struct int_sc_prop_memory {
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW) { // In case of overflow
|
||||
lut_overflow_flag_prep->release(streams, gpu_indexes, gpu_count);
|
||||
lut_overflow_flag_last->release(streams, gpu_indexes, gpu_count);
|
||||
delete lut_overflow_flag_prep;
|
||||
delete lut_overflow_flag_last;
|
||||
cuda_drop_async(last_lhs, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(last_rhs, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
if (requested_flag == outputFlag::FLAG_CARRY) { // In case of carry
|
||||
lut_carry_flag_last->release(streams, gpu_indexes, gpu_count);
|
||||
delete lut_carry_flag_last;
|
||||
}
|
||||
|
||||
// release sub streams
|
||||
for (uint i = 0; i < active_gpu_count; i++) {
|
||||
cuda_destroy_stream(sub_streams_1[i], gpu_indexes[i]);
|
||||
cuda_destroy_stream(sub_streams_2[i], gpu_indexes[i]);
|
||||
}
|
||||
free(sub_streams_1);
|
||||
free(sub_streams_2);
|
||||
|
||||
// release events
|
||||
for (uint j = 0; j < active_gpu_count; j++) {
|
||||
@@ -2955,14 +2952,11 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
|
||||
|
||||
template <typename Torus> struct int_cmux_buffer {
|
||||
int_radix_lut<Torus> *predicate_lut;
|
||||
int_radix_lut<Torus> *inverted_predicate_lut;
|
||||
int_radix_lut<Torus> *message_extract_lut;
|
||||
|
||||
Torus *tmp_true_ct;
|
||||
Torus *tmp_false_ct;
|
||||
|
||||
int_zero_out_if_buffer<Torus> *zero_if_true_buffer;
|
||||
int_zero_out_if_buffer<Torus> *zero_if_false_buffer;
|
||||
Torus *buffer_in;
|
||||
Torus *buffer_out;
|
||||
Torus *condition_array;
|
||||
|
||||
int_radix_params params;
|
||||
|
||||
@@ -2978,17 +2972,12 @@ template <typename Torus> struct int_cmux_buffer {
|
||||
Torus big_size =
|
||||
(params.big_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus);
|
||||
|
||||
tmp_true_ct =
|
||||
(Torus *)cuda_malloc_async(big_size, streams[0], gpu_indexes[0]);
|
||||
tmp_false_ct =
|
||||
(Torus *)cuda_malloc_async(big_size, streams[0], gpu_indexes[0]);
|
||||
|
||||
zero_if_true_buffer = new int_zero_out_if_buffer<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_radix_blocks,
|
||||
allocate_gpu_memory);
|
||||
zero_if_false_buffer = new int_zero_out_if_buffer<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, num_radix_blocks,
|
||||
allocate_gpu_memory);
|
||||
buffer_in =
|
||||
(Torus *)cuda_malloc_async(2 * big_size, streams[0], gpu_indexes[0]);
|
||||
buffer_out =
|
||||
(Torus *)cuda_malloc_async(2 * big_size, streams[0], gpu_indexes[0]);
|
||||
condition_array =
|
||||
(Torus *)cuda_malloc_async(2 * big_size, streams[0], gpu_indexes[0]);
|
||||
|
||||
auto lut_f = [predicate_lut_f](Torus block, Torus condition) -> Torus {
|
||||
return predicate_lut_f(condition) ? 0 : block;
|
||||
@@ -3002,12 +2991,8 @@ template <typename Torus> struct int_cmux_buffer {
|
||||
};
|
||||
|
||||
predicate_lut =
|
||||
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 1,
|
||||
num_radix_blocks, allocate_gpu_memory);
|
||||
|
||||
inverted_predicate_lut =
|
||||
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 1,
|
||||
num_radix_blocks, allocate_gpu_memory);
|
||||
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 2,
|
||||
2 * num_radix_blocks, allocate_gpu_memory);
|
||||
|
||||
message_extract_lut =
|
||||
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 1,
|
||||
@@ -3016,21 +3001,33 @@ template <typename Torus> struct int_cmux_buffer {
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams[0], gpu_indexes[0], predicate_lut->get_lut(0, 0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, lut_f);
|
||||
params.carry_modulus, inverted_lut_f);
|
||||
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams[0], gpu_indexes[0], inverted_predicate_lut->get_lut(0, 0),
|
||||
streams[0], gpu_indexes[0], predicate_lut->get_lut(0, 1),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, inverted_lut_f);
|
||||
params.carry_modulus, lut_f);
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], message_extract_lut->get_lut(0, 0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, message_extract_lut_f);
|
||||
Torus *h_lut_indexes =
|
||||
(Torus *)malloc(2 * num_radix_blocks * sizeof(Torus));
|
||||
for (int index = 0; index < 2 * num_radix_blocks; index++) {
|
||||
if (index < num_radix_blocks) {
|
||||
h_lut_indexes[index] = 0;
|
||||
} else {
|
||||
h_lut_indexes[index] = 1;
|
||||
}
|
||||
}
|
||||
cuda_memcpy_async_to_gpu(
|
||||
predicate_lut->get_lut_indexes(0, 0), h_lut_indexes,
|
||||
2 * num_radix_blocks * sizeof(Torus), streams[0], gpu_indexes[0]);
|
||||
|
||||
predicate_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
inverted_predicate_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
message_extract_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
free(h_lut_indexes);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3038,18 +3035,12 @@ template <typename Torus> struct int_cmux_buffer {
|
||||
uint32_t gpu_count) {
|
||||
predicate_lut->release(streams, gpu_indexes, gpu_count);
|
||||
delete predicate_lut;
|
||||
inverted_predicate_lut->release(streams, gpu_indexes, gpu_count);
|
||||
delete inverted_predicate_lut;
|
||||
message_extract_lut->release(streams, gpu_indexes, gpu_count);
|
||||
delete message_extract_lut;
|
||||
|
||||
zero_if_true_buffer->release(streams, gpu_indexes, gpu_count);
|
||||
delete zero_if_true_buffer;
|
||||
zero_if_false_buffer->release(streams, gpu_indexes, gpu_count);
|
||||
delete zero_if_false_buffer;
|
||||
|
||||
cuda_drop_async(tmp_true_ct, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(tmp_false_ct, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(buffer_in, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(buffer_out, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(condition_array, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -3063,7 +3054,7 @@ template <typename Torus> struct int_are_all_block_true_buffer {
|
||||
// This map store LUTs that checks the equality between some input and values
|
||||
// of interest in are_all_block_true(), as with max_value (the maximum message
|
||||
// value).
|
||||
std::unordered_map<int, int_radix_lut<Torus> *> is_equal_to_lut_map;
|
||||
int_radix_lut<Torus> *is_max_value;
|
||||
|
||||
int_are_all_block_true_buffer(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
@@ -3084,16 +3075,26 @@ template <typename Torus> struct int_are_all_block_true_buffer {
|
||||
tmp_out = (Torus *)cuda_malloc_async((params.big_lwe_dimension + 1) *
|
||||
num_radix_blocks * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
is_max_value =
|
||||
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 2,
|
||||
max_chunks, allocate_gpu_memory);
|
||||
auto is_max_value_f = [max_value](Torus x) -> Torus {
|
||||
return x == max_value;
|
||||
};
|
||||
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], is_max_value->get_lut(0, 0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, is_max_value_f);
|
||||
|
||||
is_max_value->broadcast_lut(streams, gpu_indexes, 0);
|
||||
}
|
||||
}
|
||||
|
||||
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
for (auto &lut : is_equal_to_lut_map) {
|
||||
lut.second->release(streams, gpu_indexes, gpu_count);
|
||||
delete (lut.second);
|
||||
}
|
||||
is_equal_to_lut_map.clear();
|
||||
is_max_value->release(streams, gpu_indexes, gpu_count);
|
||||
delete (is_max_value);
|
||||
|
||||
cuda_drop_async(tmp_block_accumulated, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(tmp_out, streams[0], gpu_indexes[0]);
|
||||
@@ -3270,8 +3271,7 @@ template <typename Torus> struct int_comparison_diff_buffer {
|
||||
int_radix_params params;
|
||||
COMPARISON_TYPE op;
|
||||
|
||||
Torus *tmp_packed_left;
|
||||
Torus *tmp_packed_right;
|
||||
Torus *tmp_packed;
|
||||
|
||||
std::function<Torus(Torus)> operator_f;
|
||||
|
||||
@@ -3308,11 +3308,8 @@ template <typename Torus> struct int_comparison_diff_buffer {
|
||||
|
||||
Torus big_size = (params.big_lwe_dimension + 1) * sizeof(Torus);
|
||||
|
||||
tmp_packed_left = (Torus *)cuda_malloc_async(
|
||||
big_size * (num_radix_blocks / 2), streams[0], gpu_indexes[0]);
|
||||
|
||||
tmp_packed_right = (Torus *)cuda_malloc_async(
|
||||
big_size * (num_radix_blocks / 2), streams[0], gpu_indexes[0]);
|
||||
tmp_packed = (Torus *)cuda_malloc_async(big_size * num_radix_blocks,
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
tree_buffer = new int_tree_sign_reduction_buffer<Torus>(
|
||||
streams, gpu_indexes, gpu_count, operator_f, params, num_radix_blocks,
|
||||
@@ -3335,8 +3332,7 @@ template <typename Torus> struct int_comparison_diff_buffer {
|
||||
reduce_signs_lut->release(streams, gpu_indexes, gpu_count);
|
||||
delete reduce_signs_lut;
|
||||
|
||||
cuda_drop_async(tmp_packed_left, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(tmp_packed_right, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(tmp_packed, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(tmp_signs_a, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(tmp_signs_b, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
@@ -3685,9 +3681,9 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
[shifted_mask](Torus x) -> Torus { return x & shifted_mask; };
|
||||
|
||||
masking_luts_1[i] = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, 1, params, 1, 1, true);
|
||||
streams, gpu_indexes, gpu_count, params, 1, 1, true);
|
||||
masking_luts_2[i] = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, 1, params, 1, num_blocks, true);
|
||||
streams, gpu_indexes, gpu_count, params, 1, num_blocks, true);
|
||||
|
||||
int_radix_lut<Torus> *luts[2] = {masking_luts_1[i], masking_luts_2[i]};
|
||||
|
||||
@@ -3704,7 +3700,7 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
// both of them are equal but because they are used in two different
|
||||
// executions in parallel we need two different pbs_buffers.
|
||||
message_extract_lut_1 = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, 1, params, 1, num_blocks, true);
|
||||
streams, gpu_indexes, gpu_count, params, 1, num_blocks, true);
|
||||
message_extract_lut_2 = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, params, 1, num_blocks, true);
|
||||
|
||||
@@ -3816,16 +3812,16 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
|
||||
this->params = params;
|
||||
shift_mem_1 = new int_logical_scalar_shift_buffer<Torus>(
|
||||
streams, gpu_indexes, 1, SHIFT_OR_ROTATE_TYPE::LEFT_SHIFT,
|
||||
streams, gpu_indexes, gpu_count, SHIFT_OR_ROTATE_TYPE::LEFT_SHIFT,
|
||||
params, 2 * num_blocks, true);
|
||||
|
||||
shift_mem_2 = new int_logical_scalar_shift_buffer<Torus>(
|
||||
streams, gpu_indexes, 1, SHIFT_OR_ROTATE_TYPE::LEFT_SHIFT,
|
||||
streams, gpu_indexes, gpu_count, SHIFT_OR_ROTATE_TYPE::LEFT_SHIFT,
|
||||
params, 2 * num_blocks, true);
|
||||
|
||||
uint32_t compute_overflow = 1;
|
||||
overflow_sub_mem = new int_borrow_prop_memory<Torus>(
|
||||
streams, gpu_indexes, 1, params, num_blocks, compute_overflow,
|
||||
streams, gpu_indexes, gpu_count, params, num_blocks, compute_overflow,
|
||||
true);
|
||||
uint32_t group_size = overflow_sub_mem->group_size;
|
||||
bool use_seq = overflow_sub_mem->prop_simu_group_carries_mem
|
||||
@@ -3834,7 +3830,7 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
|
||||
group_size, use_seq);
|
||||
|
||||
comparison_buffer = new int_comparison_buffer<Torus>(
|
||||
streams, gpu_indexes, 1, COMPARISON_TYPE::NE, params,
|
||||
streams, gpu_indexes, gpu_count, COMPARISON_TYPE::NE, params,
|
||||
num_blocks, false, true);
|
||||
|
||||
init_lookup_tables(streams, gpu_indexes, gpu_count, num_blocks);
|
||||
@@ -4275,12 +4271,15 @@ template <typename Torus> struct int_scalar_mul_buffer {
|
||||
Torus *preshifted_buffer;
|
||||
Torus *all_shifted_buffer;
|
||||
int_sc_prop_memory<Torus> *sc_prop_mem;
|
||||
bool anticipated_buffers_drop;
|
||||
|
||||
int_scalar_mul_buffer(cudaStream_t const *streams,
|
||||
uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int_radix_params params, uint32_t num_radix_blocks,
|
||||
bool allocate_gpu_memory) {
|
||||
bool allocate_gpu_memory,
|
||||
bool anticipated_buffer_drop) {
|
||||
this->params = params;
|
||||
this->anticipated_buffers_drop = anticipated_buffer_drop;
|
||||
|
||||
if (allocate_gpu_memory) {
|
||||
uint32_t msg_bits = (uint32_t)std::log2(params.message_modulus);
|
||||
@@ -4328,6 +4327,11 @@ template <typename Torus> struct int_scalar_mul_buffer {
|
||||
delete sum_ciphertexts_vec_mem;
|
||||
delete sc_prop_mem;
|
||||
cuda_drop_async(all_shifted_buffer, streams[0], gpu_indexes[0]);
|
||||
if (!anticipated_buffers_drop) {
|
||||
cuda_drop_async(preshifted_buffer, streams[0], gpu_indexes[0]);
|
||||
logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
|
||||
delete (logical_scalar_shift_buffer);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -21,8 +21,8 @@ void cuda_keyswitch_lwe_ciphertext_vector_64(
|
||||
|
||||
void scratch_packing_keyswitch_lwe_list_to_glwe_64(
|
||||
void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
|
||||
bool allocate_gpu_memory);
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t num_lwes, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_packing_keyswitch_lwe_list_to_glwe_64(
|
||||
void *stream, uint32_t gpu_index, void *glwe_array_out,
|
||||
|
||||
@@ -0,0 +1,358 @@
|
||||
#ifndef CNCRT_FAST_KS_CUH
|
||||
#define CNCRT_FAST_KS_CUH
|
||||
|
||||
#undef NDEBUG
|
||||
#include <assert.h>
|
||||
|
||||
#include "device.h"
|
||||
#include "gadget.cuh"
|
||||
#include "helper_multi_gpu.h"
|
||||
#include "keyswitch.cuh"
|
||||
#include "polynomial/functions.cuh"
|
||||
#include "polynomial/polynomial_math.cuh"
|
||||
#include "torus.cuh"
|
||||
#include "utils/helper.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#define CEIL_DIV(M, N) ((M) + (N)-1) / (N)
|
||||
|
||||
const int BLOCK_SIZE_GEMM = 64;
|
||||
const int THREADS_GEMM = 8;
|
||||
const int BLOCK_SIZE_DECOMP = 8;
|
||||
|
||||
template <typename Torus> uint64_t get_shared_mem_size_tgemm() {
|
||||
return BLOCK_SIZE_GEMM * THREADS_GEMM * 2 * sizeof(Torus);
|
||||
}
|
||||
|
||||
__host__ inline bool can_use_pks_fast_path(uint32_t lwe_dimension,
|
||||
uint32_t num_lwe,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t level_count,
|
||||
uint32_t glwe_dimension) {
|
||||
// TODO: activate it back, fix tests and extend to level_count > 1
|
||||
return false;
|
||||
}
|
||||
|
||||
// Initialize decomposition by performing rounding
|
||||
// and decomposing one level of an array of Torus LWEs. Only
|
||||
// decomposes the mask elements of the incoming LWEs.
|
||||
template <typename Torus, typename TorusVec>
|
||||
__global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,
|
||||
uint32_t lwe_dimension,
|
||||
uint32_t num_lwe, uint32_t base_log,
|
||||
uint32_t level_count) {
|
||||
|
||||
// index of this LWE ct in the buffer
|
||||
auto lwe_idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
// index of the LWE sample in the LWE ct
|
||||
auto lwe_sample_idx = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (lwe_idx >= num_lwe || lwe_sample_idx >= lwe_dimension)
|
||||
return;
|
||||
|
||||
// Input LWE array is [mask_0, .., mask_lwe_dim, message] and
|
||||
// we only decompose the mask. Thus the stride for reading
|
||||
// is lwe_dimension + 1, while for writing it is lwe_dimension
|
||||
auto read_val_idx = lwe_idx * (lwe_dimension + 1) + lwe_sample_idx;
|
||||
auto write_val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
|
||||
|
||||
Torus a_i = lwe_in[read_val_idx];
|
||||
|
||||
Torus state = init_decomposer_state(a_i, base_log, level_count);
|
||||
|
||||
Torus mod_b_mask = (1ll << base_log) - 1ll;
|
||||
lwe_out[write_val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
|
||||
}
|
||||
|
||||
// Continue decomposiion of an array of Torus elements in place. Supposes
|
||||
// that the array contains already decomposed elements and
|
||||
// computes the new decomposed level in place.
|
||||
template <typename Torus, typename TorusVec>
|
||||
__global__ void
|
||||
decompose_vectorize_step_inplace(Torus *buffer_in, uint32_t lwe_dimension,
|
||||
uint32_t num_lwe, uint32_t base_log,
|
||||
uint32_t level_count) {
|
||||
|
||||
// index of this LWE ct in the buffer
|
||||
auto lwe_idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
// index of the LWE sample in the LWE ct
|
||||
auto lwe_sample_idx = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (lwe_idx >= num_lwe || lwe_sample_idx >= lwe_dimension)
|
||||
return;
|
||||
|
||||
auto val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
|
||||
|
||||
Torus state = buffer_in[val_idx];
|
||||
|
||||
Torus mod_b_mask = (1ll << base_log) - 1ll;
|
||||
|
||||
buffer_in[val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
|
||||
}
|
||||
|
||||
// Multiply matrices A, B of size (M, K), (K, N) respectively
|
||||
// with K as the inner dimension.
|
||||
//
|
||||
// A block of threads processeds blocks of size (BLOCK_SIZE_GEMM,
|
||||
// BLOCK_SIZE_GEMM) splitting them in multiple tiles: (BLOCK_SIZE_GEMM,
|
||||
// THREADS_GEMM)-shaped tiles of values from A, and a (THREADS_GEMM,
|
||||
// BLOCK_SIZE_GEMM)-shaped tiles of values from B.
|
||||
template <typename Torus, typename TorusVec>
|
||||
__global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
|
||||
int stride_B, Torus *C) {
|
||||
|
||||
const int BM = BLOCK_SIZE_GEMM;
|
||||
const int BN = BLOCK_SIZE_GEMM;
|
||||
const int BK = THREADS_GEMM;
|
||||
const int TM = THREADS_GEMM;
|
||||
|
||||
const uint cRow = blockIdx.y;
|
||||
const uint cCol = blockIdx.x;
|
||||
|
||||
const uint totalResultsBlocktile = BM * BN;
|
||||
const int threadCol = threadIdx.x % BN;
|
||||
const int threadRow = threadIdx.x / BN;
|
||||
|
||||
// Allocate space for the current block tile in shared memory
|
||||
__shared__ Torus As[BM * BK];
|
||||
__shared__ Torus Bs[BK * BN];
|
||||
|
||||
// Initialize the pointers to the input blocks from A, B
|
||||
// Tiles from these blocks are loaded to shared memory
|
||||
A += cRow * BM * K;
|
||||
B += cCol * BN;
|
||||
|
||||
// Each thread will handle multiple sub-blocks
|
||||
const uint innerColA = threadIdx.x % BK;
|
||||
const uint innerRowA = threadIdx.x / BK;
|
||||
const uint innerColB = threadIdx.x % BN;
|
||||
const uint innerRowB = threadIdx.x / BN;
|
||||
|
||||
// allocate thread-local cache for results in registerfile
|
||||
Torus threadResults[TM] = {0};
|
||||
|
||||
auto row_A = cRow * BM + innerRowA;
|
||||
auto col_B = cCol * BN + innerColB;
|
||||
|
||||
// For each thread, loop over block tiles
|
||||
for (uint bkIdx = 0; bkIdx < K; bkIdx += BK) {
|
||||
auto col_A = bkIdx + innerColA;
|
||||
auto row_B = bkIdx + innerRowB;
|
||||
|
||||
if (row_A < M && col_A < K) {
|
||||
As[innerRowA * BK + innerColA] = A[innerRowA * K + innerColA];
|
||||
} else {
|
||||
As[innerRowA * BK + innerColA] = 0;
|
||||
}
|
||||
|
||||
if (col_B < N && row_B < K) {
|
||||
Bs[innerRowB * BN + innerColB] = B[innerRowB * stride_B + innerColB];
|
||||
} else {
|
||||
Bs[innerRowB * BN + innerColB] = 0;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// Advance blocktile for the next iteration of this loop
|
||||
A += BK;
|
||||
B += BK * stride_B;
|
||||
|
||||
// calculate per-thread results
|
||||
for (uint dotIdx = 0; dotIdx < BK; ++dotIdx) {
|
||||
// we make the dotproduct loop the outside loop, which facilitates
|
||||
// reuse of the Bs entry, which we can cache in a tmp var.
|
||||
Torus tmp = Bs[dotIdx * BN + threadCol];
|
||||
for (uint resIdx = 0; resIdx < TM; ++resIdx) {
|
||||
threadResults[resIdx] +=
|
||||
As[(threadRow * TM + resIdx) * BK + dotIdx] * tmp;
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
// Initialize the pointer to the output block of size (BLOCK_SIZE_GEMM,
|
||||
// BLOCK_SIZE_GEMM)
|
||||
C += cRow * BM * N + cCol * BN;
|
||||
|
||||
// write out the results
|
||||
for (uint resIdx = 0; resIdx < TM; ++resIdx) {
|
||||
int outRow = cRow * BM + threadRow * TM + resIdx;
|
||||
int outCol = cCol * BN + threadCol;
|
||||
|
||||
if (outRow >= M)
|
||||
continue;
|
||||
if (outCol >= N)
|
||||
continue;
|
||||
|
||||
C[(threadRow * TM + resIdx) * N + threadCol] += threadResults[resIdx];
|
||||
}
|
||||
}
|
||||
|
||||
// Finish the keyswitching operation and prepare GLWEs for accumulation.
|
||||
// 1. Finish the keyswitching computation partially performed with a GEMM:
|
||||
// - negate the dot product between the GLWE and KSK polynomial
|
||||
// - add the GLWE message for the N-th polynomial coeff in the message poly
|
||||
// 2. Rotate each of the GLWE . KSK poly dot products to
|
||||
// prepare them for accumulation into a single GLWE
|
||||
template <typename Torus>
|
||||
__global__ void polynomial_accumulate_monic_monomial_mul_many_neg_and_add_C(
|
||||
Torus *in_glwe_buffer, Torus *out_glwe_buffer, Torus const *lwe_array,
|
||||
uint32_t lwe_dimension, uint32_t num_glwes, uint32_t polynomial_size,
|
||||
uint32_t glwe_dimension) {
|
||||
|
||||
uint32_t glwe_id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
uint32_t degree = glwe_id; // lwe 0 rotate 0, lwe 1 rotate 1, .. , lwe
|
||||
// poly_size-1 rotate poly_size-1
|
||||
uint32_t coeffIdx = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (glwe_id >= num_glwes)
|
||||
return;
|
||||
if (coeffIdx >= polynomial_size)
|
||||
return;
|
||||
|
||||
auto in_poly =
|
||||
in_glwe_buffer + glwe_id * polynomial_size * (glwe_dimension + 1);
|
||||
auto out_result =
|
||||
out_glwe_buffer + glwe_id * polynomial_size * (glwe_dimension + 1);
|
||||
if (coeffIdx == 0) {
|
||||
// Add the message value of the input LWE (`C`) to the N-th coefficient
|
||||
// in the GLWE . KSK dot product
|
||||
|
||||
// The C is added to the first position of the last polynomial in the GLWE
|
||||
// which has (glwe_dimension+1) polynomials
|
||||
// The C value is extracted as the last value of the LWE ct. (of index
|
||||
// glwe_id) the LWEs have (polynomial_size + 1) values
|
||||
in_poly[polynomial_size * glwe_dimension] =
|
||||
lwe_array[glwe_id * (lwe_dimension + 1) + lwe_dimension] -
|
||||
in_poly[polynomial_size * glwe_dimension];
|
||||
|
||||
for (int gi = 1; gi < glwe_dimension; ++gi)
|
||||
in_poly[coeffIdx + gi * polynomial_size] =
|
||||
-in_poly[coeffIdx + gi * polynomial_size];
|
||||
|
||||
} else {
|
||||
// Otherwise simply negate the input coefficient
|
||||
for (int gi = 1; gi < glwe_dimension + 1; ++gi)
|
||||
in_poly[coeffIdx + gi * polynomial_size] =
|
||||
-in_poly[coeffIdx + gi * polynomial_size];
|
||||
}
|
||||
// Negate all the coefficients for rotation for the first poly
|
||||
in_poly[coeffIdx] = -in_poly[coeffIdx];
|
||||
|
||||
// rotate the body
|
||||
polynomial_accumulate_monic_monomial_mul<Torus>(
|
||||
out_result, in_poly, degree, coeffIdx, polynomial_size, 1, true);
|
||||
// rotate the mask too
|
||||
for (int gi = 1; gi < glwe_dimension + 1; ++gi)
|
||||
polynomial_accumulate_monic_monomial_mul<Torus>(
|
||||
out_result + gi * polynomial_size, in_poly + gi * polynomial_size,
|
||||
degree, coeffIdx, polynomial_size, 1, true);
|
||||
}
|
||||
|
||||
template <typename Torus, typename TorusVec>
|
||||
__host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
|
||||
cudaStream_t stream, uint32_t gpu_index, Torus *glwe_out,
|
||||
Torus const *lwe_array_in, Torus const *fp_ksk_array, int8_t *fp_ks_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_lwes) {
|
||||
|
||||
// Optimization of packing keyswitch when packing many LWEs
|
||||
|
||||
if (level_count > 1) {
|
||||
PANIC("Fast path PKS only supports level_count==1");
|
||||
}
|
||||
|
||||
cudaSetDevice(gpu_index);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
|
||||
|
||||
// The fast path of PKS uses the scratch buffer (d_mem) differently than the
|
||||
// old path: it needs to store the decomposed masks in the first half of this
|
||||
// buffer and the keyswitched GLWEs in the second half of the buffer. Thus the
|
||||
// scratch buffer for the fast path must determine the half-size of the
|
||||
// scratch buffer as the max between the size of the GLWE and the size of the
|
||||
// LWE-mask
|
||||
int memory_unit = glwe_accumulator_size > lwe_dimension
|
||||
? glwe_accumulator_size
|
||||
: lwe_dimension;
|
||||
|
||||
// ping pong the buffer between successive calls
|
||||
// split the buffer in two parts of this size
|
||||
auto d_mem_0 = (Torus *)fp_ks_buffer;
|
||||
auto d_mem_1 = d_mem_0 + num_lwes * memory_unit;
|
||||
|
||||
// Set the scratch buffer to 0 as it is used to accumulate
|
||||
// decomposition temporary results
|
||||
cuda_memset_async(d_mem_1, 0, num_lwes * memory_unit * sizeof(Torus), stream,
|
||||
gpu_index);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
// decompose LWEs
|
||||
// don't decompose LWE body - the LWE has lwe_size + 1 elements. The last
|
||||
// element, the body is ignored by rounding down the number of blocks assuming
|
||||
// here that the LWE dimension is a multiple of the block size
|
||||
dim3 grid_decomp(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
|
||||
CEIL_DIV(lwe_dimension, BLOCK_SIZE_DECOMP));
|
||||
dim3 threads_decomp(BLOCK_SIZE_DECOMP, BLOCK_SIZE_DECOMP);
|
||||
|
||||
// decompose first level
|
||||
decompose_vectorize_init<Torus, TorusVec>
|
||||
<<<grid_decomp, threads_decomp, 0, stream>>>(lwe_array_in, d_mem_0,
|
||||
lwe_dimension, num_lwes,
|
||||
base_log, level_count);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
// gemm to ks the individual LWEs to GLWEs
|
||||
dim3 grid_gemm(CEIL_DIV(glwe_accumulator_size, BLOCK_SIZE_GEMM),
|
||||
CEIL_DIV(num_lwes, BLOCK_SIZE_GEMM));
|
||||
dim3 threads_gemm(BLOCK_SIZE_GEMM * THREADS_GEMM);
|
||||
|
||||
auto stride_KSK_buffer = glwe_accumulator_size;
|
||||
|
||||
uint32_t shared_mem_size = get_shared_mem_size_tgemm<Torus>();
|
||||
tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
|
||||
num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0, fp_ksk_array,
|
||||
stride_KSK_buffer, d_mem_1);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
/*
|
||||
TODO: transpose key to generalize to level_count > 1
|
||||
|
||||
for (int li = 1; li < level_count; ++li) {
|
||||
decompose_vectorize_step_inplace<Torus, TorusVec>
|
||||
<<<grid_decomp, threads_decomp, 0, stream>>>(
|
||||
d_mem_0, lwe_dimension, num_lwes, base_log, level_count);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size,
|
||||
stream>>>( num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0,
|
||||
fp_ksk_array + li * ksk_block_size, stride_KSK_buffer, d_mem_1);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
*/
|
||||
|
||||
// should we include the mask in the rotation ??
|
||||
dim3 grid_rotate(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
|
||||
CEIL_DIV(polynomial_size, BLOCK_SIZE_DECOMP));
|
||||
dim3 threads_rotate(BLOCK_SIZE_DECOMP, BLOCK_SIZE_DECOMP);
|
||||
// rotate the GLWEs
|
||||
polynomial_accumulate_monic_monomial_mul_many_neg_and_add_C<Torus>
|
||||
<<<grid_rotate, threads_rotate, 0, stream>>>(
|
||||
d_mem_1, d_mem_0, lwe_array_in, lwe_dimension, num_lwes,
|
||||
polynomial_size, glwe_dimension);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
dim3 grid_accumulate(
|
||||
CEIL_DIV(polynomial_size * (glwe_dimension + 1), BLOCK_SIZE_DECOMP));
|
||||
dim3 threads_accum(BLOCK_SIZE_DECOMP);
|
||||
|
||||
// accumulate to a single glwe
|
||||
accumulate_glwes<Torus><<<grid_accumulate, threads_accum, 0, stream>>>(
|
||||
glwe_out, d_mem_0, glwe_dimension, polynomial_size, num_lwes);
|
||||
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,6 +1,8 @@
|
||||
#include "fast_packing_keyswitch.cuh"
|
||||
#include "keyswitch.cuh"
|
||||
#include "keyswitch.h"
|
||||
#include <cstdint>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Perform keyswitch on a batch of 32 bits input LWE ciphertexts.
|
||||
* Head out to the equivalent operation on 64 bits for more details.
|
||||
@@ -53,15 +55,17 @@ void cuda_keyswitch_lwe_ciphertext_vector_64(
|
||||
|
||||
void scratch_packing_keyswitch_lwe_list_to_glwe_64(
|
||||
void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
|
||||
bool allocate_gpu_memory) {
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t num_lwes, bool allocate_gpu_memory) {
|
||||
scratch_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, fp_ks_buffer,
|
||||
static_cast<cudaStream_t>(stream), gpu_index, fp_ks_buffer, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, num_lwes, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
/* Perform functional packing keyswitch on a batch of 64 bits input LWE
|
||||
* ciphertexts.
|
||||
*/
|
||||
|
||||
void cuda_packing_keyswitch_lwe_list_to_glwe_64(
|
||||
void *stream, uint32_t gpu_index, void *glwe_array_out,
|
||||
void const *lwe_array_in, void const *fp_ksk_array, int8_t *fp_ks_buffer,
|
||||
@@ -69,13 +73,24 @@ void cuda_packing_keyswitch_lwe_list_to_glwe_64(
|
||||
uint32_t output_polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_lwes) {
|
||||
|
||||
host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(glwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in),
|
||||
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
|
||||
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
|
||||
base_log, level_count, num_lwes);
|
||||
if (can_use_pks_fast_path(input_lwe_dimension, num_lwes,
|
||||
output_polynomial_size, level_count,
|
||||
output_glwe_dimension)) {
|
||||
host_fast_packing_keyswitch_lwe_list_to_glwe<uint64_t, ulonglong4>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(glwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in),
|
||||
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
|
||||
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
|
||||
base_log, level_count, num_lwes);
|
||||
} else
|
||||
host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(glwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in),
|
||||
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
|
||||
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
|
||||
base_log, level_count, num_lwes);
|
||||
}
|
||||
|
||||
void cleanup_packing_keyswitch_lwe_list_to_glwe(void *stream,
|
||||
|
||||
@@ -158,16 +158,20 @@ void execute_keyswitch_async(cudaStream_t const *streams,
|
||||
template <typename Torus>
|
||||
__host__ void scratch_packing_keyswitch_lwe_list_to_glwe(
|
||||
cudaStream_t stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
|
||||
bool allocate_gpu_memory) {
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t num_lwes, bool allocate_gpu_memory) {
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
|
||||
|
||||
if (allocate_gpu_memory)
|
||||
int memory_unit = glwe_accumulator_size > lwe_dimension
|
||||
? glwe_accumulator_size
|
||||
: lwe_dimension;
|
||||
|
||||
if (allocate_gpu_memory) {
|
||||
*fp_ks_buffer = (int8_t *)cuda_malloc_async(
|
||||
2 * num_lwes * glwe_accumulator_size * sizeof(Torus), stream,
|
||||
gpu_index);
|
||||
2 * num_lwes * memory_unit * sizeof(Torus), stream, gpu_index);
|
||||
}
|
||||
}
|
||||
|
||||
// public functional packing keyswitch for a single LWE ciphertext
|
||||
@@ -241,6 +245,7 @@ __global__ void packing_keyswitch_lwe_list_to_glwe(
|
||||
auto lwe_in = lwe_array_in + input_id * lwe_size;
|
||||
auto ks_glwe_out = d_mem + input_id * glwe_accumulator_size;
|
||||
auto glwe_out = glwe_array_out + input_id * glwe_accumulator_size;
|
||||
|
||||
// KS LWE to GLWE
|
||||
packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext<Torus>(
|
||||
ks_glwe_out, lwe_in, fp_ksk, lwe_dimension_in, glwe_dimension,
|
||||
@@ -293,8 +298,18 @@ __host__ void host_packing_keyswitch_lwe_list_to_glwe(
|
||||
dim3 grid(num_blocks, num_lwes);
|
||||
dim3 threads(num_threads);
|
||||
|
||||
// The fast path of PKS uses the scratch buffer (d_mem) differently:
|
||||
// it needs to store the decomposed masks in the first half of this buffer
|
||||
// and the keyswitched GLWEs in the second half of the buffer. Thus the
|
||||
// scratch buffer for the fast path must determine the half-size of the
|
||||
// scratch buffer as the max between the size of the GLWE and the size of the
|
||||
// LWE-mask
|
||||
int memory_unit = glwe_accumulator_size > lwe_dimension_in
|
||||
? glwe_accumulator_size
|
||||
: lwe_dimension_in;
|
||||
|
||||
auto d_mem = (Torus *)fp_ks_buffer;
|
||||
auto d_tmp_glwe_array_out = d_mem + num_lwes * glwe_accumulator_size;
|
||||
auto d_tmp_glwe_array_out = d_mem + num_lwes * memory_unit;
|
||||
|
||||
// individually keyswitch each lwe
|
||||
packing_keyswitch_lwe_list_to_glwe<Torus><<<grid, threads, 0, stream>>>(
|
||||
|
||||
@@ -37,39 +37,32 @@ __host__ void host_integer_radix_cmux_kb(
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
auto params = mem_ptr->params;
|
||||
|
||||
// Since our CPU threads will be working on different streams we shall assert
|
||||
// the work in the main stream is completed
|
||||
auto true_streams = mem_ptr->zero_if_true_buffer->true_streams;
|
||||
auto false_streams = mem_ptr->zero_if_false_buffer->false_streams;
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
auto mem_true = mem_ptr->zero_if_true_buffer;
|
||||
zero_out_if<Torus>(true_streams, gpu_indexes, gpu_count, mem_ptr->tmp_true_ct,
|
||||
lwe_array_true, lwe_condition, mem_true,
|
||||
mem_ptr->inverted_predicate_lut, bsks, ksks,
|
||||
num_radix_blocks);
|
||||
auto mem_false = mem_ptr->zero_if_false_buffer;
|
||||
zero_out_if<Torus>(false_streams, gpu_indexes, gpu_count,
|
||||
mem_ptr->tmp_false_ct, lwe_array_false, lwe_condition,
|
||||
mem_false, mem_ptr->predicate_lut, bsks, ksks,
|
||||
num_radix_blocks);
|
||||
for (uint j = 0; j < mem_ptr->zero_if_true_buffer->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(true_streams[j], gpu_indexes[j]);
|
||||
}
|
||||
for (uint j = 0; j < mem_ptr->zero_if_false_buffer->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(false_streams[j], gpu_indexes[j]);
|
||||
Torus lwe_size = params.big_lwe_dimension + 1;
|
||||
Torus radix_lwe_size = lwe_size * num_radix_blocks;
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in, lwe_array_true,
|
||||
radix_lwe_size * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in + radix_lwe_size,
|
||||
lwe_array_false, radix_lwe_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
for (uint i = 0; i < 2 * num_radix_blocks; i++) {
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->condition_array + i * lwe_size,
|
||||
lwe_condition, lwe_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
}
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->buffer_out, mem_ptr->buffer_in,
|
||||
mem_ptr->condition_array, bsks, ksks, 2 * num_radix_blocks,
|
||||
mem_ptr->predicate_lut, params.message_modulus);
|
||||
|
||||
// If the condition was true, true_ct will have kept its value and false_ct
|
||||
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
|
||||
// have kept its value
|
||||
auto added_cts = mem_ptr->tmp_true_ct;
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], added_cts,
|
||||
mem_ptr->tmp_true_ct, mem_ptr->tmp_false_ct,
|
||||
params.big_lwe_dimension, num_radix_blocks);
|
||||
auto mem_true = mem_ptr->buffer_out;
|
||||
auto mem_false = &mem_ptr->buffer_out[radix_lwe_size];
|
||||
auto added_cts = mem_true;
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
|
||||
mem_false, params.big_lwe_dimension, num_radix_blocks);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
|
||||
|
||||
@@ -58,6 +58,9 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
case GE:
|
||||
case LT:
|
||||
case LE:
|
||||
if (num_radix_blocks % 2 != 0)
|
||||
PANIC("Cuda error (comparisons): the number of radix blocks has to be "
|
||||
"even.")
|
||||
host_integer_radix_difference_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -68,6 +71,8 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
break;
|
||||
case MAX:
|
||||
case MIN:
|
||||
if (num_radix_blocks % 2 != 0)
|
||||
PANIC("Cuda error (max/min): the number of radix blocks has to be even.")
|
||||
host_integer_radix_maxmin_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -89,3 +94,91 @@ void cleanup_cuda_integer_comparison(void *const *streams,
|
||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
|
||||
false, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *buffer =
|
||||
(int_comparison_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_are_all_comparisons_block_true_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
|
||||
(uint64_t **)(ksks), num_radix_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_are_all_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *mem_ptr =
|
||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
|
||||
false, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *buffer =
|
||||
(int_comparison_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_is_at_least_one_comparisons_block_true_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
|
||||
(uint64_t **)(ksks), num_radix_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *mem_ptr =
|
||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
@@ -58,7 +58,7 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
|
||||
template <typename Torus>
|
||||
__host__ void are_all_comparisons_block_true(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus *lwe_array_in,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
@@ -85,16 +85,19 @@ __host__ void are_all_comparisons_block_true(
|
||||
|
||||
while (remaining_blocks > 0) {
|
||||
// Split in max_value chunks
|
||||
uint32_t chunk_length = std::min(max_value, remaining_blocks);
|
||||
int num_chunks = remaining_blocks / chunk_length;
|
||||
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
|
||||
|
||||
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
|
||||
// as in the worst case we will be adding `max_value` ones
|
||||
auto input_blocks = tmp_out;
|
||||
auto accumulator = are_all_block_true_buffer->tmp_block_accumulated;
|
||||
auto is_equal_to_num_blocks_map =
|
||||
&are_all_block_true_buffer->is_equal_to_lut_map;
|
||||
auto is_max_value_lut = are_all_block_true_buffer->is_max_value;
|
||||
uint32_t chunk_lengths[num_chunks];
|
||||
auto begin_remaining_blocks = remaining_blocks;
|
||||
for (int i = 0; i < num_chunks; i++) {
|
||||
uint32_t chunk_length =
|
||||
std::min(max_value, begin_remaining_blocks - i * max_value);
|
||||
chunk_lengths[i] = chunk_length;
|
||||
accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
|
||||
input_blocks, big_lwe_dimension,
|
||||
chunk_length);
|
||||
@@ -107,53 +110,50 @@ __host__ void are_all_comparisons_block_true(
|
||||
|
||||
// Selects a LUT
|
||||
int_radix_lut<Torus> *lut;
|
||||
auto broadcast_lut_should_be_called = false;
|
||||
if (are_all_block_true_buffer->op == COMPARISON_TYPE::NE) {
|
||||
// is_non_zero_lut_buffer LUT
|
||||
lut = mem_ptr->eq_buffer->is_non_zero_lut;
|
||||
} else {
|
||||
if ((*is_equal_to_num_blocks_map).find(chunk_length) !=
|
||||
(*is_equal_to_num_blocks_map).end()) {
|
||||
// The LUT is already computed
|
||||
lut = (*is_equal_to_num_blocks_map)[chunk_length];
|
||||
} else {
|
||||
if (chunk_lengths[num_chunks - 1] != max_value) {
|
||||
// LUT needs to be computed
|
||||
auto new_lut =
|
||||
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
|
||||
max_value, num_radix_blocks, true);
|
||||
|
||||
uint32_t chunk_length = chunk_lengths[num_chunks - 1];
|
||||
auto is_equal_to_num_blocks_lut_f = [chunk_length](Torus x) -> Torus {
|
||||
return x == chunk_length;
|
||||
};
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], new_lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
streams[0], gpu_indexes[0], is_max_value_lut->get_lut(0, 1),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
is_equal_to_num_blocks_lut_f);
|
||||
|
||||
// new_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
broadcast_lut_should_be_called = true;
|
||||
|
||||
(*is_equal_to_num_blocks_map)[chunk_length] = new_lut;
|
||||
lut = new_lut;
|
||||
Torus *h_lut_indexes = (Torus *)malloc(num_chunks * sizeof(Torus));
|
||||
for (int index = 0; index < num_chunks; index++) {
|
||||
if (index == num_chunks - 1) {
|
||||
h_lut_indexes[index] = 1;
|
||||
} else {
|
||||
h_lut_indexes[index] = 0;
|
||||
}
|
||||
}
|
||||
cuda_memcpy_async_to_gpu(is_max_value_lut->get_lut_indexes(0, 0),
|
||||
h_lut_indexes, num_chunks * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
is_max_value_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
free(h_lut_indexes);
|
||||
}
|
||||
lut = is_max_value_lut;
|
||||
}
|
||||
|
||||
// Applies the LUT
|
||||
if (remaining_blocks == 1) {
|
||||
// In the last iteration we copy the output to the final address
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, 1, lwe_array_out, accumulator, bsks, ksks, 1,
|
||||
lut);
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
|
||||
ksks, 1, lut);
|
||||
return;
|
||||
} else {
|
||||
if (broadcast_lut_should_be_called)
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, 1, tmp_out, accumulator, bsks, ksks,
|
||||
num_chunks, lut);
|
||||
else
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, tmp_out, accumulator, bsks, ksks,
|
||||
num_chunks, lut);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, tmp_out, accumulator, bsks, ksks,
|
||||
num_chunks, lut);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -167,7 +167,7 @@ __host__ void are_all_comparisons_block_true(
|
||||
template <typename Torus>
|
||||
__host__ void is_at_least_one_comparisons_block_true(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus *lwe_array_in,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
@@ -189,14 +189,18 @@ __host__ void is_at_least_one_comparisons_block_true(
|
||||
uint32_t remaining_blocks = num_radix_blocks;
|
||||
while (remaining_blocks > 0) {
|
||||
// Split in max_value chunks
|
||||
uint32_t chunk_length = std::min(max_value, remaining_blocks);
|
||||
int num_chunks = remaining_blocks / chunk_length;
|
||||
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
|
||||
|
||||
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
|
||||
// as in the worst case we will be adding `max_value` ones
|
||||
auto input_blocks = mem_ptr->tmp_lwe_array_out;
|
||||
auto accumulator = buffer->tmp_block_accumulated;
|
||||
uint32_t chunk_lengths[num_chunks];
|
||||
auto begin_remaining_blocks = remaining_blocks;
|
||||
for (int i = 0; i < num_chunks; i++) {
|
||||
uint32_t chunk_length =
|
||||
std::min(max_value, begin_remaining_blocks - i * max_value);
|
||||
chunk_lengths[i] = chunk_length;
|
||||
accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
|
||||
input_blocks, big_lwe_dimension,
|
||||
chunk_length);
|
||||
@@ -458,10 +462,12 @@ __host__ void tree_sign_reduction(
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], last_lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus, f);
|
||||
last_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
|
||||
// Last leaf
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, 1, lwe_array_out, y, bsks, ksks, 1, last_lut);
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, y, bsks, ksks, 1,
|
||||
last_lut);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
@@ -486,8 +492,9 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
if (carry_modulus >= message_modulus) {
|
||||
// Packing is possible
|
||||
// Pack inputs
|
||||
Torus *packed_left = diff_buffer->tmp_packed_left;
|
||||
Torus *packed_right = diff_buffer->tmp_packed_right;
|
||||
Torus *packed_left = diff_buffer->tmp_packed;
|
||||
Torus *packed_right =
|
||||
diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
|
||||
// In case the ciphertext is signed, the sign block and the one before it
|
||||
// are handled separately
|
||||
if (mem_ptr->is_signed) {
|
||||
@@ -506,10 +513,7 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
auto identity_lut = mem_ptr->identity_lut;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, packed_left, packed_left, bsks, ksks,
|
||||
packed_num_radix_blocks, identity_lut);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, packed_right, packed_right, bsks, ksks,
|
||||
packed_num_radix_blocks, identity_lut);
|
||||
2 * packed_num_radix_blocks, identity_lut);
|
||||
|
||||
lhs = packed_left;
|
||||
rhs = packed_right;
|
||||
@@ -538,11 +542,13 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
|
||||
// Compare the last block before the sign block separately
|
||||
auto identity_lut = mem_ptr->identity_lut;
|
||||
Torus *packed_left = diff_buffer->tmp_packed;
|
||||
Torus *packed_right =
|
||||
diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
|
||||
Torus *last_left_block_before_sign_block =
|
||||
diff_buffer->tmp_packed_left + packed_num_radix_blocks * big_lwe_size;
|
||||
packed_left + packed_num_radix_blocks * big_lwe_size;
|
||||
Torus *last_right_block_before_sign_block =
|
||||
diff_buffer->tmp_packed_right +
|
||||
packed_num_radix_blocks * big_lwe_size;
|
||||
packed_right + packed_num_radix_blocks * big_lwe_size;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, last_left_block_before_sign_block,
|
||||
lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks, 1,
|
||||
@@ -620,4 +626,35 @@ __host__ void host_integer_radix_maxmin_kb(
|
||||
mem_ptr->cmux_buffer, bsks, ksks, total_num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_are_all_comparisons_block_true_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
auto eq_buffer = mem_ptr->eq_buffer;
|
||||
|
||||
// It returns a block encrypting 1 if all input blocks are 1
|
||||
// otherwise the block encrypts 0
|
||||
are_all_comparisons_block_true<Torus>(streams, gpu_indexes, gpu_count,
|
||||
lwe_array_out, lwe_array_in, mem_ptr,
|
||||
bsks, ksks, num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_is_at_least_one_comparisons_block_true_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
auto eq_buffer = mem_ptr->eq_buffer;
|
||||
|
||||
// It returns a block encrypting 1 if all input blocks are 1
|
||||
// otherwise the block encrypts 0
|
||||
is_at_least_one_comparisons_block_true<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_in, mem_ptr,
|
||||
bsks, ksks, num_radix_blocks);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
#define CUDA_INTEGER_COMPRESSION_CUH
|
||||
|
||||
#include "ciphertext.h"
|
||||
#include "crypto/fast_packing_keyswitch.cuh"
|
||||
#include "crypto/keyswitch.cuh"
|
||||
#include "device.h"
|
||||
#include "integer/compression/compression.h"
|
||||
@@ -116,11 +117,21 @@ host_integer_compress(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
while (rem_lwes > 0) {
|
||||
auto chunk_size = min(rem_lwes, mem_ptr->lwe_per_glwe);
|
||||
|
||||
host_packing_keyswitch_lwe_list_to_glwe<Torus>(
|
||||
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
|
||||
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
|
||||
compression_params.polynomial_size, compression_params.ks_base_log,
|
||||
compression_params.ks_level, chunk_size);
|
||||
if (can_use_pks_fast_path(
|
||||
input_lwe_dimension, chunk_size, compression_params.polynomial_size,
|
||||
compression_params.ks_level, compression_params.glwe_dimension)) {
|
||||
host_fast_packing_keyswitch_lwe_list_to_glwe<Torus, ulonglong4>(
|
||||
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
|
||||
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
|
||||
compression_params.polynomial_size, compression_params.ks_base_log,
|
||||
compression_params.ks_level, chunk_size);
|
||||
} else {
|
||||
host_packing_keyswitch_lwe_list_to_glwe<Torus>(
|
||||
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
|
||||
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
|
||||
compression_params.polynomial_size, compression_params.ks_base_log,
|
||||
compression_params.ks_level, chunk_size);
|
||||
}
|
||||
|
||||
rem_lwes -= chunk_size;
|
||||
lwe_subset += chunk_size * lwe_in_size;
|
||||
|
||||
@@ -286,7 +286,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
uint32_t shifted_mask = full_message_mask >> shift_amount;
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, 1, interesting_divisor.last_block(),
|
||||
streams, gpu_indexes, gpu_count, interesting_divisor.last_block(),
|
||||
interesting_divisor.last_block(), bsks, ksks, 1,
|
||||
mem_ptr->masking_luts_1[shifted_mask]);
|
||||
}; // trim_last_interesting_divisor_bits
|
||||
@@ -315,7 +315,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
shifted_mask = shifted_mask & full_message_mask;
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, 1, divisor_ms_blocks.first_block(),
|
||||
streams, gpu_indexes, gpu_count, divisor_ms_blocks.first_block(),
|
||||
divisor_ms_blocks.first_block(), bsks, ksks, 1,
|
||||
mem_ptr->masking_luts_2[shifted_mask]);
|
||||
}; // trim_first_divisor_ms_bits
|
||||
@@ -340,7 +340,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
|
||||
streams, gpu_indexes, 1, interesting_remainder1.data, 1,
|
||||
streams, gpu_indexes, gpu_count, interesting_remainder1.data, 1,
|
||||
mem_ptr->shift_mem_1, bsks, ksks, interesting_remainder1.len);
|
||||
|
||||
tmp_radix.clone_from(interesting_remainder1, 0,
|
||||
@@ -370,13 +370,13 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
|
||||
streams, gpu_indexes, 1, interesting_remainder2.data, 1,
|
||||
streams, gpu_indexes, gpu_count, interesting_remainder2.data, 1,
|
||||
mem_ptr->shift_mem_2, bsks, ksks, interesting_remainder2.len);
|
||||
}; // left_shift_interesting_remainder2
|
||||
|
||||
//for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
//}
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
// interesting_divisor
|
||||
trim_last_interesting_divisor_bits(mem_ptr->sub_streams_1, gpu_indexes,
|
||||
gpu_count);
|
||||
@@ -389,12 +389,12 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
// interesting_remainder2
|
||||
left_shift_interesting_remainder2(mem_ptr->sub_streams_4, gpu_indexes,
|
||||
gpu_count);
|
||||
// for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_1[0], gpu_indexes[0]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_2[0], gpu_indexes[0]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_3[0], gpu_indexes[0]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_4[0], gpu_indexes[0]);
|
||||
// }
|
||||
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_2[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_3[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem_ptr->sub_streams_4[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
// if interesting_remainder1 != 0 -> interesting_remainder2 == 0
|
||||
// if interesting_remainder1 == 0 -> interesting_remainder2 != 0
|
||||
@@ -438,7 +438,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
streams, gpu_indexes, first_indexes, second_indexes, scalar_indexes,
|
||||
merged_interesting_remainder.len);
|
||||
host_integer_overflowing_sub<uint64_t>(
|
||||
streams, gpu_indexes, 1, new_remainder.data,
|
||||
streams, gpu_indexes, gpu_count, new_remainder.data,
|
||||
(uint64_t *)merged_interesting_remainder.data,
|
||||
interesting_divisor.data, subtraction_overflowed.data,
|
||||
(const Torus *)nullptr, mem_ptr->overflow_sub_mem, bsks, ksks,
|
||||
@@ -460,7 +460,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
// But we are in the special case where scalar == 0
|
||||
// So we can skip some stuff
|
||||
host_compare_with_zero_equality<Torus>(
|
||||
streams, gpu_indexes, 1, tmp_1.data, trivial_blocks.data,
|
||||
streams, gpu_indexes, gpu_count, tmp_1.data, trivial_blocks.data,
|
||||
mem_ptr->comparison_buffer, bsks, ksks, trivial_blocks.len,
|
||||
mem_ptr->comparison_buffer->eq_buffer->is_non_zero_lut);
|
||||
|
||||
@@ -468,7 +468,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
ceil_div(trivial_blocks.len, message_modulus * carry_modulus - 1);
|
||||
|
||||
is_at_least_one_comparisons_block_true<Torus>(
|
||||
streams, gpu_indexes, 1,
|
||||
streams, gpu_indexes, gpu_count,
|
||||
at_least_one_upper_block_is_non_zero.data, tmp_1.data,
|
||||
mem_ptr->comparison_buffer, bsks, ksks, tmp_1.len);
|
||||
}
|
||||
@@ -482,7 +482,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
|
||||
[&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count) {
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, 1,
|
||||
streams, gpu_indexes, gpu_count,
|
||||
cleaned_merged_interesting_remainder.data,
|
||||
cleaned_merged_interesting_remainder.data, bsks, ksks,
|
||||
cleaned_merged_interesting_remainder.len,
|
||||
|
||||
@@ -198,6 +198,27 @@ void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t num_many_lut, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_apply_many_univariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
num_many_lut, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, void *output_radix_lwe,
|
||||
@@ -237,7 +258,7 @@ void cuda_apply_many_univariate_lut_kb_64(
|
||||
|
||||
void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void *input_lut, uint32_t lwe_dimension,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
@@ -251,8 +272,9 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
|
||||
scratch_cuda_apply_bivariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr, static_cast<uint64_t *>(input_lut),
|
||||
num_radix_blocks, params, allocate_gpu_memory);
|
||||
(int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_apply_bivariate_lut_kb_64(
|
||||
|
||||
@@ -1557,6 +1557,25 @@ void host_apply_univariate_lut_kb(cudaStream_t const *streams,
|
||||
num_blocks, mem);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void scratch_cuda_apply_many_univariate_lut_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_radix_lut<Torus> **mem_ptr, Torus const *input_lut,
|
||||
uint32_t num_radix_blocks, int_radix_params params, uint32_t num_many_lut,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
*mem_ptr = new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
|
||||
1, num_radix_blocks, num_many_lut,
|
||||
allocate_gpu_memory);
|
||||
// It is safe to do this copy on GPU 0, because all LUTs always reside on GPU
|
||||
// 0
|
||||
cuda_memcpy_async_to_gpu((*mem_ptr)->get_lut(0, 0), (void *)input_lut,
|
||||
(params.glwe_dimension + 1) *
|
||||
params.polynomial_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
(*mem_ptr)->broadcast_lut(streams, gpu_indexes, 0);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void host_apply_many_univariate_lut_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
@@ -1624,13 +1643,12 @@ void host_propagate_single_carry(cudaStream_t const *streams,
|
||||
auto params = mem->params;
|
||||
auto glwe_dimension = params.glwe_dimension;
|
||||
auto polynomial_size = params.polynomial_size;
|
||||
auto message_modulus = params.message_modulus;
|
||||
auto carry_modulus = params.carry_modulus;
|
||||
uint32_t big_lwe_size = glwe_dimension * polynomial_size + 1;
|
||||
auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
auto big_lwe_dimension = big_lwe_size - 1; // For host addition
|
||||
auto lut_stride = mem->lut_stride;
|
||||
auto num_many_lut = mem->num_many_lut;
|
||||
auto output_flag = mem->output_flag + big_lwe_size * num_radix_blocks;
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW)
|
||||
PANIC("Cuda error: single carry propagation is not supported for overflow, "
|
||||
"try using add_and_propagate_single_carry");
|
||||
@@ -1647,7 +1665,7 @@ void host_propagate_single_carry(cudaStream_t const *streams,
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_CARRY) {
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
mem->output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
|
||||
output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
// Step 2
|
||||
@@ -1667,45 +1685,40 @@ void host_propagate_single_carry(cudaStream_t const *streams,
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW ||
|
||||
requested_flag == outputFlag::FLAG_CARRY) {
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], mem->output_flag,
|
||||
mem->output_flag,
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
|
||||
mem->prop_simu_group_carries_mem->simulators +
|
||||
(num_radix_blocks - 1) * big_lwe_size,
|
||||
big_lwe_dimension, 1);
|
||||
}
|
||||
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
// Step 3
|
||||
// Add carries and cleanup OutputFlag::None
|
||||
host_radix_sum_in_groups<Torus>(
|
||||
mem->sub_streams_1[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
|
||||
streams[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
|
||||
mem->prop_simu_group_carries_mem->resolved_carries, num_radix_blocks,
|
||||
big_lwe_size, group_size);
|
||||
|
||||
auto message_extract = mem->lut_message_extract;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem->sub_streams_1, gpu_indexes, gpu_count, lwe_array, prepared_blocks,
|
||||
bsks, ksks, num_radix_blocks, message_extract);
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_CARRY) {
|
||||
host_addition<Torus>(mem->sub_streams_2[0], gpu_indexes[0],
|
||||
mem->output_flag, mem->output_flag,
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
|
||||
mem->prop_simu_group_carries_mem->resolved_carries +
|
||||
(mem->num_groups - 1) * big_lwe_size,
|
||||
big_lwe_dimension, 1);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
prepared_blocks + num_radix_blocks * big_lwe_size, output_flag,
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem->sub_streams_2, gpu_indexes, gpu_count, mem->output_flag,
|
||||
mem->output_flag, bsks, ksks, 1, mem->lut_carry_flag_last);
|
||||
streams, gpu_indexes, gpu_count, mem->output_flag, prepared_blocks,
|
||||
bsks, ksks, num_radix_blocks + 1, mem->lut_message_extract);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(carry_out, mem->output_flag,
|
||||
big_lwe_size_bytes, mem->sub_streams_2[0],
|
||||
gpu_indexes[0]);
|
||||
}
|
||||
for (int j = 0; j < mem->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem->sub_streams_2[j], gpu_indexes[j]);
|
||||
cuda_memcpy_async_gpu_to_gpu(lwe_array, mem->output_flag,
|
||||
big_lwe_size_bytes * num_radix_blocks,
|
||||
streams[0], gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
carry_out, mem->output_flag + num_radix_blocks * big_lwe_size,
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
} else {
|
||||
auto message_extract = mem->lut_message_extract;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, prepared_blocks, bsks, ksks,
|
||||
num_radix_blocks, message_extract);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1721,13 +1734,12 @@ void host_add_and_propagate_single_carry(
|
||||
auto params = mem->params;
|
||||
auto glwe_dimension = params.glwe_dimension;
|
||||
auto polynomial_size = params.polynomial_size;
|
||||
auto message_modulus = params.message_modulus;
|
||||
auto carry_modulus = params.carry_modulus;
|
||||
uint32_t big_lwe_size = glwe_dimension * polynomial_size + 1;
|
||||
auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
auto big_lwe_dimension = big_lwe_size - 1; // For host addition
|
||||
auto lut_stride = mem->lut_stride;
|
||||
auto num_many_lut = mem->num_many_lut;
|
||||
auto output_flag = mem->output_flag + big_lwe_size * num_radix_blocks;
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW) {
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
@@ -1754,12 +1766,12 @@ void host_add_and_propagate_single_carry(
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW) {
|
||||
auto lut_overflow_prep = mem->lut_overflow_flag_prep;
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem->output_flag, mem->last_lhs,
|
||||
streams, gpu_indexes, gpu_count, output_flag, mem->last_lhs,
|
||||
mem->last_rhs, bsks, ksks, 1, lut_overflow_prep,
|
||||
lut_overflow_prep->params.message_modulus);
|
||||
} else if (requested_flag == outputFlag::FLAG_CARRY) {
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
mem->output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
|
||||
output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
|
||||
@@ -1780,58 +1792,50 @@ void host_add_and_propagate_single_carry(
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW ||
|
||||
requested_flag == outputFlag::FLAG_CARRY) {
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], mem->output_flag,
|
||||
mem->output_flag,
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
|
||||
mem->prop_simu_group_carries_mem->simulators +
|
||||
(num_radix_blocks - 1) * big_lwe_size,
|
||||
big_lwe_dimension, 1);
|
||||
}
|
||||
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
// Step 3
|
||||
// Add carries and cleanup OutputFlag::None
|
||||
host_radix_sum_in_groups<Torus>(
|
||||
mem->sub_streams_1[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
|
||||
streams[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
|
||||
mem->prop_simu_group_carries_mem->resolved_carries, num_radix_blocks,
|
||||
big_lwe_size, group_size);
|
||||
|
||||
auto message_extract = mem->lut_message_extract;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem->sub_streams_1, gpu_indexes, gpu_count, lhs_array, prepared_blocks,
|
||||
bsks, ksks, num_radix_blocks, message_extract);
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW ||
|
||||
requested_flag == outputFlag::FLAG_CARRY) {
|
||||
if (num_radix_blocks == 1 && requested_flag == outputFlag::FLAG_OVERFLOW &&
|
||||
uses_carry == 1) {
|
||||
host_addition<Torus>(mem->sub_streams_2[0], gpu_indexes[0],
|
||||
mem->output_flag, mem->output_flag, input_carries,
|
||||
big_lwe_dimension, 1);
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
|
||||
input_carries, big_lwe_dimension, 1);
|
||||
|
||||
} else {
|
||||
|
||||
host_addition<Torus>(mem->sub_streams_2[0], gpu_indexes[0],
|
||||
mem->output_flag, mem->output_flag,
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
|
||||
mem->prop_simu_group_carries_mem->resolved_carries +
|
||||
(mem->num_groups - 1) * big_lwe_size,
|
||||
big_lwe_dimension, 1);
|
||||
}
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW) {
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem->sub_streams_2, gpu_indexes, gpu_count, mem->output_flag,
|
||||
mem->output_flag, bsks, ksks, 1, mem->lut_overflow_flag_last);
|
||||
} else {
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem->sub_streams_2, gpu_indexes, gpu_count, mem->output_flag,
|
||||
mem->output_flag, bsks, ksks, 1, mem->lut_carry_flag_last);
|
||||
}
|
||||
cuda_memcpy_async_gpu_to_gpu(carry_out, mem->output_flag,
|
||||
big_lwe_size_bytes, mem->sub_streams_2[0],
|
||||
gpu_indexes[0]);
|
||||
}
|
||||
for (int j = 0; j < mem->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem->sub_streams_2[j], gpu_indexes[j]);
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
prepared_blocks + num_radix_blocks * big_lwe_size, output_flag,
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem->output_flag, prepared_blocks,
|
||||
bsks, ksks, num_radix_blocks + 1, mem->lut_message_extract);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(lhs_array, mem->output_flag,
|
||||
big_lwe_size_bytes * num_radix_blocks,
|
||||
streams[0], gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
carry_out, mem->output_flag + num_radix_blocks * big_lwe_size,
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
} else {
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lhs_array, prepared_blocks, bsks, ksks,
|
||||
num_radix_blocks, mem->lut_message_extract);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,6 +22,9 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
|
||||
case GE:
|
||||
case LT:
|
||||
case LE:
|
||||
if (lwe_ciphertext_count % 2 != 0)
|
||||
PANIC("Cuda error (scalar comparisons): the number of radix blocks has "
|
||||
"to be even.")
|
||||
host_integer_radix_scalar_difference_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -32,6 +35,9 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
|
||||
break;
|
||||
case MAX:
|
||||
case MIN:
|
||||
if (lwe_ciphertext_count % 2 != 0)
|
||||
PANIC("Cuda error (scalar max/min): the number of radix blocks has to be "
|
||||
"even.")
|
||||
host_integer_radix_scalar_maxmin_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
|
||||
@@ -141,8 +141,9 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
|
||||
//////////////
|
||||
// lsb
|
||||
Torus *lhs = diff_buffer->tmp_packed_left;
|
||||
Torus *rhs = diff_buffer->tmp_packed_right;
|
||||
Torus *lhs = diff_buffer->tmp_packed;
|
||||
Torus *rhs =
|
||||
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
|
||||
|
||||
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
|
||||
big_lwe_dimension, num_lsb_radix_blocks,
|
||||
@@ -210,8 +211,9 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
uint32_t num_lsb_radix_blocks = total_num_radix_blocks;
|
||||
uint32_t num_scalar_blocks = total_num_scalar_blocks;
|
||||
|
||||
Torus *lhs = diff_buffer->tmp_packed_left;
|
||||
Torus *rhs = diff_buffer->tmp_packed_right;
|
||||
Torus *lhs = diff_buffer->tmp_packed;
|
||||
Torus *rhs =
|
||||
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
|
||||
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], lhs, lwe_array_in,
|
||||
big_lwe_dimension, num_lsb_radix_blocks,
|
||||
@@ -358,8 +360,9 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
|
||||
//////////////
|
||||
// lsb
|
||||
Torus *lhs = diff_buffer->tmp_packed_left;
|
||||
Torus *rhs = diff_buffer->tmp_packed_right;
|
||||
Torus *lhs = diff_buffer->tmp_packed;
|
||||
Torus *rhs =
|
||||
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
|
||||
|
||||
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
|
||||
big_lwe_dimension, num_lsb_radix_blocks,
|
||||
@@ -458,8 +461,9 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
auto lwe_array_ct_out = mem_ptr->tmp_lwe_array_out;
|
||||
auto lwe_array_sign_out =
|
||||
lwe_array_ct_out + (num_lsb_radix_blocks / 2) * big_lwe_size;
|
||||
Torus *lhs = diff_buffer->tmp_packed_left;
|
||||
Torus *rhs = diff_buffer->tmp_packed_right;
|
||||
Torus *lhs = diff_buffer->tmp_packed;
|
||||
Torus *rhs =
|
||||
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
|
||||
|
||||
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
|
||||
big_lwe_dimension, num_lsb_radix_blocks - 1,
|
||||
|
||||
@@ -36,7 +36,7 @@ __host__ void scratch_cuda_integer_radix_scalar_mul_kb(
|
||||
|
||||
*mem_ptr =
|
||||
new int_scalar_mul_buffer<T>(streams, gpu_indexes, gpu_count, params,
|
||||
num_radix_blocks, allocate_gpu_memory);
|
||||
num_radix_blocks, allocate_gpu_memory, true);
|
||||
}
|
||||
|
||||
template <typename T, class params>
|
||||
@@ -94,9 +94,11 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
}
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
cuda_drop_async(preshifted_buffer, streams[0], gpu_indexes[0]);
|
||||
mem->logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
|
||||
delete (mem->logical_scalar_shift_buffer);
|
||||
if (mem->anticipated_buffers_drop) {
|
||||
cuda_drop_async(preshifted_buffer, streams[0], gpu_indexes[0]);
|
||||
mem->logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
|
||||
delete (mem->logical_scalar_shift_buffer);
|
||||
}
|
||||
|
||||
if (j == 0) {
|
||||
// lwe array = 0
|
||||
|
||||
@@ -136,7 +136,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_programmable_bootstrap_tbc<Torus, Degree<512>>(
|
||||
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<512>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
@@ -144,7 +144,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_programmable_bootstrap_tbc<Torus, Degree<1024>>(
|
||||
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<1024>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
@@ -393,7 +393,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_programmable_bootstrap_cg<Torus, Degree<512>>(
|
||||
host_programmable_bootstrap_cg<Torus, AmortizedDegree<512>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
@@ -401,7 +401,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_programmable_bootstrap_cg<Torus, Degree<1024>>(
|
||||
host_programmable_bootstrap_cg<Torus, AmortizedDegree<1024>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
@@ -468,7 +468,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_programmable_bootstrap<Torus, Degree<512>>(
|
||||
host_programmable_bootstrap<Torus, AmortizedDegree<512>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
@@ -476,7 +476,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_programmable_bootstrap<Torus, Degree<1024>>(
|
||||
host_programmable_bootstrap<Torus, AmortizedDegree<1024>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
|
||||
@@ -480,20 +480,30 @@ __host__ void host_programmable_bootstrap(
|
||||
double2 *global_join_buffer = pbs_buffer->global_join_buffer;
|
||||
int8_t *d_mem = pbs_buffer->d_mem;
|
||||
|
||||
bool graphCreated = false;
|
||||
cudaGraph_t graph;
|
||||
cudaGraphExec_t instance;
|
||||
for (int i = 0; i < lwe_dimension; i++) {
|
||||
execute_step_one<Torus, params>(
|
||||
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, global_accumulator,
|
||||
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
|
||||
partial_sm, partial_dm_step_one, full_sm_step_one, full_dm_step_one);
|
||||
execute_step_two<Torus, params>(
|
||||
stream, gpu_index, lwe_array_out, lwe_output_indexes, lut_vector,
|
||||
lut_vector_indexes, bootstrapping_key, global_accumulator,
|
||||
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
|
||||
partial_sm, partial_dm_step_two, full_sm_step_two, full_dm_step_two,
|
||||
num_many_lut, lut_stride);
|
||||
if (!graphCreated) {
|
||||
cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal);
|
||||
execute_step_one<Torus, params>(
|
||||
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, global_accumulator,
|
||||
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
|
||||
partial_sm, partial_dm_step_one, full_sm_step_one, full_dm_step_one);
|
||||
execute_step_two<Torus, params>(
|
||||
stream, gpu_index, lwe_array_out, lwe_output_indexes, lut_vector,
|
||||
lut_vector_indexes, bootstrapping_key, global_accumulator,
|
||||
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
|
||||
partial_sm, partial_dm_step_two, full_sm_step_two, full_dm_step_two,
|
||||
num_many_lut, lut_stride);
|
||||
cudaStreamEndCapture(stream, &graph);
|
||||
cudaGraphInstantiate(&instance, graph, NULL, NULL, 0);
|
||||
graphCreated = true;
|
||||
}
|
||||
cudaGraphLaunch(instance, stream);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -649,29 +649,41 @@ __host__ void host_multi_bit_programmable_bootstrap(
|
||||
|
||||
auto lwe_chunk_size = buffer->lwe_chunk_size;
|
||||
|
||||
bool graphCreated = false;
|
||||
cudaGraph_t graph;
|
||||
cudaGraphExec_t instance;
|
||||
|
||||
for (uint32_t lwe_offset = 0; lwe_offset < (lwe_dimension / grouping_factor);
|
||||
lwe_offset += lwe_chunk_size) {
|
||||
|
||||
// Compute a keybundle
|
||||
execute_compute_keybundle<Torus, params>(
|
||||
stream, gpu_index, lwe_array_in, lwe_input_indexes, bootstrapping_key,
|
||||
buffer, num_samples, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
grouping_factor, level_count, lwe_offset);
|
||||
// Accumulate
|
||||
uint32_t chunk_size = std::min(
|
||||
lwe_chunk_size, (lwe_dimension / grouping_factor) - lwe_offset);
|
||||
for (uint32_t j = 0; j < chunk_size; j++) {
|
||||
execute_step_one<Torus, params>(
|
||||
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, buffer, num_samples, lwe_dimension, glwe_dimension,
|
||||
polynomial_size, base_log, level_count, j, lwe_offset);
|
||||
if (!graphCreated) {
|
||||
cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal);
|
||||
// Compute a keybundle
|
||||
execute_compute_keybundle<Torus, params>(
|
||||
stream, gpu_index, lwe_array_in, lwe_input_indexes, bootstrapping_key,
|
||||
buffer, num_samples, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
grouping_factor, level_count, lwe_offset);
|
||||
// Accumulate
|
||||
uint32_t chunk_size = std::min(
|
||||
lwe_chunk_size, (lwe_dimension / grouping_factor) - lwe_offset);
|
||||
for (uint32_t j = 0; j < chunk_size; j++) {
|
||||
execute_step_one<Torus, params>(
|
||||
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, buffer, num_samples, lwe_dimension,
|
||||
glwe_dimension, polynomial_size, base_log, level_count, j,
|
||||
lwe_offset);
|
||||
|
||||
execute_step_two<Torus, params>(
|
||||
stream, gpu_index, lwe_array_out, lwe_output_indexes, buffer,
|
||||
num_samples, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
grouping_factor, level_count, j, lwe_offset, num_many_lut,
|
||||
lut_stride);
|
||||
execute_step_two<Torus, params>(
|
||||
stream, gpu_index, lwe_array_out, lwe_output_indexes, buffer,
|
||||
num_samples, lwe_dimension, glwe_dimension, polynomial_size,
|
||||
grouping_factor, level_count, j, lwe_offset, num_many_lut,
|
||||
lut_stride);
|
||||
}
|
||||
cudaStreamEndCapture(stream, &graph);
|
||||
cudaGraphInstantiate(&instance, graph, NULL, NULL, 0);
|
||||
graphCreated = true;
|
||||
}
|
||||
cudaGraphLaunch(instance, stream);
|
||||
}
|
||||
}
|
||||
#endif // MULTIBIT_PBS_H
|
||||
|
||||
@@ -163,6 +163,29 @@ extern "C" {
|
||||
allocate_gpu_memory: bool,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
mem_ptr: *mut *mut i8,
|
||||
input_lut: *const ffi::c_void,
|
||||
lwe_dimension: u32,
|
||||
glwe_dimension: u32,
|
||||
polynomial_size: u32,
|
||||
ks_level: u32,
|
||||
ks_base_log: u32,
|
||||
pbs_level: u32,
|
||||
pbs_base_log: u32,
|
||||
grouping_factor: u32,
|
||||
num_radix_blocks: u32,
|
||||
message_modulus: u32,
|
||||
carry_modulus: u32,
|
||||
pbs_type: PBS_TYPE,
|
||||
num_many_lut: u32,
|
||||
allocate_gpu_memory: bool,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cuda_apply_univariate_lut_kb_64(
|
||||
streams: *const *mut ffi::c_void,
|
||||
@@ -1083,6 +1106,92 @@ extern "C" {
|
||||
mem_ptr_void: *mut *mut i8,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
mem_ptr: *mut *mut i8,
|
||||
glwe_dimension: u32,
|
||||
polynomial_size: u32,
|
||||
big_lwe_dimension: u32,
|
||||
small_lwe_dimension: u32,
|
||||
ks_level: u32,
|
||||
ks_base_log: u32,
|
||||
pbs_level: u32,
|
||||
pbs_base_log: u32,
|
||||
grouping_factor: u32,
|
||||
num_radix_blocks: u32,
|
||||
message_modulus: u32,
|
||||
carry_modulus: u32,
|
||||
pbs_type: PBS_TYPE,
|
||||
allocate_gpu_memory: bool,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
lwe_array_out: *mut ffi::c_void,
|
||||
lwe_array_in: *const ffi::c_void,
|
||||
mem_ptr: *mut i8,
|
||||
bsks: *const *mut ffi::c_void,
|
||||
ksks: *const *mut ffi::c_void,
|
||||
num_radix_blocks: u32,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cleanup_cuda_integer_are_all_comparisons_block_true(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
mem_ptr_void: *mut *mut i8,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
mem_ptr: *mut *mut i8,
|
||||
glwe_dimension: u32,
|
||||
polynomial_size: u32,
|
||||
big_lwe_dimension: u32,
|
||||
small_lwe_dimension: u32,
|
||||
ks_level: u32,
|
||||
ks_base_log: u32,
|
||||
pbs_level: u32,
|
||||
pbs_base_log: u32,
|
||||
grouping_factor: u32,
|
||||
num_radix_blocks: u32,
|
||||
message_modulus: u32,
|
||||
carry_modulus: u32,
|
||||
pbs_type: PBS_TYPE,
|
||||
allocate_gpu_memory: bool,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
lwe_array_out: *mut ffi::c_void,
|
||||
lwe_array_in: *const ffi::c_void,
|
||||
mem_ptr: *mut i8,
|
||||
bsks: *const *mut ffi::c_void,
|
||||
ksks: *const *mut ffi::c_void,
|
||||
num_radix_blocks: u32,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
mem_ptr_void: *mut *mut i8,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cuda_keyswitch_lwe_ciphertext_vector_32(
|
||||
stream: *mut ffi::c_void,
|
||||
@@ -1120,6 +1229,7 @@ extern "C" {
|
||||
stream: *mut ffi::c_void,
|
||||
gpu_index: u32,
|
||||
fp_ks_buffer: *mut *mut i8,
|
||||
lwe_dimension: u32,
|
||||
glwe_dimension: u32,
|
||||
polynomial_size: u32,
|
||||
num_lwes: u32,
|
||||
|
||||
@@ -31,7 +31,7 @@ instance_type = "m6i.4xlarge"
|
||||
[backend.hyperstack.gpu-test]
|
||||
environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
|
||||
flavor_name = "n3-RTX-A6000x1"
|
||||
flavor_name = "n3-L40x1"
|
||||
|
||||
[backend.hyperstack.single-h100]
|
||||
environment_name = "canada"
|
||||
@@ -58,6 +58,12 @@ environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
|
||||
flavor_name = "n3-H100x8-NVLink"
|
||||
|
||||
|
||||
[backend.hyperstack.multi-h100-sxm5]
|
||||
environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
|
||||
flavor_name = "n3-H100-SXM5x8"
|
||||
|
||||
[backend.hyperstack.multi-a100-nvlink]
|
||||
environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
ARCH_FEATURE=x86_64
|
||||
|
||||
IS_AARCH64="$( (uname -a | grep -c "arm64\|aarch64") || true)"
|
||||
|
||||
if [[ "${IS_AARCH64}" != "0" ]]; then
|
||||
ARCH_FEATURE=aarch64
|
||||
fi
|
||||
|
||||
UNAME="$(uname)"
|
||||
|
||||
if [[ "${UNAME}" == "Linux" || "${UNAME}" == "Darwin" ]]; then
|
||||
ARCH_FEATURE="${ARCH_FEATURE}-unix"
|
||||
fi
|
||||
|
||||
echo "${ARCH_FEATURE}"
|
||||
@@ -10,6 +10,9 @@ function usage() {
|
||||
echo "--multi-bit Run multi-bit tests only: default off"
|
||||
echo "--unsigned-only Run only unsigned integer tests, by default both signed and unsigned tests are run"
|
||||
echo "--signed-only Run only signed integer tests, by default both signed and unsigned tests are run"
|
||||
echo "--nightly-tests Run integer tests configured for nightly runs (3_3 params)"
|
||||
echo "--fast-tests Run integer set but skip a subset of longer tests"
|
||||
echo "--long-tests Run only long run integer tests"
|
||||
echo "--cargo-profile The cargo profile used to build tests"
|
||||
echo "--backend Backend to use with tfhe-rs"
|
||||
echo "--avx512-support Set to ON to enable avx512"
|
||||
@@ -21,6 +24,7 @@ RUST_TOOLCHAIN="+stable"
|
||||
multi_bit_argument=
|
||||
sign_argument=
|
||||
fast_tests_argument=
|
||||
long_tests_argument=
|
||||
nightly_tests_argument=
|
||||
no_big_params_argument=
|
||||
cargo_profile="release"
|
||||
@@ -91,6 +95,10 @@ if [[ "${FAST_TESTS}" == TRUE ]]; then
|
||||
fast_tests_argument=--fast-tests
|
||||
fi
|
||||
|
||||
if [[ "${LONG_TESTS}" == TRUE ]]; then
|
||||
long_tests_argument=--long-tests
|
||||
fi
|
||||
|
||||
if [[ "${NIGHTLY_TESTS}" == TRUE ]]; then
|
||||
nightly_tests_argument=--nightly-tests
|
||||
fi
|
||||
@@ -104,7 +112,6 @@ if [[ "${backend}" == "gpu" ]]; then
|
||||
fi
|
||||
|
||||
CURR_DIR="$(dirname "$0")"
|
||||
ARCH_FEATURE="$("${CURR_DIR}/get_arch_feature.sh")"
|
||||
|
||||
# TODO autodetect/have a finer CPU count depending on memory
|
||||
num_cpu_threads="$("${CURR_DIR}"/cpu_count.sh)"
|
||||
@@ -138,32 +145,38 @@ if [[ "${backend}" == "gpu" ]]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})
|
||||
filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${long_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})
|
||||
|
||||
if [[ "${FAST_TESTS}" == "TRUE" ]]; then
|
||||
echo "Running 'fast' test set"
|
||||
else
|
||||
elif [[ "${LONG_TESTS}" == "FALSE" ]]; then
|
||||
echo "Running 'slow' test set"
|
||||
fi
|
||||
|
||||
if [[ "${LONG_TESTS}" == "TRUE" ]]; then
|
||||
echo "Running 'long run' test set"
|
||||
fi
|
||||
|
||||
if [[ "${NIGHTLY_TESTS}" == "TRUE" ]]; then
|
||||
echo "Running 'nightly' test set"
|
||||
fi
|
||||
|
||||
echo "${filter_expression}"
|
||||
|
||||
cargo "${RUST_TOOLCHAIN}" nextest run \
|
||||
--tests \
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--features="${ARCH_FEATURE}",integer,internal-keycache,zk-pok,experimental,"${avx512_feature}","${gpu_feature}" \
|
||||
--features=integer,internal-keycache,zk-pok,experimental,"${avx512_feature}","${gpu_feature}" \
|
||||
--test-threads "${test_threads}" \
|
||||
-E "$filter_expression"
|
||||
|
||||
if [[ -z ${multi_bit_argument} ]]; then
|
||||
if [[ -z ${multi_bit_argument} && -z ${long_tests_argument} ]]; then
|
||||
cargo "${RUST_TOOLCHAIN}" test \
|
||||
--profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--features="${ARCH_FEATURE}",integer,internal-keycache,experimental,"${avx512_feature}","${gpu_feature}" \
|
||||
--features=integer,internal-keycache,experimental,"${avx512_feature}","${gpu_feature}" \
|
||||
--doc \
|
||||
-- --test-threads="${doctest_threads}" integer::"${gpu_feature}"
|
||||
fi
|
||||
|
||||
@@ -65,7 +65,6 @@ if [[ "${FAST_TESTS}" == TRUE ]]; then
|
||||
fi
|
||||
|
||||
CURR_DIR="$(dirname "$0")"
|
||||
ARCH_FEATURE="$("${CURR_DIR}/get_arch_feature.sh")"
|
||||
|
||||
n_threads_small="$("${CURR_DIR}"/cpu_count.sh)"
|
||||
n_threads_big="${n_threads_small}"
|
||||
@@ -94,7 +93,7 @@ if [[ "${BIG_TESTS_INSTANCE}" != TRUE ]]; then
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache,zk-pok,experimental \
|
||||
--features=shortint,internal-keycache,zk-pok,experimental \
|
||||
--test-threads "${n_threads_small}" \
|
||||
-E "${filter_expression_small_params}"
|
||||
|
||||
@@ -111,7 +110,7 @@ and not test(~smart_add_and_mul)"""
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache,zk-pok,experimental \
|
||||
--features=shortint,internal-keycache,zk-pok,experimental \
|
||||
--test-threads "${n_threads_big}" \
|
||||
--no-tests=warn \
|
||||
-E "${filter_expression_big_params}"
|
||||
@@ -120,7 +119,7 @@ and not test(~smart_add_and_mul)"""
|
||||
cargo "${RUST_TOOLCHAIN}" test \
|
||||
--profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache,zk-pok,experimental \
|
||||
--features=shortint,internal-keycache,zk-pok,experimental \
|
||||
--doc \
|
||||
-- shortint::
|
||||
fi
|
||||
@@ -134,7 +133,7 @@ else
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache,experimental \
|
||||
--features=shortint,internal-keycache,experimental \
|
||||
--test-threads "${n_threads_big}" \
|
||||
-E "${filter_expression}"
|
||||
|
||||
@@ -142,7 +141,7 @@ else
|
||||
cargo "${RUST_TOOLCHAIN}" test \
|
||||
--profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache,experimental \
|
||||
--features=shortint,internal-keycache,experimental \
|
||||
--doc \
|
||||
-- --test-threads="${n_threads_big}" shortint::
|
||||
fi
|
||||
|
||||
@@ -26,6 +26,12 @@ parser.add_argument(
|
||||
action="store_true",
|
||||
help="Run only a small subset of test suite",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--long-tests",
|
||||
dest="long_tests",
|
||||
action="store_true",
|
||||
help="Run only the long tests suite",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--nightly-tests",
|
||||
dest="nightly_tests",
|
||||
@@ -80,6 +86,7 @@ EXCLUDED_INTEGER_TESTS = [
|
||||
"/.*test_wopbs_bivariate_crt_wopbs_param_message_[34]_carry_[34]_ks_pbs_gaussian_2m64$/",
|
||||
"/.*test_integer_smart_mul_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
|
||||
"/.*test_integer_default_add_sequence_multi_thread_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
|
||||
"/.*::tests_long_run::.*/",
|
||||
]
|
||||
|
||||
# skip default_div, default_rem which are covered by default_div_rem
|
||||
@@ -94,55 +101,61 @@ EXCLUDED_BIG_PARAMETERS = [
|
||||
"/.*_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
|
||||
]
|
||||
|
||||
|
||||
def filter_integer_tests(input_args):
|
||||
(multi_bit_filter, group_filter) = (
|
||||
("_multi_bit", "_group_[0-9]") if input_args.multi_bit else ("", "")
|
||||
)
|
||||
backend_filter = ""
|
||||
if input_args.backend == "gpu":
|
||||
backend_filter = "gpu::"
|
||||
if multi_bit_filter:
|
||||
# For now, GPU only has specific parameters set for multi-bit
|
||||
multi_bit_filter = "_gpu_multi_bit"
|
||||
if not input_args.long_tests:
|
||||
if input_args.backend == "gpu":
|
||||
backend_filter = "gpu::"
|
||||
if multi_bit_filter:
|
||||
# For now, GPU only has specific parameters set for multi-bit
|
||||
multi_bit_filter = "_gpu_multi_bit"
|
||||
|
||||
filter_expression = [f"test(/^integer::{backend_filter}.*/)"]
|
||||
filter_expression = [f"test(/^integer::{backend_filter}.*/)"]
|
||||
|
||||
if input_args.multi_bit:
|
||||
filter_expression.append("test(~_multi_bit)")
|
||||
else:
|
||||
filter_expression.append("not test(~_multi_bit)")
|
||||
if input_args.multi_bit:
|
||||
filter_expression.append("test(~_multi_bit)")
|
||||
else:
|
||||
filter_expression.append("not test(~_multi_bit)")
|
||||
|
||||
if input_args.signed_only:
|
||||
filter_expression.append("test(~_signed)")
|
||||
if input_args.unsigned_only:
|
||||
filter_expression.append("not test(~_signed)")
|
||||
if input_args.signed_only:
|
||||
filter_expression.append("test(~_signed)")
|
||||
if input_args.unsigned_only:
|
||||
filter_expression.append("not test(~_signed)")
|
||||
|
||||
if input_args.no_big_params:
|
||||
for pattern in EXCLUDED_BIG_PARAMETERS:
|
||||
if input_args.no_big_params:
|
||||
for pattern in EXCLUDED_BIG_PARAMETERS:
|
||||
filter_expression.append(f"not test({pattern})")
|
||||
|
||||
if input_args.fast_tests and input_args.nightly_tests:
|
||||
filter_expression.append(
|
||||
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_[2-3]_carry_[2-3]_.*/)"
|
||||
)
|
||||
elif input_args.fast_tests:
|
||||
# Test only fast default operations with only one set of parameters
|
||||
filter_expression.append(
|
||||
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_2_carry_2_.*/)"
|
||||
)
|
||||
elif input_args.nightly_tests:
|
||||
# Test only fast default operations with only one set of parameters
|
||||
# This subset would run slower than fast_tests hence the use of nightly_tests
|
||||
filter_expression.append(
|
||||
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_3_carry_3_.*/)"
|
||||
)
|
||||
excluded_tests = (
|
||||
EXCLUDED_INTEGER_FAST_TESTS if input_args.fast_tests else EXCLUDED_INTEGER_TESTS
|
||||
)
|
||||
for pattern in excluded_tests:
|
||||
filter_expression.append(f"not test({pattern})")
|
||||
|
||||
if input_args.fast_tests and input_args.nightly_tests:
|
||||
filter_expression.append(
|
||||
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_[2-3]_carry_[2-3]_.*/)"
|
||||
)
|
||||
elif input_args.fast_tests:
|
||||
# Test only fast default operations with only one set of parameters
|
||||
filter_expression.append(
|
||||
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_2_carry_2_.*/)"
|
||||
)
|
||||
elif input_args.nightly_tests:
|
||||
# Test only fast default operations with only one set of parameters
|
||||
# This subset would run slower than fast_tests hence the use of nightly_tests
|
||||
filter_expression.append(
|
||||
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_3_carry_3_.*/)"
|
||||
)
|
||||
else:
|
||||
if input_args.backend == "gpu":
|
||||
filter_expression = [f"test(/^integer::gpu::server_key::radix::tests_long_run.*/)"]
|
||||
elif input_args.backend == "cpu":
|
||||
filter_expression = [f"test(/^integer::server_key::radix_parallel::tests_long_run.*/)"]
|
||||
|
||||
excluded_tests = (
|
||||
EXCLUDED_INTEGER_FAST_TESTS if input_args.fast_tests else EXCLUDED_INTEGER_TESTS
|
||||
)
|
||||
for pattern in excluded_tests:
|
||||
filter_expression.append(f"not test({pattern})")
|
||||
|
||||
return " and ".join(filter_expression)
|
||||
|
||||
|
||||
@@ -7,7 +7,6 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
clap = "=4.4.4"
|
||||
lazy_static = "1.4"
|
||||
log = "0.4"
|
||||
simplelog = "0.12"
|
||||
walkdir = "2.5.0"
|
||||
|
||||
@@ -101,7 +101,7 @@ pub fn check_tfhe_docs_are_tested() -> Result<(), Error> {
|
||||
.into_iter()
|
||||
.filter_map(|entry| {
|
||||
let path = entry.path().canonicalize().ok()?;
|
||||
if path.is_file() && path.extension().map_or(false, |e| e == "md") {
|
||||
if path.is_file() && path.extension().is_some_and(|e| e == "md") {
|
||||
let file_content = std::fs::read_to_string(&path).ok()?;
|
||||
if file_content.contains("```rust") {
|
||||
Some(path.to_path_buf())
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
use clap::{Arg, Command};
|
||||
use lazy_static::lazy_static;
|
||||
use log::LevelFilter;
|
||||
use simplelog::{ColorChoice, CombinedLogger, Config, TermLogger, TerminalMode};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
@@ -12,9 +11,8 @@ mod utils;
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// CONSTANTS
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
lazy_static! {
|
||||
static ref DRY_RUN: AtomicBool = AtomicBool::new(false);
|
||||
}
|
||||
|
||||
static DRY_RUN: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// MAIN
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tfhe-csprng"
|
||||
version = "0.4.1"
|
||||
version = "0.5.0"
|
||||
edition = "2021"
|
||||
license = "BSD-3-Clause-Clear"
|
||||
description = "Cryptographically Secure PRNG used in the TFHE-rs library."
|
||||
@@ -13,41 +13,25 @@ rust-version = "1.72"
|
||||
|
||||
[dependencies]
|
||||
aes = "0.8.2"
|
||||
rayon = { version = "1.5.0", optional = true }
|
||||
rayon = { workspace = true , optional = true }
|
||||
|
||||
[target.'cfg(target_os = "macos")'.dependencies]
|
||||
libc = "0.2.133"
|
||||
|
||||
[dev-dependencies]
|
||||
rand = "0.8.3"
|
||||
rand = { workspace = true }
|
||||
criterion = "0.5.1"
|
||||
clap = "=4.4.4"
|
||||
|
||||
[features]
|
||||
parallel = ["rayon"]
|
||||
seeder_x86_64_rdseed = []
|
||||
seeder_unix = []
|
||||
generator_x86_64_aesni = []
|
||||
generator_fallback = []
|
||||
generator_aarch64_aes = []
|
||||
|
||||
x86_64 = [
|
||||
"parallel",
|
||||
"seeder_x86_64_rdseed",
|
||||
"generator_x86_64_aesni",
|
||||
"generator_fallback",
|
||||
]
|
||||
x86_64-unix = ["x86_64", "seeder_unix"]
|
||||
aarch64 = ["parallel", "generator_aarch64_aes", "generator_fallback"]
|
||||
aarch64-unix = ["aarch64", "seeder_unix"]
|
||||
software-prng = []
|
||||
|
||||
[[bench]]
|
||||
name = "benchmark"
|
||||
path = "benches/benchmark.rs"
|
||||
harness = false
|
||||
required-features = ["seeder_x86_64_rdseed", "generator_x86_64_aesni"]
|
||||
|
||||
[[example]]
|
||||
name = "generate"
|
||||
path = "examples/generate.rs"
|
||||
required-features = ["seeder_unix", "generator_fallback"]
|
||||
|
||||
@@ -8,13 +8,13 @@ The implementation is based on the AES blockcipher used in CTR mode, as describe
|
||||
|
||||
Two implementations are available, an accelerated one on x86_64 CPUs with the `aes` feature and the `sse2` feature, and a pure software one that can be used on other platforms.
|
||||
|
||||
The crate also makes two seeders available, one needing the x86_64 feature `rdseed` and another one based on the Unix random device `/dev/random` the latter requires the user to provide a secret.
|
||||
The crate also makes two seeders available, one needing the x86_64 instruction `rdseed` and another one based on the Unix random device `/dev/random` the latter requires the user to provide a secret.
|
||||
|
||||
## Running the benchmarks
|
||||
|
||||
To execute the benchmarks on an x86_64 platform:
|
||||
```shell
|
||||
RUSTFLAGS="-Ctarget-cpu=native" cargo bench --features=seeder_x86_64_rdseed,generator_x86_64_aesni
|
||||
RUSTFLAGS="-Ctarget-cpu=native" cargo bench
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
@@ -1,15 +1,53 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use tfhe_csprng::generators::{
|
||||
AesniRandomGenerator, BytesPerChild, ChildrenCount, RandomGenerator,
|
||||
BytesPerChild, ChildrenCount, DefaultRandomGenerator, RandomGenerator,
|
||||
};
|
||||
use tfhe_csprng::seeders::{RdseedSeeder, Seeder};
|
||||
#[cfg(target_os = "macos")]
|
||||
use tfhe_csprng::seeders::AppleSecureEnclaveSeeder as ActivatedSeeder;
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
target_arch = "x86_64",
|
||||
target_feature = "rdseed"
|
||||
))]
|
||||
use tfhe_csprng::seeders::RdseedSeeder as ActivatedSeeder;
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
not(all(target_arch = "x86_64", target_feature = "rdseed")),
|
||||
target_family = "unix"
|
||||
))]
|
||||
use tfhe_csprng::seeders::UnixSeeder as ActivatedSeeder;
|
||||
|
||||
use tfhe_csprng::seeders::Seeder;
|
||||
|
||||
// The number of bytes to generate during one benchmark iteration.
|
||||
const N_GEN: usize = 1_000_000;
|
||||
|
||||
fn new_seeder() -> ActivatedSeeder {
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
ActivatedSeeder
|
||||
}
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
target_arch = "x86_64",
|
||||
target_feature = "rdseed"
|
||||
))]
|
||||
{
|
||||
ActivatedSeeder::new()
|
||||
}
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
not(all(target_arch = "x86_64", target_feature = "rdseed")),
|
||||
target_family = "unix"
|
||||
))]
|
||||
{
|
||||
ActivatedSeeder::new(0)
|
||||
}
|
||||
}
|
||||
|
||||
fn parent_generate(c: &mut Criterion) {
|
||||
let mut seeder = RdseedSeeder;
|
||||
let mut generator = AesniRandomGenerator::new(seeder.seed());
|
||||
let mut seeder = new_seeder();
|
||||
let mut generator = DefaultRandomGenerator::new(seeder.seed());
|
||||
c.bench_function("parent_generate", |b| {
|
||||
b.iter(|| {
|
||||
(0..N_GEN).for_each(|_| {
|
||||
@@ -20,8 +58,8 @@ fn parent_generate(c: &mut Criterion) {
|
||||
}
|
||||
|
||||
fn child_generate(c: &mut Criterion) {
|
||||
let mut seeder = RdseedSeeder;
|
||||
let mut generator = AesniRandomGenerator::new(seeder.seed());
|
||||
let mut seeder = new_seeder();
|
||||
let mut generator = DefaultRandomGenerator::new(seeder.seed());
|
||||
let mut generator = generator
|
||||
.try_fork(ChildrenCount(1), BytesPerChild(N_GEN * 10_000))
|
||||
.unwrap()
|
||||
@@ -37,8 +75,8 @@ fn child_generate(c: &mut Criterion) {
|
||||
}
|
||||
|
||||
fn fork(c: &mut Criterion) {
|
||||
let mut seeder = RdseedSeeder;
|
||||
let mut generator = AesniRandomGenerator::new(seeder.seed());
|
||||
let mut seeder = new_seeder();
|
||||
let mut generator = DefaultRandomGenerator::new(seeder.seed());
|
||||
c.bench_function("fork", |b| {
|
||||
b.iter(|| {
|
||||
black_box(
|
||||
|
||||
@@ -1,115 +0,0 @@
|
||||
// To have clear error messages during compilation about why some piece of code may not be available
|
||||
// we decided to check the features compatibility with the target configuration in this script.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
|
||||
// See https://doc.rust-lang.org/reference/conditional-compilation.html#target_arch for various
|
||||
// compilation configuration
|
||||
|
||||
// Can be easily extended if needed
|
||||
pub struct FeatureRequirement {
|
||||
pub feature_name: &'static str,
|
||||
// target_arch requirement
|
||||
pub feature_req_target_arch: Option<&'static str>,
|
||||
// target_family requirement
|
||||
pub feature_req_target_family: Option<&'static str>,
|
||||
}
|
||||
|
||||
// We implement a version of default that is const which is not possible through the Default trait
|
||||
impl FeatureRequirement {
|
||||
// As we cannot use cfg!(feature = "feature_name") with something else than a literal, we need
|
||||
// a reference to the HashMap we populate with the enabled features
|
||||
fn is_activated(&self, build_activated_features: &HashMap<&'static str, bool>) -> bool {
|
||||
*build_activated_features.get(self.feature_name).unwrap()
|
||||
}
|
||||
|
||||
// panics if the requirements are not met
|
||||
fn check_requirements(&self) {
|
||||
let target_arch = get_target_arch_cfg();
|
||||
if let Some(feature_req_target_arch) = self.feature_req_target_arch {
|
||||
if feature_req_target_arch != target_arch {
|
||||
panic!(
|
||||
"Feature `{}` requires target_arch `{}`, current cfg: `{}`",
|
||||
self.feature_name, feature_req_target_arch, target_arch
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
let target_families = get_target_family_cfgs();
|
||||
if let Some(feature_req_target_family) = self.feature_req_target_family {
|
||||
if target_families
|
||||
.split(',')
|
||||
.all(|family| family != feature_req_target_family)
|
||||
{
|
||||
panic!(
|
||||
"Feature `{}` requires target_family `{}`, current cfgs: `{}`",
|
||||
self.feature_name, feature_req_target_family, target_families
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// const vecs are not yet a thing so use a fixed size array (update the array size when adding
|
||||
// requirements)
|
||||
static FEATURE_REQUIREMENTS: [FeatureRequirement; 4] = [
|
||||
FeatureRequirement {
|
||||
feature_name: "seeder_x86_64_rdseed",
|
||||
feature_req_target_arch: Some("x86_64"),
|
||||
feature_req_target_family: None,
|
||||
},
|
||||
FeatureRequirement {
|
||||
feature_name: "generator_x86_64_aesni",
|
||||
feature_req_target_arch: Some("x86_64"),
|
||||
feature_req_target_family: None,
|
||||
},
|
||||
FeatureRequirement {
|
||||
feature_name: "seeder_unix",
|
||||
feature_req_target_arch: None,
|
||||
feature_req_target_family: Some("unix"),
|
||||
},
|
||||
FeatureRequirement {
|
||||
feature_name: "generator_aarch64_aes",
|
||||
feature_req_target_arch: Some("aarch64"),
|
||||
feature_req_target_family: None,
|
||||
},
|
||||
];
|
||||
|
||||
// For a "feature_name" feature_cfg!("feature_name") expands to
|
||||
// ("feature_name", cfg!(feature = "feature_name"))
|
||||
macro_rules! feature_cfg {
|
||||
($feat_name:literal) => {
|
||||
($feat_name, cfg!(feature = $feat_name))
|
||||
};
|
||||
}
|
||||
|
||||
// Static HashMap would require an additional crate (phf or lazy static e.g.), so we just write a
|
||||
// function that returns the HashMap we are interested in
|
||||
fn get_feature_enabled_status() -> HashMap<&'static str, bool> {
|
||||
HashMap::from([
|
||||
feature_cfg!("seeder_x86_64_rdseed"),
|
||||
feature_cfg!("generator_x86_64_aesni"),
|
||||
feature_cfg!("seeder_unix"),
|
||||
feature_cfg!("generator_aarch64_aes"),
|
||||
])
|
||||
}
|
||||
|
||||
// See https://stackoverflow.com/a/43435335/18088947 for the inspiration of this code
|
||||
fn get_target_arch_cfg() -> String {
|
||||
env::var("CARGO_CFG_TARGET_ARCH").expect("CARGO_CFG_TARGET_ARCH is not set")
|
||||
}
|
||||
|
||||
fn get_target_family_cfgs() -> String {
|
||||
env::var("CARGO_CFG_TARGET_FAMILY").expect("CARGO_CFG_TARGET_FAMILY is not set")
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let feature_enabled_status = get_feature_enabled_status();
|
||||
|
||||
// This will panic if some requirements for a feature are not met
|
||||
FEATURE_REQUIREMENTS
|
||||
.iter()
|
||||
.filter(|&req| FeatureRequirement::is_activated(req, &feature_enabled_status))
|
||||
.for_each(FeatureRequirement::check_requirements);
|
||||
}
|
||||
@@ -2,35 +2,29 @@
|
||||
//! the program stdout. It can also generate a fixed number of bytes by passing a value along the
|
||||
//! optional argument `--bytes_total`. For testing purpose.
|
||||
use clap::{value_parser, Arg, Command};
|
||||
#[cfg(feature = "generator_x86_64_aesni")]
|
||||
use tfhe_csprng::generators::AesniRandomGenerator as ActivatedRandomGenerator;
|
||||
#[cfg(feature = "generator_aarch64_aes")]
|
||||
use tfhe_csprng::generators::NeonAesRandomGenerator as ActivatedRandomGenerator;
|
||||
use tfhe_csprng::generators::RandomGenerator;
|
||||
#[cfg(all(
|
||||
not(feature = "generator_x86_64_aesni"),
|
||||
not(feature = "generator_aarch64_aes"),
|
||||
feature = "generator_fallback"
|
||||
))]
|
||||
use tfhe_csprng::generators::SoftwareRandomGenerator as ActivatedRandomGenerator;
|
||||
use tfhe_csprng::generators::{DefaultRandomGenerator, RandomGenerator};
|
||||
|
||||
use std::io::prelude::*;
|
||||
use std::io::{stdout, StdoutLock};
|
||||
#[cfg(target_os = "macos")]
|
||||
use tfhe_csprng::seeders::AppleSecureEnclaveSeeder as ActivatedSeeder;
|
||||
#[cfg(all(not(target_os = "macos"), feature = "seeder_x86_64_rdseed"))]
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
target_arch = "x86_64",
|
||||
target_feature = "rdseed"
|
||||
))]
|
||||
use tfhe_csprng::seeders::RdseedSeeder as ActivatedSeeder;
|
||||
use tfhe_csprng::seeders::Seeder;
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
not(feature = "seeder_x86_64_rdseed"),
|
||||
feature = "seeder_unix"
|
||||
not(all(target_arch = "x86_64", target_feature = "rdseed")),
|
||||
target_family = "unix"
|
||||
))]
|
||||
use tfhe_csprng::seeders::UnixSeeder as ActivatedSeeder;
|
||||
|
||||
fn write_bytes(
|
||||
buffer: &mut [u8],
|
||||
generator: &mut ActivatedRandomGenerator,
|
||||
generator: &mut DefaultRandomGenerator,
|
||||
stdout: &mut StdoutLock<'_>,
|
||||
) -> std::io::Result<()> {
|
||||
buffer.iter_mut().zip(generator).for_each(|(b, g)| *b = g);
|
||||
@@ -39,7 +33,7 @@ fn write_bytes(
|
||||
|
||||
fn infinite_bytes_generation(
|
||||
buffer: &mut [u8],
|
||||
generator: &mut ActivatedRandomGenerator,
|
||||
generator: &mut DefaultRandomGenerator,
|
||||
stdout: &mut StdoutLock<'_>,
|
||||
) {
|
||||
while write_bytes(buffer, generator, stdout).is_ok() {}
|
||||
@@ -48,7 +42,7 @@ fn infinite_bytes_generation(
|
||||
fn bytes_generation(
|
||||
bytes_total: usize,
|
||||
buffer: &mut [u8],
|
||||
generator: &mut ActivatedRandomGenerator,
|
||||
generator: &mut DefaultRandomGenerator,
|
||||
stdout: &mut StdoutLock<'_>,
|
||||
) {
|
||||
let quotient = bytes_total / buffer.len();
|
||||
@@ -61,6 +55,29 @@ fn bytes_generation(
|
||||
write_bytes(&mut buffer[0..remaining], generator, stdout).unwrap()
|
||||
}
|
||||
|
||||
fn new_seeder() -> ActivatedSeeder {
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
ActivatedSeeder
|
||||
}
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
target_arch = "x86_64",
|
||||
target_feature = "rdseed"
|
||||
))]
|
||||
{
|
||||
ActivatedSeeder::new()
|
||||
}
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
not(all(target_arch = "x86_64", target_feature = "rdseed")),
|
||||
target_family = "unix"
|
||||
))]
|
||||
{
|
||||
ActivatedSeeder::new(0)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
let matches = Command::new(
|
||||
"Generate a stream of random numbers, specify no flags for infinite generation",
|
||||
@@ -74,25 +91,11 @@ pub fn main() {
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
// Ugly hack to be able to use UnixSeeder
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
not(feature = "seeder_x86_64_rdseed"),
|
||||
feature = "seeder_unix"
|
||||
))]
|
||||
let new_seeder = || ActivatedSeeder::new(0);
|
||||
#[cfg(not(all(
|
||||
not(target_os = "macos"),
|
||||
not(feature = "seeder_x86_64_rdseed"),
|
||||
feature = "seeder_unix"
|
||||
)))]
|
||||
let new_seeder = || ActivatedSeeder;
|
||||
|
||||
let mut seeder = new_seeder();
|
||||
let seed = seeder.seed();
|
||||
// Don't print on std out
|
||||
eprintln!("seed={seed:?}");
|
||||
let mut generator = ActivatedRandomGenerator::new(seed);
|
||||
let mut generator = DefaultRandomGenerator::new(seed);
|
||||
let stdout = stdout();
|
||||
let mut buffer = [0u8; 16];
|
||||
|
||||
|
||||
@@ -206,7 +206,6 @@ pub use index::*;
|
||||
|
||||
/// A module containing structures to manage table indices and buffer pointers together properly.
|
||||
mod states;
|
||||
pub use states::*;
|
||||
|
||||
/// A module containing an abstraction for aes block ciphers.
|
||||
mod block_cipher;
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
use crate::generators::aes_ctr::{
|
||||
AesBlockCipher, AesCtrGenerator, ChildrenClosure, State, TableIndex,
|
||||
};
|
||||
use crate::generators::aes_ctr::states::State;
|
||||
use crate::generators::aes_ctr::{AesBlockCipher, AesCtrGenerator, ChildrenClosure, TableIndex};
|
||||
use crate::generators::{BytesPerChild, ChildrenCount, ForkError};
|
||||
|
||||
/// A type alias for the parallel children iterator type.
|
||||
|
||||
9
tfhe-csprng/src/generators/default.rs
Normal file
9
tfhe-csprng/src/generators/default.rs
Normal file
@@ -0,0 +1,9 @@
|
||||
#[cfg(all(target_arch = "x86_64", not(feature = "software-prng")))]
|
||||
pub type DefaultRandomGenerator = super::AesniRandomGenerator;
|
||||
#[cfg(all(target_arch = "aarch64", not(feature = "software-prng")))]
|
||||
pub type DefaultRandomGenerator = super::NeonAesRandomGenerator;
|
||||
#[cfg(any(
|
||||
feature = "software-prng",
|
||||
not(any(target_arch = "x86_64", target_arch = "aarch64"))
|
||||
))]
|
||||
pub type DefaultRandomGenerator = super::SoftwareRandomGenerator;
|
||||
@@ -25,7 +25,8 @@ impl AesBlockCipher for ArmAesBlockCipher {
|
||||
if !(aes_detected && neon_detected) {
|
||||
panic!(
|
||||
"The ArmAesBlockCipher requires both aes and neon aarch64 CPU features.\n\
|
||||
aes feature available: {}\nneon feature available: {}\n.",
|
||||
aes feature available: {}\nneon feature available: {}\n\
|
||||
Please consider enabling the SoftwareRandomGenerator with the `software-prng` feature",
|
||||
aes_detected, neon_detected
|
||||
)
|
||||
}
|
||||
|
||||
@@ -20,7 +20,8 @@ impl AesBlockCipher for AesniBlockCipher {
|
||||
if !(aes_detected && sse2_detected) {
|
||||
panic!(
|
||||
"The AesniBlockCipher requires both aes and sse2 x86 CPU features.\n\
|
||||
aes feature available: {}\nsse2 feature available: {}\n.",
|
||||
aes feature available: {}\nsse2 feature available: {}\n\
|
||||
Please consider enabling the SoftwareRandomGenerator with the `software-prng` feature",
|
||||
aes_detected, sse2_detected
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,14 +1,12 @@
|
||||
#[cfg(feature = "generator_x86_64_aesni")]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
mod aesni;
|
||||
#[cfg(feature = "generator_x86_64_aesni")]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub use aesni::*;
|
||||
|
||||
#[cfg(feature = "generator_aarch64_aes")]
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
mod aarch64;
|
||||
#[cfg(feature = "generator_aarch64_aes")]
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
pub use aarch64::*;
|
||||
|
||||
#[cfg(feature = "generator_fallback")]
|
||||
mod soft;
|
||||
#[cfg(feature = "generator_fallback")]
|
||||
pub use soft::*;
|
||||
|
||||
@@ -123,6 +123,10 @@ mod aes_ctr;
|
||||
mod implem;
|
||||
pub use implem::*;
|
||||
|
||||
pub mod default;
|
||||
/// Convenience alias for the most efficient CSPRNG implementation available.
|
||||
pub use default::DefaultRandomGenerator;
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(unused)] // to please clippy when tests are not activated
|
||||
pub mod generator_generic_test {
|
||||
|
||||
@@ -3,12 +3,12 @@ mod apple_secure_enclave_seeder;
|
||||
#[cfg(target_os = "macos")]
|
||||
pub use apple_secure_enclave_seeder::AppleSecureEnclaveSeeder;
|
||||
|
||||
#[cfg(feature = "seeder_x86_64_rdseed")]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
mod rdseed;
|
||||
#[cfg(feature = "seeder_x86_64_rdseed")]
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub use rdseed::RdseedSeeder;
|
||||
|
||||
#[cfg(feature = "seeder_unix")]
|
||||
#[cfg(target_family = "unix")]
|
||||
mod unix;
|
||||
#[cfg(feature = "seeder_unix")]
|
||||
#[cfg(target_family = "unix")]
|
||||
pub use unix::UnixSeeder;
|
||||
|
||||
@@ -4,7 +4,23 @@ use crate::seeders::{Seed, Seeder};
|
||||
///
|
||||
/// The `rdseed` instruction allows to deliver seeds from a hardware source of entropy see
|
||||
/// <https://www.felixcloutier.com/x86/rdseed> .
|
||||
pub struct RdseedSeeder;
|
||||
pub struct RdseedSeeder(());
|
||||
|
||||
impl RdseedSeeder {
|
||||
pub fn new() -> Self {
|
||||
if Self::is_available() {
|
||||
Self(())
|
||||
} else {
|
||||
panic!("Tried to use RdSeedSeeder but rdseed instruction is not enabled on the current machine");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for RdseedSeeder {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl Seeder for RdseedSeeder {
|
||||
fn seed(&mut self) -> Seed {
|
||||
@@ -46,6 +62,6 @@ mod test {
|
||||
|
||||
#[test]
|
||||
fn check_bounded_sequence_difference() {
|
||||
check_seeder_fixed_sequences_different(|_| RdseedSeeder);
|
||||
check_seeder_fixed_sequences_different(|_| RdseedSeeder::new());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ serde = ["dep:serde", "num-complex/serde"]
|
||||
|
||||
[dev-dependencies]
|
||||
rustfft = "6.0"
|
||||
rand = "0.8"
|
||||
rand = { workspace = true }
|
||||
bincode = "1.3"
|
||||
more-asserts = "0.3.1"
|
||||
serde_json = "1.0.96"
|
||||
|
||||
@@ -40,7 +40,7 @@ Additionally, an optional 128-bit negacyclic FFT module is provided.
|
||||
```rust
|
||||
use tfhe_fft::c64;
|
||||
use tfhe_fft::ordered::{Method, Plan};
|
||||
use dyn_stack::{GlobalPodBuffer, PodStack, ReborrowMut};
|
||||
use dyn_stack::{GlobalPodBuffer, PodStack};
|
||||
use num_complex::ComplexFloat;
|
||||
use std::time::Duration;
|
||||
|
||||
@@ -48,7 +48,7 @@ fn main() {
|
||||
const N: usize = 4;
|
||||
let plan = Plan::new(4, Method::Measure(Duration::from_millis(10)));
|
||||
let mut scratch_memory = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
|
||||
let mut stack = PodStack::new(&mut scratch_memory);
|
||||
let stack = PodStack::new(&mut scratch_memory);
|
||||
|
||||
let data = [
|
||||
c64::new(1.0, 0.0),
|
||||
@@ -58,10 +58,10 @@ fn main() {
|
||||
];
|
||||
|
||||
let mut transformed_fwd = data;
|
||||
plan.fwd(&mut transformed_fwd, stack.rb_mut());
|
||||
plan.fwd(&mut transformed_fwd, stack);
|
||||
|
||||
let mut transformed_inv = transformed_fwd;
|
||||
plan.inv(&mut transformed_inv, stack.rb_mut());
|
||||
plan.inv(&mut transformed_inv, stack);
|
||||
|
||||
for (actual, expected) in transformed_inv.iter().map(|z| z / N as f64).zip(data) {
|
||||
assert!((expected - actual).abs() < 1e-9);
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use core::ptr::NonNull;
|
||||
use criterion::{criterion_group, criterion_main, Criterion};
|
||||
use dyn_stack::{PodStack, ReborrowMut, StackReq};
|
||||
use dyn_stack::{PodStack, StackReq};
|
||||
use serde::Serialize;
|
||||
use std::{fs, path::PathBuf};
|
||||
use tfhe_fft::c64;
|
||||
@@ -129,7 +129,7 @@ pub fn bench_ffts(c: &mut Criterion) {
|
||||
StackReq::new_aligned::<c64>(n, 256), // src
|
||||
StackReq::new_aligned::<c64>(n, 256), // dst
|
||||
]));
|
||||
let mut stack = PodStack::new(&mut mem);
|
||||
let stack = PodStack::new(&mut mem);
|
||||
let z = c64::new(0.0, 0.0);
|
||||
|
||||
use rustfft::FftPlannerAvx;
|
||||
@@ -139,8 +139,8 @@ pub fn bench_ffts(c: &mut Criterion) {
|
||||
let unordered =
|
||||
tfhe_fft::unordered::Plan::new(n, tfhe_fft::unordered::Method::Measure(bench_duration));
|
||||
|
||||
let (dst, stack) = stack.rb_mut().make_aligned_with::<c64, _>(n, 64, |_| z);
|
||||
let (src, mut stack) = stack.make_aligned_with::<c64, _>(n, 64, |_| z);
|
||||
let (dst, stack) = stack.make_aligned_with::<c64>(n, 64, |_| z);
|
||||
let (src, stack) = stack.make_aligned_with::<c64>(n, 64, |_| z);
|
||||
|
||||
let bench_id = format!("rustfft-fwd-{n}");
|
||||
c.bench_function(&bench_id, |b| {
|
||||
@@ -164,19 +164,19 @@ pub fn bench_ffts(c: &mut Criterion) {
|
||||
tfhe_fft::ordered::Plan::new(n, tfhe_fft::ordered::Method::Measure(bench_duration));
|
||||
|
||||
let bench_id = format!("tfhe-ordered-fwd-{n}");
|
||||
c.bench_function(&bench_id, |b| b.iter(|| ordered.fwd(dst, stack.rb_mut())));
|
||||
c.bench_function(&bench_id, |b| b.iter(|| ordered.fwd(dst, stack)));
|
||||
write_to_json(&bench_id, "tfhe-ordered-fwd", n);
|
||||
}
|
||||
|
||||
let bench_id = format!("tfhe-unordered-fwd-{n}");
|
||||
c.bench_function(&bench_id, |b| {
|
||||
b.iter(|| unordered.fwd(dst, stack.rb_mut()));
|
||||
b.iter(|| unordered.fwd(dst, stack));
|
||||
});
|
||||
write_to_json(&bench_id, "tfhe-unordered-fwd", n);
|
||||
|
||||
let bench_id = format!("tfhe-unordered-inv-{n}");
|
||||
c.bench_function(&bench_id, |b| {
|
||||
b.iter(|| unordered.inv(dst, stack.rb_mut()));
|
||||
b.iter(|| unordered.inv(dst, stack));
|
||||
});
|
||||
write_to_json(&bench_id, "tfhe-unordered-inv", n);
|
||||
|
||||
|
||||
@@ -645,7 +645,7 @@ pub mod x86 {
|
||||
|
||||
#[inline(always)]
|
||||
pub(crate) fn two_diff_f64x4(simd: V3, a: f64x4, b: f64x4) -> (f64x4, f64x4) {
|
||||
two_sum_f64x4(simd, a, simd.f64s_neg(b))
|
||||
two_sum_f64x4(simd, a, simd.neg_f64s(b))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
@@ -677,7 +677,7 @@ pub mod x86 {
|
||||
#[inline(always)]
|
||||
#[cfg(feature = "nightly")]
|
||||
pub(crate) fn two_diff_f64x8(simd: V4, a: f64x8, b: f64x8) -> (f64x8, f64x8) {
|
||||
two_sum_f64x8(simd, a, simd.f64s_neg(b))
|
||||
two_sum_f64x8(simd, a, simd.neg_f64s(b))
|
||||
}
|
||||
|
||||
#[cfg(feature = "nightly")]
|
||||
@@ -714,8 +714,8 @@ pub mod x86 {
|
||||
simd,
|
||||
a,
|
||||
f64x16 {
|
||||
lo: simd.f64s_neg(b.lo),
|
||||
hi: simd.f64s_neg(b.hi),
|
||||
lo: simd.neg_f64s(b.lo),
|
||||
hi: simd.neg_f64s(b.hi),
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
@@ -36,14 +36,14 @@
|
||||
#![cfg_attr(not(feature = "std"), doc = "```ignore")]
|
||||
//! use tfhe_fft::c64;
|
||||
//! use tfhe_fft::ordered::{Plan, Method};
|
||||
//! use dyn_stack::{PodStack, GlobalPodBuffer, ReborrowMut};
|
||||
//! use dyn_stack::{PodStack, GlobalPodBuffer};
|
||||
//! use num_complex::ComplexFloat;
|
||||
//! use std::time::Duration;
|
||||
//!
|
||||
//! const N: usize = 4;
|
||||
//! let plan = Plan::new(4, Method::Measure(Duration::from_millis(10)));
|
||||
//! let mut scratch_memory = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
|
||||
//! let mut stack = PodStack::new(&mut scratch_memory);
|
||||
//! let stack = PodStack::new(&mut scratch_memory);
|
||||
//!
|
||||
//! let data = [
|
||||
//! c64::new(1.0, 0.0),
|
||||
@@ -53,10 +53,10 @@
|
||||
//! ];
|
||||
//!
|
||||
//! let mut transformed_fwd = data;
|
||||
//! plan.fwd(&mut transformed_fwd, stack.rb_mut());
|
||||
//! plan.fwd(&mut transformed_fwd, stack);
|
||||
//!
|
||||
//! let mut transformed_inv = transformed_fwd;
|
||||
//! plan.inv(&mut transformed_inv, stack.rb_mut());
|
||||
//! plan.inv(&mut transformed_inv, stack);
|
||||
//!
|
||||
//! for (actual, expected) in transformed_inv.iter().map(|z| z / N as f64).zip(data) {
|
||||
//! assert!((expected - actual).abs() < 1e-9);
|
||||
|
||||
@@ -16,7 +16,7 @@ use aligned_vec::{avec, ABox, CACHELINE_ALIGN};
|
||||
#[cfg(feature = "std")]
|
||||
use core::time::Duration;
|
||||
#[cfg(feature = "std")]
|
||||
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
|
||||
use dyn_stack::GlobalPodBuffer;
|
||||
use dyn_stack::{PodStack, SizeOverflow, StackReq};
|
||||
|
||||
/// Internal FFT algorithm.
|
||||
@@ -65,7 +65,7 @@ fn measure_n_runs(
|
||||
buf: &mut [c64],
|
||||
twiddles_init: &[c64],
|
||||
twiddles: &[c64],
|
||||
stack: PodStack,
|
||||
stack: &mut PodStack,
|
||||
) -> Duration {
|
||||
let n = buf.len();
|
||||
let (scratch, _) = stack.make_aligned_raw::<c64>(n, CACHELINE_ALIGN);
|
||||
@@ -99,7 +99,7 @@ pub(crate) fn measure_fastest_scratch(n: usize) -> StackReq {
|
||||
pub(crate) fn measure_fastest(
|
||||
min_bench_duration_per_algo: Duration,
|
||||
n: usize,
|
||||
stack: PodStack,
|
||||
stack: &mut PodStack,
|
||||
) -> (FftAlgo, Duration) {
|
||||
const N_ALGOS: usize = 8;
|
||||
const MIN_DURATION: Duration = if cfg!(target_arch = "wasm32") {
|
||||
@@ -116,14 +116,14 @@ pub(crate) fn measure_fastest(
|
||||
|
||||
let f = |_| c64 { re: 0.0, im: 0.0 };
|
||||
|
||||
let (twiddles, stack) = stack.make_aligned_with::<c64, _>(2 * n, align, f);
|
||||
let (twiddles, stack) = stack.make_aligned_with::<c64>(2 * n, align, f);
|
||||
let twiddles_init = &twiddles[..n];
|
||||
let twiddles = &twiddles[n..];
|
||||
let (buf, mut stack) = stack.make_aligned_with::<c64, _>(n, align, f);
|
||||
let (buf, stack) = stack.make_aligned_with::<c64>(n, align, f);
|
||||
|
||||
{
|
||||
// initialize scratch to load it in the cpu cache
|
||||
drop(stack.rb_mut().make_aligned_with::<c64, _>(n, align, f));
|
||||
drop(stack.make_aligned_with::<c64>(n, align, f));
|
||||
}
|
||||
|
||||
let mut avg_durations = [Duration::ZERO; N_ALGOS];
|
||||
@@ -149,8 +149,7 @@ pub(crate) fn measure_fastest(
|
||||
let mut n_runs: u128 = 1;
|
||||
|
||||
loop {
|
||||
let duration =
|
||||
measure_n_runs(n_runs, algo, buf, twiddles_init, twiddles, stack.rb_mut());
|
||||
let duration = measure_n_runs(n_runs, algo, buf, twiddles_init, twiddles, stack);
|
||||
|
||||
if duration < MIN_DURATION {
|
||||
n_runs *= 2;
|
||||
@@ -165,8 +164,7 @@ pub(crate) fn measure_fastest(
|
||||
*avg = if n_runs <= init_n_runs {
|
||||
approx_duration
|
||||
} else {
|
||||
let duration =
|
||||
measure_n_runs(n_runs, algo, buf, twiddles_init, twiddles, stack.rb_mut());
|
||||
let duration = measure_n_runs(n_runs, algo, buf, twiddles_init, twiddles, stack);
|
||||
duration_div_f64(duration, n_runs as f64)
|
||||
};
|
||||
}
|
||||
@@ -339,7 +337,7 @@ impl Plan {
|
||||
/// let mut buf = [c64::default(); 4];
|
||||
/// plan.fwd(&mut buf, stack);
|
||||
/// ```
|
||||
pub fn fwd(&self, buf: &mut [c64], stack: PodStack) {
|
||||
pub fn fwd(&self, buf: &mut [c64], stack: &mut PodStack) {
|
||||
let n = self.fft_size();
|
||||
let (scratch, _) = stack.make_aligned_raw::<c64>(n, CACHELINE_ALIGN);
|
||||
let (w_init, w) = split_2(&self.twiddles);
|
||||
@@ -353,19 +351,19 @@ impl Plan {
|
||||
#[cfg_attr(not(feature = "std"), doc = " ```ignore")]
|
||||
/// use tfhe_fft::c64;
|
||||
/// use tfhe_fft::ordered::{Method, Plan};
|
||||
/// use dyn_stack::{PodStack, GlobalPodBuffer, ReborrowMut};
|
||||
/// use dyn_stack::{PodStack, GlobalPodBuffer};
|
||||
/// use core::time::Duration;
|
||||
///
|
||||
/// let plan = Plan::new(4, Method::Measure(Duration::from_millis(10)));
|
||||
///
|
||||
/// let mut memory = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
|
||||
/// let mut stack = PodStack::new(&mut memory);
|
||||
/// let stack = PodStack::new(&mut memory);
|
||||
///
|
||||
/// let mut buf = [c64::default(); 4];
|
||||
/// plan.fwd(&mut buf, stack.rb_mut());
|
||||
/// plan.fwd(&mut buf, stack);
|
||||
/// plan.inv(&mut buf, stack);
|
||||
/// ```
|
||||
pub fn inv(&self, buf: &mut [c64], stack: PodStack) {
|
||||
pub fn inv(&self, buf: &mut [c64], stack: &mut PodStack) {
|
||||
let n = self.fft_size();
|
||||
let (scratch, _) = stack.make_aligned_raw::<c64>(n, CACHELINE_ALIGN);
|
||||
let (w_init, w) = split_2(&self.twiddles_inv);
|
||||
|
||||
@@ -18,7 +18,7 @@ use aligned_vec::{avec, ABox, CACHELINE_ALIGN};
|
||||
#[cfg(feature = "std")]
|
||||
use core::time::Duration;
|
||||
#[cfg(feature = "std")]
|
||||
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
|
||||
use dyn_stack::GlobalPodBuffer;
|
||||
use dyn_stack::{PodStack, SizeOverflow, StackReq};
|
||||
|
||||
#[inline(always)]
|
||||
@@ -553,7 +553,7 @@ fn measure_fastest_scratch(n: usize) -> StackReq {
|
||||
fn measure_fastest(
|
||||
mut min_bench_duration_per_algo: Duration,
|
||||
n: usize,
|
||||
mut stack: PodStack,
|
||||
stack: &mut PodStack,
|
||||
) -> (FftAlgo, usize, Duration) {
|
||||
const MIN_DURATION: Duration = Duration::from_millis(1);
|
||||
min_bench_duration_per_algo = min_bench_duration_per_algo.max(MIN_DURATION);
|
||||
@@ -581,11 +581,8 @@ fn measure_fastest(
|
||||
n_algos += 1;
|
||||
|
||||
// we'll measure the corresponding plan
|
||||
let (base_algo, duration) = crate::ordered::measure_fastest(
|
||||
min_bench_duration_per_algo,
|
||||
base_n,
|
||||
stack.rb_mut(),
|
||||
);
|
||||
let (base_algo, duration) =
|
||||
crate::ordered::measure_fastest(min_bench_duration_per_algo, base_n, stack);
|
||||
|
||||
algos[i] = Some(base_algo);
|
||||
|
||||
@@ -599,11 +596,9 @@ fn measure_fastest(
|
||||
|
||||
let f = |_| c64 { re: 0.0, im: 0.0 };
|
||||
let align = CACHELINE_ALIGN;
|
||||
let (w, stack) = stack
|
||||
.rb_mut()
|
||||
.make_aligned_with::<c64, _>(n + base_n, align, f);
|
||||
let (scratch, stack) = stack.make_aligned_with::<c64, _>(base_n, align, f);
|
||||
let (z, _) = stack.make_aligned_with::<c64, _>(n, align, f);
|
||||
let (w, stack) = stack.make_aligned_with::<c64>(n + base_n, align, f);
|
||||
let (scratch, stack) = stack.make_aligned_with::<c64>(base_n, align, f);
|
||||
let (z, _) = stack.make_aligned_with::<c64>(n, align, f);
|
||||
|
||||
let n_runs = min_bench_duration_per_algo.as_secs_f64()
|
||||
/ (duration.as_secs_f64() * (n / base_n) as f64);
|
||||
@@ -823,7 +818,7 @@ impl Plan {
|
||||
/// let mut buf = [c64::default(); 4];
|
||||
/// plan.fwd(&mut buf, stack);
|
||||
/// ```
|
||||
pub fn fwd(&self, buf: &mut [c64], stack: PodStack) {
|
||||
pub fn fwd(&self, buf: &mut [c64], stack: &mut PodStack) {
|
||||
assert_eq!(self.fft_size(), buf.len());
|
||||
let (scratch, _) = stack.make_aligned_raw::<c64>(self.algo().1, CACHELINE_ALIGN);
|
||||
fwd_depth(
|
||||
@@ -912,19 +907,19 @@ impl Plan {
|
||||
#[cfg_attr(not(feature = "std"), doc = " ```ignore")]
|
||||
/// use tfhe_fft::c64;
|
||||
/// use tfhe_fft::unordered::{Method, Plan};
|
||||
/// use dyn_stack::{PodStack, GlobalPodBuffer, ReborrowMut};
|
||||
/// use dyn_stack::{PodStack, GlobalPodBuffer};
|
||||
/// use core::time::Duration;
|
||||
///
|
||||
/// let plan = Plan::new(4, Method::Measure(Duration::from_millis(10)));
|
||||
///
|
||||
/// let mut memory = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
|
||||
/// let mut stack = PodStack::new(&mut memory);
|
||||
/// let stack = PodStack::new(&mut memory);
|
||||
///
|
||||
/// let mut buf = [c64::default(); 4];
|
||||
/// plan.fwd(&mut buf, stack.rb_mut());
|
||||
/// plan.fwd(&mut buf, stack);
|
||||
/// plan.inv(&mut buf, stack);
|
||||
/// ```
|
||||
pub fn inv(&self, buf: &mut [c64], stack: PodStack) {
|
||||
pub fn inv(&self, buf: &mut [c64], stack: &mut PodStack) {
|
||||
assert_eq!(self.fft_size(), buf.len());
|
||||
let (scratch, _) = stack.make_aligned_raw::<c64>(self.algo().1, CACHELINE_ALIGN);
|
||||
inv_depth(
|
||||
@@ -995,7 +990,7 @@ impl Plan {
|
||||
base_n: usize,
|
||||
}
|
||||
|
||||
impl<'de, 'a> Visitor<'de> for SeqVisitor<'a> {
|
||||
impl<'de> Visitor<'de> for SeqVisitor<'_> {
|
||||
type Value = ();
|
||||
|
||||
fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
|
||||
@@ -1062,7 +1057,7 @@ fn bit_rev_twice_inv(nbits: u32, base_nbits: u32, i: usize) -> usize {
|
||||
mod tests {
|
||||
use super::*;
|
||||
use alloc::vec;
|
||||
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
|
||||
use dyn_stack::GlobalPodBuffer;
|
||||
use num_complex::ComplexFloat;
|
||||
use rand::random;
|
||||
|
||||
@@ -1157,8 +1152,8 @@ mod tests {
|
||||
},
|
||||
);
|
||||
let mut mem = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
|
||||
let mut stack = PodStack::new(&mut mem);
|
||||
plan.fwd(&mut z, stack.rb_mut());
|
||||
let stack = PodStack::new(&mut mem);
|
||||
plan.fwd(&mut z, stack);
|
||||
plan.inv(&mut z, stack);
|
||||
|
||||
for z in &mut z {
|
||||
@@ -9400,7 +9395,7 @@ mod tests {
|
||||
mod tests_serde {
|
||||
use super::*;
|
||||
use alloc::{vec, vec::Vec};
|
||||
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
|
||||
use dyn_stack::GlobalPodBuffer;
|
||||
use num_complex::ComplexFloat;
|
||||
use rand::random;
|
||||
|
||||
@@ -9440,9 +9435,9 @@ mod tests_serde {
|
||||
.unwrap()
|
||||
.or(plan2.fft_scratch().unwrap()),
|
||||
);
|
||||
let mut stack = PodStack::new(&mut mem);
|
||||
let stack = PodStack::new(&mut mem);
|
||||
|
||||
plan1.fwd(&mut z, stack.rb_mut());
|
||||
plan1.fwd(&mut z, stack);
|
||||
|
||||
let mut buf = Vec::<u8>::new();
|
||||
let mut serializer = bincode::Serializer::new(&mut buf, bincode::options());
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user