mirror of
https://github.com/zama-ai/tfhe-rs.git
synced 2026-04-28 03:01:21 -04:00
Compare commits
3 Commits
tfhe-versi
...
am/chore/c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8378ce7d44 | ||
|
|
e0111f6dd1 | ||
|
|
51fd605a0c |
53
.github/actions/hyperstack_setup/action.yml
vendored
53
.github/actions/hyperstack_setup/action.yml
vendored
@@ -1,53 +0,0 @@
|
||||
name: Setup Cuda
|
||||
description: Setup Cuda on Hyperstack instance
|
||||
|
||||
inputs:
|
||||
cuda-version:
|
||||
description: Version of Cuda to use
|
||||
required: true
|
||||
gcc-version:
|
||||
description: Version of GCC to use
|
||||
required: true
|
||||
cmake-version:
|
||||
description: Version of cmake to use
|
||||
default: 3.29.6
|
||||
|
||||
runs:
|
||||
using: "composite"
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ inputs.cmake-version }}/cmake-${{ inputs.cmake-version }}.tar.gz
|
||||
tar -zxvf cmake-${{ inputs.cmake-version }}.tar.gz
|
||||
cd cmake-${{ inputs.cmake-version }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Export CUDA variables
|
||||
shell: bash
|
||||
run: |
|
||||
CUDA_PATH=/usr/local/cuda-${{ inputs.cuda-version }}
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ inputs.cuda-version }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
shell: bash
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ inputs.gcc-version }}";
|
||||
echo "CXX=/usr/bin/g++-${{ inputs.gcc-version }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ inputs.gcc-version }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
shell: bash
|
||||
run: nvidia-smi
|
||||
@@ -26,7 +26,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -50,7 +50,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -100,7 +100,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
28
.github/workflows/aws_tfhe_fast_tests.yml
vendored
28
.github/workflows/aws_tfhe_fast_tests.yml
vendored
@@ -11,26 +11,16 @@ env:
|
||||
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
|
||||
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
|
||||
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
|
||||
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
|
||||
|
||||
on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
pull_request:
|
||||
pull_request_target:
|
||||
|
||||
jobs:
|
||||
check-user-permission:
|
||||
if: github.event_name == 'pull_request' || github.event_name == 'pull_request_target'
|
||||
uses: ./.github/workflows/check_triggering_actor.yml
|
||||
secrets:
|
||||
TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
should-run:
|
||||
runs-on: ubuntu-latest
|
||||
needs: check-user-permission
|
||||
if: github.event_name != 'pull_request_target' ||
|
||||
needs.check-user-permission.result == 'success'
|
||||
permissions:
|
||||
pull-requests: write
|
||||
outputs:
|
||||
@@ -65,11 +55,10 @@ jobs:
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -144,7 +133,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -159,7 +148,7 @@ jobs:
|
||||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
|
||||
needs: [ should-run, setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
|
||||
group: ${{ github.workflow }}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
steps:
|
||||
@@ -168,10 +157,9 @@ jobs:
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
ref: ${{ github.event.pull_request.head.sha }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -211,7 +199,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -224,7 +212,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -282,7 +270,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
9
.github/workflows/aws_tfhe_integer_tests.yml
vendored
9
.github/workflows/aws_tfhe_integer_tests.yml
vendored
@@ -47,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -59,7 +59,6 @@ jobs:
|
||||
- tfhe/src/core_crypto/**
|
||||
- tfhe/src/shortint/**
|
||||
- tfhe/src/integer/**
|
||||
- .github/workflows/aws_tfhe_integer_tests.yml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (unsigned-integer-tests)
|
||||
@@ -75,7 +74,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -99,7 +98,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -140,7 +139,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -47,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -59,7 +59,6 @@ jobs:
|
||||
- tfhe/src/core_crypto/**
|
||||
- tfhe/src/shortint/**
|
||||
- tfhe/src/integer/**
|
||||
- .github/workflows/aws_tfhe_signed_integer_tests.yml
|
||||
|
||||
setup-instance:
|
||||
name: Setup instance (unsigned-integer-tests)
|
||||
@@ -75,7 +74,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -99,7 +98,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -144,7 +143,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/aws_tfhe_tests.yml
vendored
8
.github/workflows/aws_tfhe_tests.yml
vendored
@@ -67,7 +67,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -142,7 +142,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -168,7 +168,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -250,7 +250,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
10
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
10
.github/workflows/aws_tfhe_wasm_tests.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -61,7 +61,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -119,7 +119,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_boolean.yml
vendored
8
.github/workflows/benchmark_boolean.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -62,7 +62,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_boolean
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -127,7 +127,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_core_crypto.yml
vendored
8
.github/workflows/benchmark_core_crypto.yml
vendored
@@ -26,7 +26,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -58,7 +58,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -81,7 +81,7 @@ jobs:
|
||||
--walk-subdirs
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -115,7 +115,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_erc20.yml
vendored
8
.github/workflows/benchmark_erc20.yml
vendored
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -97,7 +97,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_erc20
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -124,7 +124,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_gpu_4090.yml
vendored
8
.github/workflows/benchmark_gpu_4090.yml
vendored
@@ -54,7 +54,7 @@ jobs:
|
||||
echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -82,7 +82,7 @@ jobs:
|
||||
--walk-subdirs
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_integer_multi_bit_gpu_default
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -127,7 +127,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -157,7 +157,7 @@ jobs:
|
||||
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
50
.github/workflows/benchmark_gpu_core_crypto.yml
vendored
50
.github/workflows/benchmark_gpu_core_crypto.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -48,19 +48,28 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
@@ -75,10 +84,31 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Run benchmarks with AVX512
|
||||
run: |
|
||||
make bench_pbs_gpu
|
||||
@@ -98,7 +128,7 @@ jobs:
|
||||
--walk-subdirs
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_core_crypto
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -137,7 +167,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
3
.github/workflows/benchmark_gpu_erc20.yml
vendored
3
.github/workflows/benchmark_gpu_erc20.yml
vendored
@@ -12,10 +12,7 @@ on:
|
||||
- "l40 (n3-L40x1)"
|
||||
- "single-h100 (n3-H100x1)"
|
||||
- "2-h100 (n3-H100x2)"
|
||||
- "4-h100 (n3-H100x4)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
- "multi-h100-nvlink (n3-H100x8-NVLink)"
|
||||
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
|
||||
jobs:
|
||||
parse-inputs:
|
||||
|
||||
55
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
55
.github/workflows/benchmark_gpu_erc20_common.yml
vendored
@@ -54,7 +54,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -75,19 +75,28 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
@@ -102,10 +111,34 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run benchmarks
|
||||
run: |
|
||||
make bench_hlapi_erc20_gpu
|
||||
@@ -124,9 +157,9 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_erc20_${{ inputs.profile }}
|
||||
name: ${{ github.sha }}_erc20
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
@@ -163,7 +196,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
1
.github/workflows/benchmark_gpu_integer.yml
vendored
1
.github/workflows/benchmark_gpu_integer.yml
vendored
@@ -15,7 +15,6 @@ on:
|
||||
- "4-h100 (n3-H100x4)"
|
||||
- "multi-h100 (n3-H100x8)"
|
||||
- "multi-h100-nvlink (n3-H100x8-NVLink)"
|
||||
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
|
||||
- "multi-a100-nvlink (n3-A100x8-NVLink)"
|
||||
command:
|
||||
description: "Benchmark command to run"
|
||||
|
||||
@@ -118,7 +118,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -145,19 +145,28 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs repo with tags
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
fetch-depth: 0
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Get benchmark details
|
||||
run: |
|
||||
{
|
||||
@@ -172,10 +181,41 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CUDA_PATH=$CUDA_PATH";
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
|
||||
} >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Should run benchmarks with all precisions
|
||||
if: inputs.all_precisions
|
||||
run: |
|
||||
@@ -196,22 +236,14 @@ jobs:
|
||||
--commit-date "${{ env.COMMIT_DATE }}" \
|
||||
--bench-date "${{ env.BENCH_DATE }}" \
|
||||
--walk-subdirs \
|
||||
--name-suffix avx512 \
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
|
||||
- name: Checkout Slab repo
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
repository: zama-ai/slab
|
||||
path: slab
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Send data to Slab
|
||||
shell: bash
|
||||
run: |
|
||||
@@ -239,7 +271,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_integer.yml
vendored
8
.github/workflows/benchmark_integer.yml
vendored
@@ -90,7 +90,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -130,7 +130,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -170,7 +170,7 @@ jobs:
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -197,7 +197,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
8
.github/workflows/benchmark_shortint.yml
vendored
8
.github/workflows/benchmark_shortint.yml
vendored
@@ -56,7 +56,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -93,7 +93,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -136,7 +136,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -163,7 +163,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -90,7 +90,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -130,7 +130,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -164,7 +164,7 @@ jobs:
|
||||
--bench-type ${{ matrix.bench_type }}
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -191,7 +191,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_tfhe_fft.yml
vendored
6
.github/workflows/benchmark_tfhe_fft.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -84,7 +84,7 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_fft
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -126,7 +126,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/benchmark_tfhe_ntt.yml
vendored
6
.github/workflows/benchmark_tfhe_ntt.yml
vendored
@@ -32,7 +32,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -84,7 +84,7 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_ntt
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -126,7 +126,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
10
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
10
.github/workflows/benchmark_tfhe_zk_pok.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -58,7 +58,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -91,7 +91,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -121,7 +121,7 @@ jobs:
|
||||
--name-suffix avx512
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_tfhe_zk_pok
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -155,7 +155,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
14
.github/workflows/benchmark_wasm_client.yml
vendored
14
.github/workflows/benchmark_wasm_client.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -65,7 +65,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -99,7 +99,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -109,7 +109,7 @@ jobs:
|
||||
|
||||
- name: Node cache restoration
|
||||
id: node-cache
|
||||
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
|
||||
with:
|
||||
path: |
|
||||
~/.nvm
|
||||
@@ -122,7 +122,7 @@ jobs:
|
||||
make install_node
|
||||
|
||||
- name: Node cache save
|
||||
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
|
||||
uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
|
||||
if: steps.node-cache.outputs.cache-hit != 'true'
|
||||
with:
|
||||
path: |
|
||||
@@ -166,7 +166,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -200,7 +200,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
10
.github/workflows/benchmark_zk_pke.yml
vendored
10
.github/workflows/benchmark_zk_pke.yml
vendored
@@ -47,7 +47,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -104,7 +104,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -141,7 +141,7 @@ jobs:
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install rust
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: nightly
|
||||
|
||||
@@ -177,7 +177,7 @@ jobs:
|
||||
--append-results
|
||||
|
||||
- name: Upload parsed results artifact
|
||||
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
|
||||
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
|
||||
with:
|
||||
name: ${{ github.sha }}_integer_zk
|
||||
path: ${{ env.RESULTS_FILENAME }}
|
||||
@@ -211,7 +211,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/cargo_build.yml
vendored
2
.github/workflows/cargo_build.yml
vendored
@@ -28,7 +28,7 @@ jobs:
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
29
.github/workflows/check_triggering_actor.yml
vendored
29
.github/workflows/check_triggering_actor.yml
vendored
@@ -1,29 +0,0 @@
|
||||
# Check if triggering actor is a collaborator and has write access
|
||||
name: Check Triggering Actor
|
||||
|
||||
on:
|
||||
workflow_call:
|
||||
secrets:
|
||||
TOKEN:
|
||||
required: true
|
||||
|
||||
jobs:
|
||||
check-actor-permission:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- name: Get User Permission
|
||||
id: check-access
|
||||
uses: actions-cool/check-user-permission@956b2e73cdfe3bcb819bb7225e490cb3b18fd76e # v2.2.1
|
||||
with:
|
||||
require: write
|
||||
username: ${{ github.triggering_actor }}
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.TOKEN }}
|
||||
|
||||
- name: Check User Permission
|
||||
if: steps.check-access.outputs.require-result == 'false'
|
||||
run: |
|
||||
echo "${{ github.triggering_actor }} does not have permissions on this repo."
|
||||
echo "Current permission level is ${{ steps.check-access.outputs.user-permission }}"
|
||||
echo "Job originally triggered by ${{ github.actor }}"
|
||||
exit 1
|
||||
2
.github/workflows/ci_lint.yml
vendored
2
.github/workflows/ci_lint.yml
vendored
@@ -27,7 +27,7 @@ jobs:
|
||||
make lint_workflow
|
||||
|
||||
- name: Ensure SHA pinned actions
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@6ae615f6475d2ede5ad88bea6baa7a1d5e93ffaa # v3.0.19
|
||||
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@5d6ac37a4cef8b8df67f482a8e384987766f0213 # v3.0.17
|
||||
with:
|
||||
allowlist: |
|
||||
slsa-framework/slsa-github-generator
|
||||
|
||||
12
.github/workflows/code_coverage.yml
vendored
12
.github/workflows/code_coverage.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -47,13 +47,13 @@ jobs:
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
files_yaml: |
|
||||
tfhe:
|
||||
@@ -83,7 +83,7 @@ jobs:
|
||||
make test_shortint_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
|
||||
uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -97,7 +97,7 @@ jobs:
|
||||
make test_integer_cov
|
||||
|
||||
- name: Upload tfhe coverage to Codecov
|
||||
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
|
||||
uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a
|
||||
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
|
||||
with:
|
||||
token: ${{ secrets.CODECOV_TOKEN }}
|
||||
@@ -121,7 +121,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -51,7 +51,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/gpu_4090_tests.yml
vendored
2
.github/workflows/gpu_4090_tests.yml
vendored
@@ -40,7 +40,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
52
.github/workflows/gpu_fast_h100_tests.yml
vendored
52
.github/workflows/gpu_fast_h100_tests.yml
vendored
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,28 +94,60 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run core crypto and internal CUDA backend tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu
|
||||
@@ -155,7 +187,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
52
.github/workflows/gpu_fast_tests.yml
vendored
52
.github/workflows/gpu_fast_tests.yml
vendored
@@ -34,7 +34,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -66,7 +66,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -92,28 +92,60 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run core crypto and internal CUDA backend tests
|
||||
run: |
|
||||
make test_core_crypto_gpu
|
||||
@@ -153,7 +185,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
38
.github/workflows/gpu_full_h100_tests.yml
vendored
38
.github/workflows/gpu_full_h100_tests.yml
vendored
@@ -25,7 +25,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -49,6 +49,9 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
@@ -68,21 +71,38 @@ jobs:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run core crypto, integer and internal CUDA backend tests
|
||||
run: |
|
||||
make test_gpu
|
||||
@@ -119,7 +139,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
52
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
52
.github/workflows/gpu_full_multi_gpu_tests.yml
vendored
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,28 +94,60 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run multi-bit CUDA integer compression tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_integer_compression_gpu
|
||||
@@ -158,7 +190,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
63
.github/workflows/gpu_integer_long_run_tests.yml
vendored
63
.github/workflows/gpu_integer_long_run_tests.yml
vendored
@@ -1,4 +1,4 @@
|
||||
name: Long Run Tests on GPU
|
||||
name: AWS Long Run Tests on GPU
|
||||
|
||||
env:
|
||||
CARGO_TERM_COLOR: always
|
||||
@@ -15,8 +15,8 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly tests will be triggered each Friday at 9p.m.
|
||||
- cron: "0 21 * * 5"
|
||||
# Weekly tests will be triggered each Friday at 1a.m.
|
||||
- cron: '0 1 * * FRI'
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
@@ -29,17 +29,17 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
slab-url: ${{ secrets.SLAB_BASE_URL }}
|
||||
job-secret: ${{ secrets.JOB_SECRET }}
|
||||
backend: hyperstack
|
||||
profile: multi-gpu-test
|
||||
profile: single-h100
|
||||
|
||||
cuda-tests:
|
||||
name: Long run GPU tests
|
||||
name: Long run GPU H100 tests
|
||||
needs: [ setup-instance ]
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
@@ -53,26 +53,57 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
timeout-minutes: 4320 # 72 hours
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run tests
|
||||
run: |
|
||||
make test_integer_long_run_gpu
|
||||
@@ -88,7 +119,7 @@ jobs:
|
||||
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
|
||||
env:
|
||||
SLACK_COLOR: ${{ needs.cuda-tests.result }}
|
||||
SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
SLACK_MESSAGE: "Integer GPU H100 long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
|
||||
|
||||
teardown-instance:
|
||||
name: Teardown instance (gpu-tests)
|
||||
@@ -98,7 +129,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
6
.github/workflows/gpu_pcc.yml
vendored
6
.github/workflows/gpu_pcc.yml
vendored
@@ -24,7 +24,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -63,7 +63,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -110,7 +110,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,25 +94,58 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run signed integer tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci
|
||||
@@ -138,7 +171,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,25 +94,58 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run signed integer multi-bit tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci
|
||||
@@ -138,7 +171,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
53
.github/workflows/gpu_signed_integer_tests.yml
vendored
53
.github/workflows/gpu_signed_integer_tests.yml
vendored
@@ -42,7 +42,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -75,7 +75,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -101,28 +101,57 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
with:
|
||||
persist-credentials: 'false'
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run nightly tests
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
@@ -131,6 +160,10 @@ jobs:
|
||||
echo "NIGHTLY_TESTS=TRUE";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run signed integer multi-bit tests
|
||||
run: |
|
||||
make test_signed_integer_multi_bit_gpu_ci
|
||||
@@ -156,7 +189,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,25 +94,58 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run unsigned integer tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci
|
||||
@@ -138,7 +171,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
@@ -35,7 +35,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -68,7 +68,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -94,25 +94,58 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run unsigned integer multi-bit tests
|
||||
run: |
|
||||
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci
|
||||
@@ -138,7 +171,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
53
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
53
.github/workflows/gpu_unsigned_integer_tests.yml
vendored
@@ -41,7 +41,7 @@ jobs:
|
||||
|
||||
- name: Check for file changes
|
||||
id: changed-files
|
||||
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
|
||||
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
|
||||
with:
|
||||
since_last_remote_commit: true
|
||||
files_yaml: |
|
||||
@@ -74,7 +74,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -100,25 +100,54 @@ jobs:
|
||||
- os: ubuntu-22.04
|
||||
cuda: "12.2"
|
||||
gcc: 11
|
||||
env:
|
||||
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
|
||||
CMAKE_VERSION: 3.29.6
|
||||
steps:
|
||||
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt update
|
||||
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
|
||||
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
|
||||
cd cmake-${{ env.CMAKE_VERSION }}
|
||||
./bootstrap
|
||||
make -j"$(nproc)"
|
||||
sudo make install
|
||||
|
||||
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
|
||||
- name: Setup Hyperstack dependencies
|
||||
uses: ./.github/actions/hyperstack_setup
|
||||
with:
|
||||
cuda-version: ${{ matrix.cuda }}
|
||||
gcc-version: ${{ matrix.gcc }}
|
||||
|
||||
- name: Set up home
|
||||
run: |
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
- name: Export CUDA variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
|
||||
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
|
||||
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
|
||||
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
|
||||
|
||||
# Specify the correct host compilers
|
||||
- name: Export gcc and g++ variables
|
||||
if: ${{ !cancelled() }}
|
||||
run: |
|
||||
{
|
||||
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
|
||||
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
|
||||
echo "HOME=/home/ubuntu";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Should run nightly tests
|
||||
if: github.event_name == 'schedule'
|
||||
run: |
|
||||
@@ -127,6 +156,10 @@ jobs:
|
||||
echo "NIGHTLY_TESTS=TRUE";
|
||||
} >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Check device is detected
|
||||
if: ${{ !cancelled() }}
|
||||
run: nvidia-smi
|
||||
|
||||
- name: Run unsigned integer multi-bit tests
|
||||
run: |
|
||||
make test_unsigned_integer_multi_bit_gpu_ci
|
||||
@@ -152,7 +185,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
11
.github/workflows/integer_long_run_tests.yml
vendored
11
.github/workflows/integer_long_run_tests.yml
vendored
@@ -15,8 +15,8 @@ on:
|
||||
# Allows you to run this workflow manually from the Actions tab as an alternative.
|
||||
workflow_dispatch:
|
||||
schedule:
|
||||
# Weekly tests will be triggered each Friday at 9p.m.
|
||||
- cron: "0 21 * * 5"
|
||||
# Weekly tests will be triggered each Friday at 1a.m.
|
||||
- cron: '0 1 * * FRI'
|
||||
|
||||
jobs:
|
||||
setup-instance:
|
||||
@@ -29,7 +29,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -45,7 +45,6 @@ jobs:
|
||||
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
|
||||
timeout-minutes: 4320 # 72 hours
|
||||
steps:
|
||||
- name: Checkout tfhe-rs
|
||||
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
@@ -54,7 +53,7 @@ jobs:
|
||||
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -78,7 +77,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
2
.github/workflows/m1_tests.yml
vendored
2
.github/workflows/m1_tests.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
||||
persist-credentials: "false"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
|
||||
2
.github/workflows/make_release.yml
vendored
2
.github/workflows/make_release.yml
vendored
@@ -50,7 +50,7 @@ jobs:
|
||||
- name: Prepare package
|
||||
run: |
|
||||
cargo package -p tfhe
|
||||
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
|
||||
- uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
|
||||
with:
|
||||
name: crate
|
||||
path: target/package/*.crate
|
||||
|
||||
6
.github/workflows/make_release_cuda.yml
vendored
6
.github/workflows/make_release_cuda.yml
vendored
@@ -36,7 +36,7 @@ jobs:
|
||||
steps:
|
||||
- name: Start instance
|
||||
id: start-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: start
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
@@ -71,7 +71,7 @@ jobs:
|
||||
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
|
||||
|
||||
- name: Install latest stable
|
||||
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
|
||||
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
|
||||
with:
|
||||
toolchain: stable
|
||||
|
||||
@@ -120,7 +120,7 @@ jobs:
|
||||
steps:
|
||||
- name: Stop instance
|
||||
id: stop-instance
|
||||
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
|
||||
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
|
||||
with:
|
||||
mode: stop
|
||||
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
|
||||
|
||||
233
Makefile
233
Makefile
@@ -2,6 +2,7 @@ SHELL:=$(shell /usr/bin/env which bash)
|
||||
OS:=$(shell uname)
|
||||
RS_CHECK_TOOLCHAIN:=$(shell cat toolchain.txt | tr -d '\n')
|
||||
CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
|
||||
TARGET_ARCH_FEATURE:=$(shell ./scripts/get_arch_feature.sh)
|
||||
CPU_COUNT=$(shell ./scripts/cpu_count.sh)
|
||||
RS_BUILD_TOOLCHAIN:=stable
|
||||
CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
|
||||
@@ -281,14 +282,14 @@ check_typos: install_typos_checker
|
||||
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
|
||||
clippy_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
|
||||
check_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
|
||||
--all-targets \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
@@ -307,51 +308,52 @@ lint_workflow: check_actionlint_installed
|
||||
.PHONY: clippy_core # Run clippy lints on core_crypto with and without experimental features
|
||||
clippy_core: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=$(TARGET_ARCH_FEATURE) \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=experimental \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=nightly-avx512 \
|
||||
--features=$(TARGET_ARCH_FEATURE),nightly-avx512 \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=experimental,nightly-avx512 \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=zk-pok \
|
||||
--features=$(TARGET_ARCH_FEATURE),zk-pok \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_boolean # Run clippy lints enabling the boolean features
|
||||
clippy_boolean: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_shortint # Run clippy lints enabling the shortint features
|
||||
clippy_shortint: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=shortint \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=shortint,experimental \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=zk-pok,shortint \
|
||||
--features=$(TARGET_ARCH_FEATURE),zk-pok,shortint \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_integer # Run clippy lints enabling the integer features
|
||||
clippy_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=integer \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=integer,experimental \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy # Run clippy lints enabling the boolean, shortint, integer
|
||||
clippy: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=boolean,shortint,integer \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_rustdoc # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
|
||||
@@ -362,13 +364,13 @@ clippy_rustdoc: install_rs_check_toolchain
|
||||
fi && \
|
||||
CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
|
||||
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok,pbs-stats,strings \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
|
||||
clippy_c_api: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
|
||||
@@ -393,16 +395,17 @@ clippy_trivium: install_rs_check_toolchain
|
||||
.PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
|
||||
clippy_all_targets: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok,strings \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
|
||||
-p $(TFHE_SPEC) -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
|
||||
clippy_tfhe_csprng: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
|
||||
--features=parallel,software-prng -p tfhe-csprng -- --no-deps -D warnings
|
||||
--features=$(TARGET_ARCH_FEATURE) \
|
||||
-p tfhe-csprng -- --no-deps -D warnings
|
||||
|
||||
.PHONY: clippy_zk_pok # Run clippy lints on tfhe-zk-pok
|
||||
clippy_zk_pok: install_rs_check_toolchain
|
||||
@@ -441,67 +444,67 @@ check_rust_bindings_did_not_change:
|
||||
.PHONY: tfhe_lints # Run custom tfhe-rs lints
|
||||
tfhe_lints: install_tfhe_lints
|
||||
cd tfhe && RUSTFLAGS="$(RUSTFLAGS)" cargo tfhe-lints \
|
||||
--features=boolean,shortint,integer,zk-pok -- -D warnings
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok -- -D warnings
|
||||
|
||||
.PHONY: build_core # Build core_crypto without experimental features
|
||||
build_core: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
-p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE) -p $(TFHE_SPEC)
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=nightly-avx512 -p $(TFHE_SPEC); \
|
||||
--features=$(TARGET_ARCH_FEATURE),nightly-avx512 -p $(TFHE_SPEC); \
|
||||
fi
|
||||
|
||||
.PHONY: build_core_experimental # Build core_crypto with experimental features
|
||||
build_core_experimental: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=experimental -p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental -p $(TFHE_SPEC)
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=experimental,nightly-avx512 -p $(TFHE_SPEC); \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 -p $(TFHE_SPEC); \
|
||||
fi
|
||||
|
||||
.PHONY: build_boolean # Build with boolean enabled
|
||||
build_boolean: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=boolean -p $(TFHE_SPEC) --all-targets
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) --all-targets
|
||||
|
||||
.PHONY: build_shortint # Build with shortint enabled
|
||||
build_shortint: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=shortint -p $(TFHE_SPEC) --all-targets
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint -p $(TFHE_SPEC) --all-targets
|
||||
|
||||
.PHONY: build_integer # Build with integer enabled
|
||||
build_integer: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=integer -p $(TFHE_SPEC) --all-targets
|
||||
--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) --all-targets
|
||||
|
||||
.PHONY: build_tfhe_full # Build with boolean, shortint and integer enabled
|
||||
build_tfhe_full: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
|
||||
|
||||
.PHONY: build_tfhe_coverage # Build with test coverage enabled
|
||||
build_tfhe_coverage: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
|
||||
|
||||
.PHONY: build_c_api # Build the C API for boolean, shortint and integer
|
||||
build_c_api: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
|
||||
build_c_api_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
|
||||
build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: build_web_js_api # Build the js API targeting the web browser
|
||||
@@ -532,15 +535,15 @@ build_node_js_api: install_rs_build_toolchain install_wasm_pack
|
||||
.PHONY: build_tfhe_csprng # Build tfhe_csprng
|
||||
build_tfhe_csprng: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-csprng --all-targets
|
||||
--features=$(TARGET_ARCH_FEATURE) -p tfhe-csprng --all-targets
|
||||
|
||||
.PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
|
||||
test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
|
||||
fi
|
||||
|
||||
.PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
|
||||
@@ -548,13 +551,13 @@ test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain inst
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
|
||||
--out xml --output-dir coverage/core_crypto --line --engine llvm --timeout 500 \
|
||||
--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
|
||||
--features=experimental,internal-keycache \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache \
|
||||
-p $(TFHE_SPEC) -- core_crypto::
|
||||
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
|
||||
--out xml --output-dir coverage/core_crypto_avx512 --line --engine llvm --timeout 500 \
|
||||
--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
|
||||
--features=experimental,internal-keycache,nightly-avx512 \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,nightly-avx512 \
|
||||
-p $(TFHE_SPEC) -- -Z unstable-options --report-time core_crypto::; \
|
||||
fi
|
||||
|
||||
@@ -572,38 +575,35 @@ test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
|
||||
.PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
|
||||
test_core_crypto_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
|
||||
--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
|
||||
--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
|
||||
|
||||
.PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
|
||||
test_integer_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
|
||||
|
||||
.PHONY: test_integer_long_run_gpu # Run the long run integer tests on the gpu backend
|
||||
test_integer_long_run_gpu: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
LONG_TESTS=TRUE \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--tfhe-package "$(TFHE_SPEC)" --backend "gpu"
|
||||
.PHONY: test_integer_long_run_gpu # Run the tests of the integer module including experimental on the gpu backend
|
||||
test_integer_long_run_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,__long_run_tests -p $(TFHE_SPEC) -- integer::gpu::server_key::radix::tests_long_run --test-threads=6
|
||||
|
||||
.PHONY: test_integer_compression
|
||||
test_integer_compression: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
|
||||
--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compress
|
||||
--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) -- integer::ciphertext::compress
|
||||
|
||||
.PHONY: test_integer_compression_gpu
|
||||
test_integer_compression_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
|
||||
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress
|
||||
|
||||
.PHONY: test_integer_gpu_ci # Run the tests for integer ci on gpu backend
|
||||
test_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
|
||||
@@ -662,20 +662,20 @@ test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_n
|
||||
.PHONY: test_boolean # Run the tests of the boolean module
|
||||
test_boolean: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=boolean -p $(TFHE_SPEC) -- boolean::
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) -- boolean::
|
||||
|
||||
.PHONY: test_boolean_cov # Run the tests of the boolean module with code coverage
|
||||
test_boolean_cov: install_rs_check_toolchain install_tarpaulin
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
|
||||
--out xml --output-dir coverage/boolean --line --engine llvm --timeout 500 \
|
||||
$(COVERAGE_EXCLUDED_FILES) \
|
||||
--features=boolean,internal-keycache \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache \
|
||||
-p $(TFHE_SPEC) -- -Z unstable-options --report-time boolean::
|
||||
|
||||
.PHONY: test_c_api_rs # Run the rust tests for the C API
|
||||
test_c_api_rs: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=boolean-c-api,shortint-c-api,high-level-c-api \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
|
||||
-p $(TFHE_SPEC) \
|
||||
c_api
|
||||
|
||||
@@ -707,14 +707,14 @@ test_shortint_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
|
||||
.PHONY: test_shortint # Run all the tests for shortint
|
||||
test_shortint: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
|
||||
|
||||
.PHONY: test_shortint_cov # Run the tests of the shortint module with code coverage
|
||||
test_shortint_cov: install_rs_check_toolchain install_tarpaulin
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
|
||||
--out xml --output-dir coverage/shortint --line --engine llvm --timeout 500 \
|
||||
$(COVERAGE_EXCLUDED_FILES) \
|
||||
--features=shortint,internal-keycache \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
|
||||
-p $(TFHE_SPEC) -- -Z unstable-options --report-time shortint::
|
||||
|
||||
.PHONY: test_integer_ci # Run the tests for integer ci
|
||||
@@ -771,28 +771,26 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
|
||||
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--signed-only --tfhe-package "$(TFHE_SPEC)"
|
||||
|
||||
.PHONY: test_integer_long_run # Run the long run integer tests
|
||||
test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
|
||||
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
|
||||
LONG_TESTS=TRUE \
|
||||
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
|
||||
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
|
||||
--tfhe-package "$(TFHE_SPEC)"
|
||||
.PHONY: test_integer_long_run # Run the long run tests for integer
|
||||
test_integer_long_run: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,__long_run_tests -p $(TFHE_SPEC) -- integer::server_key::radix_parallel::tests_long_run
|
||||
|
||||
|
||||
.PHONY: test_safe_serialization # Run the tests for safe serialization
|
||||
test_safe_serialization: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::
|
||||
|
||||
.PHONY: test_zk # Run the tests for the zk module of the TFHE-rs crate
|
||||
test_zk: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=shortint,zk-pok -p $(TFHE_SPEC) -- zk::
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,zk-pok -p $(TFHE_SPEC) -- zk::
|
||||
|
||||
.PHONY: test_integer # Run all the tests for integer
|
||||
test_integer: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=integer,internal-keycache -p $(TFHE_SPEC) -- integer::
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache -p $(TFHE_SPEC) -- integer::
|
||||
|
||||
.PHONY: test_integer_cov # Run the tests of the integer module with code coverage
|
||||
test_integer_cov: install_rs_check_toolchain install_tarpaulin
|
||||
@@ -800,38 +798,38 @@ test_integer_cov: install_rs_check_toolchain install_tarpaulin
|
||||
--out xml --output-dir coverage/integer --line --engine llvm --timeout 500 \
|
||||
--implicit-test-threads \
|
||||
--exclude-files $(COVERAGE_EXCLUDED_FILES) \
|
||||
--features=integer,internal-keycache \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
|
||||
-p $(TFHE_SPEC) -- -Z unstable-options --report-time integer::
|
||||
|
||||
.PHONY: test_high_level_api # Run all the tests for high_level_api
|
||||
test_high_level_api: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings -p $(TFHE_SPEC) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok -p $(TFHE_SPEC) \
|
||||
-- high_level_api::
|
||||
|
||||
test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
|
||||
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) \
|
||||
-E "test(/high_level_api::.*gpu.*/)"
|
||||
|
||||
.PHONY: test_strings # Run the tests for strings ci
|
||||
test_strings: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--features=shortint,integer,strings -p $(TFHE_SPEC) \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,integer,strings -p $(TFHE_SPEC) \
|
||||
-- strings::
|
||||
|
||||
|
||||
.PHONY: test_user_doc # Run tests from the .md documentation
|
||||
test_user_doc: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
|
||||
-p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
|
||||
.PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
|
||||
test_user_doc_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
|
||||
--features=boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
|
||||
-- test_user_docs::
|
||||
|
||||
|
||||
@@ -839,12 +837,14 @@ test_user_doc_gpu: install_rs_build_toolchain
|
||||
.PHONY: test_regex_engine # Run tests for regex_engine example
|
||||
test_regex_engine: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--example regex_engine --features=integer
|
||||
--example regex_engine \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer
|
||||
|
||||
.PHONY: test_sha256_bool # Run tests for sha256_bool example
|
||||
test_sha256_bool: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--example sha256_bool --features=boolean
|
||||
--example sha256_bool \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean
|
||||
|
||||
.PHONY: test_examples # Run tests for examples
|
||||
test_examples: test_sha256_bool test_regex_engine
|
||||
@@ -862,7 +862,7 @@ test_kreyvium: install_rs_build_toolchain
|
||||
.PHONY: test_tfhe_csprng # Run tfhe-csprng tests
|
||||
test_tfhe_csprng: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe-csprng
|
||||
--features=$(TARGET_ARCH_FEATURE) -p tfhe-csprng
|
||||
|
||||
.PHONY: test_zk_pok # Run tfhe-zk-pok tests
|
||||
test_zk_pok: install_rs_build_toolchain
|
||||
@@ -880,7 +880,7 @@ test_zk_wasm_x86_compat_ci: check_nvm_installed
|
||||
test_zk_wasm_x86_compat: install_rs_build_toolchain build_node_js_api
|
||||
cd tfhe/tests/zk_wasm_x86_test && npm install
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
-p tfhe --test zk_wasm_x86_test --features=integer,zk-pok
|
||||
-p tfhe --test zk_wasm_x86_test --features=$(TARGET_ARCH_FEATURE),integer,zk-pok
|
||||
|
||||
.PHONY: test_versionable # Run tests for tfhe-versionable subcrate
|
||||
test_versionable: install_rs_build_toolchain
|
||||
@@ -893,7 +893,7 @@ test_versionable: install_rs_build_toolchain
|
||||
test_backward_compatibility_ci: install_rs_build_toolchain
|
||||
TFHE_BACKWARD_COMPAT_DATA_DIR="$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
|
||||
--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tfhe/$(BACKWARD_COMPAT_DATA_DIR)\"" \
|
||||
--features=shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture
|
||||
|
||||
.PHONY: test_backward_compatibility # Same as test_backward_compatibility_ci but tries to clone the data repo first if needed
|
||||
test_backward_compatibility: tfhe/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci
|
||||
@@ -908,7 +908,7 @@ doc: install_rs_check_toolchain
|
||||
DOCS_RS=1 \
|
||||
RUSTDOCFLAGS="--html-in-header katex-header.html" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
|
||||
--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: docs # Build rust doc alias for doc
|
||||
docs: doc
|
||||
@@ -919,7 +919,7 @@ lint_doc: install_rs_check_toolchain
|
||||
DOCS_RS=1 \
|
||||
RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
|
||||
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
|
||||
--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
|
||||
|
||||
.PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
|
||||
lint_docs: lint_doc
|
||||
@@ -948,7 +948,7 @@ check_md_links: install_mlc
|
||||
.PHONY: check_compile_tests # Build tests in debug without running them
|
||||
check_compile_tests: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
|
||||
--features=experimental,boolean,shortint,integer,internal-keycache \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache \
|
||||
-p $(TFHE_SPEC)
|
||||
|
||||
@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
|
||||
@@ -959,7 +959,7 @@ check_compile_tests: install_rs_build_toolchain
|
||||
.PHONY: check_compile_tests_benches_gpu # Build tests in debug without running them
|
||||
check_compile_tests_benches_gpu: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
|
||||
--features=experimental,boolean,shortint,integer,internal-keycache,gpu \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache,gpu \
|
||||
-p $(TFHE_SPEC)
|
||||
mkdir -p "$(TFHECUDA_BUILD)" && \
|
||||
cd "$(TFHECUDA_BUILD)" && \
|
||||
@@ -1038,42 +1038,42 @@ dieharder_csprng: install_dieharder build_tfhe_csprng
|
||||
.PHONY: print_doc_bench_parameters # Print parameters used in doc benchmarks
|
||||
print_doc_bench_parameters:
|
||||
RUSTFLAGS="" cargo run --example print_doc_bench_parameters \
|
||||
--features=shortint,internal-keycache -p tfhe
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p tfhe
|
||||
|
||||
.PHONY: bench_integer # Run benchmarks for unsigned integer
|
||||
bench_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_signed_integer # Run benchmarks for signed integer
|
||||
bench_signed_integer: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
|
||||
bench_integer_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
|
||||
bench_integer_compression: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench glwe_packing_compression-integer-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_compression_gpu
|
||||
bench_integer_compression_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench glwe_packing_compression-integer-bench \
|
||||
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) --
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
|
||||
bench_integer_multi_bit: install_rs_check_toolchain
|
||||
@@ -1081,7 +1081,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
|
||||
bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
@@ -1089,7 +1089,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-signed-bench \
|
||||
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
|
||||
bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
@@ -1097,7 +1097,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
|
||||
bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
@@ -1105,14 +1105,14 @@ bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench integer-bench \
|
||||
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
|
||||
|
||||
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
|
||||
bench_integer_zk: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench zk-pke-bench \
|
||||
--features=integer,internal-keycache,zk-pok,nightly-avx512 \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,zk-pok,nightly-avx512 \
|
||||
-p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_shortint # Run benchmarks for shortint
|
||||
@@ -1120,14 +1120,14 @@ bench_shortint: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench shortint-bench \
|
||||
--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_shortint_oprf # Run benchmarks for shortint
|
||||
bench_shortint_oprf: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench oprf-shortint-bench \
|
||||
--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_shortint_multi_bit # Run benchmarks for shortint using multi-bit parameters
|
||||
bench_shortint_multi_bit: install_rs_check_toolchain
|
||||
@@ -1135,43 +1135,43 @@ bench_shortint_multi_bit: install_rs_check_toolchain
|
||||
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
|
||||
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench shortint-bench \
|
||||
--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_boolean # Run benchmarks for boolean
|
||||
bench_boolean: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench boolean-bench \
|
||||
--features=boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs # Run benchmarks for PBS
|
||||
bench_pbs: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs-bench \
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs128 # Run benchmarks for PBS using FFT 128 bits
|
||||
bench_pbs128: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs128-bench \
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
|
||||
bench_pbs_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_FAST_BENCH=$(FAST_BENCH) cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench pbs-bench \
|
||||
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_ks # Run benchmarks for keyswitch
|
||||
bench_ks: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench ks-bench \
|
||||
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
.PHONY: bench_ks_gpu # Run benchmarks for PBS on GPU backend
|
||||
bench_ks_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench ks-bench \
|
||||
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
|
||||
|
||||
bench_web_js_api_parallel_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
|
||||
bench_web_js_api_parallel_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
|
||||
@@ -1207,13 +1207,13 @@ bench_web_js_api_parallel_firefox_ci: setup_venv
|
||||
bench_hlapi_erc20: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ECR20 operations on GPU
|
||||
bench_hlapi_erc20_gpu: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
|
||||
--bench hlapi-erc20 \
|
||||
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
|
||||
|
||||
.PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
|
||||
bench_tfhe_zk_pok: install_rs_check_toolchain
|
||||
@@ -1228,32 +1228,32 @@ bench_tfhe_zk_pok: install_rs_check_toolchain
|
||||
gen_key_cache: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example generates_test_keys \
|
||||
--features=boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
|
||||
-- $(MULTI_BIT_ONLY) $(COVERAGE_ONLY)
|
||||
|
||||
.PHONY: gen_key_cache_core_crypto # Run function to generate keys and cache them for core_crypto tests
|
||||
gen_key_cache_core_crypto: install_rs_build_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --tests --profile $(CARGO_PROFILE) \
|
||||
--features=experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
|
||||
--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
|
||||
core_crypto::keycache::generate_keys
|
||||
|
||||
.PHONY: measure_hlapi_compact_pk_ct_sizes # Measure sizes of public keys and ciphertext for high-level API
|
||||
measure_hlapi_compact_pk_ct_sizes: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example hlapi_compact_pk_ct_sizes \
|
||||
--features=integer,internal-keycache
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache
|
||||
|
||||
.PHONY: measure_shortint_key_sizes # Measure sizes of bootstrapping and key switching keys for shortint
|
||||
measure_shortint_key_sizes: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example shortint_key_sizes \
|
||||
--features=shortint,internal-keycache
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache
|
||||
|
||||
.PHONY: measure_boolean_key_sizes # Measure sizes of bootstrapping and key switching keys for boolean
|
||||
measure_boolean_key_sizes: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example boolean_key_sizes \
|
||||
--features=boolean,internal-keycache
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache
|
||||
|
||||
.PHONY: parse_integer_benches # Run python parser to output a csv containing integer benches data
|
||||
parse_integer_benches:
|
||||
@@ -1265,13 +1265,14 @@ parse_integer_benches:
|
||||
parse_wasm_benchmarks: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example wasm_benchmarks_parser \
|
||||
--features=shortint,internal-keycache \
|
||||
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
|
||||
-- wasm_benchmark_results.json
|
||||
|
||||
.PHONY: write_params_to_file # Gather all crypto parameters into a file with a Sage readable format.
|
||||
write_params_to_file: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example write_params_to_file --features=boolean,shortint,internal-keycache
|
||||
--example write_params_to_file \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache
|
||||
|
||||
.PHONY: clone_backward_compat_data # Clone the data repo needed for backward compatibility tests
|
||||
clone_backward_compat_data:
|
||||
@@ -1286,26 +1287,26 @@ tfhe/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
|
||||
.PHONY: regex_engine # Run regex_engine example
|
||||
regex_engine: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example regex_engine --features=integer \
|
||||
--example regex_engine \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer \
|
||||
-- $(REGEX_STRING) $(REGEX_PATTERN)
|
||||
|
||||
.PHONY: dark_market # Run dark market example
|
||||
dark_market: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example dark_market \
|
||||
--features=integer,internal-keycache \
|
||||
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
|
||||
-- fhe-modified fhe-parallel plain fhe
|
||||
|
||||
.PHONY: sha256_bool # Run sha256_bool example
|
||||
sha256_bool: install_rs_check_toolchain
|
||||
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
|
||||
--example sha256_bool --features=boolean
|
||||
--example sha256_bool \
|
||||
--features=$(TARGET_ARCH_FEATURE),boolean
|
||||
|
||||
.PHONY: pcc # pcc stands for pre commit checks (except GPU)
|
||||
pcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested check_intra_md_links \
|
||||
clippy_all check_compile_tests
|
||||
# TFHE lints deactivated as it's incompatible with 1.83 - temporary
|
||||
# tfhe_lints
|
||||
clippy_all tfhe_lints check_compile_tests
|
||||
|
||||
.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
|
||||
pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu check_rust_bindings_did_not_change
|
||||
|
||||
16
README.md
16
README.md
@@ -70,8 +70,22 @@ production-ready library for all the advanced features of TFHE.
|
||||
### Cargo.toml configuration
|
||||
To use the latest version of `TFHE-rs` in your project, you first need to add it as a dependency in your `Cargo.toml`:
|
||||
|
||||
+ For x86_64-based machines running Unix-like OSes:
|
||||
|
||||
```toml
|
||||
tfhe = { version = "*", features = ["boolean", "shortint", "integer"] }
|
||||
tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64-unix"] }
|
||||
```
|
||||
|
||||
+ For Apple Silicon or aarch64-based machines running Unix-like OSes:
|
||||
|
||||
```toml
|
||||
tfhe = { version = "*", features = ["boolean", "shortint", "integer", "aarch64-unix"] }
|
||||
```
|
||||
|
||||
+ For x86_64-based machines with the [`rdseed instruction`](https://en.wikipedia.org/wiki/RDRAND) running Windows:
|
||||
|
||||
```toml
|
||||
tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64"] }
|
||||
```
|
||||
|
||||
> [!Note]
|
||||
|
||||
@@ -7,7 +7,14 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
rayon = { workspace = true }
|
||||
tfhe = { path = "../../tfhe", features = [ "boolean", "shortint", "integer" ] }
|
||||
|
||||
[target.'cfg(target_arch = "x86_64")'.dependencies.tfhe]
|
||||
path = "../../tfhe"
|
||||
features = [ "boolean", "shortint", "integer", "x86_64" ]
|
||||
|
||||
[target.'cfg(target_arch = "aarch64")'.dependencies.tfhe]
|
||||
path = "../../tfhe"
|
||||
features = [ "boolean", "shortint", "integer", "aarch64-unix" ]
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5.1", features = [ "html_reports" ]}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use criterion::Criterion;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
|
||||
use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
|
||||
use tfhe::shortint::prelude::*;
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
|
||||
use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};
|
||||
@@ -12,12 +12,12 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
@@ -63,12 +63,12 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
@@ -109,12 +109,12 @@ pub fn kreyvium_shortint_trans(c: &mut Criterion) {
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use criterion::Criterion;
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
|
||||
use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
|
||||
use tfhe::shortint::prelude::*;
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
|
||||
use tfhe_trivium::{TransCiphering, TriviumStreamShortint};
|
||||
@@ -12,12 +12,12 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
@@ -63,12 +63,12 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
@@ -109,12 +109,12 @@ pub fn trivium_shortint_trans(c: &mut Criterion) {
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
|
||||
use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
|
||||
// Values for these tests come from the github repo renaud1239/Kreyvium,
|
||||
// commit fd6828f68711276c25f55e605935028f5e843f43
|
||||
@@ -222,12 +222,12 @@ fn kreyvium_test_shortint_long() {
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
|
||||
use tfhe::prelude::*;
|
||||
use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
|
||||
use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
|
||||
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
|
||||
// Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
|
||||
// file testvectors/trivium-80.80.test-vectors
|
||||
@@ -358,12 +358,12 @@ fn trivium_test_shortint_long() {
|
||||
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
|
||||
|
||||
let (client_key, server_key): (ClientKey, ServerKey) =
|
||||
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
|
||||
|
||||
let ksk = KeySwitchingKey::new(
|
||||
(&client_key, Some(&server_key)),
|
||||
(&underlying_ck, &underlying_sk),
|
||||
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
|
||||
);
|
||||
|
||||
let key_string = "0053A6F94C9FF24598EB".to_string();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tfhe-cuda-backend"
|
||||
version = "0.7.0"
|
||||
version = "0.6.0"
|
||||
edition = "2021"
|
||||
authors = ["Zama team"]
|
||||
license = "BSD-3-Clause-Clear"
|
||||
|
||||
@@ -38,7 +38,6 @@ template <typename Torus> struct int_compression {
|
||||
|
||||
scratch_packing_keyswitch_lwe_list_to_glwe_64(
|
||||
streams[0], gpu_indexes[0], &fp_ks_buffer,
|
||||
compression_params.small_lwe_dimension,
|
||||
compression_params.glwe_dimension, compression_params.polynomial_size,
|
||||
num_radix_blocks, true);
|
||||
}
|
||||
@@ -65,7 +64,7 @@ template <typename Torus> struct int_decompression {
|
||||
Torus *tmp_extracted_lwe;
|
||||
uint32_t *tmp_indexes_array;
|
||||
|
||||
int_radix_lut<Torus> *decompression_rescale_lut;
|
||||
int_radix_lut<Torus> *carry_extract_lut;
|
||||
|
||||
int_decompression(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_radix_params encryption_params,
|
||||
@@ -84,7 +83,7 @@ template <typename Torus> struct int_decompression {
|
||||
Torus lwe_accumulator_size = (compression_params.glwe_dimension *
|
||||
compression_params.polynomial_size +
|
||||
1);
|
||||
decompression_rescale_lut = new int_radix_lut<Torus>(
|
||||
carry_extract_lut = new int_radix_lut<Torus>(
|
||||
streams, gpu_indexes, gpu_count, encryption_params, 1,
|
||||
num_radix_blocks, allocate_gpu_memory);
|
||||
|
||||
@@ -97,28 +96,19 @@ template <typename Torus> struct int_decompression {
|
||||
num_radix_blocks * lwe_accumulator_size * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
|
||||
// Rescale is done using an identity LUT
|
||||
// Here we do not divide by message_modulus
|
||||
// Example: in the 2_2 case we are mapping a 2 bits message onto a 4 bits
|
||||
// space, we want to keep the original 2 bits value in the 4 bits space,
|
||||
// so we apply the identity and the encoding will rescale it for us.
|
||||
auto decompression_rescale_f = [encryption_params](Torus x) -> Torus {
|
||||
return x;
|
||||
// Carry extract LUT
|
||||
auto carry_extract_f = [encryption_params](Torus x) -> Torus {
|
||||
return x / encryption_params.message_modulus;
|
||||
};
|
||||
|
||||
auto effective_compression_message_modulus =
|
||||
encryption_params.carry_modulus;
|
||||
auto effective_compression_carry_modulus = 1;
|
||||
|
||||
generate_device_accumulator_with_encoding<Torus>(
|
||||
streams[0], gpu_indexes[0], decompression_rescale_lut->get_lut(0, 0),
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
carry_extract_lut->get_lut(gpu_indexes[0], 0),
|
||||
encryption_params.glwe_dimension, encryption_params.polynomial_size,
|
||||
effective_compression_message_modulus,
|
||||
effective_compression_carry_modulus,
|
||||
encryption_params.message_modulus, encryption_params.carry_modulus,
|
||||
decompression_rescale_f);
|
||||
carry_extract_f);
|
||||
|
||||
decompression_rescale_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
carry_extract_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
}
|
||||
}
|
||||
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
@@ -127,8 +117,8 @@ template <typename Torus> struct int_decompression {
|
||||
cuda_drop_async(tmp_extracted_lwe, streams[0], gpu_indexes[0]);
|
||||
cuda_drop_async(tmp_indexes_array, streams[0], gpu_indexes[0]);
|
||||
|
||||
decompression_rescale_lut->release(streams, gpu_indexes, gpu_count);
|
||||
delete decompression_rescale_lut;
|
||||
carry_extract_lut->release(streams, gpu_indexes, gpu_count);
|
||||
delete carry_extract_lut;
|
||||
}
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -46,14 +46,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t num_many_lut, bool allocate_gpu_memory);
|
||||
|
||||
void cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, void *output_radix_lwe,
|
||||
@@ -447,41 +440,5 @@ void cleanup_cuda_integer_abs_inplace(void *const *streams,
|
||||
uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
|
||||
|
||||
void cleanup_cuda_integer_are_all_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
|
||||
|
||||
void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void);
|
||||
|
||||
} // extern C
|
||||
#endif // CUDA_INTEGER_H
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -21,8 +21,8 @@ void cuda_keyswitch_lwe_ciphertext_vector_64(
|
||||
|
||||
void scratch_packing_keyswitch_lwe_list_to_glwe_64(
|
||||
void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t num_lwes, bool allocate_gpu_memory);
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
|
||||
bool allocate_gpu_memory);
|
||||
|
||||
void cuda_packing_keyswitch_lwe_list_to_glwe_64(
|
||||
void *stream, uint32_t gpu_index, void *glwe_array_out,
|
||||
|
||||
@@ -1,358 +0,0 @@
|
||||
#ifndef CNCRT_FAST_KS_CUH
|
||||
#define CNCRT_FAST_KS_CUH
|
||||
|
||||
#undef NDEBUG
|
||||
#include <assert.h>
|
||||
|
||||
#include "device.h"
|
||||
#include "gadget.cuh"
|
||||
#include "helper_multi_gpu.h"
|
||||
#include "keyswitch.cuh"
|
||||
#include "polynomial/functions.cuh"
|
||||
#include "polynomial/polynomial_math.cuh"
|
||||
#include "torus.cuh"
|
||||
#include "utils/helper.cuh"
|
||||
#include "utils/kernel_dimensions.cuh"
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#define CEIL_DIV(M, N) ((M) + (N)-1) / (N)
|
||||
|
||||
const int BLOCK_SIZE_GEMM = 64;
|
||||
const int THREADS_GEMM = 8;
|
||||
const int BLOCK_SIZE_DECOMP = 8;
|
||||
|
||||
template <typename Torus> uint64_t get_shared_mem_size_tgemm() {
|
||||
return BLOCK_SIZE_GEMM * THREADS_GEMM * 2 * sizeof(Torus);
|
||||
}
|
||||
|
||||
__host__ inline bool can_use_pks_fast_path(uint32_t lwe_dimension,
|
||||
uint32_t num_lwe,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t level_count,
|
||||
uint32_t glwe_dimension) {
|
||||
// TODO: activate it back, fix tests and extend to level_count > 1
|
||||
return false;
|
||||
}
|
||||
|
||||
// Initialize decomposition by performing rounding
|
||||
// and decomposing one level of an array of Torus LWEs. Only
|
||||
// decomposes the mask elements of the incoming LWEs.
|
||||
template <typename Torus, typename TorusVec>
|
||||
__global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,
|
||||
uint32_t lwe_dimension,
|
||||
uint32_t num_lwe, uint32_t base_log,
|
||||
uint32_t level_count) {
|
||||
|
||||
// index of this LWE ct in the buffer
|
||||
auto lwe_idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
// index of the LWE sample in the LWE ct
|
||||
auto lwe_sample_idx = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (lwe_idx >= num_lwe || lwe_sample_idx >= lwe_dimension)
|
||||
return;
|
||||
|
||||
// Input LWE array is [mask_0, .., mask_lwe_dim, message] and
|
||||
// we only decompose the mask. Thus the stride for reading
|
||||
// is lwe_dimension + 1, while for writing it is lwe_dimension
|
||||
auto read_val_idx = lwe_idx * (lwe_dimension + 1) + lwe_sample_idx;
|
||||
auto write_val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
|
||||
|
||||
Torus a_i = lwe_in[read_val_idx];
|
||||
|
||||
Torus state = init_decomposer_state(a_i, base_log, level_count);
|
||||
|
||||
Torus mod_b_mask = (1ll << base_log) - 1ll;
|
||||
lwe_out[write_val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
|
||||
}
|
||||
|
||||
// Continue decomposiion of an array of Torus elements in place. Supposes
|
||||
// that the array contains already decomposed elements and
|
||||
// computes the new decomposed level in place.
|
||||
template <typename Torus, typename TorusVec>
|
||||
__global__ void
|
||||
decompose_vectorize_step_inplace(Torus *buffer_in, uint32_t lwe_dimension,
|
||||
uint32_t num_lwe, uint32_t base_log,
|
||||
uint32_t level_count) {
|
||||
|
||||
// index of this LWE ct in the buffer
|
||||
auto lwe_idx = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
// index of the LWE sample in the LWE ct
|
||||
auto lwe_sample_idx = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (lwe_idx >= num_lwe || lwe_sample_idx >= lwe_dimension)
|
||||
return;
|
||||
|
||||
auto val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
|
||||
|
||||
Torus state = buffer_in[val_idx];
|
||||
|
||||
Torus mod_b_mask = (1ll << base_log) - 1ll;
|
||||
|
||||
buffer_in[val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
|
||||
}
|
||||
|
||||
// Multiply matrices A, B of size (M, K), (K, N) respectively
|
||||
// with K as the inner dimension.
|
||||
//
|
||||
// A block of threads processeds blocks of size (BLOCK_SIZE_GEMM,
|
||||
// BLOCK_SIZE_GEMM) splitting them in multiple tiles: (BLOCK_SIZE_GEMM,
|
||||
// THREADS_GEMM)-shaped tiles of values from A, and a (THREADS_GEMM,
|
||||
// BLOCK_SIZE_GEMM)-shaped tiles of values from B.
|
||||
template <typename Torus, typename TorusVec>
|
||||
__global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
|
||||
int stride_B, Torus *C) {
|
||||
|
||||
const int BM = BLOCK_SIZE_GEMM;
|
||||
const int BN = BLOCK_SIZE_GEMM;
|
||||
const int BK = THREADS_GEMM;
|
||||
const int TM = THREADS_GEMM;
|
||||
|
||||
const uint cRow = blockIdx.y;
|
||||
const uint cCol = blockIdx.x;
|
||||
|
||||
const uint totalResultsBlocktile = BM * BN;
|
||||
const int threadCol = threadIdx.x % BN;
|
||||
const int threadRow = threadIdx.x / BN;
|
||||
|
||||
// Allocate space for the current block tile in shared memory
|
||||
__shared__ Torus As[BM * BK];
|
||||
__shared__ Torus Bs[BK * BN];
|
||||
|
||||
// Initialize the pointers to the input blocks from A, B
|
||||
// Tiles from these blocks are loaded to shared memory
|
||||
A += cRow * BM * K;
|
||||
B += cCol * BN;
|
||||
|
||||
// Each thread will handle multiple sub-blocks
|
||||
const uint innerColA = threadIdx.x % BK;
|
||||
const uint innerRowA = threadIdx.x / BK;
|
||||
const uint innerColB = threadIdx.x % BN;
|
||||
const uint innerRowB = threadIdx.x / BN;
|
||||
|
||||
// allocate thread-local cache for results in registerfile
|
||||
Torus threadResults[TM] = {0};
|
||||
|
||||
auto row_A = cRow * BM + innerRowA;
|
||||
auto col_B = cCol * BN + innerColB;
|
||||
|
||||
// For each thread, loop over block tiles
|
||||
for (uint bkIdx = 0; bkIdx < K; bkIdx += BK) {
|
||||
auto col_A = bkIdx + innerColA;
|
||||
auto row_B = bkIdx + innerRowB;
|
||||
|
||||
if (row_A < M && col_A < K) {
|
||||
As[innerRowA * BK + innerColA] = A[innerRowA * K + innerColA];
|
||||
} else {
|
||||
As[innerRowA * BK + innerColA] = 0;
|
||||
}
|
||||
|
||||
if (col_B < N && row_B < K) {
|
||||
Bs[innerRowB * BN + innerColB] = B[innerRowB * stride_B + innerColB];
|
||||
} else {
|
||||
Bs[innerRowB * BN + innerColB] = 0;
|
||||
}
|
||||
__syncthreads();
|
||||
|
||||
// Advance blocktile for the next iteration of this loop
|
||||
A += BK;
|
||||
B += BK * stride_B;
|
||||
|
||||
// calculate per-thread results
|
||||
for (uint dotIdx = 0; dotIdx < BK; ++dotIdx) {
|
||||
// we make the dotproduct loop the outside loop, which facilitates
|
||||
// reuse of the Bs entry, which we can cache in a tmp var.
|
||||
Torus tmp = Bs[dotIdx * BN + threadCol];
|
||||
for (uint resIdx = 0; resIdx < TM; ++resIdx) {
|
||||
threadResults[resIdx] +=
|
||||
As[(threadRow * TM + resIdx) * BK + dotIdx] * tmp;
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
// Initialize the pointer to the output block of size (BLOCK_SIZE_GEMM,
|
||||
// BLOCK_SIZE_GEMM)
|
||||
C += cRow * BM * N + cCol * BN;
|
||||
|
||||
// write out the results
|
||||
for (uint resIdx = 0; resIdx < TM; ++resIdx) {
|
||||
int outRow = cRow * BM + threadRow * TM + resIdx;
|
||||
int outCol = cCol * BN + threadCol;
|
||||
|
||||
if (outRow >= M)
|
||||
continue;
|
||||
if (outCol >= N)
|
||||
continue;
|
||||
|
||||
C[(threadRow * TM + resIdx) * N + threadCol] += threadResults[resIdx];
|
||||
}
|
||||
}
|
||||
|
||||
// Finish the keyswitching operation and prepare GLWEs for accumulation.
|
||||
// 1. Finish the keyswitching computation partially performed with a GEMM:
|
||||
// - negate the dot product between the GLWE and KSK polynomial
|
||||
// - add the GLWE message for the N-th polynomial coeff in the message poly
|
||||
// 2. Rotate each of the GLWE . KSK poly dot products to
|
||||
// prepare them for accumulation into a single GLWE
|
||||
template <typename Torus>
|
||||
__global__ void polynomial_accumulate_monic_monomial_mul_many_neg_and_add_C(
|
||||
Torus *in_glwe_buffer, Torus *out_glwe_buffer, Torus const *lwe_array,
|
||||
uint32_t lwe_dimension, uint32_t num_glwes, uint32_t polynomial_size,
|
||||
uint32_t glwe_dimension) {
|
||||
|
||||
uint32_t glwe_id = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
uint32_t degree = glwe_id; // lwe 0 rotate 0, lwe 1 rotate 1, .. , lwe
|
||||
// poly_size-1 rotate poly_size-1
|
||||
uint32_t coeffIdx = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (glwe_id >= num_glwes)
|
||||
return;
|
||||
if (coeffIdx >= polynomial_size)
|
||||
return;
|
||||
|
||||
auto in_poly =
|
||||
in_glwe_buffer + glwe_id * polynomial_size * (glwe_dimension + 1);
|
||||
auto out_result =
|
||||
out_glwe_buffer + glwe_id * polynomial_size * (glwe_dimension + 1);
|
||||
if (coeffIdx == 0) {
|
||||
// Add the message value of the input LWE (`C`) to the N-th coefficient
|
||||
// in the GLWE . KSK dot product
|
||||
|
||||
// The C is added to the first position of the last polynomial in the GLWE
|
||||
// which has (glwe_dimension+1) polynomials
|
||||
// The C value is extracted as the last value of the LWE ct. (of index
|
||||
// glwe_id) the LWEs have (polynomial_size + 1) values
|
||||
in_poly[polynomial_size * glwe_dimension] =
|
||||
lwe_array[glwe_id * (lwe_dimension + 1) + lwe_dimension] -
|
||||
in_poly[polynomial_size * glwe_dimension];
|
||||
|
||||
for (int gi = 1; gi < glwe_dimension; ++gi)
|
||||
in_poly[coeffIdx + gi * polynomial_size] =
|
||||
-in_poly[coeffIdx + gi * polynomial_size];
|
||||
|
||||
} else {
|
||||
// Otherwise simply negate the input coefficient
|
||||
for (int gi = 1; gi < glwe_dimension + 1; ++gi)
|
||||
in_poly[coeffIdx + gi * polynomial_size] =
|
||||
-in_poly[coeffIdx + gi * polynomial_size];
|
||||
}
|
||||
// Negate all the coefficients for rotation for the first poly
|
||||
in_poly[coeffIdx] = -in_poly[coeffIdx];
|
||||
|
||||
// rotate the body
|
||||
polynomial_accumulate_monic_monomial_mul<Torus>(
|
||||
out_result, in_poly, degree, coeffIdx, polynomial_size, 1, true);
|
||||
// rotate the mask too
|
||||
for (int gi = 1; gi < glwe_dimension + 1; ++gi)
|
||||
polynomial_accumulate_monic_monomial_mul<Torus>(
|
||||
out_result + gi * polynomial_size, in_poly + gi * polynomial_size,
|
||||
degree, coeffIdx, polynomial_size, 1, true);
|
||||
}
|
||||
|
||||
template <typename Torus, typename TorusVec>
|
||||
__host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
|
||||
cudaStream_t stream, uint32_t gpu_index, Torus *glwe_out,
|
||||
Torus const *lwe_array_in, Torus const *fp_ksk_array, int8_t *fp_ks_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t base_log, uint32_t level_count, uint32_t num_lwes) {
|
||||
|
||||
// Optimization of packing keyswitch when packing many LWEs
|
||||
|
||||
if (level_count > 1) {
|
||||
PANIC("Fast path PKS only supports level_count==1");
|
||||
}
|
||||
|
||||
cudaSetDevice(gpu_index);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
|
||||
|
||||
// The fast path of PKS uses the scratch buffer (d_mem) differently than the
|
||||
// old path: it needs to store the decomposed masks in the first half of this
|
||||
// buffer and the keyswitched GLWEs in the second half of the buffer. Thus the
|
||||
// scratch buffer for the fast path must determine the half-size of the
|
||||
// scratch buffer as the max between the size of the GLWE and the size of the
|
||||
// LWE-mask
|
||||
int memory_unit = glwe_accumulator_size > lwe_dimension
|
||||
? glwe_accumulator_size
|
||||
: lwe_dimension;
|
||||
|
||||
// ping pong the buffer between successive calls
|
||||
// split the buffer in two parts of this size
|
||||
auto d_mem_0 = (Torus *)fp_ks_buffer;
|
||||
auto d_mem_1 = d_mem_0 + num_lwes * memory_unit;
|
||||
|
||||
// Set the scratch buffer to 0 as it is used to accumulate
|
||||
// decomposition temporary results
|
||||
cuda_memset_async(d_mem_1, 0, num_lwes * memory_unit * sizeof(Torus), stream,
|
||||
gpu_index);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
// decompose LWEs
|
||||
// don't decompose LWE body - the LWE has lwe_size + 1 elements. The last
|
||||
// element, the body is ignored by rounding down the number of blocks assuming
|
||||
// here that the LWE dimension is a multiple of the block size
|
||||
dim3 grid_decomp(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
|
||||
CEIL_DIV(lwe_dimension, BLOCK_SIZE_DECOMP));
|
||||
dim3 threads_decomp(BLOCK_SIZE_DECOMP, BLOCK_SIZE_DECOMP);
|
||||
|
||||
// decompose first level
|
||||
decompose_vectorize_init<Torus, TorusVec>
|
||||
<<<grid_decomp, threads_decomp, 0, stream>>>(lwe_array_in, d_mem_0,
|
||||
lwe_dimension, num_lwes,
|
||||
base_log, level_count);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
// gemm to ks the individual LWEs to GLWEs
|
||||
dim3 grid_gemm(CEIL_DIV(glwe_accumulator_size, BLOCK_SIZE_GEMM),
|
||||
CEIL_DIV(num_lwes, BLOCK_SIZE_GEMM));
|
||||
dim3 threads_gemm(BLOCK_SIZE_GEMM * THREADS_GEMM);
|
||||
|
||||
auto stride_KSK_buffer = glwe_accumulator_size;
|
||||
|
||||
uint32_t shared_mem_size = get_shared_mem_size_tgemm<Torus>();
|
||||
tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
|
||||
num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0, fp_ksk_array,
|
||||
stride_KSK_buffer, d_mem_1);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
/*
|
||||
TODO: transpose key to generalize to level_count > 1
|
||||
|
||||
for (int li = 1; li < level_count; ++li) {
|
||||
decompose_vectorize_step_inplace<Torus, TorusVec>
|
||||
<<<grid_decomp, threads_decomp, 0, stream>>>(
|
||||
d_mem_0, lwe_dimension, num_lwes, base_log, level_count);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size,
|
||||
stream>>>( num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0,
|
||||
fp_ksk_array + li * ksk_block_size, stride_KSK_buffer, d_mem_1);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
*/
|
||||
|
||||
// should we include the mask in the rotation ??
|
||||
dim3 grid_rotate(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
|
||||
CEIL_DIV(polynomial_size, BLOCK_SIZE_DECOMP));
|
||||
dim3 threads_rotate(BLOCK_SIZE_DECOMP, BLOCK_SIZE_DECOMP);
|
||||
// rotate the GLWEs
|
||||
polynomial_accumulate_monic_monomial_mul_many_neg_and_add_C<Torus>
|
||||
<<<grid_rotate, threads_rotate, 0, stream>>>(
|
||||
d_mem_1, d_mem_0, lwe_array_in, lwe_dimension, num_lwes,
|
||||
polynomial_size, glwe_dimension);
|
||||
check_cuda_error(cudaGetLastError());
|
||||
|
||||
dim3 grid_accumulate(
|
||||
CEIL_DIV(polynomial_size * (glwe_dimension + 1), BLOCK_SIZE_DECOMP));
|
||||
dim3 threads_accum(BLOCK_SIZE_DECOMP);
|
||||
|
||||
// accumulate to a single glwe
|
||||
accumulate_glwes<Torus><<<grid_accumulate, threads_accum, 0, stream>>>(
|
||||
glwe_out, d_mem_0, glwe_dimension, polynomial_size, num_lwes);
|
||||
|
||||
check_cuda_error(cudaGetLastError());
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,8 +1,6 @@
|
||||
#include "fast_packing_keyswitch.cuh"
|
||||
#include "keyswitch.cuh"
|
||||
#include "keyswitch.h"
|
||||
#include <cstdint>
|
||||
#include <stdio.h>
|
||||
|
||||
/* Perform keyswitch on a batch of 32 bits input LWE ciphertexts.
|
||||
* Head out to the equivalent operation on 64 bits for more details.
|
||||
@@ -55,17 +53,15 @@ void cuda_keyswitch_lwe_ciphertext_vector_64(
|
||||
|
||||
void scratch_packing_keyswitch_lwe_list_to_glwe_64(
|
||||
void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t num_lwes, bool allocate_gpu_memory) {
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
|
||||
bool allocate_gpu_memory) {
|
||||
scratch_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, fp_ks_buffer, lwe_dimension,
|
||||
static_cast<cudaStream_t>(stream), gpu_index, fp_ks_buffer,
|
||||
glwe_dimension, polynomial_size, num_lwes, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
/* Perform functional packing keyswitch on a batch of 64 bits input LWE
|
||||
* ciphertexts.
|
||||
*/
|
||||
|
||||
void cuda_packing_keyswitch_lwe_list_to_glwe_64(
|
||||
void *stream, uint32_t gpu_index, void *glwe_array_out,
|
||||
void const *lwe_array_in, void const *fp_ksk_array, int8_t *fp_ks_buffer,
|
||||
@@ -73,24 +69,13 @@ void cuda_packing_keyswitch_lwe_list_to_glwe_64(
|
||||
uint32_t output_polynomial_size, uint32_t base_log, uint32_t level_count,
|
||||
uint32_t num_lwes) {
|
||||
|
||||
if (can_use_pks_fast_path(input_lwe_dimension, num_lwes,
|
||||
output_polynomial_size, level_count,
|
||||
output_glwe_dimension)) {
|
||||
host_fast_packing_keyswitch_lwe_list_to_glwe<uint64_t, ulonglong4>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(glwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in),
|
||||
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
|
||||
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
|
||||
base_log, level_count, num_lwes);
|
||||
} else
|
||||
host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(glwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in),
|
||||
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
|
||||
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
|
||||
base_log, level_count, num_lwes);
|
||||
host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index,
|
||||
static_cast<uint64_t *>(glwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in),
|
||||
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
|
||||
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
|
||||
base_log, level_count, num_lwes);
|
||||
}
|
||||
|
||||
void cleanup_packing_keyswitch_lwe_list_to_glwe(void *stream,
|
||||
|
||||
@@ -158,20 +158,16 @@ void execute_keyswitch_async(cudaStream_t const *streams,
|
||||
template <typename Torus>
|
||||
__host__ void scratch_packing_keyswitch_lwe_list_to_glwe(
|
||||
cudaStream_t stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
|
||||
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t num_lwes, bool allocate_gpu_memory) {
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
|
||||
bool allocate_gpu_memory) {
|
||||
cudaSetDevice(gpu_index);
|
||||
|
||||
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
|
||||
|
||||
int memory_unit = glwe_accumulator_size > lwe_dimension
|
||||
? glwe_accumulator_size
|
||||
: lwe_dimension;
|
||||
|
||||
if (allocate_gpu_memory) {
|
||||
if (allocate_gpu_memory)
|
||||
*fp_ks_buffer = (int8_t *)cuda_malloc_async(
|
||||
2 * num_lwes * memory_unit * sizeof(Torus), stream, gpu_index);
|
||||
}
|
||||
2 * num_lwes * glwe_accumulator_size * sizeof(Torus), stream,
|
||||
gpu_index);
|
||||
}
|
||||
|
||||
// public functional packing keyswitch for a single LWE ciphertext
|
||||
@@ -245,7 +241,6 @@ __global__ void packing_keyswitch_lwe_list_to_glwe(
|
||||
auto lwe_in = lwe_array_in + input_id * lwe_size;
|
||||
auto ks_glwe_out = d_mem + input_id * glwe_accumulator_size;
|
||||
auto glwe_out = glwe_array_out + input_id * glwe_accumulator_size;
|
||||
|
||||
// KS LWE to GLWE
|
||||
packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext<Torus>(
|
||||
ks_glwe_out, lwe_in, fp_ksk, lwe_dimension_in, glwe_dimension,
|
||||
@@ -298,18 +293,8 @@ __host__ void host_packing_keyswitch_lwe_list_to_glwe(
|
||||
dim3 grid(num_blocks, num_lwes);
|
||||
dim3 threads(num_threads);
|
||||
|
||||
// The fast path of PKS uses the scratch buffer (d_mem) differently:
|
||||
// it needs to store the decomposed masks in the first half of this buffer
|
||||
// and the keyswitched GLWEs in the second half of the buffer. Thus the
|
||||
// scratch buffer for the fast path must determine the half-size of the
|
||||
// scratch buffer as the max between the size of the GLWE and the size of the
|
||||
// LWE-mask
|
||||
int memory_unit = glwe_accumulator_size > lwe_dimension_in
|
||||
? glwe_accumulator_size
|
||||
: lwe_dimension_in;
|
||||
|
||||
auto d_mem = (Torus *)fp_ks_buffer;
|
||||
auto d_tmp_glwe_array_out = d_mem + num_lwes * memory_unit;
|
||||
auto d_tmp_glwe_array_out = d_mem + num_lwes * glwe_accumulator_size;
|
||||
|
||||
// individually keyswitch each lwe
|
||||
packing_keyswitch_lwe_list_to_glwe<Torus><<<grid, threads, 0, stream>>>(
|
||||
|
||||
@@ -37,32 +37,39 @@ __host__ void host_integer_radix_cmux_kb(
|
||||
uint32_t num_radix_blocks) {
|
||||
|
||||
auto params = mem_ptr->params;
|
||||
Torus lwe_size = params.big_lwe_dimension + 1;
|
||||
Torus radix_lwe_size = lwe_size * num_radix_blocks;
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in, lwe_array_true,
|
||||
radix_lwe_size * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in + radix_lwe_size,
|
||||
lwe_array_false, radix_lwe_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
for (uint i = 0; i < 2 * num_radix_blocks; i++) {
|
||||
cuda_memcpy_async_gpu_to_gpu(mem_ptr->condition_array + i * lwe_size,
|
||||
lwe_condition, lwe_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
|
||||
// Since our CPU threads will be working on different streams we shall assert
|
||||
// the work in the main stream is completed
|
||||
auto true_streams = mem_ptr->zero_if_true_buffer->true_streams;
|
||||
auto false_streams = mem_ptr->zero_if_false_buffer->false_streams;
|
||||
for (uint j = 0; j < gpu_count; j++) {
|
||||
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
|
||||
}
|
||||
|
||||
auto mem_true = mem_ptr->zero_if_true_buffer;
|
||||
zero_out_if<Torus>(true_streams, gpu_indexes, gpu_count, mem_ptr->tmp_true_ct,
|
||||
lwe_array_true, lwe_condition, mem_true,
|
||||
mem_ptr->inverted_predicate_lut, bsks, ksks,
|
||||
num_radix_blocks);
|
||||
auto mem_false = mem_ptr->zero_if_false_buffer;
|
||||
zero_out_if<Torus>(false_streams, gpu_indexes, gpu_count,
|
||||
mem_ptr->tmp_false_ct, lwe_array_false, lwe_condition,
|
||||
mem_false, mem_ptr->predicate_lut, bsks, ksks,
|
||||
num_radix_blocks);
|
||||
for (uint j = 0; j < mem_ptr->zero_if_true_buffer->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(true_streams[j], gpu_indexes[j]);
|
||||
}
|
||||
for (uint j = 0; j < mem_ptr->zero_if_false_buffer->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(false_streams[j], gpu_indexes[j]);
|
||||
}
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->buffer_out, mem_ptr->buffer_in,
|
||||
mem_ptr->condition_array, bsks, ksks, 2 * num_radix_blocks,
|
||||
mem_ptr->predicate_lut, params.message_modulus);
|
||||
|
||||
// If the condition was true, true_ct will have kept its value and false_ct
|
||||
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
|
||||
// have kept its value
|
||||
auto mem_true = mem_ptr->buffer_out;
|
||||
auto mem_false = &mem_ptr->buffer_out[radix_lwe_size];
|
||||
auto added_cts = mem_true;
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
|
||||
mem_false, params.big_lwe_dimension, num_radix_blocks);
|
||||
auto added_cts = mem_ptr->tmp_true_ct;
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], added_cts,
|
||||
mem_ptr->tmp_true_ct, mem_ptr->tmp_false_ct,
|
||||
params.big_lwe_dimension, num_radix_blocks);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,
|
||||
|
||||
@@ -58,9 +58,6 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
case GE:
|
||||
case LT:
|
||||
case LE:
|
||||
if (num_radix_blocks % 2 != 0)
|
||||
PANIC("Cuda error (comparisons): the number of radix blocks has to be "
|
||||
"even.")
|
||||
host_integer_radix_difference_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -71,8 +68,6 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
|
||||
break;
|
||||
case MAX:
|
||||
case MIN:
|
||||
if (num_radix_blocks % 2 != 0)
|
||||
PANIC("Cuda error (max/min): the number of radix blocks has to be even.")
|
||||
host_integer_radix_maxmin_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -94,91 +89,3 @@ void cleanup_cuda_integer_comparison(void *const *streams,
|
||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
|
||||
false, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *buffer =
|
||||
(int_comparison_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_are_all_comparisons_block_true_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
|
||||
(uint64_t **)(ksks), num_radix_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_are_all_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *mem_ptr =
|
||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
big_lwe_dimension, small_lwe_dimension, ks_level,
|
||||
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
|
||||
false, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
|
||||
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *buffer =
|
||||
(int_comparison_buffer<uint64_t> *)mem_ptr;
|
||||
|
||||
host_integer_is_at_least_one_comparisons_block_true_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
|
||||
(uint64_t **)(ksks), num_radix_blocks);
|
||||
}
|
||||
|
||||
void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr_void) {
|
||||
|
||||
int_comparison_buffer<uint64_t> *mem_ptr =
|
||||
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
|
||||
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
|
||||
}
|
||||
|
||||
@@ -58,7 +58,7 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
|
||||
template <typename Torus>
|
||||
__host__ void are_all_comparisons_block_true(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
@@ -85,19 +85,16 @@ __host__ void are_all_comparisons_block_true(
|
||||
|
||||
while (remaining_blocks > 0) {
|
||||
// Split in max_value chunks
|
||||
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
|
||||
uint32_t chunk_length = std::min(max_value, remaining_blocks);
|
||||
int num_chunks = remaining_blocks / chunk_length;
|
||||
|
||||
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
|
||||
// as in the worst case we will be adding `max_value` ones
|
||||
auto input_blocks = tmp_out;
|
||||
auto accumulator = are_all_block_true_buffer->tmp_block_accumulated;
|
||||
auto is_max_value_lut = are_all_block_true_buffer->is_max_value;
|
||||
uint32_t chunk_lengths[num_chunks];
|
||||
auto begin_remaining_blocks = remaining_blocks;
|
||||
auto is_equal_to_num_blocks_map =
|
||||
&are_all_block_true_buffer->is_equal_to_lut_map;
|
||||
for (int i = 0; i < num_chunks; i++) {
|
||||
uint32_t chunk_length =
|
||||
std::min(max_value, begin_remaining_blocks - i * max_value);
|
||||
chunk_lengths[i] = chunk_length;
|
||||
accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
|
||||
input_blocks, big_lwe_dimension,
|
||||
chunk_length);
|
||||
@@ -114,33 +111,29 @@ __host__ void are_all_comparisons_block_true(
|
||||
// is_non_zero_lut_buffer LUT
|
||||
lut = mem_ptr->eq_buffer->is_non_zero_lut;
|
||||
} else {
|
||||
if (chunk_lengths[num_chunks - 1] != max_value) {
|
||||
if ((*is_equal_to_num_blocks_map).find(chunk_length) !=
|
||||
(*is_equal_to_num_blocks_map).end()) {
|
||||
// The LUT is already computed
|
||||
lut = (*is_equal_to_num_blocks_map)[chunk_length];
|
||||
} else {
|
||||
// LUT needs to be computed
|
||||
uint32_t chunk_length = chunk_lengths[num_chunks - 1];
|
||||
auto new_lut =
|
||||
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
|
||||
max_value, num_radix_blocks, true);
|
||||
|
||||
auto is_equal_to_num_blocks_lut_f = [chunk_length](Torus x) -> Torus {
|
||||
return x == chunk_length;
|
||||
};
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], is_max_value_lut->get_lut(0, 1),
|
||||
streams[0], gpu_indexes[0], new_lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
is_equal_to_num_blocks_lut_f);
|
||||
|
||||
Torus *h_lut_indexes = (Torus *)malloc(num_chunks * sizeof(Torus));
|
||||
for (int index = 0; index < num_chunks; index++) {
|
||||
if (index == num_chunks - 1) {
|
||||
h_lut_indexes[index] = 1;
|
||||
} else {
|
||||
h_lut_indexes[index] = 0;
|
||||
}
|
||||
}
|
||||
cuda_memcpy_async_to_gpu(is_max_value_lut->get_lut_indexes(0, 0),
|
||||
h_lut_indexes, num_chunks * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
is_max_value_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
free(h_lut_indexes);
|
||||
new_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
(*is_equal_to_num_blocks_map)[chunk_length] = new_lut;
|
||||
lut = new_lut;
|
||||
}
|
||||
lut = is_max_value_lut;
|
||||
}
|
||||
|
||||
// Applies the LUT
|
||||
@@ -167,7 +160,7 @@ __host__ void are_all_comparisons_block_true(
|
||||
template <typename Torus>
|
||||
__host__ void is_at_least_one_comparisons_block_true(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
@@ -189,18 +182,14 @@ __host__ void is_at_least_one_comparisons_block_true(
|
||||
uint32_t remaining_blocks = num_radix_blocks;
|
||||
while (remaining_blocks > 0) {
|
||||
// Split in max_value chunks
|
||||
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
|
||||
uint32_t chunk_length = std::min(max_value, remaining_blocks);
|
||||
int num_chunks = remaining_blocks / chunk_length;
|
||||
|
||||
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
|
||||
// as in the worst case we will be adding `max_value` ones
|
||||
auto input_blocks = mem_ptr->tmp_lwe_array_out;
|
||||
auto accumulator = buffer->tmp_block_accumulated;
|
||||
uint32_t chunk_lengths[num_chunks];
|
||||
auto begin_remaining_blocks = remaining_blocks;
|
||||
for (int i = 0; i < num_chunks; i++) {
|
||||
uint32_t chunk_length =
|
||||
std::min(max_value, begin_remaining_blocks - i * max_value);
|
||||
chunk_lengths[i] = chunk_length;
|
||||
accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
|
||||
input_blocks, big_lwe_dimension,
|
||||
chunk_length);
|
||||
@@ -460,9 +449,9 @@ __host__ void tree_sign_reduction(
|
||||
f = sign_handler_f;
|
||||
}
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], last_lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus, f);
|
||||
last_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
streams[0], gpu_indexes[0], last_lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus, f);
|
||||
last_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
// Last leaf
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
@@ -492,9 +481,8 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
if (carry_modulus >= message_modulus) {
|
||||
// Packing is possible
|
||||
// Pack inputs
|
||||
Torus *packed_left = diff_buffer->tmp_packed;
|
||||
Torus *packed_right =
|
||||
diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
|
||||
Torus *packed_left = diff_buffer->tmp_packed_left;
|
||||
Torus *packed_right = diff_buffer->tmp_packed_right;
|
||||
// In case the ciphertext is signed, the sign block and the one before it
|
||||
// are handled separately
|
||||
if (mem_ptr->is_signed) {
|
||||
@@ -513,7 +501,10 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
auto identity_lut = mem_ptr->identity_lut;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, packed_left, packed_left, bsks, ksks,
|
||||
2 * packed_num_radix_blocks, identity_lut);
|
||||
packed_num_radix_blocks, identity_lut);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, packed_right, packed_right, bsks, ksks,
|
||||
packed_num_radix_blocks, identity_lut);
|
||||
|
||||
lhs = packed_left;
|
||||
rhs = packed_right;
|
||||
@@ -542,13 +533,11 @@ __host__ void host_integer_radix_difference_check_kb(
|
||||
|
||||
// Compare the last block before the sign block separately
|
||||
auto identity_lut = mem_ptr->identity_lut;
|
||||
Torus *packed_left = diff_buffer->tmp_packed;
|
||||
Torus *packed_right =
|
||||
diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
|
||||
Torus *last_left_block_before_sign_block =
|
||||
packed_left + packed_num_radix_blocks * big_lwe_size;
|
||||
diff_buffer->tmp_packed_left + packed_num_radix_blocks * big_lwe_size;
|
||||
Torus *last_right_block_before_sign_block =
|
||||
packed_right + packed_num_radix_blocks * big_lwe_size;
|
||||
diff_buffer->tmp_packed_right +
|
||||
packed_num_radix_blocks * big_lwe_size;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, last_left_block_before_sign_block,
|
||||
lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks, 1,
|
||||
@@ -626,35 +615,4 @@ __host__ void host_integer_radix_maxmin_kb(
|
||||
mem_ptr->cmux_buffer, bsks, ksks, total_num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_are_all_comparisons_block_true_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
auto eq_buffer = mem_ptr->eq_buffer;
|
||||
|
||||
// It returns a block encrypting 1 if all input blocks are 1
|
||||
// otherwise the block encrypts 0
|
||||
are_all_comparisons_block_true<Torus>(streams, gpu_indexes, gpu_count,
|
||||
lwe_array_out, lwe_array_in, mem_ptr,
|
||||
bsks, ksks, num_radix_blocks);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
__host__ void host_integer_is_at_least_one_comparisons_block_true_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
|
||||
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
|
||||
Torus *const *ksks, uint32_t num_radix_blocks) {
|
||||
|
||||
auto eq_buffer = mem_ptr->eq_buffer;
|
||||
|
||||
// It returns a block encrypting 1 if all input blocks are 1
|
||||
// otherwise the block encrypts 0
|
||||
is_at_least_one_comparisons_block_true<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_in, mem_ptr,
|
||||
bsks, ksks, num_radix_blocks);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
#define CUDA_INTEGER_COMPRESSION_CUH
|
||||
|
||||
#include "ciphertext.h"
|
||||
#include "crypto/fast_packing_keyswitch.cuh"
|
||||
#include "crypto/keyswitch.cuh"
|
||||
#include "device.h"
|
||||
#include "integer/compression/compression.h"
|
||||
@@ -117,21 +116,11 @@ host_integer_compress(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
while (rem_lwes > 0) {
|
||||
auto chunk_size = min(rem_lwes, mem_ptr->lwe_per_glwe);
|
||||
|
||||
if (can_use_pks_fast_path(
|
||||
input_lwe_dimension, chunk_size, compression_params.polynomial_size,
|
||||
compression_params.ks_level, compression_params.glwe_dimension)) {
|
||||
host_fast_packing_keyswitch_lwe_list_to_glwe<Torus, ulonglong4>(
|
||||
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
|
||||
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
|
||||
compression_params.polynomial_size, compression_params.ks_base_log,
|
||||
compression_params.ks_level, chunk_size);
|
||||
} else {
|
||||
host_packing_keyswitch_lwe_list_to_glwe<Torus>(
|
||||
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
|
||||
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
|
||||
compression_params.polynomial_size, compression_params.ks_base_log,
|
||||
compression_params.ks_level, chunk_size);
|
||||
}
|
||||
host_packing_keyswitch_lwe_list_to_glwe<Torus>(
|
||||
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
|
||||
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
|
||||
compression_params.polynomial_size, compression_params.ks_base_log,
|
||||
compression_params.ks_level, chunk_size);
|
||||
|
||||
rem_lwes -= chunk_size;
|
||||
lwe_subset += chunk_size * lwe_in_size;
|
||||
@@ -311,7 +300,7 @@ __host__ void host_integer_decompress(
|
||||
/// Apply PBS to apply a LUT, reduce the noise and go from a small LWE
|
||||
/// dimension to a big LWE dimension
|
||||
auto encryption_params = h_mem_ptr->encryption_params;
|
||||
auto lut = h_mem_ptr->decompression_rescale_lut;
|
||||
auto lut = h_mem_ptr->carry_extract_lut;
|
||||
auto active_gpu_count = get_active_gpu_count(num_radix_blocks, gpu_count);
|
||||
if (active_gpu_count == 1) {
|
||||
execute_pbs_async<Torus>(
|
||||
|
||||
@@ -198,27 +198,6 @@ void scratch_cuda_apply_univariate_lut_kb_64(
|
||||
allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
|
||||
uint32_t num_many_lut, bool allocate_gpu_memory) {
|
||||
|
||||
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
|
||||
glwe_dimension * polynomial_size, lwe_dimension,
|
||||
ks_level, ks_base_log, pbs_level, pbs_base_log,
|
||||
grouping_factor, message_modulus, carry_modulus);
|
||||
|
||||
scratch_cuda_apply_many_univariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
num_many_lut, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_apply_univariate_lut_kb_64(void *const *streams,
|
||||
uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, void *output_radix_lwe,
|
||||
@@ -258,7 +237,7 @@ void cuda_apply_many_univariate_lut_kb_64(
|
||||
|
||||
void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
|
||||
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
|
||||
int8_t **mem_ptr, void *input_lut, uint32_t lwe_dimension,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
|
||||
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
|
||||
uint32_t grouping_factor, uint32_t num_radix_blocks,
|
||||
@@ -272,9 +251,8 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
|
||||
|
||||
scratch_cuda_apply_bivariate_lut_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
(int_radix_lut<uint64_t> **)mem_ptr,
|
||||
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
|
||||
allocate_gpu_memory);
|
||||
(int_radix_lut<uint64_t> **)mem_ptr, static_cast<uint64_t *>(input_lut),
|
||||
num_radix_blocks, params, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
void cuda_apply_bivariate_lut_kb_64(
|
||||
|
||||
@@ -627,48 +627,26 @@ void rotate_left(Torus *buffer, int mid, uint32_t array_length) {
|
||||
std::rotate(buffer, buffer + mid, buffer + array_length);
|
||||
}
|
||||
|
||||
/// Caller needs to ensure that the operation applied is coherent from an
|
||||
/// encoding perspective.
|
||||
///
|
||||
/// For example:
|
||||
///
|
||||
/// Input encoding has 2 bits and output encoding has 4 bits, applying the
|
||||
/// identity lut would map the following:
|
||||
///
|
||||
/// 0|00|xx -> 0|00|00
|
||||
/// 0|01|xx -> 0|00|01
|
||||
/// 0|10|xx -> 0|00|10
|
||||
/// 0|11|xx -> 0|00|11
|
||||
///
|
||||
/// The reason is the identity function is computed in the input space but the
|
||||
/// scaling is done in the output space, as there are more bits in the output
|
||||
/// space, the delta is smaller hence the apparent "division" happening.
|
||||
template <typename Torus>
|
||||
void generate_lookup_table_with_encoding(Torus *acc, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size,
|
||||
uint32_t input_message_modulus,
|
||||
uint32_t input_carry_modulus,
|
||||
uint32_t output_message_modulus,
|
||||
uint32_t output_carry_modulus,
|
||||
std::function<Torus(Torus)> f) {
|
||||
void generate_lookup_table(Torus *acc, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t message_modulus,
|
||||
uint32_t carry_modulus,
|
||||
std::function<Torus(Torus)> f) {
|
||||
|
||||
uint32_t input_modulus_sup = input_message_modulus * input_carry_modulus;
|
||||
uint32_t output_modulus_sup = output_message_modulus * output_carry_modulus;
|
||||
uint32_t box_size = polynomial_size / input_modulus_sup;
|
||||
auto nbits = sizeof(Torus) * 8;
|
||||
Torus output_delta =
|
||||
(static_cast<Torus>(1) << (nbits - 1)) / output_modulus_sup;
|
||||
uint32_t modulus_sup = message_modulus * carry_modulus;
|
||||
uint32_t box_size = polynomial_size / modulus_sup;
|
||||
Torus delta = (1ul << 63) / modulus_sup;
|
||||
|
||||
memset(acc, 0, glwe_dimension * polynomial_size * sizeof(Torus));
|
||||
|
||||
auto body = &acc[glwe_dimension * polynomial_size];
|
||||
|
||||
// This accumulator extracts the carry bits
|
||||
for (int i = 0; i < input_modulus_sup; i++) {
|
||||
for (int i = 0; i < modulus_sup; i++) {
|
||||
int index = i * box_size;
|
||||
for (int j = index; j < index + box_size; j++) {
|
||||
auto f_eval = f(i);
|
||||
body[j] = f_eval * output_delta;
|
||||
body[j] = f_eval * delta;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -682,16 +660,6 @@ void generate_lookup_table_with_encoding(Torus *acc, uint32_t glwe_dimension,
|
||||
rotate_left<Torus>(body, half_box_size, polynomial_size);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void generate_lookup_table(Torus *acc, uint32_t glwe_dimension,
|
||||
uint32_t polynomial_size, uint32_t message_modulus,
|
||||
uint32_t carry_modulus,
|
||||
std::function<Torus(Torus)> f) {
|
||||
generate_lookup_table_with_encoding(acc, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus,
|
||||
message_modulus, carry_modulus, f);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void generate_many_lookup_table(
|
||||
Torus *acc, uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
@@ -700,8 +668,7 @@ void generate_many_lookup_table(
|
||||
|
||||
uint32_t modulus_sup = message_modulus * carry_modulus;
|
||||
uint32_t box_size = polynomial_size / modulus_sup;
|
||||
auto nbits = sizeof(Torus) * 8;
|
||||
Torus delta = (static_cast<Torus>(1) << (nbits - 1)) / modulus_sup;
|
||||
Torus delta = (1ul << 63) / modulus_sup;
|
||||
|
||||
memset(acc, 0, glwe_dimension * polynomial_size * sizeof(Torus));
|
||||
|
||||
@@ -836,32 +803,6 @@ void generate_device_accumulator_bivariate_with_factor(
|
||||
free(h_lut);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void generate_device_accumulator_with_encoding(
|
||||
cudaStream_t stream, uint32_t gpu_index, Torus *acc,
|
||||
uint32_t glwe_dimension, uint32_t polynomial_size,
|
||||
uint32_t input_message_modulus, uint32_t input_carry_modulus,
|
||||
uint32_t output_message_modulus, uint32_t output_carry_modulus,
|
||||
std::function<Torus(Torus)> f) {
|
||||
|
||||
// host lut
|
||||
Torus *h_lut =
|
||||
(Torus *)malloc((glwe_dimension + 1) * polynomial_size * sizeof(Torus));
|
||||
|
||||
// fill accumulator
|
||||
generate_lookup_table_with_encoding<Torus>(
|
||||
h_lut, glwe_dimension, polynomial_size, input_message_modulus,
|
||||
input_carry_modulus, output_message_modulus, output_carry_modulus, f);
|
||||
|
||||
// copy host lut and lut_indexes_vec to device
|
||||
cuda_memcpy_async_to_gpu(
|
||||
acc, h_lut, (glwe_dimension + 1) * polynomial_size * sizeof(Torus),
|
||||
stream, gpu_index);
|
||||
|
||||
cuda_synchronize_stream(stream, gpu_index);
|
||||
free(h_lut);
|
||||
}
|
||||
|
||||
/*
|
||||
* generate accumulator for device pointer
|
||||
* v_stream - cuda stream
|
||||
@@ -877,9 +818,21 @@ void generate_device_accumulator(cudaStream_t stream, uint32_t gpu_index,
|
||||
uint32_t carry_modulus,
|
||||
std::function<Torus(Torus)> f) {
|
||||
|
||||
generate_device_accumulator_with_encoding(
|
||||
stream, gpu_index, acc, glwe_dimension, polynomial_size, message_modulus,
|
||||
carry_modulus, message_modulus, carry_modulus, f);
|
||||
// host lut
|
||||
Torus *h_lut =
|
||||
(Torus *)malloc((glwe_dimension + 1) * polynomial_size * sizeof(Torus));
|
||||
|
||||
// fill accumulator
|
||||
generate_lookup_table<Torus>(h_lut, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, f);
|
||||
|
||||
// copy host lut and lut_indexes_vec to device
|
||||
cuda_memcpy_async_to_gpu(
|
||||
acc, h_lut, (glwe_dimension + 1) * polynomial_size * sizeof(Torus),
|
||||
stream, gpu_index);
|
||||
|
||||
cuda_synchronize_stream(stream, gpu_index);
|
||||
free(h_lut);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1102,8 +1055,7 @@ void host_compute_propagation_simulators_and_group_carries(
|
||||
message_modulus, carry_modulus);
|
||||
|
||||
uint32_t modulus_sup = message_modulus * carry_modulus;
|
||||
auto nbits = sizeof(Torus) * 8;
|
||||
Torus delta = (static_cast<Torus>(1) << (nbits - 1)) / modulus_sup;
|
||||
Torus delta = (1ull << 63) / modulus_sup;
|
||||
auto simulators = mem->simulators;
|
||||
auto grouping_pgns = mem->grouping_pgns;
|
||||
host_radix_split_simulators_and_grouping_pgns<Torus>(
|
||||
@@ -1430,8 +1382,8 @@ __host__ void
|
||||
create_trivial_radix(cudaStream_t stream, uint32_t gpu_index,
|
||||
Torus *lwe_array_out, Torus const *scalar_array,
|
||||
uint32_t lwe_dimension, uint32_t num_radix_blocks,
|
||||
uint32_t num_scalar_blocks, Torus message_modulus,
|
||||
Torus carry_modulus) {
|
||||
uint32_t num_scalar_blocks, uint64_t message_modulus,
|
||||
uint64_t carry_modulus) {
|
||||
|
||||
cudaSetDevice(gpu_index);
|
||||
size_t radix_size = (lwe_dimension + 1) * num_radix_blocks;
|
||||
@@ -1451,9 +1403,7 @@ create_trivial_radix(cudaStream_t stream, uint32_t gpu_index,
|
||||
// Value of the shift we multiply our messages by
|
||||
// If message_modulus and carry_modulus are always powers of 2 we can simplify
|
||||
// this
|
||||
auto nbits = sizeof(Torus) * 8;
|
||||
Torus delta = (static_cast<Torus>(1) << (nbits - 1)) /
|
||||
(message_modulus * carry_modulus);
|
||||
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
|
||||
|
||||
device_create_trivial_radix<Torus><<<grid, thds, 0, stream>>>(
|
||||
lwe_array_out, scalar_array, num_scalar_blocks, lwe_dimension, delta);
|
||||
@@ -1513,10 +1463,10 @@ reduce_signs(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
if (num_sign_blocks > 2) {
|
||||
auto lut = diff_buffer->reduce_signs_lut;
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
reduce_two_orderings_function);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
while (num_sign_blocks > 2) {
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], signs_b, signs_a,
|
||||
@@ -1547,9 +1497,10 @@ reduce_signs(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
|
||||
auto lut = diff_buffer->reduce_signs_lut;
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus, final_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
final_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], signs_b, signs_a,
|
||||
big_lwe_dimension, 2, 4);
|
||||
@@ -1566,9 +1517,10 @@ reduce_signs(cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
|
||||
auto lut = mem_ptr->diff_buffer->reduce_signs_lut;
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus, final_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
final_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, signs_array_out, signs_a, bsks, ksks,
|
||||
@@ -1587,11 +1539,11 @@ void scratch_cuda_apply_univariate_lut_kb(
|
||||
1, num_radix_blocks, allocate_gpu_memory);
|
||||
// It is safe to do this copy on GPU 0, because all LUTs always reside on GPU
|
||||
// 0
|
||||
cuda_memcpy_async_to_gpu((*mem_ptr)->get_lut(0, 0), (void *)input_lut,
|
||||
(params.glwe_dimension + 1) *
|
||||
params.polynomial_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
(*mem_ptr)->broadcast_lut(streams, gpu_indexes, 0);
|
||||
cuda_memcpy_async_to_gpu(
|
||||
(*mem_ptr)->get_lut(gpu_indexes[0], 0), (void *)input_lut,
|
||||
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
(*mem_ptr)->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
@@ -1607,25 +1559,6 @@ void host_apply_univariate_lut_kb(cudaStream_t const *streams,
|
||||
num_blocks, mem);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void scratch_cuda_apply_many_univariate_lut_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
uint32_t gpu_count, int_radix_lut<Torus> **mem_ptr, Torus const *input_lut,
|
||||
uint32_t num_radix_blocks, int_radix_params params, uint32_t num_many_lut,
|
||||
bool allocate_gpu_memory) {
|
||||
|
||||
*mem_ptr = new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
|
||||
1, num_radix_blocks, num_many_lut,
|
||||
allocate_gpu_memory);
|
||||
// It is safe to do this copy on GPU 0, because all LUTs always reside on GPU
|
||||
// 0
|
||||
cuda_memcpy_async_to_gpu((*mem_ptr)->get_lut(0, 0), (void *)input_lut,
|
||||
(params.glwe_dimension + 1) *
|
||||
params.polynomial_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
(*mem_ptr)->broadcast_lut(streams, gpu_indexes, 0);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
void host_apply_many_univariate_lut_kb(
|
||||
cudaStream_t const *streams, uint32_t const *gpu_indexes,
|
||||
@@ -1649,11 +1582,11 @@ void scratch_cuda_apply_bivariate_lut_kb(
|
||||
1, num_radix_blocks, allocate_gpu_memory);
|
||||
// It is safe to do this copy on GPU 0, because all LUTs always reside on GPU
|
||||
// 0
|
||||
cuda_memcpy_async_to_gpu((*mem_ptr)->get_lut(0, 0), (void *)input_lut,
|
||||
(params.glwe_dimension + 1) *
|
||||
params.polynomial_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
(*mem_ptr)->broadcast_lut(streams, gpu_indexes, 0);
|
||||
cuda_memcpy_async_to_gpu(
|
||||
(*mem_ptr)->get_lut(gpu_indexes[0], 0), (void *)input_lut,
|
||||
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
(*mem_ptr)->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
}
|
||||
|
||||
template <typename Torus>
|
||||
@@ -1693,12 +1626,13 @@ void host_propagate_single_carry(cudaStream_t const *streams,
|
||||
auto params = mem->params;
|
||||
auto glwe_dimension = params.glwe_dimension;
|
||||
auto polynomial_size = params.polynomial_size;
|
||||
auto message_modulus = params.message_modulus;
|
||||
auto carry_modulus = params.carry_modulus;
|
||||
uint32_t big_lwe_size = glwe_dimension * polynomial_size + 1;
|
||||
auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
auto big_lwe_dimension = big_lwe_size - 1; // For host addition
|
||||
auto lut_stride = mem->lut_stride;
|
||||
auto num_many_lut = mem->num_many_lut;
|
||||
auto output_flag = mem->output_flag + big_lwe_size * num_radix_blocks;
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW)
|
||||
PANIC("Cuda error: single carry propagation is not supported for overflow, "
|
||||
"try using add_and_propagate_single_carry");
|
||||
@@ -1715,7 +1649,7 @@ void host_propagate_single_carry(cudaStream_t const *streams,
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_CARRY) {
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
|
||||
mem->output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
// Step 2
|
||||
@@ -1735,40 +1669,45 @@ void host_propagate_single_carry(cudaStream_t const *streams,
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW ||
|
||||
requested_flag == outputFlag::FLAG_CARRY) {
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], mem->output_flag,
|
||||
mem->output_flag,
|
||||
mem->prop_simu_group_carries_mem->simulators +
|
||||
(num_radix_blocks - 1) * big_lwe_size,
|
||||
big_lwe_dimension, 1);
|
||||
}
|
||||
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
// Step 3
|
||||
// Add carries and cleanup OutputFlag::None
|
||||
host_radix_sum_in_groups<Torus>(
|
||||
streams[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
|
||||
mem->sub_streams_1[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
|
||||
mem->prop_simu_group_carries_mem->resolved_carries, num_radix_blocks,
|
||||
big_lwe_size, group_size);
|
||||
|
||||
auto message_extract = mem->lut_message_extract;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem->sub_streams_1, gpu_indexes, gpu_count, lwe_array, prepared_blocks,
|
||||
bsks, ksks, num_radix_blocks, message_extract);
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_CARRY) {
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
|
||||
host_addition<Torus>(mem->sub_streams_2[0], gpu_indexes[0],
|
||||
mem->output_flag, mem->output_flag,
|
||||
mem->prop_simu_group_carries_mem->resolved_carries +
|
||||
(mem->num_groups - 1) * big_lwe_size,
|
||||
big_lwe_dimension, 1);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
prepared_blocks + num_radix_blocks * big_lwe_size, output_flag,
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem->output_flag, prepared_blocks,
|
||||
bsks, ksks, num_radix_blocks + 1, mem->lut_message_extract);
|
||||
mem->sub_streams_2, gpu_indexes, gpu_count, mem->output_flag,
|
||||
mem->output_flag, bsks, ksks, 1, mem->lut_carry_flag_last);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(lwe_array, mem->output_flag,
|
||||
big_lwe_size_bytes * num_radix_blocks,
|
||||
streams[0], gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
carry_out, mem->output_flag + num_radix_blocks * big_lwe_size,
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
} else {
|
||||
auto message_extract = mem->lut_message_extract;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array, prepared_blocks, bsks, ksks,
|
||||
num_radix_blocks, message_extract);
|
||||
cuda_memcpy_async_gpu_to_gpu(carry_out, mem->output_flag,
|
||||
big_lwe_size_bytes, mem->sub_streams_2[0],
|
||||
gpu_indexes[0]);
|
||||
}
|
||||
for (int j = 0; j < mem->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem->sub_streams_2[j], gpu_indexes[j]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1784,12 +1723,13 @@ void host_add_and_propagate_single_carry(
|
||||
auto params = mem->params;
|
||||
auto glwe_dimension = params.glwe_dimension;
|
||||
auto polynomial_size = params.polynomial_size;
|
||||
auto message_modulus = params.message_modulus;
|
||||
auto carry_modulus = params.carry_modulus;
|
||||
uint32_t big_lwe_size = glwe_dimension * polynomial_size + 1;
|
||||
auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
|
||||
auto big_lwe_dimension = big_lwe_size - 1; // For host addition
|
||||
auto lut_stride = mem->lut_stride;
|
||||
auto num_many_lut = mem->num_many_lut;
|
||||
auto output_flag = mem->output_flag + big_lwe_size * num_radix_blocks;
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW) {
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
@@ -1816,12 +1756,12 @@ void host_add_and_propagate_single_carry(
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW) {
|
||||
auto lut_overflow_prep = mem->lut_overflow_flag_prep;
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, output_flag, mem->last_lhs,
|
||||
streams, gpu_indexes, gpu_count, mem->output_flag, mem->last_lhs,
|
||||
mem->last_rhs, bsks, ksks, 1, lut_overflow_prep,
|
||||
lut_overflow_prep->params.message_modulus);
|
||||
} else if (requested_flag == outputFlag::FLAG_CARRY) {
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
|
||||
mem->output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
}
|
||||
|
||||
@@ -1842,50 +1782,58 @@ void host_add_and_propagate_single_carry(
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW ||
|
||||
requested_flag == outputFlag::FLAG_CARRY) {
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], mem->output_flag,
|
||||
mem->output_flag,
|
||||
mem->prop_simu_group_carries_mem->simulators +
|
||||
(num_radix_blocks - 1) * big_lwe_size,
|
||||
big_lwe_dimension, 1);
|
||||
}
|
||||
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
// Step 3
|
||||
// Add carries and cleanup OutputFlag::None
|
||||
host_radix_sum_in_groups<Torus>(
|
||||
streams[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
|
||||
mem->sub_streams_1[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
|
||||
mem->prop_simu_group_carries_mem->resolved_carries, num_radix_blocks,
|
||||
big_lwe_size, group_size);
|
||||
|
||||
auto message_extract = mem->lut_message_extract;
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem->sub_streams_1, gpu_indexes, gpu_count, lhs_array, prepared_blocks,
|
||||
bsks, ksks, num_radix_blocks, message_extract);
|
||||
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW ||
|
||||
requested_flag == outputFlag::FLAG_CARRY) {
|
||||
if (num_radix_blocks == 1 && requested_flag == outputFlag::FLAG_OVERFLOW &&
|
||||
uses_carry == 1) {
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
|
||||
input_carries, big_lwe_dimension, 1);
|
||||
host_addition<Torus>(mem->sub_streams_2[0], gpu_indexes[0],
|
||||
mem->output_flag, mem->output_flag, input_carries,
|
||||
big_lwe_dimension, 1);
|
||||
|
||||
} else {
|
||||
|
||||
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
|
||||
host_addition<Torus>(mem->sub_streams_2[0], gpu_indexes[0],
|
||||
mem->output_flag, mem->output_flag,
|
||||
mem->prop_simu_group_carries_mem->resolved_carries +
|
||||
(mem->num_groups - 1) * big_lwe_size,
|
||||
big_lwe_dimension, 1);
|
||||
}
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
prepared_blocks + num_radix_blocks * big_lwe_size, output_flag,
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, mem->output_flag, prepared_blocks,
|
||||
bsks, ksks, num_radix_blocks + 1, mem->lut_message_extract);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(lhs_array, mem->output_flag,
|
||||
big_lwe_size_bytes * num_radix_blocks,
|
||||
streams[0], gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
carry_out, mem->output_flag + num_radix_blocks * big_lwe_size,
|
||||
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
|
||||
} else {
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lhs_array, prepared_blocks, bsks, ksks,
|
||||
num_radix_blocks, mem->lut_message_extract);
|
||||
if (requested_flag == outputFlag::FLAG_OVERFLOW) {
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem->sub_streams_2, gpu_indexes, gpu_count, mem->output_flag,
|
||||
mem->output_flag, bsks, ksks, 1, mem->lut_overflow_flag_last);
|
||||
} else {
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
mem->sub_streams_2, gpu_indexes, gpu_count, mem->output_flag,
|
||||
mem->output_flag, bsks, ksks, 1, mem->lut_carry_flag_last);
|
||||
}
|
||||
cuda_memcpy_async_gpu_to_gpu(carry_out, mem->output_flag,
|
||||
big_lwe_size_bytes, mem->sub_streams_2[0],
|
||||
gpu_indexes[0]);
|
||||
}
|
||||
for (int j = 0; j < mem->active_gpu_count; j++) {
|
||||
cuda_synchronize_stream(mem->sub_streams_1[j], gpu_indexes[j]);
|
||||
cuda_synchronize_stream(mem->sub_streams_2[j], gpu_indexes[j]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -267,8 +267,8 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
streams, gpu_indexes, gpu_count, mem_ptr->params, 2,
|
||||
2 * ch_amount * num_blocks, reused_lut);
|
||||
}
|
||||
auto message_acc = luts_message_carry->get_lut(0, 0);
|
||||
auto carry_acc = luts_message_carry->get_lut(0, 1);
|
||||
auto message_acc = luts_message_carry->get_lut(gpu_indexes[0], 0);
|
||||
auto carry_acc = luts_message_carry->get_lut(gpu_indexes[0], 1);
|
||||
|
||||
// define functions for each accumulator
|
||||
auto lut_f_message = [message_modulus](Torus x) -> Torus {
|
||||
@@ -285,7 +285,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], carry_acc, glwe_dimension, polynomial_size,
|
||||
message_modulus, carry_modulus, lut_f_carry);
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
while (r > 2) {
|
||||
size_t cur_total_blocks = r * num_blocks;
|
||||
@@ -334,10 +334,10 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
|
||||
if (carry_count > 0)
|
||||
cuda_set_value_async<Torus>(
|
||||
streams[0], gpu_indexes[0],
|
||||
luts_message_carry->get_lut_indexes(0, message_count), 1,
|
||||
luts_message_carry->get_lut_indexes(gpu_indexes[0], message_count), 1,
|
||||
carry_count);
|
||||
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
|
||||
luts_message_carry->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
/// For multi GPU execution we create vectors of pointers for inputs and
|
||||
/// outputs
|
||||
|
||||
@@ -31,10 +31,10 @@ __host__ void host_integer_radix_scalar_bitop_kb(
|
||||
} else {
|
||||
// We have all possible LUTs pre-computed and we use the decomposed scalar
|
||||
// as index to recover the right one
|
||||
cuda_memcpy_async_gpu_to_gpu(lut->get_lut_indexes(0, 0), clear_blocks,
|
||||
num_clear_blocks * sizeof(Torus), streams[0],
|
||||
gpu_indexes[0]);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
cuda_memcpy_async_gpu_to_gpu(lut->get_lut_indexes(gpu_indexes[0], 0),
|
||||
clear_blocks, num_clear_blocks * sizeof(Torus),
|
||||
streams[0], gpu_indexes[0]);
|
||||
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_input, bsks,
|
||||
|
||||
@@ -22,9 +22,6 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
|
||||
case GE:
|
||||
case LT:
|
||||
case LE:
|
||||
if (lwe_ciphertext_count % 2 != 0)
|
||||
PANIC("Cuda error (scalar comparisons): the number of radix blocks has "
|
||||
"to be even.")
|
||||
host_integer_radix_scalar_difference_check_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
@@ -35,9 +32,6 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
|
||||
break;
|
||||
case MAX:
|
||||
case MIN:
|
||||
if (lwe_ciphertext_count % 2 != 0)
|
||||
PANIC("Cuda error (scalar max/min): the number of radix blocks has to be "
|
||||
"even.")
|
||||
host_integer_radix_scalar_maxmin_kb<uint64_t>(
|
||||
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
|
||||
static_cast<uint64_t *>(lwe_array_out),
|
||||
|
||||
@@ -110,11 +110,11 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
};
|
||||
|
||||
auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
|
||||
generate_device_accumulator<Torus>(streams[0], gpu_indexes[0],
|
||||
lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus,
|
||||
carry_modulus, scalar_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
generate_device_accumulator<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
scalar_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out,
|
||||
@@ -141,9 +141,8 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
|
||||
//////////////
|
||||
// lsb
|
||||
Torus *lhs = diff_buffer->tmp_packed;
|
||||
Torus *rhs =
|
||||
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
|
||||
Torus *lhs = diff_buffer->tmp_packed_left;
|
||||
Torus *rhs = diff_buffer->tmp_packed_right;
|
||||
|
||||
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
|
||||
big_lwe_dimension, num_lsb_radix_blocks,
|
||||
@@ -195,10 +194,10 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
|
||||
auto lut = diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
scalar_bivariate_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_lsb_out,
|
||||
@@ -211,9 +210,8 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
|
||||
uint32_t num_lsb_radix_blocks = total_num_radix_blocks;
|
||||
uint32_t num_scalar_blocks = total_num_scalar_blocks;
|
||||
|
||||
Torus *lhs = diff_buffer->tmp_packed;
|
||||
Torus *rhs =
|
||||
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
|
||||
Torus *lhs = diff_buffer->tmp_packed_left;
|
||||
Torus *rhs = diff_buffer->tmp_packed_right;
|
||||
|
||||
pack_blocks<Torus>(streams[0], gpu_indexes[0], lhs, lwe_array_in,
|
||||
big_lwe_dimension, num_lsb_radix_blocks,
|
||||
@@ -331,10 +329,10 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
|
||||
auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
|
||||
polynomial_size, message_modulus, carry_modulus,
|
||||
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
|
||||
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
|
||||
scalar_bivariate_last_leaf_lut_f);
|
||||
lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
streams, gpu_indexes, gpu_count, lwe_array_out, are_all_msb_zeros,
|
||||
@@ -360,9 +358,8 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
|
||||
//////////////
|
||||
// lsb
|
||||
Torus *lhs = diff_buffer->tmp_packed;
|
||||
Torus *rhs =
|
||||
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
|
||||
Torus *lhs = diff_buffer->tmp_packed_left;
|
||||
Torus *rhs = diff_buffer->tmp_packed_right;
|
||||
|
||||
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
|
||||
big_lwe_dimension, num_lsb_radix_blocks,
|
||||
@@ -425,10 +422,11 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
|
||||
auto signed_msb_lut = mem_ptr->signed_msb_lut;
|
||||
generate_device_accumulator_bivariate<Torus>(
|
||||
msb_streams[0], gpu_indexes[0], signed_msb_lut->get_lut(0, 0),
|
||||
params.glwe_dimension, params.polynomial_size, params.message_modulus,
|
||||
params.carry_modulus, lut_f);
|
||||
signed_msb_lut->broadcast_lut(streams, gpu_indexes, 0);
|
||||
msb_streams[0], gpu_indexes[0],
|
||||
signed_msb_lut->get_lut(gpu_indexes[0], 0), params.glwe_dimension,
|
||||
params.polynomial_size, params.message_modulus, params.carry_modulus,
|
||||
lut_f);
|
||||
signed_msb_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
|
||||
|
||||
Torus const *sign_block = msb + (num_msb_radix_blocks - 1) * big_lwe_size;
|
||||
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
|
||||
@@ -461,9 +459,8 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
|
||||
auto lwe_array_ct_out = mem_ptr->tmp_lwe_array_out;
|
||||
auto lwe_array_sign_out =
|
||||
lwe_array_ct_out + (num_lsb_radix_blocks / 2) * big_lwe_size;
|
||||
Torus *lhs = diff_buffer->tmp_packed;
|
||||
Torus *rhs =
|
||||
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
|
||||
Torus *lhs = diff_buffer->tmp_packed_left;
|
||||
Torus *rhs = diff_buffer->tmp_packed_right;
|
||||
|
||||
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
|
||||
big_lwe_dimension, num_lsb_radix_blocks - 1,
|
||||
@@ -679,10 +676,10 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
|
||||
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], packed_scalar,
|
||||
scalar_blocks, 0, num_scalar_blocks, message_modulus);
|
||||
|
||||
cuda_memcpy_async_gpu_to_gpu(scalar_comparison_luts->get_lut_indexes(0, 0),
|
||||
packed_scalar,
|
||||
num_halved_scalar_blocks * sizeof(Torus),
|
||||
lsb_streams[0], gpu_indexes[0]);
|
||||
cuda_memcpy_async_gpu_to_gpu(
|
||||
scalar_comparison_luts->get_lut_indexes(gpu_indexes[0], 0),
|
||||
packed_scalar, num_halved_scalar_blocks * sizeof(Torus), lsb_streams[0],
|
||||
gpu_indexes[0]);
|
||||
scalar_comparison_luts->broadcast_lut(lsb_streams, gpu_indexes, 0);
|
||||
|
||||
integer_radix_apply_univariate_lookup_table_kb<Torus>(
|
||||
|
||||
@@ -36,7 +36,7 @@ __host__ void scratch_cuda_integer_radix_scalar_mul_kb(
|
||||
|
||||
*mem_ptr =
|
||||
new int_scalar_mul_buffer<T>(streams, gpu_indexes, gpu_count, params,
|
||||
num_radix_blocks, allocate_gpu_memory, true);
|
||||
num_radix_blocks, allocate_gpu_memory);
|
||||
}
|
||||
|
||||
template <typename T, class params>
|
||||
@@ -94,11 +94,9 @@ __host__ void host_integer_scalar_mul_radix(
|
||||
}
|
||||
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
|
||||
|
||||
if (mem->anticipated_buffers_drop) {
|
||||
cuda_drop_async(preshifted_buffer, streams[0], gpu_indexes[0]);
|
||||
mem->logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
|
||||
delete (mem->logical_scalar_shift_buffer);
|
||||
}
|
||||
cuda_drop_async(preshifted_buffer, streams[0], gpu_indexes[0]);
|
||||
mem->logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
|
||||
delete (mem->logical_scalar_shift_buffer);
|
||||
|
||||
if (j == 0) {
|
||||
// lwe array = 0
|
||||
|
||||
@@ -136,7 +136,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<512>>(
|
||||
host_programmable_bootstrap_tbc<Torus, Degree<512>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
@@ -144,7 +144,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<1024>>(
|
||||
host_programmable_bootstrap_tbc<Torus, Degree<1024>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
@@ -393,7 +393,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_programmable_bootstrap_cg<Torus, AmortizedDegree<512>>(
|
||||
host_programmable_bootstrap_cg<Torus, Degree<512>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
@@ -401,7 +401,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_programmable_bootstrap_cg<Torus, AmortizedDegree<1024>>(
|
||||
host_programmable_bootstrap_cg<Torus, Degree<1024>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
@@ -468,7 +468,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 512:
|
||||
host_programmable_bootstrap<Torus, AmortizedDegree<512>>(
|
||||
host_programmable_bootstrap<Torus, Degree<512>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
@@ -476,7 +476,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
|
||||
num_many_lut, lut_stride);
|
||||
break;
|
||||
case 1024:
|
||||
host_programmable_bootstrap<Torus, AmortizedDegree<1024>>(
|
||||
host_programmable_bootstrap<Torus, Degree<1024>>(
|
||||
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
|
||||
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
|
||||
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
|
||||
|
||||
@@ -237,7 +237,7 @@ TEST_P(ClassicalProgrammableBootstrapTestPrimitives_u64, bootstrap) {
|
||||
(ClassicalProgrammableBootstrapTestParams){
|
||||
887, 1, 2048, new_t_uniform(46), new_t_uniform(17), 22, 1, 4, 4,
|
||||
100, 1, 1},
|
||||
// V0_11_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64
|
||||
// PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64
|
||||
(ClassicalProgrammableBootstrapTestParams){
|
||||
977, 1, 8192, new_gaussian_from_std_dev(3.0144389706858286e-07),
|
||||
new_gaussian_from_std_dev(2.168404344971009e-19), 16, 2, 8, 8, 100,
|
||||
|
||||
@@ -163,29 +163,6 @@ extern "C" {
|
||||
allocate_gpu_memory: bool,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn scratch_cuda_apply_many_univariate_lut_kb_64(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
mem_ptr: *mut *mut i8,
|
||||
input_lut: *const ffi::c_void,
|
||||
lwe_dimension: u32,
|
||||
glwe_dimension: u32,
|
||||
polynomial_size: u32,
|
||||
ks_level: u32,
|
||||
ks_base_log: u32,
|
||||
pbs_level: u32,
|
||||
pbs_base_log: u32,
|
||||
grouping_factor: u32,
|
||||
num_radix_blocks: u32,
|
||||
message_modulus: u32,
|
||||
carry_modulus: u32,
|
||||
pbs_type: PBS_TYPE,
|
||||
num_many_lut: u32,
|
||||
allocate_gpu_memory: bool,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cuda_apply_univariate_lut_kb_64(
|
||||
streams: *const *mut ffi::c_void,
|
||||
@@ -1106,92 +1083,6 @@ extern "C" {
|
||||
mem_ptr_void: *mut *mut i8,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
mem_ptr: *mut *mut i8,
|
||||
glwe_dimension: u32,
|
||||
polynomial_size: u32,
|
||||
big_lwe_dimension: u32,
|
||||
small_lwe_dimension: u32,
|
||||
ks_level: u32,
|
||||
ks_base_log: u32,
|
||||
pbs_level: u32,
|
||||
pbs_base_log: u32,
|
||||
grouping_factor: u32,
|
||||
num_radix_blocks: u32,
|
||||
message_modulus: u32,
|
||||
carry_modulus: u32,
|
||||
pbs_type: PBS_TYPE,
|
||||
allocate_gpu_memory: bool,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cuda_integer_are_all_comparisons_block_true_kb_64(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
lwe_array_out: *mut ffi::c_void,
|
||||
lwe_array_in: *const ffi::c_void,
|
||||
mem_ptr: *mut i8,
|
||||
bsks: *const *mut ffi::c_void,
|
||||
ksks: *const *mut ffi::c_void,
|
||||
num_radix_blocks: u32,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cleanup_cuda_integer_are_all_comparisons_block_true(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
mem_ptr_void: *mut *mut i8,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
mem_ptr: *mut *mut i8,
|
||||
glwe_dimension: u32,
|
||||
polynomial_size: u32,
|
||||
big_lwe_dimension: u32,
|
||||
small_lwe_dimension: u32,
|
||||
ks_level: u32,
|
||||
ks_base_log: u32,
|
||||
pbs_level: u32,
|
||||
pbs_base_log: u32,
|
||||
grouping_factor: u32,
|
||||
num_radix_blocks: u32,
|
||||
message_modulus: u32,
|
||||
carry_modulus: u32,
|
||||
pbs_type: PBS_TYPE,
|
||||
allocate_gpu_memory: bool,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
lwe_array_out: *mut ffi::c_void,
|
||||
lwe_array_in: *const ffi::c_void,
|
||||
mem_ptr: *mut i8,
|
||||
bsks: *const *mut ffi::c_void,
|
||||
ksks: *const *mut ffi::c_void,
|
||||
num_radix_blocks: u32,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
|
||||
streams: *const *mut ffi::c_void,
|
||||
gpu_indexes: *const u32,
|
||||
gpu_count: u32,
|
||||
mem_ptr_void: *mut *mut i8,
|
||||
);
|
||||
}
|
||||
extern "C" {
|
||||
pub fn cuda_keyswitch_lwe_ciphertext_vector_32(
|
||||
stream: *mut ffi::c_void,
|
||||
@@ -1229,7 +1120,6 @@ extern "C" {
|
||||
stream: *mut ffi::c_void,
|
||||
gpu_index: u32,
|
||||
fp_ks_buffer: *mut *mut i8,
|
||||
lwe_dimension: u32,
|
||||
glwe_dimension: u32,
|
||||
polynomial_size: u32,
|
||||
num_lwes: u32,
|
||||
|
||||
@@ -91,8 +91,6 @@ if __name__ == "__main__":
|
||||
"boolean_parameters_lattice_estimator.sage",
|
||||
"shortint_classic_parameters_lattice_estimator.sage",
|
||||
"shortint_multi_bit_parameters_lattice_estimator.sage",
|
||||
"shortint_cpke_parameters_lattice_estimator.sage",
|
||||
"shortint_list_compression_parameters_lattice_estimator.sage",
|
||||
):
|
||||
to_update, to_watch = check_security(params_filename)
|
||||
params_to_update.extend(to_update)
|
||||
|
||||
@@ -31,7 +31,7 @@ instance_type = "m6i.4xlarge"
|
||||
[backend.hyperstack.gpu-test]
|
||||
environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
|
||||
flavor_name = "n3-L40x1"
|
||||
flavor_name = "n3-RTX-A6000x1"
|
||||
|
||||
[backend.hyperstack.single-h100]
|
||||
environment_name = "canada"
|
||||
@@ -58,12 +58,6 @@ environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
|
||||
flavor_name = "n3-H100x8-NVLink"
|
||||
|
||||
|
||||
[backend.hyperstack.multi-h100-sxm5]
|
||||
environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
|
||||
flavor_name = "n3-H100-SXM5x8"
|
||||
|
||||
[backend.hyperstack.multi-a100-nvlink]
|
||||
environment_name = "canada"
|
||||
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
|
||||
|
||||
19
scripts/get_arch_feature.sh
Executable file
19
scripts/get_arch_feature.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -e
|
||||
|
||||
ARCH_FEATURE=x86_64
|
||||
|
||||
IS_AARCH64="$( (uname -a | grep -c "arm64\|aarch64") || true)"
|
||||
|
||||
if [[ "${IS_AARCH64}" != "0" ]]; then
|
||||
ARCH_FEATURE=aarch64
|
||||
fi
|
||||
|
||||
UNAME="$(uname)"
|
||||
|
||||
if [[ "${UNAME}" == "Linux" || "${UNAME}" == "Darwin" ]]; then
|
||||
ARCH_FEATURE="${ARCH_FEATURE}-unix"
|
||||
fi
|
||||
|
||||
echo "${ARCH_FEATURE}"
|
||||
@@ -10,9 +10,6 @@ function usage() {
|
||||
echo "--multi-bit Run multi-bit tests only: default off"
|
||||
echo "--unsigned-only Run only unsigned integer tests, by default both signed and unsigned tests are run"
|
||||
echo "--signed-only Run only signed integer tests, by default both signed and unsigned tests are run"
|
||||
echo "--nightly-tests Run integer tests configured for nightly runs (3_3 params)"
|
||||
echo "--fast-tests Run integer set but skip a subset of longer tests"
|
||||
echo "--long-tests Run only long run integer tests"
|
||||
echo "--cargo-profile The cargo profile used to build tests"
|
||||
echo "--backend Backend to use with tfhe-rs"
|
||||
echo "--avx512-support Set to ON to enable avx512"
|
||||
@@ -24,7 +21,6 @@ RUST_TOOLCHAIN="+stable"
|
||||
multi_bit_argument=
|
||||
sign_argument=
|
||||
fast_tests_argument=
|
||||
long_tests_argument=
|
||||
nightly_tests_argument=
|
||||
no_big_params_argument=
|
||||
cargo_profile="release"
|
||||
@@ -95,10 +91,6 @@ if [[ "${FAST_TESTS}" == TRUE ]]; then
|
||||
fast_tests_argument=--fast-tests
|
||||
fi
|
||||
|
||||
if [[ "${LONG_TESTS}" == TRUE ]]; then
|
||||
long_tests_argument=--long-tests
|
||||
fi
|
||||
|
||||
if [[ "${NIGHTLY_TESTS}" == TRUE ]]; then
|
||||
nightly_tests_argument=--nightly-tests
|
||||
fi
|
||||
@@ -112,6 +104,7 @@ if [[ "${backend}" == "gpu" ]]; then
|
||||
fi
|
||||
|
||||
CURR_DIR="$(dirname "$0")"
|
||||
ARCH_FEATURE="$("${CURR_DIR}/get_arch_feature.sh")"
|
||||
|
||||
# TODO autodetect/have a finer CPU count depending on memory
|
||||
num_cpu_threads="$("${CURR_DIR}"/cpu_count.sh)"
|
||||
@@ -145,38 +138,32 @@ if [[ "${backend}" == "gpu" ]]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${long_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})
|
||||
filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})
|
||||
|
||||
if [[ "${FAST_TESTS}" == "TRUE" ]]; then
|
||||
echo "Running 'fast' test set"
|
||||
elif [[ "${LONG_TESTS}" == "FALSE" ]]; then
|
||||
else
|
||||
echo "Running 'slow' test set"
|
||||
fi
|
||||
|
||||
if [[ "${LONG_TESTS}" == "TRUE" ]]; then
|
||||
echo "Running 'long run' test set"
|
||||
fi
|
||||
|
||||
if [[ "${NIGHTLY_TESTS}" == "TRUE" ]]; then
|
||||
echo "Running 'nightly' test set"
|
||||
fi
|
||||
|
||||
echo "${filter_expression}"
|
||||
|
||||
cargo "${RUST_TOOLCHAIN}" nextest run \
|
||||
--tests \
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--features=integer,internal-keycache,zk-pok,experimental,"${avx512_feature}","${gpu_feature}" \
|
||||
--features="${ARCH_FEATURE}",integer,internal-keycache,zk-pok,experimental,"${avx512_feature}","${gpu_feature}" \
|
||||
--test-threads "${test_threads}" \
|
||||
-E "$filter_expression"
|
||||
|
||||
if [[ -z ${multi_bit_argument} && -z ${long_tests_argument} ]]; then
|
||||
if [[ -z ${multi_bit_argument} ]]; then
|
||||
cargo "${RUST_TOOLCHAIN}" test \
|
||||
--profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--features=integer,internal-keycache,experimental,"${avx512_feature}","${gpu_feature}" \
|
||||
--features="${ARCH_FEATURE}",integer,internal-keycache,experimental,"${avx512_feature}","${gpu_feature}" \
|
||||
--doc \
|
||||
-- --test-threads="${doctest_threads}" integer::"${gpu_feature}"
|
||||
fi
|
||||
|
||||
@@ -65,6 +65,7 @@ if [[ "${FAST_TESTS}" == TRUE ]]; then
|
||||
fi
|
||||
|
||||
CURR_DIR="$(dirname "$0")"
|
||||
ARCH_FEATURE="$("${CURR_DIR}/get_arch_feature.sh")"
|
||||
|
||||
n_threads_small="$("${CURR_DIR}"/cpu_count.sh)"
|
||||
n_threads_big="${n_threads_small}"
|
||||
@@ -93,7 +94,7 @@ if [[ "${BIG_TESTS_INSTANCE}" != TRUE ]]; then
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--features=shortint,internal-keycache,zk-pok,experimental \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache,zk-pok,experimental \
|
||||
--test-threads "${n_threads_small}" \
|
||||
-E "${filter_expression_small_params}"
|
||||
|
||||
@@ -110,7 +111,7 @@ and not test(~smart_add_and_mul)"""
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--features=shortint,internal-keycache,zk-pok,experimental \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache,zk-pok,experimental \
|
||||
--test-threads "${n_threads_big}" \
|
||||
--no-tests=warn \
|
||||
-E "${filter_expression_big_params}"
|
||||
@@ -119,7 +120,7 @@ and not test(~smart_add_and_mul)"""
|
||||
cargo "${RUST_TOOLCHAIN}" test \
|
||||
--profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--features=shortint,internal-keycache,zk-pok,experimental \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache,zk-pok,experimental \
|
||||
--doc \
|
||||
-- shortint::
|
||||
fi
|
||||
@@ -133,7 +134,7 @@ else
|
||||
--cargo-profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--profile ci \
|
||||
--features=shortint,internal-keycache,experimental \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache,experimental \
|
||||
--test-threads "${n_threads_big}" \
|
||||
-E "${filter_expression}"
|
||||
|
||||
@@ -141,7 +142,7 @@ else
|
||||
cargo "${RUST_TOOLCHAIN}" test \
|
||||
--profile "${cargo_profile}" \
|
||||
--package "${tfhe_package}" \
|
||||
--features=shortint,internal-keycache,experimental \
|
||||
--features="${ARCH_FEATURE}",shortint,internal-keycache,experimental \
|
||||
--doc \
|
||||
-- --test-threads="${n_threads_big}" shortint::
|
||||
fi
|
||||
|
||||
@@ -26,12 +26,6 @@ parser.add_argument(
|
||||
action="store_true",
|
||||
help="Run only a small subset of test suite",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--long-tests",
|
||||
dest="long_tests",
|
||||
action="store_true",
|
||||
help="Run only the long tests suite",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--nightly-tests",
|
||||
dest="nightly_tests",
|
||||
@@ -86,7 +80,6 @@ EXCLUDED_INTEGER_TESTS = [
|
||||
"/.*test_wopbs_bivariate_crt_wopbs_param_message_[34]_carry_[34]_ks_pbs_gaussian_2m64$/",
|
||||
"/.*test_integer_smart_mul_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
|
||||
"/.*test_integer_default_add_sequence_multi_thread_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
|
||||
"/.*::tests_long_run::.*/",
|
||||
]
|
||||
|
||||
# skip default_div, default_rem which are covered by default_div_rem
|
||||
@@ -101,61 +94,55 @@ EXCLUDED_BIG_PARAMETERS = [
|
||||
"/.*_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
|
||||
]
|
||||
|
||||
|
||||
def filter_integer_tests(input_args):
|
||||
(multi_bit_filter, group_filter) = (
|
||||
("_multi_bit", "_group_[0-9]") if input_args.multi_bit else ("", "")
|
||||
)
|
||||
backend_filter = ""
|
||||
if not input_args.long_tests:
|
||||
if input_args.backend == "gpu":
|
||||
backend_filter = "gpu::"
|
||||
if multi_bit_filter:
|
||||
# For now, GPU only has specific parameters set for multi-bit
|
||||
multi_bit_filter = "_gpu_multi_bit"
|
||||
if input_args.backend == "gpu":
|
||||
backend_filter = "gpu::"
|
||||
if multi_bit_filter:
|
||||
# For now, GPU only has specific parameters set for multi-bit
|
||||
multi_bit_filter = "_gpu_multi_bit"
|
||||
|
||||
filter_expression = [f"test(/^integer::{backend_filter}.*/)"]
|
||||
filter_expression = [f"test(/^integer::{backend_filter}.*/)"]
|
||||
|
||||
if input_args.multi_bit:
|
||||
filter_expression.append("test(~_multi_bit)")
|
||||
else:
|
||||
filter_expression.append("not test(~_multi_bit)")
|
||||
if input_args.multi_bit:
|
||||
filter_expression.append("test(~_multi_bit)")
|
||||
else:
|
||||
filter_expression.append("not test(~_multi_bit)")
|
||||
|
||||
if input_args.signed_only:
|
||||
filter_expression.append("test(~_signed)")
|
||||
if input_args.unsigned_only:
|
||||
filter_expression.append("not test(~_signed)")
|
||||
if input_args.signed_only:
|
||||
filter_expression.append("test(~_signed)")
|
||||
if input_args.unsigned_only:
|
||||
filter_expression.append("not test(~_signed)")
|
||||
|
||||
if input_args.no_big_params:
|
||||
for pattern in EXCLUDED_BIG_PARAMETERS:
|
||||
filter_expression.append(f"not test({pattern})")
|
||||
|
||||
if input_args.fast_tests and input_args.nightly_tests:
|
||||
filter_expression.append(
|
||||
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_[2-3]_carry_[2-3]_.*/)"
|
||||
)
|
||||
elif input_args.fast_tests:
|
||||
# Test only fast default operations with only one set of parameters
|
||||
filter_expression.append(
|
||||
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_2_carry_2_.*/)"
|
||||
)
|
||||
elif input_args.nightly_tests:
|
||||
# Test only fast default operations with only one set of parameters
|
||||
# This subset would run slower than fast_tests hence the use of nightly_tests
|
||||
filter_expression.append(
|
||||
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_3_carry_3_.*/)"
|
||||
)
|
||||
excluded_tests = (
|
||||
EXCLUDED_INTEGER_FAST_TESTS if input_args.fast_tests else EXCLUDED_INTEGER_TESTS
|
||||
)
|
||||
for pattern in excluded_tests:
|
||||
if input_args.no_big_params:
|
||||
for pattern in EXCLUDED_BIG_PARAMETERS:
|
||||
filter_expression.append(f"not test({pattern})")
|
||||
|
||||
else:
|
||||
if input_args.backend == "gpu":
|
||||
filter_expression = [f"test(/^integer::gpu::server_key::radix::tests_long_run.*/)"]
|
||||
elif input_args.backend == "cpu":
|
||||
filter_expression = [f"test(/^integer::server_key::radix_parallel::tests_long_run.*/)"]
|
||||
if input_args.fast_tests and input_args.nightly_tests:
|
||||
filter_expression.append(
|
||||
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_[2-3]_carry_[2-3]_.*/)"
|
||||
)
|
||||
elif input_args.fast_tests:
|
||||
# Test only fast default operations with only one set of parameters
|
||||
filter_expression.append(
|
||||
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_2_carry_2_.*/)"
|
||||
)
|
||||
elif input_args.nightly_tests:
|
||||
# Test only fast default operations with only one set of parameters
|
||||
# This subset would run slower than fast_tests hence the use of nightly_tests
|
||||
filter_expression.append(
|
||||
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_3_carry_3_.*/)"
|
||||
)
|
||||
|
||||
excluded_tests = (
|
||||
EXCLUDED_INTEGER_FAST_TESTS if input_args.fast_tests else EXCLUDED_INTEGER_TESTS
|
||||
)
|
||||
for pattern in excluded_tests:
|
||||
filter_expression.append(f"not test({pattern})")
|
||||
|
||||
return " and ".join(filter_expression)
|
||||
|
||||
|
||||
@@ -7,6 +7,7 @@ edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
clap = "=4.4.4"
|
||||
lazy_static = "1.4"
|
||||
log = "0.4"
|
||||
simplelog = "0.12"
|
||||
walkdir = "2.5.0"
|
||||
|
||||
@@ -101,7 +101,7 @@ pub fn check_tfhe_docs_are_tested() -> Result<(), Error> {
|
||||
.into_iter()
|
||||
.filter_map(|entry| {
|
||||
let path = entry.path().canonicalize().ok()?;
|
||||
if path.is_file() && path.extension().is_some_and(|e| e == "md") {
|
||||
if path.is_file() && path.extension().map_or(false, |e| e == "md") {
|
||||
let file_content = std::fs::read_to_string(&path).ok()?;
|
||||
if file_content.contains("```rust") {
|
||||
Some(path.to_path_buf())
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
use clap::{Arg, Command};
|
||||
use lazy_static::lazy_static;
|
||||
use log::LevelFilter;
|
||||
use simplelog::{ColorChoice, CombinedLogger, Config, TermLogger, TerminalMode};
|
||||
use std::sync::atomic::AtomicBool;
|
||||
@@ -11,8 +12,9 @@ mod utils;
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// CONSTANTS
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
|
||||
static DRY_RUN: AtomicBool = AtomicBool::new(false);
|
||||
lazy_static! {
|
||||
static ref DRY_RUN: AtomicBool = AtomicBool::new(false);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------------------
|
||||
// MAIN
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tfhe-csprng"
|
||||
version = "0.5.0"
|
||||
version = "0.4.1"
|
||||
edition = "2021"
|
||||
license = "BSD-3-Clause-Clear"
|
||||
description = "Cryptographically Secure PRNG used in the TFHE-rs library."
|
||||
@@ -25,13 +25,29 @@ clap = "=4.4.4"
|
||||
|
||||
[features]
|
||||
parallel = ["rayon"]
|
||||
software-prng = []
|
||||
seeder_x86_64_rdseed = []
|
||||
seeder_unix = []
|
||||
generator_x86_64_aesni = []
|
||||
generator_fallback = []
|
||||
generator_aarch64_aes = []
|
||||
|
||||
x86_64 = [
|
||||
"parallel",
|
||||
"seeder_x86_64_rdseed",
|
||||
"generator_x86_64_aesni",
|
||||
"generator_fallback",
|
||||
]
|
||||
x86_64-unix = ["x86_64", "seeder_unix"]
|
||||
aarch64 = ["parallel", "generator_aarch64_aes", "generator_fallback"]
|
||||
aarch64-unix = ["aarch64", "seeder_unix"]
|
||||
|
||||
[[bench]]
|
||||
name = "benchmark"
|
||||
path = "benches/benchmark.rs"
|
||||
harness = false
|
||||
required-features = ["seeder_x86_64_rdseed", "generator_x86_64_aesni"]
|
||||
|
||||
[[example]]
|
||||
name = "generate"
|
||||
path = "examples/generate.rs"
|
||||
required-features = ["seeder_unix", "generator_fallback"]
|
||||
|
||||
@@ -8,13 +8,13 @@ The implementation is based on the AES blockcipher used in CTR mode, as describe
|
||||
|
||||
Two implementations are available, an accelerated one on x86_64 CPUs with the `aes` feature and the `sse2` feature, and a pure software one that can be used on other platforms.
|
||||
|
||||
The crate also makes two seeders available, one needing the x86_64 instruction `rdseed` and another one based on the Unix random device `/dev/random` the latter requires the user to provide a secret.
|
||||
The crate also makes two seeders available, one needing the x86_64 feature `rdseed` and another one based on the Unix random device `/dev/random` the latter requires the user to provide a secret.
|
||||
|
||||
## Running the benchmarks
|
||||
|
||||
To execute the benchmarks on an x86_64 platform:
|
||||
```shell
|
||||
RUSTFLAGS="-Ctarget-cpu=native" cargo bench
|
||||
RUSTFLAGS="-Ctarget-cpu=native" cargo bench --features=seeder_x86_64_rdseed,generator_x86_64_aesni
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
@@ -1,53 +1,15 @@
|
||||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use tfhe_csprng::generators::{
|
||||
BytesPerChild, ChildrenCount, DefaultRandomGenerator, RandomGenerator,
|
||||
AesniRandomGenerator, BytesPerChild, ChildrenCount, RandomGenerator,
|
||||
};
|
||||
#[cfg(target_os = "macos")]
|
||||
use tfhe_csprng::seeders::AppleSecureEnclaveSeeder as ActivatedSeeder;
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
target_arch = "x86_64",
|
||||
target_feature = "rdseed"
|
||||
))]
|
||||
use tfhe_csprng::seeders::RdseedSeeder as ActivatedSeeder;
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
not(all(target_arch = "x86_64", target_feature = "rdseed")),
|
||||
target_family = "unix"
|
||||
))]
|
||||
use tfhe_csprng::seeders::UnixSeeder as ActivatedSeeder;
|
||||
|
||||
use tfhe_csprng::seeders::Seeder;
|
||||
use tfhe_csprng::seeders::{RdseedSeeder, Seeder};
|
||||
|
||||
// The number of bytes to generate during one benchmark iteration.
|
||||
const N_GEN: usize = 1_000_000;
|
||||
|
||||
fn new_seeder() -> ActivatedSeeder {
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
ActivatedSeeder
|
||||
}
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
target_arch = "x86_64",
|
||||
target_feature = "rdseed"
|
||||
))]
|
||||
{
|
||||
ActivatedSeeder::new()
|
||||
}
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
not(all(target_arch = "x86_64", target_feature = "rdseed")),
|
||||
target_family = "unix"
|
||||
))]
|
||||
{
|
||||
ActivatedSeeder::new(0)
|
||||
}
|
||||
}
|
||||
|
||||
fn parent_generate(c: &mut Criterion) {
|
||||
let mut seeder = new_seeder();
|
||||
let mut generator = DefaultRandomGenerator::new(seeder.seed());
|
||||
let mut seeder = RdseedSeeder;
|
||||
let mut generator = AesniRandomGenerator::new(seeder.seed());
|
||||
c.bench_function("parent_generate", |b| {
|
||||
b.iter(|| {
|
||||
(0..N_GEN).for_each(|_| {
|
||||
@@ -58,8 +20,8 @@ fn parent_generate(c: &mut Criterion) {
|
||||
}
|
||||
|
||||
fn child_generate(c: &mut Criterion) {
|
||||
let mut seeder = new_seeder();
|
||||
let mut generator = DefaultRandomGenerator::new(seeder.seed());
|
||||
let mut seeder = RdseedSeeder;
|
||||
let mut generator = AesniRandomGenerator::new(seeder.seed());
|
||||
let mut generator = generator
|
||||
.try_fork(ChildrenCount(1), BytesPerChild(N_GEN * 10_000))
|
||||
.unwrap()
|
||||
@@ -75,8 +37,8 @@ fn child_generate(c: &mut Criterion) {
|
||||
}
|
||||
|
||||
fn fork(c: &mut Criterion) {
|
||||
let mut seeder = new_seeder();
|
||||
let mut generator = DefaultRandomGenerator::new(seeder.seed());
|
||||
let mut seeder = RdseedSeeder;
|
||||
let mut generator = AesniRandomGenerator::new(seeder.seed());
|
||||
c.bench_function("fork", |b| {
|
||||
b.iter(|| {
|
||||
black_box(
|
||||
|
||||
115
tfhe-csprng/build.rs
Normal file
115
tfhe-csprng/build.rs
Normal file
@@ -0,0 +1,115 @@
|
||||
// To have clear error messages during compilation about why some piece of code may not be available
|
||||
// we decided to check the features compatibility with the target configuration in this script.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
|
||||
// See https://doc.rust-lang.org/reference/conditional-compilation.html#target_arch for various
|
||||
// compilation configuration
|
||||
|
||||
// Can be easily extended if needed
|
||||
pub struct FeatureRequirement {
|
||||
pub feature_name: &'static str,
|
||||
// target_arch requirement
|
||||
pub feature_req_target_arch: Option<&'static str>,
|
||||
// target_family requirement
|
||||
pub feature_req_target_family: Option<&'static str>,
|
||||
}
|
||||
|
||||
// We implement a version of default that is const which is not possible through the Default trait
|
||||
impl FeatureRequirement {
|
||||
// As we cannot use cfg!(feature = "feature_name") with something else than a literal, we need
|
||||
// a reference to the HashMap we populate with the enabled features
|
||||
fn is_activated(&self, build_activated_features: &HashMap<&'static str, bool>) -> bool {
|
||||
*build_activated_features.get(self.feature_name).unwrap()
|
||||
}
|
||||
|
||||
// panics if the requirements are not met
|
||||
fn check_requirements(&self) {
|
||||
let target_arch = get_target_arch_cfg();
|
||||
if let Some(feature_req_target_arch) = self.feature_req_target_arch {
|
||||
if feature_req_target_arch != target_arch {
|
||||
panic!(
|
||||
"Feature `{}` requires target_arch `{}`, current cfg: `{}`",
|
||||
self.feature_name, feature_req_target_arch, target_arch
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
let target_families = get_target_family_cfgs();
|
||||
if let Some(feature_req_target_family) = self.feature_req_target_family {
|
||||
if target_families
|
||||
.split(',')
|
||||
.all(|family| family != feature_req_target_family)
|
||||
{
|
||||
panic!(
|
||||
"Feature `{}` requires target_family `{}`, current cfgs: `{}`",
|
||||
self.feature_name, feature_req_target_family, target_families
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// const vecs are not yet a thing so use a fixed size array (update the array size when adding
|
||||
// requirements)
|
||||
static FEATURE_REQUIREMENTS: [FeatureRequirement; 4] = [
|
||||
FeatureRequirement {
|
||||
feature_name: "seeder_x86_64_rdseed",
|
||||
feature_req_target_arch: Some("x86_64"),
|
||||
feature_req_target_family: None,
|
||||
},
|
||||
FeatureRequirement {
|
||||
feature_name: "generator_x86_64_aesni",
|
||||
feature_req_target_arch: Some("x86_64"),
|
||||
feature_req_target_family: None,
|
||||
},
|
||||
FeatureRequirement {
|
||||
feature_name: "seeder_unix",
|
||||
feature_req_target_arch: None,
|
||||
feature_req_target_family: Some("unix"),
|
||||
},
|
||||
FeatureRequirement {
|
||||
feature_name: "generator_aarch64_aes",
|
||||
feature_req_target_arch: Some("aarch64"),
|
||||
feature_req_target_family: None,
|
||||
},
|
||||
];
|
||||
|
||||
// For a "feature_name" feature_cfg!("feature_name") expands to
|
||||
// ("feature_name", cfg!(feature = "feature_name"))
|
||||
macro_rules! feature_cfg {
|
||||
($feat_name:literal) => {
|
||||
($feat_name, cfg!(feature = $feat_name))
|
||||
};
|
||||
}
|
||||
|
||||
// Static HashMap would require an additional crate (phf or lazy static e.g.), so we just write a
|
||||
// function that returns the HashMap we are interested in
|
||||
fn get_feature_enabled_status() -> HashMap<&'static str, bool> {
|
||||
HashMap::from([
|
||||
feature_cfg!("seeder_x86_64_rdseed"),
|
||||
feature_cfg!("generator_x86_64_aesni"),
|
||||
feature_cfg!("seeder_unix"),
|
||||
feature_cfg!("generator_aarch64_aes"),
|
||||
])
|
||||
}
|
||||
|
||||
// See https://stackoverflow.com/a/43435335/18088947 for the inspiration of this code
|
||||
fn get_target_arch_cfg() -> String {
|
||||
env::var("CARGO_CFG_TARGET_ARCH").expect("CARGO_CFG_TARGET_ARCH is not set")
|
||||
}
|
||||
|
||||
fn get_target_family_cfgs() -> String {
|
||||
env::var("CARGO_CFG_TARGET_FAMILY").expect("CARGO_CFG_TARGET_FAMILY is not set")
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let feature_enabled_status = get_feature_enabled_status();
|
||||
|
||||
// This will panic if some requirements for a feature are not met
|
||||
FEATURE_REQUIREMENTS
|
||||
.iter()
|
||||
.filter(|&req| FeatureRequirement::is_activated(req, &feature_enabled_status))
|
||||
.for_each(FeatureRequirement::check_requirements);
|
||||
}
|
||||
@@ -2,29 +2,35 @@
|
||||
//! the program stdout. It can also generate a fixed number of bytes by passing a value along the
|
||||
//! optional argument `--bytes_total`. For testing purpose.
|
||||
use clap::{value_parser, Arg, Command};
|
||||
use tfhe_csprng::generators::{DefaultRandomGenerator, RandomGenerator};
|
||||
#[cfg(feature = "generator_x86_64_aesni")]
|
||||
use tfhe_csprng::generators::AesniRandomGenerator as ActivatedRandomGenerator;
|
||||
#[cfg(feature = "generator_aarch64_aes")]
|
||||
use tfhe_csprng::generators::NeonAesRandomGenerator as ActivatedRandomGenerator;
|
||||
use tfhe_csprng::generators::RandomGenerator;
|
||||
#[cfg(all(
|
||||
not(feature = "generator_x86_64_aesni"),
|
||||
not(feature = "generator_aarch64_aes"),
|
||||
feature = "generator_fallback"
|
||||
))]
|
||||
use tfhe_csprng::generators::SoftwareRandomGenerator as ActivatedRandomGenerator;
|
||||
|
||||
use std::io::prelude::*;
|
||||
use std::io::{stdout, StdoutLock};
|
||||
#[cfg(target_os = "macos")]
|
||||
use tfhe_csprng::seeders::AppleSecureEnclaveSeeder as ActivatedSeeder;
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
target_arch = "x86_64",
|
||||
target_feature = "rdseed"
|
||||
))]
|
||||
#[cfg(all(not(target_os = "macos"), feature = "seeder_x86_64_rdseed"))]
|
||||
use tfhe_csprng::seeders::RdseedSeeder as ActivatedSeeder;
|
||||
use tfhe_csprng::seeders::Seeder;
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
not(all(target_arch = "x86_64", target_feature = "rdseed")),
|
||||
target_family = "unix"
|
||||
not(feature = "seeder_x86_64_rdseed"),
|
||||
feature = "seeder_unix"
|
||||
))]
|
||||
use tfhe_csprng::seeders::UnixSeeder as ActivatedSeeder;
|
||||
|
||||
fn write_bytes(
|
||||
buffer: &mut [u8],
|
||||
generator: &mut DefaultRandomGenerator,
|
||||
generator: &mut ActivatedRandomGenerator,
|
||||
stdout: &mut StdoutLock<'_>,
|
||||
) -> std::io::Result<()> {
|
||||
buffer.iter_mut().zip(generator).for_each(|(b, g)| *b = g);
|
||||
@@ -33,7 +39,7 @@ fn write_bytes(
|
||||
|
||||
fn infinite_bytes_generation(
|
||||
buffer: &mut [u8],
|
||||
generator: &mut DefaultRandomGenerator,
|
||||
generator: &mut ActivatedRandomGenerator,
|
||||
stdout: &mut StdoutLock<'_>,
|
||||
) {
|
||||
while write_bytes(buffer, generator, stdout).is_ok() {}
|
||||
@@ -42,7 +48,7 @@ fn infinite_bytes_generation(
|
||||
fn bytes_generation(
|
||||
bytes_total: usize,
|
||||
buffer: &mut [u8],
|
||||
generator: &mut DefaultRandomGenerator,
|
||||
generator: &mut ActivatedRandomGenerator,
|
||||
stdout: &mut StdoutLock<'_>,
|
||||
) {
|
||||
let quotient = bytes_total / buffer.len();
|
||||
@@ -55,29 +61,6 @@ fn bytes_generation(
|
||||
write_bytes(&mut buffer[0..remaining], generator, stdout).unwrap()
|
||||
}
|
||||
|
||||
fn new_seeder() -> ActivatedSeeder {
|
||||
#[cfg(target_os = "macos")]
|
||||
{
|
||||
ActivatedSeeder
|
||||
}
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
target_arch = "x86_64",
|
||||
target_feature = "rdseed"
|
||||
))]
|
||||
{
|
||||
ActivatedSeeder::new()
|
||||
}
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
not(all(target_arch = "x86_64", target_feature = "rdseed")),
|
||||
target_family = "unix"
|
||||
))]
|
||||
{
|
||||
ActivatedSeeder::new(0)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
let matches = Command::new(
|
||||
"Generate a stream of random numbers, specify no flags for infinite generation",
|
||||
@@ -91,11 +74,25 @@ pub fn main() {
|
||||
)
|
||||
.get_matches();
|
||||
|
||||
// Ugly hack to be able to use UnixSeeder
|
||||
#[cfg(all(
|
||||
not(target_os = "macos"),
|
||||
not(feature = "seeder_x86_64_rdseed"),
|
||||
feature = "seeder_unix"
|
||||
))]
|
||||
let new_seeder = || ActivatedSeeder::new(0);
|
||||
#[cfg(not(all(
|
||||
not(target_os = "macos"),
|
||||
not(feature = "seeder_x86_64_rdseed"),
|
||||
feature = "seeder_unix"
|
||||
)))]
|
||||
let new_seeder = || ActivatedSeeder;
|
||||
|
||||
let mut seeder = new_seeder();
|
||||
let seed = seeder.seed();
|
||||
// Don't print on std out
|
||||
eprintln!("seed={seed:?}");
|
||||
let mut generator = DefaultRandomGenerator::new(seed);
|
||||
let mut generator = ActivatedRandomGenerator::new(seed);
|
||||
let stdout = stdout();
|
||||
let mut buffer = [0u8; 16];
|
||||
|
||||
|
||||
@@ -206,6 +206,7 @@ pub use index::*;
|
||||
|
||||
/// A module containing structures to manage table indices and buffer pointers together properly.
|
||||
mod states;
|
||||
pub use states::*;
|
||||
|
||||
/// A module containing an abstraction for aes block ciphers.
|
||||
mod block_cipher;
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
use crate::generators::aes_ctr::states::State;
|
||||
use crate::generators::aes_ctr::{AesBlockCipher, AesCtrGenerator, ChildrenClosure, TableIndex};
|
||||
use crate::generators::aes_ctr::{
|
||||
AesBlockCipher, AesCtrGenerator, ChildrenClosure, State, TableIndex,
|
||||
};
|
||||
use crate::generators::{BytesPerChild, ChildrenCount, ForkError};
|
||||
|
||||
/// A type alias for the parallel children iterator type.
|
||||
|
||||
@@ -1,9 +0,0 @@
|
||||
#[cfg(all(target_arch = "x86_64", not(feature = "software-prng")))]
|
||||
pub type DefaultRandomGenerator = super::AesniRandomGenerator;
|
||||
#[cfg(all(target_arch = "aarch64", not(feature = "software-prng")))]
|
||||
pub type DefaultRandomGenerator = super::NeonAesRandomGenerator;
|
||||
#[cfg(any(
|
||||
feature = "software-prng",
|
||||
not(any(target_arch = "x86_64", target_arch = "aarch64"))
|
||||
))]
|
||||
pub type DefaultRandomGenerator = super::SoftwareRandomGenerator;
|
||||
@@ -25,8 +25,7 @@ impl AesBlockCipher for ArmAesBlockCipher {
|
||||
if !(aes_detected && neon_detected) {
|
||||
panic!(
|
||||
"The ArmAesBlockCipher requires both aes and neon aarch64 CPU features.\n\
|
||||
aes feature available: {}\nneon feature available: {}\n\
|
||||
Please consider enabling the SoftwareRandomGenerator with the `software-prng` feature",
|
||||
aes feature available: {}\nneon feature available: {}\n.",
|
||||
aes_detected, neon_detected
|
||||
)
|
||||
}
|
||||
|
||||
@@ -20,8 +20,7 @@ impl AesBlockCipher for AesniBlockCipher {
|
||||
if !(aes_detected && sse2_detected) {
|
||||
panic!(
|
||||
"The AesniBlockCipher requires both aes and sse2 x86 CPU features.\n\
|
||||
aes feature available: {}\nsse2 feature available: {}\n\
|
||||
Please consider enabling the SoftwareRandomGenerator with the `software-prng` feature",
|
||||
aes feature available: {}\nsse2 feature available: {}\n.",
|
||||
aes_detected, sse2_detected
|
||||
)
|
||||
}
|
||||
|
||||
@@ -1,12 +1,14 @@
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[cfg(feature = "generator_x86_64_aesni")]
|
||||
mod aesni;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[cfg(feature = "generator_x86_64_aesni")]
|
||||
pub use aesni::*;
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[cfg(feature = "generator_aarch64_aes")]
|
||||
mod aarch64;
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
#[cfg(feature = "generator_aarch64_aes")]
|
||||
pub use aarch64::*;
|
||||
|
||||
#[cfg(feature = "generator_fallback")]
|
||||
mod soft;
|
||||
#[cfg(feature = "generator_fallback")]
|
||||
pub use soft::*;
|
||||
|
||||
@@ -123,10 +123,6 @@ mod aes_ctr;
|
||||
mod implem;
|
||||
pub use implem::*;
|
||||
|
||||
pub mod default;
|
||||
/// Convenience alias for the most efficient CSPRNG implementation available.
|
||||
pub use default::DefaultRandomGenerator;
|
||||
|
||||
#[cfg(test)]
|
||||
#[allow(unused)] // to please clippy when tests are not activated
|
||||
pub mod generator_generic_test {
|
||||
|
||||
@@ -3,12 +3,12 @@ mod apple_secure_enclave_seeder;
|
||||
#[cfg(target_os = "macos")]
|
||||
pub use apple_secure_enclave_seeder::AppleSecureEnclaveSeeder;
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[cfg(feature = "seeder_x86_64_rdseed")]
|
||||
mod rdseed;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[cfg(feature = "seeder_x86_64_rdseed")]
|
||||
pub use rdseed::RdseedSeeder;
|
||||
|
||||
#[cfg(target_family = "unix")]
|
||||
#[cfg(feature = "seeder_unix")]
|
||||
mod unix;
|
||||
#[cfg(target_family = "unix")]
|
||||
#[cfg(feature = "seeder_unix")]
|
||||
pub use unix::UnixSeeder;
|
||||
|
||||
@@ -4,23 +4,7 @@ use crate::seeders::{Seed, Seeder};
|
||||
///
|
||||
/// The `rdseed` instruction allows to deliver seeds from a hardware source of entropy see
|
||||
/// <https://www.felixcloutier.com/x86/rdseed> .
|
||||
pub struct RdseedSeeder(());
|
||||
|
||||
impl RdseedSeeder {
|
||||
pub fn new() -> Self {
|
||||
if Self::is_available() {
|
||||
Self(())
|
||||
} else {
|
||||
panic!("Tried to use RdSeedSeeder but rdseed instruction is not enabled on the current machine");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for RdseedSeeder {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
pub struct RdseedSeeder;
|
||||
|
||||
impl Seeder for RdseedSeeder {
|
||||
fn seed(&mut self) -> Seed {
|
||||
@@ -62,6 +46,6 @@ mod test {
|
||||
|
||||
#[test]
|
||||
fn check_bounded_sequence_difference() {
|
||||
check_seeder_fixed_sequences_different(|_| RdseedSeeder::new());
|
||||
check_seeder_fixed_sequences_different(|_| RdseedSeeder);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tfhe-fft"
|
||||
version = "0.7.0"
|
||||
version = "0.6.0"
|
||||
edition = "2021"
|
||||
description = "tfhe-fft is a pure Rust high performance fast Fourier transform library."
|
||||
readme = "README.md"
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "tfhe-ntt"
|
||||
version = "0.4.0"
|
||||
version = "0.3.0"
|
||||
edition = "2021"
|
||||
description = "tfhe-ntt is a pure Rust high performance number theoretic transform library."
|
||||
readme = "README.md"
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user