Compare commits

..

3 Commits

Author SHA1 Message Date
Arthur Meyre
8378ce7d44 clippy bug no span for large array on stack 2024-12-03 11:00:31 +01:00
Arthur Meyre
e0111f6dd1 chore(ci): toolchain update 2024-12-02 16:34:19 +01:00
Arthur Meyre
51fd605a0c chore: update dependencies 2024-12-02 11:12:46 +01:00
540 changed files with 12643 additions and 32174 deletions

View File

@@ -1,53 +0,0 @@
# Composite action that prepares a Hyperstack runner for CUDA builds:
#   1. installs system packages and builds the requested CMake release from source,
#   2. exports the CUDA toolkit location for the selected CUDA version,
#   3. exports the host C/C++ compilers matching the selected GCC version,
#   4. runs nvidia-smi as a GPU visibility check.
name: Setup Cuda
description: Setup Cuda on Hyperstack instance

inputs:
  cuda-version:
    description: Version of Cuda to use
    required: true
  gcc-version:
    description: Version of GCC to use
    required: true
  cmake-version:
    description: Version of cmake to use
    # Quoted so the version is always read as a string, never as a number.
    default: "3.29.6"

runs:
  using: "composite"
  steps:
    # Mandatory on hyperstack since a bootable volume is not re-usable yet.
    - name: Install dependencies
      shell: bash
      run: |
        # apt-get is used instead of apt: apt does not guarantee a stable CLI for scripts.
        sudo apt-get update
        sudo apt-get install -y checkinstall zlib1g-dev libssl-dev libclang-dev
        # Download the CMake source tarball for the requested version, then build and install it.
        wget https://github.com/Kitware/CMake/releases/download/v${{ inputs.cmake-version }}/cmake-${{ inputs.cmake-version }}.tar.gz
        tar -zxvf cmake-${{ inputs.cmake-version }}.tar.gz
        cd cmake-${{ inputs.cmake-version }}
        ./bootstrap
        make -j"$(nproc)"
        sudo make install

    - name: Export CUDA variables
      shell: bash
      run: |
        CUDA_PATH=/usr/local/cuda-${{ inputs.cuda-version }}
        echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
        echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
        # Append the previous LD_LIBRARY_PATH only when it is non-empty: a trailing ':'
        # is an empty entry, which the dynamic loader resolves to the current directory.
        echo "LD_LIBRARY_PATH=$CUDA_PATH/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "${GITHUB_ENV}"
        echo "CUDACXX=$CUDA_PATH/bin/nvcc" >> "${GITHUB_ENV}"

    # Specify the correct host compilers
    - name: Export gcc and g++ variables
      shell: bash
      run: |
        {
          echo "CC=/usr/bin/gcc-${{ inputs.gcc-version }}";
          echo "CXX=/usr/bin/g++-${{ inputs.gcc-version }}";
          echo "CUDAHOSTCXX=/usr/bin/g++-${{ inputs.gcc-version }}";
          # NOTE(review): HOME is pinned to /home/ubuntu; presumably the runner user's home - confirm.
          echo "HOME=/home/ubuntu";
        } >> "${GITHUB_ENV}"

    - name: Check device is detected
      shell: bash
      run: nvidia-smi

View File

@@ -26,7 +26,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -50,7 +50,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
@@ -100,7 +100,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -11,26 +11,16 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
pull_request_target:
jobs:
check-user-permission:
if: github.event_name == 'pull_request' || github.event_name == 'pull_request_target'
uses: ./.github/workflows/check_triggering_actor.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
should-run:
runs-on: ubuntu-latest
needs: check-user-permission
if: github.event_name != 'pull_request_target' ||
needs.check-user-permission.result == 'success'
permissions:
pull-requests: write
outputs:
@@ -65,11 +55,10 @@ jobs:
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
ref: ${{ github.event.pull_request.head.sha }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -144,7 +133,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -159,7 +148,7 @@ jobs:
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
needs: [ should-run, setup-instance ]
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
group: ${{ github.workflow }}_${{ github.ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
@@ -168,10 +157,9 @@ jobs:
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
ref: ${{ github.event.pull_request.head.sha }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
@@ -211,7 +199,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
with:
path: |
~/.nvm
@@ -224,7 +212,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
@@ -282,7 +270,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -59,7 +59,6 @@ jobs:
- tfhe/src/core_crypto/**
- tfhe/src/shortint/**
- tfhe/src/integer/**
- .github/workflows/aws_tfhe_integer_tests.yml
setup-instance:
name: Setup instance (unsigned-integer-tests)
@@ -75,7 +74,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -99,7 +98,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
@@ -140,7 +139,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -59,7 +59,6 @@ jobs:
- tfhe/src/core_crypto/**
- tfhe/src/shortint/**
- tfhe/src/integer/**
- .github/workflows/aws_tfhe_signed_integer_tests.yml
setup-instance:
name: Setup instance (unsigned-integer-tests)
@@ -75,7 +74,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -99,7 +98,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
@@ -144,7 +143,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -67,7 +67,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -142,7 +142,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -168,7 +168,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
@@ -250,7 +250,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -27,7 +27,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -51,7 +51,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
@@ -61,7 +61,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
with:
path: |
~/.nvm
@@ -74,7 +74,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
@@ -119,7 +119,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -29,7 +29,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -62,7 +62,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
@@ -93,7 +93,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_boolean
path: ${{ env.RESULTS_FILENAME }}
@@ -127,7 +127,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -26,7 +26,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -58,7 +58,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
@@ -81,7 +81,7 @@ jobs:
--walk-subdirs
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_core_crypto
path: ${{ env.RESULTS_FILENAME }}
@@ -115,7 +115,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -29,7 +29,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -63,7 +63,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
@@ -97,7 +97,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_erc20
path: ${{ env.RESULTS_FILENAME }}
@@ -124,7 +124,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -54,7 +54,7 @@ jobs:
echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
@@ -82,7 +82,7 @@ jobs:
--walk-subdirs
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_integer_multi_bit_gpu_default
path: ${{ env.RESULTS_FILENAME }}
@@ -127,7 +127,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
@@ -157,7 +157,7 @@ jobs:
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_core_crypto
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -27,7 +27,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -48,19 +48,28 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Get benchmark details
run: |
{
@@ -75,10 +84,31 @@ jobs:
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
} >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Run benchmarks with AVX512
run: |
make bench_pbs_gpu
@@ -98,7 +128,7 @@ jobs:
--walk-subdirs
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_core_crypto
path: ${{ env.RESULTS_FILENAME }}
@@ -137,7 +167,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -12,10 +12,7 @@ on:
- "l40 (n3-L40x1)"
- "single-h100 (n3-H100x1)"
- "2-h100 (n3-H100x2)"
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
jobs:
parse-inputs:

View File

@@ -54,7 +54,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -75,19 +75,28 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Get benchmark details
run: |
{
@@ -102,10 +111,34 @@ jobs:
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
} >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run benchmarks
run: |
make bench_hlapi_erc20_gpu
@@ -124,9 +157,9 @@ jobs:
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_erc20_${{ inputs.profile }}
name: ${{ github.sha }}_erc20
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
@@ -163,7 +196,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -15,7 +15,6 @@ on:
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
- "multi-a100-nvlink (n3-A100x8-NVLink)"
command:
description: "Benchmark command to run"

View File

@@ -118,7 +118,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -145,19 +145,28 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Get benchmark details
run: |
{
@@ -172,10 +181,41 @@ jobs:
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
} >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
} >> "${GITHUB_ENV}"
- name: Checkout Slab repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
repository: zama-ai/slab
path: slab
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Should run benchmarks with all precisions
if: inputs.all_precisions
run: |
@@ -196,22 +236,14 @@ jobs:
--commit-date "${{ env.COMMIT_DATE }}" \
--bench-date "${{ env.BENCH_DATE }}" \
--walk-subdirs \
--name-suffix avx512 \
--bench-type ${{ matrix.bench_type }}
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
repository: zama-ai/slab
path: slab
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash
run: |
@@ -239,7 +271,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -90,7 +90,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -130,7 +130,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
@@ -170,7 +170,7 @@ jobs:
--bench-type ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
path: ${{ env.RESULTS_FILENAME }}
@@ -197,7 +197,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -56,7 +56,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -93,7 +93,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
@@ -136,7 +136,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
path: ${{ env.RESULTS_FILENAME }}
@@ -163,7 +163,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -90,7 +90,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -130,7 +130,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
@@ -164,7 +164,7 @@ jobs:
--bench-type ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
path: ${{ env.RESULTS_FILENAME }}
@@ -191,7 +191,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -32,7 +32,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -84,7 +84,7 @@ jobs:
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_fft
path: ${{ env.RESULTS_FILENAME }}
@@ -126,7 +126,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -32,7 +32,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -84,7 +84,7 @@ jobs:
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_ntt
path: ${{ env.RESULTS_FILENAME }}
@@ -126,7 +126,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -36,7 +36,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -58,7 +58,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -91,7 +91,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
@@ -121,7 +121,7 @@ jobs:
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_tfhe_zk_pok
path: ${{ env.RESULTS_FILENAME }}
@@ -155,7 +155,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -40,7 +40,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -65,7 +65,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -99,7 +99,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
@@ -109,7 +109,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
with:
path: |
~/.nvm
@@ -122,7 +122,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 #v4.2.0
uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a #v4.1.2
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
@@ -166,7 +166,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
path: ${{ env.RESULTS_FILENAME }}
@@ -200,7 +200,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -104,7 +104,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -141,7 +141,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: nightly
@@ -177,7 +177,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
with:
name: ${{ github.sha }}_integer_zk
path: ${{ env.RESULTS_FILENAME }}
@@ -211,7 +211,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -28,7 +28,7 @@ jobs:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable

View File

@@ -1,29 +0,0 @@
# Check if triggering actor is a collaborator and has write access
name: Check Triggering Actor
on:
workflow_call:
secrets:
TOKEN:
required: true
jobs:
check-actor-permission:
runs-on: ubuntu-latest
steps:
- name: Get User Permission
id: check-access
uses: actions-cool/check-user-permission@956b2e73cdfe3bcb819bb7225e490cb3b18fd76e # v2.2.1
with:
require: write
username: ${{ github.triggering_actor }}
env:
GITHUB_TOKEN: ${{ secrets.TOKEN }}
- name: Check User Permission
if: steps.check-access.outputs.require-result == 'false'
run: |
echo "${{ github.triggering_actor }} does not have permissions on this repo."
echo "Current permission level is ${{ steps.check-access.outputs.user-permission }}"
echo "Job originally triggered by ${{ github.actor }}"
exit 1

View File

@@ -27,7 +27,7 @@ jobs:
make lint_workflow
- name: Ensure SHA pinned actions
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@6ae615f6475d2ede5ad88bea6baa7a1d5e93ffaa # v3.0.19
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@5d6ac37a4cef8b8df67f482a8e384987766f0213 # v3.0.17
with:
allowlist: |
slsa-framework/slsa-github-generator

View File

@@ -25,7 +25,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -47,13 +47,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
files_yaml: |
tfhe:
@@ -83,7 +83,7 @@ jobs:
make test_shortint_cov
- name: Upload tfhe coverage to Codecov
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
with:
token: ${{ secrets.CODECOV_TOKEN }}
@@ -97,7 +97,7 @@ jobs:
make test_integer_cov
- name: Upload tfhe coverage to Codecov
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
uses: codecov/codecov-action@015f24e6818733317a2da2edd6290ab26238649a
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
with:
token: ${{ secrets.CODECOV_TOKEN }}
@@ -121,7 +121,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -27,7 +27,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -51,7 +51,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
@@ -75,7 +75,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -40,7 +40,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable

View File

@@ -35,7 +35,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -68,7 +68,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -94,28 +94,60 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run core crypto and internal CUDA backend tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu
@@ -155,7 +187,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -34,7 +34,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -66,7 +66,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -92,28 +92,60 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run core crypto and internal CUDA backend tests
run: |
make test_core_crypto_gpu
@@ -153,7 +185,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -25,7 +25,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -49,6 +49,9 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
@@ -68,21 +71,38 @@ jobs:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run core crypto, integer and internal CUDA backend tests
run: |
make test_gpu
@@ -119,7 +139,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -35,7 +35,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -68,7 +68,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -94,28 +94,60 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run multi-bit CUDA integer compression tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_integer_compression_gpu
@@ -158,7 +190,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -1,4 +1,4 @@
name: Long Run Tests on GPU
name: AWS Long Run Tests on GPU
env:
CARGO_TERM_COLOR: always
@@ -15,8 +15,8 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
schedule:
# Weekly tests will be triggered each Friday at 9p.m.
- cron: "0 21 * * 5"
# Weekly tests will be triggered each Friday at 1a.m.
- cron: '0 1 * * FRI'
jobs:
setup-instance:
@@ -29,17 +29,17 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: multi-gpu-test
profile: single-h100
cuda-tests:
name: Long run GPU tests
name: Long run GPU H100 tests
needs: [ setup-instance ]
concurrency:
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
@@ -53,26 +53,57 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
timeout-minutes: 4320 # 72 hours
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run tests
run: |
make test_integer_long_run_gpu
@@ -88,7 +119,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-tests.result }}
SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Integer GPU H100 long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (gpu-tests)
@@ -98,7 +129,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -24,7 +24,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -63,7 +63,7 @@ jobs:
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
@@ -110,7 +110,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -35,7 +35,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -68,7 +68,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -94,25 +94,58 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run signed integer tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci
@@ -138,7 +171,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -35,7 +35,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -68,7 +68,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -94,25 +94,58 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run signed integer multi-bit tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci
@@ -138,7 +171,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -42,7 +42,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -75,7 +75,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -101,28 +101,57 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Should run nightly tests
if: github.event_name == 'schedule'
run: |
@@ -131,6 +160,10 @@ jobs:
echo "NIGHTLY_TESTS=TRUE";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run signed integer multi-bit tests
run: |
make test_signed_integer_multi_bit_gpu_ci
@@ -156,7 +189,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -35,7 +35,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -68,7 +68,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -94,25 +94,58 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run unsigned integer tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci
@@ -138,7 +171,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -35,7 +35,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -68,7 +68,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -94,25 +94,58 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run unsigned integer multi-bit tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci
@@ -138,7 +171,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -41,7 +41,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
uses: tj-actions/changed-files@4edd678ac3f81e2dc578756871e4d00c19191daf
with:
since_last_remote_commit: true
files_yaml: |
@@ -74,7 +74,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -100,25 +100,54 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Should run nightly tests
if: github.event_name == 'schedule'
run: |
@@ -127,6 +156,10 @@ jobs:
echo "NIGHTLY_TESTS=TRUE";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run unsigned integer multi-bit tests
run: |
make test_unsigned_integer_multi_bit_gpu_ci
@@ -152,7 +185,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -15,8 +15,8 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
schedule:
# Weekly tests will be triggered each Friday at 9p.m.
- cron: "0 21 * * 5"
# Weekly tests will be triggered each Friday at 1a.m.
- cron: '0 1 * * FRI'
jobs:
setup-instance:
@@ -29,7 +29,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -45,7 +45,6 @@ jobs:
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
timeout-minutes: 4320 # 72 hours
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
@@ -54,7 +53,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
@@ -78,7 +77,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -39,7 +39,7 @@ jobs:
persist-credentials: "false"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable

View File

@@ -50,7 +50,7 @@ jobs:
- name: Prepare package
run: |
cargo package -p tfhe
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
- uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: crate
path: target/package/*.crate

View File

@@ -36,7 +36,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -71,7 +71,7 @@ jobs:
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
with:
toolchain: stable
@@ -120,7 +120,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

233
Makefile
View File

@@ -2,6 +2,7 @@ SHELL:=$(shell /usr/bin/env which bash)
OS:=$(shell uname)
RS_CHECK_TOOLCHAIN:=$(shell cat toolchain.txt | tr -d '\n')
CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
TARGET_ARCH_FEATURE:=$(shell ./scripts/get_arch_feature.sh)
CPU_COUNT=$(shell ./scripts/cpu_count.sh)
RS_BUILD_TOOLCHAIN:=stable
CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
@@ -281,14 +282,14 @@ check_typos: install_typos_checker
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
clippy_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean,shortint,integer,internal-keycache,gpu \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
--all-targets \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
check_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
--features=boolean,shortint,integer,internal-keycache,gpu \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
--all-targets \
-p $(TFHE_SPEC)
@@ -307,51 +308,52 @@ lint_workflow: check_actionlint_installed
.PHONY: clippy_core # Run clippy lints on core_crypto with and without experimental features
clippy_core: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE) \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=experimental \
--features=$(TARGET_ARCH_FEATURE),experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=nightly-avx512 \
--features=$(TARGET_ARCH_FEATURE),nightly-avx512 \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=experimental,nightly-avx512 \
--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=zk-pok \
--features=$(TARGET_ARCH_FEATURE),zk-pok \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_boolean # Run clippy lints enabling the boolean features
clippy_boolean: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean \
--features=$(TARGET_ARCH_FEATURE),boolean \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_shortint # Run clippy lints enabling the shortint features
clippy_shortint: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=shortint \
--features=$(TARGET_ARCH_FEATURE),shortint \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=shortint,experimental \
--features=$(TARGET_ARCH_FEATURE),shortint,experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=zk-pok,shortint \
--features=$(TARGET_ARCH_FEATURE),zk-pok,shortint \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_integer # Run clippy lints enabling the integer features
clippy_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=integer \
--features=$(TARGET_ARCH_FEATURE),integer \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=integer,experimental \
--features=$(TARGET_ARCH_FEATURE),integer,experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy # Run clippy lints enabling the boolean, shortint, integer
clippy: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=boolean,shortint,integer \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_rustdoc # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
@@ -362,13 +364,13 @@ clippy_rustdoc: install_rs_check_toolchain
fi && \
CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok,pbs-stats,strings \
-p $(TFHE_SPEC)
.PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
clippy_c_api: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean-c-api,shortint-c-api,high-level-c-api \
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
@@ -393,16 +395,17 @@ clippy_trivium: install_rs_check_toolchain
.PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
clippy_all_targets: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok,strings \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
clippy_tfhe_csprng: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=parallel,software-prng -p tfhe-csprng -- --no-deps -D warnings
--features=$(TARGET_ARCH_FEATURE) \
-p tfhe-csprng -- --no-deps -D warnings
.PHONY: clippy_zk_pok # Run clippy lints on tfhe-zk-pok
clippy_zk_pok: install_rs_check_toolchain
@@ -441,67 +444,67 @@ check_rust_bindings_did_not_change:
.PHONY: tfhe_lints # Run custom tfhe-rs lints
tfhe_lints: install_tfhe_lints
cd tfhe && RUSTFLAGS="$(RUSTFLAGS)" cargo tfhe-lints \
--features=boolean,shortint,integer,zk-pok -- -D warnings
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok -- -D warnings
.PHONY: build_core # Build core_crypto without experimental features
build_core: install_rs_build_toolchain install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-p $(TFHE_SPEC)
--features=$(TARGET_ARCH_FEATURE) -p $(TFHE_SPEC)
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=nightly-avx512 -p $(TFHE_SPEC); \
--features=$(TARGET_ARCH_FEATURE),nightly-avx512 -p $(TFHE_SPEC); \
fi
.PHONY: build_core_experimental # Build core_crypto with experimental features
build_core_experimental: install_rs_build_toolchain install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=experimental -p $(TFHE_SPEC)
--features=$(TARGET_ARCH_FEATURE),experimental -p $(TFHE_SPEC)
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=experimental,nightly-avx512 -p $(TFHE_SPEC); \
--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 -p $(TFHE_SPEC); \
fi
.PHONY: build_boolean # Build with boolean enabled
build_boolean: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean -p $(TFHE_SPEC) --all-targets
--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) --all-targets
.PHONY: build_shortint # Build with shortint enabled
build_shortint: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=shortint -p $(TFHE_SPEC) --all-targets
--features=$(TARGET_ARCH_FEATURE),shortint -p $(TFHE_SPEC) --all-targets
.PHONY: build_integer # Build with integer enabled
build_integer: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=integer -p $(TFHE_SPEC) --all-targets
--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) --all-targets
.PHONY: build_tfhe_full # Build with boolean, shortint and integer enabled
build_tfhe_full: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
.PHONY: build_tfhe_coverage # Build with test coverage enabled
build_tfhe_coverage: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
.PHONY: build_c_api # Build the C API for boolean, shortint and integer
build_c_api: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
-p $(TFHE_SPEC)
.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer with gpu enabled
build_c_api_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
-p $(TFHE_SPEC)
.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
-p $(TFHE_SPEC)
.PHONY: build_web_js_api # Build the js API targeting the web browser
@@ -532,15 +535,15 @@ build_node_js_api: install_rs_build_toolchain install_wasm_pack
.PHONY: build_tfhe_csprng # Build tfhe_csprng
build_tfhe_csprng: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-p tfhe-csprng --all-targets
--features=$(TARGET_ARCH_FEATURE) -p tfhe-csprng --all-targets
.PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
fi
.PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
@@ -548,13 +551,13 @@ test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain inst
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
--out xml --output-dir coverage/core_crypto --line --engine llvm --timeout 500 \
--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
--features=experimental,internal-keycache \
--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache \
-p $(TFHE_SPEC) -- core_crypto::
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
--out xml --output-dir coverage/core_crypto_avx512 --line --engine llvm --timeout 500 \
--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
--features=experimental,internal-keycache,nightly-avx512 \
--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,nightly-avx512 \
-p $(TFHE_SPEC) -- -Z unstable-options --report-time core_crypto::; \
fi
@@ -572,38 +575,35 @@ test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
.PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
test_core_crypto_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
.PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
test_integer_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
.PHONY: test_integer_long_run_gpu # Run the long run integer tests on the gpu backend
test_integer_long_run_gpu: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
LONG_TESTS=TRUE \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)" --backend "gpu"
.PHONY: test_integer_long_run_gpu # Run the long run tests of the integer module on the gpu backend
test_integer_long_run_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer,gpu,__long_run_tests -p $(TFHE_SPEC) -- integer::gpu::server_key::radix::tests_long_run --test-threads=6
.PHONY: test_integer_compression
test_integer_compression: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compress
--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) -- integer::ciphertext::compress
.PHONY: test_integer_compression_gpu
test_integer_compression_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress
.PHONY: test_integer_gpu_ci # Run the tests for integer ci on gpu backend
test_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -662,20 +662,20 @@ test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_n
.PHONY: test_boolean # Run the tests of the boolean module
test_boolean: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=boolean -p $(TFHE_SPEC) -- boolean::
--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) -- boolean::
.PHONY: test_boolean_cov # Run the tests of the boolean module with code coverage
test_boolean_cov: install_rs_check_toolchain install_tarpaulin
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
--out xml --output-dir coverage/boolean --line --engine llvm --timeout 500 \
$(COVERAGE_EXCLUDED_FILES) \
--features=boolean,internal-keycache \
--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache \
-p $(TFHE_SPEC) -- -Z unstable-options --report-time boolean::
.PHONY: test_c_api_rs # Run the rust tests for the C API
test_c_api_rs: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=boolean-c-api,shortint-c-api,high-level-c-api \
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
-p $(TFHE_SPEC) \
c_api
@@ -707,14 +707,14 @@ test_shortint_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
.PHONY: test_shortint # Run all the tests for shortint
test_shortint: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
.PHONY: test_shortint_cov # Run the tests of the shortint module with code coverage
test_shortint_cov: install_rs_check_toolchain install_tarpaulin
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
--out xml --output-dir coverage/shortint --line --engine llvm --timeout 500 \
$(COVERAGE_EXCLUDED_FILES) \
--features=shortint,internal-keycache \
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
-p $(TFHE_SPEC) -- -Z unstable-options --report-time shortint::
.PHONY: test_integer_ci # Run the tests for integer ci
@@ -771,28 +771,26 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
--signed-only --tfhe-package "$(TFHE_SPEC)"
.PHONY: test_integer_long_run # Run the long run integer tests
test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
LONG_TESTS=TRUE \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)"
.PHONY: test_integer_long_run # Run the long run tests for integer
test_integer_long_run: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,__long_run_tests -p $(TFHE_SPEC) -- integer::server_key::radix_parallel::tests_long_run
.PHONY: test_safe_serialization # Run the tests for safe serialization
test_safe_serialization: install_rs_build_toolchain install_cargo_nextest
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::
.PHONY: test_zk # Run the tests for the zk module of the TFHE-rs crate
test_zk: install_rs_build_toolchain install_cargo_nextest
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=shortint,zk-pok -p $(TFHE_SPEC) -- zk::
--features=$(TARGET_ARCH_FEATURE),shortint,zk-pok -p $(TFHE_SPEC) -- zk::
.PHONY: test_integer # Run all the tests for integer
test_integer: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer,internal-keycache -p $(TFHE_SPEC) -- integer::
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache -p $(TFHE_SPEC) -- integer::
.PHONY: test_integer_cov # Run the tests of the integer module with code coverage
test_integer_cov: install_rs_check_toolchain install_tarpaulin
@@ -800,38 +798,38 @@ test_integer_cov: install_rs_check_toolchain install_tarpaulin
--out xml --output-dir coverage/integer --line --engine llvm --timeout 500 \
--implicit-test-threads \
--exclude-files $(COVERAGE_EXCLUDED_FILES) \
--features=integer,internal-keycache \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
-p $(TFHE_SPEC) -- -Z unstable-options --report-time integer::
.PHONY: test_high_level_api # Run all the tests for high_level_api
test_high_level_api: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings -p $(TFHE_SPEC) \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok -p $(TFHE_SPEC) \
-- high_level_api::
test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) \
-E "test(/high_level_api::.*gpu.*/)"
.PHONY: test_strings # Run the tests for strings ci
test_strings: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=shortint,integer,strings -p $(TFHE_SPEC) \
--features=$(TARGET_ARCH_FEATURE),shortint,integer,strings -p $(TFHE_SPEC) \
-- strings::
.PHONY: test_user_doc # Run tests from the .md documentation
test_user_doc: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
-p $(TFHE_SPEC) \
-- test_user_docs::
.PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
test_user_doc_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
--features=boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
-- test_user_docs::
@@ -839,12 +837,14 @@ test_user_doc_gpu: install_rs_build_toolchain
.PHONY: test_regex_engine # Run tests for regex_engine example
test_regex_engine: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--example regex_engine --features=integer
--example regex_engine \
--features=$(TARGET_ARCH_FEATURE),integer
.PHONY: test_sha256_bool # Run tests for sha256_bool example
test_sha256_bool: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--example sha256_bool --features=boolean
--example sha256_bool \
--features=$(TARGET_ARCH_FEATURE),boolean
.PHONY: test_examples # Run tests for examples
test_examples: test_sha256_bool test_regex_engine
@@ -862,7 +862,7 @@ test_kreyvium: install_rs_build_toolchain
.PHONY: test_tfhe_csprng # Run tfhe-csprng tests
test_tfhe_csprng: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-p tfhe-csprng
--features=$(TARGET_ARCH_FEATURE) -p tfhe-csprng
.PHONY: test_zk_pok # Run tfhe-zk-pok tests
test_zk_pok: install_rs_build_toolchain
@@ -880,7 +880,7 @@ test_zk_wasm_x86_compat_ci: check_nvm_installed
test_zk_wasm_x86_compat: install_rs_build_toolchain build_node_js_api
cd tfhe/tests/zk_wasm_x86_test && npm install
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-p tfhe --test zk_wasm_x86_test --features=integer,zk-pok
-p tfhe --test zk_wasm_x86_test --features=$(TARGET_ARCH_FEATURE),integer,zk-pok
.PHONY: test_versionable # Run tests for tfhe-versionable subcrate
test_versionable: install_rs_build_toolchain
@@ -893,7 +893,7 @@ test_versionable: install_rs_build_toolchain
test_backward_compatibility_ci: install_rs_build_toolchain
TFHE_BACKWARD_COMPAT_DATA_DIR="$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tfhe/$(BACKWARD_COMPAT_DATA_DIR)\"" \
--features=shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture
--features=$(TARGET_ARCH_FEATURE),shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture
.PHONY: test_backward_compatibility # Same as test_backward_compatibility_ci but tries to clone the data repo first if needed
test_backward_compatibility: tfhe/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci
@@ -908,7 +908,7 @@ doc: install_rs_check_toolchain
DOCS_RS=1 \
RUSTDOCFLAGS="--html-in-header katex-header.html" \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
.PHONY: docs # Build rust doc alias for doc
docs: doc
@@ -919,7 +919,7 @@ lint_doc: install_rs_check_toolchain
DOCS_RS=1 \
RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
.PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
lint_docs: lint_doc
@@ -948,7 +948,7 @@ check_md_links: install_mlc
.PHONY: check_compile_tests # Build tests in debug without running them
check_compile_tests: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
--features=experimental,boolean,shortint,integer,internal-keycache \
--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache \
-p $(TFHE_SPEC)
@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
@@ -959,7 +959,7 @@ check_compile_tests: install_rs_build_toolchain
.PHONY: check_compile_tests_benches_gpu # Build tests in debug without running them
check_compile_tests_benches_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
--features=experimental,boolean,shortint,integer,internal-keycache,gpu \
--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache,gpu \
-p $(TFHE_SPEC)
mkdir -p "$(TFHECUDA_BUILD)" && \
cd "$(TFHECUDA_BUILD)" && \
@@ -1038,42 +1038,42 @@ dieharder_csprng: install_dieharder build_tfhe_csprng
.PHONY: print_doc_bench_parameters # Print parameters used in doc benchmarks
print_doc_bench_parameters:
RUSTFLAGS="" cargo run --example print_doc_bench_parameters \
--features=shortint,internal-keycache -p tfhe
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p tfhe
.PHONY: bench_integer # Run benchmarks for unsigned integer
bench_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_signed_integer # Run benchmarks for signed integer
bench_signed_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-signed-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
bench_integer_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
bench_integer_compression: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench glwe_packing_compression-integer-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_integer_compression_gpu # Run benchmarks for integer compression on GPU backend
bench_integer_compression_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench glwe_packing_compression-integer-bench \
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) --
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) --
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
bench_integer_multi_bit: install_rs_check_toolchain
@@ -1081,7 +1081,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
bench_signed_integer_multi_bit: install_rs_check_toolchain
@@ -1089,7 +1089,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-signed-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
bench_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1097,7 +1097,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1105,14 +1105,14 @@ bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
bench_integer_zk: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench zk-pke-bench \
--features=integer,internal-keycache,zk-pok,nightly-avx512 \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,zk-pok,nightly-avx512 \
-p $(TFHE_SPEC) --
.PHONY: bench_shortint # Run benchmarks for shortint
@@ -1120,14 +1120,14 @@ bench_shortint: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench shortint-bench \
--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_shortint_oprf # Run benchmarks for shortint OPRF
bench_shortint_oprf: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench oprf-shortint-bench \
--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_shortint_multi_bit # Run benchmarks for shortint using multi-bit parameters
bench_shortint_multi_bit: install_rs_check_toolchain
@@ -1135,43 +1135,43 @@ bench_shortint_multi_bit: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench shortint-bench \
--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_boolean # Run benchmarks for boolean
bench_boolean: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench boolean-bench \
--features=boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_pbs # Run benchmarks for PBS
bench_pbs: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench pbs-bench \
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_pbs128 # Run benchmarks for PBS using FFT 128 bits
bench_pbs128: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench pbs128-bench \
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
bench_pbs_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_FAST_BENCH=$(FAST_BENCH) cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench pbs-bench \
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_ks # Run benchmarks for keyswitch
bench_ks: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench ks-bench \
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_ks_gpu # Run benchmarks for keyswitch on GPU backend
bench_ks_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench ks-bench \
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
bench_web_js_api_parallel_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
bench_web_js_api_parallel_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
@@ -1207,13 +1207,13 @@ bench_web_js_api_parallel_firefox_ci: setup_venv
bench_hlapi_erc20: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ERC20 operations on GPU
bench_hlapi_erc20_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
bench_tfhe_zk_pok: install_rs_check_toolchain
@@ -1228,32 +1228,32 @@ bench_tfhe_zk_pok: install_rs_check_toolchain
gen_key_cache: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example generates_test_keys \
--features=boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
-- $(MULTI_BIT_ONLY) $(COVERAGE_ONLY)
.PHONY: gen_key_cache_core_crypto # Run function to generate keys and cache them for core_crypto tests
gen_key_cache_core_crypto: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --tests --profile $(CARGO_PROFILE) \
--features=experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
core_crypto::keycache::generate_keys
.PHONY: measure_hlapi_compact_pk_ct_sizes # Measure sizes of public keys and ciphertext for high-level API
measure_hlapi_compact_pk_ct_sizes: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example hlapi_compact_pk_ct_sizes \
--features=integer,internal-keycache
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache
.PHONY: measure_shortint_key_sizes # Measure sizes of bootstrapping and key switching keys for shortint
measure_shortint_key_sizes: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example shortint_key_sizes \
--features=shortint,internal-keycache
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache
.PHONY: measure_boolean_key_sizes # Measure sizes of bootstrapping and key switching keys for boolean
measure_boolean_key_sizes: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example boolean_key_sizes \
--features=boolean,internal-keycache
--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache
.PHONY: parse_integer_benches # Run python parser to output a csv containing integer benches data
parse_integer_benches:
@@ -1265,13 +1265,14 @@ parse_integer_benches:
parse_wasm_benchmarks: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example wasm_benchmarks_parser \
--features=shortint,internal-keycache \
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
-- wasm_benchmark_results.json
.PHONY: write_params_to_file # Gather all crypto parameters into a file with a Sage readable format.
write_params_to_file: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example write_params_to_file --features=boolean,shortint,internal-keycache
--example write_params_to_file \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache
.PHONY: clone_backward_compat_data # Clone the data repo needed for backward compatibility tests
clone_backward_compat_data:
@@ -1286,26 +1287,26 @@ tfhe/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
.PHONY: regex_engine # Run regex_engine example
regex_engine: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example regex_engine --features=integer \
--example regex_engine \
--features=$(TARGET_ARCH_FEATURE),integer \
-- $(REGEX_STRING) $(REGEX_PATTERN)
.PHONY: dark_market # Run dark market example
dark_market: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example dark_market \
--features=integer,internal-keycache \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
-- fhe-modified fhe-parallel plain fhe
.PHONY: sha256_bool # Run sha256_bool example
sha256_bool: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example sha256_bool --features=boolean
--example sha256_bool \
--features=$(TARGET_ARCH_FEATURE),boolean
.PHONY: pcc # pcc stands for pre commit checks (except GPU)
pcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested check_intra_md_links \
clippy_all check_compile_tests
# TFHE lints deactivated as it's incompatible with 1.83 - temporary
# tfhe_lints
clippy_all tfhe_lints check_compile_tests
.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu check_rust_bindings_did_not_change

View File

@@ -70,8 +70,22 @@ production-ready library for all the advanced features of TFHE.
### Cargo.toml configuration
To use the latest version of `TFHE-rs` in your project, you first need to add it as a dependency in your `Cargo.toml`:
+ For x86_64-based machines running Unix-like OSes:
```toml
tfhe = { version = "*", features = ["boolean", "shortint", "integer"] }
tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64-unix"] }
```
+ For Apple Silicon or aarch64-based machines running Unix-like OSes:
```toml
tfhe = { version = "*", features = ["boolean", "shortint", "integer", "aarch64-unix"] }
```
+ For x86_64-based machines with the [`rdseed instruction`](https://en.wikipedia.org/wiki/RDRAND) running Windows:
```toml
tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64"] }
```
> [!Note]

View File

@@ -7,7 +7,14 @@ edition = "2021"
[dependencies]
rayon = { workspace = true }
tfhe = { path = "../../tfhe", features = [ "boolean", "shortint", "integer" ] }
[target.'cfg(target_arch = "x86_64")'.dependencies.tfhe]
path = "../../tfhe"
features = [ "boolean", "shortint", "integer", "x86_64" ]
[target.'cfg(target_arch = "aarch64")'.dependencies.tfhe]
path = "../../tfhe"
features = [ "boolean", "shortint", "integer", "aarch64-unix" ]
[dev-dependencies]
criterion = { version = "0.5.1", features = [ "html_reports" ]}

View File

@@ -1,6 +1,6 @@
use criterion::Criterion;
use tfhe::prelude::*;
use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
use tfhe::shortint::prelude::*;
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};
@@ -12,12 +12,12 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -63,12 +63,12 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -109,12 +109,12 @@ pub fn kreyvium_shortint_trans(c: &mut Criterion) {
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();

View File

@@ -1,6 +1,6 @@
use criterion::Criterion;
use tfhe::prelude::*;
use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
use tfhe::shortint::prelude::*;
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
use tfhe_trivium::{TransCiphering, TriviumStreamShortint};
@@ -12,12 +12,12 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -63,12 +63,12 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -109,12 +109,12 @@ pub fn trivium_shortint_trans(c: &mut Criterion) {
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB".to_string();

View File

@@ -1,6 +1,6 @@
use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
use tfhe::prelude::*;
use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
// Values for these tests come from the github repo renaud1239/Kreyvium,
// commit fd6828f68711276c25f55e605935028f5e843f43
@@ -222,12 +222,12 @@ fn kreyvium_test_shortint_long() {
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();

View File

@@ -1,6 +1,6 @@
use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
use tfhe::prelude::*;
use tfhe::shortint::parameters::V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
// Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
// file testvectors/trivium-80.80.test-vectors
@@ -358,12 +358,12 @@ fn trivium_test_shortint_long() {
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB".to_string();

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-cuda-backend"
version = "0.7.0"
version = "0.6.0"
edition = "2021"
authors = ["Zama team"]
license = "BSD-3-Clause-Clear"

View File

@@ -38,7 +38,6 @@ template <typename Torus> struct int_compression {
scratch_packing_keyswitch_lwe_list_to_glwe_64(
streams[0], gpu_indexes[0], &fp_ks_buffer,
compression_params.small_lwe_dimension,
compression_params.glwe_dimension, compression_params.polynomial_size,
num_radix_blocks, true);
}
@@ -65,7 +64,7 @@ template <typename Torus> struct int_decompression {
Torus *tmp_extracted_lwe;
uint32_t *tmp_indexes_array;
int_radix_lut<Torus> *decompression_rescale_lut;
int_radix_lut<Torus> *carry_extract_lut;
int_decompression(cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, int_radix_params encryption_params,
@@ -84,7 +83,7 @@ template <typename Torus> struct int_decompression {
Torus lwe_accumulator_size = (compression_params.glwe_dimension *
compression_params.polynomial_size +
1);
decompression_rescale_lut = new int_radix_lut<Torus>(
carry_extract_lut = new int_radix_lut<Torus>(
streams, gpu_indexes, gpu_count, encryption_params, 1,
num_radix_blocks, allocate_gpu_memory);
@@ -97,28 +96,19 @@ template <typename Torus> struct int_decompression {
num_radix_blocks * lwe_accumulator_size * sizeof(Torus), streams[0],
gpu_indexes[0]);
// Rescale is done using an identity LUT
// Here we do not divide by message_modulus
// Example: in the 2_2 case we are mapping a 2 bits message onto a 4 bits
// space, we want to keep the original 2 bits value in the 4 bits space,
// so we apply the identity and the encoding will rescale it for us.
auto decompression_rescale_f = [encryption_params](Torus x) -> Torus {
return x;
// Carry extract LUT
auto carry_extract_f = [encryption_params](Torus x) -> Torus {
return x / encryption_params.message_modulus;
};
auto effective_compression_message_modulus =
encryption_params.carry_modulus;
auto effective_compression_carry_modulus = 1;
generate_device_accumulator_with_encoding<Torus>(
streams[0], gpu_indexes[0], decompression_rescale_lut->get_lut(0, 0),
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0],
carry_extract_lut->get_lut(gpu_indexes[0], 0),
encryption_params.glwe_dimension, encryption_params.polynomial_size,
effective_compression_message_modulus,
effective_compression_carry_modulus,
encryption_params.message_modulus, encryption_params.carry_modulus,
decompression_rescale_f);
carry_extract_f);
decompression_rescale_lut->broadcast_lut(streams, gpu_indexes, 0);
carry_extract_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
}
}
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
@@ -127,8 +117,8 @@ template <typename Torus> struct int_decompression {
cuda_drop_async(tmp_extracted_lwe, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_indexes_array, streams[0], gpu_indexes[0]);
decompression_rescale_lut->release(streams, gpu_indexes, gpu_count);
delete decompression_rescale_lut;
carry_extract_lut->release(streams, gpu_indexes, gpu_count);
delete carry_extract_lut;
}
};
#endif

View File

@@ -46,14 +46,7 @@ void scratch_cuda_apply_univariate_lut_kb_64(
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory);
void scratch_cuda_apply_many_univariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint32_t num_many_lut, bool allocate_gpu_memory);
void cuda_apply_univariate_lut_kb_64(void *const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count, void *output_radix_lwe,
@@ -447,41 +440,5 @@ void cleanup_cuda_integer_abs_inplace(void *const *streams,
uint32_t gpu_count,
int8_t **mem_ptr_void);
void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory);
void cuda_integer_are_all_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
void cleanup_cuda_integer_are_all_comparisons_block_true(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void);
void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory);
void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void);
} // extern C
#endif // CUDA_INTEGER_H

View File

@@ -21,8 +21,8 @@ void cuda_keyswitch_lwe_ciphertext_vector_64(
void scratch_packing_keyswitch_lwe_list_to_glwe_64(
void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t num_lwes, bool allocate_gpu_memory);
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
bool allocate_gpu_memory);
void cuda_packing_keyswitch_lwe_list_to_glwe_64(
void *stream, uint32_t gpu_index, void *glwe_array_out,

View File

@@ -1,358 +0,0 @@
#ifndef CNCRT_FAST_KS_CUH
#define CNCRT_FAST_KS_CUH
#undef NDEBUG
#include <assert.h>
#include "device.h"
#include "gadget.cuh"
#include "helper_multi_gpu.h"
#include "keyswitch.cuh"
#include "polynomial/functions.cuh"
#include "polynomial/polynomial_math.cuh"
#include "torus.cuh"
#include "utils/helper.cuh"
#include "utils/kernel_dimensions.cuh"
#include <thread>
#include <vector>
// Integer ceiling division of M by N (for positive integer operands).
// The whole expansion is wrapped in parentheses so the macro composes safely
// inside larger expressions such as `x / CEIL_DIV(a, b)`; without them the
// trailing `/ (N)` would bind to the surrounding expression instead.
// N is expanded twice, so do not pass arguments with side effects.
#define CEIL_DIV(M, N) (((M) + (N)-1) / (N))
// Tile / launch dimensions used by the kernels and launch code in this file.
const int BLOCK_SIZE_GEMM = 64;
const int THREADS_GEMM = 8;
const int BLOCK_SIZE_DECOMP = 8;

// Size in bytes of two (BLOCK_SIZE_GEMM x THREADS_GEMM) tiles of Torus
// elements.
template <typename Torus> uint64_t get_shared_mem_size_tgemm() {
  const uint64_t single_tile_bytes =
      BLOCK_SIZE_GEMM * THREADS_GEMM * sizeof(Torus);
  return 2 * single_tile_bytes;
}
// Reports whether the fast packing-keyswitch (PKS) path may be used for the
// given parameters. The fast path is currently disabled: every argument is
// ignored and the function unconditionally returns false (see the TODO).
__host__ inline bool can_use_pks_fast_path(uint32_t lwe_dimension,
uint32_t num_lwe,
uint32_t polynomial_size,
uint32_t level_count,
uint32_t glwe_dimension) {
// TODO: activate it back, fix tests and extend to level_count > 1
return false;
}
// First decomposition level for a batch of Torus LWE ciphertexts.
// Each thread handles one mask coefficient of one ciphertext: it builds the
// decomposer state for that coefficient (this is where the rounding happens,
// via init_decomposer_state) and stores the value returned by decompose_one.
// The LWE body (last element of each input ciphertext) is never read.
template <typename Torus, typename TorusVec>
__global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,
                                         uint32_t lwe_dimension,
                                         uint32_t num_lwe, uint32_t base_log,
                                         uint32_t level_count) {
  // x axis selects the ciphertext, y axis selects the mask coefficient
  auto ct_index = blockIdx.x * blockDim.x + threadIdx.x;
  auto coeff_index = blockIdx.y * blockDim.y + threadIdx.y;

  // Threads that fall outside the (num_lwe, lwe_dimension) range do nothing
  if (ct_index < num_lwe && coeff_index < lwe_dimension) {
    // Input rows are [mask_0, .., mask_{lwe_dimension - 1}, body], so they
    // are lwe_dimension + 1 elements wide; output rows only hold the mask
    // and are lwe_dimension elements wide.
    auto src_offset = ct_index * (lwe_dimension + 1) + coeff_index;
    auto dst_offset = ct_index * lwe_dimension + coeff_index;

    Torus mask_coeff = lwe_in[src_offset];
    Torus decomposer_state =
        init_decomposer_state(mask_coeff, base_log, level_count);
    Torus level_mask = (1ll << base_log) - 1ll;
    lwe_out[dst_offset] =
        decompose_one<Torus>(decomposer_state, level_mask, base_log);
  }
}
// Continue the decomposition of an array of Torus elements in place.
// The buffer is expected to already contain the values left by the previous
// decomposition step; each element is read, passed to decompose_one and
// overwritten with the value it returns.
// level_count is accepted but not used by this kernel.
template <typename Torus, typename TorusVec>
__global__ void
decompose_vectorize_step_inplace(Torus *buffer_in, uint32_t lwe_dimension,
                                 uint32_t num_lwe, uint32_t base_log,
                                 uint32_t level_count) {
  // x axis selects the ciphertext, y axis selects the mask coefficient
  auto ct_index = blockIdx.x * blockDim.x + threadIdx.x;
  auto coeff_index = blockIdx.y * blockDim.y + threadIdx.y;

  // Threads that fall outside the (num_lwe, lwe_dimension) range do nothing
  if (ct_index < num_lwe && coeff_index < lwe_dimension) {
    // Rows of the buffer are lwe_dimension elements wide (mask only)
    auto offset = ct_index * lwe_dimension + coeff_index;

    Torus decomposer_state = buffer_in[offset];
    Torus level_mask = (1ll << base_log) - 1ll;
    buffer_in[offset] =
        decompose_one<Torus>(decomposer_state, level_mask, base_log);
  }
}
// Multiply matrices A, B of size (M, K), (K, N) respectively
// with K as the inner dimension.
//
// A block of threads processes blocks of size (BLOCK_SIZE_GEMM,
// BLOCK_SIZE_GEMM) splitting them in multiple tiles: (BLOCK_SIZE_GEMM,
// THREADS_GEMM)-shaped tiles of values from A, and a (THREADS_GEMM,
// BLOCK_SIZE_GEMM)-shaped tiles of values from B.
//
// The product is accumulated into C (C += A * B): results are added to the
// values already stored in C, they do not overwrite them.
// A is read with a row stride of K, B with a row stride of stride_B, and C is
// written with a row stride of N.
// blockIdx.y selects the block row of C and blockIdx.x the block column.
// NOTE(review): TorusVec is not referenced anywhere in the kernel body.
// NOTE(review): the thread-index arithmetic presumably expects a 1D block of
// BLOCK_SIZE_GEMM * THREADS_GEMM threads - confirm at the launch sites.
template <typename Torus, typename TorusVec>
__global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
int stride_B, Torus *C) {
const int BM = BLOCK_SIZE_GEMM;
const int BN = BLOCK_SIZE_GEMM;
const int BK = THREADS_GEMM;
const int TM = THREADS_GEMM;
const uint cRow = blockIdx.y;
const uint cCol = blockIdx.x;
// NOTE(review): totalResultsBlocktile is not used in this kernel
const uint totalResultsBlocktile = BM * BN;
// Position of this thread's results inside the output block: one column
// (threadCol) and TM consecutive rows starting at threadRow * TM
const int threadCol = threadIdx.x % BN;
const int threadRow = threadIdx.x / BN;
// Allocate space for the current block tile in shared memory
__shared__ Torus As[BM * BK];
__shared__ Torus Bs[BK * BN];
// Initialize the pointers to the input blocks from A, B
// Tiles from these blocks are loaded to shared memory
A += cRow * BM * K;
B += cCol * BN;
// Each thread will handle multiple sub-blocks
// Each thread loads one element of the A tile and one element of the B tile
const uint innerColA = threadIdx.x % BK;
const uint innerRowA = threadIdx.x / BK;
const uint innerColB = threadIdx.x % BN;
const uint innerRowB = threadIdx.x / BN;
// allocate thread-local cache for results in registerfile
Torus threadResults[TM] = {0};
// Global row of A / column of B this thread loads, used for bounds checks
auto row_A = cRow * BM + innerRowA;
auto col_B = cCol * BN + innerColB;
// For each thread, loop over block tiles
for (uint bkIdx = 0; bkIdx < K; bkIdx += BK) {
auto col_A = bkIdx + innerColA;
auto row_B = bkIdx + innerRowB;
// Out-of-range tile entries are filled with 0 so they do not contribute
// to the dot products
if (row_A < M && col_A < K) {
As[innerRowA * BK + innerColA] = A[innerRowA * K + innerColA];
} else {
As[innerRowA * BK + innerColA] = 0;
}
if (col_B < N && row_B < K) {
Bs[innerRowB * BN + innerColB] = B[innerRowB * stride_B + innerColB];
} else {
Bs[innerRowB * BN + innerColB] = 0;
}
// Wait until both tiles are fully loaded before any thread reads them
__syncthreads();
// Advance blocktile for the next iteration of this loop
A += BK;
B += BK * stride_B;
// calculate per-thread results
for (uint dotIdx = 0; dotIdx < BK; ++dotIdx) {
// we make the dotproduct loop the outside loop, which facilitates
// reuse of the Bs entry, which we can cache in a tmp var.
Torus tmp = Bs[dotIdx * BN + threadCol];
for (uint resIdx = 0; resIdx < TM; ++resIdx) {
threadResults[resIdx] +=
As[(threadRow * TM + resIdx) * BK + dotIdx] * tmp;
}
}
// Make sure every thread is done with the tiles before they are
// overwritten by the next iteration
__syncthreads();
}
// Initialize the pointer to the output block of size (BLOCK_SIZE_GEMM,
// BLOCK_SIZE_GEMM)
C += cRow * BM * N + cCol * BN;
// write out the results
for (uint resIdx = 0; resIdx < TM; ++resIdx) {
int outRow = cRow * BM + threadRow * TM + resIdx;
int outCol = cCol * BN + threadCol;
// Skip results that fall outside the (M, N) output matrix
if (outRow >= M)
continue;
if (outCol >= N)
continue;
C[(threadRow * TM + resIdx) * N + threadCol] += threadResults[resIdx];
}
}
// Finish the keyswitching operation and prepare GLWEs for accumulation.
// 1. Finish the keyswitching computation partially performed with a GEMM:
// - negate the dot product between the GLWE and KSK polynomial
// - add the GLWE message for the N-th polynomial coeff in the message poly
// 2. Rotate each of the GLWE . KSK poly dot products to
// prepare them for accumulation into a single GLWE
//
// Thread mapping: the x axis selects the GLWE (and the matching input LWE),
// the y axis selects the polynomial coefficient.
// in_glwe_buffer is modified in place (values are negated / combined with the
// LWE body); the rotated polynomials are produced in out_glwe_buffer through
// polynomial_accumulate_monic_monomial_mul.
// Both buffers are indexed with a stride of
// polynomial_size * (glwe_dimension + 1) elements per GLWE; lwe_array is
// indexed with a stride of lwe_dimension + 1 elements per LWE.
// NOTE(review): each thread rewrites in_poly at its own coeffIdx right before
// calling polynomial_accumulate_monic_monomial_mul, with no synchronization
// in this kernel. If that helper reads coefficients other than coeffIdx they
// may be written by other threads - confirm against the helper.
template <typename Torus>
__global__ void polynomial_accumulate_monic_monomial_mul_many_neg_and_add_C(
Torus *in_glwe_buffer, Torus *out_glwe_buffer, Torus const *lwe_array,
uint32_t lwe_dimension, uint32_t num_glwes, uint32_t polynomial_size,
uint32_t glwe_dimension) {
uint32_t glwe_id = blockIdx.x * blockDim.x + threadIdx.x;
uint32_t degree = glwe_id; // lwe 0 rotate 0, lwe 1 rotate 1, .. , lwe
// poly_size-1 rotate poly_size-1
uint32_t coeffIdx = blockIdx.y * blockDim.y + threadIdx.y;
if (glwe_id >= num_glwes)
return;
if (coeffIdx >= polynomial_size)
return;
auto in_poly =
in_glwe_buffer + glwe_id * polynomial_size * (glwe_dimension + 1);
auto out_result =
out_glwe_buffer + glwe_id * polynomial_size * (glwe_dimension + 1);
if (coeffIdx == 0) {
// Add the message value of the input LWE (`C`) to the N-th coefficient
// in the GLWE . KSK dot product
// The C is added to the first position of the last polynomial in the GLWE
// which has (glwe_dimension+1) polynomials
// The C value is extracted as the last value of the LWE ct. (of index
// glwe_id); each LWE holds (lwe_dimension + 1) values
in_poly[polynomial_size * glwe_dimension] =
lwe_array[glwe_id * (lwe_dimension + 1) + lwe_dimension] -
in_poly[polynomial_size * glwe_dimension];
// Negate coefficient 0 of polynomials 1 .. glwe_dimension - 1; the last
// polynomial was already handled by the subtraction above
for (int gi = 1; gi < glwe_dimension; ++gi)
in_poly[coeffIdx + gi * polynomial_size] =
-in_poly[coeffIdx + gi * polynomial_size];
} else {
// Otherwise simply negate the input coefficient
for (int gi = 1; gi < glwe_dimension + 1; ++gi)
in_poly[coeffIdx + gi * polynomial_size] =
-in_poly[coeffIdx + gi * polynomial_size];
}
// Negate all the coefficients for rotation for the first poly
in_poly[coeffIdx] = -in_poly[coeffIdx];
// rotate the body
polynomial_accumulate_monic_monomial_mul<Torus>(
out_result, in_poly, degree, coeffIdx, polynomial_size, 1, true);
// rotate the mask too
for (int gi = 1; gi < glwe_dimension + 1; ++gi)
polynomial_accumulate_monic_monomial_mul<Torus>(
out_result + gi * polynomial_size, in_poly + gi * polynomial_size,
degree, coeffIdx, polynomial_size, 1, true);
}
// Host entry point of the fast packing keyswitch: packs num_lwes LWE
// ciphertexts from lwe_array_in into glwe_out. All work is enqueued on
// `stream` after selecting device `gpu_index`:
//   1. zero the second half of the scratch buffer,
//   2. decompose the LWE masks (first level only) into the first half,
//   3. multiply the decomposed masks by the keyswitching key with tgemm,
//      accumulating into the second half,
//   4. negate / add the LWE bodies and rotate every keyswitched GLWE, writing
//      the rotated GLWEs back into the first half,
//   5. call accumulate_glwes to combine them into glwe_out.
// Panics when level_count > 1.
// fp_ks_buffer is used as two halves of num_lwes * memory_unit Torus elements
// each, where memory_unit = max((glwe_dimension + 1) * polynomial_size,
// lwe_dimension).
template <typename Torus, typename TorusVec>
__host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
cudaStream_t stream, uint32_t gpu_index, Torus *glwe_out,
Torus const *lwe_array_in, Torus const *fp_ksk_array, int8_t *fp_ks_buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t base_log, uint32_t level_count, uint32_t num_lwes) {
// Optimization of packing keyswitch when packing many LWEs
if (level_count > 1) {
PANIC("Fast path PKS only supports level_count==1");
}
cudaSetDevice(gpu_index);
check_cuda_error(cudaGetLastError());
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
// The fast path of PKS uses the scratch buffer (d_mem) differently than the
// old path: it needs to store the decomposed masks in the first half of this
// buffer and the keyswitched GLWEs in the second half of the buffer. Thus the
// scratch buffer for the fast path must determine the half-size of the
// scratch buffer as the max between the size of the GLWE and the size of the
// LWE-mask
int memory_unit = glwe_accumulator_size > lwe_dimension
? glwe_accumulator_size
: lwe_dimension;
// ping pong the buffer between successive calls
// split the buffer in two parts of this size
auto d_mem_0 = (Torus *)fp_ks_buffer;
auto d_mem_1 = d_mem_0 + num_lwes * memory_unit;
// Set the scratch buffer to 0 as it is used to accumulate
// decomposition temporary results
cuda_memset_async(d_mem_1, 0, num_lwes * memory_unit * sizeof(Torus), stream,
gpu_index);
check_cuda_error(cudaGetLastError());
// decompose LWEs
// don't decompose LWE body - the LWE has lwe_size + 1 elements. The last
// element, the body is ignored by rounding down the number of blocks assuming
// here that the LWE dimension is a multiple of the block size
// NOTE(review): the grid below is computed with CEIL_DIV, which rounds up;
// the body is skipped because decompose_vectorize_init returns early for
// indices >= lwe_dimension, not because of the grid size.
dim3 grid_decomp(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
CEIL_DIV(lwe_dimension, BLOCK_SIZE_DECOMP));
dim3 threads_decomp(BLOCK_SIZE_DECOMP, BLOCK_SIZE_DECOMP);
// decompose first level
decompose_vectorize_init<Torus, TorusVec>
<<<grid_decomp, threads_decomp, 0, stream>>>(lwe_array_in, d_mem_0,
lwe_dimension, num_lwes,
base_log, level_count);
check_cuda_error(cudaGetLastError());
// gemm to ks the individual LWEs to GLWEs
dim3 grid_gemm(CEIL_DIV(glwe_accumulator_size, BLOCK_SIZE_GEMM),
CEIL_DIV(num_lwes, BLOCK_SIZE_GEMM));
dim3 threads_gemm(BLOCK_SIZE_GEMM * THREADS_GEMM);
auto stride_KSK_buffer = glwe_accumulator_size;
// NOTE(review): shared_mem_size is passed as dynamic shared memory, but tgemm
// declares its tiles as static __shared__ arrays and has no extern
// __shared__ buffer, so this dynamic allocation looks unused - confirm.
uint32_t shared_mem_size = get_shared_mem_size_tgemm<Torus>();
tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0, fp_ksk_array,
stride_KSK_buffer, d_mem_1);
check_cuda_error(cudaGetLastError());
/*
TODO: transpose key to generalize to level_count > 1
for (int li = 1; li < level_count; ++li) {
decompose_vectorize_step_inplace<Torus, TorusVec>
<<<grid_decomp, threads_decomp, 0, stream>>>(
d_mem_0, lwe_dimension, num_lwes, base_log, level_count);
check_cuda_error(cudaGetLastError());
tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size,
stream>>>( num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0,
fp_ksk_array + li * ksk_block_size, stride_KSK_buffer, d_mem_1);
check_cuda_error(cudaGetLastError());
}
*/
// should we include the mask in the rotation ??
dim3 grid_rotate(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
CEIL_DIV(polynomial_size, BLOCK_SIZE_DECOMP));
dim3 threads_rotate(BLOCK_SIZE_DECOMP, BLOCK_SIZE_DECOMP);
// rotate the GLWEs
// The keyswitched GLWEs are read from d_mem_1 and the rotated ones are
// written to d_mem_0 (the decomposed masks stored there are no longer needed)
polynomial_accumulate_monic_monomial_mul_many_neg_and_add_C<Torus>
<<<grid_rotate, threads_rotate, 0, stream>>>(
d_mem_1, d_mem_0, lwe_array_in, lwe_dimension, num_lwes,
polynomial_size, glwe_dimension);
check_cuda_error(cudaGetLastError());
dim3 grid_accumulate(
CEIL_DIV(polynomial_size * (glwe_dimension + 1), BLOCK_SIZE_DECOMP));
dim3 threads_accum(BLOCK_SIZE_DECOMP);
// accumulate to a single glwe
accumulate_glwes<Torus><<<grid_accumulate, threads_accum, 0, stream>>>(
glwe_out, d_mem_0, glwe_dimension, polynomial_size, num_lwes);
check_cuda_error(cudaGetLastError());
}
#endif

View File

@@ -1,8 +1,6 @@
#include "fast_packing_keyswitch.cuh"
#include "keyswitch.cuh"
#include "keyswitch.h"
#include <cstdint>
#include <stdio.h>
/* Perform keyswitch on a batch of 32 bits input LWE ciphertexts.
* Head out to the equivalent operation on 64 bits for more details.
@@ -55,17 +53,15 @@ void cuda_keyswitch_lwe_ciphertext_vector_64(
void scratch_packing_keyswitch_lwe_list_to_glwe_64(
void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t num_lwes, bool allocate_gpu_memory) {
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
bool allocate_gpu_memory) {
scratch_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
static_cast<cudaStream_t>(stream), gpu_index, fp_ks_buffer, lwe_dimension,
static_cast<cudaStream_t>(stream), gpu_index, fp_ks_buffer,
glwe_dimension, polynomial_size, num_lwes, allocate_gpu_memory);
}
/* Perform functional packing keyswitch on a batch of 64 bits input LWE
* ciphertexts.
*/
void cuda_packing_keyswitch_lwe_list_to_glwe_64(
void *stream, uint32_t gpu_index, void *glwe_array_out,
void const *lwe_array_in, void const *fp_ksk_array, int8_t *fp_ks_buffer,
@@ -73,24 +69,13 @@ void cuda_packing_keyswitch_lwe_list_to_glwe_64(
uint32_t output_polynomial_size, uint32_t base_log, uint32_t level_count,
uint32_t num_lwes) {
if (can_use_pks_fast_path(input_lwe_dimension, num_lwes,
output_polynomial_size, level_count,
output_glwe_dimension)) {
host_fast_packing_keyswitch_lwe_list_to_glwe<uint64_t, ulonglong4>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(glwe_array_out),
static_cast<const uint64_t *>(lwe_array_in),
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
base_log, level_count, num_lwes);
} else
host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(glwe_array_out),
static_cast<const uint64_t *>(lwe_array_in),
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
base_log, level_count, num_lwes);
host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(glwe_array_out),
static_cast<const uint64_t *>(lwe_array_in),
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
base_log, level_count, num_lwes);
}
void cleanup_packing_keyswitch_lwe_list_to_glwe(void *stream,

View File

@@ -158,20 +158,16 @@ void execute_keyswitch_async(cudaStream_t const *streams,
template <typename Torus>
__host__ void scratch_packing_keyswitch_lwe_list_to_glwe(
cudaStream_t stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t num_lwes, bool allocate_gpu_memory) {
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
bool allocate_gpu_memory) {
cudaSetDevice(gpu_index);
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
int memory_unit = glwe_accumulator_size > lwe_dimension
? glwe_accumulator_size
: lwe_dimension;
if (allocate_gpu_memory) {
if (allocate_gpu_memory)
*fp_ks_buffer = (int8_t *)cuda_malloc_async(
2 * num_lwes * memory_unit * sizeof(Torus), stream, gpu_index);
}
2 * num_lwes * glwe_accumulator_size * sizeof(Torus), stream,
gpu_index);
}
// public functional packing keyswitch for a single LWE ciphertext
@@ -245,7 +241,6 @@ __global__ void packing_keyswitch_lwe_list_to_glwe(
auto lwe_in = lwe_array_in + input_id * lwe_size;
auto ks_glwe_out = d_mem + input_id * glwe_accumulator_size;
auto glwe_out = glwe_array_out + input_id * glwe_accumulator_size;
// KS LWE to GLWE
packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext<Torus>(
ks_glwe_out, lwe_in, fp_ksk, lwe_dimension_in, glwe_dimension,
@@ -298,18 +293,8 @@ __host__ void host_packing_keyswitch_lwe_list_to_glwe(
dim3 grid(num_blocks, num_lwes);
dim3 threads(num_threads);
// The fast path of PKS uses the scratch buffer (d_mem) differently:
// it needs to store the decomposed masks in the first half of this buffer
// and the keyswitched GLWEs in the second half of the buffer. Thus the
// scratch buffer for the fast path must determine the half-size of the
// scratch buffer as the max between the size of the GLWE and the size of the
// LWE-mask
int memory_unit = glwe_accumulator_size > lwe_dimension_in
? glwe_accumulator_size
: lwe_dimension_in;
auto d_mem = (Torus *)fp_ks_buffer;
auto d_tmp_glwe_array_out = d_mem + num_lwes * memory_unit;
auto d_tmp_glwe_array_out = d_mem + num_lwes * glwe_accumulator_size;
// individually keyswitch each lwe
packing_keyswitch_lwe_list_to_glwe<Torus><<<grid, threads, 0, stream>>>(

View File

@@ -37,32 +37,39 @@ __host__ void host_integer_radix_cmux_kb(
uint32_t num_radix_blocks) {
auto params = mem_ptr->params;
Torus lwe_size = params.big_lwe_dimension + 1;
Torus radix_lwe_size = lwe_size * num_radix_blocks;
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in, lwe_array_true,
radix_lwe_size * sizeof(Torus), streams[0],
gpu_indexes[0]);
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in + radix_lwe_size,
lwe_array_false, radix_lwe_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
for (uint i = 0; i < 2 * num_radix_blocks; i++) {
cuda_memcpy_async_gpu_to_gpu(mem_ptr->condition_array + i * lwe_size,
lwe_condition, lwe_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
// Since our CPU threads will be working on different streams we shall assert
// the work in the main stream is completed
auto true_streams = mem_ptr->zero_if_true_buffer->true_streams;
auto false_streams = mem_ptr->zero_if_false_buffer->false_streams;
for (uint j = 0; j < gpu_count; j++) {
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
}
auto mem_true = mem_ptr->zero_if_true_buffer;
zero_out_if<Torus>(true_streams, gpu_indexes, gpu_count, mem_ptr->tmp_true_ct,
lwe_array_true, lwe_condition, mem_true,
mem_ptr->inverted_predicate_lut, bsks, ksks,
num_radix_blocks);
auto mem_false = mem_ptr->zero_if_false_buffer;
zero_out_if<Torus>(false_streams, gpu_indexes, gpu_count,
mem_ptr->tmp_false_ct, lwe_array_false, lwe_condition,
mem_false, mem_ptr->predicate_lut, bsks, ksks,
num_radix_blocks);
for (uint j = 0; j < mem_ptr->zero_if_true_buffer->active_gpu_count; j++) {
cuda_synchronize_stream(true_streams[j], gpu_indexes[j]);
}
for (uint j = 0; j < mem_ptr->zero_if_false_buffer->active_gpu_count; j++) {
cuda_synchronize_stream(false_streams[j], gpu_indexes[j]);
}
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, mem_ptr->buffer_out, mem_ptr->buffer_in,
mem_ptr->condition_array, bsks, ksks, 2 * num_radix_blocks,
mem_ptr->predicate_lut, params.message_modulus);
// If the condition was true, true_ct will have kept its value and false_ct
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
// have kept its value
auto mem_true = mem_ptr->buffer_out;
auto mem_false = &mem_ptr->buffer_out[radix_lwe_size];
auto added_cts = mem_true;
host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
mem_false, params.big_lwe_dimension, num_radix_blocks);
auto added_cts = mem_ptr->tmp_true_ct;
host_addition<Torus>(streams[0], gpu_indexes[0], added_cts,
mem_ptr->tmp_true_ct, mem_ptr->tmp_false_ct,
params.big_lwe_dimension, num_radix_blocks);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,

View File

@@ -58,9 +58,6 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
case GE:
case LT:
case LE:
if (num_radix_blocks % 2 != 0)
PANIC("Cuda error (comparisons): the number of radix blocks has to be "
"even.")
host_integer_radix_difference_check_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
@@ -71,8 +68,6 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
break;
case MAX:
case MIN:
if (num_radix_blocks % 2 != 0)
PANIC("Cuda error (max/min): the number of radix blocks has to be even.")
host_integer_radix_maxmin_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
@@ -94,91 +89,3 @@ void cleanup_cuda_integer_comparison(void *const *streams,
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
}
void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus);
scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
false, allocate_gpu_memory);
}
void cuda_integer_are_all_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
int_comparison_buffer<uint64_t> *buffer =
(int_comparison_buffer<uint64_t> *)mem_ptr;
host_integer_are_all_comparisons_block_true_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
(uint64_t **)(ksks), num_radix_blocks);
}
void cleanup_cuda_integer_are_all_comparisons_block_true(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void) {
int_comparison_buffer<uint64_t> *mem_ptr =
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
}
void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus);
scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
false, allocate_gpu_memory);
}
void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
int_comparison_buffer<uint64_t> *buffer =
(int_comparison_buffer<uint64_t> *)mem_ptr;
host_integer_is_at_least_one_comparisons_block_true_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
(uint64_t **)(ksks), num_radix_blocks);
}
void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void) {
int_comparison_buffer<uint64_t> *mem_ptr =
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
}

View File

@@ -58,7 +58,7 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
template <typename Torus>
__host__ void are_all_comparisons_block_true(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
uint32_t gpu_count, Torus *lwe_array_out, Torus *lwe_array_in,
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
Torus *const *ksks, uint32_t num_radix_blocks) {
@@ -85,19 +85,16 @@ __host__ void are_all_comparisons_block_true(
while (remaining_blocks > 0) {
// Split in max_value chunks
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
uint32_t chunk_length = std::min(max_value, remaining_blocks);
int num_chunks = remaining_blocks / chunk_length;
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
// as in the worst case we will be adding `max_value` ones
auto input_blocks = tmp_out;
auto accumulator = are_all_block_true_buffer->tmp_block_accumulated;
auto is_max_value_lut = are_all_block_true_buffer->is_max_value;
uint32_t chunk_lengths[num_chunks];
auto begin_remaining_blocks = remaining_blocks;
auto is_equal_to_num_blocks_map =
&are_all_block_true_buffer->is_equal_to_lut_map;
for (int i = 0; i < num_chunks; i++) {
uint32_t chunk_length =
std::min(max_value, begin_remaining_blocks - i * max_value);
chunk_lengths[i] = chunk_length;
accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
input_blocks, big_lwe_dimension,
chunk_length);
@@ -114,33 +111,29 @@ __host__ void are_all_comparisons_block_true(
// is_non_zero_lut_buffer LUT
lut = mem_ptr->eq_buffer->is_non_zero_lut;
} else {
if (chunk_lengths[num_chunks - 1] != max_value) {
if ((*is_equal_to_num_blocks_map).find(chunk_length) !=
(*is_equal_to_num_blocks_map).end()) {
// The LUT is already computed
lut = (*is_equal_to_num_blocks_map)[chunk_length];
} else {
// LUT needs to be computed
uint32_t chunk_length = chunk_lengths[num_chunks - 1];
auto new_lut =
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
max_value, num_radix_blocks, true);
auto is_equal_to_num_blocks_lut_f = [chunk_length](Torus x) -> Torus {
return x == chunk_length;
};
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], is_max_value_lut->get_lut(0, 1),
streams[0], gpu_indexes[0], new_lut->get_lut(gpu_indexes[0], 0),
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
is_equal_to_num_blocks_lut_f);
Torus *h_lut_indexes = (Torus *)malloc(num_chunks * sizeof(Torus));
for (int index = 0; index < num_chunks; index++) {
if (index == num_chunks - 1) {
h_lut_indexes[index] = 1;
} else {
h_lut_indexes[index] = 0;
}
}
cuda_memcpy_async_to_gpu(is_max_value_lut->get_lut_indexes(0, 0),
h_lut_indexes, num_chunks * sizeof(Torus),
streams[0], gpu_indexes[0]);
is_max_value_lut->broadcast_lut(streams, gpu_indexes, 0);
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
free(h_lut_indexes);
new_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
(*is_equal_to_num_blocks_map)[chunk_length] = new_lut;
lut = new_lut;
}
lut = is_max_value_lut;
}
// Applies the LUT
@@ -167,7 +160,7 @@ __host__ void are_all_comparisons_block_true(
template <typename Torus>
__host__ void is_at_least_one_comparisons_block_true(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
uint32_t gpu_count, Torus *lwe_array_out, Torus *lwe_array_in,
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
Torus *const *ksks, uint32_t num_radix_blocks) {
@@ -189,18 +182,14 @@ __host__ void is_at_least_one_comparisons_block_true(
uint32_t remaining_blocks = num_radix_blocks;
while (remaining_blocks > 0) {
// Split in max_value chunks
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
uint32_t chunk_length = std::min(max_value, remaining_blocks);
int num_chunks = remaining_blocks / chunk_length;
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
// as in the worst case we will be adding `max_value` ones
auto input_blocks = mem_ptr->tmp_lwe_array_out;
auto accumulator = buffer->tmp_block_accumulated;
uint32_t chunk_lengths[num_chunks];
auto begin_remaining_blocks = remaining_blocks;
for (int i = 0; i < num_chunks; i++) {
uint32_t chunk_length =
std::min(max_value, begin_remaining_blocks - i * max_value);
chunk_lengths[i] = chunk_length;
accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
input_blocks, big_lwe_dimension,
chunk_length);
@@ -460,9 +449,9 @@ __host__ void tree_sign_reduction(
f = sign_handler_f;
}
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], last_lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus, f);
last_lut->broadcast_lut(streams, gpu_indexes, 0);
streams[0], gpu_indexes[0], last_lut->get_lut(gpu_indexes[0], 0),
glwe_dimension, polynomial_size, message_modulus, carry_modulus, f);
last_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
// Last leaf
integer_radix_apply_univariate_lookup_table_kb<Torus>(
@@ -492,9 +481,8 @@ __host__ void host_integer_radix_difference_check_kb(
if (carry_modulus >= message_modulus) {
// Packing is possible
// Pack inputs
Torus *packed_left = diff_buffer->tmp_packed;
Torus *packed_right =
diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
Torus *packed_left = diff_buffer->tmp_packed_left;
Torus *packed_right = diff_buffer->tmp_packed_right;
// In case the ciphertext is signed, the sign block and the one before it
// are handled separately
if (mem_ptr->is_signed) {
@@ -513,7 +501,10 @@ __host__ void host_integer_radix_difference_check_kb(
auto identity_lut = mem_ptr->identity_lut;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, packed_left, packed_left, bsks, ksks,
2 * packed_num_radix_blocks, identity_lut);
packed_num_radix_blocks, identity_lut);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, packed_right, packed_right, bsks, ksks,
packed_num_radix_blocks, identity_lut);
lhs = packed_left;
rhs = packed_right;
@@ -542,13 +533,11 @@ __host__ void host_integer_radix_difference_check_kb(
// Compare the last block before the sign block separately
auto identity_lut = mem_ptr->identity_lut;
Torus *packed_left = diff_buffer->tmp_packed;
Torus *packed_right =
diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
Torus *last_left_block_before_sign_block =
packed_left + packed_num_radix_blocks * big_lwe_size;
diff_buffer->tmp_packed_left + packed_num_radix_blocks * big_lwe_size;
Torus *last_right_block_before_sign_block =
packed_right + packed_num_radix_blocks * big_lwe_size;
diff_buffer->tmp_packed_right +
packed_num_radix_blocks * big_lwe_size;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, last_left_block_before_sign_block,
lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks, 1,
@@ -626,35 +615,4 @@ __host__ void host_integer_radix_maxmin_kb(
mem_ptr->cmux_buffer, bsks, ksks, total_num_radix_blocks);
}
template <typename Torus>
__host__ void host_integer_are_all_comparisons_block_true_kb(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
Torus *const *ksks, uint32_t num_radix_blocks) {
auto eq_buffer = mem_ptr->eq_buffer;
// It returns a block encrypting 1 if all input blocks are 1
// otherwise the block encrypts 0
are_all_comparisons_block_true<Torus>(streams, gpu_indexes, gpu_count,
lwe_array_out, lwe_array_in, mem_ptr,
bsks, ksks, num_radix_blocks);
}
template <typename Torus>
__host__ void host_integer_is_at_least_one_comparisons_block_true_kb(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
Torus *const *ksks, uint32_t num_radix_blocks) {
auto eq_buffer = mem_ptr->eq_buffer;
// It returns a block encrypting 1 if all input blocks are 1
// otherwise the block encrypts 0
is_at_least_one_comparisons_block_true<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_in, mem_ptr,
bsks, ksks, num_radix_blocks);
}
#endif

View File

@@ -2,7 +2,6 @@
#define CUDA_INTEGER_COMPRESSION_CUH
#include "ciphertext.h"
#include "crypto/fast_packing_keyswitch.cuh"
#include "crypto/keyswitch.cuh"
#include "device.h"
#include "integer/compression/compression.h"
@@ -117,21 +116,11 @@ host_integer_compress(cudaStream_t const *streams, uint32_t const *gpu_indexes,
while (rem_lwes > 0) {
auto chunk_size = min(rem_lwes, mem_ptr->lwe_per_glwe);
if (can_use_pks_fast_path(
input_lwe_dimension, chunk_size, compression_params.polynomial_size,
compression_params.ks_level, compression_params.glwe_dimension)) {
host_fast_packing_keyswitch_lwe_list_to_glwe<Torus, ulonglong4>(
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
compression_params.polynomial_size, compression_params.ks_base_log,
compression_params.ks_level, chunk_size);
} else {
host_packing_keyswitch_lwe_list_to_glwe<Torus>(
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
compression_params.polynomial_size, compression_params.ks_base_log,
compression_params.ks_level, chunk_size);
}
host_packing_keyswitch_lwe_list_to_glwe<Torus>(
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
compression_params.polynomial_size, compression_params.ks_base_log,
compression_params.ks_level, chunk_size);
rem_lwes -= chunk_size;
lwe_subset += chunk_size * lwe_in_size;
@@ -311,7 +300,7 @@ __host__ void host_integer_decompress(
/// Apply PBS to apply a LUT, reduce the noise and go from a small LWE
/// dimension to a big LWE dimension
auto encryption_params = h_mem_ptr->encryption_params;
auto lut = h_mem_ptr->decompression_rescale_lut;
auto lut = h_mem_ptr->carry_extract_lut;
auto active_gpu_count = get_active_gpu_count(num_radix_blocks, gpu_count);
if (active_gpu_count == 1) {
execute_pbs_async<Torus>(

View File

@@ -198,27 +198,6 @@ void scratch_cuda_apply_univariate_lut_kb_64(
allocate_gpu_memory);
}
void scratch_cuda_apply_many_univariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint32_t num_many_lut, bool allocate_gpu_memory) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus);
scratch_cuda_apply_many_univariate_lut_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
(int_radix_lut<uint64_t> **)mem_ptr,
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
num_many_lut, allocate_gpu_memory);
}
void cuda_apply_univariate_lut_kb_64(void *const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count, void *output_radix_lwe,
@@ -258,7 +237,7 @@ void cuda_apply_many_univariate_lut_kb_64(
void scratch_cuda_apply_bivariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
int8_t **mem_ptr, void *input_lut, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
@@ -272,9 +251,8 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
scratch_cuda_apply_bivariate_lut_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
(int_radix_lut<uint64_t> **)mem_ptr,
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
allocate_gpu_memory);
(int_radix_lut<uint64_t> **)mem_ptr, static_cast<uint64_t *>(input_lut),
num_radix_blocks, params, allocate_gpu_memory);
}
void cuda_apply_bivariate_lut_kb_64(

View File

@@ -627,48 +627,26 @@ void rotate_left(Torus *buffer, int mid, uint32_t array_length) {
std::rotate(buffer, buffer + mid, buffer + array_length);
}
/// Caller needs to ensure that the operation applied is coherent from an
/// encoding perspective.
///
/// For example:
///
/// Input encoding has 2 bits and output encoding has 4 bits, applying the
/// identity lut would map the following:
///
/// 0|00|xx -> 0|00|00
/// 0|01|xx -> 0|00|01
/// 0|10|xx -> 0|00|10
/// 0|11|xx -> 0|00|11
///
/// The reason is the identity function is computed in the input space but the
/// scaling is done in the output space, as there are more bits in the output
/// space, the delta is smaller hence the apparent "division" happening.
template <typename Torus>
void generate_lookup_table_with_encoding(Torus *acc, uint32_t glwe_dimension,
uint32_t polynomial_size,
uint32_t input_message_modulus,
uint32_t input_carry_modulus,
uint32_t output_message_modulus,
uint32_t output_carry_modulus,
std::function<Torus(Torus)> f) {
void generate_lookup_table(Torus *acc, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t message_modulus,
uint32_t carry_modulus,
std::function<Torus(Torus)> f) {
uint32_t input_modulus_sup = input_message_modulus * input_carry_modulus;
uint32_t output_modulus_sup = output_message_modulus * output_carry_modulus;
uint32_t box_size = polynomial_size / input_modulus_sup;
auto nbits = sizeof(Torus) * 8;
Torus output_delta =
(static_cast<Torus>(1) << (nbits - 1)) / output_modulus_sup;
uint32_t modulus_sup = message_modulus * carry_modulus;
uint32_t box_size = polynomial_size / modulus_sup;
Torus delta = (1ul << 63) / modulus_sup;
memset(acc, 0, glwe_dimension * polynomial_size * sizeof(Torus));
auto body = &acc[glwe_dimension * polynomial_size];
// This accumulator extracts the carry bits
for (int i = 0; i < input_modulus_sup; i++) {
for (int i = 0; i < modulus_sup; i++) {
int index = i * box_size;
for (int j = index; j < index + box_size; j++) {
auto f_eval = f(i);
body[j] = f_eval * output_delta;
body[j] = f_eval * delta;
}
}
@@ -682,16 +660,6 @@ void generate_lookup_table_with_encoding(Torus *acc, uint32_t glwe_dimension,
rotate_left<Torus>(body, half_box_size, polynomial_size);
}
template <typename Torus>
void generate_lookup_table(Torus *acc, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t message_modulus,
uint32_t carry_modulus,
std::function<Torus(Torus)> f) {
generate_lookup_table_with_encoding(acc, glwe_dimension, polynomial_size,
message_modulus, carry_modulus,
message_modulus, carry_modulus, f);
}
template <typename Torus>
void generate_many_lookup_table(
Torus *acc, uint32_t glwe_dimension, uint32_t polynomial_size,
@@ -700,8 +668,7 @@ void generate_many_lookup_table(
uint32_t modulus_sup = message_modulus * carry_modulus;
uint32_t box_size = polynomial_size / modulus_sup;
auto nbits = sizeof(Torus) * 8;
Torus delta = (static_cast<Torus>(1) << (nbits - 1)) / modulus_sup;
Torus delta = (1ul << 63) / modulus_sup;
memset(acc, 0, glwe_dimension * polynomial_size * sizeof(Torus));
@@ -836,32 +803,6 @@ void generate_device_accumulator_bivariate_with_factor(
free(h_lut);
}
template <typename Torus>
void generate_device_accumulator_with_encoding(
cudaStream_t stream, uint32_t gpu_index, Torus *acc,
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t input_message_modulus, uint32_t input_carry_modulus,
uint32_t output_message_modulus, uint32_t output_carry_modulus,
std::function<Torus(Torus)> f) {
// host lut
Torus *h_lut =
(Torus *)malloc((glwe_dimension + 1) * polynomial_size * sizeof(Torus));
// fill accumulator
generate_lookup_table_with_encoding<Torus>(
h_lut, glwe_dimension, polynomial_size, input_message_modulus,
input_carry_modulus, output_message_modulus, output_carry_modulus, f);
// copy host lut and lut_indexes_vec to device
cuda_memcpy_async_to_gpu(
acc, h_lut, (glwe_dimension + 1) * polynomial_size * sizeof(Torus),
stream, gpu_index);
cuda_synchronize_stream(stream, gpu_index);
free(h_lut);
}
/*
* generate accumulator for device pointer
* v_stream - cuda stream
@@ -877,9 +818,21 @@ void generate_device_accumulator(cudaStream_t stream, uint32_t gpu_index,
uint32_t carry_modulus,
std::function<Torus(Torus)> f) {
generate_device_accumulator_with_encoding(
stream, gpu_index, acc, glwe_dimension, polynomial_size, message_modulus,
carry_modulus, message_modulus, carry_modulus, f);
// host lut
Torus *h_lut =
(Torus *)malloc((glwe_dimension + 1) * polynomial_size * sizeof(Torus));
// fill accumulator
generate_lookup_table<Torus>(h_lut, glwe_dimension, polynomial_size,
message_modulus, carry_modulus, f);
// copy host lut and lut_indexes_vec to device
cuda_memcpy_async_to_gpu(
acc, h_lut, (glwe_dimension + 1) * polynomial_size * sizeof(Torus),
stream, gpu_index);
cuda_synchronize_stream(stream, gpu_index);
free(h_lut);
}
/*
@@ -1102,8 +1055,7 @@ void host_compute_propagation_simulators_and_group_carries(
message_modulus, carry_modulus);
uint32_t modulus_sup = message_modulus * carry_modulus;
auto nbits = sizeof(Torus) * 8;
Torus delta = (static_cast<Torus>(1) << (nbits - 1)) / modulus_sup;
Torus delta = (1ull << 63) / modulus_sup;
auto simulators = mem->simulators;
auto grouping_pgns = mem->grouping_pgns;
host_radix_split_simulators_and_grouping_pgns<Torus>(
@@ -1430,8 +1382,8 @@ __host__ void
create_trivial_radix(cudaStream_t stream, uint32_t gpu_index,
Torus *lwe_array_out, Torus const *scalar_array,
uint32_t lwe_dimension, uint32_t num_radix_blocks,
uint32_t num_scalar_blocks, Torus message_modulus,
Torus carry_modulus) {
uint32_t num_scalar_blocks, uint64_t message_modulus,
uint64_t carry_modulus) {
cudaSetDevice(gpu_index);
size_t radix_size = (lwe_dimension + 1) * num_radix_blocks;
@@ -1451,9 +1403,7 @@ create_trivial_radix(cudaStream_t stream, uint32_t gpu_index,
// Value of the shift we multiply our messages by
// If message_modulus and carry_modulus are always powers of 2 we can simplify
// this
auto nbits = sizeof(Torus) * 8;
Torus delta = (static_cast<Torus>(1) << (nbits - 1)) /
(message_modulus * carry_modulus);
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
device_create_trivial_radix<Torus><<<grid, thds, 0, stream>>>(
lwe_array_out, scalar_array, num_scalar_blocks, lwe_dimension, delta);
@@ -1513,10 +1463,10 @@ reduce_signs(cudaStream_t const *streams, uint32_t const *gpu_indexes,
if (num_sign_blocks > 2) {
auto lut = diff_buffer->reduce_signs_lut;
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus,
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
reduce_two_orderings_function);
lut->broadcast_lut(streams, gpu_indexes, 0);
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
while (num_sign_blocks > 2) {
pack_blocks<Torus>(streams[0], gpu_indexes[0], signs_b, signs_a,
@@ -1547,9 +1497,10 @@ reduce_signs(cudaStream_t const *streams, uint32_t const *gpu_indexes,
auto lut = diff_buffer->reduce_signs_lut;
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus, final_lut_f);
lut->broadcast_lut(streams, gpu_indexes, 0);
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
final_lut_f);
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
pack_blocks<Torus>(streams[0], gpu_indexes[0], signs_b, signs_a,
big_lwe_dimension, 2, 4);
@@ -1566,9 +1517,10 @@ reduce_signs(cudaStream_t const *streams, uint32_t const *gpu_indexes,
auto lut = mem_ptr->diff_buffer->reduce_signs_lut;
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus, final_lut_f);
lut->broadcast_lut(streams, gpu_indexes, 0);
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
final_lut_f);
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, signs_array_out, signs_a, bsks, ksks,
@@ -1587,11 +1539,11 @@ void scratch_cuda_apply_univariate_lut_kb(
1, num_radix_blocks, allocate_gpu_memory);
// It is safe to do this copy on GPU 0, because all LUTs always reside on GPU
// 0
cuda_memcpy_async_to_gpu((*mem_ptr)->get_lut(0, 0), (void *)input_lut,
(params.glwe_dimension + 1) *
params.polynomial_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
(*mem_ptr)->broadcast_lut(streams, gpu_indexes, 0);
cuda_memcpy_async_to_gpu(
(*mem_ptr)->get_lut(gpu_indexes[0], 0), (void *)input_lut,
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
(*mem_ptr)->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
}
template <typename Torus>
@@ -1607,25 +1559,6 @@ void host_apply_univariate_lut_kb(cudaStream_t const *streams,
num_blocks, mem);
}
template <typename Torus>
void scratch_cuda_apply_many_univariate_lut_kb(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, int_radix_lut<Torus> **mem_ptr, Torus const *input_lut,
uint32_t num_radix_blocks, int_radix_params params, uint32_t num_many_lut,
bool allocate_gpu_memory) {
*mem_ptr = new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
1, num_radix_blocks, num_many_lut,
allocate_gpu_memory);
// It is safe to do this copy on GPU 0, because all LUTs always reside on GPU
// 0
cuda_memcpy_async_to_gpu((*mem_ptr)->get_lut(0, 0), (void *)input_lut,
(params.glwe_dimension + 1) *
params.polynomial_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
(*mem_ptr)->broadcast_lut(streams, gpu_indexes, 0);
}
template <typename Torus>
void host_apply_many_univariate_lut_kb(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
@@ -1649,11 +1582,11 @@ void scratch_cuda_apply_bivariate_lut_kb(
1, num_radix_blocks, allocate_gpu_memory);
// It is safe to do this copy on GPU 0, because all LUTs always reside on GPU
// 0
cuda_memcpy_async_to_gpu((*mem_ptr)->get_lut(0, 0), (void *)input_lut,
(params.glwe_dimension + 1) *
params.polynomial_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
(*mem_ptr)->broadcast_lut(streams, gpu_indexes, 0);
cuda_memcpy_async_to_gpu(
(*mem_ptr)->get_lut(gpu_indexes[0], 0), (void *)input_lut,
(params.glwe_dimension + 1) * params.polynomial_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
(*mem_ptr)->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
}
template <typename Torus>
@@ -1693,12 +1626,13 @@ void host_propagate_single_carry(cudaStream_t const *streams,
auto params = mem->params;
auto glwe_dimension = params.glwe_dimension;
auto polynomial_size = params.polynomial_size;
auto message_modulus = params.message_modulus;
auto carry_modulus = params.carry_modulus;
uint32_t big_lwe_size = glwe_dimension * polynomial_size + 1;
auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
auto big_lwe_dimension = big_lwe_size - 1; // For host addition
auto lut_stride = mem->lut_stride;
auto num_many_lut = mem->num_many_lut;
auto output_flag = mem->output_flag + big_lwe_size * num_radix_blocks;
if (requested_flag == outputFlag::FLAG_OVERFLOW)
PANIC("Cuda error: single carry propagation is not supported for overflow, "
"try using add_and_propagate_single_carry");
@@ -1715,7 +1649,7 @@ void host_propagate_single_carry(cudaStream_t const *streams,
if (requested_flag == outputFlag::FLAG_CARRY) {
cuda_memcpy_async_gpu_to_gpu(
output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
mem->output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
}
// Step 2
@@ -1735,40 +1669,45 @@ void host_propagate_single_carry(cudaStream_t const *streams,
if (requested_flag == outputFlag::FLAG_OVERFLOW ||
requested_flag == outputFlag::FLAG_CARRY) {
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
host_addition<Torus>(streams[0], gpu_indexes[0], mem->output_flag,
mem->output_flag,
mem->prop_simu_group_carries_mem->simulators +
(num_radix_blocks - 1) * big_lwe_size,
big_lwe_dimension, 1);
}
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
// Step 3
// Add carries and cleanup OutputFlag::None
host_radix_sum_in_groups<Torus>(
streams[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
mem->sub_streams_1[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
mem->prop_simu_group_carries_mem->resolved_carries, num_radix_blocks,
big_lwe_size, group_size);
auto message_extract = mem->lut_message_extract;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem->sub_streams_1, gpu_indexes, gpu_count, lwe_array, prepared_blocks,
bsks, ksks, num_radix_blocks, message_extract);
if (requested_flag == outputFlag::FLAG_CARRY) {
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
host_addition<Torus>(mem->sub_streams_2[0], gpu_indexes[0],
mem->output_flag, mem->output_flag,
mem->prop_simu_group_carries_mem->resolved_carries +
(mem->num_groups - 1) * big_lwe_size,
big_lwe_dimension, 1);
cuda_memcpy_async_gpu_to_gpu(
prepared_blocks + num_radix_blocks * big_lwe_size, output_flag,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, mem->output_flag, prepared_blocks,
bsks, ksks, num_radix_blocks + 1, mem->lut_message_extract);
mem->sub_streams_2, gpu_indexes, gpu_count, mem->output_flag,
mem->output_flag, bsks, ksks, 1, mem->lut_carry_flag_last);
cuda_memcpy_async_gpu_to_gpu(lwe_array, mem->output_flag,
big_lwe_size_bytes * num_radix_blocks,
streams[0], gpu_indexes[0]);
cuda_memcpy_async_gpu_to_gpu(
carry_out, mem->output_flag + num_radix_blocks * big_lwe_size,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
} else {
auto message_extract = mem->lut_message_extract;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array, prepared_blocks, bsks, ksks,
num_radix_blocks, message_extract);
cuda_memcpy_async_gpu_to_gpu(carry_out, mem->output_flag,
big_lwe_size_bytes, mem->sub_streams_2[0],
gpu_indexes[0]);
}
for (int j = 0; j < mem->active_gpu_count; j++) {
cuda_synchronize_stream(mem->sub_streams_1[j], gpu_indexes[j]);
cuda_synchronize_stream(mem->sub_streams_2[j], gpu_indexes[j]);
}
}
@@ -1784,12 +1723,13 @@ void host_add_and_propagate_single_carry(
auto params = mem->params;
auto glwe_dimension = params.glwe_dimension;
auto polynomial_size = params.polynomial_size;
auto message_modulus = params.message_modulus;
auto carry_modulus = params.carry_modulus;
uint32_t big_lwe_size = glwe_dimension * polynomial_size + 1;
auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
auto big_lwe_dimension = big_lwe_size - 1; // For host addition
auto lut_stride = mem->lut_stride;
auto num_many_lut = mem->num_many_lut;
auto output_flag = mem->output_flag + big_lwe_size * num_radix_blocks;
if (requested_flag == outputFlag::FLAG_OVERFLOW) {
cuda_memcpy_async_gpu_to_gpu(
@@ -1816,12 +1756,12 @@ void host_add_and_propagate_single_carry(
if (requested_flag == outputFlag::FLAG_OVERFLOW) {
auto lut_overflow_prep = mem->lut_overflow_flag_prep;
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, output_flag, mem->last_lhs,
streams, gpu_indexes, gpu_count, mem->output_flag, mem->last_lhs,
mem->last_rhs, bsks, ksks, 1, lut_overflow_prep,
lut_overflow_prep->params.message_modulus);
} else if (requested_flag == outputFlag::FLAG_CARRY) {
cuda_memcpy_async_gpu_to_gpu(
output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
mem->output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
}
@@ -1842,50 +1782,58 @@ void host_add_and_propagate_single_carry(
if (requested_flag == outputFlag::FLAG_OVERFLOW ||
requested_flag == outputFlag::FLAG_CARRY) {
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
host_addition<Torus>(streams[0], gpu_indexes[0], mem->output_flag,
mem->output_flag,
mem->prop_simu_group_carries_mem->simulators +
(num_radix_blocks - 1) * big_lwe_size,
big_lwe_dimension, 1);
}
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
// Step 3
// Add carries and cleanup OutputFlag::None
host_radix_sum_in_groups<Torus>(
streams[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
mem->sub_streams_1[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
mem->prop_simu_group_carries_mem->resolved_carries, num_radix_blocks,
big_lwe_size, group_size);
auto message_extract = mem->lut_message_extract;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem->sub_streams_1, gpu_indexes, gpu_count, lhs_array, prepared_blocks,
bsks, ksks, num_radix_blocks, message_extract);
if (requested_flag == outputFlag::FLAG_OVERFLOW ||
requested_flag == outputFlag::FLAG_CARRY) {
if (num_radix_blocks == 1 && requested_flag == outputFlag::FLAG_OVERFLOW &&
uses_carry == 1) {
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
input_carries, big_lwe_dimension, 1);
host_addition<Torus>(mem->sub_streams_2[0], gpu_indexes[0],
mem->output_flag, mem->output_flag, input_carries,
big_lwe_dimension, 1);
} else {
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
host_addition<Torus>(mem->sub_streams_2[0], gpu_indexes[0],
mem->output_flag, mem->output_flag,
mem->prop_simu_group_carries_mem->resolved_carries +
(mem->num_groups - 1) * big_lwe_size,
big_lwe_dimension, 1);
}
cuda_memcpy_async_gpu_to_gpu(
prepared_blocks + num_radix_blocks * big_lwe_size, output_flag,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, mem->output_flag, prepared_blocks,
bsks, ksks, num_radix_blocks + 1, mem->lut_message_extract);
cuda_memcpy_async_gpu_to_gpu(lhs_array, mem->output_flag,
big_lwe_size_bytes * num_radix_blocks,
streams[0], gpu_indexes[0]);
cuda_memcpy_async_gpu_to_gpu(
carry_out, mem->output_flag + num_radix_blocks * big_lwe_size,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
} else {
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lhs_array, prepared_blocks, bsks, ksks,
num_radix_blocks, mem->lut_message_extract);
if (requested_flag == outputFlag::FLAG_OVERFLOW) {
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem->sub_streams_2, gpu_indexes, gpu_count, mem->output_flag,
mem->output_flag, bsks, ksks, 1, mem->lut_overflow_flag_last);
} else {
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem->sub_streams_2, gpu_indexes, gpu_count, mem->output_flag,
mem->output_flag, bsks, ksks, 1, mem->lut_carry_flag_last);
}
cuda_memcpy_async_gpu_to_gpu(carry_out, mem->output_flag,
big_lwe_size_bytes, mem->sub_streams_2[0],
gpu_indexes[0]);
}
for (int j = 0; j < mem->active_gpu_count; j++) {
cuda_synchronize_stream(mem->sub_streams_1[j], gpu_indexes[j]);
cuda_synchronize_stream(mem->sub_streams_2[j], gpu_indexes[j]);
}
}

View File

@@ -267,8 +267,8 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
streams, gpu_indexes, gpu_count, mem_ptr->params, 2,
2 * ch_amount * num_blocks, reused_lut);
}
auto message_acc = luts_message_carry->get_lut(0, 0);
auto carry_acc = luts_message_carry->get_lut(0, 1);
auto message_acc = luts_message_carry->get_lut(gpu_indexes[0], 0);
auto carry_acc = luts_message_carry->get_lut(gpu_indexes[0], 1);
// define functions for each accumulator
auto lut_f_message = [message_modulus](Torus x) -> Torus {
@@ -285,7 +285,7 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], carry_acc, glwe_dimension, polynomial_size,
message_modulus, carry_modulus, lut_f_carry);
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
luts_message_carry->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
while (r > 2) {
size_t cur_total_blocks = r * num_blocks;
@@ -334,10 +334,10 @@ __host__ void host_integer_partial_sum_ciphertexts_vec_kb(
if (carry_count > 0)
cuda_set_value_async<Torus>(
streams[0], gpu_indexes[0],
luts_message_carry->get_lut_indexes(0, message_count), 1,
luts_message_carry->get_lut_indexes(gpu_indexes[0], message_count), 1,
carry_count);
luts_message_carry->broadcast_lut(streams, gpu_indexes, 0);
luts_message_carry->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
/// For multi GPU execution we create vectors of pointers for inputs and
/// outputs

View File

@@ -31,10 +31,10 @@ __host__ void host_integer_radix_scalar_bitop_kb(
} else {
// We have all possible LUTs pre-computed and we use the decomposed scalar
// as index to recover the right one
cuda_memcpy_async_gpu_to_gpu(lut->get_lut_indexes(0, 0), clear_blocks,
num_clear_blocks * sizeof(Torus), streams[0],
gpu_indexes[0]);
lut->broadcast_lut(streams, gpu_indexes, 0);
cuda_memcpy_async_gpu_to_gpu(lut->get_lut_indexes(gpu_indexes[0], 0),
clear_blocks, num_clear_blocks * sizeof(Torus),
streams[0], gpu_indexes[0]);
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_input, bsks,

View File

@@ -22,9 +22,6 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
case GE:
case LT:
case LE:
if (lwe_ciphertext_count % 2 != 0)
PANIC("Cuda error (scalar comparisons): the number of radix blocks has "
"to be even.")
host_integer_radix_scalar_difference_check_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
@@ -35,9 +32,6 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
break;
case MAX:
case MIN:
if (lwe_ciphertext_count % 2 != 0)
PANIC("Cuda error (scalar max/min): the number of radix blocks has to be "
"even.")
host_integer_radix_scalar_maxmin_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),

View File

@@ -110,11 +110,11 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
};
auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
generate_device_accumulator<Torus>(streams[0], gpu_indexes[0],
lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus,
carry_modulus, scalar_last_leaf_lut_f);
lut->broadcast_lut(streams, gpu_indexes, 0);
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
scalar_last_leaf_lut_f);
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out,
@@ -141,9 +141,8 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
//////////////
// lsb
Torus *lhs = diff_buffer->tmp_packed;
Torus *rhs =
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
Torus *lhs = diff_buffer->tmp_packed_left;
Torus *rhs = diff_buffer->tmp_packed_right;
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
big_lwe_dimension, num_lsb_radix_blocks,
@@ -195,10 +194,10 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
auto lut = diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
generate_device_accumulator_bivariate<Torus>(
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus,
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
scalar_bivariate_last_leaf_lut_f);
lut->broadcast_lut(streams, gpu_indexes, 0);
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_lsb_out,
@@ -211,9 +210,8 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
uint32_t num_lsb_radix_blocks = total_num_radix_blocks;
uint32_t num_scalar_blocks = total_num_scalar_blocks;
Torus *lhs = diff_buffer->tmp_packed;
Torus *rhs =
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
Torus *lhs = diff_buffer->tmp_packed_left;
Torus *rhs = diff_buffer->tmp_packed_right;
pack_blocks<Torus>(streams[0], gpu_indexes[0], lhs, lwe_array_in,
big_lwe_dimension, num_lsb_radix_blocks,
@@ -331,10 +329,10 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
auto lut = mem_ptr->diff_buffer->tree_buffer->tree_last_leaf_scalar_lut;
generate_device_accumulator_bivariate<Torus>(
streams[0], gpu_indexes[0], lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus,
streams[0], gpu_indexes[0], lut->get_lut(gpu_indexes[0], 0),
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
scalar_bivariate_last_leaf_lut_f);
lut->broadcast_lut(streams, gpu_indexes, 0);
lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, are_all_msb_zeros,
@@ -360,9 +358,8 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
//////////////
// lsb
Torus *lhs = diff_buffer->tmp_packed;
Torus *rhs =
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
Torus *lhs = diff_buffer->tmp_packed_left;
Torus *rhs = diff_buffer->tmp_packed_right;
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
big_lwe_dimension, num_lsb_radix_blocks,
@@ -425,10 +422,11 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
auto signed_msb_lut = mem_ptr->signed_msb_lut;
generate_device_accumulator_bivariate<Torus>(
msb_streams[0], gpu_indexes[0], signed_msb_lut->get_lut(0, 0),
params.glwe_dimension, params.polynomial_size, params.message_modulus,
params.carry_modulus, lut_f);
signed_msb_lut->broadcast_lut(streams, gpu_indexes, 0);
msb_streams[0], gpu_indexes[0],
signed_msb_lut->get_lut(gpu_indexes[0], 0), params.glwe_dimension,
params.polynomial_size, params.message_modulus, params.carry_modulus,
lut_f);
signed_msb_lut->broadcast_lut(streams, gpu_indexes, gpu_indexes[0]);
Torus const *sign_block = msb + (num_msb_radix_blocks - 1) * big_lwe_size;
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
@@ -461,9 +459,8 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
auto lwe_array_ct_out = mem_ptr->tmp_lwe_array_out;
auto lwe_array_sign_out =
lwe_array_ct_out + (num_lsb_radix_blocks / 2) * big_lwe_size;
Torus *lhs = diff_buffer->tmp_packed;
Torus *rhs =
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
Torus *lhs = diff_buffer->tmp_packed_left;
Torus *rhs = diff_buffer->tmp_packed_right;
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
big_lwe_dimension, num_lsb_radix_blocks - 1,
@@ -679,10 +676,10 @@ __host__ void host_integer_radix_scalar_equality_check_kb(
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], packed_scalar,
scalar_blocks, 0, num_scalar_blocks, message_modulus);
cuda_memcpy_async_gpu_to_gpu(scalar_comparison_luts->get_lut_indexes(0, 0),
packed_scalar,
num_halved_scalar_blocks * sizeof(Torus),
lsb_streams[0], gpu_indexes[0]);
cuda_memcpy_async_gpu_to_gpu(
scalar_comparison_luts->get_lut_indexes(gpu_indexes[0], 0),
packed_scalar, num_halved_scalar_blocks * sizeof(Torus), lsb_streams[0],
gpu_indexes[0]);
scalar_comparison_luts->broadcast_lut(lsb_streams, gpu_indexes, 0);
integer_radix_apply_univariate_lookup_table_kb<Torus>(

View File

@@ -36,7 +36,7 @@ __host__ void scratch_cuda_integer_radix_scalar_mul_kb(
*mem_ptr =
new int_scalar_mul_buffer<T>(streams, gpu_indexes, gpu_count, params,
num_radix_blocks, allocate_gpu_memory, true);
num_radix_blocks, allocate_gpu_memory);
}
template <typename T, class params>
@@ -94,11 +94,9 @@ __host__ void host_integer_scalar_mul_radix(
}
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
if (mem->anticipated_buffers_drop) {
cuda_drop_async(preshifted_buffer, streams[0], gpu_indexes[0]);
mem->logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
delete (mem->logical_scalar_shift_buffer);
}
cuda_drop_async(preshifted_buffer, streams[0], gpu_indexes[0]);
mem->logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
delete (mem->logical_scalar_shift_buffer);
if (j == 0) {
// lwe array = 0

View File

@@ -136,7 +136,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
num_many_lut, lut_stride);
break;
case 512:
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<512>>(
host_programmable_bootstrap_tbc<Torus, Degree<512>>(
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
@@ -144,7 +144,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
num_many_lut, lut_stride);
break;
case 1024:
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<1024>>(
host_programmable_bootstrap_tbc<Torus, Degree<1024>>(
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
@@ -393,7 +393,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
num_many_lut, lut_stride);
break;
case 512:
host_programmable_bootstrap_cg<Torus, AmortizedDegree<512>>(
host_programmable_bootstrap_cg<Torus, Degree<512>>(
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
@@ -401,7 +401,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
num_many_lut, lut_stride);
break;
case 1024:
host_programmable_bootstrap_cg<Torus, AmortizedDegree<1024>>(
host_programmable_bootstrap_cg<Torus, Degree<1024>>(
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
@@ -468,7 +468,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
num_many_lut, lut_stride);
break;
case 512:
host_programmable_bootstrap<Torus, AmortizedDegree<512>>(
host_programmable_bootstrap<Torus, Degree<512>>(
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
@@ -476,7 +476,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
num_many_lut, lut_stride);
break;
case 1024:
host_programmable_bootstrap<Torus, AmortizedDegree<1024>>(
host_programmable_bootstrap<Torus, Degree<1024>>(
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,

View File

@@ -237,7 +237,7 @@ TEST_P(ClassicalProgrammableBootstrapTestPrimitives_u64, bootstrap) {
(ClassicalProgrammableBootstrapTestParams){
887, 1, 2048, new_t_uniform(46), new_t_uniform(17), 22, 1, 4, 4,
100, 1, 1},
// V0_11_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64
// PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64
(ClassicalProgrammableBootstrapTestParams){
977, 1, 8192, new_gaussian_from_std_dev(3.0144389706858286e-07),
new_gaussian_from_std_dev(2.168404344971009e-19), 16, 2, 8, 8, 100,

View File

@@ -163,29 +163,6 @@ extern "C" {
allocate_gpu_memory: bool,
);
}
extern "C" {
/// Foreign (C ABI) declaration of `scratch_cuda_apply_many_univariate_lut_kb_64`.
///
/// Takes the stream / GPU index arrays, an out-pointer `mem_ptr`, an input LUT pointer and the
/// parameter set listed below. As a foreign item it can only be called from `unsafe` code.
///
/// NOTE(review): going by the `scratch_` prefix and the `*mut *mut i8` out-pointer this
/// presumably allocates the working buffer for the many-LUT univariate apply — confirm against
/// the C header before relying on it.
pub fn scratch_cuda_apply_many_univariate_lut_kb_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr: *mut *mut i8,
input_lut: *const ffi::c_void,
lwe_dimension: u32,
glwe_dimension: u32,
polynomial_size: u32,
ks_level: u32,
ks_base_log: u32,
pbs_level: u32,
pbs_base_log: u32,
grouping_factor: u32,
num_radix_blocks: u32,
message_modulus: u32,
carry_modulus: u32,
pbs_type: PBS_TYPE,
num_many_lut: u32,
allocate_gpu_memory: bool,
);
}
extern "C" {
pub fn cuda_apply_univariate_lut_kb_64(
streams: *const *mut ffi::c_void,
@@ -1106,92 +1083,6 @@ extern "C" {
mem_ptr_void: *mut *mut i8,
);
}
extern "C" {
/// Foreign (C ABI) declaration of `scratch_cuda_integer_are_all_comparisons_block_true_kb_64`.
///
/// Takes the stream / GPU index arrays, an out-pointer `mem_ptr` and the parameter set listed
/// below. As a foreign item it can only be called from `unsafe` code.
///
/// NOTE(review): presumably allocates the buffer later passed as `mem_ptr` to
/// `cuda_integer_are_all_comparisons_block_true_kb_64` — confirm against the C header.
pub fn scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr: *mut *mut i8,
glwe_dimension: u32,
polynomial_size: u32,
big_lwe_dimension: u32,
small_lwe_dimension: u32,
ks_level: u32,
ks_base_log: u32,
pbs_level: u32,
pbs_base_log: u32,
grouping_factor: u32,
num_radix_blocks: u32,
message_modulus: u32,
carry_modulus: u32,
pbs_type: PBS_TYPE,
allocate_gpu_memory: bool,
);
}
extern "C" {
/// Foreign (C ABI) declaration of `cuda_integer_are_all_comparisons_block_true_kb_64`.
///
/// Takes an output and an input LWE array pointer, a `mem_ptr` buffer, the bootstrapping
/// (`bsks`) and keyswitching (`ksks`) key pointer arrays and the number of radix blocks. As a
/// foreign item it can only be called from `unsafe` code.
///
/// NOTE(review): the name suggests it checks that every comparison block is true; the exact
/// output encoding is not visible here — confirm against the CUDA backend.
pub fn cuda_integer_are_all_comparisons_block_true_kb_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
lwe_array_out: *mut ffi::c_void,
lwe_array_in: *const ffi::c_void,
mem_ptr: *mut i8,
bsks: *const *mut ffi::c_void,
ksks: *const *mut ffi::c_void,
num_radix_blocks: u32,
);
}
extern "C" {
/// Foreign (C ABI) declaration of `cleanup_cuda_integer_are_all_comparisons_block_true`.
///
/// As a foreign item it can only be called from `unsafe` code.
///
/// NOTE(review): presumably releases the buffer behind `mem_ptr_void` that the matching
/// `scratch_` function created — confirm against the C header.
pub fn cleanup_cuda_integer_are_all_comparisons_block_true(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr_void: *mut *mut i8,
);
}
extern "C" {
/// Foreign (C ABI) declaration of
/// `scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64`.
///
/// Takes the stream / GPU index arrays, an out-pointer `mem_ptr` and the parameter set listed
/// below. As a foreign item it can only be called from `unsafe` code.
///
/// NOTE(review): presumably allocates the buffer later passed as `mem_ptr` to
/// `cuda_integer_is_at_least_one_comparisons_block_true_kb_64` — confirm against the C header.
pub fn scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr: *mut *mut i8,
glwe_dimension: u32,
polynomial_size: u32,
big_lwe_dimension: u32,
small_lwe_dimension: u32,
ks_level: u32,
ks_base_log: u32,
pbs_level: u32,
pbs_base_log: u32,
grouping_factor: u32,
num_radix_blocks: u32,
message_modulus: u32,
carry_modulus: u32,
pbs_type: PBS_TYPE,
allocate_gpu_memory: bool,
);
}
extern "C" {
/// Foreign (C ABI) declaration of `cuda_integer_is_at_least_one_comparisons_block_true_kb_64`.
///
/// Takes an output and an input LWE array pointer, a `mem_ptr` buffer, the bootstrapping
/// (`bsks`) and keyswitching (`ksks`) key pointer arrays and the number of radix blocks. As a
/// foreign item it can only be called from `unsafe` code.
///
/// NOTE(review): the name suggests it checks that at least one comparison block is true; the
/// exact output encoding is not visible here — confirm against the CUDA backend.
pub fn cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
lwe_array_out: *mut ffi::c_void,
lwe_array_in: *const ffi::c_void,
mem_ptr: *mut i8,
bsks: *const *mut ffi::c_void,
ksks: *const *mut ffi::c_void,
num_radix_blocks: u32,
);
}
extern "C" {
/// Foreign (C ABI) declaration of `cleanup_cuda_integer_is_at_least_one_comparisons_block_true`.
///
/// As a foreign item it can only be called from `unsafe` code.
///
/// NOTE(review): presumably releases the buffer behind `mem_ptr_void` that the matching
/// `scratch_` function created — confirm against the C header.
pub fn cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr_void: *mut *mut i8,
);
}
extern "C" {
pub fn cuda_keyswitch_lwe_ciphertext_vector_32(
stream: *mut ffi::c_void,
@@ -1229,7 +1120,6 @@ extern "C" {
stream: *mut ffi::c_void,
gpu_index: u32,
fp_ks_buffer: *mut *mut i8,
lwe_dimension: u32,
glwe_dimension: u32,
polynomial_size: u32,
num_lwes: u32,

View File

@@ -91,8 +91,6 @@ if __name__ == "__main__":
"boolean_parameters_lattice_estimator.sage",
"shortint_classic_parameters_lattice_estimator.sage",
"shortint_multi_bit_parameters_lattice_estimator.sage",
"shortint_cpke_parameters_lattice_estimator.sage",
"shortint_list_compression_parameters_lattice_estimator.sage",
):
to_update, to_watch = check_security(params_filename)
params_to_update.extend(to_update)

View File

@@ -31,7 +31,7 @@ instance_type = "m6i.4xlarge"
[backend.hyperstack.gpu-test]
environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
flavor_name = "n3-L40x1"
flavor_name = "n3-RTX-A6000x1"
[backend.hyperstack.single-h100]
environment_name = "canada"
@@ -58,12 +58,6 @@ environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
flavor_name = "n3-H100x8-NVLink"
[backend.hyperstack.multi-h100-sxm5]
environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
flavor_name = "n3-H100-SXM5x8"
[backend.hyperstack.multi-a100-nvlink]
environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"

19
scripts/get_arch_feature.sh Executable file
View File

@@ -0,0 +1,19 @@
#!/usr/bin/env bash

# Print, on a single line of stdout, the architecture feature name matching the host:
#   - `aarch64` when the machine hardware name contains `arm64` or `aarch64`, `x86_64` otherwise
#   - suffixed with `-unix` when the kernel name is Linux or Darwin
# e.g. `x86_64-unix` or `aarch64-unix`. Callers capture the output with `$(...)`.

set -e

# Match on `uname -m` only: `uname -a` also prints the hostname and the kernel build string, so
# grepping it could report aarch64 on an x86_64 machine whose hostname merely contains "arm64"
# or "aarch64".
case "$(uname -m)" in
    *arm64* | *aarch64*)
        ARCH_FEATURE=aarch64
        ;;
    *)
        ARCH_FEATURE=x86_64
        ;;
esac

case "$(uname)" in
    Linux | Darwin)
        ARCH_FEATURE="${ARCH_FEATURE}-unix"
        ;;
esac

echo "${ARCH_FEATURE}"

View File

@@ -10,9 +10,6 @@ function usage() {
echo "--multi-bit Run multi-bit tests only: default off"
echo "--unsigned-only Run only unsigned integer tests, by default both signed and unsigned tests are run"
echo "--signed-only Run only signed integer tests, by default both signed and unsigned tests are run"
echo "--nightly-tests Run integer tests configured for nightly runs (3_3 params)"
echo "--fast-tests Run integer set but skip a subset of longer tests"
echo "--long-tests Run only long run integer tests"
echo "--cargo-profile The cargo profile used to build tests"
echo "--backend Backend to use with tfhe-rs"
echo "--avx512-support Set to ON to enable avx512"
@@ -24,7 +21,6 @@ RUST_TOOLCHAIN="+stable"
multi_bit_argument=
sign_argument=
fast_tests_argument=
long_tests_argument=
nightly_tests_argument=
no_big_params_argument=
cargo_profile="release"
@@ -95,10 +91,6 @@ if [[ "${FAST_TESTS}" == TRUE ]]; then
fast_tests_argument=--fast-tests
fi
if [[ "${LONG_TESTS}" == TRUE ]]; then
long_tests_argument=--long-tests
fi
if [[ "${NIGHTLY_TESTS}" == TRUE ]]; then
nightly_tests_argument=--nightly-tests
fi
@@ -112,6 +104,7 @@ if [[ "${backend}" == "gpu" ]]; then
fi
CURR_DIR="$(dirname "$0")"
ARCH_FEATURE="$("${CURR_DIR}/get_arch_feature.sh")"
# TODO autodetect/have a finer CPU count depending on memory
num_cpu_threads="$("${CURR_DIR}"/cpu_count.sh)"
@@ -145,38 +138,32 @@ if [[ "${backend}" == "gpu" ]]; then
fi
fi
filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${long_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})
filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})
if [[ "${FAST_TESTS}" == "TRUE" ]]; then
echo "Running 'fast' test set"
elif [[ "${LONG_TESTS}" == "FALSE" ]]; then
else
echo "Running 'slow' test set"
fi
if [[ "${LONG_TESTS}" == "TRUE" ]]; then
echo "Running 'long run' test set"
fi
if [[ "${NIGHTLY_TESTS}" == "TRUE" ]]; then
echo "Running 'nightly' test set"
fi
echo "${filter_expression}"
cargo "${RUST_TOOLCHAIN}" nextest run \
--tests \
--cargo-profile "${cargo_profile}" \
--package "${tfhe_package}" \
--profile ci \
--features=integer,internal-keycache,zk-pok,experimental,"${avx512_feature}","${gpu_feature}" \
--features="${ARCH_FEATURE}",integer,internal-keycache,zk-pok,experimental,"${avx512_feature}","${gpu_feature}" \
--test-threads "${test_threads}" \
-E "$filter_expression"
if [[ -z ${multi_bit_argument} && -z ${long_tests_argument} ]]; then
if [[ -z ${multi_bit_argument} ]]; then
cargo "${RUST_TOOLCHAIN}" test \
--profile "${cargo_profile}" \
--package "${tfhe_package}" \
--features=integer,internal-keycache,experimental,"${avx512_feature}","${gpu_feature}" \
--features="${ARCH_FEATURE}",integer,internal-keycache,experimental,"${avx512_feature}","${gpu_feature}" \
--doc \
-- --test-threads="${doctest_threads}" integer::"${gpu_feature}"
fi

View File

@@ -65,6 +65,7 @@ if [[ "${FAST_TESTS}" == TRUE ]]; then
fi
CURR_DIR="$(dirname "$0")"
ARCH_FEATURE="$("${CURR_DIR}/get_arch_feature.sh")"
n_threads_small="$("${CURR_DIR}"/cpu_count.sh)"
n_threads_big="${n_threads_small}"
@@ -93,7 +94,7 @@ if [[ "${BIG_TESTS_INSTANCE}" != TRUE ]]; then
--cargo-profile "${cargo_profile}" \
--package "${tfhe_package}" \
--profile ci \
--features=shortint,internal-keycache,zk-pok,experimental \
--features="${ARCH_FEATURE}",shortint,internal-keycache,zk-pok,experimental \
--test-threads "${n_threads_small}" \
-E "${filter_expression_small_params}"
@@ -110,7 +111,7 @@ and not test(~smart_add_and_mul)"""
--cargo-profile "${cargo_profile}" \
--package "${tfhe_package}" \
--profile ci \
--features=shortint,internal-keycache,zk-pok,experimental \
--features="${ARCH_FEATURE}",shortint,internal-keycache,zk-pok,experimental \
--test-threads "${n_threads_big}" \
--no-tests=warn \
-E "${filter_expression_big_params}"
@@ -119,7 +120,7 @@ and not test(~smart_add_and_mul)"""
cargo "${RUST_TOOLCHAIN}" test \
--profile "${cargo_profile}" \
--package "${tfhe_package}" \
--features=shortint,internal-keycache,zk-pok,experimental \
--features="${ARCH_FEATURE}",shortint,internal-keycache,zk-pok,experimental \
--doc \
-- shortint::
fi
@@ -133,7 +134,7 @@ else
--cargo-profile "${cargo_profile}" \
--package "${tfhe_package}" \
--profile ci \
--features=shortint,internal-keycache,experimental \
--features="${ARCH_FEATURE}",shortint,internal-keycache,experimental \
--test-threads "${n_threads_big}" \
-E "${filter_expression}"
@@ -141,7 +142,7 @@ else
cargo "${RUST_TOOLCHAIN}" test \
--profile "${cargo_profile}" \
--package "${tfhe_package}" \
--features=shortint,internal-keycache,experimental \
--features="${ARCH_FEATURE}",shortint,internal-keycache,experimental \
--doc \
-- --test-threads="${n_threads_big}" shortint::
fi

View File

@@ -26,12 +26,6 @@ parser.add_argument(
action="store_true",
help="Run only a small subset of test suite",
)
parser.add_argument(
"--long-tests",
dest="long_tests",
action="store_true",
help="Run only the long tests suite",
)
parser.add_argument(
"--nightly-tests",
dest="nightly_tests",
@@ -86,7 +80,6 @@ EXCLUDED_INTEGER_TESTS = [
"/.*test_wopbs_bivariate_crt_wopbs_param_message_[34]_carry_[34]_ks_pbs_gaussian_2m64$/",
"/.*test_integer_smart_mul_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
"/.*test_integer_default_add_sequence_multi_thread_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
"/.*::tests_long_run::.*/",
]
# skip default_div, default_rem which are covered by default_div_rem
@@ -101,61 +94,55 @@ EXCLUDED_BIG_PARAMETERS = [
"/.*_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
]
def filter_integer_tests(input_args):
(multi_bit_filter, group_filter) = (
("_multi_bit", "_group_[0-9]") if input_args.multi_bit else ("", "")
)
backend_filter = ""
if not input_args.long_tests:
if input_args.backend == "gpu":
backend_filter = "gpu::"
if multi_bit_filter:
# For now, GPU only has specific parameters set for multi-bit
multi_bit_filter = "_gpu_multi_bit"
if input_args.backend == "gpu":
backend_filter = "gpu::"
if multi_bit_filter:
# For now, GPU only has specific parameters set for multi-bit
multi_bit_filter = "_gpu_multi_bit"
filter_expression = [f"test(/^integer::{backend_filter}.*/)"]
filter_expression = [f"test(/^integer::{backend_filter}.*/)"]
if input_args.multi_bit:
filter_expression.append("test(~_multi_bit)")
else:
filter_expression.append("not test(~_multi_bit)")
if input_args.multi_bit:
filter_expression.append("test(~_multi_bit)")
else:
filter_expression.append("not test(~_multi_bit)")
if input_args.signed_only:
filter_expression.append("test(~_signed)")
if input_args.unsigned_only:
filter_expression.append("not test(~_signed)")
if input_args.signed_only:
filter_expression.append("test(~_signed)")
if input_args.unsigned_only:
filter_expression.append("not test(~_signed)")
if input_args.no_big_params:
for pattern in EXCLUDED_BIG_PARAMETERS:
filter_expression.append(f"not test({pattern})")
if input_args.fast_tests and input_args.nightly_tests:
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_[2-3]_carry_[2-3]_.*/)"
)
elif input_args.fast_tests:
# Test only fast default operations with only one set of parameters
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_2_carry_2_.*/)"
)
elif input_args.nightly_tests:
# Test only fast default operations with only one set of parameters
# This subset would run slower than fast_tests hence the use of nightly_tests
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_3_carry_3_.*/)"
)
excluded_tests = (
EXCLUDED_INTEGER_FAST_TESTS if input_args.fast_tests else EXCLUDED_INTEGER_TESTS
)
for pattern in excluded_tests:
if input_args.no_big_params:
for pattern in EXCLUDED_BIG_PARAMETERS:
filter_expression.append(f"not test({pattern})")
else:
if input_args.backend == "gpu":
filter_expression = [f"test(/^integer::gpu::server_key::radix::tests_long_run.*/)"]
elif input_args.backend == "cpu":
filter_expression = [f"test(/^integer::server_key::radix_parallel::tests_long_run.*/)"]
if input_args.fast_tests and input_args.nightly_tests:
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_[2-3]_carry_[2-3]_.*/)"
)
elif input_args.fast_tests:
# Test only fast default operations with only one set of parameters
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_2_carry_2_.*/)"
)
elif input_args.nightly_tests:
# Test only fast default operations with only one set of parameters
# This subset would run slower than fast_tests hence the use of nightly_tests
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_3_carry_3_.*/)"
)
excluded_tests = (
EXCLUDED_INTEGER_FAST_TESTS if input_args.fast_tests else EXCLUDED_INTEGER_TESTS
)
for pattern in excluded_tests:
filter_expression.append(f"not test({pattern})")
return " and ".join(filter_expression)

View File

@@ -7,6 +7,7 @@ edition = "2021"
[dependencies]
clap = "=4.4.4"
lazy_static = "1.4"
log = "0.4"
simplelog = "0.12"
walkdir = "2.5.0"

View File

@@ -101,7 +101,7 @@ pub fn check_tfhe_docs_are_tested() -> Result<(), Error> {
.into_iter()
.filter_map(|entry| {
let path = entry.path().canonicalize().ok()?;
if path.is_file() && path.extension().is_some_and(|e| e == "md") {
if path.is_file() && path.extension().map_or(false, |e| e == "md") {
let file_content = std::fs::read_to_string(&path).ok()?;
if file_content.contains("```rust") {
Some(path.to_path_buf())

View File

@@ -1,4 +1,5 @@
use clap::{Arg, Command};
use lazy_static::lazy_static;
use log::LevelFilter;
use simplelog::{ColorChoice, CombinedLogger, Config, TermLogger, TerminalMode};
use std::sync::atomic::AtomicBool;
@@ -11,8 +12,9 @@ mod utils;
// -------------------------------------------------------------------------------------------------
// CONSTANTS
// -------------------------------------------------------------------------------------------------
static DRY_RUN: AtomicBool = AtomicBool::new(false);
lazy_static! {
static ref DRY_RUN: AtomicBool = AtomicBool::new(false);
}
// -------------------------------------------------------------------------------------------------
// MAIN

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-csprng"
version = "0.5.0"
version = "0.4.1"
edition = "2021"
license = "BSD-3-Clause-Clear"
description = "Cryptographically Secure PRNG used in the TFHE-rs library."
@@ -25,13 +25,29 @@ clap = "=4.4.4"
[features]
parallel = ["rayon"]
software-prng = []
seeder_x86_64_rdseed = []
seeder_unix = []
generator_x86_64_aesni = []
generator_fallback = []
generator_aarch64_aes = []
x86_64 = [
"parallel",
"seeder_x86_64_rdseed",
"generator_x86_64_aesni",
"generator_fallback",
]
x86_64-unix = ["x86_64", "seeder_unix"]
aarch64 = ["parallel", "generator_aarch64_aes", "generator_fallback"]
aarch64-unix = ["aarch64", "seeder_unix"]
[[bench]]
name = "benchmark"
path = "benches/benchmark.rs"
harness = false
required-features = ["seeder_x86_64_rdseed", "generator_x86_64_aesni"]
[[example]]
name = "generate"
path = "examples/generate.rs"
required-features = ["seeder_unix", "generator_fallback"]

View File

@@ -8,13 +8,13 @@ The implementation is based on the AES blockcipher used in CTR mode, as describe
Two implementations are available, an accelerated one on x86_64 CPUs with the `aes` feature and the `sse2` feature, and a pure software one that can be used on other platforms.
The crate also makes two seeders available: one needing the x86_64 instruction `rdseed`, and another based on the Unix random device `/dev/random`; the latter requires the user to provide a secret.
The crate also makes two seeders available: one needing the x86_64 feature `rdseed`, and another based on the Unix random device `/dev/random`; the latter requires the user to provide a secret.
## Running the benchmarks
To execute the benchmarks on an x86_64 platform:
```shell
RUSTFLAGS="-Ctarget-cpu=native" cargo bench
RUSTFLAGS="-Ctarget-cpu=native" cargo bench --features=seeder_x86_64_rdseed,generator_x86_64_aesni
```
## License

View File

@@ -1,53 +1,15 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use tfhe_csprng::generators::{
BytesPerChild, ChildrenCount, DefaultRandomGenerator, RandomGenerator,
AesniRandomGenerator, BytesPerChild, ChildrenCount, RandomGenerator,
};
#[cfg(target_os = "macos")]
use tfhe_csprng::seeders::AppleSecureEnclaveSeeder as ActivatedSeeder;
#[cfg(all(
not(target_os = "macos"),
target_arch = "x86_64",
target_feature = "rdseed"
))]
use tfhe_csprng::seeders::RdseedSeeder as ActivatedSeeder;
#[cfg(all(
not(target_os = "macos"),
not(all(target_arch = "x86_64", target_feature = "rdseed")),
target_family = "unix"
))]
use tfhe_csprng::seeders::UnixSeeder as ActivatedSeeder;
use tfhe_csprng::seeders::Seeder;
use tfhe_csprng::seeders::{RdseedSeeder, Seeder};
// The number of bytes to generate during one benchmark iteration.
const N_GEN: usize = 1_000_000;
fn new_seeder() -> ActivatedSeeder {
#[cfg(target_os = "macos")]
{
ActivatedSeeder
}
#[cfg(all(
not(target_os = "macos"),
target_arch = "x86_64",
target_feature = "rdseed"
))]
{
ActivatedSeeder::new()
}
#[cfg(all(
not(target_os = "macos"),
not(all(target_arch = "x86_64", target_feature = "rdseed")),
target_family = "unix"
))]
{
ActivatedSeeder::new(0)
}
}
fn parent_generate(c: &mut Criterion) {
let mut seeder = new_seeder();
let mut generator = DefaultRandomGenerator::new(seeder.seed());
let mut seeder = RdseedSeeder;
let mut generator = AesniRandomGenerator::new(seeder.seed());
c.bench_function("parent_generate", |b| {
b.iter(|| {
(0..N_GEN).for_each(|_| {
@@ -58,8 +20,8 @@ fn parent_generate(c: &mut Criterion) {
}
fn child_generate(c: &mut Criterion) {
let mut seeder = new_seeder();
let mut generator = DefaultRandomGenerator::new(seeder.seed());
let mut seeder = RdseedSeeder;
let mut generator = AesniRandomGenerator::new(seeder.seed());
let mut generator = generator
.try_fork(ChildrenCount(1), BytesPerChild(N_GEN * 10_000))
.unwrap()
@@ -75,8 +37,8 @@ fn child_generate(c: &mut Criterion) {
}
fn fork(c: &mut Criterion) {
let mut seeder = new_seeder();
let mut generator = DefaultRandomGenerator::new(seeder.seed());
let mut seeder = RdseedSeeder;
let mut generator = AesniRandomGenerator::new(seeder.seed());
c.bench_function("fork", |b| {
b.iter(|| {
black_box(

115
tfhe-csprng/build.rs Normal file
View File

@@ -0,0 +1,115 @@
// To have clear error messages during compilation about why some piece of code may not be available
// we decided to check the features compatibility with the target configuration in this script.
use std::collections::HashMap;
use std::env;
// See https://doc.rust-lang.org/reference/conditional-compilation.html#target_arch for the
// possible values of the compilation configuration options checked below.
// Can be easily extended if needed
/// Target constraints that must hold when a given cargo feature is enabled.
///
/// A constraint left to `None` is not checked for that feature.
pub struct FeatureRequirement {
/// Name of the cargo feature the constraints apply to.
pub feature_name: &'static str,
/// Value the build's target_arch must be equal to, if any.
pub feature_req_target_arch: Option<&'static str>,
/// Value that must appear among the build's target_family values, if any.
pub feature_req_target_family: Option<&'static str>,
}
// Helpers used by `main` to select the requirements of the enabled features and to verify them
// against the target the crate is being built for.
impl FeatureRequirement {
    /// Returns whether the feature this requirement applies to is enabled for the current build.
    ///
    /// As we cannot use cfg!(feature = "feature_name") with something else than a literal, the
    /// status is looked up in the map of enabled features built by the caller.
    ///
    /// # Panics
    ///
    /// Panics, naming the offending feature, if `build_activated_features` has no entry for
    /// `self.feature_name`.
    fn is_activated(&self, build_activated_features: &HashMap<&'static str, bool>) -> bool {
        match build_activated_features.get(self.feature_name) {
            Some(&enabled) => enabled,
            None => panic!(
                "Feature `{}` has requirements but no entry in the enabled features map",
                self.feature_name
            ),
        }
    }

    /// Checks the target of the current build against this requirement.
    ///
    /// # Panics
    ///
    /// Panics with a message naming the feature, the expected value and the actual target
    /// configuration if a constraint is not met. Only the configuration values needed by the
    /// constraints that are actually set get read.
    fn check_requirements(&self) {
        if let Some(feature_req_target_arch) = self.feature_req_target_arch {
            let target_arch = get_target_arch_cfg();
            if feature_req_target_arch != target_arch {
                panic!(
                    "Feature `{}` requires target_arch `{}`, current cfg: `{}`",
                    self.feature_name, feature_req_target_arch, target_arch
                )
            }
        }

        if let Some(feature_req_target_family) = self.feature_req_target_family {
            // The families are provided as a single comma separated string
            let target_families = get_target_family_cfgs();
            let family_found = target_families
                .split(',')
                .any(|family| family == feature_req_target_family);
            if !family_found {
                panic!(
                    "Feature `{}` requires target_family `{}`, current cfgs: `{}`",
                    self.feature_name, feature_req_target_family, target_families
                )
            }
        }
    }
}
// Target requirements of the features that only make sense for some targets.
// const vecs are not yet a thing, so this is a static slice: entries can be added or removed
// without having to keep an explicit array length in sync.
static FEATURE_REQUIREMENTS: &[FeatureRequirement] = &[
    FeatureRequirement {
        feature_name: "seeder_x86_64_rdseed",
        feature_req_target_arch: Some("x86_64"),
        feature_req_target_family: None,
    },
    FeatureRequirement {
        feature_name: "generator_x86_64_aesni",
        feature_req_target_arch: Some("x86_64"),
        feature_req_target_family: None,
    },
    FeatureRequirement {
        feature_name: "seeder_unix",
        feature_req_target_arch: None,
        feature_req_target_family: Some("unix"),
    },
    FeatureRequirement {
        feature_name: "generator_aarch64_aes",
        feature_req_target_arch: Some("aarch64"),
        feature_req_target_family: None,
    },
];
// Pairs a feature name with its enabled status: given the literal "feature_name",
// feature_cfg!("feature_name") expands to the tuple
// ("feature_name", cfg!(feature = "feature_name"))
macro_rules! feature_cfg {
    ($feature:literal) => {
        ($feature, cfg!(feature = $feature))
    };
}
// A static HashMap would require an additional crate (phf or lazy static e.g.), so the map is
// built on demand instead.
/// Returns, for each feature that has target requirements, whether it is enabled in this build.
fn get_feature_enabled_status() -> HashMap<&'static str, bool> {
    let statuses = [
        feature_cfg!("seeder_x86_64_rdseed"),
        feature_cfg!("generator_x86_64_aesni"),
        feature_cfg!("seeder_unix"),
        feature_cfg!("generator_aarch64_aes"),
    ];

    statuses.into_iter().collect()
}
// See https://stackoverflow.com/a/43435335/18088947 for the inspiration of this code
/// Returns the value of the `CARGO_CFG_TARGET_ARCH` environment variable.
///
/// # Panics
///
/// Panics if the variable cannot be read.
fn get_target_arch_cfg() -> String {
    match env::var("CARGO_CFG_TARGET_ARCH") {
        Ok(target_arch) => target_arch,
        Err(err) => panic!("CARGO_CFG_TARGET_ARCH is not set: {:?}", err),
    }
}
/// Returns the raw value of the `CARGO_CFG_TARGET_FAMILY` environment variable; callers split it
/// on `,` to get the individual families.
///
/// # Panics
///
/// Panics if the variable cannot be read.
fn get_target_family_cfgs() -> String {
    match env::var("CARGO_CFG_TARGET_FAMILY") {
        Ok(target_families) => target_families,
        Err(err) => panic!("CARGO_CFG_TARGET_FAMILY is not set: {:?}", err),
    }
}
fn main() {
let feature_enabled_status = get_feature_enabled_status();
// This will panic if some requirements for a feature are not met
FEATURE_REQUIREMENTS
.iter()
.filter(|&req| FeatureRequirement::is_activated(req, &feature_enabled_status))
.for_each(FeatureRequirement::check_requirements);
}

View File

@@ -2,29 +2,35 @@
//! the program stdout. It can also generate a fixed number of bytes by passing a value along the
//! optional argument `--bytes_total`. For testing purpose.
use clap::{value_parser, Arg, Command};
use tfhe_csprng::generators::{DefaultRandomGenerator, RandomGenerator};
#[cfg(feature = "generator_x86_64_aesni")]
use tfhe_csprng::generators::AesniRandomGenerator as ActivatedRandomGenerator;
#[cfg(feature = "generator_aarch64_aes")]
use tfhe_csprng::generators::NeonAesRandomGenerator as ActivatedRandomGenerator;
use tfhe_csprng::generators::RandomGenerator;
#[cfg(all(
not(feature = "generator_x86_64_aesni"),
not(feature = "generator_aarch64_aes"),
feature = "generator_fallback"
))]
use tfhe_csprng::generators::SoftwareRandomGenerator as ActivatedRandomGenerator;
use std::io::prelude::*;
use std::io::{stdout, StdoutLock};
#[cfg(target_os = "macos")]
use tfhe_csprng::seeders::AppleSecureEnclaveSeeder as ActivatedSeeder;
#[cfg(all(
not(target_os = "macos"),
target_arch = "x86_64",
target_feature = "rdseed"
))]
#[cfg(all(not(target_os = "macos"), feature = "seeder_x86_64_rdseed"))]
use tfhe_csprng::seeders::RdseedSeeder as ActivatedSeeder;
use tfhe_csprng::seeders::Seeder;
#[cfg(all(
not(target_os = "macos"),
not(all(target_arch = "x86_64", target_feature = "rdseed")),
target_family = "unix"
not(feature = "seeder_x86_64_rdseed"),
feature = "seeder_unix"
))]
use tfhe_csprng::seeders::UnixSeeder as ActivatedSeeder;
fn write_bytes(
buffer: &mut [u8],
generator: &mut DefaultRandomGenerator,
generator: &mut ActivatedRandomGenerator,
stdout: &mut StdoutLock<'_>,
) -> std::io::Result<()> {
buffer.iter_mut().zip(generator).for_each(|(b, g)| *b = g);
@@ -33,7 +39,7 @@ fn write_bytes(
fn infinite_bytes_generation(
buffer: &mut [u8],
generator: &mut DefaultRandomGenerator,
generator: &mut ActivatedRandomGenerator,
stdout: &mut StdoutLock<'_>,
) {
while write_bytes(buffer, generator, stdout).is_ok() {}
@@ -42,7 +48,7 @@ fn infinite_bytes_generation(
fn bytes_generation(
bytes_total: usize,
buffer: &mut [u8],
generator: &mut DefaultRandomGenerator,
generator: &mut ActivatedRandomGenerator,
stdout: &mut StdoutLock<'_>,
) {
let quotient = bytes_total / buffer.len();
@@ -55,29 +61,6 @@ fn bytes_generation(
write_bytes(&mut buffer[0..remaining], generator, stdout).unwrap()
}
fn new_seeder() -> ActivatedSeeder {
#[cfg(target_os = "macos")]
{
ActivatedSeeder
}
#[cfg(all(
not(target_os = "macos"),
target_arch = "x86_64",
target_feature = "rdseed"
))]
{
ActivatedSeeder::new()
}
#[cfg(all(
not(target_os = "macos"),
not(all(target_arch = "x86_64", target_feature = "rdseed")),
target_family = "unix"
))]
{
ActivatedSeeder::new(0)
}
}
pub fn main() {
let matches = Command::new(
"Generate a stream of random numbers, specify no flags for infinite generation",
@@ -91,11 +74,25 @@ pub fn main() {
)
.get_matches();
// Ugly hack to be able to use UnixSeeder
#[cfg(all(
not(target_os = "macos"),
not(feature = "seeder_x86_64_rdseed"),
feature = "seeder_unix"
))]
let new_seeder = || ActivatedSeeder::new(0);
#[cfg(not(all(
not(target_os = "macos"),
not(feature = "seeder_x86_64_rdseed"),
feature = "seeder_unix"
)))]
let new_seeder = || ActivatedSeeder;
let mut seeder = new_seeder();
let seed = seeder.seed();
// Don't print on std out
eprintln!("seed={seed:?}");
let mut generator = DefaultRandomGenerator::new(seed);
let mut generator = ActivatedRandomGenerator::new(seed);
let stdout = stdout();
let mut buffer = [0u8; 16];

View File

@@ -206,6 +206,7 @@ pub use index::*;
/// A module containing structures to manage table indices and buffer pointers together properly.
mod states;
pub use states::*;
/// A module containing an abstraction for aes block ciphers.
mod block_cipher;

View File

@@ -1,5 +1,6 @@
use crate::generators::aes_ctr::states::State;
use crate::generators::aes_ctr::{AesBlockCipher, AesCtrGenerator, ChildrenClosure, TableIndex};
use crate::generators::aes_ctr::{
AesBlockCipher, AesCtrGenerator, ChildrenClosure, State, TableIndex,
};
use crate::generators::{BytesPerChild, ChildrenCount, ForkError};
/// A type alias for the parallel children iterator type.

View File

@@ -1,9 +0,0 @@
#[cfg(all(target_arch = "x86_64", not(feature = "software-prng")))]
pub type DefaultRandomGenerator = super::AesniRandomGenerator;
#[cfg(all(target_arch = "aarch64", not(feature = "software-prng")))]
pub type DefaultRandomGenerator = super::NeonAesRandomGenerator;
#[cfg(any(
feature = "software-prng",
not(any(target_arch = "x86_64", target_arch = "aarch64"))
))]
pub type DefaultRandomGenerator = super::SoftwareRandomGenerator;

View File

@@ -25,8 +25,7 @@ impl AesBlockCipher for ArmAesBlockCipher {
if !(aes_detected && neon_detected) {
panic!(
"The ArmAesBlockCipher requires both aes and neon aarch64 CPU features.\n\
aes feature available: {}\nneon feature available: {}\n\
Please consider enabling the SoftwareRandomGenerator with the `software-prng` feature",
aes feature available: {}\nneon feature available: {}\n.",
aes_detected, neon_detected
)
}

View File

@@ -20,8 +20,7 @@ impl AesBlockCipher for AesniBlockCipher {
if !(aes_detected && sse2_detected) {
panic!(
"The AesniBlockCipher requires both aes and sse2 x86 CPU features.\n\
aes feature available: {}\nsse2 feature available: {}\n\
Please consider enabling the SoftwareRandomGenerator with the `software-prng` feature",
aes feature available: {}\nsse2 feature available: {}\n.",
aes_detected, sse2_detected
)
}

View File

@@ -1,12 +1,14 @@
#[cfg(target_arch = "x86_64")]
#[cfg(feature = "generator_x86_64_aesni")]
mod aesni;
#[cfg(target_arch = "x86_64")]
#[cfg(feature = "generator_x86_64_aesni")]
pub use aesni::*;
#[cfg(target_arch = "aarch64")]
#[cfg(feature = "generator_aarch64_aes")]
mod aarch64;
#[cfg(target_arch = "aarch64")]
#[cfg(feature = "generator_aarch64_aes")]
pub use aarch64::*;
#[cfg(feature = "generator_fallback")]
mod soft;
#[cfg(feature = "generator_fallback")]
pub use soft::*;

View File

@@ -123,10 +123,6 @@ mod aes_ctr;
mod implem;
pub use implem::*;
pub mod default;
/// Convenience alias for the most efficient CSPRNG implementation available.
pub use default::DefaultRandomGenerator;
#[cfg(test)]
#[allow(unused)] // to please clippy when tests are not activated
pub mod generator_generic_test {

View File

@@ -3,12 +3,12 @@ mod apple_secure_enclave_seeder;
#[cfg(target_os = "macos")]
pub use apple_secure_enclave_seeder::AppleSecureEnclaveSeeder;
#[cfg(target_arch = "x86_64")]
#[cfg(feature = "seeder_x86_64_rdseed")]
mod rdseed;
#[cfg(target_arch = "x86_64")]
#[cfg(feature = "seeder_x86_64_rdseed")]
pub use rdseed::RdseedSeeder;
#[cfg(target_family = "unix")]
#[cfg(feature = "seeder_unix")]
mod unix;
#[cfg(target_family = "unix")]
#[cfg(feature = "seeder_unix")]
pub use unix::UnixSeeder;

View File

@@ -4,23 +4,7 @@ use crate::seeders::{Seed, Seeder};
///
/// The `rdseed` instruction allows to deliver seeds from a hardware source of entropy see
/// <https://www.felixcloutier.com/x86/rdseed> .
pub struct RdseedSeeder(());
impl RdseedSeeder {
pub fn new() -> Self {
if Self::is_available() {
Self(())
} else {
panic!("Tried to use RdSeedSeeder but rdseed instruction is not enabled on the current machine");
}
}
}
impl Default for RdseedSeeder {
fn default() -> Self {
Self::new()
}
}
pub struct RdseedSeeder;
impl Seeder for RdseedSeeder {
fn seed(&mut self) -> Seed {
@@ -62,6 +46,6 @@ mod test {
#[test]
fn check_bounded_sequence_difference() {
check_seeder_fixed_sequences_different(|_| RdseedSeeder::new());
check_seeder_fixed_sequences_different(|_| RdseedSeeder);
}
}

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-fft"
version = "0.7.0"
version = "0.6.0"
edition = "2021"
description = "tfhe-fft is a pure Rust high performance fast Fourier transform library."
readme = "README.md"

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-ntt"
version = "0.4.0"
version = "0.3.0"
edition = "2021"
description = "tfhe-ntt is a pure Rust high performance number theoretic transform library."
readme = "README.md"

Some files were not shown because too many files have changed in this diff Show More