Compare commits

..

67 Commits

Author SHA1 Message Date
Pedro Alves
f9e94d9b6b chore(gpu): replace cudaStreamCaptureModeGlobal by cudaStreamCaptureModeThreadLocal to avoid CPU multi-thread issues 2025-01-06 18:01:21 -03:00
Pedro Alves
d66e36b529 feat(gpu): implement CUDA Graph to accelerate default classical and
multibit PBS
2025-01-06 17:29:48 -03:00
Agnes Leroy
33ca2c2fab chore(gpu): update multi-bit params, add noise test for the classical & multi-bit PBS 2025-01-06 18:17:27 +01:00
Agnes Leroy
b22e369166 chore(ci): switch gpu tests on push to l40 and deactivate fast pks 2025-01-06 16:54:40 +01:00
dependabot[bot]
90edfdbbe7 chore(deps): bump zgosalvez/github-actions-ensure-sha-pinned-actions
Bumps [zgosalvez/github-actions-ensure-sha-pinned-actions](https://github.com/zgosalvez/github-actions-ensure-sha-pinned-actions) from 3.0.18 to 3.0.19.
- [Release notes](https://github.com/zgosalvez/github-actions-ensure-sha-pinned-actions/releases)
- [Commits](6441882669...6ae615f647)

---
updated-dependencies:
- dependency-name: zgosalvez/github-actions-ensure-sha-pinned-actions
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-01-06 13:09:43 +01:00
dependabot[bot]
f998f00580 chore(deps): bump tj-actions/changed-files from 45.0.5 to 45.0.6
Bumps [tj-actions/changed-files](https://github.com/tj-actions/changed-files) from 45.0.5 to 45.0.6.
- [Release notes](https://github.com/tj-actions/changed-files/releases)
- [Changelog](https://github.com/tj-actions/changed-files/blob/main/HISTORY.md)
- [Commits](bab30c2299...d6e91a2266)

---
updated-dependencies:
- dependency-name: tj-actions/changed-files
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-01-06 13:09:30 +01:00
Mayeul@Zama
57a31d19a7 test(shortint): remove oprf test flakiness 2025-01-03 18:34:02 +01:00
David Testé
9c43c30e66 chore(ci): fix concurrency group format on pull request event
Since the addition of the pull_request_target event, the github.ref context
object returns the name of the base branch. So when a workflow was
triggered on the base branch during an execution in a pull request,
the latter would be cancelled.
Using github.head_ref, when available, fixes this behavior.
On any other event than pull_request or pull_request_target,
github.ref will still be used and work as before.
2025-01-03 17:18:41 +01:00
Arthur Meyre
a9d48c7e35 chore: force wasm-bindgen-rayon to 1.2.2
the new 1.3.0 version changes the way some files are bundled, I don't want
to discover during the release that nothing works properly anymore.
2025-01-02 14:50:58 +01:00
Nicolas Sarlin
863d51feaf chore(hl): remove unused traits 2025-01-02 13:52:37 +01:00
Nicolas Sarlin
ae2aeb3b6b chore(core_crypto): remove unused cfg(bench) 2025-01-02 13:52:37 +01:00
dependabot[bot]
5c44ffad27 chore(deps): bump codecov/codecov-action from 5.1.1 to 5.1.2
Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 5.1.1 to 5.1.2.
- [Release notes](https://github.com/codecov/codecov-action/releases)
- [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md)
- [Commits](7f8b4b4bde...1e68e06f1d)

---
updated-dependencies:
- dependency-name: codecov/codecov-action
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-01-02 13:12:35 +01:00
dependabot[bot]
e42d203fc5 chore(deps): bump zgosalvez/github-actions-ensure-sha-pinned-actions
Bumps [zgosalvez/github-actions-ensure-sha-pinned-actions](https://github.com/zgosalvez/github-actions-ensure-sha-pinned-actions) from 3.0.17 to 3.0.18.
- [Release notes](https://github.com/zgosalvez/github-actions-ensure-sha-pinned-actions/releases)
- [Commits](5d6ac37a4c...6441882669)

---
updated-dependencies:
- dependency-name: zgosalvez/github-actions-ensure-sha-pinned-actions
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-01-02 13:12:27 +01:00
dependabot[bot]
37f25c0ce5 chore(deps): bump actions/upload-artifact from 4.4.3 to 4.5.0
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.4.3 to 4.5.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](b4b15b8c7c...6f51ac03b9)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-01-02 13:12:20 +01:00
Guillermo Oyarzun
cd03b7eef7 feat(gpu): implement vector comparisons gpu 2024-12-23 17:06:55 +01:00
Andrei Stoian
2c8f0ce7de feat(gpu): optimize packing keyswitch in ML special case 2024-12-23 10:32:23 -03:00
Agnes Leroy
e3a93c7d87 chore(ci): add randomized long run tests on CPU and GPU 2024-12-20 17:13:58 +01:00
Agnes Leroy
9b43a9459a chore(gpu): add option to pre-release some buffers in scalar mul 2024-12-20 11:53:31 +01:00
David Testé
33d5091025 chore(deps): bump zama-ai/slab-github-runner 2024-12-20 10:56:22 +01:00
Guillermo Oyarzun
70ff0f726c feat(gpu): implement subarray search 2024-12-20 08:54:35 +01:00
Agnes Leroy
13d55f31ac chore(gpu): minor fix in core crypto comments 2024-12-19 15:54:40 +01:00
Agnes Leroy
7e871e54e1 chore(gpu): fix inconsistency in the use of AmortizedDegree 2024-12-19 15:54:40 +01:00
Agnes Leroy
012585204a chore(gpu): add inputs to erc20 throughput bench with multiple GPUs 2024-12-19 15:03:11 +01:00
Agnes Leroy
d6e45858c1 chore(gpu): rework single carry propagation to avoid using local streams 2024-12-19 10:02:14 +01:00
Mayeul@Zama
ae832c158f chore(csprng): cleanup conditional imports 2024-12-19 09:59:04 +01:00
Mayeul@Zama
8504d79180 chore(core): remove unused file 2024-12-19 09:59:04 +01:00
David Testé
c306e63430 chore(ci): fix secret token naming to avoid collision 2024-12-18 19:56:36 +01:00
David Testé
9195753273 chore(ci): verify triggering actor on pull request from fork
If a contributor who opens a Pull Request from a fork is not one
of the repository collaborators, then the workflow using the
check_triggering_actor subworkflow will exit with a failure.
It can be re-run later by a collaborator who has write access.

This allows reviewers to read the proposed code before running
the CI, ensuring no secrets are leaked outside the repository.
2024-12-18 18:44:52 +01:00
David Testé
bda8ab028e chore(ci): allow external contribution in fast aws tests workflow 2024-12-18 17:17:58 +01:00
Beka Barbakadze
9e8db2179e fix(gpu): fix noise level calculation in full propagation 2024-12-18 14:26:56 +01:00
dependabot[bot]
950cece2a9 chore(deps): bump dtolnay/rust-toolchain
Bumps [dtolnay/rust-toolchain](https://github.com/dtolnay/rust-toolchain) from 315e265cd78dad1e1dcf3a5074f6d6c47029d5aa to a54c7afa936fefeb4456b2dd8068152669aa8203.
- [Release notes](https://github.com/dtolnay/rust-toolchain/releases)
- [Commits](315e265cd7...a54c7afa93)

---
updated-dependencies:
- dependency-name: dtolnay/rust-toolchain
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2024-12-18 11:39:47 +01:00
Agnes Leroy
aee53d3fae fix(gpu): fix lut size in are_all_comparison_blocks_true 2024-12-18 09:11:07 +01:00
Nicolas Sarlin
4e2db929da chore(csprng): prepare release 0.5.0 2024-12-17 09:22:08 +01:00
Nicolas Sarlin
d2c13e4593 chore(doc): fix c_api setup command using removed make target 2024-12-17 09:22:08 +01:00
Nicolas Sarlin
c41b76f892 chore(csprng)!: moved the RandomGenerator detector into tfhe-csprng 2024-12-17 09:22:08 +01:00
Nicolas Sarlin
1ede004e9a chore(tfhe)!: remove arch specific features
BREAKING_CHANGE:
-	The x86_64, x86_64-unix, aarch64, aarch64-unix have been removed, the target
architecture and os family are now automatically detected. A `software_prng`
feature has been added to force the use of a software implementation on older
CPUs
2024-12-17 09:22:08 +01:00
Nicolas Sarlin
1df331d246 chore(csprng)!: remove generator_aarch64_aes feature
BREAKING_CHANGE:
- The `generator_aarch64_aes` feature is no longer supported for tfhe-csprng
2024-12-17 09:22:08 +01:00
Nicolas Sarlin
0f2451e3b7 chore(csprng)!: remove generator_x86_64_aesni feature
BREAKING_CHANGE:
- The `generator_x86_64_aesni` feature is no longer supported for tfhe-csprng
2024-12-17 09:22:08 +01:00
Nicolas Sarlin
3de23d14a2 chore(csprng)!: remove seeder_x86_64_rdseed feature
BREAKING_CHANGE:
- The `seeder_x86_64_rdseed` feature is no longer supported for tfhe-csprng
2024-12-17 09:22:08 +01:00
Nicolas Sarlin
e0ee8af1ac chore(csprng)!: remove seeder_unix feature
BREAKING_CHANGE:
- The `seeder_unix` feature is no longer supported for tfhe-csprng
2024-12-17 09:22:08 +01:00
Agnes Leroy
072005d521 fix(gpu): fix memory leak 2024-12-17 08:58:16 +01:00
Agnes Leroy
241b73704c fix(gpu): fix ct degree and noise level after some ops 2024-12-17 08:58:03 +01:00
Agnes Leroy
8687b69769 fix(gpu): fix single gpu on device other than 0 2024-12-17 08:57:40 +01:00
Nicolas Sarlin
cdb81dd262 doc(shortint): add some clarification about shortint size 2024-12-16 16:28:54 +01:00
Nicolas Sarlin
03956a9a24 chore(zk): check that k <= d for zk crs 2024-12-16 16:00:15 +01:00
Nicolas Sarlin
ef684649f9 chore(backward): move allow(dead_code) to dispatch variants
This makes it possible to detect unused dispatch enums
2024-12-16 16:00:15 +01:00
Nicolas Sarlin
fc642c6f26 chore(zk)!: update parameters for zk v2 2024-12-16 16:00:15 +01:00
Nicolas Sarlin
c2a999d300 feat(zk)!: plug zk v2
BREAKING CHANGE:
- The object ZkVerificationOutCome has been renamed ZkVerificationOutcome.
- Conformance of proofs now checks the scheme version of the CRS. This is
breaking at the shortint and core_crypto levels, and for manually built integer
conformance params.

New CRS will be generated with the V2 Scheme by default, but V1 CRS and proofs
are still accepted, so this is not breaking. New methods have been added to
generate a V1 CRS.
2024-12-16 16:00:15 +01:00
Nicolas Sarlin
ae3e5f1a32 fix(zk-pok): missing Versionize for ComputeLoadProofFields 2024-12-16 16:00:15 +01:00
Nicolas Sarlin
3dcb982a0b feat(versionable): "Version" macro now handles transparent attribute 2024-12-16 16:00:15 +01:00
Agnes Leroy
e9c901b3a9 chore(gpu): rework select to avoid using local streams 2024-12-16 15:26:14 +01:00
Mayeul@Zama
2d8907dfed chore: fix clippy lints after toolchain update 2024-12-16 14:17:20 +01:00
Agnes Leroy
06f8fc8962 chore(gpu): make artifact name unique across different machines 2024-12-16 14:09:30 +01:00
Nicolas Sarlin
381aeb572f chore(all): remove the dependency to lazy_static 2024-12-16 11:24:20 +01:00
Arthur Meyre
3a99ee9718 chore: remove aliases for gaussian parameters for compact PK
- we are TUniform by default so no more aliases for gaussian parameters
2024-12-16 09:50:12 +01:00
Agnes Leroy
86f07045fe chore(gpu): run pbs in parallel in difference_check 2024-12-16 09:23:41 +01:00
Mayeul@Zama
b1ce34f8a7 chore(hlapi): stabilize FheTypes 2024-12-13 18:31:30 +01:00
Agnes Leroy
4388a3dc99 chore(gpu): add sxm5 vm target 2024-12-13 17:25:55 +01:00
Arthur Meyre
805436839d fix(shortint): fix compression encoding change not being taken into account
- this maps better to what was optimized and will dramatically diminish the
pfail as we now have 2 more bits for the LUT redundancy
2024-12-13 16:41:13 +01:00
Arthur Meyre
bdbec55e84 chore: do not crash when ark-ff or wasm_bindgen macros have cfg issues 2024-12-13 16:31:25 +01:00
Arthur Meyre
33131c664a chore(ci): toolchain update 2024-12-13 16:31:25 +01:00
Arthur Meyre
1151bb267e chore: update dependencies 2024-12-13 16:31:25 +01:00
Agnes Leroy
ce9679f1ee doc(gpu): add an example to use arrays on GPU 2024-12-13 10:46:28 +01:00
Agnes Leroy
23b43c33c7 fix(gpu): fix scalar ne 2024-12-12 11:26:51 +01:00
Agnes Leroy
6feaf49906 chore(gpu): remove stream sync in broadcast lut 2024-12-12 10:19:02 +01:00
Agnes Leroy
25f4e5f279 fix(gpu): fix equal 2024-12-12 09:21:44 +01:00
David Testé
c1f05cbf85 chore(ci): use composite action to setup hyperstack instance 2024-12-12 09:18:33 +01:00
358 changed files with 14879 additions and 4152 deletions

View File

@@ -0,0 +1,53 @@
name: Setup Cuda
description: Setup Cuda on Hyperstack instance
inputs:
cuda-version:
description: Version of Cuda to use
required: true
gcc-version:
description: Version of GCC to use
required: true
cmake-version:
description: Version of cmake to use
default: 3.29.6
runs:
using: "composite"
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
shell: bash
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ inputs.cmake-version }}/cmake-${{ inputs.cmake-version }}.tar.gz
tar -zxvf cmake-${{ inputs.cmake-version }}.tar.gz
cd cmake-${{ inputs.cmake-version }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Export CUDA variables
shell: bash
run: |
CUDA_PATH=/usr/local/cuda-${{ inputs.cuda-version }}
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ inputs.cuda-version }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
shell: bash
run: |
{
echo "CC=/usr/bin/gcc-${{ inputs.gcc-version }}";
echo "CXX=/usr/bin/g++-${{ inputs.gcc-version }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ inputs.gcc-version }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
shell: bash
run: nvidia-smi

View File

@@ -26,7 +26,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -50,7 +50,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
@@ -100,7 +100,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -11,16 +11,26 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
pull_request_target:
jobs:
check-user-permission:
if: github.event_name == 'pull_request_target'
uses: ./.github/workflows/check_triggering_actor.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
should-run:
runs-on: ubuntu-latest
needs: check-user-permission
if: github.event_name != 'pull_request_target' ||
needs.check-user-permission.result == 'success'
permissions:
pull-requests: write
outputs:
@@ -55,10 +65,11 @@ jobs:
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
ref: ${{ github.event.pull_request.head.sha }}
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -133,7 +144,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -148,7 +159,7 @@ jobs:
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
needs: [ should-run, setup-instance ]
concurrency:
group: ${{ github.workflow }}_${{ github.ref }}
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
@@ -157,9 +168,10 @@ jobs:
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
ref: ${{ github.event.pull_request.head.sha }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
@@ -270,7 +282,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -75,7 +75,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -99,7 +99,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
@@ -140,7 +140,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -75,7 +75,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -99,7 +99,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
@@ -144,7 +144,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -67,7 +67,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -142,7 +142,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -168,7 +168,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
@@ -250,7 +250,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -27,7 +27,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -51,7 +51,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
@@ -119,7 +119,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -29,7 +29,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -62,7 +62,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
@@ -93,7 +93,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_boolean
path: ${{ env.RESULTS_FILENAME }}
@@ -127,7 +127,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -26,7 +26,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -58,7 +58,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
@@ -81,7 +81,7 @@ jobs:
--walk-subdirs
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_core_crypto
path: ${{ env.RESULTS_FILENAME }}
@@ -115,7 +115,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -29,7 +29,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -63,7 +63,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
@@ -97,7 +97,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_erc20
path: ${{ env.RESULTS_FILENAME }}
@@ -124,7 +124,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -54,7 +54,7 @@ jobs:
echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
@@ -82,7 +82,7 @@ jobs:
--walk-subdirs
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_integer_multi_bit_gpu_default
path: ${{ env.RESULTS_FILENAME }}
@@ -127,7 +127,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
@@ -157,7 +157,7 @@ jobs:
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_core_crypto
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -27,7 +27,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -48,28 +48,19 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Get benchmark details
run: |
{
@@ -84,31 +75,10 @@ jobs:
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
} >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Run benchmarks with AVX512
run: |
make bench_pbs_gpu
@@ -128,7 +98,7 @@ jobs:
--walk-subdirs
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_core_crypto
path: ${{ env.RESULTS_FILENAME }}
@@ -167,7 +137,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -12,7 +12,10 @@ on:
- "l40 (n3-L40x1)"
- "single-h100 (n3-H100x1)"
- "2-h100 (n3-H100x2)"
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
jobs:
parse-inputs:

View File

@@ -54,7 +54,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -75,28 +75,19 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Get benchmark details
run: |
{
@@ -111,34 +102,10 @@ jobs:
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
} >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run benchmarks
run: |
make bench_hlapi_erc20_gpu
@@ -157,9 +124,9 @@ jobs:
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_erc20
name: ${{ github.sha }}_erc20_${{ inputs.profile }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
@@ -196,7 +163,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -15,6 +15,7 @@ on:
- "4-h100 (n3-H100x4)"
- "multi-h100 (n3-H100x8)"
- "multi-h100-nvlink (n3-H100x8-NVLink)"
- "multi-h100-sxm5 (n3-H100x8-SXM5)"
- "multi-a100-nvlink (n3-A100x8-NVLink)"
command:
description: "Benchmark command to run"

View File

@@ -118,7 +118,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -145,28 +145,19 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Get benchmark details
run: |
{
@@ -181,41 +172,10 @@ jobs:
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
} >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
} >> "${GITHUB_ENV}"
- name: Checkout Slab repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
repository: zama-ai/slab
path: slab
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Should run benchmarks with all precisions
if: inputs.all_precisions
run: |
@@ -240,11 +200,18 @@ jobs:
--bench-type ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
repository: zama-ai/slab
path: slab
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Send data to Slab
shell: bash
run: |
@@ -272,7 +239,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -90,7 +90,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -130,7 +130,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
@@ -170,7 +170,7 @@ jobs:
--bench-type ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
path: ${{ env.RESULTS_FILENAME }}
@@ -197,7 +197,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -56,7 +56,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -93,7 +93,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
@@ -136,7 +136,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
path: ${{ env.RESULTS_FILENAME }}
@@ -163,7 +163,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -90,7 +90,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -130,7 +130,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
@@ -164,7 +164,7 @@ jobs:
--bench-type ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
path: ${{ env.RESULTS_FILENAME }}
@@ -191,7 +191,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -32,7 +32,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -84,7 +84,7 @@ jobs:
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_fft
path: ${{ env.RESULTS_FILENAME }}
@@ -126,7 +126,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -32,7 +32,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -84,7 +84,7 @@ jobs:
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_ntt
path: ${{ env.RESULTS_FILENAME }}
@@ -126,7 +126,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -36,7 +36,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -58,7 +58,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -91,7 +91,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
@@ -121,7 +121,7 @@ jobs:
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_tfhe_zk_pok
path: ${{ env.RESULTS_FILENAME }}
@@ -155,7 +155,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -40,7 +40,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -65,7 +65,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -99,7 +99,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
@@ -166,7 +166,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
path: ${{ env.RESULTS_FILENAME }}
@@ -200,7 +200,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -47,7 +47,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -104,7 +104,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -141,7 +141,7 @@ jobs:
} >> "${GITHUB_ENV}"
- name: Install rust
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: nightly
@@ -177,7 +177,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
with:
name: ${{ github.sha }}_integer_zk
path: ${{ env.RESULTS_FILENAME }}
@@ -211,7 +211,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -28,7 +28,7 @@ jobs:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable

View File

@@ -0,0 +1,29 @@
# Check if triggering actor is a collaborator and has write access
name: Check Triggering Actor
on:
workflow_call:
secrets:
TOKEN:
required: true
jobs:
check-actor-permission:
runs-on: ubuntu-latest
steps:
- name: Get User Permission
id: check-access
uses: actions-cool/check-user-permission@956b2e73cdfe3bcb819bb7225e490cb3b18fd76e # v2.2.1
with:
require: write
username: ${{ github.triggering_actor }}
env:
GITHUB_TOKEN: ${{ secrets.TOKEN }}
- name: Check User Permission
if: steps.check-access.outputs.require-result == 'false'
run: |
echo "${{ github.triggering_actor }} does not have permissions on this repo."
echo "Current permission level is ${{ steps.check-access.outputs.user-permission }}"
echo "Job originally triggered by ${{ github.actor }}"
exit 1

View File

@@ -27,7 +27,7 @@ jobs:
make lint_workflow
- name: Ensure SHA pinned actions
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@5d6ac37a4cef8b8df67f482a8e384987766f0213 # v3.0.17
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@6ae615f6475d2ede5ad88bea6baa7a1d5e93ffaa # v3.0.19
with:
allowlist: |
slsa-framework/slsa-github-generator

View File

@@ -25,7 +25,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -47,13 +47,13 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
files_yaml: |
tfhe:
@@ -83,7 +83,7 @@ jobs:
make test_shortint_cov
- name: Upload tfhe coverage to Codecov
uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
with:
token: ${{ secrets.CODECOV_TOKEN }}
@@ -97,7 +97,7 @@ jobs:
make test_integer_cov
- name: Upload tfhe coverage to Codecov
uses: codecov/codecov-action@7f8b4b4bde536c465e797be725718b88c5d95e0e
uses: codecov/codecov-action@1e68e06f1dbfde0e4cefc87efeba9e4643565303
if: steps.changed-files.outputs.tfhe_any_changed == 'true'
with:
token: ${{ secrets.CODECOV_TOKEN }}
@@ -121,7 +121,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -27,7 +27,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -51,7 +51,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
@@ -75,7 +75,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -40,7 +40,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable

View File

@@ -35,7 +35,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -68,7 +68,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -94,60 +94,28 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run core crypto and internal CUDA backend tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu
@@ -187,7 +155,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -34,7 +34,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -66,7 +66,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -92,60 +92,28 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run core crypto and internal CUDA backend tests
run: |
make test_core_crypto_gpu
@@ -185,7 +153,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -25,7 +25,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -49,9 +49,6 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
@@ -71,38 +68,21 @@ jobs:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run core crypto, integer and internal CUDA backend tests
run: |
make test_gpu
@@ -139,7 +119,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -35,7 +35,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -68,7 +68,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -94,60 +94,28 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run multi-bit CUDA integer compression tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_integer_compression_gpu
@@ -190,7 +158,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -1,4 +1,4 @@
name: AWS Long Run Tests on GPU
name: Long Run Tests on GPU
env:
CARGO_TERM_COLOR: always
@@ -15,8 +15,8 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
schedule:
# Weekly tests will be triggered each Friday at 1a.m.
- cron: '0 1 * * FRI'
# Weekly tests will be triggered each Friday at 9p.m.
- cron: "0 21 * * 5"
jobs:
setup-instance:
@@ -29,17 +29,17 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: 2-h100
profile: multi-gpu-test
cuda-tests:
name: Long run GPU H100 tests
name: Long run GPU tests
needs: [ setup-instance ]
concurrency:
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
@@ -53,57 +53,26 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
timeout-minutes: 4320 # 72 hours
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run tests
run: |
make test_integer_long_run_gpu
@@ -119,7 +88,7 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ needs.cuda-tests.result }}
SLACK_MESSAGE: "Integer GPU H100 long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Integer GPU long run tests finished with status: ${{ needs.cuda-tests.result }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (gpu-tests)
@@ -129,7 +98,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -24,7 +24,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -63,7 +63,7 @@ jobs:
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
@@ -110,7 +110,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -35,7 +35,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -68,7 +68,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -94,58 +94,25 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run signed integer tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci
@@ -171,7 +138,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -35,7 +35,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -68,7 +68,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -94,58 +94,25 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run signed integer multi-bit tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci
@@ -171,7 +138,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -42,7 +42,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -75,7 +75,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -101,57 +101,28 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Should run nightly tests
if: github.event_name == 'schedule'
run: |
@@ -160,10 +131,6 @@ jobs:
echo "NIGHTLY_TESTS=TRUE";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run signed integer multi-bit tests
run: |
make test_signed_integer_multi_bit_gpu_ci
@@ -189,7 +156,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -35,7 +35,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -68,7 +68,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -94,58 +94,25 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run unsigned integer tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci
@@ -171,7 +138,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -35,7 +35,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -68,7 +68,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -94,58 +94,25 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run unsigned integer multi-bit tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci
@@ -171,7 +138,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -41,7 +41,7 @@ jobs:
- name: Check for file changes
id: changed-files
uses: tj-actions/changed-files@bab30c2299617f6615ec02a68b9a40d10bd21366
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f
with:
since_last_remote_commit: true
files_yaml: |
@@ -74,7 +74,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -100,54 +100,25 @@ jobs:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
CMAKE_VERSION: 3.29.6
steps:
# Mandatory on hyperstack since a bootable volume is not re-usable yet.
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y checkinstall zlib1g-dev libssl-dev libclang-dev
wget https://github.com/Kitware/CMake/releases/download/v${{ env.CMAKE_VERSION }}/cmake-${{ env.CMAKE_VERSION }}.tar.gz
tar -zxvf cmake-${{ env.CMAKE_VERSION }}.tar.gz
cd cmake-${{ env.CMAKE_VERSION }}
./bootstrap
make -j"$(nproc)"
sudo make install
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- name: Setup Hyperstack dependencies
uses: ./.github/actions/hyperstack_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
- name: Set up home
run: |
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
- name: Export CUDA variables
if: ${{ !cancelled() }}
run: |
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Should run nightly tests
if: github.event_name == 'schedule'
run: |
@@ -156,10 +127,6 @@ jobs:
echo "NIGHTLY_TESTS=TRUE";
} >> "${GITHUB_ENV}"
- name: Check device is detected
if: ${{ !cancelled() }}
run: nvidia-smi
- name: Run unsigned integer multi-bit tests
run: |
make test_unsigned_integer_multi_bit_gpu_ci
@@ -185,7 +152,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -15,8 +15,8 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
schedule:
# Weekly tests will be triggered each Friday at 1a.m.
- cron: '0 1 * * FRI'
# Weekly tests will be triggered each Friday at 9p.m.
- cron: "0 21 * * 5"
jobs:
setup-instance:
@@ -29,7 +29,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -45,6 +45,7 @@ jobs:
group: ${{ github.workflow }}_${{github.event_name}}_${{ github.ref }}
cancel-in-progress: true
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
timeout-minutes: 4320 # 72 hours
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
@@ -53,7 +54,7 @@ jobs:
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
@@ -77,7 +78,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -39,7 +39,7 @@ jobs:
persist-credentials: "false"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable

View File

@@ -50,7 +50,7 @@ jobs:
- name: Prepare package
run: |
cargo package -p tfhe
- uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
with:
name: crate
path: target/package/*.crate

View File

@@ -36,7 +36,7 @@ jobs:
steps:
- name: Start instance
id: start-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
@@ -71,7 +71,7 @@ jobs:
echo "HOME=/home/ubuntu" >> "${GITHUB_ENV}"
- name: Install latest stable
uses: dtolnay/rust-toolchain@315e265cd78dad1e1dcf3a5074f6d6c47029d5aa
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
with:
toolchain: stable
@@ -120,7 +120,7 @@ jobs:
steps:
- name: Stop instance
id: stop-instance
uses: zama-ai/slab-github-runner@98f0788261a7323d5d695a883e20df36591a92b7
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}

View File

@@ -19,11 +19,14 @@ exclude = [
"utils/cargo-tfhe-lints"
]
[workspace.dependencies]
aligned-vec = { version = "0.5", default-features = false }
aligned-vec = { version = "0.6", default-features = false }
bytemuck = "1.14.3"
dyn-stack = { version = "0.10", default-features = false }
dyn-stack = { version = "0.11", default-features = false }
itertools = "0.13"
num-complex = "0.4"
pulp = { version = "0.18.22", default-features = false }
pulp = { version = "0.20.0", default-features = false }
rand = "0.8"
rayon = "1"
serde = { version = "1.0", default-features = false }
wasm-bindgen = ">=0.2.86,<0.2.94"

238
Makefile
View File

@@ -2,7 +2,6 @@ SHELL:=$(shell /usr/bin/env which bash)
OS:=$(shell uname)
RS_CHECK_TOOLCHAIN:=$(shell cat toolchain.txt | tr -d '\n')
CARGO_RS_CHECK_TOOLCHAIN:=+$(RS_CHECK_TOOLCHAIN)
TARGET_ARCH_FEATURE:=$(shell ./scripts/get_arch_feature.sh)
CPU_COUNT=$(shell ./scripts/cpu_count.sh)
RS_BUILD_TOOLCHAIN:=stable
CARGO_RS_BUILD_TOOLCHAIN:=+$(RS_BUILD_TOOLCHAIN)
@@ -25,6 +24,7 @@ BACKWARD_COMPAT_DATA_BRANCH?=v0.4
BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
TFHE_SPEC:=tfhe
WASM_PACK_VERSION="0.13.1"
# We are kind of hacking the cut here, the version cannot contain a quote '"'
WASM_BINDGEN_VERSION:=$(shell grep '^wasm-bindgen[[:space:]]*=' Cargo.toml | cut -d '"' -f 2 | xargs)
WEB_RUNNER_DIR=web-test-runner
@@ -116,8 +116,8 @@ install_wasm_bindgen_cli: install_rs_build_toolchain
.PHONY: install_wasm_pack # Install wasm-pack to build JS packages
install_wasm_pack: install_rs_build_toolchain
@wasm-pack --version > /dev/null 2>&1 || \
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.0 || \
@wasm-pack --version | grep "$(WASM_PACK_VERSION)" > /dev/null 2>&1 || \
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.1 || \
( echo "Unable to install cargo wasm-pack, unknown error." && exit 1 )
.PHONY: install_node # Install last version of NodeJS via nvm
@@ -281,14 +281,14 @@ check_typos: install_typos_checker
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
clippy_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
--features=boolean,shortint,integer,internal-keycache,gpu \
--all-targets \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
check_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu \
--features=boolean,shortint,integer,internal-keycache,gpu \
--all-targets \
-p $(TFHE_SPEC)
@@ -307,52 +307,51 @@ lint_workflow: check_actionlint_installed
.PHONY: clippy_core # Run clippy lints on core_crypto with and without experimental features
clippy_core: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE) \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE),experimental \
--features=experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE),nightly-avx512 \
--features=nightly-avx512 \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 \
--features=experimental,nightly-avx512 \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE),zk-pok \
--features=zk-pok \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_boolean # Run clippy lints enabling the boolean features
clippy_boolean: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE),boolean \
--features=boolean \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_shortint # Run clippy lints enabling the shortint features
clippy_shortint: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE),shortint \
--features=shortint \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE),shortint,experimental \
--features=shortint,experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE),zk-pok,shortint \
--features=zk-pok,shortint \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_integer # Run clippy lints enabling the integer features
clippy_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE),integer \
--features=integer \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE),integer,experimental \
--features=integer,experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy # Run clippy lints enabling the boolean, shortint, integer
clippy: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer \
--features=boolean,shortint,integer \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_rustdoc # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
@@ -363,13 +362,13 @@ clippy_rustdoc: install_rs_check_toolchain
fi && \
CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok,pbs-stats,strings \
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings \
-p $(TFHE_SPEC)
.PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
clippy_c_api: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
--features=boolean-c-api,shortint-c-api,high-level-c-api \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
@@ -394,17 +393,16 @@ clippy_trivium: install_rs_check_toolchain
.PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
clippy_all_targets: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok,strings \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
clippy_tfhe_csprng: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=$(TARGET_ARCH_FEATURE) \
-p tfhe-csprng -- --no-deps -D warnings
--features=parallel,software-prng -p tfhe-csprng -- --no-deps -D warnings
.PHONY: clippy_zk_pok # Run clippy lints on tfhe-zk-pok
clippy_zk_pok: install_rs_check_toolchain
@@ -443,67 +441,67 @@ check_rust_bindings_did_not_change:
.PHONY: tfhe_lints # Run custom tfhe-rs lints
tfhe_lints: install_tfhe_lints
cd tfhe && RUSTFLAGS="$(RUSTFLAGS)" cargo tfhe-lints \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,zk-pok -- -D warnings
--features=boolean,shortint,integer,zk-pok -- -D warnings
.PHONY: build_core # Build core_crypto without experimental features
build_core: install_rs_build_toolchain install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE) -p $(TFHE_SPEC)
-p $(TFHE_SPEC)
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),nightly-avx512 -p $(TFHE_SPEC); \
--features=nightly-avx512 -p $(TFHE_SPEC); \
fi
.PHONY: build_core_experimental # Build core_crypto with experimental features
build_core_experimental: install_rs_build_toolchain install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),experimental -p $(TFHE_SPEC)
--features=experimental -p $(TFHE_SPEC)
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),experimental,nightly-avx512 -p $(TFHE_SPEC); \
--features=experimental,nightly-avx512 -p $(TFHE_SPEC); \
fi
.PHONY: build_boolean # Build with boolean enabled
build_boolean: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) --all-targets
--features=boolean -p $(TFHE_SPEC) --all-targets
.PHONY: build_shortint # Build with shortint enabled
build_shortint: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),shortint -p $(TFHE_SPEC) --all-targets
--features=shortint -p $(TFHE_SPEC) --all-targets
.PHONY: build_integer # Build with integer enabled
build_integer: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) --all-targets
--features=integer -p $(TFHE_SPEC) --all-targets
.PHONY: build_tfhe_full # Build with boolean, shortint and integer enabled
build_tfhe_full: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
--features=boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
.PHONY: build_tfhe_coverage # Build with test coverage enabled
build_tfhe_coverage: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
.PHONY: build_c_api # Build the C API for boolean, shortint and integer
build_c_api: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok \
-p $(TFHE_SPEC)
.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
build_c_api_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,gpu \
-p $(TFHE_SPEC)
.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
-p $(TFHE_SPEC)
.PHONY: build_web_js_api # Build the js API targeting the web browser
@@ -534,15 +532,15 @@ build_node_js_api: install_rs_build_toolchain install_wasm_pack
.PHONY: build_tfhe_csprng # Build tfhe_csprng
build_tfhe_csprng: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE) -p tfhe-csprng --all-targets
-p tfhe-csprng --all-targets
.PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
--features=experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
--features=experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
fi
.PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
@@ -550,13 +548,13 @@ test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain inst
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
--out xml --output-dir coverage/core_crypto --line --engine llvm --timeout 500 \
--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache \
--features=experimental,internal-keycache \
-p $(TFHE_SPEC) -- core_crypto::
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
--out xml --output-dir coverage/core_crypto_avx512 --line --engine llvm --timeout 500 \
--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache,nightly-avx512 \
--features=experimental,internal-keycache,nightly-avx512 \
-p $(TFHE_SPEC) -- -Z unstable-options --report-time core_crypto::; \
fi
@@ -574,35 +572,38 @@ test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
.PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
test_core_crypto_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
.PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
test_integer_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=6
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
.PHONY: test_integer_long_run_gpu # Run the tests of the integer module including experimental on the gpu backend
test_integer_long_run_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer,gpu,__long_run_tests -p $(TFHE_SPEC) -- integer::gpu::server_key::radix::tests_long_run --test-threads=6
.PHONY: test_integer_long_run_gpu # Run the long run integer tests on the gpu backend
test_integer_long_run_gpu: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
LONG_TESTS=TRUE \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)" --backend "gpu"
.PHONY: test_integer_compression
test_integer_compression: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer -p $(TFHE_SPEC) -- integer::ciphertext::compress
--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compress
.PHONY: test_integer_compression_gpu
test_integer_compression_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress
.PHONY: test_integer_gpu_ci # Run the tests for integer ci on gpu backend
test_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -661,20 +662,20 @@ test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_n
.PHONY: test_boolean # Run the tests of the boolean module
test_boolean: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),boolean -p $(TFHE_SPEC) -- boolean::
--features=boolean -p $(TFHE_SPEC) -- boolean::
.PHONY: test_boolean_cov # Run the tests of the boolean module with code coverage
test_boolean_cov: install_rs_check_toolchain install_tarpaulin
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
--out xml --output-dir coverage/boolean --line --engine llvm --timeout 500 \
$(COVERAGE_EXCLUDED_FILES) \
--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache \
--features=boolean,internal-keycache \
-p $(TFHE_SPEC) -- -Z unstable-options --report-time boolean::
.PHONY: test_c_api_rs # Run the rust tests for the C API
test_c_api_rs: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),boolean-c-api,shortint-c-api,high-level-c-api \
--features=boolean-c-api,shortint-c-api,high-level-c-api \
-p $(TFHE_SPEC) \
c_api
@@ -706,14 +707,14 @@ test_shortint_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
.PHONY: test_shortint # Run all the tests for shortint
test_shortint: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
--features=shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
.PHONY: test_shortint_cov # Run the tests of the shortint module with code coverage
test_shortint_cov: install_rs_check_toolchain install_tarpaulin
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
--out xml --output-dir coverage/shortint --line --engine llvm --timeout 500 \
$(COVERAGE_EXCLUDED_FILES) \
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
--features=shortint,internal-keycache \
-p $(TFHE_SPEC) -- -Z unstable-options --report-time shortint::
.PHONY: test_integer_ci # Run the tests for integer ci
@@ -770,26 +771,28 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
--signed-only --tfhe-package "$(TFHE_SPEC)"
.PHONY: test_integer_long_run # Run the long run tests for integer
test_integer_long_run: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,__long_run_tests -p $(TFHE_SPEC) -- integer::server_key::radix_parallel::tests_long_run
.PHONY: test_integer_long_run # Run the long run integer tests
test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
LONG_TESTS=TRUE \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)"
.PHONY: test_safe_serialization # Run the tests for safe serialization
test_safe_serialization: install_rs_build_toolchain install_cargo_nextest
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::
--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::
.PHONY: test_zk # Run the tests for the zk module of the TFHE-rs crate
test_zk: install_rs_build_toolchain install_cargo_nextest
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),shortint,zk-pok -p $(TFHE_SPEC) -- zk::
--features=shortint,zk-pok -p $(TFHE_SPEC) -- zk::
.PHONY: test_integer # Run all the tests for integer
test_integer: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache -p $(TFHE_SPEC) -- integer::
--features=integer,internal-keycache -p $(TFHE_SPEC) -- integer::
.PHONY: test_integer_cov # Run the tests of the integer module with code coverage
test_integer_cov: install_rs_check_toolchain install_tarpaulin
@@ -797,38 +800,38 @@ test_integer_cov: install_rs_check_toolchain install_tarpaulin
--out xml --output-dir coverage/integer --line --engine llvm --timeout 500 \
--implicit-test-threads \
--exclude-files $(COVERAGE_EXCLUDED_FILES) \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
--features=integer,internal-keycache \
-p $(TFHE_SPEC) -- -Z unstable-options --report-time integer::
.PHONY: test_high_level_api # Run all the tests for high_level_api
test_high_level_api: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,zk-pok -p $(TFHE_SPEC) \
--features=boolean,shortint,integer,internal-keycache,zk-pok -p $(TFHE_SPEC) \
-- high_level_api::
test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) \
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) \
-E "test(/high_level_api::.*gpu.*/)"
.PHONY: test_strings # Run the tests for strings ci
test_strings: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),shortint,integer,strings -p $(TFHE_SPEC) \
--features=shortint,integer,strings -p $(TFHE_SPEC) \
-- strings::
.PHONY: test_user_doc # Run tests from the .md documentation
test_user_doc: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
-p $(TFHE_SPEC) \
-- test_user_docs::
.PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
test_user_doc_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
--features=boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
-- test_user_docs::
@@ -836,14 +839,12 @@ test_user_doc_gpu: install_rs_build_toolchain
.PHONY: test_regex_engine # Run tests for regex_engine example
test_regex_engine: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--example regex_engine \
--features=$(TARGET_ARCH_FEATURE),integer
--example regex_engine --features=integer
.PHONY: test_sha256_bool # Run tests for sha256_bool example
test_sha256_bool: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--example sha256_bool \
--features=$(TARGET_ARCH_FEATURE),boolean
--example sha256_bool --features=boolean
.PHONY: test_examples # Run tests for examples
test_examples: test_sha256_bool test_regex_engine
@@ -861,7 +862,7 @@ test_kreyvium: install_rs_build_toolchain
.PHONY: test_tfhe_csprng # Run tfhe-csprng tests
test_tfhe_csprng: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE) -p tfhe-csprng
-p tfhe-csprng
.PHONY: test_zk_pok # Run tfhe-zk-pok tests
test_zk_pok: install_rs_build_toolchain
@@ -879,7 +880,7 @@ test_zk_wasm_x86_compat_ci: check_nvm_installed
test_zk_wasm_x86_compat: install_rs_build_toolchain build_node_js_api
cd tfhe/tests/zk_wasm_x86_test && npm install
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-p tfhe --test zk_wasm_x86_test --features=$(TARGET_ARCH_FEATURE),integer,zk-pok
-p tfhe --test zk_wasm_x86_test --features=integer,zk-pok
.PHONY: test_versionable # Run tests for tfhe-versionable subcrate
test_versionable: install_rs_build_toolchain
@@ -892,7 +893,7 @@ test_versionable: install_rs_build_toolchain
test_backward_compatibility_ci: install_rs_build_toolchain
TFHE_BACKWARD_COMPAT_DATA_DIR="$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tfhe/$(BACKWARD_COMPAT_DATA_DIR)\"" \
--features=$(TARGET_ARCH_FEATURE),shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture
--features=shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture
.PHONY: test_backward_compatibility # Same as test_backward_compatibility_ci but tries to clone the data repo first if needed
test_backward_compatibility: tfhe/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci
@@ -907,7 +908,7 @@ doc: install_rs_check_toolchain
DOCS_RS=1 \
RUSTDOCFLAGS="--html-in-header katex-header.html" \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
.PHONY: docs # Build rust doc alias for doc
docs: doc
@@ -918,7 +919,7 @@ lint_doc: install_rs_check_toolchain
DOCS_RS=1 \
RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
.PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
lint_docs: lint_doc
@@ -947,7 +948,7 @@ check_md_links: install_mlc
.PHONY: check_compile_tests # Build tests in debug without running them
check_compile_tests: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache \
--features=experimental,boolean,shortint,integer,internal-keycache \
-p $(TFHE_SPEC)
@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
@@ -958,7 +959,7 @@ check_compile_tests: install_rs_build_toolchain
.PHONY: check_compile_tests_benches_gpu # Build tests in debug without running them
check_compile_tests_benches_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
--features=$(TARGET_ARCH_FEATURE),experimental,boolean,shortint,integer,internal-keycache,gpu \
--features=experimental,boolean,shortint,integer,internal-keycache,gpu \
-p $(TFHE_SPEC)
mkdir -p "$(TFHECUDA_BUILD)" && \
cd "$(TFHECUDA_BUILD)" && \
@@ -1037,42 +1038,42 @@ dieharder_csprng: install_dieharder build_tfhe_csprng
.PHONY: print_doc_bench_parameters # Print parameters used in doc benchmarks
print_doc_bench_parameters:
RUSTFLAGS="" cargo run --example print_doc_bench_parameters \
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache -p tfhe
--features=shortint,internal-keycache -p tfhe
.PHONY: bench_integer # Run benchmarks for unsigned integer
bench_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_signed_integer # Run benchmarks for signed integer
bench_signed_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-signed-bench \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
bench_integer_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
bench_integer_compression: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench glwe_packing_compression-integer-bench \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_integer_compression_gpu
bench_integer_compression_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench glwe_packing_compression-integer-bench \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,gpu -p $(TFHE_SPEC) --
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) --
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
bench_integer_multi_bit: install_rs_check_toolchain
@@ -1080,7 +1081,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
bench_signed_integer_multi_bit: install_rs_check_toolchain
@@ -1088,7 +1089,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-signed-bench \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
bench_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1096,7 +1097,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1104,14 +1105,14 @@ bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
bench_integer_zk: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench zk-pke-bench \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,zk-pok,nightly-avx512 \
--features=integer,internal-keycache,zk-pok,nightly-avx512 \
-p $(TFHE_SPEC) --
.PHONY: bench_shortint # Run benchmarks for shortint
@@ -1119,14 +1120,14 @@ bench_shortint: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench shortint-bench \
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_shortint_oprf # Run OPRF benchmarks for shortint
bench_shortint_oprf: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench oprf-shortint-bench \
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_shortint_multi_bit # Run benchmarks for shortint using multi-bit parameters
bench_shortint_multi_bit: install_rs_check_toolchain
@@ -1134,43 +1135,43 @@ bench_shortint_multi_bit: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench shortint-bench \
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_boolean # Run benchmarks for boolean
bench_boolean: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench boolean-bench \
--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=boolean,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_pbs # Run benchmarks for PBS
bench_pbs: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench pbs-bench \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_pbs128 # Run benchmarks for PBS using FFT 128 bits
bench_pbs128: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench pbs128-bench \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_pbs_gpu # Run benchmarks for PBS on GPU backend
bench_pbs_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_FAST_BENCH=$(FAST_BENCH) cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench pbs-bench \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_ks # Run benchmarks for keyswitch
bench_ks: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench ks-bench \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=boolean,shortint,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
.PHONY: bench_ks_gpu # Run benchmarks for keyswitch on GPU backend
bench_ks_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench ks-bench \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
--features=boolean,shortint,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC)
bench_web_js_api_parallel_chrome: browser_path = "$(WEB_RUNNER_DIR)/chrome/chrome-linux64/chrome"
bench_web_js_api_parallel_chrome: driver_path = "$(WEB_RUNNER_DIR)/chrome/chromedriver-linux64/chromedriver"
@@ -1206,13 +1207,13 @@ bench_web_js_api_parallel_firefox_ci: setup_venv
bench_hlapi_erc20: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_hlapi_erc20_gpu # Run benchmarks for ERC20 operations on GPU
bench_hlapi_erc20_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-erc20 \
--features=$(TARGET_ARCH_FEATURE),integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p $(TFHE_SPEC) --
.PHONY: bench_tfhe_zk_pok # Run benchmarks for the tfhe_zk_pok crate
bench_tfhe_zk_pok: install_rs_check_toolchain
@@ -1227,32 +1228,32 @@ bench_tfhe_zk_pok: install_rs_check_toolchain
gen_key_cache: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example generates_test_keys \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
--features=boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
-- $(MULTI_BIT_ONLY) $(COVERAGE_ONLY)
.PHONY: gen_key_cache_core_crypto # Run function to generate keys and cache them for core_crypto tests
gen_key_cache_core_crypto: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --tests --profile $(CARGO_PROFILE) \
--features=$(TARGET_ARCH_FEATURE),experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
--features=experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
core_crypto::keycache::generate_keys
.PHONY: measure_hlapi_compact_pk_ct_sizes # Measure sizes of public keys and ciphertext for high-level API
measure_hlapi_compact_pk_ct_sizes: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example hlapi_compact_pk_ct_sizes \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache
--features=integer,internal-keycache
.PHONY: measure_shortint_key_sizes # Measure sizes of bootstrapping and key switching keys for shortint
measure_shortint_key_sizes: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example shortint_key_sizes \
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache
--features=shortint,internal-keycache
.PHONY: measure_boolean_key_sizes # Measure sizes of bootstrapping and key switching keys for boolean
measure_boolean_key_sizes: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example boolean_key_sizes \
--features=$(TARGET_ARCH_FEATURE),boolean,internal-keycache
--features=boolean,internal-keycache
.PHONY: parse_integer_benches # Run python parser to output a csv containing integer benches data
parse_integer_benches:
@@ -1264,14 +1265,13 @@ parse_integer_benches:
parse_wasm_benchmarks: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example wasm_benchmarks_parser \
--features=$(TARGET_ARCH_FEATURE),shortint,internal-keycache \
--features=shortint,internal-keycache \
-- wasm_benchmark_results.json
.PHONY: write_params_to_file # Gather all crypto parameters into a file with a Sage readable format.
write_params_to_file: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example write_params_to_file \
--features=$(TARGET_ARCH_FEATURE),boolean,shortint,internal-keycache
--example write_params_to_file --features=boolean,shortint,internal-keycache
.PHONY: clone_backward_compat_data # Clone the data repo needed for backward compatibility tests
clone_backward_compat_data:
@@ -1286,26 +1286,26 @@ tfhe/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
.PHONY: regex_engine # Run regex_engine example
regex_engine: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example regex_engine \
--features=$(TARGET_ARCH_FEATURE),integer \
--example regex_engine --features=integer \
-- $(REGEX_STRING) $(REGEX_PATTERN)
.PHONY: dark_market # Run dark market example
dark_market: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example dark_market \
--features=$(TARGET_ARCH_FEATURE),integer,internal-keycache \
--features=integer,internal-keycache \
-- fhe-modified fhe-parallel plain fhe
.PHONY: sha256_bool # Run sha256_bool example
sha256_bool: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example sha256_bool \
--features=$(TARGET_ARCH_FEATURE),boolean
--example sha256_bool --features=boolean
.PHONY: pcc # pcc stands for pre commit checks (except GPU)
pcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested check_intra_md_links \
clippy_all tfhe_lints check_compile_tests
clippy_all check_compile_tests
# TFHE lints deactivated as it's incompatible with 1.83 - temporary
# tfhe_lints
.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu check_rust_bindings_did_not_change

View File

@@ -70,22 +70,8 @@ production-ready library for all the advanced features of TFHE.
### Cargo.toml configuration
To use the latest version of `TFHE-rs` in your project, you first need to add it as a dependency in your `Cargo.toml`:
+ For x86_64-based machines running Unix-like OSes:
```toml
tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64-unix"] }
```
+ For Apple Silicon or aarch64-based machines running Unix-like OSes:
```toml
tfhe = { version = "*", features = ["boolean", "shortint", "integer", "aarch64-unix"] }
```
+ For x86_64-based machines with the [`rdseed instruction`](https://en.wikipedia.org/wiki/RDRAND) running Windows:
```toml
tfhe = { version = "*", features = ["boolean", "shortint", "integer", "x86_64"] }
tfhe = { version = "*", features = ["boolean", "shortint", "integer"] }
```
> [!Note]

View File

@@ -6,15 +6,8 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
rayon = { version = "1.7.0"}
[target.'cfg(target_arch = "x86_64")'.dependencies.tfhe]
path = "../../tfhe"
features = [ "boolean", "shortint", "integer", "x86_64" ]
[target.'cfg(target_arch = "aarch64")'.dependencies.tfhe]
path = "../../tfhe"
features = [ "boolean", "shortint", "integer", "aarch64-unix" ]
rayon = { workspace = true }
tfhe = { path = "../../tfhe", features = [ "boolean", "shortint", "integer" ] }
[dev-dependencies]
criterion = { version = "0.5.1", features = [ "html_reports" ]}

View File

@@ -38,6 +38,7 @@ template <typename Torus> struct int_compression {
scratch_packing_keyswitch_lwe_list_to_glwe_64(
streams[0], gpu_indexes[0], &fp_ks_buffer,
compression_params.small_lwe_dimension,
compression_params.glwe_dimension, compression_params.polynomial_size,
num_radix_blocks, true);
}

View File

@@ -46,7 +46,14 @@ void scratch_cuda_apply_univariate_lut_kb_64(
uint32_t grouping_factor, uint32_t input_lwe_ciphertext_count,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory);
// Scratch entry point for the "many LUT" variant of the univariate LUT
// application. Compared with scratch_cuda_apply_univariate_lut_kb_64 it takes
// an extra `num_many_lut` argument.
// NOTE(review): presumably `mem_ptr` receives the scratch buffer built from
// `input_lut` and the crypto parameters, and `allocate_gpu_memory` controls
// whether device memory is actually allocated - confirm against the
// implementation.
void scratch_cuda_apply_many_univariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint32_t num_many_lut, bool allocate_gpu_memory);
void cuda_apply_univariate_lut_kb_64(void *const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count, void *output_radix_lwe,
@@ -440,5 +447,41 @@ void cleanup_cuda_integer_abs_inplace(void *const *streams,
uint32_t gpu_count,
int8_t **mem_ptr_void);
// C entry points for the "are all comparison blocks true" operation, exposed
// as a scratch / execute / cleanup triple.
// NOTE(review): the descriptions below are inferred from the names and
// signatures only - confirm against the implementation.

// Scratch step: takes the crypto parameters and returns an opaque buffer
// through `mem_ptr` (presumably allocated on the GPU when
// `allocate_gpu_memory` is true).
void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory);
// Execute step: reads `num_radix_blocks` blocks from `lwe_array_in` and writes
// the result to `lwe_array_out`, using the buffer from the scratch step and
// the bootstrapping (`bsks`) and keyswitching (`ksks`) keys.
void cuda_integer_are_all_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
// Cleanup step: presumably releases the buffer created by the scratch step.
void cleanup_cuda_integer_are_all_comparisons_block_true(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void);
// C entry points for the "is at least one comparison block true" operation,
// exposed as a scratch / execute / cleanup triple.
// NOTE(review): the descriptions below are inferred from the names and
// signatures only - confirm against the implementation.

// Scratch step: takes the crypto parameters and returns an opaque buffer
// through `mem_ptr` (presumably allocated on the GPU when
// `allocate_gpu_memory` is true).
void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory);
// Execute step: reads `num_radix_blocks` blocks from `lwe_array_in` and writes
// the result to `lwe_array_out`, using the buffer from the scratch step and
// the bootstrapping (`bsks`) and keyswitching (`ksks`) keys.
void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks);
// Cleanup step: presumably releases the buffer created by the scratch step.
void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void);
} // extern C
#endif // CUDA_INTEGER_H

View File

@@ -479,7 +479,6 @@ template <typename Torus> struct int_radix_lut {
cuda_memcpy_async_gpu_to_gpu(dst_lut_indexes, src_lut_indexes,
num_blocks * sizeof(Torus), streams[i],
gpu_indexes[i]);
cuda_synchronize_stream(streams[i], gpu_indexes[i]);
}
}
}
@@ -1540,10 +1539,12 @@ template <typename Torus> struct int_prop_simu_group_carries_memory {
cuda_memset_async(grouping_pgns, 0, num_groups * big_lwe_size_bytes,
streams[0], gpu_indexes[0]);
prepared_blocks = (Torus *)cuda_malloc_async(
num_radix_blocks * big_lwe_size_bytes, streams[0], gpu_indexes[0]);
cuda_memset_async(prepared_blocks, 0, num_radix_blocks * big_lwe_size_bytes,
streams[0], gpu_indexes[0]);
prepared_blocks =
(Torus *)cuda_malloc_async((num_radix_blocks + 1) * big_lwe_size_bytes,
streams[0], gpu_indexes[0]);
cuda_memset_async(prepared_blocks, 0,
(num_radix_blocks + 1) * big_lwe_size_bytes, streams[0],
gpu_indexes[0]);
resolved_carries = (Torus *)cuda_malloc_async(
(num_groups + 1) * big_lwe_size_bytes, streams[0], gpu_indexes[0]);
@@ -1773,7 +1774,6 @@ template <typename Torus> struct int_sc_prop_memory {
uint32_t num_many_lut;
uint32_t lut_stride;
uint32_t group_size;
uint32_t num_groups;
Torus *output_flag;
Torus *last_lhs;
@@ -1781,8 +1781,6 @@ template <typename Torus> struct int_sc_prop_memory {
int_radix_lut<Torus> *lut_message_extract;
int_radix_lut<Torus> *lut_overflow_flag_prep;
int_radix_lut<Torus> *lut_overflow_flag_last;
int_radix_lut<Torus> *lut_carry_flag_last;
int_shifted_blocks_and_states_memory<Torus> *shifted_blocks_state_mem;
int_prop_simu_group_carries_memory<Torus> *prop_simu_group_carries_mem;
@@ -1792,8 +1790,6 @@ template <typename Torus> struct int_sc_prop_memory {
uint32_t requested_flag;
uint32_t active_gpu_count;
cudaStream_t *sub_streams_1;
cudaStream_t *sub_streams_2;
cudaEvent_t *incoming_events1;
cudaEvent_t *incoming_events2;
@@ -1818,7 +1814,6 @@ template <typename Torus> struct int_sc_prop_memory {
uint32_t block_modulus = message_modulus * carry_modulus;
uint32_t num_bits_in_block = std::log2(block_modulus);
uint32_t grouping_size = num_bits_in_block;
group_size = grouping_size;
num_groups = (num_radix_blocks + grouping_size - 1) / grouping_size;
num_many_lut = 2; // many luts apply 2 luts
@@ -1835,8 +1830,8 @@ template <typename Torus> struct int_sc_prop_memory {
// Step 3 elements
lut_message_extract =
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 1,
num_radix_blocks, allocate_gpu_memory);
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 2,
num_radix_blocks + 1, allocate_gpu_memory);
// lut for the first block in the first grouping
auto f_message_extract = [message_modulus](Torus block) -> Torus {
return (block >> 1) % message_modulus;
@@ -1852,8 +1847,9 @@ template <typename Torus> struct int_sc_prop_memory {
// This stores a single block that will be used to store the overflow or
// carry results
output_flag = (Torus *)cuda_malloc_async(big_lwe_size_bytes, streams[0],
gpu_indexes[0]);
output_flag =
(Torus *)cuda_malloc_async(big_lwe_size_bytes * (num_radix_blocks + 1),
streams[0], gpu_indexes[0]);
cuda_memset_async(output_flag, 0, big_lwe_size_bytes, streams[0],
gpu_indexes[0]);
@@ -1912,9 +1908,6 @@ template <typename Torus> struct int_sc_prop_memory {
// It seems that this lut could be applied together with the other one but for
// now we won't do it
if (requested_flag == outputFlag::FLAG_OVERFLOW) { // Overflow case
lut_overflow_flag_last = new int_radix_lut<Torus>(
streams, gpu_indexes, gpu_count, params, 1, 1, allocate_gpu_memory);
auto f_overflow_last = [num_radix_blocks,
requested_flag_in](Torus block) -> Torus {
uint32_t position = (num_radix_blocks == 1 &&
@@ -1930,39 +1923,57 @@ template <typename Torus> struct int_sc_prop_memory {
return does_overflow_if_carry_is_0;
}
};
auto overflow_flag_last = lut_overflow_flag_last->get_lut(0, 0);
auto overflow_flag_last = lut_message_extract->get_lut(0, 1);
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], overflow_flag_last, glwe_dimension,
polynomial_size, message_modulus, carry_modulus, f_overflow_last);
lut_overflow_flag_last->broadcast_lut(streams, gpu_indexes, 0);
Torus *h_lut_indexes =
(Torus *)malloc((num_radix_blocks + 1) * sizeof(Torus));
for (int index = 0; index < num_radix_blocks + 1; index++) {
if (index < num_radix_blocks) {
h_lut_indexes[index] = 0;
} else {
h_lut_indexes[index] = 1;
}
}
cuda_memcpy_async_to_gpu(
lut_message_extract->get_lut_indexes(0, 0), h_lut_indexes,
(num_radix_blocks + 1) * sizeof(Torus), streams[0], gpu_indexes[0]);
lut_message_extract->broadcast_lut(streams, gpu_indexes, 0);
free(h_lut_indexes);
}
if (requested_flag == outputFlag::FLAG_CARRY) { // Carry case
lut_carry_flag_last = new int_radix_lut<Torus>(
streams, gpu_indexes, gpu_count, params, 1, 1, allocate_gpu_memory);
auto f_carry_last = [](Torus block) -> Torus {
return ((block >> 2) & 1);
};
auto carry_flag_last = lut_carry_flag_last->get_lut(0, 0);
auto carry_flag_last = lut_message_extract->get_lut(0, 1);
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], carry_flag_last, glwe_dimension,
polynomial_size, message_modulus, carry_modulus, f_carry_last);
lut_carry_flag_last->broadcast_lut(streams, gpu_indexes, 0);
Torus *h_lut_indexes =
(Torus *)malloc((num_radix_blocks + 1) * sizeof(Torus));
for (int index = 0; index < num_radix_blocks + 1; index++) {
if (index < num_radix_blocks) {
h_lut_indexes[index] = 0;
} else {
h_lut_indexes[index] = 1;
}
}
cuda_memcpy_async_to_gpu(
lut_message_extract->get_lut_indexes(0, 0), h_lut_indexes,
(num_radix_blocks + 1) * sizeof(Torus), streams[0], gpu_indexes[0]);
lut_message_extract->broadcast_lut(streams, gpu_indexes, 0);
free(h_lut_indexes);
}
active_gpu_count = get_active_gpu_count(num_radix_blocks, gpu_count);
sub_streams_1 =
(cudaStream_t *)malloc(active_gpu_count * sizeof(cudaStream_t));
sub_streams_2 =
(cudaStream_t *)malloc(active_gpu_count * sizeof(cudaStream_t));
for (uint j = 0; j < active_gpu_count; j++) {
sub_streams_1[j] = cuda_create_stream(gpu_indexes[j]);
sub_streams_2[j] = cuda_create_stream(gpu_indexes[j]);
}
incoming_events1 =
(cudaEvent_t *)malloc(active_gpu_count * sizeof(cudaEvent_t));
@@ -1998,24 +2009,10 @@ template <typename Torus> struct int_sc_prop_memory {
if (requested_flag == outputFlag::FLAG_OVERFLOW) { // In case of overflow
lut_overflow_flag_prep->release(streams, gpu_indexes, gpu_count);
lut_overflow_flag_last->release(streams, gpu_indexes, gpu_count);
delete lut_overflow_flag_prep;
delete lut_overflow_flag_last;
cuda_drop_async(last_lhs, streams[0], gpu_indexes[0]);
cuda_drop_async(last_rhs, streams[0], gpu_indexes[0]);
}
if (requested_flag == outputFlag::FLAG_CARRY) { // In case of carry
lut_carry_flag_last->release(streams, gpu_indexes, gpu_count);
delete lut_carry_flag_last;
}
// release sub streams
for (uint i = 0; i < active_gpu_count; i++) {
cuda_destroy_stream(sub_streams_1[i], gpu_indexes[i]);
cuda_destroy_stream(sub_streams_2[i], gpu_indexes[i]);
}
free(sub_streams_1);
free(sub_streams_2);
// release events
for (uint j = 0; j < active_gpu_count; j++) {
@@ -2955,14 +2952,11 @@ template <typename Torus> struct int_arithmetic_scalar_shift_buffer {
template <typename Torus> struct int_cmux_buffer {
int_radix_lut<Torus> *predicate_lut;
int_radix_lut<Torus> *inverted_predicate_lut;
int_radix_lut<Torus> *message_extract_lut;
Torus *tmp_true_ct;
Torus *tmp_false_ct;
int_zero_out_if_buffer<Torus> *zero_if_true_buffer;
int_zero_out_if_buffer<Torus> *zero_if_false_buffer;
Torus *buffer_in;
Torus *buffer_out;
Torus *condition_array;
int_radix_params params;
@@ -2978,17 +2972,12 @@ template <typename Torus> struct int_cmux_buffer {
Torus big_size =
(params.big_lwe_dimension + 1) * num_radix_blocks * sizeof(Torus);
tmp_true_ct =
(Torus *)cuda_malloc_async(big_size, streams[0], gpu_indexes[0]);
tmp_false_ct =
(Torus *)cuda_malloc_async(big_size, streams[0], gpu_indexes[0]);
zero_if_true_buffer = new int_zero_out_if_buffer<Torus>(
streams, gpu_indexes, gpu_count, params, num_radix_blocks,
allocate_gpu_memory);
zero_if_false_buffer = new int_zero_out_if_buffer<Torus>(
streams, gpu_indexes, gpu_count, params, num_radix_blocks,
allocate_gpu_memory);
buffer_in =
(Torus *)cuda_malloc_async(2 * big_size, streams[0], gpu_indexes[0]);
buffer_out =
(Torus *)cuda_malloc_async(2 * big_size, streams[0], gpu_indexes[0]);
condition_array =
(Torus *)cuda_malloc_async(2 * big_size, streams[0], gpu_indexes[0]);
auto lut_f = [predicate_lut_f](Torus block, Torus condition) -> Torus {
return predicate_lut_f(condition) ? 0 : block;
@@ -3002,12 +2991,8 @@ template <typename Torus> struct int_cmux_buffer {
};
predicate_lut =
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 1,
num_radix_blocks, allocate_gpu_memory);
inverted_predicate_lut =
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 1,
num_radix_blocks, allocate_gpu_memory);
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 2,
2 * num_radix_blocks, allocate_gpu_memory);
message_extract_lut =
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 1,
@@ -3016,21 +3001,33 @@ template <typename Torus> struct int_cmux_buffer {
generate_device_accumulator_bivariate<Torus>(
streams[0], gpu_indexes[0], predicate_lut->get_lut(0, 0),
params.glwe_dimension, params.polynomial_size, params.message_modulus,
params.carry_modulus, lut_f);
params.carry_modulus, inverted_lut_f);
generate_device_accumulator_bivariate<Torus>(
streams[0], gpu_indexes[0], inverted_predicate_lut->get_lut(0, 0),
streams[0], gpu_indexes[0], predicate_lut->get_lut(0, 1),
params.glwe_dimension, params.polynomial_size, params.message_modulus,
params.carry_modulus, inverted_lut_f);
params.carry_modulus, lut_f);
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], message_extract_lut->get_lut(0, 0),
params.glwe_dimension, params.polynomial_size, params.message_modulus,
params.carry_modulus, message_extract_lut_f);
Torus *h_lut_indexes =
(Torus *)malloc(2 * num_radix_blocks * sizeof(Torus));
for (int index = 0; index < 2 * num_radix_blocks; index++) {
if (index < num_radix_blocks) {
h_lut_indexes[index] = 0;
} else {
h_lut_indexes[index] = 1;
}
}
cuda_memcpy_async_to_gpu(
predicate_lut->get_lut_indexes(0, 0), h_lut_indexes,
2 * num_radix_blocks * sizeof(Torus), streams[0], gpu_indexes[0]);
predicate_lut->broadcast_lut(streams, gpu_indexes, 0);
inverted_predicate_lut->broadcast_lut(streams, gpu_indexes, 0);
message_extract_lut->broadcast_lut(streams, gpu_indexes, 0);
free(h_lut_indexes);
}
}
@@ -3038,18 +3035,12 @@ template <typename Torus> struct int_cmux_buffer {
uint32_t gpu_count) {
predicate_lut->release(streams, gpu_indexes, gpu_count);
delete predicate_lut;
inverted_predicate_lut->release(streams, gpu_indexes, gpu_count);
delete inverted_predicate_lut;
message_extract_lut->release(streams, gpu_indexes, gpu_count);
delete message_extract_lut;
zero_if_true_buffer->release(streams, gpu_indexes, gpu_count);
delete zero_if_true_buffer;
zero_if_false_buffer->release(streams, gpu_indexes, gpu_count);
delete zero_if_false_buffer;
cuda_drop_async(tmp_true_ct, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_false_ct, streams[0], gpu_indexes[0]);
cuda_drop_async(buffer_in, streams[0], gpu_indexes[0]);
cuda_drop_async(buffer_out, streams[0], gpu_indexes[0]);
cuda_drop_async(condition_array, streams[0], gpu_indexes[0]);
}
};
@@ -3063,7 +3054,7 @@ template <typename Torus> struct int_are_all_block_true_buffer {
// This map stores LUTs that check the equality between some input and values
// of interest in are_all_block_true(), such as max_value (the maximum message
// value).
std::unordered_map<int, int_radix_lut<Torus> *> is_equal_to_lut_map;
int_radix_lut<Torus> *is_max_value;
int_are_all_block_true_buffer(cudaStream_t const *streams,
uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -3084,16 +3075,26 @@ template <typename Torus> struct int_are_all_block_true_buffer {
tmp_out = (Torus *)cuda_malloc_async((params.big_lwe_dimension + 1) *
num_radix_blocks * sizeof(Torus),
streams[0], gpu_indexes[0]);
is_max_value =
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params, 2,
max_chunks, allocate_gpu_memory);
auto is_max_value_f = [max_value](Torus x) -> Torus {
return x == max_value;
};
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], is_max_value->get_lut(0, 0),
params.glwe_dimension, params.polynomial_size, params.message_modulus,
params.carry_modulus, is_max_value_f);
is_max_value->broadcast_lut(streams, gpu_indexes, 0);
}
}
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count) {
for (auto &lut : is_equal_to_lut_map) {
lut.second->release(streams, gpu_indexes, gpu_count);
delete (lut.second);
}
is_equal_to_lut_map.clear();
is_max_value->release(streams, gpu_indexes, gpu_count);
delete (is_max_value);
cuda_drop_async(tmp_block_accumulated, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_out, streams[0], gpu_indexes[0]);
@@ -3270,8 +3271,7 @@ template <typename Torus> struct int_comparison_diff_buffer {
int_radix_params params;
COMPARISON_TYPE op;
Torus *tmp_packed_left;
Torus *tmp_packed_right;
Torus *tmp_packed;
std::function<Torus(Torus)> operator_f;
@@ -3308,11 +3308,8 @@ template <typename Torus> struct int_comparison_diff_buffer {
Torus big_size = (params.big_lwe_dimension + 1) * sizeof(Torus);
tmp_packed_left = (Torus *)cuda_malloc_async(
big_size * (num_radix_blocks / 2), streams[0], gpu_indexes[0]);
tmp_packed_right = (Torus *)cuda_malloc_async(
big_size * (num_radix_blocks / 2), streams[0], gpu_indexes[0]);
tmp_packed = (Torus *)cuda_malloc_async(big_size * num_radix_blocks,
streams[0], gpu_indexes[0]);
tree_buffer = new int_tree_sign_reduction_buffer<Torus>(
streams, gpu_indexes, gpu_count, operator_f, params, num_radix_blocks,
@@ -3335,8 +3332,7 @@ template <typename Torus> struct int_comparison_diff_buffer {
reduce_signs_lut->release(streams, gpu_indexes, gpu_count);
delete reduce_signs_lut;
cuda_drop_async(tmp_packed_left, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_packed_right, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_packed, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_signs_a, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_signs_b, streams[0], gpu_indexes[0]);
}
@@ -3685,9 +3681,9 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
[shifted_mask](Torus x) -> Torus { return x & shifted_mask; };
masking_luts_1[i] = new int_radix_lut<Torus>(
streams, gpu_indexes, 1, params, 1, 1, true);
streams, gpu_indexes, gpu_count, params, 1, 1, true);
masking_luts_2[i] = new int_radix_lut<Torus>(
streams, gpu_indexes, 1, params, 1, num_blocks, true);
streams, gpu_indexes, gpu_count, params, 1, num_blocks, true);
int_radix_lut<Torus> *luts[2] = {masking_luts_1[i], masking_luts_2[i]};
@@ -3704,7 +3700,7 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
// both of them are equal but because they are used in two different
// executions in parallel we need two different pbs_buffers.
message_extract_lut_1 = new int_radix_lut<Torus>(
streams, gpu_indexes, 1, params, 1, num_blocks, true);
streams, gpu_indexes, gpu_count, params, 1, num_blocks, true);
message_extract_lut_2 = new int_radix_lut<Torus>(
streams, gpu_indexes, gpu_count, params, 1, num_blocks, true);
@@ -3816,16 +3812,16 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
this->params = params;
shift_mem_1 = new int_logical_scalar_shift_buffer<Torus>(
streams, gpu_indexes, 1, SHIFT_OR_ROTATE_TYPE::LEFT_SHIFT,
streams, gpu_indexes, gpu_count, SHIFT_OR_ROTATE_TYPE::LEFT_SHIFT,
params, 2 * num_blocks, true);
shift_mem_2 = new int_logical_scalar_shift_buffer<Torus>(
streams, gpu_indexes, 1, SHIFT_OR_ROTATE_TYPE::LEFT_SHIFT,
streams, gpu_indexes, gpu_count, SHIFT_OR_ROTATE_TYPE::LEFT_SHIFT,
params, 2 * num_blocks, true);
uint32_t compute_overflow = 1;
overflow_sub_mem = new int_borrow_prop_memory<Torus>(
streams, gpu_indexes, 1, params, num_blocks, compute_overflow,
streams, gpu_indexes, gpu_count, params, num_blocks, compute_overflow,
true);
uint32_t group_size = overflow_sub_mem->group_size;
bool use_seq = overflow_sub_mem->prop_simu_group_carries_mem
@@ -3834,7 +3830,7 @@ template <typename Torus> struct unsigned_int_div_rem_memory {
group_size, use_seq);
comparison_buffer = new int_comparison_buffer<Torus>(
streams, gpu_indexes, 1, COMPARISON_TYPE::NE, params,
streams, gpu_indexes, gpu_count, COMPARISON_TYPE::NE, params,
num_blocks, false, true);
init_lookup_tables(streams, gpu_indexes, gpu_count, num_blocks);
@@ -4275,12 +4271,15 @@ template <typename Torus> struct int_scalar_mul_buffer {
Torus *preshifted_buffer;
Torus *all_shifted_buffer;
int_sc_prop_memory<Torus> *sc_prop_mem;
bool anticipated_buffers_drop;
int_scalar_mul_buffer(cudaStream_t const *streams,
uint32_t const *gpu_indexes, uint32_t gpu_count,
int_radix_params params, uint32_t num_radix_blocks,
bool allocate_gpu_memory) {
bool allocate_gpu_memory,
bool anticipated_buffer_drop) {
this->params = params;
this->anticipated_buffers_drop = anticipated_buffer_drop;
if (allocate_gpu_memory) {
uint32_t msg_bits = (uint32_t)std::log2(params.message_modulus);
@@ -4328,6 +4327,11 @@ template <typename Torus> struct int_scalar_mul_buffer {
delete sum_ciphertexts_vec_mem;
delete sc_prop_mem;
cuda_drop_async(all_shifted_buffer, streams[0], gpu_indexes[0]);
if (!anticipated_buffers_drop) {
cuda_drop_async(preshifted_buffer, streams[0], gpu_indexes[0]);
logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
delete (logical_scalar_shift_buffer);
}
}
};

View File

@@ -21,8 +21,8 @@ void cuda_keyswitch_lwe_ciphertext_vector_64(
void scratch_packing_keyswitch_lwe_list_to_glwe_64(
void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
bool allocate_gpu_memory);
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t num_lwes, bool allocate_gpu_memory);
void cuda_packing_keyswitch_lwe_list_to_glwe_64(
void *stream, uint32_t gpu_index, void *glwe_array_out,

View File

@@ -0,0 +1,358 @@
#ifndef CNCRT_FAST_KS_CUH
#define CNCRT_FAST_KS_CUH
#undef NDEBUG
#include <assert.h>
#include "device.h"
#include "gadget.cuh"
#include "helper_multi_gpu.h"
#include "keyswitch.cuh"
#include "polynomial/functions.cuh"
#include "polynomial/polynomial_math.cuh"
#include "torus.cuh"
#include "utils/helper.cuh"
#include "utils/kernel_dimensions.cuh"
#include <thread>
#include <vector>
// Integer ceiling division of M by N. The whole expansion is wrapped in
// parentheses so that the macro keeps its meaning when it is embedded in a
// larger expression (e.g. `x / CEIL_DIV(a, b)` or `x % CEIL_DIV(a, b)`).
#define CEIL_DIV(M, N) (((M) + (N)-1) / (N))
// Side length of the square output tile handled by one tgemm thread block
const int BLOCK_SIZE_GEMM = 64;
// Depth of the A/B tiles staged by tgemm and number of results per thread
const int THREADS_GEMM = 8;
// Thread-block side length used when launching the decomposition kernels
const int BLOCK_SIZE_DECOMP = 8;
// Size in bytes of two tiles of BLOCK_SIZE_GEMM x THREADS_GEMM Torus
// elements, i.e. the shared-memory footprint requested for a tgemm launch.
template <typename Torus> uint64_t get_shared_mem_size_tgemm() {
  const uint64_t elems_per_tile = BLOCK_SIZE_GEMM * THREADS_GEMM;
  const uint64_t tile_count = 2;
  return tile_count * elems_per_tile * sizeof(Torus);
}
// Tells whether the fast (GEMM based) packing keyswitch may be used for the
// given parameters. The fast path is currently switched off, so the answer is
// `false` whatever the arguments are.
__host__ inline bool can_use_pks_fast_path(uint32_t lwe_dimension,
                                           uint32_t num_lwe,
                                           uint32_t polynomial_size,
                                           uint32_t level_count,
                                           uint32_t glwe_dimension) {
  // TODO: activate it back, fix tests and extend to level_count > 1
  const bool fast_path_enabled = false;
  return fast_path_enabled;
}
// Starts the decomposition of an array of Torus LWEs: for every mask element
// the decomposer state is initialized (rounding) and one level is decomposed.
// The body of each LWE is never read.
//
// The x launch dimension selects the LWE ciphertext, the y launch dimension
// selects the mask element inside that ciphertext.
template <typename Torus, typename TorusVec>
__global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,
                                         uint32_t lwe_dimension,
                                         uint32_t num_lwe, uint32_t base_log,
                                         uint32_t level_count) {
  // which ciphertext of the input buffer this thread works on
  auto ct_idx = blockIdx.x * blockDim.x + threadIdx.x;
  // which mask element of that ciphertext this thread works on
  auto mask_idx = blockIdx.y * blockDim.y + threadIdx.y;

  // threads that fall outside the (num_lwe, lwe_dimension) range do nothing
  if (!(ct_idx < num_lwe && mask_idx < lwe_dimension))
    return;

  // The input is laid out as [mask_0, .., mask_lwe_dim, message] per
  // ciphertext, hence a read stride of lwe_dimension + 1. Only masks are
  // written out, hence a write stride of lwe_dimension.
  auto src_pos = ct_idx * (lwe_dimension + 1) + mask_idx;
  auto dst_pos = ct_idx * lwe_dimension + mask_idx;

  Torus mask_elem = lwe_in[src_pos];
  Torus decomp_state = init_decomposer_state(mask_elem, base_log, level_count);
  Torus level_mask = (1ll << base_log) - 1ll;

  lwe_out[dst_pos] = decompose_one<Torus>(decomp_state, level_mask, base_log);
}
// Continues the decomposition of an array of Torus elements in place. The
// array is expected to already hold the values produced by the previous
// decomposition call; each element is replaced by the output of one more
// decompose_one call.
//
// The x launch dimension selects the LWE ciphertext, the y launch dimension
// selects the mask element inside that ciphertext.
template <typename Torus, typename TorusVec>
__global__ void
decompose_vectorize_step_inplace(Torus *buffer_in, uint32_t lwe_dimension,
                                 uint32_t num_lwe, uint32_t base_log,
                                 uint32_t level_count) {
  // which ciphertext of the buffer this thread works on
  auto ct_idx = blockIdx.x * blockDim.x + threadIdx.x;
  // which mask element of that ciphertext this thread works on
  auto mask_idx = blockIdx.y * blockDim.y + threadIdx.y;

  // threads that fall outside the (num_lwe, lwe_dimension) range do nothing
  if (!(ct_idx < num_lwe && mask_idx < lwe_dimension))
    return;

  // the buffer only stores masks, so the stride is lwe_dimension
  auto pos = ct_idx * lwe_dimension + mask_idx;

  Torus decomp_state = buffer_in[pos];
  Torus level_mask = (1ll << base_log) - 1ll;

  buffer_in[pos] = decompose_one<Torus>(decomp_state, level_mask, base_log);
}
// Multiply matrices A, B of size (M, K), (K, N) respectively
// with K as the inner dimension.
//
// A block of threads processes blocks of size (BLOCK_SIZE_GEMM,
// BLOCK_SIZE_GEMM) splitting them in multiple tiles: (BLOCK_SIZE_GEMM,
// THREADS_GEMM)-shaped tiles of values from A, and (THREADS_GEMM,
// BLOCK_SIZE_GEMM)-shaped tiles of values from B.
//
// Layout, as used by the indexing below:
//  - A is read with a row stride of K
//  - B is read with a row stride of stride_B
//  - C is written with a row stride of N
// The products are ADDED to the current content of C (`+=`), so C must be
// initialized by the caller before the launch.
// blockIdx.y selects the block row of C and blockIdx.x the block column; only
// threadIdx.x is used inside a block.
template <typename Torus, typename TorusVec>
__global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
int stride_B, Torus *C) {
// BM x BN: shape of the output block; BK: depth of one staged tile;
// TM: number of output rows computed by each thread
const int BM = BLOCK_SIZE_GEMM;
const int BN = BLOCK_SIZE_GEMM;
const int BK = THREADS_GEMM;
const int TM = THREADS_GEMM;
const uint cRow = blockIdx.y;
const uint cCol = blockIdx.x;
const uint totalResultsBlocktile = BM * BN;
// Position of this thread's results inside the output block: column
// threadCol, rows threadRow * TM .. threadRow * TM + TM - 1
const int threadCol = threadIdx.x % BN;
const int threadRow = threadIdx.x / BN;
// Allocate space for the current block tile in shared memory
__shared__ Torus As[BM * BK];
__shared__ Torus Bs[BK * BN];
// Initialize the pointers to the input blocks from A, B
// Tiles from these blocks are loaded to shared memory
A += cRow * BM * K;
B += cCol * BN;
// Each thread will handle multiple sub-blocks
// (innerRowA, innerColA) / (innerRowB, innerColB): the single element of the
// A tile / B tile that this thread copies to shared memory per iteration
const uint innerColA = threadIdx.x % BK;
const uint innerRowA = threadIdx.x / BK;
const uint innerColB = threadIdx.x % BN;
const uint innerRowB = threadIdx.x / BN;
// allocate thread-local cache for results in registerfile
Torus threadResults[TM] = {0};
// Global row of A / column of B loaded by this thread, used for bounds checks
auto row_A = cRow * BM + innerRowA;
auto col_B = cCol * BN + innerColB;
// For each thread, loop over block tiles
for (uint bkIdx = 0; bkIdx < K; bkIdx += BK) {
auto col_A = bkIdx + innerColA;
auto row_B = bkIdx + innerRowB;
// Stage one element of A; positions outside the matrix are filled with 0 so
// that they do not contribute to the dot products
if (row_A < M && col_A < K) {
As[innerRowA * BK + innerColA] = A[innerRowA * K + innerColA];
} else {
As[innerRowA * BK + innerColA] = 0;
}
// Same for B
if (col_B < N && row_B < K) {
Bs[innerRowB * BN + innerColB] = B[innerRowB * stride_B + innerColB];
} else {
Bs[innerRowB * BN + innerColB] = 0;
}
// Wait until both tiles are fully written before any thread reads them
__syncthreads();
// Advance blocktile for the next iteration of this loop
A += BK;
B += BK * stride_B;
// calculate per-thread results
for (uint dotIdx = 0; dotIdx < BK; ++dotIdx) {
// we make the dotproduct loop the outside loop, which facilitates
// reuse of the Bs entry, which we can cache in a tmp var.
Torus tmp = Bs[dotIdx * BN + threadCol];
for (uint resIdx = 0; resIdx < TM; ++resIdx) {
threadResults[resIdx] +=
As[(threadRow * TM + resIdx) * BK + dotIdx] * tmp;
}
}
// Wait until every thread is done reading the tiles before they are
// overwritten by the next iteration
__syncthreads();
}
// Initialize the pointer to the output block of size (BLOCK_SIZE_GEMM,
// BLOCK_SIZE_GEMM)
C += cRow * BM * N + cCol * BN;
// write out the results
for (uint resIdx = 0; resIdx < TM; ++resIdx) {
int outRow = cRow * BM + threadRow * TM + resIdx;
int outCol = cCol * BN + threadCol;
// skip results that fall outside the (M, N) output matrix
if (outRow >= M)
continue;
if (outCol >= N)
continue;
// accumulate into C rather than overwrite it
C[(threadRow * TM + resIdx) * N + threadCol] += threadResults[resIdx];
}
}
// Finish the keyswitching operation and prepare GLWEs for accumulation.
// 1. Finish the keyswitching computation partially performed with a GEMM:
// - negate the dot product between the GLWE and KSK polynomial
// - add the GLWE message for the N-th polynomial coeff in the message poly
// 2. Rotate each of the GLWE . KSK poly dot products to
// prepare them for accumulation into a single GLWE
//
// The x launch dimension selects the GLWE (and the matching input LWE), the y
// launch dimension selects the polynomial coefficient. Each GLWE occupies
// (glwe_dimension + 1) * polynomial_size elements in both buffers.
// Note that in_glwe_buffer is modified in place (negation / body addition)
// before being passed to polynomial_accumulate_monic_monomial_mul.
template <typename Torus>
__global__ void polynomial_accumulate_monic_monomial_mul_many_neg_and_add_C(
Torus *in_glwe_buffer, Torus *out_glwe_buffer, Torus const *lwe_array,
uint32_t lwe_dimension, uint32_t num_glwes, uint32_t polynomial_size,
uint32_t glwe_dimension) {
uint32_t glwe_id = blockIdx.x * blockDim.x + threadIdx.x;
uint32_t degree = glwe_id; // lwe 0 rotate 0, lwe 1 rotate 1, .. , lwe
// poly_size-1 rotate poly_size-1
uint32_t coeffIdx = blockIdx.y * blockDim.y + threadIdx.y;
// threads outside the (num_glwes, polynomial_size) range do nothing
if (glwe_id >= num_glwes)
return;
if (coeffIdx >= polynomial_size)
return;
// start of the GLWE handled by this thread in the input / output buffers
auto in_poly =
in_glwe_buffer + glwe_id * polynomial_size * (glwe_dimension + 1);
auto out_result =
out_glwe_buffer + glwe_id * polynomial_size * (glwe_dimension + 1);
if (coeffIdx == 0) {
// Add the message value of the input LWE (`C`) to the N-th coefficient
// in the GLWE . KSK dot product
// The C is added to the first position of the last polynomial in the GLWE
// which has (glwe_dimension+1) polynomials
// The C value is extracted as the last value of the LWE ct. (of index
// glwe_id); the LWEs have (lwe_dimension + 1) values
in_poly[polynomial_size * glwe_dimension] =
lwe_array[glwe_id * (lwe_dimension + 1) + lwe_dimension] -
in_poly[polynomial_size * glwe_dimension];
// Coefficient 0 of the last polynomial was handled just above, so only
// polynomials 1 .. glwe_dimension - 1 are negated here
for (int gi = 1; gi < glwe_dimension; ++gi)
in_poly[coeffIdx + gi * polynomial_size] =
-in_poly[coeffIdx + gi * polynomial_size];
} else {
// Otherwise simply negate the input coefficient
// (polynomials 1 .. glwe_dimension)
for (int gi = 1; gi < glwe_dimension + 1; ++gi)
in_poly[coeffIdx + gi * polynomial_size] =
-in_poly[coeffIdx + gi * polynomial_size];
}
// Negate all the coefficients for rotation for the first poly
in_poly[coeffIdx] = -in_poly[coeffIdx];
// rotate the first polynomial (index 0) of the GLWE
// NOTE(review): the original comment called this "the body", while the body
// addition above targets the last polynomial - confirm the naming
polynomial_accumulate_monic_monomial_mul<Torus>(
out_result, in_poly, degree, coeffIdx, polynomial_size, 1, true);
// rotate the remaining glwe_dimension polynomials too
for (int gi = 1; gi < glwe_dimension + 1; ++gi)
polynomial_accumulate_monic_monomial_mul<Torus>(
out_result + gi * polynomial_size, in_poly + gi * polynomial_size,
degree, coeffIdx, polynomial_size, 1, true);
}
/*
 * Fast path of the packing keyswitch: packs `num_lwes` LWE ciphertexts from
 * `lwe_array_in` into a single GLWE written to `glwe_out`.
 *
 * The work is enqueued on `stream` (device `gpu_index`) as a sequence of
 * kernel launches:
 *   1. decompose the LWE masks into the first half of the scratch buffer,
 *   2. multiply the decomposed masks by the keyswitching key (tgemm),
 *      accumulating into the second half of the scratch buffer,
 *   3. negate / rotate the resulting GLWEs and add the LWE bodies,
 *   4. accumulate all the GLWEs into `glwe_out`.
 *
 * `fp_ks_buffer` must hold at least
 * 2 * num_lwes * max((glwe_dimension + 1) * polynomial_size, lwe_dimension)
 * elements of type Torus.
 *
 * Only level_count == 1 is supported; any other value triggers a PANIC.
 */
template <typename Torus, typename TorusVec>
__host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
    cudaStream_t stream, uint32_t gpu_index, Torus *glwe_out,
    Torus const *lwe_array_in, Torus const *fp_ksk_array, int8_t *fp_ks_buffer,
    uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
    uint32_t base_log, uint32_t level_count, uint32_t num_lwes) {

  // Optimization of packing keyswitch when packing many LWEs
  if (level_count > 1) {
    PANIC("Fast path PKS only supports level_count==1");
  }

  cudaSetDevice(gpu_index);
  check_cuda_error(cudaGetLastError());

  int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;

  // The fast path of PKS uses the scratch buffer (d_mem) differently than the
  // old path: it needs to store the decomposed masks in the first half of this
  // buffer and the keyswitched GLWEs in the second half of the buffer. Thus the
  // half-size of the scratch buffer is the max between the size of the GLWE
  // and the size of the LWE mask
  int memory_unit = glwe_accumulator_size > lwe_dimension
                        ? glwe_accumulator_size
                        : lwe_dimension;

  // Number of Torus elements in one half of the scratch buffer. Computed in
  // size_t so that neither the pointer offset nor the memset byte count can
  // wrap around in 32-bit arithmetic.
  size_t half_buffer_len =
      static_cast<size_t>(num_lwes) * static_cast<size_t>(memory_unit);

  // ping pong the buffer between successive calls
  // split the buffer in two parts of this size
  auto d_mem_0 = (Torus *)fp_ks_buffer;
  auto d_mem_1 = d_mem_0 + half_buffer_len;

  // Set the second half of the scratch buffer to 0 as it is used to
  // accumulate the results of the GEMM below
  cuda_memset_async(d_mem_1, 0, half_buffer_len * sizeof(Torus), stream,
                    gpu_index);
  check_cuda_error(cudaGetLastError());

  // decompose LWEs
  // don't decompose the LWE body - each LWE has lwe_dimension + 1 elements and
  // the grid only spans the lwe_dimension mask elements. This assumes that the
  // LWE dimension is a multiple of the block size
  // NOTE(review): presumably guaranteed by can_use_pks_fast_path - confirm
  dim3 grid_decomp(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
                   CEIL_DIV(lwe_dimension, BLOCK_SIZE_DECOMP));
  dim3 threads_decomp(BLOCK_SIZE_DECOMP, BLOCK_SIZE_DECOMP);

  // decompose first level
  decompose_vectorize_init<Torus, TorusVec>
      <<<grid_decomp, threads_decomp, 0, stream>>>(lwe_array_in, d_mem_0,
                                                   lwe_dimension, num_lwes,
                                                   base_log, level_count);
  check_cuda_error(cudaGetLastError());

  // gemm to ks the individual LWEs to GLWEs
  dim3 grid_gemm(CEIL_DIV(glwe_accumulator_size, BLOCK_SIZE_GEMM),
                 CEIL_DIV(num_lwes, BLOCK_SIZE_GEMM));
  dim3 threads_gemm(BLOCK_SIZE_GEMM * THREADS_GEMM);

  auto stride_KSK_buffer = glwe_accumulator_size;

  uint32_t shared_mem_size = get_shared_mem_size_tgemm<Torus>();
  tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
      num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0, fp_ksk_array,
      stride_KSK_buffer, d_mem_1);
  check_cuda_error(cudaGetLastError());

  // TODO: transpose key to generalize to level_count > 1. Each additional
  // level would run decompose_vectorize_step_inplace on d_mem_0 followed by a
  // tgemm against the key block of that level, accumulating into d_mem_1.

  // should we include the mask in the rotation ??
  dim3 grid_rotate(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),
                   CEIL_DIV(polynomial_size, BLOCK_SIZE_DECOMP));
  dim3 threads_rotate(BLOCK_SIZE_DECOMP, BLOCK_SIZE_DECOMP);

  // rotate the GLWEs
  polynomial_accumulate_monic_monomial_mul_many_neg_and_add_C<Torus>
      <<<grid_rotate, threads_rotate, 0, stream>>>(
          d_mem_1, d_mem_0, lwe_array_in, lwe_dimension, num_lwes,
          polynomial_size, glwe_dimension);
  check_cuda_error(cudaGetLastError());

  dim3 grid_accumulate(
      CEIL_DIV(polynomial_size * (glwe_dimension + 1), BLOCK_SIZE_DECOMP));
  dim3 threads_accum(BLOCK_SIZE_DECOMP);

  // accumulate to a single glwe
  accumulate_glwes<Torus><<<grid_accumulate, threads_accum, 0, stream>>>(
      glwe_out, d_mem_0, glwe_dimension, polynomial_size, num_lwes);
  check_cuda_error(cudaGetLastError());
}
#endif

View File

@@ -1,6 +1,8 @@
#include "fast_packing_keyswitch.cuh"
#include "keyswitch.cuh"
#include "keyswitch.h"
#include <cstdint>
#include <stdio.h>
/* Perform keyswitch on a batch of 32 bits input LWE ciphertexts.
* Head out to the equivalent operation on 64 bits for more details.
@@ -53,15 +55,17 @@ void cuda_keyswitch_lwe_ciphertext_vector_64(
/* 64-bit entry point for the packing keyswitch scratch step: casts `stream`
 * to cudaStream_t and forwards every argument to
 * scratch_packing_keyswitch_lwe_list_to_glwe<uint64_t>.
 * NOTE(review): this span comes from a diff view and lists both the previous
 * parameter list (without lwe_dimension) and the updated one (with
 * lwe_dimension) - confirm which lines are current before editing.
 */
void scratch_packing_keyswitch_lwe_list_to_glwe_64(
void *stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
bool allocate_gpu_memory) {
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t num_lwes, bool allocate_gpu_memory) {
scratch_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
static_cast<cudaStream_t>(stream), gpu_index, fp_ks_buffer,
static_cast<cudaStream_t>(stream), gpu_index, fp_ks_buffer, lwe_dimension,
glwe_dimension, polynomial_size, num_lwes, allocate_gpu_memory);
}
/* Perform functional packing keyswitch on a batch of 64 bits input LWE
* ciphertexts.
*/
void cuda_packing_keyswitch_lwe_list_to_glwe_64(
void *stream, uint32_t gpu_index, void *glwe_array_out,
void const *lwe_array_in, void const *fp_ksk_array, int8_t *fp_ks_buffer,
@@ -69,13 +73,24 @@ void cuda_packing_keyswitch_lwe_list_to_glwe_64(
uint32_t output_polynomial_size, uint32_t base_log, uint32_t level_count,
uint32_t num_lwes) {
host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(glwe_array_out),
static_cast<const uint64_t *>(lwe_array_in),
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
base_log, level_count, num_lwes);
if (can_use_pks_fast_path(input_lwe_dimension, num_lwes,
output_polynomial_size, level_count,
output_glwe_dimension)) {
host_fast_packing_keyswitch_lwe_list_to_glwe<uint64_t, ulonglong4>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(glwe_array_out),
static_cast<const uint64_t *>(lwe_array_in),
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
base_log, level_count, num_lwes);
} else
host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(glwe_array_out),
static_cast<const uint64_t *>(lwe_array_in),
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
base_log, level_count, num_lwes);
}
void cleanup_packing_keyswitch_lwe_list_to_glwe(void *stream,

View File

@@ -158,16 +158,20 @@ void execute_keyswitch_async(cudaStream_t const *streams,
// Scratch step of the packing keyswitch: selects device `gpu_index` and, when
// `allocate_gpu_memory` is set, asynchronously allocates the scratch buffer on
// `stream` and stores its address in `*fp_ks_buffer`.
// memory_unit is the larger of the GLWE size
// ((glwe_dimension + 1) * polynomial_size) and lwe_dimension.
// NOTE(review): this span comes from a diff view and lists both the previous
// sizing (2 * num_lwes * glwe_accumulator_size) and the updated one
// (2 * num_lwes * memory_unit) - confirm which lines are current.
template <typename Torus>
__host__ void scratch_packing_keyswitch_lwe_list_to_glwe(
cudaStream_t stream, uint32_t gpu_index, int8_t **fp_ks_buffer,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t num_lwes,
bool allocate_gpu_memory) {
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t num_lwes, bool allocate_gpu_memory) {
cudaSetDevice(gpu_index);
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
if (allocate_gpu_memory)
int memory_unit = glwe_accumulator_size > lwe_dimension
? glwe_accumulator_size
: lwe_dimension;
if (allocate_gpu_memory) {
*fp_ks_buffer = (int8_t *)cuda_malloc_async(
2 * num_lwes * glwe_accumulator_size * sizeof(Torus), stream,
gpu_index);
2 * num_lwes * memory_unit * sizeof(Torus), stream, gpu_index);
}
}
// public functional packing keyswitch for a single LWE ciphertext
@@ -241,6 +245,7 @@ __global__ void packing_keyswitch_lwe_list_to_glwe(
auto lwe_in = lwe_array_in + input_id * lwe_size;
auto ks_glwe_out = d_mem + input_id * glwe_accumulator_size;
auto glwe_out = glwe_array_out + input_id * glwe_accumulator_size;
// KS LWE to GLWE
packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext<Torus>(
ks_glwe_out, lwe_in, fp_ksk, lwe_dimension_in, glwe_dimension,
@@ -293,8 +298,18 @@ __host__ void host_packing_keyswitch_lwe_list_to_glwe(
dim3 grid(num_blocks, num_lwes);
dim3 threads(num_threads);
// The fast path of PKS uses the scratch buffer (d_mem) differently:
// it needs to store the decomposed masks in the first half of this buffer
// and the keyswitched GLWEs in the second half of the buffer. Thus the
// scratch buffer for the fast path must determine the half-size of the
// scratch buffer as the max between the size of the GLWE and the size of the
// LWE-mask
int memory_unit = glwe_accumulator_size > lwe_dimension_in
? glwe_accumulator_size
: lwe_dimension_in;
auto d_mem = (Torus *)fp_ks_buffer;
auto d_tmp_glwe_array_out = d_mem + num_lwes * glwe_accumulator_size;
auto d_tmp_glwe_array_out = d_mem + num_lwes * memory_unit;
// individually keyswitch each lwe
packing_keyswitch_lwe_list_to_glwe<Torus><<<grid, threads, 0, stream>>>(

View File

@@ -37,39 +37,32 @@ __host__ void host_integer_radix_cmux_kb(
uint32_t num_radix_blocks) {
auto params = mem_ptr->params;
// Since our CPU threads will be working on different streams we shall assert
// the work in the main stream is completed
auto true_streams = mem_ptr->zero_if_true_buffer->true_streams;
auto false_streams = mem_ptr->zero_if_false_buffer->false_streams;
for (uint j = 0; j < gpu_count; j++) {
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
}
auto mem_true = mem_ptr->zero_if_true_buffer;
zero_out_if<Torus>(true_streams, gpu_indexes, gpu_count, mem_ptr->tmp_true_ct,
lwe_array_true, lwe_condition, mem_true,
mem_ptr->inverted_predicate_lut, bsks, ksks,
num_radix_blocks);
auto mem_false = mem_ptr->zero_if_false_buffer;
zero_out_if<Torus>(false_streams, gpu_indexes, gpu_count,
mem_ptr->tmp_false_ct, lwe_array_false, lwe_condition,
mem_false, mem_ptr->predicate_lut, bsks, ksks,
num_radix_blocks);
for (uint j = 0; j < mem_ptr->zero_if_true_buffer->active_gpu_count; j++) {
cuda_synchronize_stream(true_streams[j], gpu_indexes[j]);
}
for (uint j = 0; j < mem_ptr->zero_if_false_buffer->active_gpu_count; j++) {
cuda_synchronize_stream(false_streams[j], gpu_indexes[j]);
Torus lwe_size = params.big_lwe_dimension + 1;
Torus radix_lwe_size = lwe_size * num_radix_blocks;
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in, lwe_array_true,
radix_lwe_size * sizeof(Torus), streams[0],
gpu_indexes[0]);
cuda_memcpy_async_gpu_to_gpu(mem_ptr->buffer_in + radix_lwe_size,
lwe_array_false, radix_lwe_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
for (uint i = 0; i < 2 * num_radix_blocks; i++) {
cuda_memcpy_async_gpu_to_gpu(mem_ptr->condition_array + i * lwe_size,
lwe_condition, lwe_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
}
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, mem_ptr->buffer_out, mem_ptr->buffer_in,
mem_ptr->condition_array, bsks, ksks, 2 * num_radix_blocks,
mem_ptr->predicate_lut, params.message_modulus);
// If the condition was true, true_ct will have kept its value and false_ct
// will be 0 If the condition was false, true_ct will be 0 and false_ct will
// have kept its value
auto added_cts = mem_ptr->tmp_true_ct;
host_addition<Torus>(streams[0], gpu_indexes[0], added_cts,
mem_ptr->tmp_true_ct, mem_ptr->tmp_false_ct,
params.big_lwe_dimension, num_radix_blocks);
auto mem_true = mem_ptr->buffer_out;
auto mem_false = &mem_ptr->buffer_out[radix_lwe_size];
auto added_cts = mem_true;
host_addition<Torus>(streams[0], gpu_indexes[0], added_cts, mem_true,
mem_false, params.big_lwe_dimension, num_radix_blocks);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array_out, added_cts, bsks, ksks,

View File

@@ -58,6 +58,9 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
case GE:
case LT:
case LE:
if (num_radix_blocks % 2 != 0)
PANIC("Cuda error (comparisons): the number of radix blocks has to be "
"even.")
host_integer_radix_difference_check_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
@@ -68,6 +71,8 @@ void cuda_comparison_integer_radix_ciphertext_kb_64(
break;
case MAX:
case MIN:
if (num_radix_blocks % 2 != 0)
PANIC("Cuda error (max/min): the number of radix blocks has to be even.")
host_integer_radix_maxmin_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
@@ -89,3 +94,91 @@ void cleanup_cuda_integer_comparison(void *const *streams,
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
}
void scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus);
scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
false, allocate_gpu_memory);
}
void cuda_integer_are_all_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
int_comparison_buffer<uint64_t> *buffer =
(int_comparison_buffer<uint64_t> *)mem_ptr;
host_integer_are_all_comparisons_block_true_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
(uint64_t **)(ksks), num_radix_blocks);
}
void cleanup_cuda_integer_are_all_comparisons_block_true(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void) {
int_comparison_buffer<uint64_t> *mem_ptr =
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
}
void scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
big_lwe_dimension, small_lwe_dimension, ks_level,
ks_base_log, pbs_level, pbs_base_log, grouping_factor,
message_modulus, carry_modulus);
scratch_cuda_integer_radix_comparison_check_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
(int_comparison_buffer<uint64_t> **)mem_ptr, num_radix_blocks, params, EQ,
false, allocate_gpu_memory);
}
void cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
void *lwe_array_out, void const *lwe_array_in, int8_t *mem_ptr,
void *const *bsks, void *const *ksks, uint32_t num_radix_blocks) {
int_comparison_buffer<uint64_t> *buffer =
(int_comparison_buffer<uint64_t> *)mem_ptr;
host_integer_is_at_least_one_comparisons_block_true_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
static_cast<const uint64_t *>(lwe_array_in), buffer, bsks,
(uint64_t **)(ksks), num_radix_blocks);
}
void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void) {
int_comparison_buffer<uint64_t> *mem_ptr =
(int_comparison_buffer<uint64_t> *)(*mem_ptr_void);
mem_ptr->release((cudaStream_t *)(streams), gpu_indexes, gpu_count);
}

View File

@@ -58,7 +58,7 @@ __host__ void accumulate_all_blocks(cudaStream_t stream, uint32_t gpu_index,
template <typename Torus>
__host__ void are_all_comparisons_block_true(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, Torus *lwe_array_out, Torus *lwe_array_in,
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
Torus *const *ksks, uint32_t num_radix_blocks) {
@@ -85,16 +85,19 @@ __host__ void are_all_comparisons_block_true(
while (remaining_blocks > 0) {
// Split in max_value chunks
uint32_t chunk_length = std::min(max_value, remaining_blocks);
int num_chunks = remaining_blocks / chunk_length;
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
// as in the worst case we will be adding `max_value` ones
auto input_blocks = tmp_out;
auto accumulator = are_all_block_true_buffer->tmp_block_accumulated;
auto is_equal_to_num_blocks_map =
&are_all_block_true_buffer->is_equal_to_lut_map;
auto is_max_value_lut = are_all_block_true_buffer->is_max_value;
uint32_t chunk_lengths[num_chunks];
auto begin_remaining_blocks = remaining_blocks;
for (int i = 0; i < num_chunks; i++) {
uint32_t chunk_length =
std::min(max_value, begin_remaining_blocks - i * max_value);
chunk_lengths[i] = chunk_length;
accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
input_blocks, big_lwe_dimension,
chunk_length);
@@ -107,53 +110,50 @@ __host__ void are_all_comparisons_block_true(
// Selects a LUT
int_radix_lut<Torus> *lut;
auto broadcast_lut_should_be_called = false;
if (are_all_block_true_buffer->op == COMPARISON_TYPE::NE) {
// is_non_zero_lut_buffer LUT
lut = mem_ptr->eq_buffer->is_non_zero_lut;
} else {
if ((*is_equal_to_num_blocks_map).find(chunk_length) !=
(*is_equal_to_num_blocks_map).end()) {
// The LUT is already computed
lut = (*is_equal_to_num_blocks_map)[chunk_length];
} else {
if (chunk_lengths[num_chunks - 1] != max_value) {
// LUT needs to be computed
auto new_lut =
new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
max_value, num_radix_blocks, true);
uint32_t chunk_length = chunk_lengths[num_chunks - 1];
auto is_equal_to_num_blocks_lut_f = [chunk_length](Torus x) -> Torus {
return x == chunk_length;
};
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], new_lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus,
streams[0], gpu_indexes[0], is_max_value_lut->get_lut(0, 1),
glwe_dimension, polynomial_size, message_modulus, carry_modulus,
is_equal_to_num_blocks_lut_f);
// new_lut->broadcast_lut(streams, gpu_indexes, 0);
broadcast_lut_should_be_called = true;
(*is_equal_to_num_blocks_map)[chunk_length] = new_lut;
lut = new_lut;
Torus *h_lut_indexes = (Torus *)malloc(num_chunks * sizeof(Torus));
for (int index = 0; index < num_chunks; index++) {
if (index == num_chunks - 1) {
h_lut_indexes[index] = 1;
} else {
h_lut_indexes[index] = 0;
}
}
cuda_memcpy_async_to_gpu(is_max_value_lut->get_lut_indexes(0, 0),
h_lut_indexes, num_chunks * sizeof(Torus),
streams[0], gpu_indexes[0]);
is_max_value_lut->broadcast_lut(streams, gpu_indexes, 0);
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
free(h_lut_indexes);
}
lut = is_max_value_lut;
}
// Applies the LUT
if (remaining_blocks == 1) {
// In the last iteration we copy the output to the final address
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, 1, lwe_array_out, accumulator, bsks, ksks, 1,
lut);
streams, gpu_indexes, gpu_count, lwe_array_out, accumulator, bsks,
ksks, 1, lut);
return;
} else {
if (broadcast_lut_should_be_called)
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, 1, tmp_out, accumulator, bsks, ksks,
num_chunks, lut);
else
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, tmp_out, accumulator, bsks, ksks,
num_chunks, lut);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, tmp_out, accumulator, bsks, ksks,
num_chunks, lut);
}
}
}
@@ -167,7 +167,7 @@ __host__ void are_all_comparisons_block_true(
template <typename Torus>
__host__ void is_at_least_one_comparisons_block_true(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, Torus *lwe_array_out, Torus *lwe_array_in,
uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
Torus *const *ksks, uint32_t num_radix_blocks) {
@@ -189,14 +189,18 @@ __host__ void is_at_least_one_comparisons_block_true(
uint32_t remaining_blocks = num_radix_blocks;
while (remaining_blocks > 0) {
// Split in max_value chunks
uint32_t chunk_length = std::min(max_value, remaining_blocks);
int num_chunks = remaining_blocks / chunk_length;
int num_chunks = (remaining_blocks + max_value - 1) / max_value;
// Since all blocks encrypt either 0 or 1, we can sum max_value of them
// as in the worst case we will be adding `max_value` ones
auto input_blocks = mem_ptr->tmp_lwe_array_out;
auto accumulator = buffer->tmp_block_accumulated;
uint32_t chunk_lengths[num_chunks];
auto begin_remaining_blocks = remaining_blocks;
for (int i = 0; i < num_chunks; i++) {
uint32_t chunk_length =
std::min(max_value, begin_remaining_blocks - i * max_value);
chunk_lengths[i] = chunk_length;
accumulate_all_blocks<Torus>(streams[0], gpu_indexes[0], accumulator,
input_blocks, big_lwe_dimension,
chunk_length);
@@ -458,10 +462,12 @@ __host__ void tree_sign_reduction(
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], last_lut->get_lut(0, 0), glwe_dimension,
polynomial_size, message_modulus, carry_modulus, f);
last_lut->broadcast_lut(streams, gpu_indexes, 0);
// Last leaf
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, 1, lwe_array_out, y, bsks, ksks, 1, last_lut);
streams, gpu_indexes, gpu_count, lwe_array_out, y, bsks, ksks, 1,
last_lut);
}
template <typename Torus>
@@ -486,8 +492,9 @@ __host__ void host_integer_radix_difference_check_kb(
if (carry_modulus >= message_modulus) {
// Packing is possible
// Pack inputs
Torus *packed_left = diff_buffer->tmp_packed_left;
Torus *packed_right = diff_buffer->tmp_packed_right;
Torus *packed_left = diff_buffer->tmp_packed;
Torus *packed_right =
diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
// In case the ciphertext is signed, the sign block and the one before it
// are handled separately
if (mem_ptr->is_signed) {
@@ -506,10 +513,7 @@ __host__ void host_integer_radix_difference_check_kb(
auto identity_lut = mem_ptr->identity_lut;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, packed_left, packed_left, bsks, ksks,
packed_num_radix_blocks, identity_lut);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, packed_right, packed_right, bsks, ksks,
packed_num_radix_blocks, identity_lut);
2 * packed_num_radix_blocks, identity_lut);
lhs = packed_left;
rhs = packed_right;
@@ -538,11 +542,13 @@ __host__ void host_integer_radix_difference_check_kb(
// Compare the last block before the sign block separately
auto identity_lut = mem_ptr->identity_lut;
Torus *packed_left = diff_buffer->tmp_packed;
Torus *packed_right =
diff_buffer->tmp_packed + num_radix_blocks / 2 * big_lwe_size;
Torus *last_left_block_before_sign_block =
diff_buffer->tmp_packed_left + packed_num_radix_blocks * big_lwe_size;
packed_left + packed_num_radix_blocks * big_lwe_size;
Torus *last_right_block_before_sign_block =
diff_buffer->tmp_packed_right +
packed_num_radix_blocks * big_lwe_size;
packed_right + packed_num_radix_blocks * big_lwe_size;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, last_left_block_before_sign_block,
lwe_array_left + (num_radix_blocks - 2) * big_lwe_size, bsks, ksks, 1,
@@ -620,4 +626,35 @@ __host__ void host_integer_radix_maxmin_kb(
mem_ptr->cmux_buffer, bsks, ksks, total_num_radix_blocks);
}
// Host wrapper around are_all_comparisons_block_true: forwards every argument
// unchanged.
//
// It returns (in lwe_array_out) a block encrypting 1 if all input blocks are 1
// otherwise the block encrypts 0
template <typename Torus>
__host__ void host_integer_are_all_comparisons_block_true_kb(
    cudaStream_t const *streams, uint32_t const *gpu_indexes,
    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
    Torus *const *ksks, uint32_t num_radix_blocks) {

  are_all_comparisons_block_true<Torus>(streams, gpu_indexes, gpu_count,
                                        lwe_array_out, lwe_array_in, mem_ptr,
                                        bsks, ksks, num_radix_blocks);
}
// Host wrapper around is_at_least_one_comparisons_block_true: forwards every
// argument unchanged.
//
// It returns (in lwe_array_out) a block encrypting 1 if at least one input
// block is 1, otherwise the block encrypts 0
// NOTE(review): semantics taken from the callee's name - confirm against
// is_at_least_one_comparisons_block_true
template <typename Torus>
__host__ void host_integer_is_at_least_one_comparisons_block_true_kb(
    cudaStream_t const *streams, uint32_t const *gpu_indexes,
    uint32_t gpu_count, Torus *lwe_array_out, Torus const *lwe_array_in,
    int_comparison_buffer<Torus> *mem_ptr, void *const *bsks,
    Torus *const *ksks, uint32_t num_radix_blocks) {

  is_at_least_one_comparisons_block_true<Torus>(
      streams, gpu_indexes, gpu_count, lwe_array_out, lwe_array_in, mem_ptr,
      bsks, ksks, num_radix_blocks);
}
#endif

View File

@@ -2,6 +2,7 @@
#define CUDA_INTEGER_COMPRESSION_CUH
#include "ciphertext.h"
#include "crypto/fast_packing_keyswitch.cuh"
#include "crypto/keyswitch.cuh"
#include "device.h"
#include "integer/compression/compression.h"
@@ -116,11 +117,21 @@ host_integer_compress(cudaStream_t const *streams, uint32_t const *gpu_indexes,
while (rem_lwes > 0) {
auto chunk_size = min(rem_lwes, mem_ptr->lwe_per_glwe);
host_packing_keyswitch_lwe_list_to_glwe<Torus>(
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
compression_params.polynomial_size, compression_params.ks_base_log,
compression_params.ks_level, chunk_size);
if (can_use_pks_fast_path(
input_lwe_dimension, chunk_size, compression_params.polynomial_size,
compression_params.ks_level, compression_params.glwe_dimension)) {
host_fast_packing_keyswitch_lwe_list_to_glwe<Torus, ulonglong4>(
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
compression_params.polynomial_size, compression_params.ks_base_log,
compression_params.ks_level, chunk_size);
} else {
host_packing_keyswitch_lwe_list_to_glwe<Torus>(
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
compression_params.polynomial_size, compression_params.ks_base_log,
compression_params.ks_level, chunk_size);
}
rem_lwes -= chunk_size;
lwe_subset += chunk_size * lwe_in_size;

View File

@@ -286,7 +286,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
uint32_t shifted_mask = full_message_mask >> shift_amount;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, 1, interesting_divisor.last_block(),
streams, gpu_indexes, gpu_count, interesting_divisor.last_block(),
interesting_divisor.last_block(), bsks, ksks, 1,
mem_ptr->masking_luts_1[shifted_mask]);
}; // trim_last_interesting_divisor_bits
@@ -315,7 +315,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
shifted_mask = shifted_mask & full_message_mask;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, 1, divisor_ms_blocks.first_block(),
streams, gpu_indexes, gpu_count, divisor_ms_blocks.first_block(),
divisor_ms_blocks.first_block(), bsks, ksks, 1,
mem_ptr->masking_luts_2[shifted_mask]);
}; // trim_first_divisor_ms_bits
@@ -340,7 +340,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
streams[0], gpu_indexes[0]);
host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
streams, gpu_indexes, 1, interesting_remainder1.data, 1,
streams, gpu_indexes, gpu_count, interesting_remainder1.data, 1,
mem_ptr->shift_mem_1, bsks, ksks, interesting_remainder1.len);
tmp_radix.clone_from(interesting_remainder1, 0,
@@ -370,13 +370,13 @@ __host__ void host_unsigned_integer_div_rem_kb(
uint32_t const *gpu_indexes,
uint32_t gpu_count) {
host_integer_radix_logical_scalar_shift_kb_inplace<Torus>(
streams, gpu_indexes, 1, interesting_remainder2.data, 1,
streams, gpu_indexes, gpu_count, interesting_remainder2.data, 1,
mem_ptr->shift_mem_2, bsks, ksks, interesting_remainder2.len);
}; // left_shift_interesting_remainder2
//for (uint j = 0; j < gpu_count; j++) {
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
//}
for (uint j = 0; j < gpu_count; j++) {
cuda_synchronize_stream(streams[j], gpu_indexes[j]);
}
// interesting_divisor
trim_last_interesting_divisor_bits(mem_ptr->sub_streams_1, gpu_indexes,
gpu_count);
@@ -389,12 +389,12 @@ __host__ void host_unsigned_integer_div_rem_kb(
// interesting_remainder2
left_shift_interesting_remainder2(mem_ptr->sub_streams_4, gpu_indexes,
gpu_count);
// for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
cuda_synchronize_stream(mem_ptr->sub_streams_1[0], gpu_indexes[0]);
cuda_synchronize_stream(mem_ptr->sub_streams_2[0], gpu_indexes[0]);
cuda_synchronize_stream(mem_ptr->sub_streams_3[0], gpu_indexes[0]);
cuda_synchronize_stream(mem_ptr->sub_streams_4[0], gpu_indexes[0]);
// }
for (uint j = 0; j < mem_ptr->active_gpu_count; j++) {
cuda_synchronize_stream(mem_ptr->sub_streams_1[j], gpu_indexes[j]);
cuda_synchronize_stream(mem_ptr->sub_streams_2[j], gpu_indexes[j]);
cuda_synchronize_stream(mem_ptr->sub_streams_3[j], gpu_indexes[j]);
cuda_synchronize_stream(mem_ptr->sub_streams_4[j], gpu_indexes[j]);
}
// if interesting_remainder1 != 0 -> interesting_remainder2 == 0
// if interesting_remainder1 == 0 -> interesting_remainder2 != 0
@@ -438,7 +438,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
streams, gpu_indexes, first_indexes, second_indexes, scalar_indexes,
merged_interesting_remainder.len);
host_integer_overflowing_sub<uint64_t>(
streams, gpu_indexes, 1, new_remainder.data,
streams, gpu_indexes, gpu_count, new_remainder.data,
(uint64_t *)merged_interesting_remainder.data,
interesting_divisor.data, subtraction_overflowed.data,
(const Torus *)nullptr, mem_ptr->overflow_sub_mem, bsks, ksks,
@@ -460,7 +460,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
// But we are in the special case where scalar == 0
// So we can skip some stuff
host_compare_with_zero_equality<Torus>(
streams, gpu_indexes, 1, tmp_1.data, trivial_blocks.data,
streams, gpu_indexes, gpu_count, tmp_1.data, trivial_blocks.data,
mem_ptr->comparison_buffer, bsks, ksks, trivial_blocks.len,
mem_ptr->comparison_buffer->eq_buffer->is_non_zero_lut);
@@ -468,7 +468,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
ceil_div(trivial_blocks.len, message_modulus * carry_modulus - 1);
is_at_least_one_comparisons_block_true<Torus>(
streams, gpu_indexes, 1,
streams, gpu_indexes, gpu_count,
at_least_one_upper_block_is_non_zero.data, tmp_1.data,
mem_ptr->comparison_buffer, bsks, ksks, tmp_1.len);
}
@@ -482,7 +482,7 @@ __host__ void host_unsigned_integer_div_rem_kb(
[&](cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count) {
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, 1,
streams, gpu_indexes, gpu_count,
cleaned_merged_interesting_remainder.data,
cleaned_merged_interesting_remainder.data, bsks, ksks,
cleaned_merged_interesting_remainder.len,

View File

@@ -198,6 +198,27 @@ void scratch_cuda_apply_univariate_lut_kb_64(
allocate_gpu_memory);
}
void scratch_cuda_apply_many_univariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint32_t num_many_lut, bool allocate_gpu_memory) {
int_radix_params params(pbs_type, glwe_dimension, polynomial_size,
glwe_dimension * polynomial_size, lwe_dimension,
ks_level, ks_base_log, pbs_level, pbs_base_log,
grouping_factor, message_modulus, carry_modulus);
scratch_cuda_apply_many_univariate_lut_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
(int_radix_lut<uint64_t> **)mem_ptr,
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
num_many_lut, allocate_gpu_memory);
}
void cuda_apply_univariate_lut_kb_64(void *const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count, void *output_radix_lwe,
@@ -237,7 +258,7 @@ void cuda_apply_many_univariate_lut_kb_64(
void scratch_cuda_apply_bivariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, void *input_lut, uint32_t lwe_dimension,
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_radix_blocks,
@@ -251,8 +272,9 @@ void scratch_cuda_apply_bivariate_lut_kb_64(
scratch_cuda_apply_bivariate_lut_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
(int_radix_lut<uint64_t> **)mem_ptr, static_cast<uint64_t *>(input_lut),
num_radix_blocks, params, allocate_gpu_memory);
(int_radix_lut<uint64_t> **)mem_ptr,
static_cast<const uint64_t *>(input_lut), num_radix_blocks, params,
allocate_gpu_memory);
}
void cuda_apply_bivariate_lut_kb_64(

View File

@@ -1557,6 +1557,25 @@ void host_apply_univariate_lut_kb(cudaStream_t const *streams,
num_blocks, mem);
}
// Allocates the LUT buffer used to apply several univariate functions at once
// and uploads the caller-provided lookup table into it.
//
// On return `*mem_ptr` points to a heap-allocated `int_radix_lut<Torus>`
// created with `new`; nothing in this function frees it, so the release is
// presumably done by a matching cleanup routine -- TODO confirm.
//
// `input_lut` must provide at least
// (glwe_dimension + 1) * polynomial_size elements of `Torus`, since that many
// are copied. The copy is asynchronous on `streams[0]`; this function does
// not synchronize the stream itself.
template <typename Torus>
void scratch_cuda_apply_many_univariate_lut_kb(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, int_radix_lut<Torus> **mem_ptr, Torus const *input_lut,
uint32_t num_radix_blocks, int_radix_params params, uint32_t num_many_lut,
bool allocate_gpu_memory) {
// The literal 1 is presumably the number of LUT slots to allocate -- TODO
// confirm against the int_radix_lut constructor.
*mem_ptr = new int_radix_lut<Torus>(streams, gpu_indexes, gpu_count, params,
1, num_radix_blocks, num_many_lut,
allocate_gpu_memory);
// It is safe to do this copy on GPU 0, because all LUTs always reside on GPU
// 0
// The (void *) cast drops the const qualifier of `input_lut` to match the
// copy helper's parameter type.
cuda_memcpy_async_to_gpu((*mem_ptr)->get_lut(0, 0), (void *)input_lut,
(params.glwe_dimension + 1) *
params.polynomial_size * sizeof(Torus),
streams[0], gpu_indexes[0]);
// Presumably propagates the freshly written LUT to the other active GPUs --
// TODO confirm against int_radix_lut::broadcast_lut.
(*mem_ptr)->broadcast_lut(streams, gpu_indexes, 0);
}
template <typename Torus>
void host_apply_many_univariate_lut_kb(
cudaStream_t const *streams, uint32_t const *gpu_indexes,
@@ -1624,13 +1643,12 @@ void host_propagate_single_carry(cudaStream_t const *streams,
auto params = mem->params;
auto glwe_dimension = params.glwe_dimension;
auto polynomial_size = params.polynomial_size;
auto message_modulus = params.message_modulus;
auto carry_modulus = params.carry_modulus;
uint32_t big_lwe_size = glwe_dimension * polynomial_size + 1;
auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
auto big_lwe_dimension = big_lwe_size - 1; // For host addition
auto lut_stride = mem->lut_stride;
auto num_many_lut = mem->num_many_lut;
auto output_flag = mem->output_flag + big_lwe_size * num_radix_blocks;
if (requested_flag == outputFlag::FLAG_OVERFLOW)
PANIC("Cuda error: single carry propagation is not supported for overflow, "
"try using add_and_propagate_single_carry");
@@ -1647,7 +1665,7 @@ void host_propagate_single_carry(cudaStream_t const *streams,
if (requested_flag == outputFlag::FLAG_CARRY) {
cuda_memcpy_async_gpu_to_gpu(
mem->output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
}
// Step 2
@@ -1667,45 +1685,40 @@ void host_propagate_single_carry(cudaStream_t const *streams,
if (requested_flag == outputFlag::FLAG_OVERFLOW ||
requested_flag == outputFlag::FLAG_CARRY) {
host_addition<Torus>(streams[0], gpu_indexes[0], mem->output_flag,
mem->output_flag,
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
mem->prop_simu_group_carries_mem->simulators +
(num_radix_blocks - 1) * big_lwe_size,
big_lwe_dimension, 1);
}
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
// Step 3
// Add carries and cleanup OutputFlag::None
host_radix_sum_in_groups<Torus>(
mem->sub_streams_1[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
streams[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
mem->prop_simu_group_carries_mem->resolved_carries, num_radix_blocks,
big_lwe_size, group_size);
auto message_extract = mem->lut_message_extract;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem->sub_streams_1, gpu_indexes, gpu_count, lwe_array, prepared_blocks,
bsks, ksks, num_radix_blocks, message_extract);
if (requested_flag == outputFlag::FLAG_CARRY) {
host_addition<Torus>(mem->sub_streams_2[0], gpu_indexes[0],
mem->output_flag, mem->output_flag,
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
mem->prop_simu_group_carries_mem->resolved_carries +
(mem->num_groups - 1) * big_lwe_size,
big_lwe_dimension, 1);
cuda_memcpy_async_gpu_to_gpu(
prepared_blocks + num_radix_blocks * big_lwe_size, output_flag,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem->sub_streams_2, gpu_indexes, gpu_count, mem->output_flag,
mem->output_flag, bsks, ksks, 1, mem->lut_carry_flag_last);
streams, gpu_indexes, gpu_count, mem->output_flag, prepared_blocks,
bsks, ksks, num_radix_blocks + 1, mem->lut_message_extract);
cuda_memcpy_async_gpu_to_gpu(carry_out, mem->output_flag,
big_lwe_size_bytes, mem->sub_streams_2[0],
gpu_indexes[0]);
}
for (int j = 0; j < mem->active_gpu_count; j++) {
cuda_synchronize_stream(mem->sub_streams_1[j], gpu_indexes[j]);
cuda_synchronize_stream(mem->sub_streams_2[j], gpu_indexes[j]);
cuda_memcpy_async_gpu_to_gpu(lwe_array, mem->output_flag,
big_lwe_size_bytes * num_radix_blocks,
streams[0], gpu_indexes[0]);
cuda_memcpy_async_gpu_to_gpu(
carry_out, mem->output_flag + num_radix_blocks * big_lwe_size,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
} else {
auto message_extract = mem->lut_message_extract;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lwe_array, prepared_blocks, bsks, ksks,
num_radix_blocks, message_extract);
}
}
@@ -1721,13 +1734,12 @@ void host_add_and_propagate_single_carry(
auto params = mem->params;
auto glwe_dimension = params.glwe_dimension;
auto polynomial_size = params.polynomial_size;
auto message_modulus = params.message_modulus;
auto carry_modulus = params.carry_modulus;
uint32_t big_lwe_size = glwe_dimension * polynomial_size + 1;
auto big_lwe_size_bytes = big_lwe_size * sizeof(Torus);
auto big_lwe_dimension = big_lwe_size - 1; // For host addition
auto lut_stride = mem->lut_stride;
auto num_many_lut = mem->num_many_lut;
auto output_flag = mem->output_flag + big_lwe_size * num_radix_blocks;
if (requested_flag == outputFlag::FLAG_OVERFLOW) {
cuda_memcpy_async_gpu_to_gpu(
@@ -1754,12 +1766,12 @@ void host_add_and_propagate_single_carry(
if (requested_flag == outputFlag::FLAG_OVERFLOW) {
auto lut_overflow_prep = mem->lut_overflow_flag_prep;
integer_radix_apply_bivariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, mem->output_flag, mem->last_lhs,
streams, gpu_indexes, gpu_count, output_flag, mem->last_lhs,
mem->last_rhs, bsks, ksks, 1, lut_overflow_prep,
lut_overflow_prep->params.message_modulus);
} else if (requested_flag == outputFlag::FLAG_CARRY) {
cuda_memcpy_async_gpu_to_gpu(
mem->output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
output_flag, block_states + (num_radix_blocks - 1) * big_lwe_size,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
}
@@ -1780,58 +1792,50 @@ void host_add_and_propagate_single_carry(
if (requested_flag == outputFlag::FLAG_OVERFLOW ||
requested_flag == outputFlag::FLAG_CARRY) {
host_addition<Torus>(streams[0], gpu_indexes[0], mem->output_flag,
mem->output_flag,
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
mem->prop_simu_group_carries_mem->simulators +
(num_radix_blocks - 1) * big_lwe_size,
big_lwe_dimension, 1);
}
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
// Step 3
// Add carries and cleanup OutputFlag::None
host_radix_sum_in_groups<Torus>(
mem->sub_streams_1[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
streams[0], gpu_indexes[0], prepared_blocks, prepared_blocks,
mem->prop_simu_group_carries_mem->resolved_carries, num_radix_blocks,
big_lwe_size, group_size);
auto message_extract = mem->lut_message_extract;
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem->sub_streams_1, gpu_indexes, gpu_count, lhs_array, prepared_blocks,
bsks, ksks, num_radix_blocks, message_extract);
if (requested_flag == outputFlag::FLAG_OVERFLOW ||
requested_flag == outputFlag::FLAG_CARRY) {
if (num_radix_blocks == 1 && requested_flag == outputFlag::FLAG_OVERFLOW &&
uses_carry == 1) {
host_addition<Torus>(mem->sub_streams_2[0], gpu_indexes[0],
mem->output_flag, mem->output_flag, input_carries,
big_lwe_dimension, 1);
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
input_carries, big_lwe_dimension, 1);
} else {
host_addition<Torus>(mem->sub_streams_2[0], gpu_indexes[0],
mem->output_flag, mem->output_flag,
host_addition<Torus>(streams[0], gpu_indexes[0], output_flag, output_flag,
mem->prop_simu_group_carries_mem->resolved_carries +
(mem->num_groups - 1) * big_lwe_size,
big_lwe_dimension, 1);
}
if (requested_flag == outputFlag::FLAG_OVERFLOW) {
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem->sub_streams_2, gpu_indexes, gpu_count, mem->output_flag,
mem->output_flag, bsks, ksks, 1, mem->lut_overflow_flag_last);
} else {
integer_radix_apply_univariate_lookup_table_kb<Torus>(
mem->sub_streams_2, gpu_indexes, gpu_count, mem->output_flag,
mem->output_flag, bsks, ksks, 1, mem->lut_carry_flag_last);
}
cuda_memcpy_async_gpu_to_gpu(carry_out, mem->output_flag,
big_lwe_size_bytes, mem->sub_streams_2[0],
gpu_indexes[0]);
}
for (int j = 0; j < mem->active_gpu_count; j++) {
cuda_synchronize_stream(mem->sub_streams_1[j], gpu_indexes[j]);
cuda_synchronize_stream(mem->sub_streams_2[j], gpu_indexes[j]);
cuda_memcpy_async_gpu_to_gpu(
prepared_blocks + num_radix_blocks * big_lwe_size, output_flag,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, mem->output_flag, prepared_blocks,
bsks, ksks, num_radix_blocks + 1, mem->lut_message_extract);
cuda_memcpy_async_gpu_to_gpu(lhs_array, mem->output_flag,
big_lwe_size_bytes * num_radix_blocks,
streams[0], gpu_indexes[0]);
cuda_memcpy_async_gpu_to_gpu(
carry_out, mem->output_flag + num_radix_blocks * big_lwe_size,
big_lwe_size_bytes, streams[0], gpu_indexes[0]);
} else {
integer_radix_apply_univariate_lookup_table_kb<Torus>(
streams, gpu_indexes, gpu_count, lhs_array, prepared_blocks, bsks, ksks,
num_radix_blocks, mem->lut_message_extract);
}
}

View File

@@ -22,6 +22,9 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
case GE:
case LT:
case LE:
if (lwe_ciphertext_count % 2 != 0)
PANIC("Cuda error (scalar comparisons): the number of radix blocks has "
"to be even.")
host_integer_radix_scalar_difference_check_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),
@@ -32,6 +35,9 @@ void cuda_scalar_comparison_integer_radix_ciphertext_kb_64(
break;
case MAX:
case MIN:
if (lwe_ciphertext_count % 2 != 0)
PANIC("Cuda error (scalar max/min): the number of radix blocks has to be "
"even.")
host_integer_radix_scalar_maxmin_kb<uint64_t>(
(cudaStream_t *)(streams), gpu_indexes, gpu_count,
static_cast<uint64_t *>(lwe_array_out),

View File

@@ -141,8 +141,9 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
//////////////
// lsb
Torus *lhs = diff_buffer->tmp_packed_left;
Torus *rhs = diff_buffer->tmp_packed_right;
Torus *lhs = diff_buffer->tmp_packed;
Torus *rhs =
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
big_lwe_dimension, num_lsb_radix_blocks,
@@ -210,8 +211,9 @@ __host__ void integer_radix_unsigned_scalar_difference_check_kb(
uint32_t num_lsb_radix_blocks = total_num_radix_blocks;
uint32_t num_scalar_blocks = total_num_scalar_blocks;
Torus *lhs = diff_buffer->tmp_packed_left;
Torus *rhs = diff_buffer->tmp_packed_right;
Torus *lhs = diff_buffer->tmp_packed;
Torus *rhs =
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
pack_blocks<Torus>(streams[0], gpu_indexes[0], lhs, lwe_array_in,
big_lwe_dimension, num_lsb_radix_blocks,
@@ -358,8 +360,9 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
//////////////
// lsb
Torus *lhs = diff_buffer->tmp_packed_left;
Torus *rhs = diff_buffer->tmp_packed_right;
Torus *lhs = diff_buffer->tmp_packed;
Torus *rhs =
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
big_lwe_dimension, num_lsb_radix_blocks,
@@ -458,8 +461,9 @@ __host__ void integer_radix_signed_scalar_difference_check_kb(
auto lwe_array_ct_out = mem_ptr->tmp_lwe_array_out;
auto lwe_array_sign_out =
lwe_array_ct_out + (num_lsb_radix_blocks / 2) * big_lwe_size;
Torus *lhs = diff_buffer->tmp_packed_left;
Torus *rhs = diff_buffer->tmp_packed_right;
Torus *lhs = diff_buffer->tmp_packed;
Torus *rhs =
diff_buffer->tmp_packed + total_num_radix_blocks / 2 * big_lwe_size;
pack_blocks<Torus>(lsb_streams[0], gpu_indexes[0], lhs, lwe_array_in,
big_lwe_dimension, num_lsb_radix_blocks - 1,

View File

@@ -36,7 +36,7 @@ __host__ void scratch_cuda_integer_radix_scalar_mul_kb(
*mem_ptr =
new int_scalar_mul_buffer<T>(streams, gpu_indexes, gpu_count, params,
num_radix_blocks, allocate_gpu_memory);
num_radix_blocks, allocate_gpu_memory, true);
}
template <typename T, class params>
@@ -94,9 +94,11 @@ __host__ void host_integer_scalar_mul_radix(
}
cuda_synchronize_stream(streams[0], gpu_indexes[0]);
cuda_drop_async(preshifted_buffer, streams[0], gpu_indexes[0]);
mem->logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
delete (mem->logical_scalar_shift_buffer);
if (mem->anticipated_buffers_drop) {
cuda_drop_async(preshifted_buffer, streams[0], gpu_indexes[0]);
mem->logical_scalar_shift_buffer->release(streams, gpu_indexes, gpu_count);
delete (mem->logical_scalar_shift_buffer);
}
if (j == 0) {
// lwe array = 0

View File

@@ -136,7 +136,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
num_many_lut, lut_stride);
break;
case 512:
host_programmable_bootstrap_tbc<Torus, Degree<512>>(
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<512>>(
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
@@ -144,7 +144,7 @@ void cuda_programmable_bootstrap_tbc_lwe_ciphertext_vector(
num_many_lut, lut_stride);
break;
case 1024:
host_programmable_bootstrap_tbc<Torus, Degree<1024>>(
host_programmable_bootstrap_tbc<Torus, AmortizedDegree<1024>>(
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
@@ -393,7 +393,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
num_many_lut, lut_stride);
break;
case 512:
host_programmable_bootstrap_cg<Torus, Degree<512>>(
host_programmable_bootstrap_cg<Torus, AmortizedDegree<512>>(
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
@@ -401,7 +401,7 @@ void cuda_programmable_bootstrap_cg_lwe_ciphertext_vector(
num_many_lut, lut_stride);
break;
case 1024:
host_programmable_bootstrap_cg<Torus, Degree<1024>>(
host_programmable_bootstrap_cg<Torus, AmortizedDegree<1024>>(
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
@@ -468,7 +468,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
num_many_lut, lut_stride);
break;
case 512:
host_programmable_bootstrap<Torus, Degree<512>>(
host_programmable_bootstrap<Torus, AmortizedDegree<512>>(
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,
@@ -476,7 +476,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector(
num_many_lut, lut_stride);
break;
case 1024:
host_programmable_bootstrap<Torus, Degree<1024>>(
host_programmable_bootstrap<Torus, AmortizedDegree<1024>>(
static_cast<cudaStream_t>(stream), gpu_index, lwe_array_out,
lwe_output_indexes, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, buffer, glwe_dimension,

View File

@@ -480,20 +480,30 @@ __host__ void host_programmable_bootstrap(
double2 *global_join_buffer = pbs_buffer->global_join_buffer;
int8_t *d_mem = pbs_buffer->d_mem;
bool graphCreated = false;
cudaGraph_t graph;
cudaGraphExec_t instance;
for (int i = 0; i < lwe_dimension; i++) {
execute_step_one<Torus, params>(
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, global_accumulator,
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
partial_sm, partial_dm_step_one, full_sm_step_one, full_dm_step_one);
execute_step_two<Torus, params>(
stream, gpu_index, lwe_array_out, lwe_output_indexes, lut_vector,
lut_vector_indexes, bootstrapping_key, global_accumulator,
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
partial_sm, partial_dm_step_two, full_sm_step_two, full_dm_step_two,
num_many_lut, lut_stride);
if (!graphCreated) {
cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal);
execute_step_one<Torus, params>(
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, global_accumulator,
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
partial_sm, partial_dm_step_one, full_sm_step_one, full_dm_step_one);
execute_step_two<Torus, params>(
stream, gpu_index, lwe_array_out, lwe_output_indexes, lut_vector,
lut_vector_indexes, bootstrapping_key, global_accumulator,
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
partial_sm, partial_dm_step_two, full_sm_step_two, full_dm_step_two,
num_many_lut, lut_stride);
cudaStreamEndCapture(stream, &graph);
cudaGraphInstantiate(&instance, graph, NULL, NULL, 0);
graphCreated = true;
}
cudaGraphLaunch(instance, stream);
}
}

View File

@@ -649,29 +649,41 @@ __host__ void host_multi_bit_programmable_bootstrap(
auto lwe_chunk_size = buffer->lwe_chunk_size;
bool graphCreated = false;
cudaGraph_t graph;
cudaGraphExec_t instance;
for (uint32_t lwe_offset = 0; lwe_offset < (lwe_dimension / grouping_factor);
lwe_offset += lwe_chunk_size) {
// Compute a keybundle
execute_compute_keybundle<Torus, params>(
stream, gpu_index, lwe_array_in, lwe_input_indexes, bootstrapping_key,
buffer, num_samples, lwe_dimension, glwe_dimension, polynomial_size,
grouping_factor, level_count, lwe_offset);
// Accumulate
uint32_t chunk_size = std::min(
lwe_chunk_size, (lwe_dimension / grouping_factor) - lwe_offset);
for (uint32_t j = 0; j < chunk_size; j++) {
execute_step_one<Torus, params>(
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, buffer, num_samples, lwe_dimension, glwe_dimension,
polynomial_size, base_log, level_count, j, lwe_offset);
if (!graphCreated) {
cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal);
// Compute a keybundle
execute_compute_keybundle<Torus, params>(
stream, gpu_index, lwe_array_in, lwe_input_indexes, bootstrapping_key,
buffer, num_samples, lwe_dimension, glwe_dimension, polynomial_size,
grouping_factor, level_count, lwe_offset);
// Accumulate
uint32_t chunk_size = std::min(
lwe_chunk_size, (lwe_dimension / grouping_factor) - lwe_offset);
for (uint32_t j = 0; j < chunk_size; j++) {
execute_step_one<Torus, params>(
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, buffer, num_samples, lwe_dimension,
glwe_dimension, polynomial_size, base_log, level_count, j,
lwe_offset);
execute_step_two<Torus, params>(
stream, gpu_index, lwe_array_out, lwe_output_indexes, buffer,
num_samples, lwe_dimension, glwe_dimension, polynomial_size,
grouping_factor, level_count, j, lwe_offset, num_many_lut,
lut_stride);
execute_step_two<Torus, params>(
stream, gpu_index, lwe_array_out, lwe_output_indexes, buffer,
num_samples, lwe_dimension, glwe_dimension, polynomial_size,
grouping_factor, level_count, j, lwe_offset, num_many_lut,
lut_stride);
}
cudaStreamEndCapture(stream, &graph);
cudaGraphInstantiate(&instance, graph, NULL, NULL, 0);
graphCreated = true;
}
cudaGraphLaunch(instance, stream);
}
}
#endif // MULTIBIT_PBS_H

View File

@@ -163,6 +163,29 @@ extern "C" {
allocate_gpu_memory: bool,
);
}
extern "C" {
/// Foreign (C ABI) declaration of `scratch_cuda_apply_many_univariate_lut_kb_64`.
///
/// The parameter list must stay in sync, in order and width, with the native
/// definition of the same name. `mem_ptr` is a pointer-to-pointer and is
/// presumably an out-parameter receiving the allocated scratch object;
/// `input_lut` is read-only (`*const`). Confirm both against the native side.
///
/// # Safety
///
/// As with any foreign function, calling this is `unsafe`: the caller is
/// responsible for the validity of every raw pointer passed in.
pub fn scratch_cuda_apply_many_univariate_lut_kb_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr: *mut *mut i8,
input_lut: *const ffi::c_void,
lwe_dimension: u32,
glwe_dimension: u32,
polynomial_size: u32,
ks_level: u32,
ks_base_log: u32,
pbs_level: u32,
pbs_base_log: u32,
grouping_factor: u32,
num_radix_blocks: u32,
message_modulus: u32,
carry_modulus: u32,
pbs_type: PBS_TYPE,
num_many_lut: u32,
allocate_gpu_memory: bool,
);
}
extern "C" {
pub fn cuda_apply_univariate_lut_kb_64(
streams: *const *mut ffi::c_void,
@@ -1083,6 +1106,92 @@ extern "C" {
mem_ptr_void: *mut *mut i8,
);
}
extern "C" {
/// Foreign (C ABI) declaration of
/// `scratch_cuda_integer_are_all_comparisons_block_true_kb_64`.
///
/// Judging by the `scratch_` prefix and the `*mut *mut i8` type of `mem_ptr`,
/// this presumably allocates the working buffer for the "are all comparison
/// blocks true" operation and writes its address through `mem_ptr` --
/// NOTE(review): confirm against the native definition.
///
/// # Safety
///
/// Calling this is `unsafe`; every raw pointer must be valid for the native
/// callee.
pub fn scratch_cuda_integer_are_all_comparisons_block_true_kb_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr: *mut *mut i8,
glwe_dimension: u32,
polynomial_size: u32,
big_lwe_dimension: u32,
small_lwe_dimension: u32,
ks_level: u32,
ks_base_log: u32,
pbs_level: u32,
pbs_base_log: u32,
grouping_factor: u32,
num_radix_blocks: u32,
message_modulus: u32,
carry_modulus: u32,
pbs_type: PBS_TYPE,
allocate_gpu_memory: bool,
);
}
extern "C" {
/// Foreign (C ABI) declaration of
/// `cuda_integer_are_all_comparisons_block_true_kb_64`.
///
/// By signature: `lwe_array_in` is read-only (`*const`), `lwe_array_out` is
/// writable (`*mut`), and `mem_ptr` is a single-level pointer, presumably the
/// buffer produced by the corresponding `scratch_` call -- NOTE(review):
/// confirm against the native definition.
///
/// # Safety
///
/// Calling this is `unsafe`; every raw pointer must be valid for the native
/// callee.
pub fn cuda_integer_are_all_comparisons_block_true_kb_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
lwe_array_out: *mut ffi::c_void,
lwe_array_in: *const ffi::c_void,
mem_ptr: *mut i8,
bsks: *const *mut ffi::c_void,
ksks: *const *mut ffi::c_void,
num_radix_blocks: u32,
);
}
extern "C" {
/// Foreign (C ABI) declaration of
/// `cleanup_cuda_integer_are_all_comparisons_block_true`.
///
/// Takes the working buffer by pointer-to-pointer (`mem_ptr_void`);
/// presumably releases what the matching `scratch_` call allocated --
/// NOTE(review): confirm against the native definition, including whether
/// the pointee is reset.
///
/// # Safety
///
/// Calling this is `unsafe`; every raw pointer must be valid for the native
/// callee.
pub fn cleanup_cuda_integer_are_all_comparisons_block_true(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr_void: *mut *mut i8,
);
}
extern "C" {
/// Foreign (C ABI) declaration of
/// `scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64`.
///
/// Judging by the `scratch_` prefix and the `*mut *mut i8` type of `mem_ptr`,
/// this presumably allocates the working buffer for the "is at least one
/// comparison block true" operation and writes its address through
/// `mem_ptr` -- NOTE(review): confirm against the native definition.
///
/// # Safety
///
/// Calling this is `unsafe`; every raw pointer must be valid for the native
/// callee.
pub fn scratch_cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr: *mut *mut i8,
glwe_dimension: u32,
polynomial_size: u32,
big_lwe_dimension: u32,
small_lwe_dimension: u32,
ks_level: u32,
ks_base_log: u32,
pbs_level: u32,
pbs_base_log: u32,
grouping_factor: u32,
num_radix_blocks: u32,
message_modulus: u32,
carry_modulus: u32,
pbs_type: PBS_TYPE,
allocate_gpu_memory: bool,
);
}
extern "C" {
/// Foreign (C ABI) declaration of
/// `cuda_integer_is_at_least_one_comparisons_block_true_kb_64`.
///
/// By signature: `lwe_array_in` is read-only (`*const`), `lwe_array_out` is
/// writable (`*mut`), and `mem_ptr` is a single-level pointer, presumably the
/// buffer produced by the corresponding `scratch_` call -- NOTE(review):
/// confirm against the native definition.
///
/// # Safety
///
/// Calling this is `unsafe`; every raw pointer must be valid for the native
/// callee.
pub fn cuda_integer_is_at_least_one_comparisons_block_true_kb_64(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
lwe_array_out: *mut ffi::c_void,
lwe_array_in: *const ffi::c_void,
mem_ptr: *mut i8,
bsks: *const *mut ffi::c_void,
ksks: *const *mut ffi::c_void,
num_radix_blocks: u32,
);
}
extern "C" {
/// Foreign (C ABI) declaration of
/// `cleanup_cuda_integer_is_at_least_one_comparisons_block_true`.
///
/// Takes the working buffer by pointer-to-pointer (`mem_ptr_void`);
/// presumably releases what the matching `scratch_` call allocated --
/// NOTE(review): confirm against the native definition, including whether
/// the pointee is reset.
///
/// # Safety
///
/// Calling this is `unsafe`; every raw pointer must be valid for the native
/// callee.
pub fn cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
streams: *const *mut ffi::c_void,
gpu_indexes: *const u32,
gpu_count: u32,
mem_ptr_void: *mut *mut i8,
);
}
extern "C" {
pub fn cuda_keyswitch_lwe_ciphertext_vector_32(
stream: *mut ffi::c_void,
@@ -1120,6 +1229,7 @@ extern "C" {
stream: *mut ffi::c_void,
gpu_index: u32,
fp_ks_buffer: *mut *mut i8,
lwe_dimension: u32,
glwe_dimension: u32,
polynomial_size: u32,
num_lwes: u32,

View File

@@ -31,7 +31,7 @@ instance_type = "m6i.4xlarge"
[backend.hyperstack.gpu-test]
environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
flavor_name = "n3-RTX-A6000x1"
flavor_name = "n3-L40x1"
[backend.hyperstack.single-h100]
environment_name = "canada"
@@ -58,6 +58,12 @@ environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
flavor_name = "n3-H100x8-NVLink"
[backend.hyperstack.multi-h100-sxm5]
environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"
flavor_name = "n3-H100-SXM5x8"
[backend.hyperstack.multi-a100-nvlink]
environment_name = "canada"
image_name = "Ubuntu Server 22.04 LTS R535 CUDA 12.2"

View File

@@ -1,19 +0,0 @@
#!/usr/bin/env bash
# Prints a feature name describing the host: "x86_64" or "aarch64", with a
# "-unix" suffix appended when the kernel name is Linux or Darwin.
# Any host that does not report arm64/aarch64 is labelled x86_64.
set -e
# Default; overridden below when the host reports an ARM64 architecture.
ARCH_FEATURE=x86_64
# grep -c prints the number of matching lines but exits non-zero when that
# number is 0; "|| true" keeps "set -e" from aborting the script in that case.
IS_AARCH64="$( (uname -a | grep -c "arm64\|aarch64") || true)"
if [[ "${IS_AARCH64}" != "0" ]]; then
ARCH_FEATURE=aarch64
fi
UNAME="$(uname)"
if [[ "${UNAME}" == "Linux" || "${UNAME}" == "Darwin" ]]; then
ARCH_FEATURE="${ARCH_FEATURE}-unix"
fi
# The result is reported on stdout so callers can capture it.
echo "${ARCH_FEATURE}"

View File

@@ -10,6 +10,9 @@ function usage() {
echo "--multi-bit Run multi-bit tests only: default off"
echo "--unsigned-only Run only unsigned integer tests, by default both signed and unsigned tests are run"
echo "--signed-only Run only signed integer tests, by default both signed and unsigned tests are run"
echo "--nightly-tests Run integer tests configured for nightly runs (3_3 params)"
echo "--fast-tests Run integer set but skip a subset of longer tests"
echo "--long-tests Run only long run integer tests"
echo "--cargo-profile The cargo profile used to build tests"
echo "--backend Backend to use with tfhe-rs"
echo "--avx512-support Set to ON to enable avx512"
@@ -21,6 +24,7 @@ RUST_TOOLCHAIN="+stable"
multi_bit_argument=
sign_argument=
fast_tests_argument=
long_tests_argument=
nightly_tests_argument=
no_big_params_argument=
cargo_profile="release"
@@ -91,6 +95,10 @@ if [[ "${FAST_TESTS}" == TRUE ]]; then
fast_tests_argument=--fast-tests
fi
if [[ "${LONG_TESTS}" == TRUE ]]; then
long_tests_argument=--long-tests
fi
if [[ "${NIGHTLY_TESTS}" == TRUE ]]; then
nightly_tests_argument=--nightly-tests
fi
@@ -104,7 +112,6 @@ if [[ "${backend}" == "gpu" ]]; then
fi
CURR_DIR="$(dirname "$0")"
ARCH_FEATURE="$("${CURR_DIR}/get_arch_feature.sh")"
# TODO autodetect/have a finer CPU count depending on memory
num_cpu_threads="$("${CURR_DIR}"/cpu_count.sh)"
@@ -138,32 +145,38 @@ if [[ "${backend}" == "gpu" ]]; then
fi
fi
filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})
filter_expression=$(/usr/bin/python3 scripts/test_filtering.py --layer integer --backend "${backend}" ${fast_tests_argument} ${long_tests_argument} ${nightly_tests_argument} ${multi_bit_argument} ${sign_argument} ${no_big_params_argument})
if [[ "${FAST_TESTS}" == "TRUE" ]]; then
echo "Running 'fast' test set"
else
elif [[ "${LONG_TESTS}" == "FALSE" ]]; then
echo "Running 'slow' test set"
fi
if [[ "${LONG_TESTS}" == "TRUE" ]]; then
echo "Running 'long run' test set"
fi
if [[ "${NIGHTLY_TESTS}" == "TRUE" ]]; then
echo "Running 'nightly' test set"
fi
echo "${filter_expression}"
cargo "${RUST_TOOLCHAIN}" nextest run \
--tests \
--cargo-profile "${cargo_profile}" \
--package "${tfhe_package}" \
--profile ci \
--features="${ARCH_FEATURE}",integer,internal-keycache,zk-pok,experimental,"${avx512_feature}","${gpu_feature}" \
--features=integer,internal-keycache,zk-pok,experimental,"${avx512_feature}","${gpu_feature}" \
--test-threads "${test_threads}" \
-E "$filter_expression"
if [[ -z ${multi_bit_argument} ]]; then
if [[ -z ${multi_bit_argument} && -z ${long_tests_argument} ]]; then
cargo "${RUST_TOOLCHAIN}" test \
--profile "${cargo_profile}" \
--package "${tfhe_package}" \
--features="${ARCH_FEATURE}",integer,internal-keycache,experimental,"${avx512_feature}","${gpu_feature}" \
--features=integer,internal-keycache,experimental,"${avx512_feature}","${gpu_feature}" \
--doc \
-- --test-threads="${doctest_threads}" integer::"${gpu_feature}"
fi

View File

@@ -65,7 +65,6 @@ if [[ "${FAST_TESTS}" == TRUE ]]; then
fi
CURR_DIR="$(dirname "$0")"
ARCH_FEATURE="$("${CURR_DIR}/get_arch_feature.sh")"
n_threads_small="$("${CURR_DIR}"/cpu_count.sh)"
n_threads_big="${n_threads_small}"
@@ -94,7 +93,7 @@ if [[ "${BIG_TESTS_INSTANCE}" != TRUE ]]; then
--cargo-profile "${cargo_profile}" \
--package "${tfhe_package}" \
--profile ci \
--features="${ARCH_FEATURE}",shortint,internal-keycache,zk-pok,experimental \
--features=shortint,internal-keycache,zk-pok,experimental \
--test-threads "${n_threads_small}" \
-E "${filter_expression_small_params}"
@@ -111,7 +110,7 @@ and not test(~smart_add_and_mul)"""
--cargo-profile "${cargo_profile}" \
--package "${tfhe_package}" \
--profile ci \
--features="${ARCH_FEATURE}",shortint,internal-keycache,zk-pok,experimental \
--features=shortint,internal-keycache,zk-pok,experimental \
--test-threads "${n_threads_big}" \
--no-tests=warn \
-E "${filter_expression_big_params}"
@@ -120,7 +119,7 @@ and not test(~smart_add_and_mul)"""
cargo "${RUST_TOOLCHAIN}" test \
--profile "${cargo_profile}" \
--package "${tfhe_package}" \
--features="${ARCH_FEATURE}",shortint,internal-keycache,zk-pok,experimental \
--features=shortint,internal-keycache,zk-pok,experimental \
--doc \
-- shortint::
fi
@@ -134,7 +133,7 @@ else
--cargo-profile "${cargo_profile}" \
--package "${tfhe_package}" \
--profile ci \
--features="${ARCH_FEATURE}",shortint,internal-keycache,experimental \
--features=shortint,internal-keycache,experimental \
--test-threads "${n_threads_big}" \
-E "${filter_expression}"
@@ -142,7 +141,7 @@ else
cargo "${RUST_TOOLCHAIN}" test \
--profile "${cargo_profile}" \
--package "${tfhe_package}" \
--features="${ARCH_FEATURE}",shortint,internal-keycache,experimental \
--features=shortint,internal-keycache,experimental \
--doc \
-- --test-threads="${n_threads_big}" shortint::
fi

View File

@@ -26,6 +26,12 @@ parser.add_argument(
action="store_true",
help="Run only a small subset of test suite",
)
# Boolean switch: `long_tests` is False unless --long-tests is passed
# (argparse "store_true" semantics).
parser.add_argument(
"--long-tests",
dest="long_tests",
action="store_true",
help="Run only the long tests suite",
)
parser.add_argument(
"--nightly-tests",
dest="nightly_tests",
@@ -80,6 +86,7 @@ EXCLUDED_INTEGER_TESTS = [
"/.*test_wopbs_bivariate_crt_wopbs_param_message_[34]_carry_[34]_ks_pbs_gaussian_2m64$/",
"/.*test_integer_smart_mul_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
"/.*test_integer_default_add_sequence_multi_thread_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
"/.*::tests_long_run::.*/",
]
# skip default_div, default_rem which are covered by default_div_rem
@@ -94,55 +101,61 @@ EXCLUDED_BIG_PARAMETERS = [
"/.*_param_message_4_carry_4_ks_pbs_gaussian_2m64$/",
]
def filter_integer_tests(input_args):
(multi_bit_filter, group_filter) = (
("_multi_bit", "_group_[0-9]") if input_args.multi_bit else ("", "")
)
backend_filter = ""
if input_args.backend == "gpu":
backend_filter = "gpu::"
if multi_bit_filter:
# For now, GPU only has specific parameters set for multi-bit
multi_bit_filter = "_gpu_multi_bit"
if not input_args.long_tests:
if input_args.backend == "gpu":
backend_filter = "gpu::"
if multi_bit_filter:
# For now, GPU only has specific parameters set for multi-bit
multi_bit_filter = "_gpu_multi_bit"
filter_expression = [f"test(/^integer::{backend_filter}.*/)"]
filter_expression = [f"test(/^integer::{backend_filter}.*/)"]
if input_args.multi_bit:
filter_expression.append("test(~_multi_bit)")
else:
filter_expression.append("not test(~_multi_bit)")
if input_args.multi_bit:
filter_expression.append("test(~_multi_bit)")
else:
filter_expression.append("not test(~_multi_bit)")
if input_args.signed_only:
filter_expression.append("test(~_signed)")
if input_args.unsigned_only:
filter_expression.append("not test(~_signed)")
if input_args.signed_only:
filter_expression.append("test(~_signed)")
if input_args.unsigned_only:
filter_expression.append("not test(~_signed)")
if input_args.no_big_params:
for pattern in EXCLUDED_BIG_PARAMETERS:
if input_args.no_big_params:
for pattern in EXCLUDED_BIG_PARAMETERS:
filter_expression.append(f"not test({pattern})")
if input_args.fast_tests and input_args.nightly_tests:
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_[2-3]_carry_[2-3]_.*/)"
)
elif input_args.fast_tests:
# Test only fast default operations with only one set of parameters
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_2_carry_2_.*/)"
)
elif input_args.nightly_tests:
# Test only fast default operations with only one set of parameters
# This subset would run slower than fast_tests hence the use of nightly_tests
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_3_carry_3_.*/)"
)
excluded_tests = (
EXCLUDED_INTEGER_FAST_TESTS if input_args.fast_tests else EXCLUDED_INTEGER_TESTS
)
for pattern in excluded_tests:
filter_expression.append(f"not test({pattern})")
if input_args.fast_tests and input_args.nightly_tests:
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_[2-3]_carry_[2-3]_.*/)"
)
elif input_args.fast_tests:
# Test only fast default operations with only one set of parameters
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_2_carry_2_.*/)"
)
elif input_args.nightly_tests:
# Test only fast default operations with only one set of parameters
# This subset would run slower than fast_tests hence the use of nightly_tests
filter_expression.append(
f"test(/.*_default_.*?_param{multi_bit_filter}{group_filter}_message_3_carry_3_.*/)"
)
else:
if input_args.backend == "gpu":
filter_expression = [f"test(/^integer::gpu::server_key::radix::tests_long_run.*/)"]
elif input_args.backend == "cpu":
filter_expression = [f"test(/^integer::server_key::radix_parallel::tests_long_run.*/)"]
excluded_tests = (
EXCLUDED_INTEGER_FAST_TESTS if input_args.fast_tests else EXCLUDED_INTEGER_TESTS
)
for pattern in excluded_tests:
filter_expression.append(f"not test({pattern})")
return " and ".join(filter_expression)

View File

@@ -7,7 +7,6 @@ edition = "2021"
[dependencies]
clap = "=4.4.4"
lazy_static = "1.4"
log = "0.4"
simplelog = "0.12"
walkdir = "2.5.0"

View File

@@ -101,7 +101,7 @@ pub fn check_tfhe_docs_are_tested() -> Result<(), Error> {
.into_iter()
.filter_map(|entry| {
let path = entry.path().canonicalize().ok()?;
if path.is_file() && path.extension().map_or(false, |e| e == "md") {
if path.is_file() && path.extension().is_some_and(|e| e == "md") {
let file_content = std::fs::read_to_string(&path).ok()?;
if file_content.contains("```rust") {
Some(path.to_path_buf())

View File

@@ -1,5 +1,4 @@
use clap::{Arg, Command};
use lazy_static::lazy_static;
use log::LevelFilter;
use simplelog::{ColorChoice, CombinedLogger, Config, TermLogger, TerminalMode};
use std::sync::atomic::AtomicBool;
@@ -12,9 +11,8 @@ mod utils;
// -------------------------------------------------------------------------------------------------
// CONSTANTS
// -------------------------------------------------------------------------------------------------
lazy_static! {
static ref DRY_RUN: AtomicBool = AtomicBool::new(false);
}
static DRY_RUN: AtomicBool = AtomicBool::new(false);
// -------------------------------------------------------------------------------------------------
// MAIN

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-csprng"
version = "0.4.1"
version = "0.5.0"
edition = "2021"
license = "BSD-3-Clause-Clear"
description = "Cryptographically Secure PRNG used in the TFHE-rs library."
@@ -13,41 +13,25 @@ rust-version = "1.72"
[dependencies]
aes = "0.8.2"
rayon = { version = "1.5.0", optional = true }
rayon = { workspace = true , optional = true }
[target.'cfg(target_os = "macos")'.dependencies]
libc = "0.2.133"
[dev-dependencies]
rand = "0.8.3"
rand = { workspace = true }
criterion = "0.5.1"
clap = "=4.4.4"
[features]
parallel = ["rayon"]
seeder_x86_64_rdseed = []
seeder_unix = []
generator_x86_64_aesni = []
generator_fallback = []
generator_aarch64_aes = []
x86_64 = [
"parallel",
"seeder_x86_64_rdseed",
"generator_x86_64_aesni",
"generator_fallback",
]
x86_64-unix = ["x86_64", "seeder_unix"]
aarch64 = ["parallel", "generator_aarch64_aes", "generator_fallback"]
aarch64-unix = ["aarch64", "seeder_unix"]
software-prng = []
[[bench]]
name = "benchmark"
path = "benches/benchmark.rs"
harness = false
required-features = ["seeder_x86_64_rdseed", "generator_x86_64_aesni"]
[[example]]
name = "generate"
path = "examples/generate.rs"
required-features = ["seeder_unix", "generator_fallback"]

View File

@@ -8,13 +8,13 @@ The implementation is based on the AES blockcipher used in CTR mode, as describe
Two implementations are available, an accelerated one on x86_64 CPUs with the `aes` feature and the `sse2` feature, and a pure software one that can be used on other platforms.
The crate also makes two seeders available, one needing the x86_64 feature `rdseed` and another one based on the Unix random device `/dev/random` the latter requires the user to provide a secret.
The crate also makes two seeders available: one requiring the x86_64 `rdseed` instruction, and another based on the Unix random device `/dev/random`; the latter requires the user to provide a secret.
## Running the benchmarks
To execute the benchmarks on an x86_64 platform:
```shell
RUSTFLAGS="-Ctarget-cpu=native" cargo bench --features=seeder_x86_64_rdseed,generator_x86_64_aesni
RUSTFLAGS="-Ctarget-cpu=native" cargo bench
```
## License

View File

@@ -1,15 +1,53 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use tfhe_csprng::generators::{
AesniRandomGenerator, BytesPerChild, ChildrenCount, RandomGenerator,
BytesPerChild, ChildrenCount, DefaultRandomGenerator, RandomGenerator,
};
use tfhe_csprng::seeders::{RdseedSeeder, Seeder};
#[cfg(target_os = "macos")]
use tfhe_csprng::seeders::AppleSecureEnclaveSeeder as ActivatedSeeder;
#[cfg(all(
not(target_os = "macos"),
target_arch = "x86_64",
target_feature = "rdseed"
))]
use tfhe_csprng::seeders::RdseedSeeder as ActivatedSeeder;
#[cfg(all(
not(target_os = "macos"),
not(all(target_arch = "x86_64", target_feature = "rdseed")),
target_family = "unix"
))]
use tfhe_csprng::seeders::UnixSeeder as ActivatedSeeder;
use tfhe_csprng::seeders::Seeder;
// The number of bytes to generate during one benchmark iteration.
const N_GEN: usize = 1_000_000;
/// Builds the seeder that was imported under the name `ActivatedSeeder` for the current target.
///
/// The three `cfg` predicates below are mutually exclusive, so at most one of the blocks is
/// compiled in and becomes the tail expression of the function. No block is provided for targets
/// that match none of the predicates.
fn new_seeder() -> ActivatedSeeder {
// macOS: the seeder is built from its bare name, without any constructor call.
#[cfg(target_os = "macos")]
{
ActivatedSeeder
}
// Non-macOS x86_64 compiled with the `rdseed` target feature: argument-less constructor.
#[cfg(all(
not(target_os = "macos"),
target_arch = "x86_64",
target_feature = "rdseed"
))]
{
ActivatedSeeder::new()
}
// Remaining unix-family targets: constructor taking one argument.
// NOTE(review): `0` is presumably the user-provided secret of the Unix seeder — confirm
// against `UnixSeeder::new`.
#[cfg(all(
not(target_os = "macos"),
not(all(target_arch = "x86_64", target_feature = "rdseed")),
target_family = "unix"
))]
{
ActivatedSeeder::new(0)
}
}
fn parent_generate(c: &mut Criterion) {
let mut seeder = RdseedSeeder;
let mut generator = AesniRandomGenerator::new(seeder.seed());
let mut seeder = new_seeder();
let mut generator = DefaultRandomGenerator::new(seeder.seed());
c.bench_function("parent_generate", |b| {
b.iter(|| {
(0..N_GEN).for_each(|_| {
@@ -20,8 +58,8 @@ fn parent_generate(c: &mut Criterion) {
}
fn child_generate(c: &mut Criterion) {
let mut seeder = RdseedSeeder;
let mut generator = AesniRandomGenerator::new(seeder.seed());
let mut seeder = new_seeder();
let mut generator = DefaultRandomGenerator::new(seeder.seed());
let mut generator = generator
.try_fork(ChildrenCount(1), BytesPerChild(N_GEN * 10_000))
.unwrap()
@@ -37,8 +75,8 @@ fn child_generate(c: &mut Criterion) {
}
fn fork(c: &mut Criterion) {
let mut seeder = RdseedSeeder;
let mut generator = AesniRandomGenerator::new(seeder.seed());
let mut seeder = new_seeder();
let mut generator = DefaultRandomGenerator::new(seeder.seed());
c.bench_function("fork", |b| {
b.iter(|| {
black_box(

View File

@@ -1,115 +0,0 @@
// To have clear error messages during compilation about why some piece of code may not be available
// we decided to check the features compatibility with the target configuration in this script.
use std::collections::HashMap;
use std::env;
// See https://doc.rust-lang.org/reference/conditional-compilation.html#target_arch for the
// possible values of the compilation configuration options checked here.
// Can be easily extended if needed
/// Target configuration that a cargo feature needs in order to be usable.
///
/// A requirement left to `None` is not checked for that feature.
pub struct FeatureRequirement {
/// Name of the cargo feature the requirements apply to.
pub feature_name: &'static str,
/// Required `target_arch` value, if any.
pub feature_req_target_arch: Option<&'static str>,
/// Required `target_family` value, if any.
pub feature_req_target_family: Option<&'static str>,
}
// Helpers used by the build script to validate the enabled features against the target.
impl FeatureRequirement {
    /// Tells whether the feature named by this requirement is enabled for the current build.
    ///
    /// `cfg!(feature = ...)` only accepts a literal, so the enabled status is looked up in the
    /// map of features populated by the caller.
    ///
    /// # Panics
    ///
    /// Panics if the feature name has no entry in `build_activated_features`.
    fn is_activated(&self, build_activated_features: &HashMap<&'static str, bool>) -> bool {
        let enabled = build_activated_features.get(self.feature_name).unwrap();
        *enabled
    }

    /// Checks the target architecture and target family requirements of the feature.
    ///
    /// # Panics
    ///
    /// Panics with an explanatory message as soon as one requirement is not met by the
    /// configuration of the target being compiled.
    fn check_requirements(&self) {
        let current_arch = get_target_arch_cfg();
        match self.feature_req_target_arch {
            Some(required_arch) if required_arch != current_arch => panic!(
                "Feature `{}` requires target_arch `{}`, current cfg: `{}`",
                self.feature_name, required_arch, current_arch
            ),
            _ => {}
        }

        // The target families are provided as a comma separated list.
        let current_families = get_target_family_cfgs();
        match self.feature_req_target_family {
            Some(required_family) => {
                let family_found = current_families
                    .split(',')
                    .any(|family| family == required_family);
                if !family_found {
                    panic!(
                        "Feature `{}` requires target_family `{}`, current cfgs: `{}`",
                        self.feature_name, required_family, current_families
                    )
                }
            }
            None => {}
        }
    }
}
// Table of the features whose availability depends on the target configuration.
// const vecs are not yet a thing so use a fixed size array (update the array size when adding
// requirements)
static FEATURE_REQUIREMENTS: [FeatureRequirement; 4] = [
// Requires an x86_64 target architecture.
FeatureRequirement {
feature_name: "seeder_x86_64_rdseed",
feature_req_target_arch: Some("x86_64"),
feature_req_target_family: None,
},
// Requires an x86_64 target architecture.
FeatureRequirement {
feature_name: "generator_x86_64_aesni",
feature_req_target_arch: Some("x86_64"),
feature_req_target_family: None,
},
// Requires a target of the unix family, whatever the architecture.
FeatureRequirement {
feature_name: "seeder_unix",
feature_req_target_arch: None,
feature_req_target_family: Some("unix"),
},
// Requires an aarch64 target architecture.
FeatureRequirement {
feature_name: "generator_aarch64_aes",
feature_req_target_arch: Some("aarch64"),
feature_req_target_family: None,
},
];
// Pairs a feature name with its enabled status: `feature_cfg!("some_feature")` expands to the
// tuple `("some_feature", cfg!(feature = "some_feature"))`. A macro is needed because `cfg!`
// only accepts a literal for the feature name.
macro_rules! feature_cfg {
    ($name:literal) => {
        ($name, cfg!(feature = $name))
    };
}
// A static HashMap would need an additional crate (phf or lazy static e.g.), so the map is
// simply rebuilt by a function each time it is needed.
/// Returns, for every feature having target requirements, whether it is enabled in this build.
fn get_feature_enabled_status() -> HashMap<&'static str, bool> {
    let entries: [(&'static str, bool); 4] = [
        feature_cfg!("seeder_x86_64_rdseed"),
        feature_cfg!("generator_x86_64_aesni"),
        feature_cfg!("seeder_unix"),
        feature_cfg!("generator_aarch64_aes"),
    ];
    entries.iter().copied().collect()
}
// Inspired by https://stackoverflow.com/a/43435335/18088947
/// Reads the target architecture from the `CARGO_CFG_TARGET_ARCH` environment variable.
///
/// # Panics
///
/// Panics if the variable cannot be read.
fn get_target_arch_cfg() -> String {
    let target_arch = env::var("CARGO_CFG_TARGET_ARCH");
    target_arch.expect("CARGO_CFG_TARGET_ARCH is not set")
}
/// Reads the target families from the `CARGO_CFG_TARGET_FAMILY` environment variable.
///
/// The value is returned untouched, as the raw string found in the environment.
///
/// # Panics
///
/// Panics if the variable cannot be read.
fn get_target_family_cfgs() -> String {
    let target_families = env::var("CARGO_CFG_TARGET_FAMILY");
    target_families.expect("CARGO_CFG_TARGET_FAMILY is not set")
}
fn main() {
let feature_enabled_status = get_feature_enabled_status();
// This will panic if some requirements for a feature are not met
FEATURE_REQUIREMENTS
.iter()
.filter(|&req| FeatureRequirement::is_activated(req, &feature_enabled_status))
.for_each(FeatureRequirement::check_requirements);
}

View File

@@ -2,35 +2,29 @@
//! the program stdout. It can also generate a fixed number of bytes by passing a value along the
//! optional argument `--bytes_total`. For testing purpose.
use clap::{value_parser, Arg, Command};
#[cfg(feature = "generator_x86_64_aesni")]
use tfhe_csprng::generators::AesniRandomGenerator as ActivatedRandomGenerator;
#[cfg(feature = "generator_aarch64_aes")]
use tfhe_csprng::generators::NeonAesRandomGenerator as ActivatedRandomGenerator;
use tfhe_csprng::generators::RandomGenerator;
#[cfg(all(
not(feature = "generator_x86_64_aesni"),
not(feature = "generator_aarch64_aes"),
feature = "generator_fallback"
))]
use tfhe_csprng::generators::SoftwareRandomGenerator as ActivatedRandomGenerator;
use tfhe_csprng::generators::{DefaultRandomGenerator, RandomGenerator};
use std::io::prelude::*;
use std::io::{stdout, StdoutLock};
#[cfg(target_os = "macos")]
use tfhe_csprng::seeders::AppleSecureEnclaveSeeder as ActivatedSeeder;
#[cfg(all(not(target_os = "macos"), feature = "seeder_x86_64_rdseed"))]
#[cfg(all(
not(target_os = "macos"),
target_arch = "x86_64",
target_feature = "rdseed"
))]
use tfhe_csprng::seeders::RdseedSeeder as ActivatedSeeder;
use tfhe_csprng::seeders::Seeder;
#[cfg(all(
not(target_os = "macos"),
not(feature = "seeder_x86_64_rdseed"),
feature = "seeder_unix"
not(all(target_arch = "x86_64", target_feature = "rdseed")),
target_family = "unix"
))]
use tfhe_csprng::seeders::UnixSeeder as ActivatedSeeder;
fn write_bytes(
buffer: &mut [u8],
generator: &mut ActivatedRandomGenerator,
generator: &mut DefaultRandomGenerator,
stdout: &mut StdoutLock<'_>,
) -> std::io::Result<()> {
buffer.iter_mut().zip(generator).for_each(|(b, g)| *b = g);
@@ -39,7 +33,7 @@ fn write_bytes(
fn infinite_bytes_generation(
buffer: &mut [u8],
generator: &mut ActivatedRandomGenerator,
generator: &mut DefaultRandomGenerator,
stdout: &mut StdoutLock<'_>,
) {
while write_bytes(buffer, generator, stdout).is_ok() {}
@@ -48,7 +42,7 @@ fn infinite_bytes_generation(
fn bytes_generation(
bytes_total: usize,
buffer: &mut [u8],
generator: &mut ActivatedRandomGenerator,
generator: &mut DefaultRandomGenerator,
stdout: &mut StdoutLock<'_>,
) {
let quotient = bytes_total / buffer.len();
@@ -61,6 +55,29 @@ fn bytes_generation(
write_bytes(&mut buffer[0..remaining], generator, stdout).unwrap()
}
/// Builds the seeder that was imported under the name `ActivatedSeeder` for the current target.
///
/// The three `cfg` predicates below are mutually exclusive, so at most one of the blocks is
/// compiled in and becomes the tail expression of the function. No block is provided for targets
/// that match none of the predicates.
fn new_seeder() -> ActivatedSeeder {
// macOS: the seeder is built from its bare name, without any constructor call.
#[cfg(target_os = "macos")]
{
ActivatedSeeder
}
// Non-macOS x86_64 compiled with the `rdseed` target feature: argument-less constructor.
#[cfg(all(
not(target_os = "macos"),
target_arch = "x86_64",
target_feature = "rdseed"
))]
{
ActivatedSeeder::new()
}
// Remaining unix-family targets: constructor taking one argument.
// NOTE(review): `0` is presumably the user-provided secret of the Unix seeder — confirm
// against `UnixSeeder::new`.
#[cfg(all(
not(target_os = "macos"),
not(all(target_arch = "x86_64", target_feature = "rdseed")),
target_family = "unix"
))]
{
ActivatedSeeder::new(0)
}
}
pub fn main() {
let matches = Command::new(
"Generate a stream of random numbers, specify no flags for infinite generation",
@@ -74,25 +91,11 @@ pub fn main() {
)
.get_matches();
// Ugly hack to be able to use UnixSeeder
#[cfg(all(
not(target_os = "macos"),
not(feature = "seeder_x86_64_rdseed"),
feature = "seeder_unix"
))]
let new_seeder = || ActivatedSeeder::new(0);
#[cfg(not(all(
not(target_os = "macos"),
not(feature = "seeder_x86_64_rdseed"),
feature = "seeder_unix"
)))]
let new_seeder = || ActivatedSeeder;
let mut seeder = new_seeder();
let seed = seeder.seed();
// Don't print on std out
eprintln!("seed={seed:?}");
let mut generator = ActivatedRandomGenerator::new(seed);
let mut generator = DefaultRandomGenerator::new(seed);
let stdout = stdout();
let mut buffer = [0u8; 16];

View File

@@ -206,7 +206,6 @@ pub use index::*;
/// A module containing structures to manage table indices and buffer pointers together properly.
mod states;
pub use states::*;
/// A module containing an abstraction for aes block ciphers.
mod block_cipher;

View File

@@ -1,6 +1,5 @@
use crate::generators::aes_ctr::{
AesBlockCipher, AesCtrGenerator, ChildrenClosure, State, TableIndex,
};
use crate::generators::aes_ctr::states::State;
use crate::generators::aes_ctr::{AesBlockCipher, AesCtrGenerator, ChildrenClosure, TableIndex};
use crate::generators::{BytesPerChild, ChildrenCount, ForkError};
/// A type alias for the parallel children iterator type.

View File

@@ -0,0 +1,9 @@
// `DefaultRandomGenerator` is defined exactly once: the three `cfg` predicates below do not
// overlap and together cover every target / feature combination.

/// Default generator on x86_64 targets when the `software-prng` feature is not enabled.
#[cfg(all(target_arch = "x86_64", not(feature = "software-prng")))]
pub type DefaultRandomGenerator = super::AesniRandomGenerator;
/// Default generator on aarch64 targets when the `software-prng` feature is not enabled.
#[cfg(all(target_arch = "aarch64", not(feature = "software-prng")))]
pub type DefaultRandomGenerator = super::NeonAesRandomGenerator;
/// Default generator when the `software-prng` feature is enabled, or when the target
/// architecture is neither x86_64 nor aarch64.
#[cfg(any(
feature = "software-prng",
not(any(target_arch = "x86_64", target_arch = "aarch64"))
))]
pub type DefaultRandomGenerator = super::SoftwareRandomGenerator;

View File

@@ -25,7 +25,8 @@ impl AesBlockCipher for ArmAesBlockCipher {
if !(aes_detected && neon_detected) {
panic!(
"The ArmAesBlockCipher requires both aes and neon aarch64 CPU features.\n\
aes feature available: {}\nneon feature available: {}\n.",
aes feature available: {}\nneon feature available: {}\n\
Please consider enabling the SoftwareRandomGenerator with the `software-prng` feature",
aes_detected, neon_detected
)
}

View File

@@ -20,7 +20,8 @@ impl AesBlockCipher for AesniBlockCipher {
if !(aes_detected && sse2_detected) {
panic!(
"The AesniBlockCipher requires both aes and sse2 x86 CPU features.\n\
aes feature available: {}\nsse2 feature available: {}\n.",
aes feature available: {}\nsse2 feature available: {}\n\
Please consider enabling the SoftwareRandomGenerator with the `software-prng` feature",
aes_detected, sse2_detected
)
}

View File

@@ -1,14 +1,12 @@
#[cfg(feature = "generator_x86_64_aesni")]
#[cfg(target_arch = "x86_64")]
mod aesni;
#[cfg(feature = "generator_x86_64_aesni")]
#[cfg(target_arch = "x86_64")]
pub use aesni::*;
#[cfg(feature = "generator_aarch64_aes")]
#[cfg(target_arch = "aarch64")]
mod aarch64;
#[cfg(feature = "generator_aarch64_aes")]
#[cfg(target_arch = "aarch64")]
pub use aarch64::*;
#[cfg(feature = "generator_fallback")]
mod soft;
#[cfg(feature = "generator_fallback")]
pub use soft::*;

View File

@@ -123,6 +123,10 @@ mod aes_ctr;
mod implem;
pub use implem::*;
pub mod default;
/// Convenience alias for the most efficient CSPRNG implementation available.
pub use default::DefaultRandomGenerator;
#[cfg(test)]
#[allow(unused)] // to please clippy when tests are not activated
pub mod generator_generic_test {

View File

@@ -3,12 +3,12 @@ mod apple_secure_enclave_seeder;
#[cfg(target_os = "macos")]
pub use apple_secure_enclave_seeder::AppleSecureEnclaveSeeder;
#[cfg(feature = "seeder_x86_64_rdseed")]
#[cfg(target_arch = "x86_64")]
mod rdseed;
#[cfg(feature = "seeder_x86_64_rdseed")]
#[cfg(target_arch = "x86_64")]
pub use rdseed::RdseedSeeder;
#[cfg(feature = "seeder_unix")]
#[cfg(target_family = "unix")]
mod unix;
#[cfg(feature = "seeder_unix")]
#[cfg(target_family = "unix")]
pub use unix::UnixSeeder;

View File

@@ -4,7 +4,23 @@ use crate::seeders::{Seed, Seeder};
///
/// The `rdseed` instruction allows to deliver seeds from a hardware source of entropy see
/// <https://www.felixcloutier.com/x86/rdseed> .
pub struct RdseedSeeder;
pub struct RdseedSeeder(());
impl RdseedSeeder {
    /// Creates a new seeder after checking that it is available.
    ///
    /// # Panics
    ///
    /// Panics when `Self::is_available()` returns `false`.
    pub fn new() -> Self {
        if !Self::is_available() {
            panic!("Tried to use RdSeedSeeder but rdseed instruction is not enabled on the current machine");
        }
        Self(())
    }
}
/// `Default` simply forwards to `RdseedSeeder::new`.
impl Default for RdseedSeeder {
/// Returns `Self::new()`.
fn default() -> Self {
Self::new()
}
}
impl Seeder for RdseedSeeder {
fn seed(&mut self) -> Seed {
@@ -46,6 +62,6 @@ mod test {
#[test]
fn check_bounded_sequence_difference() {
check_seeder_fixed_sequences_different(|_| RdseedSeeder);
check_seeder_fixed_sequences_different(|_| RdseedSeeder::new());
}
}

View File

@@ -29,7 +29,7 @@ serde = ["dep:serde", "num-complex/serde"]
[dev-dependencies]
rustfft = "6.0"
rand = "0.8"
rand = { workspace = true }
bincode = "1.3"
more-asserts = "0.3.1"
serde_json = "1.0.96"

View File

@@ -40,7 +40,7 @@ Additionally, an optional 128-bit negacyclic FFT module is provided.
```rust
use tfhe_fft::c64;
use tfhe_fft::ordered::{Method, Plan};
use dyn_stack::{GlobalPodBuffer, PodStack, ReborrowMut};
use dyn_stack::{GlobalPodBuffer, PodStack};
use num_complex::ComplexFloat;
use std::time::Duration;
@@ -48,7 +48,7 @@ fn main() {
const N: usize = 4;
let plan = Plan::new(4, Method::Measure(Duration::from_millis(10)));
let mut scratch_memory = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
let mut stack = PodStack::new(&mut scratch_memory);
let stack = PodStack::new(&mut scratch_memory);
let data = [
c64::new(1.0, 0.0),
@@ -58,10 +58,10 @@ fn main() {
];
let mut transformed_fwd = data;
plan.fwd(&mut transformed_fwd, stack.rb_mut());
plan.fwd(&mut transformed_fwd, stack);
let mut transformed_inv = transformed_fwd;
plan.inv(&mut transformed_inv, stack.rb_mut());
plan.inv(&mut transformed_inv, stack);
for (actual, expected) in transformed_inv.iter().map(|z| z / N as f64).zip(data) {
assert!((expected - actual).abs() < 1e-9);

View File

@@ -1,6 +1,6 @@
use core::ptr::NonNull;
use criterion::{criterion_group, criterion_main, Criterion};
use dyn_stack::{PodStack, ReborrowMut, StackReq};
use dyn_stack::{PodStack, StackReq};
use serde::Serialize;
use std::{fs, path::PathBuf};
use tfhe_fft::c64;
@@ -129,7 +129,7 @@ pub fn bench_ffts(c: &mut Criterion) {
StackReq::new_aligned::<c64>(n, 256), // src
StackReq::new_aligned::<c64>(n, 256), // dst
]));
let mut stack = PodStack::new(&mut mem);
let stack = PodStack::new(&mut mem);
let z = c64::new(0.0, 0.0);
use rustfft::FftPlannerAvx;
@@ -139,8 +139,8 @@ pub fn bench_ffts(c: &mut Criterion) {
let unordered =
tfhe_fft::unordered::Plan::new(n, tfhe_fft::unordered::Method::Measure(bench_duration));
let (dst, stack) = stack.rb_mut().make_aligned_with::<c64, _>(n, 64, |_| z);
let (src, mut stack) = stack.make_aligned_with::<c64, _>(n, 64, |_| z);
let (dst, stack) = stack.make_aligned_with::<c64>(n, 64, |_| z);
let (src, stack) = stack.make_aligned_with::<c64>(n, 64, |_| z);
let bench_id = format!("rustfft-fwd-{n}");
c.bench_function(&bench_id, |b| {
@@ -164,19 +164,19 @@ pub fn bench_ffts(c: &mut Criterion) {
tfhe_fft::ordered::Plan::new(n, tfhe_fft::ordered::Method::Measure(bench_duration));
let bench_id = format!("tfhe-ordered-fwd-{n}");
c.bench_function(&bench_id, |b| b.iter(|| ordered.fwd(dst, stack.rb_mut())));
c.bench_function(&bench_id, |b| b.iter(|| ordered.fwd(dst, stack)));
write_to_json(&bench_id, "tfhe-ordered-fwd", n);
}
let bench_id = format!("tfhe-unordered-fwd-{n}");
c.bench_function(&bench_id, |b| {
b.iter(|| unordered.fwd(dst, stack.rb_mut()));
b.iter(|| unordered.fwd(dst, stack));
});
write_to_json(&bench_id, "tfhe-unordered-fwd", n);
let bench_id = format!("tfhe-unordered-inv-{n}");
c.bench_function(&bench_id, |b| {
b.iter(|| unordered.inv(dst, stack.rb_mut()));
b.iter(|| unordered.inv(dst, stack));
});
write_to_json(&bench_id, "tfhe-unordered-inv", n);

View File

@@ -645,7 +645,7 @@ pub mod x86 {
#[inline(always)]
pub(crate) fn two_diff_f64x4(simd: V3, a: f64x4, b: f64x4) -> (f64x4, f64x4) {
two_sum_f64x4(simd, a, simd.f64s_neg(b))
two_sum_f64x4(simd, a, simd.neg_f64s(b))
}
#[inline(always)]
@@ -677,7 +677,7 @@ pub mod x86 {
#[inline(always)]
#[cfg(feature = "nightly")]
pub(crate) fn two_diff_f64x8(simd: V4, a: f64x8, b: f64x8) -> (f64x8, f64x8) {
two_sum_f64x8(simd, a, simd.f64s_neg(b))
two_sum_f64x8(simd, a, simd.neg_f64s(b))
}
#[cfg(feature = "nightly")]
@@ -714,8 +714,8 @@ pub mod x86 {
simd,
a,
f64x16 {
lo: simd.f64s_neg(b.lo),
hi: simd.f64s_neg(b.hi),
lo: simd.neg_f64s(b.lo),
hi: simd.neg_f64s(b.hi),
},
)
}

View File

@@ -36,14 +36,14 @@
#![cfg_attr(not(feature = "std"), doc = "```ignore")]
//! use tfhe_fft::c64;
//! use tfhe_fft::ordered::{Plan, Method};
//! use dyn_stack::{PodStack, GlobalPodBuffer, ReborrowMut};
//! use dyn_stack::{PodStack, GlobalPodBuffer};
//! use num_complex::ComplexFloat;
//! use std::time::Duration;
//!
//! const N: usize = 4;
//! let plan = Plan::new(4, Method::Measure(Duration::from_millis(10)));
//! let mut scratch_memory = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
//! let mut stack = PodStack::new(&mut scratch_memory);
//! let stack = PodStack::new(&mut scratch_memory);
//!
//! let data = [
//! c64::new(1.0, 0.0),
@@ -53,10 +53,10 @@
//! ];
//!
//! let mut transformed_fwd = data;
//! plan.fwd(&mut transformed_fwd, stack.rb_mut());
//! plan.fwd(&mut transformed_fwd, stack);
//!
//! let mut transformed_inv = transformed_fwd;
//! plan.inv(&mut transformed_inv, stack.rb_mut());
//! plan.inv(&mut transformed_inv, stack);
//!
//! for (actual, expected) in transformed_inv.iter().map(|z| z / N as f64).zip(data) {
//! assert!((expected - actual).abs() < 1e-9);

View File

@@ -16,7 +16,7 @@ use aligned_vec::{avec, ABox, CACHELINE_ALIGN};
#[cfg(feature = "std")]
use core::time::Duration;
#[cfg(feature = "std")]
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
use dyn_stack::GlobalPodBuffer;
use dyn_stack::{PodStack, SizeOverflow, StackReq};
/// Internal FFT algorithm.
@@ -65,7 +65,7 @@ fn measure_n_runs(
buf: &mut [c64],
twiddles_init: &[c64],
twiddles: &[c64],
stack: PodStack,
stack: &mut PodStack,
) -> Duration {
let n = buf.len();
let (scratch, _) = stack.make_aligned_raw::<c64>(n, CACHELINE_ALIGN);
@@ -99,7 +99,7 @@ pub(crate) fn measure_fastest_scratch(n: usize) -> StackReq {
pub(crate) fn measure_fastest(
min_bench_duration_per_algo: Duration,
n: usize,
stack: PodStack,
stack: &mut PodStack,
) -> (FftAlgo, Duration) {
const N_ALGOS: usize = 8;
const MIN_DURATION: Duration = if cfg!(target_arch = "wasm32") {
@@ -116,14 +116,14 @@ pub(crate) fn measure_fastest(
let f = |_| c64 { re: 0.0, im: 0.0 };
let (twiddles, stack) = stack.make_aligned_with::<c64, _>(2 * n, align, f);
let (twiddles, stack) = stack.make_aligned_with::<c64>(2 * n, align, f);
let twiddles_init = &twiddles[..n];
let twiddles = &twiddles[n..];
let (buf, mut stack) = stack.make_aligned_with::<c64, _>(n, align, f);
let (buf, stack) = stack.make_aligned_with::<c64>(n, align, f);
{
// initialize scratch to load it in the cpu cache
drop(stack.rb_mut().make_aligned_with::<c64, _>(n, align, f));
drop(stack.make_aligned_with::<c64>(n, align, f));
}
let mut avg_durations = [Duration::ZERO; N_ALGOS];
@@ -149,8 +149,7 @@ pub(crate) fn measure_fastest(
let mut n_runs: u128 = 1;
loop {
let duration =
measure_n_runs(n_runs, algo, buf, twiddles_init, twiddles, stack.rb_mut());
let duration = measure_n_runs(n_runs, algo, buf, twiddles_init, twiddles, stack);
if duration < MIN_DURATION {
n_runs *= 2;
@@ -165,8 +164,7 @@ pub(crate) fn measure_fastest(
*avg = if n_runs <= init_n_runs {
approx_duration
} else {
let duration =
measure_n_runs(n_runs, algo, buf, twiddles_init, twiddles, stack.rb_mut());
let duration = measure_n_runs(n_runs, algo, buf, twiddles_init, twiddles, stack);
duration_div_f64(duration, n_runs as f64)
};
}
@@ -339,7 +337,7 @@ impl Plan {
/// let mut buf = [c64::default(); 4];
/// plan.fwd(&mut buf, stack);
/// ```
pub fn fwd(&self, buf: &mut [c64], stack: PodStack) {
pub fn fwd(&self, buf: &mut [c64], stack: &mut PodStack) {
let n = self.fft_size();
let (scratch, _) = stack.make_aligned_raw::<c64>(n, CACHELINE_ALIGN);
let (w_init, w) = split_2(&self.twiddles);
@@ -353,19 +351,19 @@ impl Plan {
#[cfg_attr(not(feature = "std"), doc = " ```ignore")]
/// use tfhe_fft::c64;
/// use tfhe_fft::ordered::{Method, Plan};
/// use dyn_stack::{PodStack, GlobalPodBuffer, ReborrowMut};
/// use dyn_stack::{PodStack, GlobalPodBuffer};
/// use core::time::Duration;
///
/// let plan = Plan::new(4, Method::Measure(Duration::from_millis(10)));
///
/// let mut memory = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
/// let mut stack = PodStack::new(&mut memory);
/// let stack = PodStack::new(&mut memory);
///
/// let mut buf = [c64::default(); 4];
/// plan.fwd(&mut buf, stack.rb_mut());
/// plan.fwd(&mut buf, stack);
/// plan.inv(&mut buf, stack);
/// ```
pub fn inv(&self, buf: &mut [c64], stack: PodStack) {
pub fn inv(&self, buf: &mut [c64], stack: &mut PodStack) {
let n = self.fft_size();
let (scratch, _) = stack.make_aligned_raw::<c64>(n, CACHELINE_ALIGN);
let (w_init, w) = split_2(&self.twiddles_inv);

View File

@@ -18,7 +18,7 @@ use aligned_vec::{avec, ABox, CACHELINE_ALIGN};
#[cfg(feature = "std")]
use core::time::Duration;
#[cfg(feature = "std")]
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
use dyn_stack::GlobalPodBuffer;
use dyn_stack::{PodStack, SizeOverflow, StackReq};
#[inline(always)]
@@ -553,7 +553,7 @@ fn measure_fastest_scratch(n: usize) -> StackReq {
fn measure_fastest(
mut min_bench_duration_per_algo: Duration,
n: usize,
mut stack: PodStack,
stack: &mut PodStack,
) -> (FftAlgo, usize, Duration) {
const MIN_DURATION: Duration = Duration::from_millis(1);
min_bench_duration_per_algo = min_bench_duration_per_algo.max(MIN_DURATION);
@@ -581,11 +581,8 @@ fn measure_fastest(
n_algos += 1;
// we'll measure the corresponding plan
let (base_algo, duration) = crate::ordered::measure_fastest(
min_bench_duration_per_algo,
base_n,
stack.rb_mut(),
);
let (base_algo, duration) =
crate::ordered::measure_fastest(min_bench_duration_per_algo, base_n, stack);
algos[i] = Some(base_algo);
@@ -599,11 +596,9 @@ fn measure_fastest(
let f = |_| c64 { re: 0.0, im: 0.0 };
let align = CACHELINE_ALIGN;
let (w, stack) = stack
.rb_mut()
.make_aligned_with::<c64, _>(n + base_n, align, f);
let (scratch, stack) = stack.make_aligned_with::<c64, _>(base_n, align, f);
let (z, _) = stack.make_aligned_with::<c64, _>(n, align, f);
let (w, stack) = stack.make_aligned_with::<c64>(n + base_n, align, f);
let (scratch, stack) = stack.make_aligned_with::<c64>(base_n, align, f);
let (z, _) = stack.make_aligned_with::<c64>(n, align, f);
let n_runs = min_bench_duration_per_algo.as_secs_f64()
/ (duration.as_secs_f64() * (n / base_n) as f64);
@@ -823,7 +818,7 @@ impl Plan {
/// let mut buf = [c64::default(); 4];
/// plan.fwd(&mut buf, stack);
/// ```
pub fn fwd(&self, buf: &mut [c64], stack: PodStack) {
pub fn fwd(&self, buf: &mut [c64], stack: &mut PodStack) {
assert_eq!(self.fft_size(), buf.len());
let (scratch, _) = stack.make_aligned_raw::<c64>(self.algo().1, CACHELINE_ALIGN);
fwd_depth(
@@ -912,19 +907,19 @@ impl Plan {
#[cfg_attr(not(feature = "std"), doc = " ```ignore")]
/// use tfhe_fft::c64;
/// use tfhe_fft::unordered::{Method, Plan};
/// use dyn_stack::{PodStack, GlobalPodBuffer, ReborrowMut};
/// use dyn_stack::{PodStack, GlobalPodBuffer};
/// use core::time::Duration;
///
/// let plan = Plan::new(4, Method::Measure(Duration::from_millis(10)));
///
/// let mut memory = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
/// let mut stack = PodStack::new(&mut memory);
/// let stack = PodStack::new(&mut memory);
///
/// let mut buf = [c64::default(); 4];
/// plan.fwd(&mut buf, stack.rb_mut());
/// plan.fwd(&mut buf, stack);
/// plan.inv(&mut buf, stack);
/// ```
pub fn inv(&self, buf: &mut [c64], stack: PodStack) {
pub fn inv(&self, buf: &mut [c64], stack: &mut PodStack) {
assert_eq!(self.fft_size(), buf.len());
let (scratch, _) = stack.make_aligned_raw::<c64>(self.algo().1, CACHELINE_ALIGN);
inv_depth(
@@ -995,7 +990,7 @@ impl Plan {
base_n: usize,
}
impl<'de, 'a> Visitor<'de> for SeqVisitor<'a> {
impl<'de> Visitor<'de> for SeqVisitor<'_> {
type Value = ();
fn expecting(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
@@ -1062,7 +1057,7 @@ fn bit_rev_twice_inv(nbits: u32, base_nbits: u32, i: usize) -> usize {
mod tests {
use super::*;
use alloc::vec;
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
use dyn_stack::GlobalPodBuffer;
use num_complex::ComplexFloat;
use rand::random;
@@ -1157,8 +1152,8 @@ mod tests {
},
);
let mut mem = GlobalPodBuffer::new(plan.fft_scratch().unwrap());
let mut stack = PodStack::new(&mut mem);
plan.fwd(&mut z, stack.rb_mut());
let stack = PodStack::new(&mut mem);
plan.fwd(&mut z, stack);
plan.inv(&mut z, stack);
for z in &mut z {
@@ -9400,7 +9395,7 @@ mod tests {
mod tests_serde {
use super::*;
use alloc::{vec, vec::Vec};
use dyn_stack::{GlobalPodBuffer, ReborrowMut};
use dyn_stack::GlobalPodBuffer;
use num_complex::ComplexFloat;
use rand::random;
@@ -9440,9 +9435,9 @@ mod tests_serde {
.unwrap()
.or(plan2.fft_scratch().unwrap()),
);
let mut stack = PodStack::new(&mut mem);
let stack = PodStack::new(&mut mem);
plan1.fwd(&mut z, stack.rb_mut());
plan1.fwd(&mut z, stack);
let mut buf = Vec::<u8>::new();
let mut serializer = bincode::Serializer::new(&mut buf, bincode::options());

Some files were not shown because too many files have changed in this diff Show More