Compare commits

..

90 Commits

Author SHA1 Message Date
David Testé
cae938a75b wip: measure latencies of a list of cts 2025-01-28 11:19:27 +01:00
David Testé
bae1d1cf77 WIP: fix gpu streams and use iter_batched 2025-01-22 10:56:08 +01:00
David Testé
a3bc1a9d9e chore(bench): new heuristic to define elements for throughput
This is done to fill up backend with enough elements to fill the
backend and avoid having long execution time for heavy operations
like multiplication or division.
2025-01-20 15:21:05 +01:00
Nicolas Sarlin
cc85c441ea chore(doc): add strings feature to doctests 2025-01-20 10:59:12 +01:00
Mayeul@Zama
1f254d6523 chore: feature gating js_high_level_api 2025-01-20 10:00:49 +01:00
Mayeul@Zama
909ce4ecbb chore: update rust dependencies 2025-01-20 09:42:38 +01:00
Mayeul@Zama
67783f4683 chore: upgrade wasm-bindgen 2025-01-20 09:42:38 +01:00
tmontaigu
c62112a4a9 feat(c-hlapi): add abs 2025-01-17 17:04:18 +01:00
tmontaigu
9eb2eb9f0e refactor(c-hlapi): have different macro for FheUint/FheInt
As FheUint and FheInt may have different set of functions,
we now have 2 different macros to define their C API.
2025-01-17 17:04:18 +01:00
Arthur Meyre
aa5b431aae test(core): use 5 bits parameters tweaked to 4 bits to avoid high pfail 2025-01-17 16:06:27 +01:00
David Testé
2b914ae57a chore(ci): use ubuntu 22.04 to run security checks
Sagemath is not available as a package on Ubuntu 24.04.
2025-01-17 11:31:40 +01:00
Nicolas Sarlin
f4a8991f67 fix(safe_ser): aliases in named for renamed types deserialization 2025-01-17 11:13:43 +01:00
David Testé
a882262691 chore(ci): fix recursive search for changes in docs files 2025-01-17 10:04:46 +01:00
aquint-zama
1976a9dce6 chore: update release workflow 2025-01-16 16:04:58 +01:00
aquint-zama
ec87c15cc2 chore: add slsa support for tfhe-ntt 2025-01-16 16:04:58 +01:00
aquint-zama
c273e973bb chore: add slsa support for tfhe-fft 2025-01-16 16:04:58 +01:00
aquint-zama
07e3fb2779 chore: add slsa support for tfhe-cuda-backend 2025-01-16 16:04:58 +01:00
aquint-zama
c2d4e77eec chore: add slsa support for tfhe-versionable 2025-01-16 16:04:58 +01:00
aquint-zama
cdf627f2d5 chore: add slsa support for tfhe-csprng 2025-01-16 16:04:58 +01:00
aquint-zama
2f79f646f7 chore: add slsa support for tfhe-zk-pok 2025-01-16 16:04:58 +01:00
Guillermo Oyarzun
a9e4724178 feat(gpu): implement fhe rand on gpu 2025-01-16 14:48:09 +01:00
David Testé
7a8efb1934 chore(ci): display head ref in slack notification message
Default 'Ref' displayed in message relies on github.ref value. On
pull_request_target, it's the base_ref, instead of head_ref, that
is set as value for github.ref.
We cannot change 'Ref' field directly. As a workaround, we hide
'Ref' in the message and display the head_ref directly in
SLACK_MESSAGE.
2025-01-16 14:25:05 +01:00
David Testé
bc1aeeb85e chore(ci): skip aws fast tests if ci files changed
This would skip 'check-user-permission' job if the event
'pull_request_target' is emitted and CI files have changed.
It avoids overlapping of 'pull_request' and 'pull_request_target'
events. CI changes would only be tested on 'pull_request' for
Zama own pull requests.
2025-01-16 14:25:05 +01:00
Nicolas Sarlin
cbdba38147 fix(shortint): error message in packing keyswitch 2025-01-16 13:08:36 +01:00
Agnes Leroy
cea871fc6b chore(gpu): update multi-bit parameters 2025-01-16 10:23:04 +01:00
Arthur Meyre
34a006a3ee chore(ci): fix clippy on M1 for disabled multi bit noise 2025-01-16 09:44:21 +01:00
Pedro Alves
b3740e75f2 chore(docs): Remove mention to NVLink
NVLink is not needed anymore in the CUDA backend.
2025-01-16 09:35:15 +01:00
Andrei Stoian
b46affa45b chore(gpu): add reference to gemm algorithm 2025-01-16 09:07:58 +01:00
Mayeul@Zama
72095144dc chore: fix new lints 2025-01-15 15:16:15 +01:00
Arthur Meyre
a91e8618c9 chore(docs): fix various issues with the docs 2025-01-15 11:37:04 +01:00
Nicolas Sarlin
9a64c34989 chore(lint): use dylint as lint driver for tfhe-lint 2025-01-14 18:30:04 +01:00
Nicolas Sarlin
7103a83ce5 chore(zk)!: use builtin isqrt instead of the internal implementation
BREAKING CHANGE: bump MSRV
2025-01-14 18:29:29 +01:00
Nicolas Sarlin
1f41a6b85d chore(zk): sqr fc takes u64 as parameter to avoid overflow 2025-01-14 18:29:29 +01:00
David Testé
ccc647a5ee chore(ci): remove paths for pull_request_target event
A 'paths:' directive with only excluded paths won't trigger the event.
2025-01-14 17:37:48 +01:00
Arthur Meyre
ffd4f5a93e chore(ci): remove unused Dockerfile for wasm tests 2025-01-14 17:31:18 +01:00
David Testé
594157ecaa chore(ci): choose pull request event with path changes
To be able to run CI for external contribution, through forked
repository, and be able to test CI modification in a development
branch, we need to discriminate pull request events. For the
former 'pull_request_target' event is needed to have access to
GitHub secrets. 'pull_request' event is required for the latter
otherwise the workflow would be pulled from the HEAD of the base
branch and thus wouldn't contain changes from the developer.
2025-01-14 14:38:02 +01:00
dependabot[bot]
8ae871ec33 chore(deps): bump actions/upload-artifact from 4.5.0 to 4.6.0
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.5.0 to 4.6.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](6f51ac03b9...65c4c4a1dd)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-01-14 13:48:39 +01:00
dependabot[bot]
9535544409 chore(deps): bump actions-cool/check-user-permission from 2.2.1 to 2.3.0
Bumps [actions-cool/check-user-permission](https://github.com/actions-cool/check-user-permission) from 2.2.1 to 2.3.0.
- [Release notes](https://github.com/actions-cool/check-user-permission/releases)
- [Changelog](https://github.com/actions-cool/check-user-permission/blob/main/CHANGELOG.md)
- [Commits](956b2e73cd...7b90a27f92)

---
updated-dependencies:
- dependency-name: actions-cool/check-user-permission
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-01-14 13:48:31 +01:00
dependabot[bot]
4438042b7d chore(deps): bump zgosalvez/github-actions-ensure-sha-pinned-actions
Bumps [zgosalvez/github-actions-ensure-sha-pinned-actions](https://github.com/zgosalvez/github-actions-ensure-sha-pinned-actions) from 3.0.19 to 3.0.20.
- [Release notes](https://github.com/zgosalvez/github-actions-ensure-sha-pinned-actions/releases)
- [Commits](6ae615f647...c3a2b64f69)

---
updated-dependencies:
- dependency-name: zgosalvez/github-actions-ensure-sha-pinned-actions
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-01-14 13:48:23 +01:00
Arthur Meyre
f7189edb20 chore(ci): temporarily disable the multi bit noise check on Apple M1 2025-01-14 10:56:36 +01:00
Nicolas Sarlin
7058b3611a chore(backward): update data repo to 0.5 2025-01-14 09:35:32 +01:00
Pedro Alves
59b315993d chore(gpu): update new_multi_gpu() description 2025-01-14 09:24:24 +01:00
Arthur Meyre
3d1998635a docs: indicate PBS benchmarks have Gaussian parameters 2025-01-13 16:57:40 +01:00
Arthur Meyre
3fa72e62ae chore(docs): replace tabs by spaces 2025-01-13 16:57:40 +01:00
Arthur Meyre
0d43912884 docs: add TUniform distribution and link in benchmarks 2025-01-13 16:57:40 +01:00
Agnes Leroy
9930550b69 chore(doc): update links to the benchmark tables 2025-01-13 16:57:40 +01:00
Nicolas Sarlin
bdc3539954 doc(zk): explain how to use zkv1 2025-01-13 13:40:16 +01:00
tmontaigu
979a1b36f8 chore(docs): add strings guides 2025-01-13 13:25:47 +01:00
Andrei Stoian
298fd66631 feat(gpu): optimize packing keyswitch on gpu 2025-01-13 09:18:53 -03:00
David Testé
0952dfa1ad chore(ci): run teardown job only if setup is a success 2025-01-13 12:19:39 +01:00
Mayeul@Zama
e1e567a145 chore: remove unused cargo key 2025-01-13 12:03:06 +01:00
Nicolas Sarlin
ee1a534584 chore: formatting uniformization for c and js 2025-01-10 16:09:04 +01:00
Nicolas Sarlin
c9eef7d193 chore: enable space in editorconfig for more file types 2025-01-10 16:09:04 +01:00
Nicolas Sarlin
4c8d55f32b chore(zk): add bench zk v1 vs v2 2025-01-10 15:47:25 +01:00
Nicolas Sarlin
cd5b3c61eb chore(backward): move compat tests to avoid compiling them everytime 2025-01-10 09:43:04 +01:00
Arthur Meyre
baefb7d911 chore: enable strings for docs.rs generation 2025-01-09 19:48:43 +01:00
Guillermo Oyarzun
d2a3450ab9 fix(gpu): fix cornercase in match value function 2025-01-09 17:51:31 +01:00
David Testé
6fb13328ec chore(ci): put back weekly core_crypto benchmarks 2025-01-09 17:45:14 +01:00
David Testé
f633eedc29 chore(ci): push tfhe-cuda-backend to crates.io unconditionally 2025-01-09 12:10:55 +01:00
Nicolas Sarlin
a9fb3e9fbf chore(trivium): fix params, reverted back to gaussian 2025-01-09 09:19:04 +01:00
Arthur Meyre
9a4b584419 chore(ci): make cargo-builds-ntt unique to ntt 2025-01-08 17:05:42 +01:00
Arthur Meyre
cdcba5ca13 chore(ci): make test names unique for fft and ntt 2025-01-08 17:05:42 +01:00
Nicolas Sarlin
adf52acd90 chore: add js fmt to "make conformance"
and remove linelint autofix which has false positives
2025-01-08 13:23:14 +01:00
Nicolas Sarlin
9ac89fc6bf fix(bench): use correct name for parameters in wasm benches 2025-01-08 13:23:14 +01:00
Agnes Leroy
a668112694 chore(test): modify cpu multi-bit parameters for noise test 2025-01-08 09:02:07 +01:00
David Testé
ba105cd1d0 chore(ci): relocate permission checking after should-run step
This induces a failure if the job has to run AND if the triggering actor isn't a member of the zama-ai organization. That would help tfhe-rs maintainers to re-run only workflows that are supposed to run.

The reference is selected based on the event emitted.

We also now use token with restricted permission to check out the repository.
2025-01-07 17:36:41 +01:00
David Testé
3690ad0b25 chore(ci): remove pull_request which duplicate pull_request_target
Previously pull_request and pull_request_target events were both
emitted thus leading one cancelling the other because of
concurrency group name format.
Since external contribution needs to be allowed we only need
pull_request_target event.
2025-01-07 17:36:41 +01:00
yuxizama
b9ddeebd29 chore(docs): update discord link 2025-01-07 17:27:46 +01:00
Nicolas Sarlin
bc742e989a chore(zk-pok): bump to 0.4.0 2025-01-07 15:36:49 +01:00
Nicolas Sarlin
17c714f153 chore(versionable): bump to 0.4.0 2025-01-07 15:36:49 +01:00
Arthur Meyre
e0a264dfa0 chore: update docs version to be 0.11 2025-01-07 14:53:17 +01:00
tmontaigu
0551f4a1cc feat(hlapi): add strings 2025-01-07 14:52:42 +01:00
Arthur Meyre
54c2f4d14d chore: bump ntt to 0.4.0 2025-01-07 13:47:08 +01:00
Arthur Meyre
aa12c75312 chore: bump tfhe-fft to 0.7.0 2025-01-07 13:47:08 +01:00
Nicolas Sarlin
3c3e2d720f chore(shortint): update compression parameters 2025-01-07 12:02:09 +01:00
Nicolas Sarlin
a7bf1cdb43 chore(shortint): update keyswitch 1_1 to 2_2 parameters 2025-01-07 12:02:09 +01:00
Nicolas Sarlin
f06b04fd83 chore(zk)!: store inside the pke params the supported zk scheme
BREAKING_CHANGE:
- Zk for compact PKE now requires dedicated encryption parameters
2025-01-07 12:02:09 +01:00
Nicolas Sarlin
c19683a320 chore(shortint): move tuniform 0.10 parameters into their own folder 2025-01-07 12:02:09 +01:00
Nicolas Sarlin
96ca0d4f7c chore(shortint): re-export v0.10 params at top level 2025-01-07 12:02:09 +01:00
Nicolas Sarlin
b6d1b5ffff chore(shortint): update multibit gaussian parameters 2025-01-07 12:02:09 +01:00
Nicolas Sarlin
8ee1bdd9a9 chore(shortint): update classic gaussian param 2025-01-07 12:02:09 +01:00
Nicolas Sarlin
58801cf7a5 chore(shortint): update gaussian compact pk parameters 2025-01-07 12:02:09 +01:00
Nicolas Sarlin
010fb790c2 chore(shortint): remove tuniform pbs_ks parameters 2025-01-07 12:02:09 +01:00
Nicolas Sarlin
8a795c79ac chore(shortint): rename wopbs params to LEGACY_ 2025-01-07 12:02:09 +01:00
Nicolas Sarlin
f4c956636f chore(zk): add a test with zkv1 in integer compact list encryption 2025-01-07 12:02:09 +01:00
Nicolas Sarlin
cdca7be20b chore(shortint): new parameters for tuniform 2025-01-07 12:02:09 +01:00
Agnes Leroy
b7f1318815 chore(gpu): bump backend version 2025-01-07 11:33:16 +01:00
Arthur Meyre
721cb3bcbf chore(ci): fix workflow not triggering on pull request 2025-01-07 10:34:07 +01:00
Pedro Alves
53fed5eb21 fix(gpu): fix delta calculation when Torus is not a 64-bit type 2025-01-06 17:36:36 -03:00
Pedro Alves
e1b57fabe0 chore(gpu): port fix to compression encoding
- Modifies the generation of the LUT used in decompression so that the delta is calculated with a different precision, as in the CPU implementation
2025-01-06 17:36:36 -03:00
413 changed files with 20389 additions and 8181 deletions

View File

@@ -8,8 +8,14 @@ root = true
end_of_line = lf
insert_final_newline = true
# 4 space indentation
[*.rs]
# 4 space indentation for rust and toml
[*.{rs,toml}]
charset = utf-8
indent_style = space
indent_size = 4
# 2 for c and js
[*.{js,json,c,h}]
charset = utf-8
indent_style = space
indent_size = 2

View File

@@ -5,6 +5,7 @@ self-hosted-runner:
- 4090-desktop
- large_windows_16_latest
- large_ubuntu_16
- large_ubuntu_16-22.04
# Configuration variables in array of strings defined in your repository or
# organization. `null` means disabling configuration variables check.
# Empty array means no configuration variable is allowed.

View File

@@ -76,7 +76,7 @@ jobs:
with:
persist-credentials: 'false'
repository: zama-ai/tfhe-backward-compat-data
path: tfhe/tfhe-backward-compat-data
path: tests/tfhe-backward-compat-data
lfs: 'true'
ref: ${{ steps.backward_compat_branch.outputs.branch }}
@@ -94,7 +94,7 @@ jobs:
teardown-instance:
name: Teardown instance (backward-compat-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, backward-compat-tests ]
runs-on: ubuntu-latest
steps:

View File

@@ -11,26 +11,30 @@ env:
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
MSG_MINIMAL: event,action url,commit
BRANCH: ${{ github.head_ref || github.ref }}
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' || github.event_name == 'pull_request_target' }}
REF: ${{ github.event.pull_request.head.sha || github.sha }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
# Trigger pull_request event on CI files to be able to test changes before merging to main branch.
# Workflow would fail if changes come from a forked repository since secrets are not available with this event.
pull_request:
paths:
- '.github/**'
- 'ci/**'
# General entry point for Zama's pull request as well as contribution from forks.
pull_request_target:
paths:
- '**'
- '!.github/**'
- '!ci/**'
jobs:
check-user-permission:
if: github.event_name == 'pull_request_target'
uses: ./.github/workflows/check_triggering_actor.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
should-run:
runs-on: ubuntu-latest
needs: check-user-permission
if: github.event_name != 'pull_request_target' ||
needs.check-user-permission.result == 'success'
permissions:
pull-requests: write
outputs:
@@ -58,14 +62,15 @@ jobs:
user_docs_test: ${{ env.IS_PULL_REQUEST == 'false' ||
steps.changed-files.outputs.user_docs_any_changed ||
steps.changed-files.outputs.dependencies_any_changed }}
ci_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.changed-files.outputs.ci_any_changed }}
any_file_changed: ${{ env.IS_PULL_REQUEST == 'false' || steps.aggregated-changes.outputs.any_changed }}
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
ref: ${{ github.event.pull_request.head.sha }}
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
- name: Check for file changes
id: changed-files
@@ -114,11 +119,15 @@ jobs:
user_docs:
- tfhe/src/**
- '!tfhe/src/c_api/**'
- 'tfhe/docs/**.md'
- 'tfhe/docs/**/**.md'
- README.md
ci:
- .github/**
- ci/**
- name: Aggregate file changes
id: aggregated-changes
# CI files are not included in this aggregator.
if: ( steps.changed-files.outputs.dependencies_any_changed == 'true' ||
steps.changed-files.outputs.csprng_any_changed == 'true' ||
steps.changed-files.outputs.zk_pok_any_changed == 'true' ||
@@ -133,11 +142,21 @@ jobs:
run: |
echo "any_changed=true" >> "$GITHUB_OUTPUT"
# Fail if the triggering actor is not part of Zama organization.
# If pull_request_target is emitted and CI files have changed, skip this job. This would skip following jobs.
check-user-permission:
needs: should-run
if: github.event_name != 'pull_request_target' ||
(github.event_name == 'pull_request_target' && needs.should-run.outputs.ci_file_changed == 'false')
uses: ./.github/workflows/check_triggering_actor.yml
secrets:
TOKEN: ${{ secrets.GITHUB_TOKEN }}
setup-instance:
name: Setup instance (fast-tests)
if: github.event_name != 'pull_request' ||
needs.should-run.outputs.any_file_changed == 'true'
needs: should-run
if: github.event_name == 'workflow_dispatch' ||
(github.event_name != 'workflow_dispatch' && needs.should-run.outputs.any_file_changed == 'true')
needs: [ should-run, check-user-permission ]
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-instance.outputs.label }}
@@ -155,8 +174,6 @@ jobs:
fast-tests:
name: Fast CPU tests
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
needs: [ should-run, setup-instance ]
concurrency:
group: ${{ github.workflow }}_${{ github.head_ref || github.ref }}
@@ -167,8 +184,8 @@ jobs:
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
ref: ${{ github.event.pull_request.head.sha }}
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
ref: ${{ env.REF }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@a54c7afa936fefeb4456b2dd8068152669aa8203
@@ -272,11 +289,11 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Fast AWS tests finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (fast-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, fast-tests ]
runs-on: ubuntu-latest
steps:
@@ -296,4 +313,4 @@ jobs:
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
SLACK_MESSAGE: "Instance teardown (fast-tests) finished with status: ${{ job.status }} on '${{ env.BRANCH }}'. (${{ env.ACTION_RUN_URL }})"

View File

@@ -134,7 +134,7 @@ jobs:
teardown-instance:
name: Teardown instance (unsigned-integer-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [setup-instance, unsigned-integer-tests]
runs-on: ubuntu-latest
steps:

View File

@@ -138,7 +138,7 @@ jobs:
teardown-instance:
name: Teardown instance (signed-integer-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [setup-instance, signed-integer-tests]
runs-on: ubuntu-latest
steps:

View File

@@ -111,7 +111,7 @@ jobs:
user_docs:
- tfhe/src/**
- '!tfhe/src/c_api/**'
- 'tfhe/docs/**.md'
- 'tfhe/docs/**/**.md'
- README.md
- name: Aggregate file changes
@@ -244,7 +244,7 @@ jobs:
teardown-instance:
name: Teardown instance (cpu-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cpu-tests ]
runs-on: ubuntu-latest
steps:

View File

@@ -113,7 +113,7 @@ jobs:
teardown-instance:
name: Teardown instance (wasm-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, wasm-tests ]
runs-on: ubuntu-latest
steps:

View File

@@ -93,7 +93,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_boolean
path: ${{ env.RESULTS_FILENAME }}
@@ -121,7 +121,7 @@ jobs:
teardown-instance:
name: Teardown instance (boolean-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, boolean-benchmarks ]
runs-on: ubuntu-latest
steps:

View File

@@ -3,6 +3,9 @@ name: Core crypto benchmarks
on:
workflow_dispatch:
schedule:
# Weekly benchmarks will be triggered each Saturday at 5a.m.
- cron: '0 5 * * 6'
env:
CARGO_TERM_COLOR: always
@@ -81,7 +84,7 @@ jobs:
--walk-subdirs
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_core_crypto
path: ${{ env.RESULTS_FILENAME }}
@@ -109,7 +112,7 @@ jobs:
teardown-instance:
name: Teardown instance (core-crypto-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, core-crypto-benchmarks ]
runs-on: ubuntu-latest
steps:

View File

@@ -97,7 +97,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_erc20
path: ${{ env.RESULTS_FILENAME }}
@@ -118,7 +118,7 @@ jobs:
teardown-instance:
name: Teardown instance (erc20-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, erc20-benchmarks ]
runs-on: ubuntu-latest
steps:

View File

@@ -82,7 +82,7 @@ jobs:
--walk-subdirs
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_integer_multi_bit_gpu_default
path: ${{ env.RESULTS_FILENAME }}
@@ -157,7 +157,7 @@ jobs:
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_core_crypto
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -98,7 +98,7 @@ jobs:
--walk-subdirs
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_core_crypto
path: ${{ env.RESULTS_FILENAME }}
@@ -131,7 +131,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-integer-full-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-core-crypto-benchmarks, slack-notify ]
runs-on: ubuntu-latest
steps:

View File

@@ -124,7 +124,7 @@ jobs:
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_erc20_${{ inputs.profile }}
path: ${{ env.RESULTS_FILENAME }}
@@ -157,7 +157,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-erc20-${{ inputs.profile }}-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-erc20-benchmarks, slack-notify ]
runs-on: ubuntu-latest
steps:

View File

@@ -200,7 +200,7 @@ jobs:
--bench-type ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ inputs.profile }}
path: ${{ env.RESULTS_FILENAME }}
@@ -233,7 +233,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-${{ inputs.profile }}-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-benchmarks, slack-notify ]
runs-on: ubuntu-latest
steps:

View File

@@ -170,7 +170,7 @@ jobs:
--bench-type ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
path: ${{ env.RESULTS_FILENAME }}
@@ -191,7 +191,7 @@ jobs:
teardown-instance:
name: Teardown instance (integer-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, integer-benchmarks ]
runs-on: ubuntu-latest
steps:

View File

@@ -136,7 +136,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_shortint_${{ matrix.op_flavor }}
path: ${{ env.RESULTS_FILENAME }}
@@ -157,7 +157,7 @@ jobs:
teardown-instance:
name: Teardown instance (shortint-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, shortint-benchmarks ]
runs-on: ubuntu-latest
steps:

View File

@@ -164,7 +164,7 @@ jobs:
--bench-type ${{ matrix.bench_type }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_${{ matrix.command }}_${{ matrix.op_flavor }}_${{ matrix.bench_type }}
path: ${{ env.RESULTS_FILENAME }}
@@ -185,7 +185,7 @@ jobs:
teardown-instance:
name: Teardown instance (integer-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, signed-integer-benchmarks ]
runs-on: ubuntu-latest
steps:

View File

@@ -84,7 +84,7 @@ jobs:
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_fft
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -84,7 +84,7 @@ jobs:
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_ntt
path: ${{ env.RESULTS_FILENAME }}

View File

@@ -121,7 +121,7 @@ jobs:
--name-suffix avx512
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_tfhe_zk_pok
path: ${{ env.RESULTS_FILENAME }}
@@ -149,7 +149,7 @@ jobs:
teardown-instance:
name: Teardown instance (tfhe-zk-pok-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, tfhe-zk-pok-benchmarks ]
runs-on: ubuntu-latest
steps:

View File

@@ -166,7 +166,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_wasm_${{ matrix.browser }}
path: ${{ env.RESULTS_FILENAME }}
@@ -194,7 +194,7 @@ jobs:
teardown-instance:
name: Teardown instance (wasm-client-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, wasm-client-benchmarks ]
runs-on: ubuntu-latest
steps:

View File

@@ -177,7 +177,7 @@ jobs:
--append-results
- name: Upload parsed results artifact
uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b
uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08
with:
name: ${{ github.sha }}_integer_zk
path: ${{ env.RESULTS_FILENAME }}
@@ -205,7 +205,7 @@ jobs:
teardown-instance:
name: Teardown instance (pke-zk-benchmarks)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, pke-zk-benchmarks ]
runs-on: ubuntu-latest
steps:

View File

@@ -12,7 +12,7 @@ concurrency:
cancel-in-progress: true
jobs:
cargo-builds:
cargo-builds-ntt:
runs-on: ${{ matrix.os }}
strategy:
matrix:

View File

@@ -12,7 +12,7 @@ concurrency:
cancel-in-progress: true
jobs:
cargo-tests:
cargo-tests-fft:
runs-on: ${{ matrix.runner_type }}
strategy:
matrix:
@@ -38,7 +38,7 @@ jobs:
run: |
make test_fft_no_std
cargo-tests-nightly:
cargo-tests-fft-nightly:
runs-on: ${{ matrix.runner_type }}
strategy:
matrix:
@@ -60,7 +60,7 @@ jobs:
run: |
make test_fft_no_std_nightly
cargo-tests-node-js:
cargo-tests-fft-node-js:
runs-on: "ubuntu-latest"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

View File

@@ -12,7 +12,7 @@ concurrency:
cancel-in-progress: true
jobs:
cargo-tests:
cargo-tests-ntt:
runs-on: ${{ matrix.os }}
strategy:
matrix:
@@ -33,7 +33,7 @@ jobs:
- name: Test no-std
run: make test_ntt_no_std
cargo-tests-nightly:
cargo-tests-ntt-nightly:
runs-on: ${{ matrix.os }}
strategy:
matrix:

View File

@@ -13,7 +13,7 @@ jobs:
steps:
- name: Get User Permission
id: check-access
uses: actions-cool/check-user-permission@956b2e73cdfe3bcb819bb7225e490cb3b18fd76e # v2.2.1
uses: actions-cool/check-user-permission@7b90a27f92f3961b368376107661682c441f6103 # v2.3.0
with:
require: write
username: ${{ github.triggering_actor }}

View File

@@ -27,7 +27,7 @@ jobs:
make lint_workflow
- name: Ensure SHA pinned actions
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@6ae615f6475d2ede5ad88bea6baa7a1d5e93ffaa # v3.0.19
uses: zgosalvez/github-actions-ensure-sha-pinned-actions@c3a2b64f69b7a1542a68f44d9edbd9ec3fc1455e # v3.0.20
with:
allowlist: |
slsa-framework/slsa-github-generator

View File

@@ -115,7 +115,7 @@ jobs:
teardown-instance:
name: Teardown instance (code-coverage)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, code-coverage ]
runs-on: ubuntu-latest
steps:

View File

@@ -69,7 +69,7 @@ jobs:
teardown-instance:
name: Teardown instance (csprng-randomness-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, csprng-randomness-tests ]
runs-on: ubuntu-latest
steps:

View File

@@ -51,7 +51,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**.md'
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_fast_h100_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
@@ -149,7 +149,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-h100-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:

View File

@@ -50,7 +50,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**.md'
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_fast_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
@@ -147,7 +147,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:

View File

@@ -51,7 +51,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**.md'
- 'tfhe/docs/**/**.md'
- '.github/workflows/**_multi_gpu_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
@@ -152,7 +152,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-tests-multi-gpu)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:

View File

@@ -92,7 +92,7 @@ jobs:
teardown-instance:
name: Teardown instance (gpu-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests ]
runs-on: ubuntu-latest
steps:

View File

@@ -104,7 +104,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-pcc)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-pcc ]
runs-on: ubuntu-latest
steps:

View File

@@ -51,7 +51,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**.md'
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_classic_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
@@ -132,7 +132,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-signed-classic-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:

View File

@@ -51,7 +51,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**.md'
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_h100_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
@@ -132,7 +132,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-h100-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:

View File

@@ -58,7 +58,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**.md'
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_signed_integer_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
@@ -150,7 +150,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-signed-integer-tests ]
runs-on: ubuntu-latest
steps:

View File

@@ -51,7 +51,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**.md'
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_unsigned_integer_classic_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
@@ -132,7 +132,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-unsigned-classic-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:

View File

@@ -51,7 +51,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**.md'
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_unsigned_integer_h100_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
@@ -132,7 +132,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-h100-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:

View File

@@ -57,7 +57,7 @@ jobs:
- tfhe/src/shortint/parameters/**
- tfhe/src/high_level_api/**
- tfhe/src/c_api/**
- 'tfhe/docs/**.md'
- 'tfhe/docs/**/**.md'
- '.github/workflows/gpu_unsigned_integer_tests.yml'
- scripts/integer-tests.sh
- ci/slab.toml
@@ -146,7 +146,7 @@ jobs:
teardown-instance:
name: Teardown instance (cuda-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-unsigned-integer-tests ]
runs-on: ubuntu-latest
steps:

View File

@@ -72,7 +72,7 @@ jobs:
teardown-instance:
name: Teardown instance (cpu-tests)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cpu-tests ]
runs-on: ubuntu-latest
steps:

View File

@@ -43,14 +43,14 @@ jobs:
hash: ${{ steps.hash.outputs.hash }}
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Prepare package
run: |
cargo package -p tfhe
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
- uses: actions/upload-artifact@65c4c4a1ddee5b72f698fdd19549f0f0fb45cf08 # v4.6.0
with:
name: crate
path: target/package/*.crate
@@ -77,12 +77,9 @@ jobs:
name: Publish Release
needs: [package] # for comparing hashes
runs-on: ubuntu-latest
permissions:
contents: read
id-token: write
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
@@ -110,7 +107,7 @@ jobs:
- name: Slack notification (hashes comparison)
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: failure
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
@@ -155,7 +152,7 @@ jobs:
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: ${{ job.status }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -1,49 +0,0 @@
name: Publish tfhe-csprng release
on:
workflow_dispatch:
inputs:
dry_run:
description: "Dry-run"
type: boolean
default: true
env:
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
jobs:
verify_tag:
uses: ./.github/workflows/verify_tagged_commit.yml
secrets:
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
publish_release:
name: Publish tfhe-csprng Release
needs: verify_tag
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
cargo publish -p tfhe-csprng --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
env:
SLACK_COLOR: ${{ job.status }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_MESSAGE: "tfhe-csprng release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

View File

@@ -1,4 +1,3 @@
# Publish new release of tfhe-cuda-backend on crates.io.
name: Publish CUDA release
on:
@@ -8,10 +7,6 @@ on:
description: "Dry-run"
type: boolean
default: true
push_to_crates:
description: "Push to crate"
type: boolean
default: true
env:
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
@@ -45,10 +40,12 @@ jobs:
backend: aws
profile: gpu-build
publish-cuda-release:
name: Publish CUDA Release
package:
name: Package CUDA Release for provenance
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
outputs:
hash: ${{ steps.hash.outputs.hash }}
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
@@ -61,7 +58,7 @@ jobs:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
@@ -95,26 +92,75 @@ jobs:
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
- name: Prepare package
run: |
cargo package -p tfhe-cuda-backend
- name: generate hash
id: hash
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
provenance:
if: ${{ !inputs.dry_run }}
needs: [package]
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
permissions:
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
with:
# SHA-256 hashes of the Crate package.
base64-subjects: ${{ needs.package.outputs.hash }}
publish-cuda-release:
name: Publish CUDA Release
needs: [setup-instance, package] # for comparing hashes
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
gcc: 9
steps:
- name: Publish crate.io package
if: ${{ inputs.push_to_crates }}
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
cargo publish -p tfhe-cuda-backend --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
- name: Generate hash
id: published_hash
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
- name: Slack notification (hashes comparison)
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: failure
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_MESSAGE: "SLSA tfhe-cuda-backend crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "tfhe-cuda-backend release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
teardown-instance:
name: Teardown instance (publish-release)
if: ${{ always() && needs.setup-instance.result != 'skipped' }}
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, publish-cuda-release ]
runs-on: ubuntu-latest
steps:

View File

@@ -0,0 +1,103 @@
name: Publish tfhe-csprng release
on:
workflow_dispatch:
inputs:
dry_run:
description: "Dry-run"
type: boolean
default: true
env:
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
jobs:
verify_tag:
uses: ./.github/workflows/verify_tagged_commit.yml
secrets:
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
package:
runs-on: ubuntu-latest
outputs:
hash: ${{ steps.hash.outputs.hash }}
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- name: Prepare package
run: |
cargo package -p tfhe-csprng
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: crate-tfhe-csprng
path: target/package/*.crate
- name: generate hash
id: hash
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
provenance:
if: ${{ !inputs.dry_run }}
needs: [package]
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
permissions:
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
with:
# SHA-256 hashes of the Crate package.
base64-subjects: ${{ needs.package.outputs.hash }}
publish_release:
name: Publish tfhe-csprng Release
needs: [verify_tag, package]
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Download artifact
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: crate-tfhe-csprng
path: target/package
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
cargo publish -p tfhe-csprng --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
- name: Generate hash
id: published_hash
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
- name: Slack notification (hashes comparison)
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: failure
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_MESSAGE: "SLSA tfhe-csprng - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: ${{ job.status }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_MESSAGE: "tfhe-csprng release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}

View File

@@ -19,15 +19,53 @@ jobs:
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
package:
runs-on: ubuntu-latest
needs: verify_tag
outputs:
hash: ${{ steps.hash.outputs.hash }}
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Prepare package
run: |
cargo package -p tfhe-fft
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
with:
name: crate
path: target/package/*.crate
- name: generate hash
id: hash
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
provenance:
if: ${{ !inputs.dry_run }}
needs: [package]
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
permissions:
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
with:
# SHA-256 hashes of the Crate package.
base64-subjects: ${{ needs.package.outputs.hash }}
publish_release:
name: Publish tfhe-fft Release
runs-on: ubuntu-latest
needs: verify_tag
needs: [verify_tag, package] # for comparing hashes
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Publish crate.io package
env:
@@ -36,10 +74,26 @@ jobs:
run: |
cargo publish -p tfhe-fft --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
- name: Generate hash
id: published_hash
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
- name: Slack notification (hashes comparison)
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: failure
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_MESSAGE: "SLSA tfhe-fft crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: ${{ job.status }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -19,13 +19,50 @@ jobs:
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
package:
runs-on: ubuntu-latest
needs: verify_tag
outputs:
hash: ${{ steps.hash.outputs.hash }}
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Prepare package
run: |
cargo package -p tfhe-ntt
- uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0
with:
name: crate
path: target/package/*.crate
- name: generate hash
id: hash
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
provenance:
if: ${{ !inputs.dry_run }}
needs: [package]
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
permissions:
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
with:
# SHA-256 hashes of the Crate package.
base64-subjects: ${{ needs.package.outputs.hash }}
publish_release:
name: Publish tfhe-ntt Release
runs-on: ubuntu-latest
needs: verify_tag
needs: [verify_tag, package] # for comparing hashes
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
@@ -36,10 +73,26 @@ jobs:
run: |
cargo publish -p tfhe-ntt --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
- name: Generate hash
id: published_hash
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
- name: Slack notification (hashes comparison)
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: failure
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_MESSAGE: "SLSA tfhe-ntt crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: ${{ job.status }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -18,35 +18,159 @@ jobs:
RELEASE_TEAM: ${{ secrets.RELEASE_TEAM }}
READ_ORG_TOKEN: ${{ secrets.READ_ORG_TOKEN }}
publish_release:
package-derive:
runs-on: ubuntu-latest
outputs:
hash: ${{ steps.hash.outputs.hash }}
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- name: Prepare package
run: |
cargo package -p tfhe-versionable-derive
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: crate-tfhe-versionable-derive
path: target/package/*.crate
- name: generate hash
id: hash
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
provenance-derive:
needs: [package-derive]
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
permissions:
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
with:
# SHA-256 hashes of the Crate package.
base64-subjects: ${{ needs.package-derive.outputs.hash }}
publish_release-derive:
name: Publish tfhe-versionable Release
needs: verify_tag
needs: [verify_tag, package-derive] # for comparing hashes
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Publish proc-macro crate
- name: Download artifact
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: crate-tfhe-versionable-derive
path: target/package
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
cargo publish -p tfhe-versionable-derive --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
- name: Generate hash
id: published_hash
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
- name: Slack notification (hashes comparison)
if: ${{ needs.package-derive.outputs.hash != steps.published_hash.outputs.pub_hash }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: failure
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_MESSAGE: "SLSA tfhe-versionable-derive - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: ${{ job.status }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_MESSAGE: "tfhe-versionable-derive release finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
- name: Publish main crate
if: ${{ ! inputs.dry_run }}
package:
runs-on: ubuntu-latest
outputs:
hash: ${{ steps.hash.outputs.hash }}
steps:
- name: Checkout
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332
with:
fetch-depth: 0
- name: Prepare package
run: |
cargo package -p tfhe-versionable
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: crate-tfhe-versionable
path: target/package/*.crate
- name: generate hash
id: hash
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
provenance:
needs: [package]
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
permissions:
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
with:
# SHA-256 hashes of the Crate package.
base64-subjects: ${{ needs.package.outputs.hash }}
publish_release:
name: Publish tfhe-versionable Release
needs: [package] # for comparing hashes
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
with:
fetch-depth: 0
- name: Download artifact
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: crate-tfhe-versionable
path: target/package
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
run: |
cargo publish -p tfhe-versionable --token ${{ env.CRATES_TOKEN }}
cargo publish -p tfhe-versionable --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
- name: Generate hash
id: published_hash
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
- name: Slack notification (hashes comparison)
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: failure
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_MESSAGE: "SLSA tfhe-versionable - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: ${{ job.status }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -1,4 +1,3 @@
# Publish new release of tfhe-zk-pok on crates.io.
name: Publish tfhe-zk-pok release
on:
@@ -13,6 +12,40 @@ env:
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
jobs:
package:
runs-on: ubuntu-latest
outputs:
hash: ${{ steps.hash.outputs.hash }}
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
- name: Prepare package
run: |
cargo package -p tfhe-zk-pok
- uses: actions/upload-artifact@834a144ee995460fba8ed112a2fc961b36a5ec5a # v4.3.6
with:
name: crate-zk-pok
path: target/package/*.crate
- name: generate hash
id: hash
run: cd target/package && echo "hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
provenance:
if: ${{ !inputs.dry_run }}
needs: [package]
uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.0.0
permissions:
# Needed to detect the GitHub Actions environment
actions: read
# Needed to create the provenance via GitHub OIDC
id-token: write
# Needed to upload assets/artifacts
contents: write
with:
# SHA-256 hashes of the Crate package.
base64-subjects: ${{ needs.package.outputs.hash }}
verify_tag:
uses: ./.github/workflows/verify_tagged_commit.yml
secrets:
@@ -21,26 +54,43 @@ jobs:
publish_release:
name: Publish tfhe-zk-pok Release
needs: verify_tag
needs: [verify_tag, package] # for comparing hashes
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
fetch-depth: 0
token: ${{ secrets.FHE_ACTIONS_TOKEN }}
- name: Download artifact
uses: actions/download-artifact@fa0a91b85d4f404e444e00e005971372dc801d16 # v4.1.8
with:
name: crate-zk-pok
path: target/package
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
cargo publish -p tfhe-zk-pok --token ${{ env.CRATES_TOKEN }} ${{ env.DRY_RUN }}
- name: Verify hash
id: published_hash
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
- name: Slack notification (hashes comparison)
if: ${{ needs.package.outputs.hash != steps.published_hash.outputs.pub_hash }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: failure
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_MESSAGE: "SLSA tfhe-zk-pok crate - hash comparison failure: (${{ env.ACTION_RUN_URL }})"
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990
uses: rtCamp/action-slack-notify@c33737706dea87cd7784c687dadc9adf1be59990 # v2.3.2
env:
SLACK_COLOR: ${{ job.status }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}

View File

@@ -14,7 +14,7 @@ on:
jobs:
params-curves-security-check:
runs-on: large_ubuntu_16
runs-on: large_ubuntu_16-22.04
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683

2
.gitignore vendored
View File

@@ -33,4 +33,4 @@ node_modules/
package-lock.json
# Dir used for backward compatibility test data
tfhe/tfhe-backward-compat-data/
tests/tfhe-backward-compat-data/

View File

@@ -1,11 +1,15 @@
ignore:
- .git
- target
- tfhe/build
- venv
- web-test-runner
- tfhe/benchmarks_parameters
- tfhe/web_wasm_parallel_tests/node_modules
- tfhe/web_wasm_parallel_tests/dist
- keys
- coverage
- utils/tfhe-lints/ui/main.stderr
rules:
# checks if file ends in a newline character

View File

@@ -11,24 +11,21 @@ members = [
"backends/tfhe-cuda-backend",
"utils/tfhe-versionable",
"utils/tfhe-versionable-derive",
"tests",
]
exclude = [
"tfhe/backward_compatibility_tests",
"utils/cargo-tfhe-lints-inner",
"utils/cargo-tfhe-lints"
]
exclude = ["tests/backward_compatibility_tests", "utils/tfhe-lints"]
[workspace.dependencies]
aligned-vec = { version = "0.6", default-features = false }
bytemuck = "1.14.3"
dyn-stack = { version = "0.11", default-features = false }
itertools = "0.13"
itertools = "0.14"
num-complex = "0.4"
pulp = { version = "0.20.0", default-features = false }
pulp = { version = "0.20", default-features = false }
rand = "0.8"
rayon = "1"
serde = { version = "1.0", default-features = false }
wasm-bindgen = ">=0.2.86,<0.2.94"
wasm-bindgen = "0.2.100"
[profile.bench]
lto = "fat"
@@ -46,3 +43,6 @@ inherits = "dev"
opt-level = 3
lto = "off"
debug-assertions = false
[workspace.metadata.dylint]
libraries = [{ path = "utils/tfhe-lints" }]

View File

@@ -20,7 +20,7 @@ BENCH_OP_FLAVOR?=DEFAULT
BENCH_TYPE?=latency
NODE_VERSION=22.6
BACKWARD_COMPAT_DATA_URL=https://github.com/zama-ai/tfhe-backward-compat-data.git
BACKWARD_COMPAT_DATA_BRANCH?=v0.4
BACKWARD_COMPAT_DATA_BRANCH?=v0.5
BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
TFHE_SPEC:=tfhe
@@ -117,7 +117,7 @@ install_wasm_bindgen_cli: install_rs_build_toolchain
.PHONY: install_wasm_pack # Install wasm-pack to build JS packages
install_wasm_pack: install_rs_build_toolchain
@wasm-pack --version | grep "$(WASM_PACK_VERSION)" > /dev/null 2>&1 || \
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@0.13.1 || \
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked wasm-pack@$(WASM_PACK_VERSION) || \
( echo "Unable to install cargo wasm-pack, unknown error." && exit 1 )
.PHONY: install_node # Install last version of NodeJS via nvm
@@ -151,10 +151,9 @@ install_tarpaulin: install_rs_build_toolchain
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install cargo-tarpaulin --locked || \
( echo "Unable to install cargo tarpaulin, unknown error." && exit 1 )
.PHONY: install_tfhe_lints # Install custom tfhe-rs lints
install_tfhe_lints:
(cd utils/cargo-tfhe-lints-inner && cargo install --path .) && \
cd utils/cargo-tfhe-lints && cargo install --path .
.PHONY: install_cargo_dylint # Install cargo-dylint and dylint-link, used to run the custom tfhe-rs lints
install_cargo_dylint:
cargo install cargo-dylint dylint-link
.PHONY: install_typos_checker # Install typos checker
install_typos_checker: install_rs_build_toolchain
@@ -243,7 +242,8 @@ fmt_js: check_nvm_installed
source ~/.nvm/nvm.sh && \
nvm install $(NODE_VERSION) && \
nvm use $(NODE_VERSION) && \
$(MAKE) -C tfhe/web_wasm_parallel_tests fmt
$(MAKE) -C tfhe/web_wasm_parallel_tests fmt && \
$(MAKE) -C tfhe/js_on_wasm_tests fmt
.PHONY: fmt_gpu # Format rust and cuda code
fmt_gpu: install_rs_check_toolchain
@@ -272,7 +272,8 @@ check_fmt_js: check_nvm_installed
source ~/.nvm/nvm.sh && \
nvm install $(NODE_VERSION) && \
nvm use $(NODE_VERSION) && \
$(MAKE) -C tfhe/web_wasm_parallel_tests check_fmt
$(MAKE) -C tfhe/web_wasm_parallel_tests check_fmt && \
$(MAKE) -C tfhe/js_on_wasm_tests check_fmt
.PHONY: check_typos # Check for typos in codebase
check_typos: install_typos_checker
@@ -281,14 +282,14 @@ check_typos: install_typos_checker
.PHONY: clippy_gpu # Run clippy lints on tfhe with "gpu" enabled
clippy_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean,shortint,integer,internal-keycache,gpu \
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
--all-targets \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
check_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
--features=boolean,shortint,integer,internal-keycache,gpu \
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
--all-targets \
-p $(TFHE_SPEC)
@@ -393,10 +394,10 @@ clippy_trivium: install_rs_check_toolchain
.PHONY: clippy_all_targets # Run clippy lints on all targets (benches, examples, etc.)
clippy_all_targets: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats \
-p $(TFHE_SPEC) -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,experimental \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats,experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
.PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
@@ -416,10 +417,15 @@ clippy_versionable: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-p tfhe-versionable -- --no-deps -D warnings
.PHONY: clippy_tfhe_lints # Run clippy lints on tfhe-lints
clippy_tfhe_lints: install_cargo_dylint # the toolchain is selected with toolchain.toml
cd utils/tfhe-lints && \
cargo clippy --all-targets -- --no-deps -D warnings
.PHONY: clippy_all # Run all clippy targets
clippy_all: clippy_rustdoc clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets \
clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core clippy_tfhe_csprng clippy_zk_pok clippy_trivium \
clippy_versionable
clippy_versionable clippy_tfhe_lints
.PHONY: clippy_fast # Run main clippy targets
clippy_fast: clippy_rustdoc clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks \
@@ -435,13 +441,13 @@ check_rust_bindings_did_not_change:
cargo build -p tfhe-cuda-backend && "$(MAKE)" fmt_gpu && \
git diff --quiet HEAD -- backends/tfhe-cuda-backend/src/bindings.rs || \
( echo "Generated bindings have changed! Please run 'git add backends/tfhe-cuda-backend/src/bindings.rs' \
and commit the changes." && exit 1 )
and commit the changes." && exit 1 )
.PHONY: tfhe_lints # Run custom tfhe-rs lints
tfhe_lints: install_tfhe_lints
cd tfhe && RUSTFLAGS="$(RUSTFLAGS)" cargo tfhe-lints \
--features=boolean,shortint,integer,zk-pok -- -D warnings
tfhe_lints: install_cargo_dylint
RUSTFLAGS="$(RUSTFLAGS)" cargo dylint --all -p tfhe --no-deps -- \
--features=boolean,shortint,integer,strings,zk-pok
.PHONY: build_core # Build core_crypto without experimental features
build_core: install_rs_build_toolchain install_rs_check_toolchain
@@ -515,11 +521,11 @@ build_web_js_api: install_rs_build_toolchain install_wasm_pack
build_web_js_api_parallel: install_rs_check_toolchain install_wasm_pack
cd tfhe && \
rustup component add rust-src --toolchain $(RS_CHECK_TOOLCHAIN) && \
RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory,+mutable-globals" rustup run $(RS_CHECK_TOOLCHAIN) \
RUSTFLAGS="$(WASM_RUSTFLAGS) -C target-feature=+atomics,+bulk-memory" rustup run $(RS_CHECK_TOOLCHAIN) \
wasm-pack build --release --target=web \
-- --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,parallel-wasm-api,zk-pok \
-Z build-std=panic_abort,std && \
find pkg/snippets -type f -iname workerHelpers.worker.js -exec sed -i "s|from '..\/..\/..\/';|from '..\/..\/..\/tfhe.js';|" {} \;
find pkg/snippets -type f -iname workerHelpers.js -exec sed -i "s|const pkg = await import('..\/..\/..');|const pkg = await import('..\/..\/..\/tfhe.js');|" {} \;
jq '.files += ["snippets"]' tfhe/pkg/package.json > tmp_pkg.json && mv -f tmp_pkg.json tfhe/pkg/package.json
.PHONY: build_node_js_api # Build the js API targeting nodejs
@@ -806,7 +812,7 @@ test_integer_cov: install_rs_check_toolchain install_tarpaulin
.PHONY: test_high_level_api # Run all the tests for high_level_api
test_high_level_api: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=boolean,shortint,integer,internal-keycache,zk-pok -p $(TFHE_SPEC) \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings -p $(TFHE_SPEC) \
-- high_level_api::
test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
@@ -824,7 +830,7 @@ test_strings: install_rs_build_toolchain
.PHONY: test_user_doc # Run tests from the .md documentation
test_user_doc: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok \
--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok,strings \
-p $(TFHE_SPEC) \
-- test_user_docs::
@@ -887,16 +893,21 @@ test_versionable: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--all-targets -p tfhe-versionable
.PHONY: test_tfhe_lints # Run test on tfhe-lints
test_tfhe_lints: install_cargo_dylint
cd utils/tfhe-lints && \
cargo test
# The backward compat data repo holds historical binary data as well as the Rust code to generate and load it.
# Here we use the "patch" functionality of Cargo to make sure the repo used for the data is the same as the one used for the code.
.PHONY: test_backward_compatibility_ci
test_backward_compatibility_ci: install_rs_build_toolchain
TFHE_BACKWARD_COMPAT_DATA_DIR="$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tfhe/$(BACKWARD_COMPAT_DATA_DIR)\"" \
--features=shortint,integer,zk-pok -p $(TFHE_SPEC) test_backward_compatibility -- --nocapture
--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tests/$(BACKWARD_COMPAT_DATA_DIR)\"" \
--features=shortint,integer,zk-pok -p tests test_backward_compatibility -- --nocapture
.PHONY: test_backward_compatibility # Same as test_backward_compatibility_ci but tries to clone the data repo first if needed
test_backward_compatibility: tfhe/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci
test_backward_compatibility: tests/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci
.PHONY: backward_compat_branch # Prints the required backward compatibility branch
backward_compat_branch:
@@ -1045,35 +1056,35 @@ bench_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_signed_integer # Run benchmarks for signed integer
bench_signed_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-signed-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_integer_gpu # Run benchmarks for integer on GPU backend
bench_integer_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_integer_compression # Run benchmarks for unsigned integer compression
bench_integer_compression: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench glwe_packing_compression-integer-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_integer_compression_gpu # Run benchmarks for integer compression on GPU backend
bench_integer_compression_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench glwe_packing_compression-integer-bench \
--features=integer,internal-keycache,gpu -p $(TFHE_SPEC) --
--features=integer,internal-keycache,gpu,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_integer_multi_bit # Run benchmarks for unsigned integer using multi-bit parameters
bench_integer_multi_bit: install_rs_check_toolchain
@@ -1081,7 +1092,7 @@ bench_integer_multi_bit: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_signed_integer_multi_bit # Run benchmarks for signed integer using multi-bit parameters
bench_signed_integer_multi_bit: install_rs_check_toolchain
@@ -1089,7 +1100,7 @@ bench_signed_integer_multi_bit: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-signed-bench \
--features=integer,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_integer_multi_bit_gpu # Run benchmarks for integer on GPU backend using multi-bit parameters
bench_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1097,7 +1108,7 @@ bench_integer_multi_bit_gpu: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) --
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) --
.PHONY: bench_unsigned_integer_multi_bit_gpu # Run benchmarks for unsigned integer on GPU backend using multi-bit parameters
bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
@@ -1105,14 +1116,14 @@ bench_unsigned_integer_multi_bit_gpu: install_rs_check_toolchain
__TFHE_RS_BENCH_OP_FLAVOR=$(BENCH_OP_FLAVOR) __TFHE_RS_FAST_BENCH=$(FAST_BENCH) __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench integer-bench \
--features=integer,gpu,internal-keycache,nightly-avx512 -p $(TFHE_SPEC) -- ::unsigned
--features=integer,gpu,internal-keycache,nightly-avx512,pbs-stats -p $(TFHE_SPEC) -- ::unsigned
.PHONY: bench_integer_zk # Run benchmarks for integer encryption with ZK proofs
bench_integer_zk: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench zk-pke-bench \
--features=integer,internal-keycache,zk-pok,nightly-avx512 \
--features=integer,internal-keycache,zk-pok,nightly-avx512,pbs-stats \
-p $(TFHE_SPEC) --
.PHONY: bench_shortint # Run benchmarks for shortint
@@ -1275,9 +1286,9 @@ write_params_to_file: install_rs_check_toolchain
.PHONY: clone_backward_compat_data # Clone the data repo needed for backward compatibility tests
clone_backward_compat_data:
./scripts/clone_backward_compat_data.sh $(BACKWARD_COMPAT_DATA_URL) $(BACKWARD_COMPAT_DATA_BRANCH) tfhe/$(BACKWARD_COMPAT_DATA_DIR)
./scripts/clone_backward_compat_data.sh $(BACKWARD_COMPAT_DATA_URL) $(BACKWARD_COMPAT_DATA_BRANCH) tests/$(BACKWARD_COMPAT_DATA_DIR)
tfhe/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
tests/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
#
# Real use case examples
@@ -1303,9 +1314,7 @@ sha256_bool: install_rs_check_toolchain
.PHONY: pcc # pcc stands for pre commit checks (except GPU)
pcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_tested check_intra_md_links \
clippy_all check_compile_tests
# TFHE lints deactivated as it's incompatible with 1.83 - temporary
# tfhe_lints
clippy_all check_compile_tests test_tfhe_lints tfhe_lints
.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
pcc_gpu: clippy_gpu clippy_cuda_backend check_compile_tests_benches_gpu check_rust_bindings_did_not_change
@@ -1315,7 +1324,7 @@ fpcc: no_tfhe_typo no_dbg_log check_fmt check_typos lint_doc check_md_docs_are_t
check_compile_tests
.PHONY: conformance # Automatically fix problems that can be fixed
conformance: fix_newline fmt
conformance: fix_newline fmt fmt_js
#=============================== FFT Section ==================================
.PHONY: doc_fft # Build rust doc for tfhe-fft
@@ -1387,7 +1396,7 @@ test_fft_nightly: install_rs_check_toolchain
.PHONY: test_fft_no_std
test_fft_no_std: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --release -p tfhe-fft \
--no-default-features
--no-default-features
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --release -p tfhe-fft \
--no-default-features \
--features=fft128
@@ -1481,7 +1490,7 @@ test_ntt_nightly: install_rs_check_toolchain
.PHONY: test_ntt_no_std
test_ntt_no_std: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --release -p tfhe-ntt \
--no-default-features
--no-default-features
.PHONY: test_ntt_no_std_nightly
test_ntt_no_std_nightly: install_rs_check_toolchain

View File

@@ -95,7 +95,7 @@ fn main() {
val >>= 1;
}
}
let output_0_63 = "F4CD954A717F26A7D6930830C4E7CF0819F80E03F25F342C64ADC66ABA7F8A8E6EAA49F23632AE3CD41A7BD290A0132F81C6D4043B6E397D7388F3A03B5FE358".to_string();
let cipher_key = key.map(|x| FheBool::encrypt(x, &client_key));
@@ -129,24 +129,36 @@ Other sizes than 64 bit are expected to be available in the future.
# FHE shortint Trivium implementation
The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `PARAM_MESSAGE_1_CARRY_1_KS_PBS`). It uses a lower level API
of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run on the same
cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes
The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64`).
It uses a lower level API of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run
on the same cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes
its setup a little more intricate.
Example code:
```rust
use tfhe::shortint::prelude::*;
use tfhe::shortint::CastingKey;
use tfhe::shortint::parameters::{
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
};
use tfhe::{ConfigBuilder, generate_keys, FheUint64};
use tfhe::prelude::*;
use tfhe_trivium::TriviumStreamShortint;
fn test_shortint() {
let config = ConfigBuilder::default().build();
let config = ConfigBuilder::default()
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS);
let ksk = CastingKey::new((&client_key, &server_key), (&hl_client_key, &hl_server_key));
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB".to_string();
let mut key = [0; 80];

View File

@@ -1,23 +1,28 @@
use criterion::Criterion;
use tfhe::prelude::*;
use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
use tfhe::shortint::parameters::{
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
};
use tfhe::shortint::prelude::*;
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};
pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
let config = ConfigBuilder::default().build();
let config = ConfigBuilder::default()
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -57,18 +62,20 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
}
pub fn kreyvium_shortint_gen(c: &mut Criterion) {
let config = ConfigBuilder::default().build();
let config = ConfigBuilder::default()
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -103,18 +110,20 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
}
pub fn kreyvium_shortint_trans(c: &mut Criterion) {
let config = ConfigBuilder::default().build();
let config = ConfigBuilder::default()
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();

View File

@@ -1,23 +1,28 @@
use criterion::Criterion;
use tfhe::prelude::*;
use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
use tfhe::shortint::parameters::{
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
};
use tfhe::shortint::prelude::*;
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
use tfhe_trivium::{TransCiphering, TriviumStreamShortint};
pub fn trivium_shortint_warmup(c: &mut Criterion) {
let config = ConfigBuilder::default().build();
let config = ConfigBuilder::default()
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -57,18 +62,20 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
}
pub fn trivium_shortint_gen(c: &mut Criterion) {
let config = ConfigBuilder::default().build();
let config = ConfigBuilder::default()
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -103,18 +110,20 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
}
pub fn trivium_shortint_trans(c: &mut Criterion) {
let config = ConfigBuilder::default().build();
let config = ConfigBuilder::default()
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB".to_string();

View File

@@ -1,6 +1,9 @@
use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
use tfhe::prelude::*;
use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
use tfhe::shortint::parameters::{
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
};
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
// Values for these tests come from the github repo renaud1239/Kreyvium,
// commit fd6828f68711276c25f55e605935028f5e843f43
@@ -216,18 +219,20 @@ use tfhe::shortint::prelude::*;
#[test]
fn kreyvium_test_shortint_long() {
let config = ConfigBuilder::default().build();
let config = ConfigBuilder::default()
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();

View File

@@ -1,6 +1,9 @@
use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
use tfhe::prelude::*;
use tfhe::shortint::parameters::PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64;
use tfhe::shortint::parameters::{
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64,
};
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
// Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
// file testvectors/trivium-80.80.test-vectors
@@ -352,18 +355,20 @@ use tfhe::shortint::prelude::*;
#[test]
fn trivium_test_shortint_long() {
let config = ConfigBuilder::default().build();
let config = ConfigBuilder::default()
.use_custom_parameters(V0_11_PARAM_MESSAGE_2_CARRY_2_PBS_KS_GAUSSIAN_2M64)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
gen_keys(V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
V0_11_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS,
);
let key_string = "0053A6F94C9FF24598EB".to_string();

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-cuda-backend"
version = "0.6.0"
version = "0.7.0"
edition = "2021"
authors = ["Zama team"]
license = "BSD-3-Clause-Clear"
@@ -14,4 +14,4 @@ keywords = ["fully", "homomorphic", "encryption", "fhe", "cryptography"]
[build-dependencies]
cmake = { version = "0.1" }
pkg-config = { version = "0.3" }
bindgen = "0.70.1"
bindgen = "0.71"

View File

@@ -65,7 +65,7 @@ template <typename Torus> struct int_decompression {
Torus *tmp_extracted_lwe;
uint32_t *tmp_indexes_array;
int_radix_lut<Torus> *carry_extract_lut;
int_radix_lut<Torus> *decompression_rescale_lut;
int_decompression(cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, int_radix_params encryption_params,
@@ -84,7 +84,7 @@ template <typename Torus> struct int_decompression {
Torus lwe_accumulator_size = (compression_params.glwe_dimension *
compression_params.polynomial_size +
1);
carry_extract_lut = new int_radix_lut<Torus>(
decompression_rescale_lut = new int_radix_lut<Torus>(
streams, gpu_indexes, gpu_count, encryption_params, 1,
num_radix_blocks, allocate_gpu_memory);
@@ -97,18 +97,28 @@ template <typename Torus> struct int_decompression {
num_radix_blocks * lwe_accumulator_size * sizeof(Torus), streams[0],
gpu_indexes[0]);
// Carry extract LUT
auto carry_extract_f = [encryption_params](Torus x) -> Torus {
return x / encryption_params.message_modulus;
// Rescale is done using an identity LUT.
// Here we do not divide by message_modulus.
// Example: in the 2_2 case we are mapping a 2-bit message onto a 4-bit
// space. We want to keep the original 2-bit value in the 4-bit space,
// so we apply the identity and the encoding will rescale it for us.
auto decompression_rescale_f = [encryption_params](Torus x) -> Torus {
return x;
};
generate_device_accumulator<Torus>(
streams[0], gpu_indexes[0], carry_extract_lut->get_lut(0, 0),
encryption_params.glwe_dimension, encryption_params.polynomial_size,
encryption_params.message_modulus, encryption_params.carry_modulus,
carry_extract_f);
auto effective_compression_message_modulus =
encryption_params.carry_modulus;
auto effective_compression_carry_modulus = 1;
carry_extract_lut->broadcast_lut(streams, gpu_indexes, 0);
generate_device_accumulator_with_encoding<Torus>(
streams[0], gpu_indexes[0], decompression_rescale_lut->get_lut(0, 0),
encryption_params.glwe_dimension, encryption_params.polynomial_size,
effective_compression_message_modulus,
effective_compression_carry_modulus,
encryption_params.message_modulus, encryption_params.carry_modulus,
decompression_rescale_f);
decompression_rescale_lut->broadcast_lut(streams, gpu_indexes, 0);
}
}
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
@@ -117,8 +127,8 @@ template <typename Torus> struct int_decompression {
cuda_drop_async(tmp_extracted_lwe, streams[0], gpu_indexes[0]);
cuda_drop_async(tmp_indexes_array, streams[0], gpu_indexes[0]);
carry_extract_lut->release(streams, gpu_indexes, gpu_count);
delete carry_extract_lut;
decompression_rescale_lut->release(streams, gpu_indexes, gpu_count);
delete decompression_rescale_lut;
}
};
#endif

View File

@@ -38,6 +38,15 @@ void generate_device_accumulator_bivariate_with_factor(
cudaStream_t stream, uint32_t gpu_index, Torus *acc_bivariate,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t message_modulus,
uint32_t carry_modulus, std::function<Torus(Torus, Torus)> f, int factor);
/*
 * generate univariate accumulator (lut) for device pointer, with distinct
 * input and output encodings
 *  stream - cuda stream
 *  gpu_index - index of the GPU used for the copy to `acc`
 *  acc - device pointer for the accumulator
 *  glwe_dimension, polynomial_size - shape of the accumulator, which holds
 *    (glwe_dimension + 1) * polynomial_size Torus elements
 *  input_message_modulus, input_carry_modulus - encoding of the values the
 *    lut is indexed with
 *  output_message_modulus, output_carry_modulus - encoding used to scale the
 *    values returned by `f`
 *  f - function evaluated on every input value to fill the lut
 */
template <typename Torus>
void generate_device_accumulator_with_encoding(
cudaStream_t stream, uint32_t gpu_index, Torus *acc,
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t input_message_modulus, uint32_t input_carry_modulus,
uint32_t output_message_modulus, uint32_t output_carry_modulus,
std::function<Torus(Torus)> f);
/*
* generate univariate accumulator (lut) for device pointer
* stream - cuda stream

View File

@@ -5,45 +5,50 @@
extern "C" {
void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
void *lwe_array_out,
void const *lwe_array_in,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count);
void cuda_negate_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
void *lwe_array_out,
void const *lwe_array_in,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count);
void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
void *lwe_array_out,
void const *lwe_array_in_1,
void const *lwe_array_in_2,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count);
void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
void *lwe_array_out,
void const *lwe_array_in_1,
void const *lwe_array_in_2,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count);
void cuda_negate_lwe_ciphertext_vector_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void cuda_negate_lwe_ciphertext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void cuda_add_lwe_ciphertext_vector_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in_1, void const *lwe_array_in_2,
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void cuda_add_lwe_ciphertext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in_1, void const *lwe_array_in_2,
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, void const *plaintext_array_in,
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, void const *plaintext_array_in,
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, void const *cleartext_array_in,
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, void const *cleartext_array_in,
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count);
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
void cuda_add_lwe_ciphertext_vector_plaintext_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, const uint64_t plaintext_in,
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count);
}
#endif // CUDA_LINALG_H_

View File

@@ -26,15 +26,6 @@ template <typename Torus> uint64_t get_shared_mem_size_tgemm() {
return BLOCK_SIZE_GEMM * THREADS_GEMM * 2 * sizeof(Torus);
}
// Tells callers whether the fast packing keyswitch path may be used for the
// given parameters. The fast path is currently switched off: the function
// ignores all of its arguments and always returns false (see the TODO below).
__host__ inline bool can_use_pks_fast_path(uint32_t lwe_dimension,
uint32_t num_lwe,
uint32_t polynomial_size,
uint32_t level_count,
uint32_t glwe_dimension) {
// TODO: activate it back, fix tests and extend to level_count > 1
return false;
}
// Initialize decomposition by performing rounding
// and decomposing one level of an array of Torus LWEs. Only
// decomposes the mask elements of the incoming LWEs.
@@ -57,6 +48,8 @@ __global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,
// is lwe_dimension + 1, while for writing it is lwe_dimension
auto read_val_idx = lwe_idx * (lwe_dimension + 1) + lwe_sample_idx;
auto write_val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
auto write_state_idx =
num_lwe * lwe_dimension + lwe_idx * lwe_dimension + lwe_sample_idx;
Torus a_i = lwe_in[read_val_idx];
@@ -64,6 +57,8 @@ __global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,
Torus mod_b_mask = (1ll << base_log) - 1ll;
lwe_out[write_val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
synchronize_threads_in_block();
lwe_out[write_state_idx] = state;
}
// Continue decomposition of an array of Torus elements in place. Supposes
@@ -84,12 +79,16 @@ decompose_vectorize_step_inplace(Torus *buffer_in, uint32_t lwe_dimension,
return;
auto val_idx = lwe_idx * lwe_dimension + lwe_sample_idx;
auto state_idx = num_lwe * lwe_dimension + val_idx;
Torus state = buffer_in[val_idx];
Torus state = buffer_in[state_idx];
synchronize_threads_in_block();
Torus mod_b_mask = (1ll << base_log) - 1ll;
buffer_in[val_idx] = decompose_one<Torus>(state, mod_b_mask, base_log);
synchronize_threads_in_block();
buffer_in[state_idx] = state;
}
// Multiply matrices A, B of size (M, K), (K, N) respectively
@@ -99,6 +98,10 @@ decompose_vectorize_step_inplace(Torus *buffer_in, uint32_t lwe_dimension,
// BLOCK_SIZE_GEMM) splitting them in multiple tiles: (BLOCK_SIZE_GEMM,
// THREADS_GEMM)-shaped tiles of values from A, and a (THREADS_GEMM,
// BLOCK_SIZE_GEMM)-shaped tiles of values from B.
//
// This code is adapted by generalizing the 1d block-tiling
// kernel from https://github.com/siboehm/SGEMM_CUDA
// to any matrix dimension
template <typename Torus, typename TorusVec>
__global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
int stride_B, Torus *C) {
@@ -111,7 +114,6 @@ __global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
const uint cRow = blockIdx.y;
const uint cCol = blockIdx.x;
const uint totalResultsBlocktile = BM * BN;
const int threadCol = threadIdx.x % BN;
const int threadRow = threadIdx.x / BN;
@@ -152,7 +154,7 @@ __global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
} else {
Bs[innerRowB * BN + innerColB] = 0;
}
__syncthreads();
synchronize_threads_in_block();
// Advance blocktile for the next iteration of this loop
A += BK;
@@ -168,7 +170,7 @@ __global__ void tgemm(int M, int N, int K, const Torus *A, const Torus *B,
As[(threadRow * TM + resIdx) * BK + dotIdx] * tmp;
}
}
__syncthreads();
synchronize_threads_in_block();
}
// Initialize the pointer to the output block of size (BLOCK_SIZE_GEMM,
@@ -259,10 +261,6 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
// Optimization of packing keyswitch when packing many LWEs
if (level_count > 1) {
PANIC("Fast path PKS only supports level_count==1");
}
cudaSetDevice(gpu_index);
check_cuda_error(cudaGetLastError());
@@ -273,10 +271,11 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
// buffer and the keyswitched GLWEs in the second half of the buffer. Thus the
// scratch buffer for the fast path must determine the half-size of the
// scratch buffer as the max between the size of the GLWE and the size of the
// LWE-mask
int memory_unit = glwe_accumulator_size > lwe_dimension
// LWE-mask times two (to keep both decomposition state and decomposed
// intermediate value)
int memory_unit = glwe_accumulator_size > lwe_dimension * 2
? glwe_accumulator_size
: lwe_dimension;
: lwe_dimension * 2;
// ping pong the buffer between successive calls
// split the buffer in two parts of this size
@@ -309,7 +308,7 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
CEIL_DIV(num_lwes, BLOCK_SIZE_GEMM));
dim3 threads_gemm(BLOCK_SIZE_GEMM * THREADS_GEMM);
auto stride_KSK_buffer = glwe_accumulator_size;
auto stride_KSK_buffer = glwe_accumulator_size * level_count;
uint32_t shared_mem_size = get_shared_mem_size_tgemm<Torus>();
tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
@@ -317,21 +316,20 @@ __host__ void host_fast_packing_keyswitch_lwe_list_to_glwe(
stride_KSK_buffer, d_mem_1);
check_cuda_error(cudaGetLastError());
/*
TODO: transpose key to generalize to level_count > 1
auto ksk_block_size = glwe_accumulator_size;
for (int li = 1; li < level_count; ++li) {
decompose_vectorize_step_inplace<Torus, TorusVec>
<<<grid_decomp, threads_decomp, 0, stream>>>(
d_mem_0, lwe_dimension, num_lwes, base_log, level_count);
check_cuda_error(cudaGetLastError());
for (int li = 1; li < level_count; ++li) {
decompose_vectorize_step_inplace<Torus, TorusVec>
<<<grid_decomp, threads_decomp, 0, stream>>>(
d_mem_0, lwe_dimension, num_lwes, base_log, level_count);
check_cuda_error(cudaGetLastError());
tgemm<Torus, TorusVec><<<grid_gemm, threads_gemm, shared_mem_size,
stream>>>( num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0,
fp_ksk_array + li * ksk_block_size, stride_KSK_buffer, d_mem_1);
check_cuda_error(cudaGetLastError());
}
*/
tgemm<Torus, TorusVec>
<<<grid_gemm, threads_gemm, shared_mem_size, stream>>>(
num_lwes, glwe_accumulator_size, lwe_dimension, d_mem_0,
fp_ksk_array + li * ksk_block_size, stride_KSK_buffer, d_mem_1);
check_cuda_error(cudaGetLastError());
}
// should we include the mask in the rotation ??
dim3 grid_rotate(CEIL_DIV(num_lwes, BLOCK_SIZE_DECOMP),

View File

@@ -73,24 +73,13 @@ void cuda_packing_keyswitch_lwe_list_to_glwe_64(
uint32_t output_polynomial_size, uint32_t base_log, uint32_t level_count,
uint32_t num_lwes) {
if (can_use_pks_fast_path(input_lwe_dimension, num_lwes,
output_polynomial_size, level_count,
output_glwe_dimension)) {
host_fast_packing_keyswitch_lwe_list_to_glwe<uint64_t, ulonglong4>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(glwe_array_out),
static_cast<const uint64_t *>(lwe_array_in),
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
base_log, level_count, num_lwes);
} else
host_packing_keyswitch_lwe_list_to_glwe<uint64_t>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(glwe_array_out),
static_cast<const uint64_t *>(lwe_array_in),
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
base_log, level_count, num_lwes);
host_fast_packing_keyswitch_lwe_list_to_glwe<uint64_t, ulonglong4>(
static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(glwe_array_out),
static_cast<const uint64_t *>(lwe_array_in),
static_cast<const uint64_t *>(fp_ksk_array), fp_ks_buffer,
input_lwe_dimension, output_glwe_dimension, output_polynomial_size,
base_log, level_count, num_lwes);
}
void cleanup_packing_keyswitch_lwe_list_to_glwe(void *stream,

View File

@@ -164,9 +164,11 @@ __host__ void scratch_packing_keyswitch_lwe_list_to_glwe(
int glwe_accumulator_size = (glwe_dimension + 1) * polynomial_size;
int memory_unit = glwe_accumulator_size > lwe_dimension
// allocate at least LWE-mask times two: to keep both decomposition state and
// decomposed intermediate value
int memory_unit = glwe_accumulator_size > lwe_dimension * 2
? glwe_accumulator_size
: lwe_dimension;
: lwe_dimension * 2;
if (allocate_gpu_memory) {
*fp_ks_buffer = (int8_t *)cuda_malloc_async(
@@ -221,44 +223,6 @@ __device__ void packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext(
}
}
// Public functional packing keyswitch over a batch of LWE ciphertexts.
//
// The y-block index selects which input LWE a thread works on. For each
// input, (glwe_dimension+1) * polynomial_size threads are expected, spread
// over the thread blocks of the x-axis.
template <typename Torus>
__global__ void packing_keyswitch_lwe_list_to_glwe(
    Torus *glwe_array_out, Torus const *lwe_array_in, Torus const *fp_ksk,
    uint32_t lwe_dimension_in, uint32_t glwe_dimension,
    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
    Torus *d_mem) {
  // Which input this thread handles, and its position along the x-axis
  const int lwe_idx = blockIdx.y;
  const int coeff_idx = threadIdx.x + blockIdx.x * blockDim.x;

  const int glwe_len = (glwe_dimension + 1) * polynomial_size;
  const int lwe_len = (lwe_dimension_in + 1);

  // Slices belonging to the selected input: the source LWE, the scratch GLWE
  // receiving the keyswitch result, and the destination GLWE
  auto src_lwe = lwe_array_in + lwe_idx * lwe_len;
  auto scratch_glwe = d_mem + lwe_idx * glwe_len;
  auto dst_glwe = glwe_array_out + lwe_idx * glwe_len;

  // Keyswitch the LWE into a GLWE stored in the scratch area
  packing_keyswitch_lwe_ciphertext_into_glwe_ciphertext<Torus>(
      scratch_glwe, src_lwe, fp_ksk, lwe_dimension_in, glwe_dimension,
      polynomial_size, base_log, level_count);

  // P * x^lwe_idx: the monomial degree is the index of the input in the batch.
  // poly_start is the offset of the polynomial this thread belongs to.
  const auto poly_start = (coeff_idx / polynomial_size) * polynomial_size;
  polynomial_accumulate_monic_monomial_mul<Torus>(
      dst_glwe + poly_start, scratch_glwe + poly_start, lwe_idx,
      coeff_idx % polynomial_size, polynomial_size, 1, true);
}
/// To-do: Rewrite this kernel for efficiency
template <typename Torus>
__global__ void accumulate_glwes(Torus *glwe_out, Torus *glwe_array_in,
@@ -276,52 +240,4 @@ __global__ void accumulate_glwes(Torus *glwe_out, Torus *glwe_array_in,
}
}
// Packs `num_lwes` LWE ciphertexts into a single GLWE written to `glwe_out`.
//
// Each LWE is first keyswitched individually into its own GLWE inside the
// scratch buffer `fp_ks_buffer`, then all of those GLWEs are accumulated into
// the output. Panics when more than `polynomial_size` LWEs are given.
template <typename Torus>
__host__ void host_packing_keyswitch_lwe_list_to_glwe(
    cudaStream_t stream, uint32_t gpu_index, Torus *glwe_out,
    Torus const *lwe_array_in, Torus const *fp_ksk_array, int8_t *fp_ks_buffer,
    uint32_t lwe_dimension_in, uint32_t glwe_dimension,
    uint32_t polynomial_size, uint32_t base_log, uint32_t level_count,
    uint32_t num_lwes) {

  if (num_lwes > polynomial_size)
    PANIC("Cuda error: too many LWEs to pack. The number of LWEs should be "
          "smaller than "
          "polynomial_size.")

  cudaSetDevice(gpu_index);

  // Launch geometry: x covers the GLWE coefficients, y covers the inputs
  int glwe_len = (glwe_dimension + 1) * polynomial_size;
  int block_count = 0, threads_per_block = 0;
  getNumBlocksAndThreads(glwe_len, 128, block_count, threads_per_block);
  dim3 ks_grid(block_count, num_lwes);
  dim3 block_dim(threads_per_block);

  // The scratch buffer is used as two halves of num_lwes * half_stride
  // elements each, where half_stride is the larger of the GLWE size and the
  // LWE mask size. This keeps the layout compatible with the fast path, which
  // stores decomposed masks in the first half. Here the first half is handed
  // to the kernel as working memory and the second half receives the
  // individually keyswitched GLWEs.
  int half_stride = glwe_len > lwe_dimension_in ? glwe_len : lwe_dimension_in;
  auto scratch = reinterpret_cast<Torus *>(fp_ks_buffer);
  auto ks_glwes = scratch + num_lwes * half_stride;

  // Step 1: keyswitch every LWE on its own
  packing_keyswitch_lwe_list_to_glwe<Torus>
      <<<ks_grid, block_dim, 0, stream>>>(
          ks_glwes, lwe_array_in, fp_ksk_array, lwe_dimension_in,
          glwe_dimension, polynomial_size, base_log, level_count, scratch);
  check_cuda_error(cudaGetLastError());

  // Step 2: fold all the intermediate GLWEs into the single output GLWE
  accumulate_glwes<Torus><<<block_count, block_dim, 0, stream>>>(
      glwe_out, ks_glwes, glwe_dimension, polynomial_size, num_lwes);
  check_cuda_error(cudaGetLastError());
}
#endif

View File

@@ -117,21 +117,11 @@ host_integer_compress(cudaStream_t const *streams, uint32_t const *gpu_indexes,
while (rem_lwes > 0) {
auto chunk_size = min(rem_lwes, mem_ptr->lwe_per_glwe);
if (can_use_pks_fast_path(
input_lwe_dimension, chunk_size, compression_params.polynomial_size,
compression_params.ks_level, compression_params.glwe_dimension)) {
host_fast_packing_keyswitch_lwe_list_to_glwe<Torus, ulonglong4>(
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
compression_params.polynomial_size, compression_params.ks_base_log,
compression_params.ks_level, chunk_size);
} else {
host_packing_keyswitch_lwe_list_to_glwe<Torus>(
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
compression_params.polynomial_size, compression_params.ks_base_log,
compression_params.ks_level, chunk_size);
}
host_fast_packing_keyswitch_lwe_list_to_glwe<Torus, ulonglong4>(
streams[0], gpu_indexes[0], glwe_out, lwe_subset, fp_ksk[0],
fp_ks_buffer, input_lwe_dimension, compression_params.glwe_dimension,
compression_params.polynomial_size, compression_params.ks_base_log,
compression_params.ks_level, chunk_size);
rem_lwes -= chunk_size;
lwe_subset += chunk_size * lwe_in_size;
@@ -311,7 +301,7 @@ __host__ void host_integer_decompress(
/// Apply PBS to apply a LUT, reduce the noise and go from a small LWE
/// dimension to a big LWE dimension
auto encryption_params = h_mem_ptr->encryption_params;
auto lut = h_mem_ptr->carry_extract_lut;
auto lut = h_mem_ptr->decompression_rescale_lut;
auto active_gpu_count = get_active_gpu_count(num_radix_blocks, gpu_count);
if (active_gpu_count == 1) {
execute_pbs_async<Torus>(

View File

@@ -627,26 +627,48 @@ void rotate_left(Torus *buffer, int mid, uint32_t array_length) {
std::rotate(buffer, buffer + mid, buffer + array_length);
}
/// Caller needs to ensure that the operation applied is coherent from an
/// encoding perspective.
///
/// For example:
///
/// Input encoding has 2 bits and output encoding has 4 bits, applying the
/// identity lut would map the following:
///
/// 0|00|xx -> 0|00|00
/// 0|01|xx -> 0|00|01
/// 0|10|xx -> 0|00|10
/// 0|11|xx -> 0|00|11
///
/// The reason is the identity function is computed in the input space but the
/// scaling is done in the output space, as there are more bits in the output
/// space, the delta is smaller hence the apparent "division" happening.
template <typename Torus>
void generate_lookup_table(Torus *acc, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t message_modulus,
uint32_t carry_modulus,
std::function<Torus(Torus)> f) {
void generate_lookup_table_with_encoding(Torus *acc, uint32_t glwe_dimension,
uint32_t polynomial_size,
uint32_t input_message_modulus,
uint32_t input_carry_modulus,
uint32_t output_message_modulus,
uint32_t output_carry_modulus,
std::function<Torus(Torus)> f) {
uint32_t modulus_sup = message_modulus * carry_modulus;
uint32_t box_size = polynomial_size / modulus_sup;
Torus delta = (1ul << 63) / modulus_sup;
uint32_t input_modulus_sup = input_message_modulus * input_carry_modulus;
uint32_t output_modulus_sup = output_message_modulus * output_carry_modulus;
uint32_t box_size = polynomial_size / input_modulus_sup;
auto nbits = sizeof(Torus) * 8;
Torus output_delta =
(static_cast<Torus>(1) << (nbits - 1)) / output_modulus_sup;
memset(acc, 0, glwe_dimension * polynomial_size * sizeof(Torus));
auto body = &acc[glwe_dimension * polynomial_size];
// This accumulator extracts the carry bits
for (int i = 0; i < modulus_sup; i++) {
for (int i = 0; i < input_modulus_sup; i++) {
int index = i * box_size;
for (int j = index; j < index + box_size; j++) {
auto f_eval = f(i);
body[j] = f_eval * delta;
body[j] = f_eval * output_delta;
}
}
@@ -660,6 +682,16 @@ void generate_lookup_table(Torus *acc, uint32_t glwe_dimension,
rotate_left<Torus>(body, half_box_size, polynomial_size);
}
// Builds a lookup table whose input and output share the same encoding.
//
// Thin wrapper around generate_lookup_table_with_encoding: the given
// message/carry moduli are used for both the input and the output side.
template <typename Torus>
void generate_lookup_table(Torus *acc, uint32_t glwe_dimension,
                           uint32_t polynomial_size, uint32_t message_modulus,
                           uint32_t carry_modulus,
                           std::function<Torus(Torus)> f) {
  generate_lookup_table_with_encoding<Torus>(
      acc, glwe_dimension, polynomial_size,
      /* input encoding */ message_modulus, carry_modulus,
      /* output encoding */ message_modulus, carry_modulus, f);
}
template <typename Torus>
void generate_many_lookup_table(
Torus *acc, uint32_t glwe_dimension, uint32_t polynomial_size,
@@ -668,7 +700,8 @@ void generate_many_lookup_table(
uint32_t modulus_sup = message_modulus * carry_modulus;
uint32_t box_size = polynomial_size / modulus_sup;
Torus delta = (1ul << 63) / modulus_sup;
auto nbits = sizeof(Torus) * 8;
Torus delta = (static_cast<Torus>(1) << (nbits - 1)) / modulus_sup;
memset(acc, 0, glwe_dimension * polynomial_size * sizeof(Torus));
@@ -803,6 +836,32 @@ void generate_device_accumulator_bivariate_with_factor(
free(h_lut);
}
// Builds a univariate lookup table on the host, with possibly different input
// and output encodings, and uploads it to the device buffer `acc`.
//
// The stream is synchronized before returning, so the temporary host buffer
// can be released immediately after the copy.
template <typename Torus>
void generate_device_accumulator_with_encoding(
    cudaStream_t stream, uint32_t gpu_index, Torus *acc,
    uint32_t glwe_dimension, uint32_t polynomial_size,
    uint32_t input_message_modulus, uint32_t input_carry_modulus,
    uint32_t output_message_modulus, uint32_t output_carry_modulus,
    std::function<Torus(Torus)> f) {

  // Size in bytes of a full accumulator (mask polynomials plus body)
  const size_t lut_bytes =
      (glwe_dimension + 1) * polynomial_size * sizeof(Torus);

  // Staging buffer on the host
  Torus *host_lut = static_cast<Torus *>(malloc(lut_bytes));

  // Fill the table on the host
  generate_lookup_table_with_encoding<Torus>(
      host_lut, glwe_dimension, polynomial_size, input_message_modulus,
      input_carry_modulus, output_message_modulus, output_carry_modulus, f);

  // Upload to the device and wait for the copy before freeing the staging
  // buffer
  cuda_memcpy_async_to_gpu(acc, host_lut, lut_bytes, stream, gpu_index);
  cuda_synchronize_stream(stream, gpu_index);

  free(host_lut);
}
/*
* generate accumulator for device pointer
* v_stream - cuda stream
@@ -818,21 +877,9 @@ void generate_device_accumulator(cudaStream_t stream, uint32_t gpu_index,
uint32_t carry_modulus,
std::function<Torus(Torus)> f) {
// host lut
Torus *h_lut =
(Torus *)malloc((glwe_dimension + 1) * polynomial_size * sizeof(Torus));
// fill accumulator
generate_lookup_table<Torus>(h_lut, glwe_dimension, polynomial_size,
message_modulus, carry_modulus, f);
// copy host lut and lut_indexes_vec to device
cuda_memcpy_async_to_gpu(
acc, h_lut, (glwe_dimension + 1) * polynomial_size * sizeof(Torus),
stream, gpu_index);
cuda_synchronize_stream(stream, gpu_index);
free(h_lut);
generate_device_accumulator_with_encoding(
stream, gpu_index, acc, glwe_dimension, polynomial_size, message_modulus,
carry_modulus, message_modulus, carry_modulus, f);
}
/*
@@ -1055,7 +1102,8 @@ void host_compute_propagation_simulators_and_group_carries(
message_modulus, carry_modulus);
uint32_t modulus_sup = message_modulus * carry_modulus;
Torus delta = (1ull << 63) / modulus_sup;
auto nbits = sizeof(Torus) * 8;
Torus delta = (static_cast<Torus>(1) << (nbits - 1)) / modulus_sup;
auto simulators = mem->simulators;
auto grouping_pgns = mem->grouping_pgns;
host_radix_split_simulators_and_grouping_pgns<Torus>(
@@ -1382,8 +1430,8 @@ __host__ void
create_trivial_radix(cudaStream_t stream, uint32_t gpu_index,
Torus *lwe_array_out, Torus const *scalar_array,
uint32_t lwe_dimension, uint32_t num_radix_blocks,
uint32_t num_scalar_blocks, uint64_t message_modulus,
uint64_t carry_modulus) {
uint32_t num_scalar_blocks, Torus message_modulus,
Torus carry_modulus) {
cudaSetDevice(gpu_index);
size_t radix_size = (lwe_dimension + 1) * num_radix_blocks;
@@ -1403,7 +1451,9 @@ create_trivial_radix(cudaStream_t stream, uint32_t gpu_index,
// Value of the shift we multiply our messages by
// If message_modulus and carry_modulus are always powers of 2 we can simplify
// this
uint64_t delta = ((uint64_t)1 << 63) / (message_modulus * carry_modulus);
auto nbits = sizeof(Torus) * 8;
Torus delta = (static_cast<Torus>(1) << (nbits - 1)) /
(message_modulus * carry_modulus);
device_create_trivial_radix<Torus><<<grid, thds, 0, stream>>>(
lwe_array_out, scalar_array, num_scalar_blocks, lwe_dimension, delta);

View File

@@ -4,12 +4,11 @@
* Perform the addition of two u32 input LWE ciphertext vectors.
* See the equivalent operation on u64 ciphertexts for more details.
*/
void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
void *lwe_array_out,
void const *lwe_array_in_1,
void const *lwe_array_in_2,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count) {
void cuda_add_lwe_ciphertext_vector_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in_1, void const *lwe_array_in_2,
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
host_addition<uint32_t>(static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint32_t *>(lwe_array_out),
@@ -44,12 +43,11 @@ void cuda_add_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
* vectors are left unchanged. This function is a wrapper to a device function
* that performs the operation on the GPU.
*/
void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
void *lwe_array_out,
void const *lwe_array_in_1,
void const *lwe_array_in_2,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count) {
void cuda_add_lwe_ciphertext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in_1, void const *lwe_array_in_2,
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
host_addition<uint64_t>(static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(lwe_array_out),
@@ -65,7 +63,8 @@ void cuda_add_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, void const *plaintext_array_in,
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
host_addition_plaintext<uint32_t>(
static_cast<cudaStream_t>(stream), gpu_index,
@@ -105,7 +104,8 @@ void cuda_add_lwe_ciphertext_vector_plaintext_vector_32(
void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, void const *plaintext_array_in,
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
host_addition_plaintext<uint64_t>(
static_cast<cudaStream_t>(stream), gpu_index,
@@ -114,3 +114,41 @@ void cuda_add_lwe_ciphertext_vector_plaintext_vector_64(
static_cast<const uint64_t *>(plaintext_array_in), input_lwe_dimension,
input_lwe_ciphertext_count);
}
/*
 * Add a single u64 plaintext scalar to every ciphertext of a u64 LWE
 * ciphertext vector.
 * - `stream` is a void pointer to the Cuda stream to be used in the kernel
 * launch
 * - `gpu_index` is the index of the GPU to be used in the kernel launch
 * - `lwe_array_out` is an array of size
 * `(input_lwe_dimension + 1) * input_lwe_ciphertext_count` that should have
 * been allocated on the GPU before calling this function, and that will hold
 * the result of the computation.
 * - `lwe_array_in` is the LWE ciphertext vector used as input, it should have
 * been allocated and initialized before calling this function. It has the same
 * size as the output array.
 * - `plaintext_in` is the plaintext scalar used as input.
 * - `input_lwe_dimension` is the number of mask elements in the input and
 * output LWE ciphertext vectors
 * - `input_lwe_ciphertext_count` is the number of ciphertexts contained in the
 * input LWE ciphertext vector, as well as in the output.
 *
 * The same plaintext value is added to the body of each LWE ciphertext of the
 * input vector, and the result is stored in the output LWE ciphertext vector.
 * The input vector is only read. This function is a wrapper to a device
 * function that performs the operation on the GPU.
 */
void cuda_add_lwe_ciphertext_vector_plaintext_64(
    void *stream, uint32_t gpu_index, void *lwe_array_out,
    void const *lwe_array_in, const uint64_t plaintext_in,
    const uint32_t input_lwe_dimension,
    const uint32_t input_lwe_ciphertext_count) {

  // Recover the typed handles from the opaque pointers of the C interface
  auto cuda_stream = static_cast<cudaStream_t>(stream);
  auto *typed_out = static_cast<uint64_t *>(lwe_array_out);
  auto const *typed_in = static_cast<const uint64_t *>(lwe_array_in);

  host_addition_plaintext_scalar<uint64_t>(
      cuda_stream, gpu_index, typed_out, typed_in, plaintext_in,
      input_lwe_dimension, input_lwe_ciphertext_count);
}

View File

@@ -13,9 +13,10 @@
#include <stdio.h>
template <typename T>
__global__ void
plaintext_addition(T *output, T const *lwe_input, T const *plaintext_input,
uint32_t input_lwe_dimension, uint32_t num_entries) {
__global__ void plaintext_addition(T *output, T const *lwe_input,
T const *plaintext_input,
const uint32_t input_lwe_dimension,
const uint32_t num_entries) {
int tid = threadIdx.x;
int plaintext_index = blockIdx.x * blockDim.x + tid;
@@ -28,10 +29,26 @@ plaintext_addition(T *output, T const *lwe_input, T const *plaintext_input,
}
template <typename T>
__host__ void
host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index, T *output,
T const *lwe_input, T const *plaintext_input,
uint32_t lwe_dimension, uint32_t lwe_ciphertext_count) {
__global__ void plaintext_addition_scalar(T *output, T const *lwe_input,
                                          const T plaintext_input,
                                          const uint32_t input_lwe_dimension,
                                          const uint32_t num_entries) {
  // One thread per ciphertext: the global thread index is the ciphertext index
  int ct_idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (ct_idx >= num_entries)
    return;

  // The body is the last element of a ciphertext of input_lwe_dimension + 1
  // elements; only that element is written by this kernel
  int body_idx = ct_idx * (input_lwe_dimension + 1) + input_lwe_dimension;
  // Here we take advantage of the wrapping behaviour of uint
  output[body_idx] = lwe_input[body_idx] + plaintext_input;
}
template <typename T>
__host__ void host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index,
T *output, T const *lwe_input,
T const *plaintext_input,
const uint32_t lwe_dimension,
const uint32_t lwe_ciphertext_count) {
cudaSetDevice(gpu_index);
int num_blocks = 0, num_threads = 0;
@@ -48,6 +65,27 @@ host_addition_plaintext(cudaStream_t stream, uint32_t gpu_index, T *output,
check_cuda_error(cudaGetLastError());
}
// Adds the scalar `plaintext_input` to the body of each of the
// `lwe_ciphertext_count` LWE ciphertexts of `lwe_input` and stores the
// resulting ciphertexts in `output`.
//
// The whole input vector is first copied to the output, then a kernel
// overwrites the body of every output ciphertext with the sum.
template <typename T>
__host__ void host_addition_plaintext_scalar(
    cudaStream_t stream, uint32_t gpu_index, T *output, T const *lwe_input,
    const T plaintext_input, const uint32_t lwe_dimension,
    const uint32_t lwe_ciphertext_count) {

  cudaSetDevice(gpu_index);

  // One thread per ciphertext
  int block_count = 0, threads_per_block = 0;
  int num_entries = lwe_ciphertext_count;
  getNumBlocksAndThreads(num_entries, 512, block_count, threads_per_block);
  dim3 launch_grid(block_count, 1, 1);
  dim3 launch_block(threads_per_block, 1, 1);

  // Copy masks and bodies over; the kernel below only rewrites the bodies
  cuda_memcpy_async_gpu_to_gpu(
      output, lwe_input,
      (lwe_dimension + 1) * lwe_ciphertext_count * sizeof(T), stream,
      gpu_index);

  plaintext_addition_scalar<T><<<launch_grid, launch_block, 0, stream>>>(
      output, lwe_input, plaintext_input, lwe_dimension, num_entries);
  check_cuda_error(cudaGetLastError());
}
template <typename T>
__global__ void addition(T *output, T const *input_1, T const *input_2,
uint32_t num_entries) {
@@ -64,8 +102,8 @@ __global__ void addition(T *output, T const *input_1, T const *input_2,
template <typename T>
__host__ void host_addition(cudaStream_t stream, uint32_t gpu_index, T *output,
T const *input_1, T const *input_2,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count) {
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
cudaSetDevice(gpu_index);
// lwe_size includes the presence of the body

View File

@@ -7,7 +7,8 @@
void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, void const *cleartext_array_in,
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
host_cleartext_vec_multiplication<uint32_t>(
static_cast<cudaStream_t>(stream), gpu_index,
@@ -47,7 +48,8 @@ void cuda_mult_lwe_ciphertext_vector_cleartext_vector_32(
void cuda_mult_lwe_ciphertext_vector_cleartext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, void const *cleartext_array_in,
uint32_t input_lwe_dimension, uint32_t input_lwe_ciphertext_count) {
const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
host_cleartext_vec_multiplication<uint64_t>(
static_cast<cudaStream_t>(stream), gpu_index,

View File

@@ -16,8 +16,8 @@
template <typename T>
__global__ void cleartext_vec_multiplication(T *output, T const *lwe_input,
T const *cleartext_input,
uint32_t input_lwe_dimension,
uint32_t num_entries) {
const uint32_t input_lwe_dimension,
const uint32_t num_entries) {
int tid = threadIdx.x;
int index = blockIdx.x * blockDim.x + tid;
@@ -31,8 +31,8 @@ __global__ void cleartext_vec_multiplication(T *output, T const *lwe_input,
template <typename T>
__host__ void host_cleartext_vec_multiplication(
cudaStream_t stream, uint32_t gpu_index, T *output, T const *lwe_input,
T const *cleartext_input, uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count) {
T const *cleartext_input, const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
cudaSetDevice(gpu_index);
// lwe_size includes the presence of the body

View File

@@ -4,11 +4,10 @@
* Perform the negation of a u32 input LWE ciphertext vector.
* See the equivalent operation on u64 ciphertexts for more details.
*/
void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
void *lwe_array_out,
void const *lwe_array_in,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count) {
void cuda_negate_lwe_ciphertext_vector_32(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
host_negation<uint32_t>(static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint32_t *>(lwe_array_out),
@@ -38,11 +37,10 @@ void cuda_negate_lwe_ciphertext_vector_32(void *stream, uint32_t gpu_index,
* LWE ciphertext vector is left unchanged. This function is a wrapper to a
* device function that performs the operation on the GPU.
*/
void cuda_negate_lwe_ciphertext_vector_64(void *stream, uint32_t gpu_index,
void *lwe_array_out,
void const *lwe_array_in,
uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count) {
void cuda_negate_lwe_ciphertext_vector_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
host_negation<uint64_t>(static_cast<cudaStream_t>(stream), gpu_index,
static_cast<uint64_t *>(lwe_array_out),

View File

@@ -23,8 +23,8 @@ __global__ void negation(T *output, T const *input, uint32_t num_entries) {
template <typename T>
__host__ void host_negation(cudaStream_t stream, uint32_t gpu_index, T *output,
T const *input, uint32_t input_lwe_dimension,
uint32_t input_lwe_ciphertext_count) {
T const *input, const uint32_t input_lwe_dimension,
const uint32_t input_lwe_ciphertext_count) {
cudaSetDevice(gpu_index);
// lwe_size includes the presence of the body

View File

@@ -480,30 +480,20 @@ __host__ void host_programmable_bootstrap(
double2 *global_join_buffer = pbs_buffer->global_join_buffer;
int8_t *d_mem = pbs_buffer->d_mem;
bool graphCreated = false;
cudaGraph_t graph;
cudaGraphExec_t instance;
for (int i = 0; i < lwe_dimension; i++) {
if (!graphCreated) {
cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal);
execute_step_one<Torus, params>(
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, global_accumulator,
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
partial_sm, partial_dm_step_one, full_sm_step_one, full_dm_step_one);
execute_step_two<Torus, params>(
stream, gpu_index, lwe_array_out, lwe_output_indexes, lut_vector,
lut_vector_indexes, bootstrapping_key, global_accumulator,
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
partial_sm, partial_dm_step_two, full_sm_step_two, full_dm_step_two,
num_many_lut, lut_stride);
cudaStreamEndCapture(stream, &graph);
cudaGraphInstantiate(&instance, graph, NULL, NULL, 0);
graphCreated = true;
}
cudaGraphLaunch(instance, stream);
execute_step_one<Torus, params>(
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, bootstrapping_key, global_accumulator,
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
partial_sm, partial_dm_step_one, full_sm_step_one, full_dm_step_one);
execute_step_two<Torus, params>(
stream, gpu_index, lwe_array_out, lwe_output_indexes, lut_vector,
lut_vector_indexes, bootstrapping_key, global_accumulator,
global_join_buffer, input_lwe_ciphertext_count, lwe_dimension,
glwe_dimension, polynomial_size, base_log, level_count, d_mem, i,
partial_sm, partial_dm_step_two, full_sm_step_two, full_dm_step_two,
num_many_lut, lut_stride);
}
}

View File

@@ -649,41 +649,29 @@ __host__ void host_multi_bit_programmable_bootstrap(
auto lwe_chunk_size = buffer->lwe_chunk_size;
bool graphCreated = false;
cudaGraph_t graph;
cudaGraphExec_t instance;
for (uint32_t lwe_offset = 0; lwe_offset < (lwe_dimension / grouping_factor);
lwe_offset += lwe_chunk_size) {
if (!graphCreated) {
cudaStreamBeginCapture(stream, cudaStreamCaptureModeThreadLocal);
// Compute a keybundle
execute_compute_keybundle<Torus, params>(
stream, gpu_index, lwe_array_in, lwe_input_indexes, bootstrapping_key,
buffer, num_samples, lwe_dimension, glwe_dimension, polynomial_size,
grouping_factor, level_count, lwe_offset);
// Accumulate
uint32_t chunk_size = std::min(
lwe_chunk_size, (lwe_dimension / grouping_factor) - lwe_offset);
for (uint32_t j = 0; j < chunk_size; j++) {
execute_step_one<Torus, params>(
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, buffer, num_samples, lwe_dimension,
glwe_dimension, polynomial_size, base_log, level_count, j,
lwe_offset);
// Compute a keybundle
execute_compute_keybundle<Torus, params>(
stream, gpu_index, lwe_array_in, lwe_input_indexes, bootstrapping_key,
buffer, num_samples, lwe_dimension, glwe_dimension, polynomial_size,
grouping_factor, level_count, lwe_offset);
// Accumulate
uint32_t chunk_size = std::min(
lwe_chunk_size, (lwe_dimension / grouping_factor) - lwe_offset);
for (uint32_t j = 0; j < chunk_size; j++) {
execute_step_one<Torus, params>(
stream, gpu_index, lut_vector, lut_vector_indexes, lwe_array_in,
lwe_input_indexes, buffer, num_samples, lwe_dimension, glwe_dimension,
polynomial_size, base_log, level_count, j, lwe_offset);
execute_step_two<Torus, params>(
stream, gpu_index, lwe_array_out, lwe_output_indexes, buffer,
num_samples, lwe_dimension, glwe_dimension, polynomial_size,
grouping_factor, level_count, j, lwe_offset, num_many_lut,
lut_stride);
}
cudaStreamEndCapture(stream, &graph);
cudaGraphInstantiate(&instance, graph, NULL, NULL, 0);
graphCreated = true;
execute_step_two<Torus, params>(
stream, gpu_index, lwe_array_out, lwe_output_indexes, buffer,
num_samples, lwe_dimension, glwe_dimension, polynomial_size,
grouping_factor, level_count, j, lwe_offset, num_many_lut,
lut_stride);
}
cudaGraphLaunch(instance, stream);
}
}
#endif // MULTIBIT_PBS_H

View File

@@ -237,7 +237,7 @@ TEST_P(ClassicalProgrammableBootstrapTestPrimitives_u64, bootstrap) {
(ClassicalProgrammableBootstrapTestParams){
887, 1, 2048, new_t_uniform(46), new_t_uniform(17), 22, 1, 4, 4,
100, 1, 1},
// PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64
// V0_11_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64
(ClassicalProgrammableBootstrapTestParams){
977, 1, 8192, new_gaussian_from_std_dev(3.0144389706858286e-07),
new_gaussian_from_std_dev(2.168404344971009e-19), 16, 2, 8, 8, 100,

View File

@@ -1345,6 +1345,17 @@ extern "C" {
input_lwe_ciphertext_count: u32,
);
}
extern "C" {
pub fn cuda_add_lwe_ciphertext_vector_plaintext_64(
stream: *mut ffi::c_void,
gpu_index: u32,
lwe_array_out: *mut ffi::c_void,
lwe_array_in: *const ffi::c_void,
plaintext_in: u64,
input_lwe_dimension: u32,
input_lwe_ciphertext_count: u32,
);
}
extern "C" {
pub fn cuda_fourier_polynomial_mul(
stream: *mut ffi::c_void,

View File

@@ -1,40 +0,0 @@
FROM ubuntu:22.04
ENV TZ=Europe/Paris
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
# Replace default archive.ubuntu.com with fr mirror
# original archive showed performance issues and is farther away
RUN sed -i 's|^deb http://archive.ubuntu.com/ubuntu/|deb http://mirror.ubuntu.ikoula.com/|g' /etc/apt/sources.list && \
sed -i 's|^deb http://security.ubuntu.com/ubuntu/|deb http://mirror.ubuntu.ikoula.com/|g' /etc/apt/sources.list
ENV CARGO_TARGET_DIR=/root/tfhe-rs-target
ARG RUST_TOOLCHAIN="stable"
ARG NODE_VERSION
WORKDIR /tfhe-wasm-tests
RUN apt-get update && \
apt-get install -y \
build-essential \
curl \
git \
python3 \
python3-pip \
python3-venv && \
rm -rf /var/lib/apt/lists/*
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs > install-rustup.sh && \
chmod +x install-rustup.sh && \
./install-rustup.sh -y --default-toolchain "${RUST_TOOLCHAIN}" \
-c rust-src -t wasm32-unknown-unknown && \
. "$HOME/.cargo/env" && \
cargo install wasm-pack && \
curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.3/install.sh > install-node.sh && \
chmod +x install-node.sh && \
./install-node.sh && \
. "$HOME/.nvm/nvm.sh" && \
bash -i -c 'nvm install ${NODE_VERSION} && nvm use ${NODE_VERSION}'
WORKDIR /tfhe-wasm-tests/tfhe-rs/

View File

@@ -7,10 +7,10 @@ const DIR_TO_IGNORE: [&str; 3] = [
".git",
"target",
// If the data repo has been cloned, we ignore its README
"tfhe/tfhe-backward-compat-data",
"tests/tfhe-backward-compat-data",
];
const FILES_TO_IGNORE: [&str; 5] = [
const FILES_TO_IGNORE: [&str; 6] = [
// This contains fragments of code that are unrelated to TFHE-rs
"tfhe/docs/tutorials/sha256_bool.md",
// TODO: This contains code that could be executed as a trivium docstring
@@ -21,6 +21,7 @@ const FILES_TO_IGNORE: [&str; 5] = [
"tfhe-fft/README.md",
// TODO: find a way to test the tfhe-ntt readme
"tfhe-ntt/README.md",
"utils/tfhe-lints/README.md",
];
pub fn check_tfhe_docs_are_tested() -> Result<(), Error> {

23
tests/Cargo.toml Normal file
View File

@@ -0,0 +1,23 @@
[package]
name = "tests"
version = "0.1.0"
edition = "2021"
publish = false
[dev-dependencies]
tfhe = { path = "../tfhe" }
tfhe-versionable = { path = "../utils/tfhe-versionable" }
tfhe-backward-compat-data = { git = "https://github.com/zama-ai/tfhe-backward-compat-data.git", branch = "v0.5", default-features = false, features = [
"load",
] }
ron = "0.8"
[[test]]
name = "backward_compatibility_tests"
path = "backward_compatibility_tests.rs"
[features]
shortint = ["tfhe/shortint"]
integer = ["shortint", "tfhe/integer"]
zk-pok = ["tfhe/zk-pok"]

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-fft"
version = "0.6.0"
version = "0.7.0"
edition = "2021"
description = "tfhe-fft is a pure Rust high performance fast Fourier transform library."
readme = "README.md"
@@ -43,7 +43,7 @@ getrandom = { version = "0.2", features = ["js"] }
rug = "1.19.1"
[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
criterion = "0.4"
criterion = "0.5"
fftw-sys = { version = "0.6", default-features = false, features = ["system"] }
[[bench]]

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-ntt"
version = "0.3.0"
version = "0.4.0"
edition = "2021"
description = "tfhe-ntt is a pure Rust high performance number theoretic transform library."
readme = "README.md"
@@ -22,7 +22,7 @@ std = ["pulp/std", "aligned-vec/std"]
nightly = ["pulp/nightly"]
[dev-dependencies]
criterion = "0.4"
criterion = "0.5"
rand = { workspace = true }
serde = "1.0.163"
serde_json = "1.0.96"

View File

@@ -278,12 +278,12 @@ impl Plan {
let ntt_32: &mut [u32] = bytemuck::cast_slice_mut(ntt_32);
// optimize common cases(?): u64x1, u32x1
if self.plan_32.len() == 0 && self.plan_64.len() == 1 {
if self.plan_32.is_empty() && self.plan_64.len() == 1 {
ntt_64.copy_from_slice(standard);
self.plan_64[0].fwd(ntt_64);
return;
}
if self.plan_32.len() == 1 && self.plan_64.len() == 0 {
if self.plan_32.len() == 1 && self.plan_64.is_empty() {
for (ntt, &standard) in ntt_32.iter_mut().zip(standard) {
*ntt = standard as u32;
}
@@ -291,7 +291,7 @@ impl Plan {
return;
}
if self.plan_32.len() == 2 && self.plan_64.len() == 0 {
if self.plan_32.len() == 2 && self.plan_64.is_empty() {
let (ntt0, ntt1) = ntt_32.split_at_mut(self.ntt_size());
let p0_div = self.plan_32[0].p_div();
let p1_div = self.plan_32[1].p_div();
@@ -375,7 +375,7 @@ impl Plan {
let ntt_64 = &*ntt_64;
// optimize common cases(?): u64x1, u32x1, u32x2
if self.plan_32.len() == 0 && self.plan_64.len() == 0 {
if self.plan_32.is_empty() && self.plan_64.is_empty() {
match mode {
InvMode::Replace => standard.fill(0),
InvMode::Accumulate => {}
@@ -383,7 +383,7 @@ impl Plan {
return;
}
if self.plan_32.len() == 0 && self.plan_64.len() == 1 {
if self.plan_32.is_empty() && self.plan_64.len() == 1 {
match mode {
InvMode::Replace => standard.copy_from_slice(ntt_64),
InvMode::Accumulate => {
@@ -396,7 +396,7 @@ impl Plan {
}
return;
}
if self.plan_32.len() == 1 && self.plan_64.len() == 0 {
if self.plan_32.len() == 1 && self.plan_64.is_empty() {
match mode {
InvMode::Replace => {
for (standard, &ntt) in standard.iter_mut().zip(ntt_32) {
@@ -416,7 +416,7 @@ impl Plan {
// implements the algorithms from "the art of computer programming (Donald E. Knuth)" 4.3.2
// for finding solutions of the chinese remainder theorem
if self.plan_32.len() == 2 && self.plan_64.len() == 0 {
if self.plan_32.len() == 2 && self.plan_64.is_empty() {
let (ntt0, ntt1) = ntt_32.split_at(self.ntt_size());
let p0 = self.plan_32[0].modulus();
let p1 = self.plan_32[1].modulus();

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-zk-pok"
version = "0.3.1"
version = "0.4.0"
edition = "2021"
keywords = ["zero", "knowledge", "proof", "vector-commitments"]
homepage = "https://zama.ai/"
@@ -8,6 +8,7 @@ documentation = "https://docs.zama.ai/tfhe-rs"
repository = "https://github.com/zama-ai/tfhe-rs"
license = "BSD-3-Clause-Clear"
description = "tfhe-zk-pok: An implementation of zero-knowledge proofs of encryption for TFHE."
rust-version = "1.84"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -22,7 +23,7 @@ sha3 = "0.10.8"
serde = { workspace = true, features = ["default", "derive"] }
zeroize = "1.7.0"
num-bigint = "0.4.5"
tfhe-versionable = { version = "0.3.2", path = "../utils/tfhe-versionable" }
tfhe-versionable = { version = "0.4.0", path = "../utils/tfhe-versionable" }
[dev-dependencies]
serde_json = "~1.0"

View File

@@ -11,7 +11,7 @@ use std::fmt::Display;
use tfhe_versionable::{Upgrade, Version, VersionsDispatch};
use crate::curve_api::Curve;
use crate::four_squares::{isqrt, sqr};
use crate::four_squares::sqr;
use crate::proofs::pke_v2::Bound;
use crate::proofs::GroupElements;
use crate::serialization::{
@@ -102,7 +102,7 @@ impl Upgrade<SerializablePKEv2PublicParams> for SerializablePKEv2PublicParamsV0
type Error = Infallible;
fn upgrade(self) -> Result<SerializablePKEv2PublicParams, Self::Error> {
let slack_factor = isqrt((self.d + self.k) as u128) as u64;
let slack_factor = (self.d + self.k).isqrt() as u64;
let B_inf = self.B / slack_factor;
Ok(SerializablePKEv2PublicParams {
g_lists: self.g_lists,
@@ -110,7 +110,7 @@ impl Upgrade<SerializablePKEv2PublicParams> for SerializablePKEv2PublicParamsV0
n: self.n,
d: self.d,
k: self.k,
B_bound_squared: sqr(self.B_bound as u128),
B_bound_squared: sqr(self.B_bound),
B_inf,
q: self.q,
t: self.t,

View File

@@ -1,7 +1,9 @@
use ark_ff::biginteger::arithmetic::widening_mul;
use rand::prelude::*;
pub fn sqr(x: u128) -> u128 {
/// Avoid overflows for squares of u64
pub fn sqr(x: u64) -> u128 {
let x = x as u128;
x * x
}
@@ -9,37 +11,8 @@ pub fn checked_sqr(x: u128) -> Option<u128> {
x.checked_mul(x)
}
// copied from the standard library
// since isqrt is unstable at the moment
pub fn isqrt(this: u128) -> u128 {
if this < 2 {
return this;
}
// The algorithm is based on the one presented in
// <https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Binary_numeral_system_(base_2)>
// which cites as source the following C code:
// <https://web.archive.org/web/20120306040058/http://medialab.freaknet.org/martin/src/sqrt/sqrt.c>.
let mut op = this;
let mut res = 0;
let mut one = 1 << (this.ilog2() & !1);
while one != 0 {
if op >= res + one {
op -= res + one;
res = (res >> 1) + one;
} else {
res >>= 1;
}
one >>= 2;
}
res
}
fn half_gcd(p: u128, s: u128) -> u128 {
let sq_p = isqrt(p as _);
let sq_p = p.isqrt();
let mut a = p;
let mut b = s;
while b > sq_p {
@@ -225,13 +198,13 @@ pub fn four_squares(v: u128) -> [u64; 4] {
let f = v % 4;
if f == 2 {
let b = isqrt(v as _) as u64;
let b = v.isqrt() as u64;
'main_loop: loop {
let x = 2 + rng.gen::<u64>() % (b - 2);
let y = 2 + rng.gen::<u64>() % (b - 2);
let (sum, o) = u128::overflowing_add(sqr(x as u128), sqr(y as u128));
let (sum, o) = u128::overflowing_add(sqr(x), sqr(y));
if o || sum > v {
continue 'main_loop;
}
@@ -288,9 +261,9 @@ pub fn four_squares(v: u128) -> [u64; 4] {
let i = mont.natural_from_mont(sqrt);
let i = if i <= p / 2 { p - i } else { i };
let z = half_gcd(p, i) as u64;
let w = isqrt(p - sqr(z as u128)) as u64;
let w = (p - sqr(z)).isqrt() as u64;
if p != sqr(z as u128) + sqr(w as u128) {
if p != sqr(z) + sqr(w) {
continue 'main_loop;
}

View File

@@ -511,7 +511,7 @@ than the lwe dimension d. Please pick a smaller k: k = {k}, d = {d}"
Bound::GHL => 950625,
Bound::CS => 2 * (d as u128 + k as u128) + 4,
})
.checked_mul(B_squared + (sqr((d + 2) as u128) * (d + k) as u128) / 4)
.checked_mul(B_squared + (sqr((d + 2) as u64) * (d + k) as u128) / 4)
.unwrap_or_else(|| {
panic!(
"Invalid parameters for zk_pok, B_squared: {B_squared}, d: {d}, k: {k}. \
@@ -552,8 +552,9 @@ The computed m parameter is {m_bound} > 64. Please select a smaller B, d and/or
/// Use the relationship: `||x||_2 <= sqrt(dim)*||x||_inf`. Since we are only interested in the
/// squared bound, we avoid the sqrt by returning dim*(||x||_inf)^2.
fn inf_norm_bound_to_euclidean_squared(B_inf: u64, dim: usize) -> u128 {
checked_sqr(B_inf as u128)
.and_then(|norm_squared| norm_squared.checked_mul(dim as u128))
let norm_squared = sqr(B_inf);
norm_squared
.checked_mul(dim as u128)
.unwrap_or_else(|| panic!("Invalid parameters for zk_pok, B_inf: {B_inf}, d+k: {dim}"))
}
@@ -765,7 +766,7 @@ fn prove_impl<G: Curve>(
let e_sqr_norm = e1
.iter()
.chain(e2)
.map(|x| sqr(x.unsigned_abs() as u128))
.map(|x| sqr(x.unsigned_abs()))
.sum::<u128>();
if sanity_check_mode == ProofSanityCheckMode::Panic {
@@ -940,7 +941,7 @@ fn prove_impl<G: Curve>(
assert!(
checked_sqr(acc.unsigned_abs()).unwrap() <= B_bound_squared,
"sqr(acc) ({}) > B_bound_squared ({B_bound_squared})",
sqr(acc as u128)
checked_sqr(acc.unsigned_abs()).unwrap()
);
}
acc as i64
@@ -2786,7 +2787,7 @@ mod tests {
};
let B_with_slack_squared = inf_norm_bound_to_euclidean_squared(B, d + k);
let B_with_slack = isqrt(B_with_slack_squared) as u64;
let B_with_slack = B_with_slack_squared.isqrt() as u64;
let bound = match slack_mode {
// The slack is maximal, any term above B+slack should be refused
@@ -2797,7 +2798,7 @@ mod tests {
.e1
.iter()
.chain(&testcase.e2)
.map(|x| sqr(x.unsigned_abs() as u128))
.map(|x| sqr(x.unsigned_abs()))
.sum::<u128>();
let orig_value = match coeff_type {
@@ -2806,8 +2807,8 @@ mod tests {
};
let bound_squared =
B_with_slack_squared - (e_sqr_norm - sqr(orig_value as u128));
isqrt(bound_squared) as i64
B_with_slack_squared - (e_sqr_norm - sqr(orig_value as u64));
bound_squared.isqrt() as i64
}
// There is no slack effect, any term above B should be refused
BoundTestSlackMode::Min => B as i64,
@@ -2849,7 +2850,7 @@ mod tests {
let crs_max_k = crs_gen::<Curve>(d, d, B, q, t, msbs_zero_padding_bit_count, rng);
let B_with_slack_squared = inf_norm_bound_to_euclidean_squared(B, d + k);
let B_with_slack_upper = isqrt(B_with_slack_squared) as u64 + 1;
let B_with_slack_upper = B_with_slack_squared.isqrt() as u64 + 1;
// Generate test noise vectors with random coeffs and one completely out of bounds

View File

@@ -17,7 +17,7 @@ exclude = [
"/js_on_wasm_tests/",
"/web_wasm_parallel_tests/",
]
rust-version = "1.83"
rust-version = "1.84"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
@@ -32,38 +32,35 @@ serde_json = "1.0.94"
clap = { version = "=4.4.4", features = ["derive"] }
# Used in user documentation
fs2 = { version = "0.4.3" }
statrs = "0.16"
statrs = "0.18"
# For erf and normality test
libm = "0.2.6"
# Begin regex-engine deps
test-case = "3.1.0"
combine = "4.6.6"
env_logger = "0.10.0"
env_logger = "0.11"
log = "0.4.19"
hex = "0.4.3"
# End regex-engine deps
# Used for backward compatibility test metadata
ron = "0.8"
tfhe-backward-compat-data = { git = "https://github.com/zama-ai/tfhe-backward-compat-data.git", branch = "v0.4", default-features = false, features = [
"load",
] }
strum = { version = "0.26", features = ["derive"] }
[build-dependencies]
cbindgen = { version = "0.26.0", optional = true }
cbindgen = { version = "0.28", optional = true }
[dependencies]
tfhe-csprng = { version = "0.5.0", path = "../tfhe-csprng", features = ["parallel"] }
tfhe-csprng = { version = "0.5.0", path = "../tfhe-csprng", features = [
"parallel",
] }
serde = { workspace = true, features = ["default", "derive"] }
rayon = { workspace = true }
bincode = "1.3.3"
tfhe-fft = { version = "0.6.0", path = "../tfhe-fft", features = [
tfhe-fft = { version = "0.7.0", path = "../tfhe-fft", features = [
"serde",
"fft128",
] }
tfhe-ntt = { version = "0.3.0", path = "../tfhe-ntt" }
tfhe-ntt = { version = "0.4.0", path = "../tfhe-ntt" }
pulp = { workspace = true, features = ["default"] }
tfhe-cuda-backend = { version = "0.6.0", path = "../backends/tfhe-cuda-backend", optional = true }
tfhe-cuda-backend = { version = "0.7.0", path = "../backends/tfhe-cuda-backend", optional = true }
aligned-vec = { workspace = true, features = ["default", "serde"] }
dyn-stack = { workspace = true, features = ["default"] }
paste = "1.0.7"
@@ -72,14 +69,14 @@ fs2 = { version = "0.4.3", optional = true }
sha3 = { version = "0.10", optional = true }
itertools = { workspace = true }
rand_core = { version = "0.6.4", features = ["std"] }
tfhe-zk-pok = { version = "0.3.1", path = "../tfhe-zk-pok", optional = true }
tfhe-versionable = { version = "0.3.2", path = "../utils/tfhe-versionable" }
tfhe-zk-pok = { version = "0.4.0", path = "../tfhe-zk-pok", optional = true }
tfhe-versionable = { version = "0.4.0", path = "../utils/tfhe-versionable" }
# wasm deps
wasm-bindgen = { workspace = true, features = [
"serde-serialize",
], optional = true }
wasm-bindgen-rayon = { version = "=1.2.2", optional = true }
wasm-bindgen-rayon = { version = "1.3.0", optional = true }
js-sys = { version = "0.3", optional = true }
console_error_panic_hook = { version = "0.1.7", optional = true }
serde-wasm-bindgen = { version = "0.6.0", optional = true }
@@ -131,16 +128,17 @@ __profiling = []
software-prng = ["tfhe-csprng/software-prng"]
# Cover several profiles as we cannot have a wildcard it seems
[package.metadata.wasm-pack.profile.dev.wasm-bindgen]
split-linked-modules = true
[package.metadata.wasm-pack.profile.release.wasm-bindgen]
split-linked-modules = true
[package.metadata.docs.rs]
# TODO: manage builds for docs.rs based on their documentation https://docs.rs/about
features = ["boolean", "shortint", "integer", "gpu", "zk-pok", "software-prng"]
features = [
"boolean",
"shortint",
"integer",
"gpu",
"zk-pok",
"software-prng",
"strings",
]
rustdoc-args = ["--html-in-header", "katex-header.html"]
###########
@@ -321,7 +319,7 @@ crate-type = ["lib", "staticlib", "cdylib"]
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = [
'cfg(tarpaulin)',
'cfg(tfhe_lints)',
'cfg(dylint_lib, values(any()))',
# This is a bug/unwanted behavior from wasm_bindgen macro, for now warn instead of erroring
'cfg(wasm_bindgen_unstable_test_coverage)',
] }

View File

@@ -9,8 +9,10 @@ use tfhe::core_crypto::prelude::*;
use tfhe::keycache::NamedParam;
use tfhe::shortint::parameters::{
COMP_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64, PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64, PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
};
#[cfg(feature = "gpu")]
use tfhe::shortint::parameters::{
@@ -21,13 +23,13 @@ use tfhe::shortint::parameters::{
};
#[cfg(not(feature = "gpu"))]
use tfhe::shortint::parameters::{
PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
};
use tfhe::shortint::prelude::*;
use tfhe::shortint::{MultiBitPBSParameters, PBSParameters};
@@ -35,28 +37,28 @@ use tfhe::shortint::{MultiBitPBSParameters, PBSParameters};
#[cfg(not(feature = "gpu"))]
const SHORTINT_BENCH_PARAMS: [ClassicPBSParameters; 5] = [
PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M64,
];
#[cfg(feature = "gpu")]
const SHORTINT_BENCH_PARAMS: [ClassicPBSParameters; 4] = [
PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
];
#[cfg(not(feature = "gpu"))]
const SHORTINT_MULTI_BIT_BENCH_PARAMS: [MultiBitPBSParameters; 6] = [
PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
];
#[cfg(feature = "gpu")]

View File

@@ -17,10 +17,10 @@ const SHORTINT_BENCH_PARAMS_TUNIFORM: [ClassicPBSParameters; 1] =
[PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64];
const SHORTINT_BENCH_PARAMS_GAUSSIAN: [ClassicPBSParameters; 4] = [
PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M64,
];
const BOOLEAN_BENCH_PARAMS: [(&str, BooleanParameters); 2] = [
@@ -57,17 +57,17 @@ fn throughput_benchmark_parameters_64bits() -> Vec<(String, CryptoParametersReco
let parameters = if cfg!(feature = "gpu") {
vec![
PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
]
} else {
vec![
PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_4_CARRY_4_KS_PBS_GAUSSIAN_2M64,
]
};
@@ -102,12 +102,12 @@ fn multi_bit_benchmark_parameters_64bits(
]
} else {
vec![
PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_2_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_2_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_2_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_3_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_3_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MULTI_BIT_GROUP_3_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
]
};
@@ -832,13 +832,20 @@ mod cuda {
use tfhe::core_crypto::prelude::*;
use tfhe::keycache::NamedParam;
use tfhe::shortint::parameters::{
PARAM_MESSAGE_1_CARRY_0_KS_PBS_GAUSSIAN_2M64, PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_0_KS_PBS_GAUSSIAN_2M64, PARAM_MESSAGE_2_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64, PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
PARAM_MESSAGE_3_CARRY_0_KS_PBS_GAUSSIAN_2M64, PARAM_MESSAGE_3_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64, PARAM_MESSAGE_4_CARRY_0_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_4_CARRY_3_KS_PBS_GAUSSIAN_2M64, PARAM_MESSAGE_5_CARRY_0_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_6_CARRY_0_KS_PBS_GAUSSIAN_2M64, PARAM_MESSAGE_7_CARRY_0_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
V0_11_PARAM_MESSAGE_1_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_3_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_3_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_4_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_4_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_5_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_6_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_7_CARRY_0_KS_PBS_GAUSSIAN_2M64,
};
use tfhe::shortint::{ClassicPBSParameters, PBSParameters};
@@ -846,19 +853,19 @@ mod cuda {
// TUniform
PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M64,
// Gaussian
PARAM_MESSAGE_1_CARRY_0_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_0_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_1_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_0_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_2_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_4_CARRY_0_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_4_CARRY_3_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_5_CARRY_0_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_6_CARRY_0_KS_PBS_GAUSSIAN_2M64,
PARAM_MESSAGE_7_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_1_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_1_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_3_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_3_CARRY_2_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_3_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_4_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_4_CARRY_3_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_5_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_6_CARRY_0_KS_PBS_GAUSSIAN_2M64,
V0_11_PARAM_MESSAGE_7_CARRY_0_KS_PBS_GAUSSIAN_2M64,
];
fn cuda_benchmark_parameters_64bits() -> Vec<(String, CryptoParametersRecord<u64>)> {

File diff suppressed because it is too large Load Diff

View File

@@ -6,6 +6,7 @@ use crate::utilities::{
};
use criterion::{black_box, criterion_group, Criterion, Throughput};
use rayon::prelude::*;
use std::cmp::max;
use tfhe::integer::ciphertext::CompressedCiphertextListBuilder;
use tfhe::integer::{ClientKey, RadixCiphertext};
use tfhe::keycache::NamedParam;
@@ -77,9 +78,19 @@ fn cpu_glwe_packing(c: &mut Criterion) {
});
}
BenchmarkType::Throughput => {
// Execute the operation once to know its cost.
let ct = cks.encrypt_radix(0_u32, num_blocks);
let mut builder = CompressedCiphertextListBuilder::new();
builder.push(ct);
let compressed = builder.build(&compression_key);
reset_pbs_count();
let _: RadixCiphertext = compressed.get(0, &decompression_key).unwrap().unwrap();
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
let num_block =
(bit_size as f64 / (param.message_modulus.0 as f64).log(2.0)).ceil() as usize;
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
// FIXME thread usage seemed to be somewhat more "efficient".
// For example, with bit_size = 2, my laptop is only using around 2/3 of the
// available threads Thread usage increases with bit_size = 8 but
@@ -150,6 +161,7 @@ fn cpu_glwe_packing(c: &mut Criterion) {
#[cfg(feature = "gpu")]
mod cuda {
use super::*;
use std::cmp::max;
use tfhe::core_crypto::gpu::CudaStreams;
use tfhe::integer::gpu::ciphertext::compressed_ciphertext_list::CudaCompressedCiphertextListBuilder;
use tfhe::integer::gpu::ciphertext::CudaUnsignedRadixCiphertext;
@@ -185,27 +197,26 @@ mod cuda {
let bench_id_pack;
let bench_id_unpack;
// Generate private compression key
let cks = ClientKey::new(param);
let private_compression_key = cks.new_compression_private_key(comp_param);
// Generate and convert compression keys
let (radix_cks, _) = gen_keys_radix_gpu(param, num_blocks, &stream);
let (compressed_compression_key, compressed_decompression_key) =
radix_cks.new_compressed_compression_decompression_keys(&private_compression_key);
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&stream);
let cuda_decompression_key = compressed_decompression_key.decompress_to_cuda(
radix_cks.parameters().glwe_dimension(),
radix_cks.parameters().polynomial_size(),
radix_cks.parameters().message_modulus(),
radix_cks.parameters().carry_modulus(),
radix_cks.parameters().ciphertext_modulus(),
&stream,
);
match BENCH_TYPE.get().unwrap() {
BenchmarkType::Latency => {
// Generate private compression key
let cks = ClientKey::new(param);
let private_compression_key = cks.new_compression_private_key(comp_param);
// Generate and convert compression keys
let (radix_cks, _) = gen_keys_radix_gpu(param, num_blocks, &stream);
let (compressed_compression_key, compressed_decompression_key) = radix_cks
.new_compressed_compression_decompression_keys(&private_compression_key);
let cuda_compression_key =
compressed_compression_key.decompress_to_cuda(&stream);
let cuda_decompression_key = compressed_decompression_key.decompress_to_cuda(
radix_cks.parameters().glwe_dimension(),
radix_cks.parameters().polynomial_size(),
radix_cks.parameters().message_modulus(),
radix_cks.parameters().carry_modulus(),
radix_cks.parameters().ciphertext_modulus(),
&stream,
);
// Encrypt
let ct = cks.encrypt_radix(0_u32, num_blocks);
let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream);
@@ -239,28 +250,25 @@ mod cuda {
});
}
BenchmarkType::Throughput => {
// Execute the operation once to know its cost.
let (cpu_compression_key, cpu_decompression_key) =
cks.new_compression_decompression_keys(&private_compression_key);
let ct = cks.encrypt_radix(0_u32, num_blocks);
let mut builder = CompressedCiphertextListBuilder::new();
builder.push(ct);
let compressed = builder.build(&cpu_compression_key);
reset_pbs_count();
// Use the CPU operation, because pbs_count does not count PBS executed on the GPU backend.
let _: RadixCiphertext =
compressed.get(0, &cpu_decompression_key).unwrap().unwrap();
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
let num_block = (bit_size as f64 / (param.message_modulus.0 as f64).log(2.0))
.ceil() as usize;
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
let cks = ClientKey::new(param);
let private_compression_key = cks.new_compression_private_key(comp_param);
let (radix_cks, _) = gen_keys_radix_gpu(param, num_blocks, &stream);
let (compressed_compression_key, compressed_decompression_key) = radix_cks
.new_compressed_compression_decompression_keys(&private_compression_key);
let cuda_compression_key =
compressed_compression_key.decompress_to_cuda(&stream);
let cuda_decompression_key = compressed_decompression_key.decompress_to_cuda(
radix_cks.parameters().glwe_dimension(),
radix_cks.parameters().polynomial_size(),
radix_cks.parameters().message_modulus(),
radix_cks.parameters().carry_modulus(),
radix_cks.parameters().ciphertext_modulus(),
&stream,
);
// Encrypt
let ct = cks.encrypt_radix(0_u32, num_blocks);
let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&ct, &stream);
@@ -344,6 +352,7 @@ criterion_group!(cpu_glwe_packing2, cpu_glwe_packing);
#[cfg(feature = "gpu")]
use cuda::gpu_glwe_packing2;
use tfhe::{get_pbs_count, reset_pbs_count};
fn main() {
BENCH_TYPE.get_or_init(|| BenchmarkType::from_env().unwrap());

View File

@@ -4,9 +4,11 @@ use crate::utilities::{
};
use criterion::{black_box, Criterion, Throughput};
use rayon::prelude::*;
use std::cmp::max;
use tfhe::integer::keycache::KEY_CACHE;
use tfhe::integer::IntegerKeyKind;
use tfhe::keycache::NamedParam;
use tfhe::{get_pbs_count, reset_pbs_count};
use tfhe_csprng::seeders::Seed;
pub fn unsigned_oprf(c: &mut Criterion) {
@@ -40,12 +42,21 @@ pub fn unsigned_oprf(c: &mut Criterion) {
});
}
BenchmarkType::Throughput => {
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
// Execute the operation once to know its cost.
reset_pbs_count();
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(
Seed(0),
bit_size as u64,
num_block as u64,
);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let (_, sk) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
b.iter(|| {
(0..elements).into_par_iter().for_each(|_| {
sk.par_generate_oblivious_pseudo_random_unsigned_integer_bounded(

View File

@@ -8,6 +8,7 @@ use crate::utilities::{
use criterion::{criterion_group, Criterion, Throughput};
use rand::prelude::*;
use rayon::prelude::*;
use std::cmp::max;
use std::env;
use tfhe::integer::keycache::KEY_CACHE;
use tfhe::integer::prelude::*;
@@ -66,12 +67,20 @@ fn bench_server_key_signed_binary_function_clean_inputs<F>(
});
}
BenchmarkType::Throughput => {
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
// Execute the operation once to know its cost.
let ct_0 = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
let ct_1 = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
reset_pbs_count();
binary_op(&sks, &ct_0, &ct_1);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let mut cts_0 = (0..elements)
.map(|_| cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block))
.collect::<Vec<_>>();
@@ -151,12 +160,21 @@ fn bench_server_key_signed_shift_function_clean_inputs<F>(
});
}
BenchmarkType::Throughput => {
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
// Execute the operation once to know its cost.
let clear_1 = rng.gen_range(0u128..bit_size as u128);
let ct_0 = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
let ct_1 = cks.encrypt_radix(clear_1, num_block);
reset_pbs_count();
binary_op(&sks, &ct_0, &ct_1);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let mut cts_0 = (0..elements)
.map(|_| cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block))
.collect::<Vec<_>>();
@@ -233,12 +251,19 @@ fn bench_server_key_unary_function_clean_inputs<F>(
});
}
BenchmarkType::Throughput => {
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
// Execute the operation once to know its cost.
let ct_0 = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
reset_pbs_count();
unary_fn(&sks, &ct_0);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let mut cts_0 = (0..elements)
.map(|_| cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block))
.collect::<Vec<_>>();
@@ -307,12 +332,21 @@ fn signed_if_then_else_parallelized(c: &mut Criterion) {
});
}
BenchmarkType::Throughput => {
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
// Execute the operation once to know its cost.
let cond = sks.create_trivial_boolean_block(rng.gen_bool(0.5));
let ct_then = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
let ct_else = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
reset_pbs_count();
sks.if_then_else_parallelized(&cond, &ct_then, &ct_else);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let cts_cond = (0..elements)
.map(|_| sks.create_trivial_boolean_block(rng.gen_bool(0.5)))
.collect::<Vec<_>>();
@@ -830,12 +864,20 @@ fn bench_server_key_binary_scalar_function_clean_inputs<F, G>(
});
}
BenchmarkType::Throughput => {
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
// Execute the operation once to know its cost.
let mut ct_0 = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
let clear_1 = rng_func(&mut rng, bit_size);
reset_pbs_count();
binary_op(&sks, &mut ct_0, clear_1);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let (cks, sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let mut cts_0 = (0..elements)
.map(|_| cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block))
.collect::<Vec<_>>();
@@ -1328,6 +1370,7 @@ mod cuda {
use super::*;
use criterion::criterion_group;
use rayon::iter::IntoParallelRefIterator;
use std::cmp::max;
use tfhe::core_crypto::gpu::CudaStreams;
use tfhe::integer::gpu::ciphertext::boolean_value::CudaBooleanBlock;
use tfhe::integer::gpu::ciphertext::{CudaSignedRadixCiphertext, CudaUnsignedRadixCiphertext};
@@ -1335,11 +1378,12 @@ mod cuda {
/// Base function to bench a server key function that is a binary operation, input ciphertext
/// will contain only zero carries
fn bench_cuda_server_key_binary_signed_function_clean_inputs<F>(
fn bench_cuda_server_key_binary_signed_function_clean_inputs<F, G>(
c: &mut Criterion,
bench_name: &str,
display_name: &str,
binary_op: F,
binary_op_cpu: G,
) where
F: Fn(
&CudaServerKey,
@@ -1347,6 +1391,7 @@ mod cuda {
&mut CudaSignedRadixCiphertext,
&CudaStreams,
) + Sync,
G: Fn(&ServerKey, &SignedRadixCiphertext, &SignedRadixCiphertext) + Sync,
{
let mut bench_group = c.benchmark_group(bench_name);
bench_group
@@ -1401,14 +1446,22 @@ mod cuda {
});
}
BenchmarkType::Throughput => {
let (cks, cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks = CudaServerKey::new(&cks, &stream);
// Execute the operation once to know its cost.
let mut ct_0 = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
let mut ct_1 = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
reset_pbs_count();
// Use the CPU operation, because pbs_count does not count PBS executed on the GPU backend.
binary_op_cpu(&cpu_sks, &mut ct_0, &mut ct_1);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let (cks, _cpu_sks) =
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks = CudaServerKey::new(&cks, &stream);
let mut cts_0 = (0..elements)
.map(|_| {
let clearlow = rng.gen::<u128>();
@@ -1460,7 +1513,7 @@ mod cuda {
}
macro_rules! define_cuda_server_key_bench_clean_input_signed_fn (
(method_name: $server_key_method:ident, display_name:$name:ident) => {
(method_name: $server_key_method:ident, method_name_cpu: $server_key_method_cpu:ident, display_name:$name:ident) => {
::paste::paste!{
fn [<cuda_ $server_key_method>](c: &mut Criterion) {
bench_cuda_server_key_binary_signed_function_clean_inputs(
@@ -1469,6 +1522,9 @@ mod cuda {
stringify!($name),
|server_key, lhs, rhs, stream| {
server_key.$server_key_method(lhs, rhs, stream);
},
|server_key_cpu, lhs, rhs| {
server_key_cpu.$server_key_method_cpu(lhs, rhs);
}
)
}
@@ -1478,13 +1534,15 @@ mod cuda {
/// Base function to bench a server key function that is a unary operation, input ciphertext
/// will contain only zero carries
fn bench_cuda_server_key_unary_signed_function_clean_inputs<F>(
fn bench_cuda_server_key_unary_signed_function_clean_inputs<F, G>(
c: &mut Criterion,
bench_name: &str,
display_name: &str,
unary_op: F,
unary_op_cpu: G,
) where
F: Fn(&CudaServerKey, &mut CudaSignedRadixCiphertext, &CudaStreams) + Sync,
G: Fn(&ServerKey, &SignedRadixCiphertext) + Sync,
{
let mut bench_group = c.benchmark_group(bench_name);
bench_group
@@ -1527,14 +1585,21 @@ mod cuda {
});
}
BenchmarkType::Throughput => {
let (cks, cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks = CudaServerKey::new(&cks, &stream);
// Execute the operation once to know its cost.
let ct_0 = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
reset_pbs_count();
// Use the CPU operation, because pbs_count does not count PBS executed on the GPU backend.
unary_op_cpu(&cpu_sks, &ct_0);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let (cks, _cpu_sks) =
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks = CudaServerKey::new(&cks, &stream);
let mut cts_0 = (0..elements)
.map(|_| {
let clearlow = rng.gen::<u128>();
@@ -1572,7 +1637,7 @@ mod cuda {
}
macro_rules! define_cuda_server_key_bench_clean_input_signed_unary_fn (
(method_name: $server_key_method:ident, display_name:$name:ident) => {
(method_name: $server_key_method:ident, method_name_cpu: $server_key_method_cpu:ident, display_name:$name:ident) => {
::paste::paste!{
fn [<cuda_ $server_key_method>](c: &mut Criterion) {
bench_cuda_server_key_unary_signed_function_clean_inputs(
@@ -1581,6 +1646,9 @@ mod cuda {
stringify!($name),
|server_key, input, stream| {
server_key.$server_key_method(input, stream);
},
|server_key_cpu, lhs| {
server_key_cpu.$server_key_method_cpu(lhs);
}
)
}
@@ -1588,15 +1656,17 @@ mod cuda {
}
);
fn bench_cuda_server_key_binary_scalar_signed_function_clean_inputs<F, G>(
fn bench_cuda_server_key_binary_scalar_signed_function_clean_inputs<F, G, H>(
c: &mut Criterion,
bench_name: &str,
display_name: &str,
binary_op: F,
rng_func: G,
binary_op_cpu: G,
rng_func: H,
) where
F: Fn(&CudaServerKey, &mut CudaSignedRadixCiphertext, ScalarType, &CudaStreams) + Sync,
G: Fn(&mut ThreadRng, usize) -> ScalarType,
G: Fn(&ServerKey, &mut SignedRadixCiphertext, ScalarType) + Sync,
H: Fn(&mut ThreadRng, usize) -> ScalarType,
{
let mut bench_group = c.benchmark_group(bench_name);
bench_group
@@ -1650,16 +1720,24 @@ mod cuda {
});
}
BenchmarkType::Throughput => {
let (cks, cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks = CudaServerKey::new(&cks, &stream);
// Execute the operation once to know its cost.
let mut ct_0 = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
let clear_0 = rng_func(&mut rng, bit_size);
reset_pbs_count();
// Use the CPU operation, because pbs_count does not count PBS executed on the GPU backend.
binary_op_cpu(&cpu_sks, &mut ct_0, clear_0);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
bench_id = format!(
"{bench_name}::throughput::{param_name}::{bit_size}_bits_scalar_{bit_size}"
);
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let (cks, _cpu_sks) =
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks = CudaServerKey::new(&cks, &stream);
let mut cts_0 = (0..elements)
.map(|_| {
let clearlow = rng.gen::<u128>();
@@ -1702,7 +1780,7 @@ mod cuda {
}
macro_rules! define_cuda_server_key_bench_clean_input_scalar_signed_fn (
(method_name: $server_key_method:ident, display_name:$name:ident, rng_func:$($rng_fn:tt)*) => {
(method_name: $server_key_method:ident, method_name_cpu: $server_key_method_cpu:ident, display_name:$name:ident, rng_func:$($rng_fn:tt)*) => {
::paste::paste!{
fn [<cuda_ $server_key_method>](c: &mut Criterion) {
bench_cuda_server_key_binary_scalar_signed_function_clean_inputs(
@@ -1712,6 +1790,9 @@ mod cuda {
|server_key, lhs, rhs, stream| {
server_key.$server_key_method(lhs, rhs, stream);
},
|server_key_cpu, lhs, rhs| {
server_key_cpu.$server_key_method_cpu(lhs, rhs);
},
$($rng_fn)*
)
}
@@ -1721,11 +1802,12 @@ mod cuda {
/// Base function to bench a server key function that is a binary operation for shift/rotate,
/// input ciphertext will contain only zero carries
fn bench_cuda_server_key_shift_rotate_signed_function_clean_inputs<F>(
fn bench_cuda_server_key_shift_rotate_signed_function_clean_inputs<F, G>(
c: &mut Criterion,
bench_name: &str,
display_name: &str,
binary_op: F,
binary_op_cpu: G,
) where
F: Fn(
&CudaServerKey,
@@ -1733,6 +1815,7 @@ mod cuda {
&mut CudaUnsignedRadixCiphertext,
&CudaStreams,
) + Sync,
G: Fn(&ServerKey, &SignedRadixCiphertext, &RadixCiphertext) + Sync,
{
let mut bench_group = c.benchmark_group(bench_name);
bench_group
@@ -1786,14 +1869,23 @@ mod cuda {
});
}
BenchmarkType::Throughput => {
let (cks, cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks = CudaServerKey::new(&cks, &stream);
// Execute the operation once to know its cost.
let clear_1 = rng.gen_range(0u128..bit_size as u128);
let ct_0 = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
let ct_1 = cks.encrypt_radix(clear_1, num_block);
reset_pbs_count();
// Use the CPU operation, because pbs_count does not count PBS executed on the GPU backend.
binary_op_cpu(&cpu_sks, &ct_0, &ct_1);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let (cks, _cpu_sks) =
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks = CudaServerKey::new(&cks, &stream);
let mut cts_0 = (0..elements)
.map(|_| {
let clearlow = rng.gen::<u128>();
@@ -1843,7 +1935,7 @@ mod cuda {
}
macro_rules! define_cuda_server_key_bench_clean_input_signed_shift_rotate (
(method_name: $server_key_method:ident, display_name:$name:ident) => {
(method_name: $server_key_method:ident, method_name_cpu: $server_key_method_cpu:ident, display_name:$name:ident) => {
::paste::paste!{
fn [<cuda_ $server_key_method>](c: &mut Criterion) {
bench_cuda_server_key_shift_rotate_signed_function_clean_inputs(
@@ -1852,6 +1944,9 @@ mod cuda {
stringify!($name),
|server_key, lhs, rhs, stream| {
server_key.$server_key_method(lhs, rhs, stream);
},
|server_key_cpu, lhs, rhs| {
server_key_cpu.$server_key_method_cpu(lhs, rhs);
}
)
}
@@ -1916,14 +2011,23 @@ mod cuda {
});
}
BenchmarkType::Throughput => {
let (cks, cpu_sks) = KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks = CudaServerKey::new(&cks, &stream);
// Execute the operation once to know its cost.
let cond = cpu_sks.create_trivial_boolean_block(rng.gen_bool(0.5));
let ct_then = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
let ct_else = cks.encrypt_signed_radix(gen_random_i256(&mut rng), num_block);
reset_pbs_count();
// Use the CPU operation, because pbs_count does not count PBS executed on the GPU backend.
cpu_sks.if_then_else_parallelized(&cond, &ct_then, &ct_else);
let pbs_count = max(get_pbs_count(), 1); // Operation might not perform any PBS, so we take 1 as default
bench_id = format!("{bench_name}::throughput::{param_name}::{bit_size}_bits");
let elements = throughput_num_threads(num_block);
let elements = throughput_num_threads(num_block, pbs_count);
bench_group.throughput(Throughput::Elements(elements));
bench_group.bench_function(&bench_id, |b| {
let (cks, _cpu_sks) =
KEY_CACHE.get_from_params(param, IntegerKeyKind::Radix);
let gpu_sks = CudaServerKey::new(&cks, &stream);
let cts_cond = (0..elements)
.map(|_| {
let ct_cond = cks.encrypt_bool(rng.gen::<bool>());
@@ -1997,246 +2101,291 @@ mod cuda {
// Unchecked signed arithmetic and bitwise benchmarks on the CUDA backend.
//
// Each invocation expands to a `fn cuda_<method_name>(c: &mut Criterion)`:
// - `method_name` is the `CudaServerKey` method that is benchmarked.
// - `method_name_cpu` is the CPU `ServerKey` method; in throughput mode it is
//   run once so that the PBS count can be read, since the counter does not see
//   PBS executed on the GPU.
// - `display_name` is stringified and forwarded to the base bench function.
//
// `..._signed_fn!` is used for two-ciphertext operations and
// `..._signed_unary_fn!` for single-ciphertext operations.
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_add,
method_name_cpu: unchecked_add_parallelized,
display_name: add
);
// NOTE(review): the CPU counterpart here has no `_parallelized` suffix.
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_sub,
method_name_cpu: unchecked_sub,
display_name: sub
);
// NOTE(review): the CPU counterpart here has no `_parallelized` suffix.
define_cuda_server_key_bench_clean_input_signed_unary_fn!(
method_name: unchecked_neg,
method_name_cpu: unchecked_neg,
display_name: neg
);
define_cuda_server_key_bench_clean_input_signed_unary_fn!(
method_name: unchecked_abs,
method_name_cpu: unchecked_abs_parallelized,
display_name: abs
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_mul,
method_name_cpu: unchecked_mul_parallelized,
display_name: mul
);
// Displayed as `div_mod` although the benchmarked method is `unchecked_div_rem`.
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_div_rem,
method_name_cpu: unchecked_div_rem_parallelized,
display_name: div_mod
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_bitand,
method_name_cpu: unchecked_bitand_parallelized,
display_name: bitand
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_bitor,
method_name_cpu: unchecked_bitor_parallelized,
display_name: bitor
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_bitxor,
method_name_cpu: unchecked_bitxor_parallelized,
display_name: bitxor
);
// The GPU side uses `unchecked_bitnot` while the CPU cost probe uses `bitnot`.
define_cuda_server_key_bench_clean_input_signed_unary_fn!(
method_name: unchecked_bitnot,
method_name_cpu: bitnot,
display_name: bitnot
);
// Unchecked shift/rotate benchmarks where the shift amount is itself encrypted.
//
// Each invocation expands to a `fn cuda_<method_name>(c: &mut Criterion)` built on
// the shift/rotate base bench function: the left operand is a signed radix
// ciphertext and the right operand an unsigned one. `method_name_cpu` is the CPU
// `ServerKey` method run once in throughput mode to measure the PBS count.
define_cuda_server_key_bench_clean_input_signed_shift_rotate!(
method_name: unchecked_rotate_left,
method_name_cpu: unchecked_rotate_left_parallelized,
display_name: rotate_left
);
define_cuda_server_key_bench_clean_input_signed_shift_rotate!(
method_name: unchecked_rotate_right,
method_name_cpu: unchecked_rotate_right_parallelized,
display_name: rotate_right
);
define_cuda_server_key_bench_clean_input_signed_shift_rotate!(
method_name: unchecked_left_shift,
method_name_cpu: unchecked_left_shift_parallelized,
display_name: left_shift
);
define_cuda_server_key_bench_clean_input_signed_shift_rotate!(
method_name: unchecked_right_shift,
method_name_cpu: unchecked_right_shift_parallelized,
display_name: right_shift
);
// Unchecked comparison, min/max and overflowing add/sub benchmarks between two
// signed ciphertexts.
//
// Each invocation expands to a `fn cuda_<method_name>(c: &mut Criterion)`.
// `method_name` is the `CudaServerKey` method under test; `method_name_cpu` is
// the CPU `ServerKey` method executed once in throughput mode to obtain the PBS
// count used to size the workload.
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_eq,
method_name_cpu: unchecked_eq_parallelized,
display_name: eq
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_ne,
method_name_cpu: unchecked_ne_parallelized,
display_name: ne
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_gt,
method_name_cpu: unchecked_gt_parallelized,
display_name: gt
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_ge,
method_name_cpu: unchecked_ge_parallelized,
display_name: ge
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_lt,
method_name_cpu: unchecked_lt_parallelized,
display_name: lt
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_le,
method_name_cpu: unchecked_le_parallelized,
display_name: le
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_min,
method_name_cpu: unchecked_min_parallelized,
display_name: min
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_max,
method_name_cpu: unchecked_max_parallelized,
display_name: max
);
// Overflowing variants: the display name drops the `unchecked_signed_` prefix.
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_signed_overflowing_add,
method_name_cpu: unchecked_signed_overflowing_add_parallelized,
display_name: overflowing_add
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: unchecked_signed_overflowing_sub,
method_name_cpu: unchecked_signed_overflowing_sub_parallelized,
display_name: overflowing_sub
);
// Unchecked ciphertext-by-clear-scalar arithmetic and bitwise benchmarks.
//
// Each invocation expands to a `fn cuda_<method_name>(c: &mut Criterion)` built on
// the scalar base bench function:
// - `method_name` is the `CudaServerKey` method under test.
// - `method_name_cpu` is the CPU `ServerKey` method run once in throughput mode
//   to measure the PBS count.
// - `rng_func` is the generator called with the RNG and the bit size to produce
//   the clear scalar operand.
// NOTE(review): the CPU counterpart here has no `_parallelized` suffix.
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_add,
method_name_cpu: unchecked_scalar_add,
display_name: add,
rng_func: default_signed_scalar
);
// Multiplication uses its own scalar generator rather than the default one.
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_mul,
method_name_cpu: unchecked_scalar_mul_parallelized,
display_name: mul,
rng_func: mul_signed_scalar
);
// NOTE(review): the CPU counterpart here has no `_parallelized` suffix.
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_sub,
method_name_cpu: unchecked_scalar_sub,
display_name: sub,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_bitand,
method_name_cpu: unchecked_scalar_bitand_parallelized,
display_name: bitand,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_bitor,
method_name_cpu: unchecked_scalar_bitor_parallelized,
display_name: bitor,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_bitxor,
method_name_cpu: unchecked_scalar_bitxor_parallelized,
display_name: bitxor,
rng_func: default_signed_scalar
);
// Unchecked right shift of a signed ciphertext by a clear scalar.
//
// Expands to `fn cuda_unchecked_scalar_right_shift(c: &mut Criterion)`.
// `method_name_cpu` is the CPU `ServerKey` method run once in throughput mode to
// measure the PBS count.
//
// The shift amount comes from `shift_scalar`, the same generator used by the
// sibling scalar left-shift and rotate benchmarks, instead of the general-purpose
// `default_signed_scalar` operand generator.
// NOTE(review): confirm that `shift_scalar` yields the intended shift amount for
// right shifts as well.
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_right_shift,
method_name_cpu: unchecked_scalar_right_shift_parallelized,
display_name: right_shift,
rng_func: shift_scalar
);
// Unchecked left shift and rotations of a signed ciphertext by a clear scalar.
//
// Each invocation expands to a `fn cuda_<method_name>(c: &mut Criterion)`.
// `method_name_cpu` is the CPU `ServerKey` method run once in throughput mode to
// measure the PBS count; `rng_func` produces the clear shift/rotate amount.
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_left_shift,
method_name_cpu: unchecked_scalar_left_shift_parallelized,
display_name: left_shift,
rng_func: shift_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_rotate_right,
method_name_cpu: unchecked_scalar_rotate_right_parallelized,
display_name: rotate_right,
rng_func: shift_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_rotate_left,
method_name_cpu: unchecked_scalar_rotate_left_parallelized,
display_name: rotate_left,
rng_func: shift_scalar
);
// Ciphertext-by-clear-scalar comparison, min/max, overflowing add/sub and
// division benchmarks.
//
// Each invocation expands to a `fn cuda_<method_name>(c: &mut Criterion)`.
// `method_name` is the `CudaServerKey` method under test, `method_name_cpu` the
// CPU `ServerKey` method run once in throughput mode to measure the PBS count,
// and `rng_func` the generator of the clear scalar operand.
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_eq,
method_name_cpu: unchecked_scalar_eq_parallelized,
display_name: eq,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_ne,
method_name_cpu: unchecked_scalar_ne_parallelized,
display_name: ne,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_gt,
method_name_cpu: unchecked_scalar_gt_parallelized,
display_name: gt,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_ge,
method_name_cpu: unchecked_scalar_ge_parallelized,
display_name: ge,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_lt,
method_name_cpu: unchecked_scalar_lt_parallelized,
display_name: lt,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_le,
method_name_cpu: unchecked_scalar_le_parallelized,
display_name: le,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_min,
method_name_cpu: unchecked_scalar_min_parallelized,
display_name: min,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_scalar_max,
method_name_cpu: unchecked_scalar_max_parallelized,
display_name: max,
rng_func: default_signed_scalar
);
// The overflowing scalar benchmarks use methods without the `unchecked_` prefix.
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: signed_overflowing_scalar_add,
method_name_cpu: signed_overflowing_scalar_add_parallelized,
display_name: overflowing_add,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: signed_overflowing_scalar_sub,
method_name_cpu: signed_overflowing_scalar_sub_parallelized,
display_name: overflowing_sub,
rng_func: default_signed_scalar
);
// Division draws its divisor from a dedicated generator (`div_scalar`).
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: unchecked_signed_scalar_div_rem,
method_name_cpu: unchecked_signed_scalar_div_rem_parallelized,
display_name: div_rem,
rng_func: div_scalar
);
@@ -2247,234 +2396,277 @@ mod cuda {
// Default (non-`unchecked_`) signed arithmetic benches: add, sub, neg, abs, mul, div_rem.
// Two-operand operations go through `..._signed_fn!`, single-operand ones (neg, abs)
// through `..._signed_unary_fn!`. Neither macro definition is visible in this part
// of the file; presumably each invocation expands to one benchmark function.
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: add,
method_name_cpu: add_parallelized,
display_name: add
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: sub,
method_name_cpu: sub_parallelized,
display_name: sub
);
define_cuda_server_key_bench_clean_input_signed_unary_fn!(
method_name: neg,
method_name_cpu: neg_parallelized,
display_name: neg
);
define_cuda_server_key_bench_clean_input_signed_unary_fn!(
method_name: abs,
method_name_cpu: abs_parallelized,
display_name: abs
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: mul,
method_name_cpu: mul_parallelized,
display_name: mul
);
// NOTE(review): `display_name` is `div_mod` although the method is `div_rem`.
// Presumably kept for continuity of existing benchmark ids -- confirm before renaming.
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: div_rem,
method_name_cpu: div_rem_parallelized,
display_name: div_mod
);
// Default signed bitwise benches: bitand, bitnot, bitor, bitxor.
// The binary operations use `..._signed_fn!`; `bitnot` takes a single operand and
// uses `..._signed_unary_fn!`.
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: bitand,
method_name_cpu: bitand_parallelized,
display_name: bitand
);
// NOTE(review): unlike every sibling, `method_name_cpu` here is plain `bitnot`
// with no `_parallelized` suffix. Presumably the CPU side has no parallelized
// variant of this operation -- confirm against the CPU server key API.
define_cuda_server_key_bench_clean_input_signed_unary_fn!(
method_name: bitnot,
method_name_cpu: bitnot,
display_name: bitnot
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: bitor,
method_name_cpu: bitor_parallelized,
display_name: bitor
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: bitxor,
method_name_cpu: bitxor_parallelized,
display_name: bitxor
);
// Default signed rotate and shift benches where the amount is not a scalar
// (the scalar variants are registered through a different macro).
// These use the dedicated `..._signed_shift_rotate!` macro rather than the generic
// `..._signed_fn!`; its definition is not visible here -- presumably it generates
// the second operand differently from the generic binary macro. TODO confirm.
define_cuda_server_key_bench_clean_input_signed_shift_rotate!(
method_name: rotate_left,
method_name_cpu: rotate_left_parallelized,
display_name: rotate_left
);
define_cuda_server_key_bench_clean_input_signed_shift_rotate!(
method_name: rotate_right,
method_name_cpu: rotate_right_parallelized,
display_name: rotate_right
);
define_cuda_server_key_bench_clean_input_signed_shift_rotate!(
method_name: left_shift,
method_name_cpu: left_shift_parallelized,
display_name: left_shift
);
define_cuda_server_key_bench_clean_input_signed_shift_rotate!(
method_name: right_shift,
method_name_cpu: right_shift_parallelized,
display_name: right_shift
);
// Default signed comparison benches (eq, ne, gt, ge, lt, le) followed by min and max.
// Each invocation follows the same shape: `method_name` is the bare operation,
// `method_name_cpu` is that name with `_parallelized` appended, and `display_name`
// repeats the operation name.
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: eq,
method_name_cpu: eq_parallelized,
display_name: eq
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: ne,
method_name_cpu: ne_parallelized,
display_name: ne
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: gt,
method_name_cpu: gt_parallelized,
display_name: gt
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: ge,
method_name_cpu: ge_parallelized,
display_name: ge
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: lt,
method_name_cpu: lt_parallelized,
display_name: lt
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: le,
method_name_cpu: le_parallelized,
display_name: le
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: min,
method_name_cpu: min_parallelized,
display_name: min
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: max,
method_name_cpu: max_parallelized,
display_name: max
);
// Default signed overflowing addition and subtraction.
// The `display_name` omits the `signed_` prefix carried by the method names.
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: signed_overflowing_add,
method_name_cpu: signed_overflowing_add_parallelized,
display_name: overflowing_add
);
define_cuda_server_key_bench_clean_input_signed_fn!(
method_name: signed_overflowing_sub,
method_name_cpu: signed_overflowing_sub_parallelized,
display_name: overflowing_sub
);
// Default signed scalar arithmetic benches: add, mul, sub.
// In addition to the method/display names, the scalar macro takes `rng_func`, the
// generator used for the scalar operand (definitions not visible in this view).
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_add,
method_name_cpu: scalar_add_parallelized,
display_name: add,
rng_func: default_signed_scalar
);
// Multiplication is the only entry here using `mul_signed_scalar`; presumably it
// constrains the scalar differently from the default generator -- TODO confirm.
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_mul,
method_name_cpu: scalar_mul_parallelized,
display_name: mul,
rng_func: mul_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_sub,
method_name_cpu: scalar_sub_parallelized,
display_name: sub,
rng_func: default_signed_scalar
);
// Default signed scalar bitwise benches: bitand, bitor, bitxor.
// All three draw the scalar operand with `default_signed_scalar`; the
// `display_name` drops the `scalar_` prefix of the method name.
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_bitand,
method_name_cpu: scalar_bitand_parallelized,
display_name: bitand,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_bitor,
method_name_cpu: scalar_bitor_parallelized,
display_name: bitor,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_bitxor,
method_name_cpu: scalar_bitxor_parallelized,
display_name: bitxor,
rng_func: default_signed_scalar
);
// Default signed scalar shift and rotate benches.
// All four use `shift_scalar` as the scalar generator instead of
// `default_signed_scalar`; presumably it bounds the shift/rotate amount to the
// operand's bit width -- the generator is defined outside this view, TODO confirm.
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_left_shift,
method_name_cpu: scalar_left_shift_parallelized,
display_name: left_shift,
rng_func: shift_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_right_shift,
method_name_cpu: scalar_right_shift_parallelized,
display_name: right_shift,
rng_func: shift_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_rotate_left,
method_name_cpu: scalar_rotate_left_parallelized,
display_name: rotate_left,
rng_func: shift_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_rotate_right,
method_name_cpu: scalar_rotate_right_parallelized,
display_name: rotate_right,
rng_func: shift_scalar
);
// Default signed scalar comparison benches (eq, ne, gt, ge, lt, le) followed by
// scalar min and max. Every entry uses `default_signed_scalar` for the scalar
// operand and a `display_name` equal to the method name without its `scalar_` prefix.
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_eq,
method_name_cpu: scalar_eq_parallelized,
display_name: eq,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_ne,
method_name_cpu: scalar_ne_parallelized,
display_name: ne,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_gt,
method_name_cpu: scalar_gt_parallelized,
display_name: gt,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_ge,
method_name_cpu: scalar_ge_parallelized,
display_name: ge,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_lt,
method_name_cpu: scalar_lt_parallelized,
display_name: lt,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_le,
method_name_cpu: scalar_le_parallelized,
display_name: le,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_min,
method_name_cpu: scalar_min_parallelized,
display_name: min,
rng_func: default_signed_scalar
);
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: scalar_max,
method_name_cpu: scalar_max_parallelized,
display_name: max,
rng_func: default_signed_scalar
);
// Default signed scalar division with remainder.
// NOTE(review): the scalar generator is `div_scalar`, not a `*_signed_scalar`
// generator; presumably chosen to keep the divisor valid (e.g. non-zero) -- its
// definition is outside this view, confirm before relying on that.
define_cuda_server_key_bench_clean_input_scalar_signed_fn!(
method_name: signed_scalar_div_rem,
method_name_cpu: signed_scalar_div_rem_parallelized,
display_name: div_rem,
rng_func: div_scalar
);
@@ -2697,6 +2889,7 @@ use cuda::{
cuda_cast_ops, default_cuda_dedup_ops, default_cuda_ops, default_scalar_cuda_ops,
unchecked_cuda_ops, unchecked_scalar_cuda_ops,
};
use tfhe::{get_pbs_count, reset_pbs_count};
#[cfg(feature = "gpu")]
fn go_through_gpu_bench_groups(val: &str) {

Some files were not shown because too many files have changed in this diff Show More