Compare commits

...

301 Commits

Author SHA1 Message Date
David Testé
516f001056 DEBUG: run bench with minimal parsing 2025-08-13 15:23:28 +02:00
Nicolas Sarlin
5fa8cc8563 fix(core): use of deprecated rayon repeatn 2025-08-13 15:00:15 +02:00
Arthur Meyre
a7dd071bd4 test(shortint): pbs 128 + compression test with new noise measurement 2025-08-13 09:16:49 +02:00
Arthur Meyre
eb6760a7c8 feat(core): add a primitive to build an LweCiphertextList from an Iterator 2025-08-13 09:16:49 +02:00
Arthur Meyre
7f0838270c chore(core): relax trait requirements for GLWE encryption/decryption 2025-08-13 09:16:49 +02:00
Arthur Meyre
1169096058 chore: fix whitespace in Makefile 2025-08-13 09:16:49 +02:00
Antoniu Pop
9316922e81 fix(benches): fix hlapi dex benchmark transfer function 2025-08-12 17:28:40 +01:00
dependabot[bot]
8ff73f7d73 chore(deps): bump actions/cache from 4.2.3 to 4.2.4
Bumps [actions/cache](https://github.com/actions/cache) from 4.2.3 to 4.2.4.
- [Release notes](https://github.com/actions/cache/releases)
- [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md)
- [Commits](5a3ec84eff...0400d5f644)

---
updated-dependencies:
- dependency-name: actions/cache
  dependency-version: 4.2.4
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-12 15:44:29 +02:00
dependabot[bot]
0c3bda3444 chore(deps): bump actions/download-artifact from 4.3.0 to 5.0.0
Bumps [actions/download-artifact](https://github.com/actions/download-artifact) from 4.3.0 to 5.0.0.
- [Release notes](https://github.com/actions/download-artifact/releases)
- [Commits](d3f86a106a...634f93cb29)

---
updated-dependencies:
- dependency-name: actions/download-artifact
  dependency-version: 5.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-12 15:44:22 +02:00
luory ✞
55eade03e6 chore: fix typo in comment section 2025-08-12 15:38:37 +02:00
David Testé
52b1946f25 chore(ci): use crates.io trusted publishing feature 2025-08-12 12:54:57 +02:00
Nicolas Sarlin
bc5c2f51ff fix(bench): store correct pfail from params 2025-08-12 09:44:37 +02:00
Enzo Di Maria
e5e54be4a4 refactor(gpu): moving unchecked_ilog2_async to the backend 2025-08-12 09:05:29 +02:00
Nicolas Sarlin
0aaadf04d9 chore(versionable): bump version to 0.6.1 2025-08-11 16:49:27 +02:00
Mayeul@Zama
4d1b917045 feat(shortint): add multibit noise squashing 2025-08-11 16:30:59 +02:00
Mayeul@Zama
a85b30a7b2 refactor(shortint): change NoiseSquashingPrivateKeyView fields 2025-08-11 16:30:59 +02:00
Mayeul@Zama
81fa0e43ee feat(core): add conformance for Fourier128LweMultiBitBootstrapKey 2025-08-11 16:30:59 +02:00
Mayeul@Zama
15bc0c6792 style(core): destructure conformance parameters 2025-08-11 16:30:59 +02:00
Mayeul@Zama
8b5de6d57d feat(core): add support for InputScalar!=OutputScalar for multi_bit_bootstrap 2025-08-11 16:30:59 +02:00
Nicolas Sarlin
54c6b9e50a feat(versionable): impl Versionize for Btree{Map, Set} 2025-08-11 13:47:27 +02:00
Arthur Meyre
e31333b2c7 feat: add missing into/from_raw_parts functions for compressed KSK material 2025-08-11 13:02:21 +02:00
Arthur Meyre
37ed32cf4f chore: fix typo in into_raw_parts function 2025-08-11 13:02:21 +02:00
Guillermo Oyarzun
4a3be71bd7 fix(gpu): create message extract lut only when needed 2025-08-11 10:38:31 +02:00
Arthur Meyre
a63207af9e chore(ci): add MSRV build to check we are compliant with what we announce
- have to downgrade param_dedup edition as 1.84 cannot handle 2024 in a
workspace
2025-08-08 18:06:29 +02:00
Arthur Meyre
4c4c7a47a5 chore(ci): remove old backward compat mechanism for branch fetching
- nowadays backward compat data is directly in the repo which made the old
mechanism obsolete
2025-08-08 18:06:29 +02:00
Arthur Meyre
dbc3924989 chore(ci): enable extended types in the docs.rs build 2025-08-08 18:06:29 +02:00
Arthur Meyre
04d4ccc16c chore(ci): remove TFHE_SPEC from Makefile
- this is a leftover from a complicated attempt at backward compatibility
no need to keep this
2025-08-08 18:06:29 +02:00
Arthur Meyre
9d4a9fe71e chore: check packing is possible before packing in integer noise squashing 2025-08-08 10:35:16 +02:00
David Testé
3b42f9873a chore(bench): write params to file for each zk benchmark on gpu
To be parsable each benchmark criterion ID must have their crypto
details written to a file.
2025-08-07 15:17:33 +02:00
pgardratzama
afd8f58a8d feat(hpu): update backend to support multiple V80 device, id of v80 is its serial number
- update psi64 to replace fw with stable version (3.1.0), remove psi16.hpu
2025-08-07 14:58:39 +02:00
Guillermo Oyarzun
1b92bcf476 feat(gpu): extra optimizations for 2_2 params kernels and bugs fixes 2025-08-07 09:34:32 +02:00
Guillermo Oyarzun
79d5db66d4 feat(gpu): use warp level optimizations for fft 2025-08-07 09:34:32 +02:00
Guillermo Oyarzun
d741e55218 feat(gpu): write specialized pbs keybundle for 2_2 params 2025-08-07 09:34:32 +02:00
Guillermo Oyarzun
ef5a391dc2 feat(gpu): write specialized pbs accumulate for 2_2 params 2025-08-07 09:34:32 +02:00
Enzo Di Maria
d1c417bf71 refactor(gpu): cleaning compression 2025-08-07 09:31:55 +02:00
Arthur Meyre
46a7229c81 chore: fix minimum version for cargo check
- this only works if the current major is the major we expect
2025-08-05 17:30:07 +02:00
Enzo Di Maria
852a06b330 refactor(gpu): orpf with grouped processing and for multi-gpu 2025-08-05 09:58:25 +02:00
Guillermo Oyarzun
ea200c3548 chore(gpu): enable nvidia mps in long run tests 2025-08-04 16:18:48 +02:00
Arthur Meyre
1a1b88362c chore: fix noise checks timeout again as there are TWO timeout locations 2025-08-04 09:48:39 +02:00
Mayeul@Zama
fe2dde0e0c chore(gpu): fix index type 2025-08-01 10:38:09 +02:00
Afounso Souza
e7e095b924 fix(gpu): fix typo
fix(gpu): fix typo
2025-08-01 10:21:54 +02:00
Andrei Stoian
7bf2ec6ff2 chore(gpu): fix warnings detection 2025-07-31 18:47:08 +02:00
Agnes Leroy
2d7e1b2293 chore(gpu): change active gpu count logic 2025-07-31 16:10:45 +01:00
Andrei Stoian
79aeeca3b2 fix(gpu): install gcc requested by the workflow 2025-07-31 15:52:39 +01:00
Andrei Stoian
e89d2f8b05 fix(gpu): install gcc requested by the workflow 2025-07-31 15:52:39 +01:00
Andrei Stoian
0b3ea4be9e fix(gpu): install gcc requested by the workflow 2025-07-31 15:52:39 +01:00
Andrei Stoian
68a7520e73 fix(gpu): install gcc requested by the workflow 2025-07-31 15:52:39 +01:00
Guillermo Oyarzun
a411e5720d fix(gpu): update soon deprecated version nvtx 2025-07-31 16:52:05 +02:00
Mayeul@Zama
5ee5569d0d chore: remove redundant gpu feature gate 2025-07-31 14:11:33 +01:00
Agnes Leroy
54d038ef30 chore(gpu): enhance scatter to check gpu count is ok 2025-07-31 13:11:52 +01:00
Agnes Leroy
b6e6abb066 chore(gpu): add corner case test for mul 2025-07-31 13:11:29 +01:00
Arthur Meyre
82a5cc7f2d chore(ci): increase timeout for noise checks 2025-07-31 12:00:15 +02:00
Guillermo Oyarzun
908922171d fix(gpu): remove unused pointer in squash and add some extra checks 2025-07-31 09:52:34 +01:00
Kendra Karol Sevilla
84f6a8082d fix(cuda): correct radix block mismatch check in LWE array validation 2025-07-31 09:28:48 +01:00
otc group
0bc59dca59 chore: fix typo in comment
chore: fix typo in comment
2025-07-31 09:20:49 +01:00
Agnes Leroy
09ffc39b15 fix(gpu): fix inconsistent types 2025-07-31 08:14:45 +01:00
swarnabhasinha
099345df02 fix(api): Add min/max on owned types 2025-07-30 21:33:58 +02:00
Agnes Leroy
48c10e91f7 chore(gpu): fix gpu setup action for ci 2025-07-30 14:22:13 +01:00
Andrei Stoian
36eceaf05e feat(gpu): utility debug workflows in ci 2025-07-30 12:55:40 +01:00
Arthur Meyre
e8986cbd7c chore: setup CI for noise checks 2025-07-29 15:29:24 +02:00
Arthur Meyre
fc8063a59b test: add noise simulation framework for basic operators
- test secret key encryption + start of compute atomic pattern in shortint
- only supports classic PBS with drift mitigation currently
2025-07-29 15:29:24 +02:00
Arthur Meyre
65b034ef70 chore(core): update noise formulas 2025-07-29 15:29:24 +02:00
cryptoraph
2aa83c99ea fix(core_crypto): correct typo in GLWE keyswitch assertion message 2025-07-28 17:09:56 +02:00
cryptoraph
d78266e141 fix(cuda): correct typo in keyswitch error message 2025-07-28 17:09:56 +02:00
Pedro Alves
62e6504ef0 fix(gpu): fixes some wrong indexes used in cuda_set_device() 2025-07-28 12:43:13 +01:00
bigbear
209a8f1ad9 fix: correct typo in InvalidRangeError message 2025-07-25 17:18:57 +02:00
Guillermo Oyarzun
3621dd1ae7 chore(gpu): correct pfail in readme 2025-07-25 16:46:03 +02:00
Guillermo Oyarzun
b5a7199c15 chore(gpu): update cuda version in ci 2025-07-25 15:57:25 +02:00
Mayeul@Zama
09aaa4e045 chore: enable unused_imports lint in doctests 2025-07-23 11:09:23 +02:00
Mayeul@Zama
63203c58aa refactor(shortint): cleanup decompression key 2025-07-23 10:08:01 +02:00
Mayeul@Zama
03f8a134b3 chore: remove unused gitignore entry 2025-07-21 10:15:56 +02:00
Maximilian Hubert
981da1d3fc fix: fix typo 2025-07-18 18:16:52 +02:00
Nicolas Sarlin
0386090048 chore: missing from/into_raw_parts for noise squash comp priv key 2025-07-18 13:34:23 +02:00
Pedro Alves
7ecda32b41 fix(gpu): refactor broadcast_lut() so make it less error prone 2025-07-17 10:58:52 +01:00
tmontaigu
a4cfc12941 chore: add missing into/from_raw_parts for SQCompression 2025-07-17 11:20:30 +02:00
Alex Pikme
82b6f45785 fix: correct tuple element in izip! macro for 7 parameters 2025-07-17 10:54:45 +02:00
tmontaigu
82cc9d3884 docs: add UpgradeKeyChain 2025-07-16 17:57:10 +02:00
Agnes Leroy
c7785b7214 chore(gpu): add checks on noise levels in debug mode 2025-07-16 11:00:09 +01:00
Enzo Di Maria
a5c876fdac refactor(gpu): creating CudaScalarDivisorFFI for storing decomposed scalars and their metadata 2025-07-16 07:59:20 +01:00
Nicolas Sarlin
2d8ea2de16 feat(shortint): add pbs_order method to AtomicPatternKind 2025-07-15 17:35:47 +02:00
Andrei Stoian
494e0e0601 chore(gpu): add short op sequence test for GPU on PRs 2025-07-15 16:03:45 +02:00
tmontaigu
8c838da209 chore(integer): improve measurements
It seems that in
```rust
bench_group.bench_function(&bench_id, |b| {
  // some code
  b.iter(|| {
      // function to bench
  })
});
```
If we put code in the '// some code' part, it affects the measurements
the slower this code is the worse the measurements can be.

For many operations the gap is small (a few ms or no gap),
but for the division the gap was around 500ms.

So to reduce this, we move out what we can, moving
the keycache access is the most important aspect as it
cost around 70ms to 100ms.

A LazyCell is used in order only access the keycache is the bench is not
filtered out. Which is the behaviour we had before this commit, and the
behaviour we want to keep so that running specific benches via regex
selection stay fast.

Also, for clean input benches, we use `iter` instead of `iter_batched`
as it makes more sense and should give more accurate results as
iter_batched timing include other things that just the timing of the
function.
2025-07-15 12:46:38 +02:00
tmontaigu
c13587b713 fix(integer): fix non-parallel prop with noisy block 2025-07-15 12:43:41 +02:00
tmontaigu
8dea5cf145 feat(integer): truncate carry prop on trivial zeros
This changes the full_propagate_parallelized to not propagate
most significant blocks which are trivial zeros.

This is a small performance improvement, especially interesting
when having a bunch of FheUintX data, casted to FheUintY (Y > X)
and summing them (e.g. n FheUint2, casted to FheUint32  and doing the
sum to get the result on 32 bit)
2025-07-15 12:43:41 +02:00
Agnes Leroy
0d41b4f445 chore(gpu): add bench command for cuda and update weekly bench 2025-07-11 14:04:32 +01:00
Agnes Leroy
068cbc0f41 chore(gpu): add hl api noise squash latency and throughput bench 2025-07-11 14:04:32 +01:00
Agnes Leroy
f8947ddff3 chore(gpu): remove nightly schedule now that ci is lighter 2025-07-11 12:43:36 +01:00
Pedro Alves
1b98312e2c fix(gpu): fix regression on ERC20 throughput
- partially revert changes done in fd79c4f972
- transfers for the GPU case should be measured using sequential
  operations (without rayon!)
2025-07-11 08:57:19 +01:00
Pedro Alves
d3dd010deb fix(gpu): reduces number of elements in the ZK throughput benchmark 2025-07-11 08:57:01 +01:00
Agnes Leroy
15762623d1 chore(gpu): minor refactor in sum ctxt 2025-07-10 16:24:02 +01:00
Beka Barbakadze
c6865ab880 fix(gpu): fix pbs128 multi-gpu bug
Signed-off-by: Beka Barbakadze <beka.barbakadze@zama.ai>
2025-07-10 15:54:27 +01:00
Enzo Di Maria
e376df2fa4 refactor(gpu): moving unsigned_scalar_div_rem and signed_scalar_div_rem to the backend 2025-07-10 09:24:13 +02:00
Arthur Meyre
bd739c2d48 chore(docs): uniformize paths in docs to use "-" instead of "_"
- this is to avoid conflicts with gitbook
2025-07-09 14:36:04 +02:00
Pedro Alves
9960f5e8b6 fix(gpu): Fix expand bench on multi-gpus 2025-07-09 09:17:55 +01:00
Nicolas Sarlin
776f08b534 chore(ci): remove close_data_pr workflow 2025-07-09 09:31:29 +02:00
David Testé
ac13eed3b1 chore(ci): allow git lfs sync between repositories
Since integration of HPU backend, some Git LFS references need to be synced along with the rest of the codebase. The usage of valtech-sd/git-sync action, which is a fork of wei/git-sync, allows to push git lfs reference to another repository.
2025-07-09 09:07:48 +02:00
Arthur Meyre
17d3a492b6 chore: only run backward compat clippy on x86 machines
- older versions of the crates are only compilable with x86, disable on arm
for now
- revisit when the crates are split ?
2025-07-09 08:29:12 +02:00
Enzo Di Maria
ba87f1ba5e chore(gpu): removing useless arguments 2025-07-08 16:17:51 +02:00
Nicolas Sarlin
c70ad3374e chore(ci): allow workflows to run concurrently on main 2025-07-08 09:57:25 +02:00
Nicolas Sarlin
c7ec835e5f chore: adds params_to_file for noise squashing compression 2025-07-07 17:31:28 +02:00
Agnes Leroy
075b2259d3 chore(gpu): reduce ci time by reducing testing of unused parameters 2025-07-07 16:30:35 +01:00
Pedro Alves
23ebd42209 fix(gpu): fix compression throughput benchmark 2025-07-07 16:30:24 +01:00
Nicolas Sarlin
bb1ff363d3 chore(ci): use Cargo.lock for installed tools 2025-07-07 13:10:55 +02:00
Nicolas Sarlin
7bcd6b94da chore: use script to pull hpu files 2025-07-07 13:10:55 +02:00
Nicolas Sarlin
57cbab9fe1 chore(backward): integrate backward compat data
Code is taken from
59a6179831

Adapted to make ci work
2025-07-07 13:10:55 +02:00
Andrei Stoian
97ce0f6ecf feat(gpu): update GPU documentation 2025-07-07 09:44:43 +02:00
Nicolas Sarlin
b6c21ef1fe docs: describe noise squashed compression 2025-07-07 09:32:51 +02:00
Nicolas Sarlin
e599608831 chore(shortint): make decrypt_no_decode public 2025-07-07 09:30:14 +02:00
Arthur Meyre
f243491442 chore(docs): add features to the rust_configuration page 2025-07-04 17:06:15 +02:00
Arthur Meyre
b5248930a2 chore(docs): add handbook in explanation section 2025-07-04 17:06:15 +02:00
Arthur Meyre
2d280d98d2 chore(docs): add handbook in the security and cryptography section 2025-07-04 17:06:15 +02:00
Arthur Meyre
10b57f8a8e chore(docs): add link to GPU and HPU backend docs in the installation page 2025-07-04 17:06:15 +02:00
Arthur Meyre
242df05eb2 chore(docs): add links to GPU and HPU backend on front page 2025-07-04 17:06:15 +02:00
Arthur Meyre
899d4a7750 docs: add noise squashing documentation 2025-07-04 16:08:25 +02:00
Agnes Leroy
48dfeb21dc chore(gpu): refactor size tracker to avoid future bugs 2025-07-04 14:37:02 +01:00
Skylar Ray
a46ce3fb51 chore: fix typo in classic.rs 2025-07-04 13:33:15 +02:00
Arthur Meyre
192777bde6 chore(ci): handle unverified PRs to autoclose 2025-07-04 13:18:35 +02:00
Dmitry
3aa198311c fix: broken GPU arg due to typo 2025-07-04 11:04:14 +01:00
David Testé
7034d4ceb4 doc(bench): update benchmark results tables
All the results are using parameters set with p-fail of 2**-128.
CPU tables using parameters set with p-fail 2**-64  are removed.
GPU tables for 1xH100 and 2xH100 are now replace with the new
hardware standard: 8xH100-SXM5.
HPU results are added to the backend comparison table and integrate
latest operations available.
2025-07-04 10:06:14 +02:00
Arthur Meyre
799ae92f59 chore: remove dead link from docs 2025-07-04 10:04:22 +02:00
Arthur Meyre
36e9371fdf test: use hamming weight = 1/2 for core noise tests
- allows to have less variability and matches exactly what the noise
formulas expect for uniform binary secret keys
2025-07-04 09:55:35 +02:00
Pedro Alves
8c88678ee8 feat(gpu): implement 128-bit multi-bit PBS 2025-07-03 20:34:32 -03:00
leopardracer
e1beea5ecb chore: Update test_user_docs.rs 2025-07-03 20:08:13 +02:00
Agnes Leroy
701411044b chore(gpu): update SXM5 cost 2025-07-03 17:00:02 +01:00
JJ-hw
405fdec6b9 fix(hpu): Fix iop_propagate_msb_to_lsb_blockv: propagation in application was not done correctly 2025-07-03 14:31:59 +02:00
Agnes Leroy
b3355e2b2f chore(gpu): remove template from sum ciphertexts, add two missing delete 2025-07-03 12:51:29 +01:00
Agnes Leroy
e4d856afdf chore(gpu): update noise squashing parameters 2025-07-03 12:51:19 +01:00
Pedro Alves
22ddba7145 fix(gpu): refactor the (128-bit and regular) classical PBS entry point to remove the num_samples parameter
- fixes the throughput for those PBSs
- also fixes the throughput benchmark for regular PBSs
2025-07-03 08:23:09 -03:00
David Testé
d955696fe0 chore(bench): reduce number of bit sizes to benchmark
This is done to reduce execution time since 4 bits precision is not useful to measure.
2025-07-03 12:45:02 +02:00
Baptiste Roux
eb0b9643bb fix(hpu): Fix clippy_hpu_mockup makefile entry 2025-07-03 10:28:52 +02:00
Arthur Meyre
d68305e984 chore: change link to point to the FHE.org discord for support 2025-07-03 10:28:10 +02:00
Enzo Di Maria
3d64316c66 refactor(gpu): moving signed_scalar_div_async and get_signed_scalar_div to the backend 2025-07-03 08:52:04 +01:00
Agnes Leroy
4bba35e926 chore(gpu): remove m3_c3 & gf 3 params from multi-gpu tests to reduce ci time 2025-07-02 17:18:26 +01:00
Baptiste Roux
187159d9f9 chore(hpu): bump backend version 2025-07-02 17:31:45 +02:00
Nicolas Sarlin
0cf9f9f3bd chore(zk): bump tfhe-zk-pok to 0.7.0 2025-07-02 17:31:02 +02:00
tmontaigu
dcb6049441 chore: backward data test for CompressedSquashedNoiseCiphertextList 2025-07-02 16:51:05 +02:00
tmontaigu
7203cc3564 feat(hlapi): add CompressedSquashedNoiseCiphertextList 2025-07-02 16:51:05 +02:00
Agnes Leroy
b198c18498 chore(gpu): bump backend version 2025-07-02 15:34:10 +01:00
pgardratzama
916e6e6a61 chore(hpu): fix typo in comment of Event implementation
Co-authored-by: emmmm <155267286+eeemmmmmm@users.noreply.github.com>
2025-07-02 15:32:57 +02:00
pgardratzama
9ac776185a doc(hpu): fix spelling issue in data_versioning.md
Co-authored-by: futreall <86553580+futreall@users.noreply.github.com>
2025-07-02 15:32:57 +02:00
pgardratzama
28e44ca237 doc(hpu): Fix link to FPGA repository in the README
Co-authored-by: MozirDmitriy <dmitriymozir@gmail.com>
2025-07-02 15:32:57 +02:00
Baptiste Roux
6432b98591 chore(mockup): Add clippy target for tfhe_hpu_mockup
Also fix all clippy lint
2025-07-02 14:41:41 +02:00
Helder Campos
15cce9f641 fix(hpu): Fixing the llt scheduler
In RTL simulations, it is possible that a very strange HPU with huge
amount of batches and very little registers is randomized. In this case,
if the scheduler was configured to fill the batch before flushing, it
would run out of registers. The solution is to force flush in this
scenario.
2025-07-02 14:41:41 +02:00
Baptiste Roux
5090e9152b chore: Revert "chore: allow to not perform the half case correction for mean compensation"
This reverts commit 00ffa3efdc.
2025-07-02 14:41:41 +02:00
Baptiste Roux
24572edb1c feat(hpu): Add support for centered modswitch.
Add new field in HpuPBSParameters (log2_pfail and modulus_switch_type).
Also add new parameters set definition in shortint for benchmark matching.

Remove the used of use_mean_compensation register, this information is now embedded inside the parameters set definition.
Update psi64.hpu archive with newest bitstream
2025-07-02 14:41:41 +02:00
Helder Campos
303f67fe11 fix(hpu): Fixing the multiplication algorithm in LLT
It was failing before for nu > 5. Also corrected the initial degree
after the partial products, which decreases the number of PBSs to do
with nu > 5.
2025-07-02 14:41:41 +02:00
Arthur Meyre
86a40bcea9 chore: move gated import to section with feature gate in HL erc20 bench 2025-07-02 13:14:31 +02:00
Agnes Leroy
97c0290ff7 fix(gpu): revert avoid copy to host in sum ciphertexts
This reverts commit 2b57fc7bd8.
2025-07-02 08:30:12 +01:00
Agnes Leroy
3ba6a72166 chore(gpu): move sum ctxt lut allocation to host to save memory 2025-07-02 08:30:12 +01:00
tmontaigu
dbd158c641 feat(integer): add CompressedSquashedNoiseCiphertextList 2025-07-02 08:51:26 +02:00
Nicolas Sarlin
0a738c368a chore(backward): update backward data repo branch 2025-07-01 14:18:10 +02:00
Arthur Meyre
4325da72cf chore: allow to not perform the half case correction for mean compensation 2025-07-01 14:18:10 +02:00
Mayeul@Zama
e1620d4087 feat(shortint): add support for centered modulus switch in parameters 2025-07-01 14:18:10 +02:00
Mayeul@Zama
6805778cb8 feat: add centered modulus switch 2025-07-01 14:18:10 +02:00
Mayeul@Zama
802945fa52 feat(core): add missing APIs 2025-07-01 14:18:10 +02:00
Mayeul@Zama
fff86fb3b4 fix: fix feature gate 2025-07-01 14:18:10 +02:00
Nicolas Sarlin
950915a108 chore(ci): use the correct data branch in clippy_ws_tests 2025-07-01 14:18:10 +02:00
Andrei Stoian
5e6562878a chore(gpu): add cuda debug target for integer tests 2025-07-01 10:37:17 +02:00
Andrei Stoian
d0743e9d3d chore(gpu): refactor the gpu oom checker 2025-07-01 10:37:05 +02:00
Guillermo Oyarzun
981083360e feat(gpu): increase keyswitch occupancy 2025-07-01 09:54:14 +02:00
tmontaigu
848f9d165c feat: add upgrade key chain
This adds an UpgradeKeyChain struct
that can be used to easily upgrade parameters of ciphertexts
if some some upgrade keys are provided
2025-07-01 09:37:16 +02:00
Beka Barbakadze
2b57fc7bd8 feat(gpu): Avoid copy to host in sum ciphertexts 2025-07-01 07:58:09 +01:00
Andrei Stoian
e3d90341cf chore(gpu): add abs to random op sequence test on GPU 2025-06-30 21:37:09 +02:00
Nicolas Sarlin
dd94d6f823 feat(zk)!: allow to forbid specific configs in zk conformance
BREAKING CHANGE:
- conformance for `CompactPkeProof` is now `CompactPkeProofConformanceParams`
- conformance for `shortint::ciphertext::zk::ProvenCompactCiphertextList` is now
	`ProvenCompactCiphertextListConformanceParams`
2025-06-30 18:05:27 +02:00
Helder Campos
25362b2db2 feat(hpu): Adding support for modulus switch mean compensation
Including the pfail 2e-128 parameter set.

Note: The HPU mockup still does not support mean compensation.
2025-06-30 16:01:39 +01:00
Arthur Meyre
fe5542f39e chore: add SLSA badge
Co-authored-by: Olexandr88 <radole1203@gmail.com>
2025-06-30 15:48:55 +02:00
Agnes Leroy
42112c53c2 chore(gpu): restore mul mem usage 2025-06-30 09:10:54 +01:00
Agnes Leroy
bc2e595cf5 fix(gpu): fix size tracker value 2025-06-27 17:12:11 +01:00
Enzo Di Maria
378b84946f refactor(gpu): moving get_scalar_div_size_on_gpu to backend and fixing gpu tests 2025-06-27 17:02:50 +02:00
Enzo Di Maria
8a4c5ba8ef refactor(gpu): moving unchecked_scalar_div_async to backend 2025-06-27 17:02:50 +02:00
Nicolas Sarlin
940a9ba860 chore(zk): enable tfhe-lints on zk pok 2025-06-27 14:34:25 +02:00
Nicolas Sarlin
c475dc058e feat(zk): add compact hash mode for zkv2 2025-06-27 14:34:25 +02:00
Arthur Meyre
215ded90c0 chore: make multi bit pbs 128 more flexible 2025-06-20 17:15:11 +02:00
Agnes Leroy
8a2d93aaa8 fix(gpu): compression memory check bug, size computation was incorrect 2025-06-20 15:45:01 +02:00
Arthur Meyre
5a48483247 fix(shortint): wrong LweDimension returned by prf multibit mod switched ct
- added multi bit param to uniformity PRF check
2025-06-20 12:08:19 +02:00
pgardratzama
702989f796 fix(hpu): it seems transfer_safe is not totally safe with HPU 2025-06-20 10:04:16 +02:00
pgardratzama
cb1e298ebe chore(hpu): modify workflow to fetch & pull bitstreams using to get git-lfs 2025-06-20 10:04:16 +02:00
Baptiste Roux
a271cedb05 fix(hpu): Remove some hardcoded filename in tandem
Also enhance error handling related to user misconfiguration.
And remove a bug with ami devn reading
2025-06-20 09:04:22 +02:00
Arthur Meyre
9eb0e831f5 chore: fix use proper parameter for wasm bench
javascript and their nonsensical fallbacks be damned to eternal suffering
2025-06-19 19:34:04 +02:00
Enzo Di Maria
7e4abfa4ff refactor(gpu): moving extend_radix_with_sign_msb_async to backend 2025-06-19 14:51:02 +02:00
Nicolas Sarlin
ce7c15585e chore(zk): refactor hashes to reuse code between proof and verify 2025-06-19 13:48:20 +02:00
Nicolas Sarlin
58f7457660 chore(zk): rename verify_inner to verify_impl to match the proof 2025-06-19 13:48:20 +02:00
David Testé
2d224e75a1 chore(ci): set pull-requests permission to write in commit checks
This is mandatory according to the action documentation,
notably to be able to write issue comment within the pull-request.
2025-06-19 13:45:44 +02:00
Agnes Leroy
e5a9145cce fix(gpu): fix perf regression introduced in 1936ec6d84 2025-06-19 13:34:36 +02:00
tmontaigu
f5f7213289 feat: improve division for 2_2 parameters
The improvement is to compute the quotient digit by digit and
not bit by bit.

This could also probably work for 3_3 and 4_4 but it is not a priority

This brings the 64bits division down to ~5.5s from 8.6s
2025-06-19 13:03:40 +02:00
tmontaigu
b917cf4530 feat(core): plug XofSeed 2025-06-19 09:57:29 +02:00
Mayeul@Zama
1873b627d6 chore: add TODO for Glwe MS 2025-06-18 16:54:12 +02:00
Mayeul@Zama
cb8d753ea6 refactor(core): cleanup unused function 2025-06-18 16:54:12 +02:00
Mayeul@Zama
88e8fa6da9 refactor(core): separate BR and MS 2025-06-18 16:54:12 +02:00
Mayeul@Zama
0ea7c29dbd refactor(core): separate BR and MS 2025-06-18 16:54:12 +02:00
Mayeul@Zama
d90bd8bf89 feat(core): add grouping_factor to MultiBitModulusSwitchedCt trait 2025-06-18 16:54:12 +02:00
Mayeul@Zama
bf5e4474a2 feat(core): add ModulusSwitchedCt trait 2025-06-18 16:54:12 +02:00
Mayeul@Zama
7fd5321b78 refactor(core): std_multi_bit_blind_rotate_assign takes msed input 2025-06-18 16:54:12 +02:00
Mayeul@Zama
c168dea284 refactor(core): rename MultiBitModulusSwitchedCt MultiBitModulusSwitchedLweCiphertext 2025-06-18 16:54:12 +02:00
Beka Barbakadze
1936ec6d84 refactor(gpu): refactor and optimize sum_ciphertext in cuda backend 2025-06-18 16:44:20 +02:00
Agnes Leroy
9864dba009 fix(gpu): fix degrees after scalar bitxor 2025-06-18 15:50:59 +02:00
Nicolas Sarlin
8c1ece4fd9 refactor(shortint): improve handling of empty compressed ct list 2025-06-18 11:08:59 +02:00
Nicolas Sarlin
343cad641c chore: TFHE-rs 1.3.0 2025-06-18 10:20:49 +02:00
David Testé
39d77299ed chore(bench): harmonize dex benchmark function names 2025-06-18 09:47:57 +02:00
Arthur Meyre
c841e3be6e chore: add new codeowners for HPU and CUDA code 2025-06-17 11:07:38 +02:00
Nicolas Sarlin
aaeb46f074 feat(shortint): add compression for squashed noise ciphertexts 2025-06-16 18:08:51 +02:00
Nicolas Sarlin
8f7281c219 fix(shortint): handle empty list in compression 2025-06-16 18:08:51 +02:00
tmontaigu
11e86e6162 chore(csprng): bump to 0.6.0
Some (breaking) changes were made to a trait in CSPRNG
2025-06-16 14:05:47 +02:00
David Testé
41c92e06a8 chore(ci): add missing env variable in cuda release workflow 2025-06-16 12:55:31 +02:00
Pedro Alves
f25f394763 feat(gpu): add support to GPU-accelerated expand to HL's CompactCiphertextList
- Drops integer's CudaCompactCiphertextList
2025-06-13 19:18:11 +02:00
pgardratzama
3230c4bb97 chore(hpu): devo was still there, tool compiled in release mode 2025-06-13 16:46:31 +02:00
Baptiste Roux
159a85fc8c chore(hpu): Fix some lint in hpu-v80 ffi 2025-06-13 16:46:31 +02:00
Helder Campos
4cec6fb247 chore(hpu): Fixed some README.md typos.
Also rename pdi_mgmt to hpu_archive_mgmt to match new naming
2025-06-13 16:46:31 +02:00
Baptiste Roux
16c997d686 feat(hpu): Add support for tandem pdi
Add support for hpu archive and Fpga reloading.
Rely on Tandem implementation for hot-reloading of FPGA.
Add reload procedure inside ffi/v80 backend.

Now when starting application on HpuV80, a first check of version is done.
If version mismatch, a pdi reload is triggered
2025-06-13 16:46:31 +02:00
David Testé
dcd1af72d4 chore(ci): fix missing env variable for 4090 benchmark 2025-06-13 11:43:16 +02:00
Agnes Leroy
9bf9107e9e feat(gpu): add memory tracking functions for rand/rand bounded 2025-06-12 17:50:15 +02:00
Andrei Stoian
7986e0bf1d chore(gpu): skip packing ks test if it needs more ram than available 2025-06-12 17:47:10 +02:00
Agnes Leroy
55179c52a7 chore(gpu): fix ci on H100 2025-06-12 16:22:57 +02:00
Emmanuel Ferdman
c103f0380c fix(hpu): modernize logger interface
Signed-off-by: Emmanuel Ferdman <emmanuelferdman@gmail.com>
2025-06-12 09:04:31 +02:00
Nicolas Sarlin
8024753be0 fix(zk): test failed with trivial ct equal to 0 2025-06-11 18:40:32 +02:00
Nicolas Sarlin
506fdfbdd1 chore(zk): use Shake256 XoF instead of rand to generate gamma values 2025-06-11 18:03:12 +02:00
David Testé
a0e08b80e6 chore(ci): use gh cli instead of api calls done with curl
This refactor simplifies reading of data_pr_close workflow.
2025-06-11 16:55:37 +02:00
Enzo Di Maria
d06c3e3926 refactor(gpu): moving sub_assign_async to backend 2025-06-11 16:34:46 +02:00
Arthur Meyre
2bf9d25402 feat: add multi bit pbs 128 with GGSW preparation in FFT domain 2025-06-11 14:47:05 +02:00
Arthur Meyre
fe73f101cc test: add test for an adapted parameter set for std multi bit pbs 128 2025-06-11 14:47:05 +02:00
Arthur Meyre
d12de58284 feat: add std pbs 128 2025-06-11 14:47:05 +02:00
Arthur Meyre
d4ea8cd85f feat: add parallel conversion fo Fourier 128 for LweMultiBitBootstrappigKey 2025-06-11 14:47:05 +02:00
Arthur Meyre
64f2befd6c feat: add Fourier 128 variant of LweMultiBitBootstrappingKey 2025-06-11 14:47:05 +02:00
Arthur Meyre
ce372dcea9 refactor(core): split the LWE multi bit bsk entity file
- to prepare for fft128 variant addition
2025-06-11 14:47:05 +02:00
Agnes Leroy
46b4958c9c feat(gpu): add memory tracking functions for booleans 2025-06-11 13:32:06 +02:00
Agnes Leroy
b25bcbc607 feat(gpu): add mem tracking for eq/ne 2025-06-11 13:32:06 +02:00
Agnes Leroy
5dfacc7975 feat(gpu): add memory tracking for compression and decompression 2025-06-11 11:49:09 +02:00
Guillermo Oyarzun
3d857f62cc refactor(gpu): return trivial indexes after ms noise reduction 2025-06-11 11:28:10 +02:00
Nicolas Sarlin
54c314cd71 chore(ci): always run zk tests 2025-06-11 10:29:53 +02:00
Nicolas Sarlin
38a9853140 chore(zk): check crs conformance in backward compat test 2025-06-11 10:29:53 +02:00
Nicolas Sarlin
360097d70e chore(zk): use random seed in tests 2025-06-11 10:29:53 +02:00
Nicolas Sarlin
c94a76a85a fix(zk): overflow in noise tests 2025-06-11 10:29:53 +02:00
Nicolas Sarlin
be1ade6dd2 chore(zk)!: use 8 bytes dsep and 128bits SID in hash functions
BREAKING_CHANGE:
- PublicParams::from_vec methods have been updated to take 8 bytes dsep and an
  SID. CRS generated before this PR are still supported.
2025-06-11 10:29:53 +02:00
Pedro Alves
53845b298a fix(gpu): fix the packing keyswitch buffer not being allocated on large parameter sets 2025-06-11 08:58:09 +02:00
David Testé
11c0340eca chore(bench): plug server-side proof in zk benchmarks 2025-06-10 18:00:39 +02:00
Baptiste Roux
5e966a3d78 chore(hpu): changes based on code review 2025-06-10 17:43:35 +02:00
Baptiste Roux
443e02215f feat(hpu): Add recent IOp in integer benchmarks 2025-06-10 17:43:35 +02:00
Baptiste Roux
3c632c06ba chore(hpu): Fix/Changes to be compliant with CI 2025-06-10 17:43:35 +02:00
Baptiste Roux
833b593845 feat(hpu): Add support for Lead/Trail/Count/ilog2 in high-level API 2025-06-10 17:43:35 +02:00
JJ-hw
a20c90b090 feat(hpu): Add ILOG2/COUNT0/COUNT1/LEAD0/LEAD1/TRAIL0/TRAIL1 IOp.
Those IOp are tested within new bitcnt category
2025-06-10 17:43:35 +02:00
Baptiste Roux
71e86f0522 feat(hpu): Add support for Shift/Rotate in high-level API
Scalar version not supported yet
2025-06-10 17:43:35 +02:00
Baptiste Roux
cb45f7f429 feat(hpu): Add Rot/Shift IOp
Proper implementation of Scalar version need update in the firmware.
And thus it wasn't done yet.
2025-06-10 17:43:35 +02:00
Baptiste Roux
05a51d47fa feat(hpu): Add check with Pbs gid definition
Currently it's a runtime check, but prevent attribution of same Gid for two != lut
2025-06-10 17:43:35 +02:00
Baptiste Roux
2eb1ccd128 feat(hpu): Add support for DivRem in high-level API 2025-06-10 17:43:35 +02:00
JJ-hw
b7a518b9ee chore(hpu): Cleanup code following clippy advices
Also applied cargo fmt
2025-06-10 17:43:35 +02:00
JJ-hw
39faca219f feat(hpu): Add modulo. Note: not optimized version. Use same algo as the division. 2025-06-10 17:43:35 +02:00
JJ-hw
fe7a8915bc feat(hpu): Add DIV/Divs IOp support
Thoses IOp outputs quotient and remainder for numerator and divider of same size.
2025-06-10 17:43:35 +02:00
Baptiste Roux
96c8c44c71 feat(hpu): Enable some erc20 impl
With the support of overflowing ops, those impl are now available to Hpu
2025-06-10 17:43:35 +02:00
Baptiste Roux
24d581afeb feat(hpu): Add support for Neg operation 2025-06-10 17:43:35 +02:00
Baptiste Roux
3c383ba18f feat(hpu): Add support for overflowing ops in the high-level-api 2025-06-10 17:43:35 +02:00
Baptiste Roux
7622122d90 feat(hpu): add support for overflowing iop
Add new Hpu IOp with overflowing flags. Currently only have simple Ilp implementation.
Should be extend to llt one in the future.
2025-06-10 17:43:35 +02:00
Baptiste Roux
949d3e2153 feat(hpu): Add support for Min/Max in hl-api 2025-06-10 17:43:35 +02:00
Baptiste Roux
fb82c652e2 feat(hpu): Remove nops features
This feature was supersed by the trivial one.
Nops option wasn't relevant and usefull anymore.
2025-06-10 17:43:35 +02:00
Baptiste Roux
3af1937250 feat(hpu): add trivial execution in mockup
Enhance FW debug with quicker execution time.
Also add trivial option in the hpu regression test for quick regression
of the FW generation with the mockup.
2025-06-10 17:43:35 +02:00
Baptiste Roux
05f7869c88 feat(mockup): Add support for trivial ciphertext display
Here to enhance IOp firmware debug experience
2025-06-10 17:43:35 +02:00
Nicolas Sarlin
ab0ec4a238 chore(zk): mark non-pke proofs as experimental 2025-06-10 17:07:33 +02:00
dependabot[bot]
167329c52a chore(deps): bump dtolnay/rust-toolchain
Bumps [dtolnay/rust-toolchain](https://github.com/dtolnay/rust-toolchain) from 888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 to b3b07ba8b418998c39fb20f53e8b695cdcc8de1b.
- [Release notes](https://github.com/dtolnay/rust-toolchain/releases)
- [Commits](888c2e1ea6...b3b07ba8b4)

---
updated-dependencies:
- dependency-name: dtolnay/rust-toolchain
  dependency-version: b3b07ba8b418998c39fb20f53e8b695cdcc8de1b
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-06-10 17:04:02 +02:00
Arthur Meyre
8a280642a7 chore(shortint): alias correct parameters for KS PKE to compute
- has no impact today as parameters were the same, however given the
comment it is best to expose the proper KS from PKE to compute parameters
2025-06-10 17:03:41 +02:00
Arthur Meyre
b29e82b96e test: add noise test check for High Level API 2025-06-10 17:03:41 +02:00
Arthur Meyre
9bda365691 chore(core): add noise distribution test tooling 2025-06-10 17:03:41 +02:00
Arthur Meyre
b686d5cb6a chore: update .gitignore 2025-06-10 17:03:41 +02:00
David Testé
2829c9cc92 chore(ci): parse all pbs counts files for dex benchmarks 2025-06-10 14:19:24 +02:00
Guillermo Oyarzun
0d81623a23 feat(gpu): add squash noise in the hlapi 2025-06-10 13:14:29 +02:00
David Testé
13d797fe9b chore(ci): add bench type to zk benchmarks artifact name
This is done to avoid name collision when both latency and throughput benchmarks are executed within the same workflow run.
2025-06-10 12:30:42 +02:00
Baptiste Roux
0ba2e5c6fd chore(hpu): fix issue with linter
Issue arise following a version update.
This function is clearly unused but kept for huge memory api coherency
and should be used in the future.
2025-06-06 17:55:53 +02:00
Enzo Di Maria
ad3edf3cc3 refactor(gpu): moving scalar_mul_high_async to backend 2025-06-06 17:55:53 +02:00
Pedro Alves
16ff092ce4 fix(gpu): fix race condition on expand when on multi-gpu 2025-06-06 09:34:09 +02:00
Pedro Alves
f511bdc279 chore(gpu): add HL test for GPU expand and fix an issue with exception handling 2025-06-06 09:34:09 +02:00
Agnes Leroy
d3f36b1d86 chore(gpu): fallback to a6000 in case L40's are out of stock 2025-06-05 16:55:10 +02:00
Mayeul@Zama
fbb6b9ace3 refactor(shortint): rename apply_blind_rotate apply_ms_blind_rotate 2025-06-05 14:56:09 +02:00
Mayeul@Zama
d1d1579187 refactor(core): separate modulus_switch from multi_bit_blind_rotate_assign 2025-06-05 14:56:09 +02:00
Mayeul@Zama
a268dced53 refactor(core): refactor MultiBitModulusSwitchedCt 2025-06-05 14:56:09 +02:00
Mayeul@Zama
13b2372624 refactor(core): refactor packing 2025-06-05 14:56:09 +02:00
Agnes Leroy
e2d622b186 feat(gpu): add memory tracking for neg and scalar div 2025-06-05 13:35:01 +02:00
Agnes Leroy
f55007c6fb feat(gpu): add mem tracking for div 2025-06-05 13:35:01 +02:00
Agnes Leroy
3ca85bf904 feat(boolean): add move_to_current_device for booleans 2025-06-05 11:45:54 +02:00
Andrei Stoian
ec78318af3 chore(gpu): prevent nvToolsExt inclusion when not profiling
chore(gpu): prevent nvToolsExt inclusion when not profiling

fix(gpu): stdint
2025-06-05 11:45:32 +02:00
David Testé
8a312afbb7 chore(ci): fix benchmark matrix parameters generation
Previous implementation was done to please Zizmor and avoid
template-injection findings during analysis. This had a downside,
using env directive implies a double-interpolation that messes with
fromJSON() later and build badly formatted matrix parameters.
2025-06-05 11:15:39 +02:00
David Testé
b61f1d864c chore(ci): check ks32 parameters with lattice estimator
A small refactoring has been done to handle ciphertext modulus in a more convenient way.
2025-06-04 17:19:17 +02:00
Agnes Leroy
15983a0718 feat(gpu): add memory tracking for mul/scalar mul 2025-06-04 16:42:51 +02:00
David Testé
856fc1a709 chore(ci): ignore stale action refs on rust-toolchain action
This action doesn't create releases so the action refs doesn't point to a known tag.
If this zizmor findings is not ignored, then continuous integration pipeline is broken.
2025-06-04 11:48:01 +02:00
Pedro Alves
fe0a195630 chore(gpu): switches from the TBC PBS to the other variants for many inputs 2025-06-04 05:45:53 -03:00
tmontaigu
aca7e79585 feat(csprng): add Xof random generation
This adds a new kind of seed to the csprng

When created which such seed, the AES-CTR random generator
initialization changes:
- The AES-KEY used is initialized differently
- The AES-CTR starts with a CTR that may not be 0

The changes make it so that the counter still goes from 0..MAX,
but now the AES-CTR will encrypt the counter + some offset allowing
to keep the regular behavior and the new one
2025-06-04 09:57:18 +02:00
tmontaigu
c0e89a53ef fix(csprng): fix and endian for the counter
This commit fixes an endian (little) for the counter
representation of the counter used in the AES-CTR counter.

This is so that, the random bytes generated are the same not matter
the endian of the system.

A test case with known answers is added, as well as make command
to run the test in an emulated big-endian arch using the `cross`
utility.

This also include a small refactor where now the block cipher
do not encrypt `AesIndex`. This is done as it makes more sense
(AES encrypts bytes, not numbers), so this allows to move and centralize
the concept of endian as well a centralize where batch created.
2025-06-04 09:57:18 +02:00
David Testé
312952007f chore(ci): lock zizmor version to avoid breaking ci pipelines
Newer version of Zizmor can trigger errors due to new findings in workflows. To avoid breaking any ongoing pull-request, due to this unhandled update, zizmor version is locked.
2025-06-03 12:29:36 +02:00
Enzo Di Maria
ff51ed3f34 refactor(gpu): moving trim_radix_blocks_lsb_async to backend 2025-06-03 11:42:18 +02:00
Agnes Leroy
9737bdcb98 fix(gpu): fix degrees after bitxor 2025-06-03 08:47:12 +02:00
tmontaigu
87a43a4900 chore(integer): add determinism check for sum 2025-06-02 17:37:21 +02:00
Agnes Leroy
345bdbf17f feat(gpu): add memory tracking function for cmux 2025-06-02 17:29:17 +02:00
Agnes Leroy
cc54ba2236 chore(gpu): fix overflow in div in long run tests 2025-06-02 17:05:09 +02:00
David Testé
11df6c69ee chore(ci): fix workflow security warnings
Since Zizmor v1.9.0, new pedantic warnings are detected especially
regarding template-injection patterns.
2025-06-02 14:46:14 +02:00
Guillermo Oyarzun
b76f4dbfe0 fix(gpu): fix hardcoded use of message modulus 2025-06-02 10:43:14 +02:00
Enzo Di Maria
be21c15c80 refactor(gpu): moving extend_radix_with_trivial_zero_blocks_msb to backend 2025-06-02 09:19:51 +02:00
tmontaigu
aa51b25313 chore(ci): fix test_user_docs run and add hpu
Due to #[cfg] before the test_user_docs module, the module would
not actually be compiled (thus run user doc test) unless all required
features where activated when running.

So we remove these cfg, as each hardware doc supports its own set of
features and its better to have a test fail because a feature is
missing rather than silently not run anything

Also, add commands and ci stuff to check HPU docs
2025-05-30 16:36:56 +02:00
tmontaigu
300c95fe3d fix(doc): finish HPU example fix 2025-05-30 16:36:56 +02:00
pgardratzama
524adda8f6 fix(doc): hpu example was not compiling 2025-05-30 16:36:56 +02:00
tmontaigu
dedcf205b4 feat(integer): improve default neg 2025-05-30 15:02:35 +02:00
tmontaigu
2c8d4c0fb0 feat(hlapi): add overflowing_neg 2025-05-30 15:02:35 +02:00
tmontaigu
3370fb5b7e feat(gpu): add overflowing_neg 2025-05-30 15:02:35 +02:00
tmontaigu
cd77eac42b feat(integer): add overflowing_neg 2025-05-30 15:02:35 +02:00
Baptiste Roux
40f20b4ecb fix(hpu): Rewrite hpu_bench iteration loop
hpu_bench example was wrong for iter > 1 following clippy modifications.
NB: Vector is collect but intermediate value are explicitly drop to enable long-time stressed tests.
2025-05-28 14:45:45 +02:00
823 changed files with 53359 additions and 14000 deletions

3
.gitattributes vendored Normal file
View File

@@ -0,0 +1,3 @@
*.hpu filter=lfs diff=lfs merge=lfs -text
*.bcode filter=lfs diff=lfs merge=lfs -text
*.cbor filter=lfs diff=lfs merge=lfs -text

View File

@@ -23,36 +23,58 @@ runs:
echo "${CMAKE_SCRIPT_SHA} cmake-${CMAKE_VERSION}-linux-x86_64.sh" > checksum
sha256sum -c checksum
sudo bash cmake-"${CMAKE_VERSION}"-linux-x86_64.sh --skip-license --prefix=/usr/ --exclude-subdir
sudo apt remove -y unattended-upgrades
sudo apt update
sudo apt install -y cmake-format libclang-dev
env:
CMAKE_VERSION: 3.29.6
CMAKE_SCRIPT_SHA: "6e4fada5cba3472ae503a11232b6580786802f0879cead2741672bf65d97488a"
- name: Install GCC
if: inputs.github-instance == 'true'
shell: bash
env:
GCC_VERSION: ${{ inputs.gcc-version }}
run: |
sudo apt-get install gcc-"{GCC_VERSION}" g++-"{GCC_VERSION}"
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"{GCC_VERSION}" 20
sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"{GCC_VERSION}" 20
- name: Check GCC
shell: bash
env:
GCC_VERSION: ${{ inputs.gcc-version }}
run: |
which gcc-"${GCC_VERSION}"
- name: Install CUDA
if: inputs.github-instance == 'true'
shell: bash
env:
CUDA_VERSION: ${{ inputs.cuda-version }}
CUDA_KEYRING_PACKAGE: cuda-keyring_1.1-1_all.deb
CUDA_KEYRING_SHA: "d93190d50b98ad4699ff40f4f7af50f16a76dac3bb8da1eaaf366d47898ff8df"
run: |
TOOLKIT_VERSION="$(echo ${CUDA_VERSION} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/${env.CUDA_KEYRING_PACKAGE}
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
TOOLKIT_VERSION="$(echo "${CUDA_VERSION}" | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/${CUDA_KEYRING_PACKAGE}
echo "${CUDA_KEYRING_SHA} ${CUDA_KEYRING_PACKAGE}" > checksum
sha256sum -c checksum
sudo dpkg -i "${CUDA_KEYRING_PACKAGE}"
sudo apt update
sudo apt -y install cuda-toolkit-"${TOOLKIT_VERSION}"
env:
CUDA_VERSION: ${{ inputs.cuda-version }}
CUDA_KEYRING_PACKAGE: cuda-keyring_1.1-1_all.deb
CUDA_KEYRING_SHA: "d93190d50b98ad4699ff40f4f7af50f16a76dac3bb8da1eaaf366d47898ff8df"
- name: Export CUDA variables
shell: bash
run: |
find /usr/local -executable -name "nvcc"
CUDA_PATH=/usr/local/cuda-"${CUDA_VERSION}"
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH";
echo "CUDA_MODULE_LOADER=EAGER";
echo "PATH=$PATH:$CUDA_PATH/bin";
} >> "${GITHUB_ENV}"
{
echo "PATH=$PATH:$CUDA_PATH/bin";
@@ -72,6 +94,11 @@ runs:
env:
GCC_VERSION: ${{ inputs.gcc-version }}
- name: Check setup
shell: bash
run: |
which nvcc
- name: Check device is detected
shell: bash
run: nvidia-smi

View File

@@ -67,49 +67,30 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Use specific data branch
if: ${{ contains(github.event.pull_request.labels.*.name, 'data_PR') }}
env:
PR_BRANCH: ${{ github.head_ref || github.ref_name }}
# Cache key is an aggregated hash of lfs files hashes
- name: Get LFS data sha
id: hash-lfs-data
run: |
echo "BACKWARD_COMPAT_DATA_BRANCH=${PR_BRANCH}" >> "${GITHUB_ENV}"
- name: Get backward compat branch
id: backward_compat_branch
run: |
BRANCH="$(make backward_compat_branch)"
echo "branch=${BRANCH}" >> "${GITHUB_OUTPUT}"
- name: Get backward compat branch head SHA
id: backward_compat_sha
run: |
SHA=$(git ls-remote "${REPO_URL}" refs/heads/"${BACKWARD_COMPAT_BRANCH}" | awk '{print $1}')
SHA=$(git lfs ls-files -l -I utils/tfhe-backward-compat-data | sha256sum | cut -d' ' -f1)
echo "sha=${SHA}" >> "${GITHUB_OUTPUT}"
env:
REPO_URL: "https://github.com/zama-ai/tfhe-backward-compat-data"
BACKWARD_COMPAT_BRANCH: ${{ steps.backward_compat_branch.outputs.branch }}
- name: Retrieve data from cache
id: retrieve-data-cache
uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
uses: actions/cache/restore@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
with:
path: tests/tfhe-backward-compat-data
key: ${{ steps.backward_compat_branch.outputs.branch }}_${{ steps.backward_compat_sha.outputs.sha }}
path: |
utils/tfhe-backward-compat-data/**/*.cbor
utils/tfhe-backward-compat-data/**/*.bcode
key: ${{ steps.hash-lfs-data.outputs.sha }}
- name: Clone test data
- name: Pull test data
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
repository: zama-ai/tfhe-backward-compat-data
path: tests/tfhe-backward-compat-data
lfs: 'true'
ref: ${{ steps.backward_compat_branch.outputs.branch }}
run: |
make pull_backward_compat_data
- name: Run backward compatibility tests
run: |
@@ -118,17 +99,20 @@ jobs:
- name: Store data in cache
if: steps.retrieve-data-cache.outputs.cache-hit != 'true'
continue-on-error: true
uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
uses: actions/cache/save@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
with:
path: tests/tfhe-backward-compat-data
key: ${{ steps.backward_compat_branch.outputs.branch }}_${{ steps.backward_compat_sha.outputs.sha }}
path: |
utils/tfhe-backward-compat-data/**/*.cbor
utils/tfhe-backward-compat-data/**/*.bcode
key: ${{ steps.hash-lfs-data.outputs.sha }}
- name: Set pull-request URL
if: ${{ failure() && github.event_name == 'pull_request' }}
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -174,7 +174,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
@@ -182,9 +182,11 @@ jobs:
if: needs.should-run.outputs.csprng_test == 'true'
run: |
make test_tfhe_csprng
make test_tfhe_csprng_big_endian
- name: Run tfhe-zk-pok tests
if: needs.should-run.outputs.zk_pok_test == 'true'
# Always run it to catch non deterministic bugs earlier
# if: needs.should-run.outputs.zk_pok_test == 'true'
run: |
make test_zk_pok
@@ -214,7 +216,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
uses: actions/cache/restore@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
with:
path: |
~/.nvm
@@ -227,7 +229,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
uses: actions/cache/save@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
@@ -272,9 +274,10 @@ jobs:
- name: Set pull-request URL
if: ${{ failure() && github.event_name == 'pull_request' }}
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Slack Notification
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}

View File

@@ -103,7 +103,7 @@ jobs:
name: Unsigned integer tests
needs: setup-instance
concurrency:
group: ${{ github.workflow_ref }}
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
@@ -114,7 +114,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
@@ -142,9 +142,10 @@ jobs:
- name: Set pull-request URL
if: ${{ failure() && github.event_name == 'pull_request' }}
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -0,0 +1,115 @@
name: Run noise checks on CPU
env:
CARGO_TERM_COLOR: always
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUSTFLAGS: "-C target-cpu=native"
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACKIFY_MARKDOWN: true
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
permissions:
contents: read
jobs:
setup-instance:
name: Setup instance (noise-checks)
runs-on: ubuntu-latest
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: aws
# We want an hpc7a more compute, will be faster
profile: bench
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "Cannot run this without secrets"
exit 1
noise-checks:
name: CPU noise checks
needs: setup-instance
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
timeout-minutes: 1440
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1 # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Run noise checks
timeout-minutes: 1440
run: |
make test_noise_check
- name: Set pull-request URL
if: ${{ !success() }}
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Slack Notification
if: ${{ !success() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Noise checks tests finished with status: ${{ job.status }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: Teardown instance (noise-checks)
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, noise-checks ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ !success() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (noise-checks) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -104,7 +104,7 @@ jobs:
name: Signed integer tests
needs: setup-instance
concurrency:
group: ${{ github.workflow_ref }}
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
steps:
@@ -115,7 +115,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
@@ -147,9 +147,10 @@ jobs:
- name: Set pull-request URL
if: ${{ failure() && github.event_name == 'pull_request' }}
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -185,7 +185,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
@@ -254,9 +254,10 @@ jobs:
- name: Set pull-request URL
if: ${{ failure() && github.event_name == 'pull_request' }}
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -68,7 +68,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
@@ -78,7 +78,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
uses: actions/cache/restore@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
with:
path: |
~/.nvm
@@ -91,7 +91,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
uses: actions/cache/save@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
@@ -123,9 +123,10 @@ jobs:
- name: Set pull-request URL
if: ${{ failure() && github.event_name == 'pull_request' }}
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -58,14 +58,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -114,8 +117,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -58,14 +58,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -107,8 +110,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -58,14 +58,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -95,15 +98,27 @@ jobs:
env:
REF_NAME: ${{ github.ref_name }}
- name: Parse swap request PBS counts
- name: Parse swap request update PBS counts
run: |
python3 ./ci/benchmark_parser.py tfhe-benchmark/dex_swap_request_pbs_count.csv "${RESULTS_FILENAME}" \
python3 ./ci/benchmark_parser.py tfhe-benchmark/dex_swap_request_update_dex_balance_pbs_count.csv "${RESULTS_FILENAME}" \
--object-sizes \
--append-results
- name: Parse swap claim PBS counts
- name: Parse swap request finalize PBS counts
run: |
python3 ./ci/benchmark_parser.py tfhe-benchmark/dex_swap_claim_pbs_count.csv "${RESULTS_FILENAME}" \
python3 ./ci/benchmark_parser.py tfhe-benchmark/dex_swap_request_finalize_pbs_count.csv "${RESULTS_FILENAME}" \
--object-sizes \
--append-results
- name: Parse swap claim prepare PBS counts
run: |
python3 ./ci/benchmark_parser.py tfhe-benchmark/dex_swap_claim_prepare_pbs_count.csv "${RESULTS_FILENAME}" \
--object-sizes \
--append-results
- name: Parse swap claim update PBS counts
run: |
python3 ./ci/benchmark_parser.py tfhe-benchmark/dex_swap_claim_update_dex_balance_pbs_count.csv "${RESULTS_FILENAME}" \
--object-sizes \
--append-results
@@ -116,8 +131,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -59,14 +59,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -111,8 +114,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -31,6 +31,7 @@ on:
- ks
- ks_pbs
- integer_zk
- hlapi_noise_squash
op_flavor:
description: "Operations set to run"
type: choice

View File

@@ -46,15 +46,18 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
echo "FAST_BENCH=TRUE";
} >> "${GITHUB_ENV}"
echo "FAST_BENCH=TRUE" >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -93,8 +96,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}
@@ -124,14 +130,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -159,7 +168,8 @@ jobs:
--commit-date "${COMMIT_DATE}" \
--bench-date "${BENCH_DATE}" \
--walk-subdirs \
env:
REF_NAME: ${{ github.ref_name }}
- name: Upload parsed results artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
@@ -170,8 +180,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -120,25 +120,24 @@ jobs:
env:
INPUTS_PARAMS_TYPE: ${{ inputs.params_type }}
- name: Set command output
id: set_command
run: |
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "command=${{ toJSON(env.COMMAND) }}" >> "${GITHUB_OUTPUT}"
- name: Set operation flavor output
id: set_op_flavor
run: |
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
- name: Set benchmark types output
id: set_bench_type
run: |
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
- name: Set parameters types output
id: set_params_type
run: |
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "params_type=${{ toJSON(env.PARAMS_TYPE) }}" >> "${GITHUB_OUTPUT}"
setup-instance:
@@ -193,7 +192,7 @@ jobs:
matrix:
# explicit include-based build matrix, of known valid options
include:
- cuda: "12.2"
- cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs repo
@@ -225,8 +224,10 @@ jobs:
params_type: ${{ fromJSON(needs.prepare-matrix.outputs.params_type) }}
# explicit include-based build matrix, of known valid options
include:
- cuda: "12.2"
- cuda: "12.8"
gcc: 11
env:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
steps:
- name: Checkout tfhe-rs repo with tags
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
@@ -237,18 +238,20 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
# Re-export environment variables as dependencies setup perform this task in the previous job.
# Local env variables are cleaned at the end of each job.
- name: Export CUDA variables
shell: bash
run: |
CUDA_PATH=/usr/local/cuda-${{ matrix.cuda }}
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "PATH=$PATH:$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib64:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
@@ -258,13 +261,15 @@ jobs:
shell: bash
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
} >> "${GITHUB_ENV}"
env:
GCC_VERSION: ${{ matrix.gcc }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -317,8 +322,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
slack-notify:
name: Slack Notification

View File

@@ -100,7 +100,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs repo with tags
@@ -119,14 +119,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -167,8 +170,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
slack-notify:
name: Slack Notification

View File

@@ -101,7 +101,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs repo with tags
@@ -120,14 +120,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -168,8 +171,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
slack-notify:
name: Slack Notification

View File

@@ -10,37 +10,16 @@ on:
permissions: {}
jobs:
run-benchmarks-1-h100:
name: Run integer benchmarks (1xH100)
run-benchmarks-8-h100-sxm5-integer:
name: Run integer benchmarks (8xH100-SXM5)
if: github.repository == 'zama-ai/tfhe-rs'
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: single-h100
hardware_name: n3-H100x1
command: integer,integer_multi_bit
op_flavor: default
bench_type: latency
all_precisions: true
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-2-h100:
name: Run integer benchmarks (2xH100)
if: github.repository == 'zama-ai/tfhe-rs'
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: 2-h100
hardware_name: n3-H100x2
profile: multi-h100-sxm5
hardware_name: n3-H100x8-SXM5
command: integer_multi_bit
op_flavor: default
bench_type: latency
bench_type: both
all_precisions: true
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
@@ -52,16 +31,16 @@ jobs:
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-8-h100:
name: Run integer benchmarks (8xH100)
run-benchmarks-8-h100-sxm5-integer-compression:
name: Run integer compression benchmarks (8xH100-SXM5)
if: github.repository == 'zama-ai/tfhe-rs'
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: multi-h100
hardware_name: n3-H100x8
command: integer_multi_bit
profile: multi-h100-sxm5
hardware_name: n3-H100x8-SXM5
command: integer_compression
op_flavor: default
bench_type: latency
bench_type: both
all_precisions: true
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
@@ -73,16 +52,37 @@ jobs:
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-l40:
name: Run integer benchmarks (L40)
run-benchmarks-8-h100-sxm5-integer-zk:
name: Run integer zk benchmarks (8xH100-SXM5)
if: github.repository == 'zama-ai/tfhe-rs'
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: l40
hardware_name: n3-L40x1
command: integer_multi_bit,integer_compression,pbs,ks
profile: multi-h100-sxm5
hardware_name: n3-H100x8-SXM5
command: integer_zk
op_flavor: default
bench_type: latency
bench_type: both
all_precisions: true
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
REPO_CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN }}
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_ACTION_TOKEN: ${{ secrets.SLAB_ACTION_TOKEN }}
SLAB_URL: ${{ secrets.SLAB_URL }}
SLAB_BASE_URL: ${{ secrets.SLAB_BASE_URL }}
run-benchmarks-8-h100-sxm5-noise-squash:
name: Run integer zk benchmarks (8xH100-SXM5)
if: github.repository == 'zama-ai/tfhe-rs'
uses: ./.github/workflows/benchmark_gpu_common.yml
with:
profile: multi-h100-sxm5
hardware_name: n3-H100x8-SXM5
command: hlapi_noise_squash
op_flavor: default
bench_type: both
all_precisions: true
secrets:
BOT_USERNAME: ${{ secrets.BOT_USERNAME }}

View File

@@ -33,18 +33,22 @@ jobs:
with:
fetch-depth: 0
persist-credentials: 'false'
lfs: true
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -58,6 +62,7 @@ jobs:
- name: Run benchmarks
run: |
make pull_hpu_files
make bench_integer_hpu
make bench_hlapi_erc20_hpu
@@ -84,5 +89,8 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}

View File

@@ -78,12 +78,12 @@ jobs:
- name: Set operation flavor output
id: set_op_flavor
run: |
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
- name: Set benchmark types output
id: set_bench_type
run: |
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
setup-instance:
@@ -128,14 +128,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -193,8 +196,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -47,7 +47,7 @@ jobs:
- name: Set operation flavor output
id: set_op_flavor
run: |
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
setup-instance:
@@ -89,14 +89,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -150,8 +153,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -78,12 +78,12 @@ jobs:
- name: Set operation flavor output
id: set_op_flavor
run: |
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "op_flavor=${{ toJSON(env.OP_FLAVOR) }}" >> "${GITHUB_OUTPUT}"
- name: Set benchmark types output
id: set_bench_type
run: |
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
setup-instance:
@@ -128,14 +128,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -185,8 +188,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -48,7 +48,7 @@ jobs:
name: Execute FFT benchmarks in EC2
needs: setup-ec2
concurrency:
group: ${{ github.workflow_ref }}
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: true
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
steps:
@@ -61,11 +61,14 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
@@ -107,8 +110,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -48,7 +48,7 @@ jobs:
name: Execute NTT benchmarks in EC2
needs: setup-ec2
concurrency:
group: ${{ github.workflow_ref }}
group: ${{ github.workflow_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: true
runs-on: ${{ needs.setup-ec2.outputs.runner-name }}
steps:
@@ -61,11 +61,14 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af
@@ -107,8 +110,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -98,14 +98,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -141,7 +144,7 @@ jobs:
- name: Upload parsed results artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: ${{ github.sha }}_tfhe_zk_pok
name: ${{ github.sha }}_tfhe_zk_pok_${{ env.BENCH_TYPE }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
@@ -155,8 +158,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -96,14 +96,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -113,7 +116,7 @@ jobs:
- name: Node cache restoration
id: node-cache
uses: actions/cache/restore@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
uses: actions/cache/restore@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
with:
path: |
~/.nvm
@@ -126,7 +129,7 @@ jobs:
make install_node
- name: Node cache save
uses: actions/cache/save@5a3ec84eff668545956fd18022155c47e93e2684 #v4.2.3
uses: actions/cache/save@0400d5f644dc74513175e3cd8d07132dd4860809 #v4.2.4
if: steps.node-cache.outputs.cache-hit != 'true'
with:
path: |
@@ -136,12 +139,16 @@ jobs:
- name: Install web resources
run: |
make install_${{ matrix.browser }}_browser
make install_${{ matrix.browser }}_web_driver
make install_"${BROWSER}"_browser
make install_"${BROWSER}"_web_driver
env:
BROWSER: ${{ matrix.browser }}
- name: Run benchmarks
run: |
make bench_web_js_api_parallel_${{ matrix.browser }}_ci
make bench_web_js_api_parallel_"${BROWSER}"_ci
env:
BROWSER: ${{ matrix.browser }}
- name: Parse results
run: |
@@ -188,8 +195,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -92,7 +92,7 @@ jobs:
- name: Set benchmark types output
id: set_bench_type
run: |
run: | # zizmor: ignore[template-injection] this env variable is safe
echo "bench_type=${{ toJSON(env.BENCH_TYPE) }}" >> "${GITHUB_OUTPUT}"
setup-instance:
@@ -140,14 +140,17 @@ jobs:
- name: Get benchmark details
run: |
COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict "${SHA}");
{
echo "BENCH_DATE=$(date --iso-8601=seconds)";
echo "COMMIT_DATE=$(git --no-pager show -s --format=%cd --date=iso8601-strict ${{ github.sha }})";
echo "COMMIT_DATE=${COMMIT_DATE}";
echo "COMMIT_HASH=$(git describe --tags --dirty)";
} >> "${GITHUB_ENV}"
env:
SHA: ${{ github.sha }}
- name: Install rust
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: nightly
@@ -191,7 +194,7 @@ jobs:
- name: Upload parsed results artifact
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02
with:
name: ${{ github.sha }}_integer_zk
name: ${{ github.sha }}_integer_zk_${{ matrix.bench_type }}
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo
@@ -205,8 +208,11 @@ jobs:
- name: Send data to Slab
shell: bash
run: |
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${{ secrets.JOB_SECRET }}" \
--slab-url "${{ secrets.SLAB_URL }}"
python3 slab/scripts/data_sender.py "${RESULTS_FILENAME}" "${JOB_SECRET}" \
--slab-url "${SLAB_URL}"
env:
JOB_SECRET: ${{ secrets.JOB_SECRET }}
SLAB_URL: ${{ secrets.SLAB_URL }}
- name: Slack Notification
if: ${{ failure() || (cancelled() && github.event_name != 'pull_request') }}

View File

@@ -35,7 +35,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
@@ -59,6 +59,11 @@ jobs:
run: |
make build_tfhe_csprng
- name: Build with MSRV
if: ${{ contains(matrix.os, 'ubuntu') }}
run: |
make build_tfhe_msrv
- name: Build Release core
if: ${{ contains(matrix.os, 'ubuntu') }}
run: |

View File

@@ -13,7 +13,7 @@ env:
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref }}
group: ${{ github.workflow }}-${{ github.head_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: true
permissions:

View File

@@ -13,7 +13,7 @@ env:
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref }}
group: ${{ github.workflow }}-${{ github.head_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: true
permissions:

View File

@@ -3,14 +3,15 @@ name: Check commit and PR compliance
on:
pull_request:
permissions:
contents: read
pull-requests: read # Permission needed to scan commits in a pull-request
permissions: {}
jobs:
check-commit-pr:
name: Check commit and PR
runs-on: ubuntu-latest
permissions:
contents: read
pull-requests: write # Permission needed to scan commits in a pull-request and write issue comment
steps:
- name: Check first line
uses: gsactions/commit-message-checker@16fa2d5de096ae0d35626443bcd24f1e756cafee

View File

@@ -25,10 +25,10 @@ jobs:
- name: Get actionlint
run: |
wget "https://github.com/rhysd/actionlint/releases/download/v${{ env.ACTIONLINT_VERSION }}/actionlint_${{ env.ACTIONLINT_VERSION }}_linux_amd64.tar.gz"
echo "${{ env.ACTIONLINT_CHECKSUM }} actionlint_${{ env.ACTIONLINT_VERSION }}_linux_amd64.tar.gz" > checksum
wget "https://github.com/rhysd/actionlint/releases/download/v${ACTIONLINT_VERSION}/actionlint_${ACTIONLINT_VERSION}_linux_amd64.tar.gz"
echo "${ACTIONLINT_CHECKSUM} actionlint_${ACTIONLINT_VERSION}_linux_amd64.tar.gz" > checksum
sha256sum -c checksum
tar -xf actionlint_${{ env.ACTIONLINT_VERSION }}_linux_amd64.tar.gz actionlint
tar -xf actionlint_"${ACTIONLINT_VERSION}"_linux_amd64.tar.gz actionlint
ln -s "$(pwd)/actionlint" /usr/local/bin/
- name: Lint workflows

View File

@@ -54,7 +54,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable

View File

@@ -66,7 +66,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable

View File

@@ -1,137 +0,0 @@
name: Close or Merge corresponding PR on the data repo
# When a PR with the data_PR tag is closed or merged, this will close the corresponding PR in the data repo.
env:
TARGET_REPO_API_URL: ${{ github.api_url }}/repos/zama-ai/tfhe-backward-compat-data
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
PR_BRANCH: ${{ github.head_ref || github.ref_name }}
CLOSE_TYPE: ${{ github.event.pull_request.merged && 'merge' || 'close' }}
# only trigger on pull request closed events
on:
pull_request:
types: [ closed ]
# The same pattern is used for jobs that use the github api:
# - save the result of the API call in the env var "GH_API_RES". Since the var is multiline
# we use this trick: https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#example-of-a-multiline-string
# - "set +e" will make sure we reach the last "echo EOF" even in case of error
# - "set -o" pipefail makes one line piped command return the error of the first failure
# - 'RES="$?"' and 'exit $RES' are used to return the error code if a command failed. Without it, with "set +e"
# the script will always return 0 because of the "echo EOF".
permissions: {}
jobs:
auto_close_job:
if: ${{ contains(github.event.pull_request.labels.*.name, 'data_PR') }}
runs-on: ubuntu-latest
steps:
- name: Find corresponding Pull Request in the data repo
run: |
{
set +e
set -o pipefail
echo 'TARGET_REPO_PR<<EOF'
curl --fail-with-body --no-progress-meter -L -X GET \
-H "Accept: application/vnd.github+json" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"${TARGET_REPO_API_URL}"/pulls\?head="${REPO_OWNER}":"${PR_BRANCH}" | jq -e '.[0]' | sed 's/null/{ "message": "corresponding PR not found" }/'
RES="$?"
echo EOF
} >> "${GITHUB_ENV}"
exit $RES
env:
REPO_OWNER: ${{ github.repository_owner }}
- name: Comment on the PR to indicate the reason of the close
run: |
BODY="'{ \"body\": \"PR ${CLOSE_TYPE}d because the corresponding PR in main repo was ${CLOSE_TYPE}d: ${REPO}#${EVENT_NUMBER}\" }'"
{
set +e
set -o pipefail
echo 'GH_API_RES<<EOF'
curl --fail-with-body --no-progress-meter -L -X POST \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"${COMMENTS_URL}" \
-d "${BODY}"
RES="$?"
echo EOF
} >> "${GITHUB_ENV}"
exit $RES
env:
REPO: ${{ github.repository }}
EVENT_NUMBER: ${{ github.event.number }}
COMMENTS_URL: ${{ fromJson(env.TARGET_REPO_PR).comments_url }}
- name: Merge the Pull Request in the data repo
if: ${{ github.event.pull_request.merged }}
run: |
{
set +e
set -o pipefail
echo 'GH_API_RES<<EOF'
curl --fail-with-body --no-progress-meter -L -X PUT \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"${TARGET_REPO_PR_URL}"/merge \
-d '{ "merge_method": "rebase" }'
RES="$?"
echo EOF
} >> "${GITHUB_ENV}"
exit $RES
env:
TARGET_REPO_PR_URL: ${{ fromJson(env.TARGET_REPO_PR).url }}
- name: Close the Pull Request in the data repo
if: ${{ !github.event.pull_request.merged }}
run: |
{
set +e
set -o pipefail
echo 'GH_API_RES<<EOF'
curl --fail-with-body --no-progress-meter -L -X PATCH \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"${TARGET_REPO_PR_URL}" \
-d '{ "state": "closed" }'
RES="$?"
echo EOF
} >> "${GITHUB_ENV}"
exit $RES
env:
TARGET_REPO_PR_URL: ${{ fromJson(env.TARGET_REPO_PR).url }}
- name: Delete the associated branch in the data repo
run: |
{
set +e
set -o pipefail
echo 'GH_API_RES<<EOF'
curl --fail-with-body --no-progress-meter -L -X DELETE \
-H "Accept: application/vnd.github+json" \
-H "Authorization: Bearer ${{ secrets.FHE_ACTIONS_TOKEN }}" \
-H "X-GitHub-Api-Version: 2022-11-28" \
"${TARGET_REPO_API_URL}"/git/refs/heads/"${PR_BRANCH}"
RES="$?"
echo EOF
} >> "${GITHUB_ENV}"
exit $RES
- name: Slack Notification
if: ${{ always() && job.status == 'failure' }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Failed to auto-${{ env.CLOSE_TYPE }} PR on data repo: ${{ fromJson(env.GH_API_RES || env.TARGET_REPO_PR).message }}"

View File

@@ -45,7 +45,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable

View File

@@ -0,0 +1,148 @@
# Compile and test tfhe-cuda-backend on an AWS instance
name: Cuda - Code Validation
env:
CARGO_TERM_COLOR: always
ACTION_RUN_URL: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
RUSTFLAGS: "-C target-cpu=native"
RUST_BACKTRACE: "full"
RUST_MIN_STACK: "8388608"
SLACK_CHANNEL: ${{ secrets.SLACK_CHANNEL }}
SLACK_ICON: https://pbs.twimg.com/profile_images/1274014582265298945/OjBKP9kn_400x400.png
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
SLACKIFY_MARKDOWN: true
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
PULL_REQUEST_MD_LINK: ""
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
# Secrets will be available only to zama-ai organization members
SECRETS_AVAILABLE: ${{ secrets.JOB_SECRET != '' }}
EXTERNAL_CONTRIBUTION_RUNNER: "gpu_ubuntu-22.04"
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
types: [ labeled ]
permissions:
contents: read
jobs:
setup-instance:
name: Setup instance (cuda-tests)
runs-on: ubuntu-latest
if: github.event_name != 'pull_request' ||
(github.event.action == 'labeled' && github.event.label.name == 'approved')
outputs:
runner-name: ${{ steps.start-remote-instance.outputs.label || steps.start-github-instance.outputs.runner_group }}
steps:
- name: Start remote instance
id: start-remote-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: start
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
backend: hyperstack
profile: gpu-test
# This instance will be spawned especially for pull-request from forked repository
- name: Start GitHub instance
id: start-github-instance
if: env.SECRETS_AVAILABLE == 'false'
run: |
echo "runner_group=${EXTERNAL_CONTRIBUTION_RUNNER}" >> "$GITHUB_OUTPUT"
cuda-tests-linux:
name: CUDA Code Validation tests
needs: [ setup-instance ]
if: github.event_name != 'pull_request' ||
(github.event_name == 'pull_request' && needs.setup-instance.result != 'skipped')
concurrency:
group: ${{ github.workflow_ref }}
cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
runs-on: ${{ needs.setup-instance.outputs.runner-name }}
strategy:
fail-fast: false
# explicit include-based build matrix, of known valid options
matrix:
include:
- os: ubuntu-22.04
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
persist-credentials: 'false'
token: ${{ env.CHECKOUT_TOKEN }}
- name: Setup Hyperstack dependencies
uses: ./.github/actions/gpu_setup
with:
cuda-version: ${{ matrix.cuda }}
gcc-version: ${{ matrix.gcc }}
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Find tools
run: |
find /usr -executable -name "compute-sanitizer"
- name: Install latest stable
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Run memory sanitizer
run: |
make test_high_level_api_gpu_debug
slack-notify:
name: Slack Notification
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
if: ${{ always() && needs.cuda-tests-linux.result != 'skipped' && failure() }}
continue-on-error: true
steps:
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ needs.cuda-tests-linux.result }}
SLACK_MESSAGE: "GPU code validation tests finished with status: ${{ needs.cuda-tests-linux.result }}. (${{ env.PULL_REQUEST_MD_LINK }}[action run](${{ env.ACTION_RUN_URL }}))"
teardown-instance:
name: Teardown instance (cuda-tests)
if: ${{ always() && needs.setup-instance.result == 'success' }}
needs: [ setup-instance, cuda-tests-linux ]
runs-on: ubuntu-latest
steps:
- name: Stop remote instance
id: stop-instance
if: env.SECRETS_AVAILABLE == 'true'
uses: zama-ai/slab-github-runner@79939325c3c429837c10d6041e4fd8589d328bac
with:
mode: stop
github-token: ${{ secrets.SLAB_ACTION_TOKEN }}
slab-url: ${{ secrets.SLAB_BASE_URL }}
job-secret: ${{ secrets.JOB_SECRET }}
label: ${{ needs.setup-instance.outputs.runner-name }}
- name: Slack Notification
if: ${{ failure() }}
continue-on-error: true
uses: rtCamp/action-slack-notify@e31e87e03dd19038e411e38ae27cbad084a90661
env:
SLACK_COLOR: ${{ job.status }}
SLACK_MESSAGE: "Instance teardown (cuda-tests) finished with status: ${{ job.status }}. (${{ env.ACTION_RUN_URL }})"

View File

@@ -122,7 +122,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
@@ -140,10 +140,12 @@ jobs:
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Run core crypto and internal CUDA backend tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_core_crypto_gpu
@@ -172,9 +174,10 @@ jobs:
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'

View File

@@ -107,7 +107,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
@@ -124,10 +124,12 @@ jobs:
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Run core crypto and internal CUDA backend tests
run: |
make test_core_crypto_gpu
@@ -156,9 +158,10 @@ jobs:
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'

View File

@@ -62,7 +62,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
@@ -79,10 +79,12 @@ jobs:
gcc-version: ${{ matrix.gcc }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Run core crypto, integer and internal CUDA backend tests
run: |
make test_gpu

View File

@@ -109,7 +109,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
@@ -126,10 +126,12 @@ jobs:
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Run multi-bit CUDA integer compression tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_integer_compression_gpu
@@ -137,7 +139,7 @@ jobs:
# No need to test core_crypto and classic PBS in integer since it's already tested on single GPU.
- name: Run multi-bit CUDA integer tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_integer_multi_bit_gpu_ci
BIG_TESTS_INSTANCE=TRUE NO_BIG_PARAMS_GPU=TRUE make test_integer_multi_bit_gpu_ci
- name: Run user docs tests
run: |
@@ -161,9 +163,10 @@ jobs:
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'

View File

@@ -11,6 +11,7 @@ env:
SLACK_USERNAME: ${{ secrets.BOT_USERNAME }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
IS_PR: ${{ github.event_name == 'pull_request' }}
on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
@@ -18,6 +19,8 @@ on:
schedule:
# Nightly tests will be triggered each evening 8p.m.
- cron: "0 20 * * *"
pull_request:
permissions:
contents: read
@@ -55,7 +58,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
timeout-minutes: 4320 # 72 hours
steps:
@@ -72,13 +75,19 @@ jobs:
gcc-version: ${{ matrix.gcc }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Run tests
run: |
make test_integer_long_run_gpu
if [[ "${IS_PR}" == "true" ]]; then
make test_integer_short_run_gpu
else
make test_integer_long_run_gpu
fi
slack-notify:
name: Slack Notification

View File

@@ -81,16 +81,20 @@ jobs:
if: env.SECRETS_AVAILABLE == 'false'
shell: bash
run: |
TOOLKIT_VERSION="$(echo ${{ matrix.cuda }} | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
# Use Sed to extract a value from a string, this cannot be done with the ${variable//search/replace} pattern.
# shellcheck disable=SC2001
TOOLKIT_VERSION="$(echo "${CUDA_VERSION}" | sed 's/\(.*\)\.\(.*\)/\1-\2/')"
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/"${CUDA_KEYRING_PACKAGE}"
echo "${CUDA_KEYRING_SHA} ${CUDA_KEYRING_PACKAGE}" > checksum
sha256sum -c checksum
sudo dpkg -i "${CUDA_KEYRING_PACKAGE}"
sudo apt update
sudo apt -y install "cuda-toolkit-${TOOLKIT_VERSION}" cmake-format
env:
CUDA_VERSION: ${{ matrix.cuda }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
@@ -100,17 +104,21 @@ jobs:
echo "CUDA_PATH=$CUDA_PATH" >> "${GITHUB_ENV}"
echo "$CUDA_PATH/bin" >> "${GITHUB_PATH}"
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc" >> "${GITHUB_ENV}"
echo "CUDACXX=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc" >> "${GITHUB_ENV}"
env:
CUDA_VERSION: ${{ matrix.cuda }}
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
} >> "${GITHUB_ENV}"
env:
GCC_VERSION: ${{ matrix.gcc }}
- name: Run fmt checks
run: |
@@ -127,9 +135,10 @@ jobs:
- name: Set pull-request URL
if: ${{ failure() && github.event_name == 'pull_request' }}
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Slack Notification
if: ${{ failure() && env.SECRETS_AVAILABLE == 'true' }}

View File

@@ -109,7 +109,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
@@ -126,10 +126,12 @@ jobs:
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Run signed integer tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_gpu_ci
@@ -144,9 +146,10 @@ jobs:
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'

View File

@@ -122,7 +122,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
@@ -140,10 +140,12 @@ jobs:
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Run signed integer multi-bit tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_signed_integer_multi_bit_gpu_ci
@@ -158,9 +160,10 @@ jobs:
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'

View File

@@ -25,9 +25,6 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
schedule:
# Nightly tests @ 1AM after each work day
- cron: "0 1 * * MON-FRI"
permissions:
contents: read
@@ -113,7 +110,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
@@ -130,10 +127,12 @@ jobs:
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Should run nightly tests
if: github.event_name == 'schedule'
run: |
@@ -156,9 +155,10 @@ jobs:
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'

View File

@@ -109,7 +109,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
@@ -126,10 +126,12 @@ jobs:
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Run unsigned integer tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_gpu_ci
@@ -144,9 +146,10 @@ jobs:
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'

View File

@@ -122,7 +122,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
@@ -140,10 +140,12 @@ jobs:
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Run unsigned integer multi-bit tests
run: |
BIG_TESTS_INSTANCE=TRUE make test_unsigned_integer_multi_bit_gpu_ci
@@ -158,9 +160,10 @@ jobs:
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'

View File

@@ -25,9 +25,6 @@ on:
# Allows you to run this workflow manually from the Actions tab as an alternative.
workflow_dispatch:
pull_request:
schedule:
# Nightly tests @ 1AM after each work day
- cron: "0 1 * * MON-FRI"
permissions:
contents: read
@@ -113,7 +110,7 @@ jobs:
matrix:
include:
- os: ubuntu-22.04
cuda: "12.2"
cuda: "12.8"
gcc: 11
steps:
- name: Checkout tfhe-rs
@@ -130,10 +127,12 @@ jobs:
github-instance: ${{ env.SECRETS_AVAILABLE == 'false' }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
- name: Enable nvidia multi-process service
run: |
nvidia-cuda-mps-control -d
- name: Should run nightly tests
if: github.event_name == 'schedule'
run: |
@@ -156,9 +155,10 @@ jobs:
- name: Set pull-request URL
if: env.SECRETS_AVAILABLE == 'true' && github.event_name == 'pull_request'
run: |
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${{ github.event.pull_request.number }}), " >> "${GITHUB_ENV}"
echo "PULL_REQUEST_MD_LINK=[pull-request](${PR_BASE_URL}${PR_NUMBER}), " >> "${GITHUB_ENV}"
env:
PR_BASE_URL: ${{ vars.PR_BASE_URL }}
PR_NUMBER: ${{ github.event.pull_request.number }}
- name: Send message
if: env.SECRETS_AVAILABLE == 'true'

View File

@@ -13,7 +13,7 @@ env:
CHECKOUT_TOKEN: ${{ secrets.REPO_CHECKOUT_TOKEN || secrets.GITHUB_TOKEN }}
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref }}
group: ${{ github.workflow }}-${{ github.head_ref }}${{ github.ref == 'refs/heads/main' && github.sha || '' }}
cancel-in-progress: true
@@ -70,4 +70,4 @@ jobs:
source setup_hpu.sh
just -f mockups/tfhe-hpu-mockup/Justfile BUILD_PROFILE=release mockup &
make HPU_CONFIG=sim test_high_level_api_hpu
make HPU_CONFIG=sim test_user_doc_hpu

View File

@@ -57,7 +57,7 @@ jobs:
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable

View File

@@ -46,7 +46,7 @@ jobs:
token: ${{ env.CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable

View File

@@ -100,20 +100,23 @@ jobs:
run: |
echo "NPM_TAG=latest" >> "${GITHUB_ENV}"
- name: Download artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
with:
name: crate
path: target/package
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
id: auth
- name: Publish crate.io package
if: ${{ inputs.push_to_crates }}
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
# would fail. This is safe since DRY_RUN is handled in the env section above.
# shellcheck disable=SC2086
cargo publish -p tfhe --token "${CRATES_TOKEN}" ${DRY_RUN}
cargo publish -p tfhe ${DRY_RUN}
- name: Generate hash
id: published_hash

View File

@@ -67,7 +67,7 @@ jobs:
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
@@ -78,19 +78,24 @@ jobs:
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
echo "CUDACXX=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc";
} >> "${GITHUB_ENV}"
env:
CUDA_VERSION: ${{ matrix.cuda }}
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
env:
GCC_VERSION: ${{ matrix.gcc }}
- name: Prepare package
run: |
cargo package -p tfhe-cuda-backend
@@ -129,7 +134,7 @@ jobs:
CUDA_PATH: /usr/local/cuda-${{ matrix.cuda }}
steps:
- name: Install latest stable
uses: dtolnay/rust-toolchain@888c2e1ea69ab0d4330cbf0af1ecc7b68f368cc1
uses: dtolnay/rust-toolchain@b3b07ba8b418998c39fb20f53e8b695cdcc8de1b # zizmor: ignore[stale-action-refs] this action doesn't create releases
with:
toolchain: stable
@@ -140,29 +145,37 @@ jobs:
{
echo "CUDA_PATH=$CUDA_PATH";
echo "LD_LIBRARY_PATH=$CUDA_PATH/lib:$LD_LIBRARY_PATH";
echo "CUDACXX=/usr/local/cuda-${{ matrix.cuda }}/bin/nvcc";
echo "CUDACXX=/usr/local/cuda-${CUDA_VERSION}/bin/nvcc";
} >> "${GITHUB_ENV}"
env:
CUDA_VERSION: ${{ matrix.cuda }}
# Specify the correct host compilers
- name: Export gcc and g++ variables
if: ${{ !cancelled() }}
run: |
{
echo "CC=/usr/bin/gcc-${{ matrix.gcc }}";
echo "CXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CUDAHOSTCXX=/usr/bin/g++-${{ matrix.gcc }}";
echo "CC=/usr/bin/gcc-${GCC_VERSION}";
echo "CXX=/usr/bin/g++-${GCC_VERSION}";
echo "CUDAHOSTCXX=/usr/bin/g++-${GCC_VERSION}";
echo "HOME=/home/ubuntu";
} >> "${GITHUB_ENV}"
env:
GCC_VERSION: ${{ matrix.gcc }}
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
id: auth
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
# would fail. This is safe since DRY_RUN is handled in the env section above.
# shellcheck disable=SC2086
cargo publish -p tfhe-cuda-backend --token "${CRATES_TOKEN}" ${DRY_RUN}
cargo publish -p tfhe-cuda-backend ${DRY_RUN}
- name: Generate hash
id: published_hash

View File

@@ -74,15 +74,19 @@ jobs:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
id: auth
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
# would fail. This is safe since DRY_RUN is handled in the env section above.
# shellcheck disable=SC2086
cargo publish -p tfhe-hpu-backend --token "${CRATES_TOKEN}" ${DRY_RUN}
cargo publish -p tfhe-hpu-backend ${DRY_RUN}
- name: Generate hash
id: published_hash

View File

@@ -75,19 +75,22 @@ jobs:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Download artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
with:
name: crate-tfhe-csprng
path: target/package
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
id: auth
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
# would fail. This is safe since DRY_RUN is handled in the env section above.
# shellcheck disable=SC2086
cargo publish -p tfhe-csprng --token "${CRATES_TOKEN}" ${DRY_RUN}
cargo publish -p tfhe-csprng ${DRY_RUN}
- name: Generate hash
id: published_hash
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"

View File

@@ -75,15 +75,19 @@ jobs:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
id: auth
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
# would fail. This is safe since DRY_RUN is handled in the env section above.
# shellcheck disable=SC2086
cargo publish -p tfhe-fft --token "${CRATES_TOKEN}" ${DRY_RUN}
cargo publish -p tfhe-fft ${DRY_RUN}
- name: Generate hash
id: published_hash

View File

@@ -75,15 +75,19 @@ jobs:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
id: auth
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
# would fail. This is safe since DRY_RUN is handled in the env section above.
# shellcheck disable=SC2086
cargo publish -p tfhe-ntt --token "${CRATES_TOKEN}" ${DRY_RUN}
cargo publish -p tfhe-ntt ${DRY_RUN}
- name: Generate hash
id: published_hash

View File

@@ -68,15 +68,18 @@ jobs:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Download artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
with:
name: crate-tfhe-versionable-derive
path: target/package
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
id: auth
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
run: |
cargo publish -p tfhe-versionable-derive --token "${CRATES_TOKEN}"
cargo publish -p tfhe-versionable-derive
- name: Generate hash
id: published_hash
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"
@@ -145,15 +148,18 @@ jobs:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Download artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
with:
name: crate-tfhe-versionable
path: target/package
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
id: auth
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
run: |
cargo publish -p tfhe-versionable --token "${CRATES_TOKEN}"
cargo publish -p tfhe-versionable
- name: Generate hash
id: published_hash
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"

View File

@@ -72,19 +72,22 @@ jobs:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: Download artifact
uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0
uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0 # v5.0.0
with:
name: crate-zk-pok
path: target/package
- name: Authenticate on registry
uses: rust-lang/crates-io-auth-action@e919bc7605cde86df457cf5b93c5e103838bd879 # v1.0.1
id: auth
- name: Publish crate.io package
env:
CRATES_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
DRY_RUN: ${{ inputs.dry_run && '--dry-run' || '' }}
run: |
# DRY_RUN expansion cannot be double quoted when variable contains empty string otherwise cargo publish
# would fail. This is safe since DRY_RUN is handled in the env section above.
# shellcheck disable=SC2086
cargo publish -p tfhe-zk-pok --token "${CRATES_TOKEN}" ${DRY_RUN}
cargo publish -p tfhe-zk-pok ${DRY_RUN}
- name: Verify hash
id: published_hash
run: cd target/package && echo "pub_hash=$(sha256sum ./*.crate | base64 -w0)" >> "${GITHUB_OUTPUT}"

View File

@@ -21,7 +21,7 @@ jobs:
persist-credentials: 'false'
token: ${{ secrets.REPO_CHECKOUT_TOKEN }}
- name: git-sync
uses: wei/git-sync@55c6b63b4f21607da0e9877ca9b4d11a29fc6d83
uses: valtech-sd/git-sync@e734cfe9485a92e720eac5af8a4555dde5fecf88
with:
source_repo: "zama-ai/tfhe-rs"
source_branch: "main"

26
.github/workflows/unverified_prs.yml vendored Normal file
View File

@@ -0,0 +1,26 @@
name: 'Close unverified PRs'
on:
schedule:
- cron: '30 1 * * *'
permissions: {}
jobs:
stale:
runs-on: ubuntu-latest
permissions:
issues: read
pull-requests: write
steps:
- uses: actions/stale@5bef64f19d7facfb25b37b414482c7164d639639 # v9.1.0
with:
stale-pr-message: 'This PR is unverified and has been open for 2 days, it will now be closed. If you want to contribute please sign the CLA as indicated by the bot.'
days-before-stale: 2
days-before-close: 0
# We are not interested in suppressing issues so have a currently non existent label
# if we ever accept issues to become stale/closable this label will be the signal for that
only-issue-labels: can-be-auto-closed
# Only unverified PRs are an issue
exempt-pr-labels: cla-signed
# We don't want people commenting to keep an unverified PR
ignore-updates: true

6
.gitignore vendored
View File

@@ -36,7 +36,7 @@ package-lock.json
.env
__pycache__
# Dir used for backward compatibility test data
# First directive is to ignore symlinks
tests/tfhe-backward-compat-data
ci/
# In case someone clones the lattice-estimator locally to verify security
/lattice-estimator

View File

@@ -10,6 +10,7 @@ ignore:
- keys
- coverage
- utils/tfhe-lints/ui/main.stderr
- utils/tfhe-backward-compat-data/**/*.ron # ron files are autogenerated
rules:
# checks if file ends in a newline character

View File

@@ -1,18 +1,28 @@
# Specifying a path without code owners means that path won't have owners and is akin to a negation
# i.e. the `core_crypto` dir is owned and needs owner approval/review, but not the `gpu` sub dir
# See https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners#example-of-a-codeowners-file
/backends/tfhe-cuda-backend/ @agnesLeroy
/backends/tfhe-hpu-backend/ @zama-ai/hardware
/tfhe/examples/hpu @zama-ai/hardware
/tfhe/src/core_crypto/ @IceTDrinker
/tfhe/src/core_crypto/gpu
/tfhe/src/core_crypto/gpu @agnesLeroy
/tfhe/src/core_crypto/hpu @zama-ai/hardware
/tfhe/src/shortint/ @mayeul-zama
/tfhe/src/integer/ @tmontaigu
/tfhe/src/integer/gpu
/tfhe/src/integer/gpu @agnesLeroy
/tfhe/src/integer/hpu @zama-ai/hardware
/tfhe/src/high_level_api/ @tmontaigu
/Makefile @IceTDrinker @soonum
/mockups/tfhe-hpu-mockup @zama-ai/hardware
/.github/ @soonum
/CODEOWNERS @IceTDrinker

View File

@@ -170,6 +170,8 @@ On the contrary, these changes are *not* data breaking:
* Renaming a type (unless it implements the `Named` trait).
* Adding a variant to the end of an enum.
Historical data from previous TFHE-rs versions are stored inside `utils/tfhe-backward-compat-data`. They are used to check on every PR that backward compatibility has been preserved.
## Example: adding a field
Suppose you want to add an i32 field to a type named `MyType`. The original type is defined as:

View File

@@ -18,7 +18,7 @@ members = [
]
exclude = [
"tests/backward_compatibility_tests",
"utils/tfhe-backward-compat-data",
"utils/tfhe-lints",
"apps/trivium",
]
@@ -30,12 +30,13 @@ itertools = "0.14"
num-complex = "0.4"
pulp = { version = "0.21", default-features = false }
rand = "0.8"
rayon = "1"
rayon = "1.11"
serde = { version = "1.0", default-features = false }
wasm-bindgen = "0.2.100"
[profile.bench]
lto = "fat"
lto = "off"
#lto = "fat"
[profile.release]
lto = "fat"

352
Makefile
View File

@@ -22,11 +22,7 @@ BENCH_TYPE?=latency
BENCH_PARAM_TYPE?=classical
BENCH_PARAMS_SET?=default
NODE_VERSION=22.6
BACKWARD_COMPAT_DATA_URL=https://github.com/zama-ai/tfhe-backward-compat-data.git
BACKWARD_COMPAT_DATA_BRANCH?=$(shell ./scripts/backward_compat_data_version.py)
BACKWARD_COMPAT_DATA_PROJECT=tfhe-backward-compat-data
BACKWARD_COMPAT_DATA_DIR=$(BACKWARD_COMPAT_DATA_PROJECT)
TFHE_SPEC:=tfhe
BACKWARD_COMPAT_DATA_DIR=utils/tfhe-backward-compat-data
WASM_PACK_VERSION="0.13.1"
# We are kind of hacking the cut here, the version cannot contain a quote '"'
WASM_BINDGEN_VERSION:=$(shell grep '^wasm-bindgen[[:space:]]*=' Cargo.toml | cut -d '"' -f 2 | xargs)
@@ -100,6 +96,12 @@ install_rs_build_toolchain:
( echo "Unable to install $(RS_BUILD_TOOLCHAIN) toolchain, check your rustup installation. \
Rustup can be downloaded at https://rustup.rs/" && exit 1 )
.PHONY: install_rs_msrv_toolchain # Install the msrv toolchain
install_rs_msrv_toolchain:
@rustup toolchain install --profile default "$(MIN_RUST_VERSION)" || \
( echo "Unable to install $(MIN_RUST_VERSION) toolchain, check your rustup installation. \
Rustup can be downloaded at https://rustup.rs/" && exit 1 )
.PHONY: install_build_wasm32_target # Install the wasm32 toolchain used for builds
install_build_wasm32_target: install_rs_build_toolchain
rustup +$(RS_BUILD_TOOLCHAIN) target add wasm32-unknown-unknown || \
@@ -159,20 +161,24 @@ install_tarpaulin: install_rs_build_toolchain
.PHONY: install_cargo_dylint # Install custom tfhe-rs lints
install_cargo_dylint:
cargo install cargo-dylint dylint-link
cargo install --locked cargo-dylint dylint-link
.PHONY: install_typos_checker # Install typos checker
install_typos_checker: install_rs_build_toolchain
@typos --version > /dev/null 2>&1 || \
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install typos-cli || \
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked typos-cli || \
( echo "Unable to install typos-cli, unknown error." && exit 1 )
.PHONY: install_zizmor # Install zizmor workflow security checker
install_zizmor: install_rs_build_toolchain
@zizmor --version > /dev/null 2>&1 || \
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install zizmor || \
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked zizmor --version ~1.9 || \
( echo "Unable to install zizmor, unknown error." && exit 1 )
.PHONY: install_cargo_cross # Install cross for big endian tests
install_cargo_cross: install_rs_build_toolchain
cargo $(CARGO_RS_BUILD_TOOLCHAIN) install --locked cross
.PHONY: setup_venv # Setup Python virtualenv for wasm tests
setup_venv:
python3 -m venv venv
@@ -248,6 +254,9 @@ install_mlc: install_rs_build_toolchain
.PHONY: fmt # Format rust code
fmt: install_rs_check_toolchain
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C $(BACKWARD_COMPAT_DATA_DIR) fmt
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C utils/tfhe-lints fmt
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C apps/trivium fmt
.PHONY: fmt_js # Format javascript code
fmt_js: check_nvm_installed
@@ -269,6 +278,9 @@ fmt_c_tests:
.PHONY: check_fmt # Check rust code format
check_fmt: install_rs_check_toolchain
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" fmt --check
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C $(BACKWARD_COMPAT_DATA_DIR) fmt --check
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C utils/tfhe-lints fmt --check
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options -C apps/trivium fmt --check
.PHONY: check_fmt_c_tests # Check C tests format
check_fmt_c_tests:
@@ -296,28 +308,28 @@ clippy_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats,extended-types,zk-pok \
--all-targets \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
.PHONY: check_gpu # Run check on tfhe with "gpu" enabled
check_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" check \
--features=boolean,shortint,integer,internal-keycache,gpu,pbs-stats \
--all-targets \
-p $(TFHE_SPEC)
-p tfhe
.PHONY: clippy_hpu # Run clippy lints on tfhe with "hpu" enabled
clippy_hpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean,shortint,integer,internal-keycache,hpu,pbs-stats,extended-types \
--all-targets \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
.PHONY: clippy_gpu_hpu # Run clippy lints on tfhe with "gpu" and "hpu" enabled
clippy_gpu_hpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean,shortint,integer,internal-keycache,gpu,hpu,pbs-stats,extended-types,zk-pok \
--all-targets \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
.PHONY: fix_newline # Fix newline at end of file issues to be UNIX compliant
fix_newline: check_linelint_installed
@@ -338,55 +350,55 @@ check_workflow_security: install_zizmor
.PHONY: clippy_core # Run clippy lints on core_crypto with and without experimental features
clippy_core: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=nightly-avx512 \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=experimental,nightly-avx512 \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=zk-pok \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
.PHONY: clippy_boolean # Run clippy lints enabling the boolean features
clippy_boolean: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
.PHONY: clippy_shortint # Run clippy lints enabling the shortint features
clippy_shortint: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=shortint \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=shortint,experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=zk-pok,shortint \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
.PHONY: clippy_integer # Run clippy lints enabling the integer features
clippy_integer: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=integer \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=integer,experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=integer,experimental,extended-types \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
.PHONY: clippy # Run clippy lints enabling the boolean, shortint, integer
clippy: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=boolean,shortint,integer \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
.PHONY: clippy_rustdoc # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
clippy_rustdoc: install_rs_check_toolchain
@@ -397,7 +409,7 @@ clippy_rustdoc: install_rs_check_toolchain
CARGO_TERM_QUIET=true CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental \
-p $(TFHE_SPEC)
-p tfhe
.PHONY: clippy_rustdoc_gpu # Run clippy lints on doctests enabling the boolean, shortint, integer and zk-pok
clippy_rustdoc_gpu: install_rs_check_toolchain
@@ -408,22 +420,22 @@ clippy_rustdoc_gpu: install_rs_check_toolchain
CARGO_TERM_QUIET=true CLIPPYFLAGS="-D warnings" RUSTDOCFLAGS="--no-run --nocapture --test-builder ./scripts/clippy_driver.sh -Z unstable-options" \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" test --doc \
--features=boolean,shortint,integer,zk-pok,pbs-stats,strings,experimental,gpu \
-p $(TFHE_SPEC)
-p tfhe
.PHONY: clippy_c_api # Run clippy lints enabling the boolean, shortint and the C API
clippy_c_api: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean-c-api,shortint-c-api,high-level-c-api,extended-types \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
.PHONY: clippy_js_wasm_api # Run clippy lints enabling the boolean, shortint, integer and the js wasm API
clippy_js_wasm_api: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,zk-pok,extended-types \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--features=boolean-client-js-wasm-api,shortint-client-js-wasm-api,integer-client-js-wasm-api,high-level-client-js-wasm-api,extended-types \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
.PHONY: clippy_tasks # Run clippy lints on helper tasks crate.
clippy_tasks: install_rs_check_toolchain
@@ -444,10 +456,10 @@ clippy_ws_tests: install_rs_check_toolchain
clippy_all_targets: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats,extended-types \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings,pbs-stats,extended-types,experimental \
-p $(TFHE_SPEC) -- --no-deps -D warnings
-p tfhe -- --no-deps -D warnings
.PHONY: clippy_tfhe_csprng # Run clippy lints on tfhe-csprng
clippy_tfhe_csprng: install_rs_check_toolchain
@@ -458,6 +470,8 @@ clippy_tfhe_csprng: install_rs_check_toolchain
clippy_zk_pok: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-p tfhe-zk-pok -- --no-deps -D warnings
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-p tfhe-zk-pok --features=experimental -- --no-deps -D warnings
.PHONY: clippy_versionable # Run clippy lints on tfhe-versionable
clippy_versionable: install_rs_check_toolchain
@@ -477,10 +491,22 @@ clippy_param_dedup: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-p param_dedup -- --no-deps -D warnings
.PHONY: clippy_backward_compat_data # Run clippy lints on tfhe-backward-compat-data
clippy_backward_compat_data: install_rs_check_toolchain # the toolchain is selected with toolchain.toml
@# Some old crates are x86 specific, only run in that case
@if uname -a | grep -q x86; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" -Z unstable-options \
-C $(BACKWARD_COMPAT_DATA_DIR) clippy --all-targets \
-- --no-deps -D warnings; \
else \
echo "Cannot run clippy for backward compat crate on non x86 platform for now."; \
fi
.PHONY: clippy_all # Run all clippy targets
clippy_all: clippy_rustdoc clippy clippy_boolean clippy_shortint clippy_integer clippy_all_targets \
clippy_c_api clippy_js_wasm_api clippy_tasks clippy_core clippy_tfhe_csprng clippy_zk_pok clippy_trivium \
clippy_versionable clippy_tfhe_lints clippy_ws_tests clippy_bench clippy_param_dedup
clippy_versionable clippy_tfhe_lints clippy_ws_tests clippy_bench clippy_param_dedup \
clippy_backward_compat_data
.PHONY: clippy_fast # Run main clippy targets
clippy_fast: clippy_rustdoc clippy clippy_all_targets clippy_c_api clippy_js_wasm_api clippy_tasks \
@@ -496,6 +522,12 @@ clippy_hpu_backend: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy --all-targets \
-p tfhe-hpu-backend -- --no-deps -D warnings
.PHONY: clippy_hpu_mockup # Run clippy lints on tfhe-hpu-mockup
clippy_hpu_mockup: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" clippy \
--all-targets \
-p tfhe-hpu-mockup -- --no-deps -D warnings
.PHONY: check_rust_bindings_did_not_change # Check rust bindings are up to date for tfhe-cuda-backend
check_rust_bindings_did_not_change:
cargo build -p tfhe-cuda-backend && "$(MAKE)" fmt_gpu && \
@@ -508,67 +540,79 @@ check_rust_bindings_did_not_change:
tfhe_lints: install_cargo_dylint
RUSTFLAGS="$(RUSTFLAGS)" cargo dylint --all -p tfhe --no-deps -- \
--features=boolean,shortint,integer,strings,zk-pok
RUSTFLAGS="$(RUSTFLAGS)" cargo dylint --all -p tfhe-zk-pok --no-deps -- \
--features=experimental
.PHONY: build_core # Build core_crypto without experimental features
build_core: install_rs_build_toolchain install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
-p $(TFHE_SPEC)
-p tfhe
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=nightly-avx512 -p $(TFHE_SPEC); \
--features=nightly-avx512 -p tfhe; \
fi
.PHONY: build_core_experimental # Build core_crypto with experimental features
build_core_experimental: install_rs_build_toolchain install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=experimental -p $(TFHE_SPEC)
--features=experimental -p tfhe
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=experimental,nightly-avx512 -p $(TFHE_SPEC); \
--features=experimental,nightly-avx512 -p tfhe; \
fi
.PHONY: build_boolean # Build with boolean enabled
build_boolean: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean -p $(TFHE_SPEC) --all-targets
--features=boolean -p tfhe --all-targets
.PHONY: build_shortint # Build with shortint enabled
build_shortint: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=shortint -p $(TFHE_SPEC) --all-targets
--features=shortint -p tfhe --all-targets
.PHONY: build_integer # Build with integer enabled
build_integer: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=integer -p $(TFHE_SPEC) --all-targets
--features=integer -p tfhe --all-targets
.PHONY: build_tfhe_full # Build with boolean, shortint and integer enabled
build_tfhe_full: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean,shortint,integer -p $(TFHE_SPEC) --all-targets
--features=boolean,shortint,integer -p tfhe --all-targets
.PHONY: build_tfhe_coverage # Build with test coverage enabled
build_tfhe_coverage: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) --tests
--features=boolean,shortint,integer,internal-keycache -p tfhe --tests
# As of 05/08/2025 this is the set of features that can be easily compiled without additional
# toolkits
.PHONY: build_tfhe_msrv # Build with msrv compiler
build_tfhe_msrv: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo +$(MIN_RUST_VERSION) build --profile dev \
--features=boolean,extended-types,hpu,hpu-debug \
--features=hpu-v80,integer,noise-asserts \
--features=pbs-stats,shortint,strings,zk-pok -p tfhe
.PHONY: build_c_api # Build the C API for boolean, shortint and integer
build_c_api: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,extended-types \
-p $(TFHE_SPEC)
-p tfhe
.PHONY: build_c_api_gpu # Build the C API for boolean, shortint and integer
build_c_api_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,extended-types,gpu \
-p $(TFHE_SPEC)
-p tfhe
.PHONY: build_c_api_experimental_deterministic_fft # Build the C API for boolean, shortint and integer with experimental deterministic FFT
build_c_api_experimental_deterministic_fft: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) build --profile $(CARGO_PROFILE) \
--features=boolean-c-api,shortint-c-api,high-level-c-api,zk-pok,experimental-force_fft_algo_dif4 \
-p $(TFHE_SPEC)
-p tfhe
.PHONY: build_web_js_api # Build the js API targeting the web browser
build_web_js_api: install_rs_build_toolchain install_wasm_pack
@@ -603,10 +647,10 @@ build_tfhe_csprng: install_rs_build_toolchain
.PHONY: test_core_crypto # Run the tests of the core_crypto module including experimental ones
test_core_crypto: install_rs_build_toolchain install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=experimental,zk-pok -p $(TFHE_SPEC) -- core_crypto::
--features=experimental,zk-pok -p tfhe -- core_crypto::
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=experimental,zk-pok,nightly-avx512 -p $(TFHE_SPEC) -- core_crypto::; \
--features=experimental,zk-pok,nightly-avx512 -p tfhe -- core_crypto::; \
fi
.PHONY: test_core_crypto_cov # Run the tests of the core_crypto module with code coverage
@@ -615,13 +659,13 @@ test_core_crypto_cov: install_rs_build_toolchain install_rs_check_toolchain inst
--out xml --output-dir coverage/core_crypto --line --engine llvm --timeout 500 \
--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
--features=experimental,internal-keycache \
-p $(TFHE_SPEC) -- core_crypto::
-p tfhe -- core_crypto::
@if [[ "$(AVX512_SUPPORT)" == "ON" ]]; then \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) tarpaulin --profile $(CARGO_PROFILE) \
--out xml --output-dir coverage/core_crypto_avx512 --line --engine llvm --timeout 500 \
--implicit-test-threads $(COVERAGE_EXCLUDED_FILES) \
--features=experimental,internal-keycache,nightly-avx512 \
-p $(TFHE_SPEC) -- -Z unstable-options --report-time core_crypto::; \
-p tfhe -- -Z unstable-options --report-time core_crypto::; \
fi
.PHONY: test_cuda_backend # Run the internal tests of the CUDA backend
@@ -638,16 +682,41 @@ test_gpu: test_core_crypto_gpu test_integer_gpu test_cuda_backend
.PHONY: test_core_crypto_gpu # Run the tests of the core_crypto module including experimental on the gpu backend
test_core_crypto_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
--features=gpu -p tfhe -- core_crypto::gpu::
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=gpu -p $(TFHE_SPEC) -- core_crypto::gpu::
--features=gpu -p tfhe -- core_crypto::gpu::
.PHONY: test_integer_gpu # Run the tests of the integer module including experimental on the gpu backend
test_integer_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key:: --test-threads=4
--features=integer,gpu -p tfhe -- integer::gpu::server_key:: --test-threads=4
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::server_key::
--features=integer,gpu -p tfhe -- integer::gpu::server_key::
.PHONY: test_integer_gpu_debug # Run the tests of the integer module with Debug flags for CUDA
test_integer_gpu_debug: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile release_lto_off \
--features=integer,gpu-debug -vv -p tfhe -- integer::gpu::server_key:: --test-threads=1 --nocapture
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile release_lto_off \
--features=integer,gpu-debug -p tfhe -- integer::gpu::server_key::
.PHONY: test_high_level_api_gpu_debug # Run the tests of the integer module with Debug flags for CUDA
test_high_level_api_gpu_debug: install_rs_build_toolchain install_cargo_nextest
export RUSTFLAGS="$(RUSTFLAGS)" && \
export CARGO_RS_BUILD_TOOLCHAIN="$(CARGO_RS_BUILD_TOOLCHAIN)" && \
export TFHE_SPEC="tfhe" && \
export CARGO_PROFILE="$(CARGO_PROFILE)" && scripts/check_memory_errors.sh
.PHONY: test_integer_hl_test_gpu_check_warnings
test_integer_hl_test_gpu_check_warnings: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) build \
--features=integer,internal-keycache,gpu-debug,zk-pok -vv -p tfhe &> /tmp/gpu_compile_output
WARNINGS=$$(cat /tmp/gpu_compile_output | grep ": warning #" | grep "\[tfhe-cuda-backend" | grep -v "inline qualifier" || true) && \
if [[ "$${WARNINGS}" != "" ]]; then \
echo "FAILING BECAUSE CUDA COMPILATION WARNINGS WERE DETECTED: " && \
echo "$${WARNINGS}" && exit 1; \
fi
.PHONY: test_integer_long_run_gpu # Run the long run integer tests on the gpu backend
test_integer_long_run_gpu: install_rs_check_toolchain install_cargo_nextest
@@ -655,21 +724,27 @@ test_integer_long_run_gpu: install_rs_check_toolchain install_cargo_nextest
LONG_TESTS=TRUE \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)" --backend "gpu"
--tfhe-package "tfhe" --backend "gpu"
.PHONY: test_integer_short_run_gpu # Run the long run integer tests on the gpu backend
test_integer_short_run_gpu: install_rs_check_toolchain install_cargo_nextest
TFHE_RS_TEST_LONG_TESTS_MINIMAL=TRUE \
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer,gpu -p tfhe -- integer::gpu::server_key::radix::tests_long_run::test_random_op_sequence integer::gpu::server_key::radix::tests_long_run::test_signed_random_op_sequence --test-threads=1 --nocapture
.PHONY: test_integer_compression
test_integer_compression: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compressed_ciphertext_list::tests::
--features=integer -p tfhe -- integer::ciphertext::compressed_ciphertext_list::tests::
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=integer -p $(TFHE_SPEC) -- integer::ciphertext::compress
--features=integer -p tfhe -- integer::ciphertext::compress
.PHONY: test_integer_compression_gpu
test_integer_compression_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
--features=integer,gpu -p tfhe -- integer::gpu::ciphertext::compressed_ciphertext_list::tests::
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --doc --profile $(CARGO_PROFILE) \
--features=integer,gpu -p $(TFHE_SPEC) -- integer::gpu::ciphertext::compress
--features=integer,gpu -p tfhe -- integer::gpu::ciphertext::compress
.PHONY: test_integer_gpu_ci # Run the tests for integer ci on gpu backend
test_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -678,7 +753,7 @@ test_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
--tfhe-package "$(TFHE_SPEC)"
--tfhe-package "tfhe"
.PHONY: test_unsigned_integer_gpu_ci # Run the tests for unsigned integer ci on gpu backend
test_unsigned_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -687,7 +762,7 @@ test_unsigned_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
--unsigned-only --tfhe-package "tfhe"
.PHONY: test_signed_integer_gpu_ci # Run the tests for signed integer ci on gpu backend
test_signed_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -696,7 +771,7 @@ test_signed_integer_gpu_ci: install_rs_check_toolchain install_cargo_nextest
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --backend "gpu" \
--signed-only --tfhe-package "$(TFHE_SPEC)"
--signed-only --tfhe-package "tfhe"
.PHONY: test_integer_multi_bit_gpu_ci # Run the tests for integer ci on gpu backend running only multibit tests
test_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -705,7 +780,7 @@ test_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
--tfhe-package "$(TFHE_SPEC)"
--tfhe-package "tfhe"
.PHONY: test_unsigned_integer_multi_bit_gpu_ci # Run the tests for unsigned integer ci on gpu backend running only multibit tests
test_unsigned_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -714,7 +789,7 @@ test_unsigned_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
--unsigned-only --tfhe-package "tfhe"
.PHONY: test_signed_integer_multi_bit_gpu_ci # Run the tests for signed integer ci on gpu backend running only multibit tests
test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_nextest
@@ -723,34 +798,34 @@ test_signed_integer_multi_bit_gpu_ci: install_rs_check_toolchain install_cargo_n
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --backend "gpu" \
--signed-only --tfhe-package "$(TFHE_SPEC)"
--signed-only --tfhe-package "tfhe"
.PHONY: test_integer_hpu_ci # Run the tests for integer ci on hpu backend
test_integer_hpu_ci: install_rs_check_toolchain install_cargo_nextest
cargo test --release -p $(TFHE_SPEC) --features hpu-v80 --test hpu
cargo test --release -p tfhe --features hpu-v80 --test hpu
.PHONY: test_integer_hpu_mockup_ci # Run the tests for integer ci on hpu backend and mockup
test_integer_hpu_mockup_ci: install_rs_check_toolchain install_cargo_nextest
source ./setup_hpu.sh --config sim ; \
cargo build --release --bin hpu_mockup; \
coproc target/release/hpu_mockup --params mockups/tfhe-hpu-mockup/params/tuniform_64b_pfail64_psi64.toml > mockup.log; \
coproc target/release/hpu_mockup --params mockups/tfhe-hpu-mockup/params/tuniform_64b_pfail64_psi64.toml > mockup.log; \
HPU_TEST_ITER=1 \
cargo test --profile devo -p $(TFHE_SPEC) --features hpu --test hpu -- u32 && \
cargo test --profile devo -p tfhe --features hpu --test hpu -- u32 && \
kill %1
.PHONY: test_integer_hpu_mockup_ci_fast # Run the quick tests for integer ci on hpu backend and mockup.
test_integer_hpu_mockup_ci_fast: install_rs_check_toolchain install_cargo_nextest
source ./setup_hpu.sh --config sim ; \
cargo build --profile devo --bin hpu_mockup; \
coproc target/devo/hpu_mockup --params mockups/tfhe-hpu-mockup/params/tuniform_64b_fast.toml > mockup.log; \
coproc target/devo/hpu_mockup --params mockups/tfhe-hpu-mockup/params/tuniform_64b_fast.toml > mockup.log; \
HPU_TEST_ITER=1 \
cargo test --profile devo -p $(TFHE_SPEC) --features hpu --test hpu -- u32 && \
cargo test --profile devo -p tfhe --features hpu --test hpu -- u32 && \
kill %1
.PHONY: test_boolean # Run the tests of the boolean module
test_boolean: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=boolean -p $(TFHE_SPEC) -- boolean::
--features=boolean -p tfhe -- boolean::
.PHONY: test_boolean_cov # Run the tests of the boolean module with code coverage
test_boolean_cov: install_rs_check_toolchain install_tarpaulin
@@ -758,13 +833,13 @@ test_boolean_cov: install_rs_check_toolchain install_tarpaulin
--out xml --output-dir coverage/boolean --line --engine llvm --timeout 500 \
$(COVERAGE_EXCLUDED_FILES) \
--features=boolean,internal-keycache \
-p $(TFHE_SPEC) -- -Z unstable-options --report-time boolean::
-p tfhe -- -Z unstable-options --report-time boolean::
.PHONY: test_c_api_rs # Run the rust tests for the C API
test_c_api_rs: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=boolean-c-api,shortint-c-api,high-level-c-api \
-p $(TFHE_SPEC) \
-p tfhe \
c_api
.PHONY: test_c_api_c # Run the C tests for the C API
@@ -783,19 +858,19 @@ test_shortint_ci: install_rs_build_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
FAST_TESTS="$(FAST_TESTS)" \
./scripts/shortint-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --tfhe-package "$(TFHE_SPEC)"
--cargo-profile "$(CARGO_PROFILE)" --tfhe-package "tfhe"
.PHONY: test_shortint_multi_bit_ci # Run the tests for shortint ci running only multibit tests
test_shortint_multi_bit_ci: install_rs_build_toolchain install_cargo_nextest
BIG_TESTS_INSTANCE="$(BIG_TESTS_INSTANCE)" \
FAST_TESTS="$(FAST_TESTS)" \
./scripts/shortint-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --tfhe-package "$(TFHE_SPEC)"
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --tfhe-package "tfhe"
.PHONY: test_shortint # Run all the tests for shortint
test_shortint: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=shortint,internal-keycache -p $(TFHE_SPEC) -- shortint::
--features=shortint,internal-keycache -p tfhe -- shortint::
.PHONY: test_shortint_cov # Run the tests of the shortint module with code coverage
test_shortint_cov: install_rs_check_toolchain install_tarpaulin
@@ -803,7 +878,7 @@ test_shortint_cov: install_rs_check_toolchain install_tarpaulin
--out xml --output-dir coverage/shortint --line --engine llvm --timeout 500 \
$(COVERAGE_EXCLUDED_FILES) \
--features=shortint,internal-keycache \
-p $(TFHE_SPEC) -- -Z unstable-options --report-time shortint::
-p tfhe -- -Z unstable-options --report-time shortint::
.PHONY: test_integer_ci # Run the tests for integer ci
test_integer_ci: install_rs_check_toolchain install_cargo_nextest
@@ -812,7 +887,7 @@ test_integer_ci: install_rs_check_toolchain install_cargo_nextest
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)"
--tfhe-package "tfhe"
.PHONY: test_unsigned_integer_ci # Run the tests for unsigned integer ci
test_unsigned_integer_ci: install_rs_check_toolchain install_cargo_nextest
@@ -821,7 +896,7 @@ test_unsigned_integer_ci: install_rs_check_toolchain install_cargo_nextest
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
--unsigned-only --tfhe-package "tfhe"
.PHONY: test_signed_integer_ci # Run the tests for signed integer ci
test_signed_integer_ci: install_rs_check_toolchain install_cargo_nextest
@@ -830,7 +905,7 @@ test_signed_integer_ci: install_rs_check_toolchain install_cargo_nextest
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--signed-only --tfhe-package "$(TFHE_SPEC)"
--signed-only --tfhe-package "tfhe"
.PHONY: test_integer_multi_bit_ci # Run the tests for integer ci running only multibit tests
test_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
@@ -839,7 +914,7 @@ test_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)"
--tfhe-package "tfhe"
.PHONY: test_unsigned_integer_multi_bit_ci # Run the tests for unsigned integer ci running only multibit tests
test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
@@ -848,7 +923,7 @@ test_unsigned_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nex
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
--unsigned-only --tfhe-package "$(TFHE_SPEC)"
--unsigned-only --tfhe-package "tfhe"
.PHONY: test_signed_integer_multi_bit_ci # Run the tests for signed integer ci running only multibit tests
test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nextest
@@ -857,7 +932,7 @@ test_signed_integer_multi_bit_ci: install_rs_check_toolchain install_cargo_nexte
NIGHTLY_TESTS="$(NIGHTLY_TESTS)" \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_CHECK_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --multi-bit --avx512-support "$(AVX512_SUPPORT)" \
--signed-only --tfhe-package "$(TFHE_SPEC)"
--signed-only --tfhe-package "tfhe"
.PHONY: test_integer_long_run # Run the long run integer tests
test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
@@ -865,22 +940,31 @@ test_integer_long_run: install_rs_check_toolchain install_cargo_nextest
LONG_TESTS=TRUE \
./scripts/integer-tests.sh --rust-toolchain $(CARGO_RS_BUILD_TOOLCHAIN) \
--cargo-profile "$(CARGO_PROFILE)" --avx512-support "$(AVX512_SUPPORT)" \
--tfhe-package "$(TFHE_SPEC)"
--tfhe-package "tfhe"
.PHONY: test_noise_check # Run dedicated noise and pfail check tests
test_noise_check: install_rs_check_toolchain
@# First run the sanity checks to make sure the atomic patterns are correct
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=boolean,shortint,integer,nightly-avx512 -p tfhe -- sanity_check
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=boolean,shortint,integer,nightly-avx512 -p tfhe -- noise_check \
--test-threads=1 --nocapture
.PHONY: test_safe_serialization # Run the tests for safe serialization
test_safe_serialization: install_rs_build_toolchain install_cargo_nextest
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=boolean,shortint,integer,internal-keycache -p $(TFHE_SPEC) -- safe_serialization::
--features=boolean,shortint,integer,internal-keycache -p tfhe -- safe_serialization::
.PHONY: test_zk # Run the tests for the zk module of the TFHE-rs crate
test_zk: install_rs_build_toolchain install_cargo_nextest
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=shortint,zk-pok -p $(TFHE_SPEC) -- zk::
--features=shortint,zk-pok -p tfhe -- zk::
.PHONY: test_integer # Run all the tests for integer
test_integer: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=integer,internal-keycache -p $(TFHE_SPEC) -- integer::
--features=integer,internal-keycache -p tfhe -- integer::
.PHONY: test_integer_cov # Run the tests of the integer module with code coverage
test_integer_cov: install_rs_check_toolchain install_tarpaulin
@@ -889,17 +973,17 @@ test_integer_cov: install_rs_check_toolchain install_tarpaulin
--implicit-test-threads \
--exclude-files $(COVERAGE_EXCLUDED_FILES) \
--features=integer,internal-keycache \
-p $(TFHE_SPEC) -- -Z unstable-options --report-time integer::
-p tfhe -- -Z unstable-options --report-time integer::
.PHONY: test_high_level_api # Run all the tests for high_level_api
test_high_level_api: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings -p $(TFHE_SPEC) \
--features=boolean,shortint,integer,internal-keycache,zk-pok,strings -p tfhe \
-- high_level_api::
test_high_level_api_gpu: install_rs_build_toolchain install_cargo_nextest
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
--test-threads=4 --features=integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
--test-threads=4 --features=integer,internal-keycache,gpu,zk-pok -p tfhe \
-E "test(/high_level_api::.*gpu.*/)"
test_high_level_api_hpu: install_rs_build_toolchain install_cargo_nextest
@@ -907,13 +991,13 @@ ifeq ($(HPU_CONFIG), v80)
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
--build-jobs=$(CARGO_BUILD_JOBS) \
--test-threads=1 \
--features=integer,internal-keycache,hpu,hpu-v80 -p $(TFHE_SPEC) \
--features=integer,internal-keycache,hpu,hpu-v80 -p tfhe \
-E "test(/high_level_api::.*hpu.*/)"
else
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) nextest run --cargo-profile $(CARGO_PROFILE) \
--build-jobs=$(CARGO_BUILD_JOBS) \
--test-threads=1 \
--features=integer,internal-keycache,hpu -p $(TFHE_SPEC) \
--features=integer,internal-keycache,hpu -p tfhe \
-E "test(/high_level_api::.*hpu.*/)"
endif
@@ -921,7 +1005,7 @@ endif
.PHONY: test_strings # Run the tests for strings ci
test_strings: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=shortint,integer,strings -p $(TFHE_SPEC) \
--features=shortint,integer,strings -p tfhe \
-- strings::
@@ -929,15 +1013,27 @@ test_strings: install_rs_build_toolchain
test_user_doc: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
--features=boolean,shortint,integer,internal-keycache,pbs-stats,zk-pok,strings \
-p $(TFHE_SPEC) \
-p tfhe \
-- test_user_docs::
.PHONY: test_user_doc_gpu # Run tests for GPU from the .md documentation
test_user_doc_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
--features=boolean,shortint,integer,internal-keycache,gpu,zk-pok -p $(TFHE_SPEC) \
--features=internal-keycache,integer,zk-pok,gpu -p tfhe \
-- test_user_docs::
.PHONY: test_user_doc_hpu # Run tests for HPU from the .md documentation
test_user_doc_hpu: install_rs_build_toolchain
ifeq ($(HPU_CONFIG), v80)
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
--features=internal-keycache,integer,hpu,hpu-v80 -p tfhe \
-- test_user_docs::
else
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) --doc \
--features=internal-keycache,integer,hpu -p tfhe \
-- test_user_docs::
endif
.PHONY: test_regex_engine # Run tests for regex_engine example
@@ -968,10 +1064,16 @@ test_tfhe_csprng: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-p tfhe-csprng
.PHONY: test_tfhe_csprng_big_endian # Run tfhe-csprng tests on an emulated big endian system
test_tfhe_csprng_big_endian: install_rs_build_toolchain install_cargo_cross
RUSTFLAGS="" cross $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-p tfhe-csprng --target=powerpc64-unknown-linux-gnu
.PHONY: test_zk_pok # Run tfhe-zk-pok tests
test_zk_pok: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
-p tfhe-zk-pok
-p tfhe-zk-pok --features experimental
.PHONY: test_zk_wasm_x86_compat_ci
test_zk_wasm_x86_compat_ci: check_nvm_installed
@@ -1001,16 +1103,11 @@ test_tfhe_lints: install_cargo_dylint
# Here we use the "patch" functionality of Cargo to make sure the repo used for the data is the same as the one used for the code.
.PHONY: test_backward_compatibility_ci
test_backward_compatibility_ci: install_rs_build_toolchain
TFHE_BACKWARD_COMPAT_DATA_DIR="$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--config "patch.'$(BACKWARD_COMPAT_DATA_URL)'.$(BACKWARD_COMPAT_DATA_PROJECT).path=\"tests/$(BACKWARD_COMPAT_DATA_DIR)\"" \
TFHE_BACKWARD_COMPAT_DATA_DIR="../$(BACKWARD_COMPAT_DATA_DIR)" RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --profile $(CARGO_PROFILE) \
--features=shortint,integer,zk-pok -p tests test_backward_compatibility -- --nocapture
.PHONY: test_backward_compatibility # Same as test_backward_compatibility_ci but tries to clone the data repo first if needed
test_backward_compatibility: tests/$(BACKWARD_COMPAT_DATA_DIR) test_backward_compatibility_ci
.PHONY: backward_compat_branch # Prints the required backward compatibility branch
backward_compat_branch:
@echo "$(BACKWARD_COMPAT_DATA_BRANCH)"
test_backward_compatibility: pull_backward_compat_data test_backward_compatibility_ci
.PHONY: doc # Build rust doc
doc: install_rs_check_toolchain
@@ -1018,7 +1115,7 @@ doc: install_rs_check_toolchain
DOCS_RS=1 \
RUSTDOCFLAGS="--html-in-header katex-header.html" \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p $(TFHE_SPEC)
--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok --no-deps -p tfhe
.PHONY: docs # Build rust doc alias for doc
docs: doc
@@ -1029,7 +1126,7 @@ lint_doc: install_rs_check_toolchain
DOCS_RS=1 \
RUSTDOCFLAGS="--html-in-header katex-header.html -Dwarnings" \
cargo "$(CARGO_RS_CHECK_TOOLCHAIN)" doc \
--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p $(TFHE_SPEC) --no-deps
--features=boolean,shortint,integer,strings,gpu,internal-keycache,experimental,zk-pok -p tfhe --no-deps
.PHONY: lint_docs # Build rust doc with linting enabled alias for lint_doc
lint_docs: lint_doc
@@ -1055,6 +1152,10 @@ check_intra_md_links: install_mlc
check_md_links: install_mlc
mlc --match-file-extension tfhe/docs
.PHONY: check_doc_paths_use_dash # Check paths use "-" instead of "_" in docs for gitbook compatibility
check_doc_paths_use_dash:
python3 ./scripts/check_doc_paths_use_dash.py
.PHONY: check_parameter_export_ok # Checks exported "current" shortint parameter module is correct
check_parameter_export_ok:
python3 ./scripts/check_current_param_export.py
@@ -1063,7 +1164,7 @@ check_parameter_export_ok:
check_compile_tests: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
--features=experimental,boolean,shortint,integer,internal-keycache \
-p $(TFHE_SPEC)
-p tfhe
@if [[ "$(OS)" == "Linux" || "$(OS)" == "Darwin" ]]; then \
"$(MAKE)" build_c_api && \
@@ -1074,7 +1175,7 @@ check_compile_tests: install_rs_build_toolchain
check_compile_tests_benches_gpu: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --no-run \
--features=experimental,boolean,shortint,integer,internal-keycache,gpu,zk-pok \
-p $(TFHE_SPEC)
-p tfhe
mkdir -p "$(TFHECUDA_BUILD)" && \
cd "$(TFHECUDA_BUILD)" && \
cmake .. -DCMAKE_BUILD_TYPE=Debug -DTFHE_CUDA_BACKEND_BUILD_TESTS=ON -DTFHE_CUDA_BACKEND_BUILD_BENCHMARKS=ON && \
@@ -1412,6 +1513,20 @@ bench_tfhe_zk_pok: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench -p tfhe-zk-pok --
.PHONY: bench_hlapi_noise_squash # Run benchmarks for noise squash operation
bench_hlapi_noise_squash: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-noise-squash \
--features=integer,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --
.PHONY: bench_hlapi_noise_squash_gpu # Run benchmarks for noise squash operation on GPU
bench_hlapi_noise_squash_gpu: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" __TFHE_RS_BENCH_TYPE=$(BENCH_TYPE) \
cargo $(CARGO_RS_CHECK_TOOLCHAIN) bench \
--bench hlapi-noise-squash \
--features=integer,gpu,internal-keycache,pbs-stats,nightly-avx512 -p tfhe-benchmark --
#
# Utility tools
#
@@ -1420,13 +1535,13 @@ bench_tfhe_zk_pok: install_rs_check_toolchain
gen_key_cache: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS) --cfg tarpaulin" cargo $(CARGO_RS_BUILD_TOOLCHAIN) run --profile $(CARGO_PROFILE) \
--example generates_test_keys \
--features=boolean,shortint,experimental,internal-keycache -p $(TFHE_SPEC) \
--features=boolean,shortint,experimental,internal-keycache -p tfhe \
-- $(MULTI_BIT_ONLY) $(COVERAGE_ONLY)
.PHONY: gen_key_cache_core_crypto # Run function to generate keys and cache them for core_crypto tests
gen_key_cache_core_crypto: install_rs_build_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_BUILD_TOOLCHAIN) test --tests --profile $(CARGO_PROFILE) \
--features=experimental,internal-keycache -p $(TFHE_SPEC) -- --nocapture \
--features=experimental,internal-keycache -p tfhe -- --nocapture \
core_crypto::keycache::generate_keys
.PHONY: measure_hlapi_compact_pk_ct_sizes # Measure sizes of public keys and ciphertext for high-level API
@@ -1467,13 +1582,15 @@ parse_wasm_benchmarks: install_rs_check_toolchain
.PHONY: write_params_to_file # Gather all crypto parameters into a file with a Sage readable format.
write_params_to_file: install_rs_check_toolchain
RUSTFLAGS="$(RUSTFLAGS)" cargo $(CARGO_RS_CHECK_TOOLCHAIN) run \
--example write_params_to_file --features=boolean,shortint,internal-keycache
--example write_params_to_file --features=boolean,shortint,hpu,internal-keycache
.PHONY: clone_backward_compat_data # Clone the data repo needed for backward compatibility tests
clone_backward_compat_data:
./scripts/clone_backward_compat_data.sh $(BACKWARD_COMPAT_DATA_URL) $(BACKWARD_COMPAT_DATA_BRANCH) tests/$(BACKWARD_COMPAT_DATA_DIR)
.PHONY: pull_backward_compat_data # Pull the data files needed for backward compatibility tests
pull_backward_compat_data:
./scripts/pull_lfs_data.sh $(BACKWARD_COMPAT_DATA_DIR)
tests/$(BACKWARD_COMPAT_DATA_DIR): clone_backward_compat_data
.PHONY: pull_hpu_files # Pull the hpu files
pull_hpu_files:
./scripts/pull_lfs_data.sh backends/tfhe-hpu-backend/
#
# Real use case examples
@@ -1499,19 +1616,20 @@ sha256_bool: install_rs_check_toolchain
.PHONY: pcc # pcc stands for pre commit checks (except GPU)
pcc: no_tfhe_typo no_dbg_log check_parameter_export_ok check_fmt check_typos lint_doc \
check_md_docs_are_tested check_intra_md_links clippy_all check_compile_tests test_tfhe_lints \
check_md_docs_are_tested check_intra_md_links check_doc_paths_use_dash \
clippy_all check_compile_tests test_tfhe_lints \
tfhe_lints
.PHONY: pcc_gpu # pcc stands for pre commit checks for GPU compilation
pcc_gpu: check_rust_bindings_did_not_change clippy_rustdoc_gpu \
clippy_gpu clippy_cuda_backend clippy_bench_gpu check_compile_tests_benches_gpu
clippy_gpu clippy_cuda_backend clippy_bench_gpu check_compile_tests_benches_gpu test_integer_hl_test_gpu_check_warnings
.PHONY: pcc_hpu # pcc stands for pre commit checks for HPU compilation
pcc_hpu: clippy_hpu clippy_hpu_backend test_integer_hpu_mockup_ci_fast
pcc_hpu: clippy_hpu clippy_hpu_backend clippy_hpu_mockup test_integer_hpu_mockup_ci_fast
.PHONY: fpcc # pcc stands for pre commit checks, the f stands for fast
fpcc: no_tfhe_typo no_dbg_log check_parameter_export_ok check_fmt check_typos lint_doc \
check_md_docs_are_tested clippy_fast check_compile_tests
check_md_docs_are_tested check_intra_md_links check_doc_paths_use_dash clippy_fast check_compile_tests
.PHONY: conformance # Automatically fix problems that can be fixed
conformance: fix_newline fmt fmt_js

View File

@@ -18,6 +18,7 @@
<a href="https://github.com/zama-ai/tfhe-rs/releases"><img src="https://img.shields.io/github/v/release/zama-ai/tfhe-rs?style=flat-square"></a>
<a href="LICENSE"><img src="https://img.shields.io/badge/License-BSD--3--Clause--Clear-%23ffb243?style=flat-square"></a>
<a href="https://github.com/zama-ai/bounty-program"><img src="https://img.shields.io/badge/Contribute-Zama%20Bounty%20Program-%23ffd208?style=flat-square"></a>
<a href="https://slsa.dev"><img alt="SLSA 3" src="https://slsa.dev/images/gh-badge-level3.svg" /></a>
</p>
## About
@@ -148,7 +149,7 @@ To run this code, use the following command:
> Note that when running code that uses `TFHE-rs`, it is highly recommended
to run in release mode with cargo's `--release` flag to have the best performances possible.
*Find an example with more explanations in [this part of the documentation](https://docs.zama.ai/tfhe-rs/get-started/quick_start)*
*Find an example with more explanations in [this part of the documentation](https://docs.zama.ai/tfhe-rs/get-started/quick-start)*
<p align="right">
<a href="#about" > ↑ Back to top </a>
@@ -203,7 +204,7 @@ When a new update is published in the Lattice Estimator, we update parameters ac
By default, the parameter sets used in the High-Level API with the x86 CPU backend have a failure probability $\le 2^{128}$ to securely work in the IND-CPA^D model using the algorithmic techniques provided in our code base [1].
If you want to work within the IND-CPA security model, which is less strict than the IND-CPA-D model, the parameter sets can easily be changed and would have slightly better performance. More details can be found in the [TFHE-rs documentation](https://docs.zama.ai/tfhe-rs).
The default parameters used in the High-Level API with the GPU backend are chosen considering the IND-CPA security model, and are selected with a bootstrapping failure probability fixed at $p_{error} \le 2^{-64}$. In particular, it is assumed that the results of decrypted computations are not shared by the secret key owner with any third parties, as such an action can lead to leakage of the secret encryption key. If you are designing an application where decryptions must be shared, you will need to craft custom encryption parameters which are chosen in consideration of the IND-CPA^D security model [2].
The default parameters used in the High-Level API with the GPU backend are chosen considering the IND-CPA security model, and are selected with a bootstrapping failure probability fixed at $p_{error} \le 2^{-128}$. In particular, it is assumed that the results of decrypted computations are not shared by the secret key owner with any third parties, as such an action can lead to leakage of the secret encryption key. If you are designing an application where decryptions must be shared, you will need to craft custom encryption parameters which are chosen in consideration of the IND-CPA^D security model [2].
[1] Bernard, Olivier, et al. "Drifting Towards Better Error Probabilities in Fully Homomorphic Encryption Schemes". https://eprint.iacr.org/2024/1718.pdf

View File

@@ -129,7 +129,7 @@ Other sizes than 64 bit are expected to be available in the future.
# FHE shortint Trivium implementation
The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128`).
The same implementation is also available for generic Ciphertexts representing bits (meant to be used with parameters `V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128`).
It uses a lower level API of tfhe-rs, so the syntax is a little bit different. It also implements the `TransCiphering` trait. For optimization purposes, it does not internally run
on the same cryptographic parameters as the high level API of tfhe-rs. As such, it requires the usage of a casting key, to switch from one parameter space to another, which makes
its setup a little more intricate.
@@ -137,10 +137,10 @@ its setup a little more intricate.
Example code:
```rust
use tfhe::shortint::prelude::*;
use tfhe::shortint::parameters::v1_2::{
V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
use tfhe::shortint::parameters::current_params::{
V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
};
use tfhe::{ConfigBuilder, generate_keys, FheUint64};
use tfhe::prelude::*;
@@ -148,17 +148,17 @@ use tfhe_trivium::TriviumStreamShortint;
fn test_shortint() {
let config = ConfigBuilder::default()
.use_custom_parameters(V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let (client_key, server_key): (ClientKey, ServerKey) = gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128_2M128,
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128_2M128,
);
let key_string = "0053A6F94C9FF24598EB".to_string();

View File

@@ -1,9 +1,9 @@
use criterion::Criterion;
use tfhe::prelude::*;
use tfhe::shortint::parameters::v1_2::{
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
use tfhe::shortint::parameters::current_params::{
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
};
use tfhe::shortint::prelude::*;
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
@@ -11,19 +11,19 @@ use tfhe_trivium::{KreyviumStreamShortint, TransCiphering};
pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
let config = ConfigBuilder::default()
.use_custom_parameters(V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
);
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -64,19 +64,19 @@ pub fn kreyvium_shortint_warmup(c: &mut Criterion) {
pub fn kreyvium_shortint_gen(c: &mut Criterion) {
let config = ConfigBuilder::default()
.use_custom_parameters(V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
);
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();
@@ -112,19 +112,19 @@ pub fn kreyvium_shortint_gen(c: &mut Criterion) {
pub fn kreyvium_shortint_trans(c: &mut Criterion) {
let config = ConfigBuilder::default()
.use_custom_parameters(V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
);
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();

View File

@@ -1,9 +1,9 @@
use criterion::Criterion;
use tfhe::prelude::*;
use tfhe::shortint::parameters::v1_2::{
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
use tfhe::shortint::parameters::current_params::{
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
};
use tfhe::shortint::prelude::*;
use tfhe::{generate_keys, ConfigBuilder, FheUint64};
@@ -11,19 +11,19 @@ use tfhe_trivium::{TransCiphering, TriviumStreamShortint};
pub fn trivium_shortint_warmup(c: &mut Criterion) {
let config = ConfigBuilder::default()
.use_custom_parameters(V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
);
let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -64,19 +64,19 @@ pub fn trivium_shortint_warmup(c: &mut Criterion) {
pub fn trivium_shortint_gen(c: &mut Criterion) {
let config = ConfigBuilder::default()
.use_custom_parameters(V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
);
let key_string = "0053A6F94C9FF24598EB".to_string();
@@ -112,19 +112,19 @@ pub fn trivium_shortint_gen(c: &mut Criterion) {
pub fn trivium_shortint_trans(c: &mut Criterion) {
let config = ConfigBuilder::default()
.use_custom_parameters(V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
);
let key_string = "0053A6F94C9FF24598EB".to_string();

View File

@@ -1,9 +1,9 @@
use crate::{KreyviumStream, KreyviumStreamByte, KreyviumStreamShortint, TransCiphering};
use tfhe::prelude::*;
use tfhe::shortint::parameters::v1_2::{
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
use tfhe::shortint::parameters::current_params::{
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
};
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
// Values for these tests come from the github repo renaud1239/Kreyvium,
@@ -221,19 +221,19 @@ use tfhe::shortint::prelude::*;
#[test]
fn kreyvium_test_shortint_long() {
let config = ConfigBuilder::default()
.use_custom_parameters(V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
);
let key_string = "0053A6F94C9FF24598EB000000000000".to_string();

View File

@@ -1,9 +1,9 @@
use crate::{TransCiphering, TriviumStream, TriviumStreamByte, TriviumStreamShortint};
use tfhe::prelude::*;
use tfhe::shortint::parameters::v1_2::{
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
use tfhe::shortint::parameters::current_params::{
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128,
};
use tfhe::{generate_keys, ConfigBuilder, FheBool, FheUint64, FheUint8};
// Values for these tests come from the github repo cantora/avr-crypto-lib, commit 2a5b018,
@@ -357,19 +357,19 @@ use tfhe::shortint::prelude::*;
#[test]
fn trivium_test_shortint_long() {
let config = ConfigBuilder::default()
.use_custom_parameters(V1_2_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.use_custom_parameters(V1_3_PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128)
.build();
let (hl_client_key, hl_server_key) = generate_keys(config);
let underlying_ck: tfhe::shortint::ClientKey = (*hl_client_key.as_ref()).clone().into();
let underlying_sk: tfhe::shortint::ServerKey = (*hl_server_key.as_ref()).clone().into();
let (client_key, server_key): (ClientKey, ServerKey) =
gen_keys(V1_2_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
gen_keys(V1_3_PARAM_MESSAGE_1_CARRY_1_KS_PBS_GAUSSIAN_2M128);
let ksk = KeySwitchingKey::new(
(&client_key, Some(&server_key)),
(&underlying_ck, &underlying_sk),
V1_2_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
V1_3_PARAM_KEYSWITCH_1_1_KS_PBS_TO_2_2_KS_PBS_GAUSSIAN_2M128,
);
let key_string = "0053A6F94C9FF24598EB".to_string();

View File

@@ -1,6 +1,6 @@
[package]
name = "tfhe-cuda-backend"
version = "0.10.0"
version = "0.11.0"
edition = "2021"
authors = ["Zama team"]
license = "BSD-3-Clause-Clear"
@@ -19,3 +19,4 @@ bindgen = "0.71"
[features]
experimental-multi-arch = []
profile = []
debug = []

View File

@@ -53,6 +53,10 @@ fn main() {
cmake_config.define("USE_NVTOOLS", "OFF");
}
if cfg!(feature = "debug") {
cmake_config.define("CMAKE_BUILD_TYPE", "Debug");
}
// Build the CMake project
let dest = cmake_config.build();
println!("cargo:rustc-link-search=native={}", dest.display());

View File

@@ -52,6 +52,8 @@ endif()
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
else()
message("Building CUDA backend in ${CMAKE_BUILD_TYPE}")
endif()
# Add OpenMP support
@@ -76,8 +78,10 @@ endif()
add_compile_definitions(CUDA_ARCH=${CUDA_ARCH})
string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LOWERCASE)
# Check if the DEBUG flag is defined
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
if(CMAKE_BUILD_TYPE_LOWERCASE STREQUAL "debug")
# Debug mode
message("Compiling in Debug mode")
add_definitions(-DDEBUG)

View File

@@ -26,6 +26,7 @@ inline void cuda_error(cudaError_t code, const char *file, int line) {
std::abort(); \
}
uint32_t cuda_get_device();
void cuda_set_device(uint32_t gpu_index);
cudaEvent_t cuda_create_event(uint32_t gpu_index);
@@ -49,12 +50,13 @@ void *cuda_malloc(uint64_t size, uint32_t gpu_index);
void *cuda_malloc_with_size_tracking_async(uint64_t size, cudaStream_t stream,
uint32_t gpu_index,
uint64_t *size_tracker,
uint64_t &size_tracker,
bool allocate_gpu_memory);
void *cuda_malloc_async(uint64_t size, cudaStream_t stream, uint32_t gpu_index);
bool cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index);
uint64_t cuda_device_total_memory(uint32_t gpu_index);
void cuda_memcpy_with_size_tracking_async_to_gpu(void *dest, const void *src,
uint64_t size,

View File

@@ -6,6 +6,7 @@
extern std::mutex m;
extern bool p2p_enabled;
extern const int THRESHOLD_MULTI_GPU;
extern "C" {
int32_t cuda_setup_multi_gpu(int device_0_id);
@@ -24,7 +25,15 @@ using LweArrayVariant = std::variant<std::vector<Torus *>, Torus *>;
return std::get<Torus *>(variant); \
} \
}()
// Macro to define the visitor logic using std::holds_alternative for vectors
#define GET_VARIANT_ELEMENT_64BIT(variant, index) \
[&] { \
if (std::holds_alternative<std::vector<uint64_t *>>(variant)) { \
return std::get<std::vector<uint64_t *>>(variant)[index]; \
} else { \
return std::get<uint64_t *>(variant); \
} \
}()
int get_active_gpu_count(int num_inputs, int gpu_count);
int get_num_inputs_on_gpu(int total_num_inputs, int gpu_index, int gpu_count);

View File

@@ -3,6 +3,26 @@
#include "../../pbs/pbs_enums.h"
typedef struct {
void *ptr;
uint32_t num_radix_blocks;
uint32_t lwe_dimension;
} CudaLweCiphertextListFFI;
typedef struct {
void *ptr;
uint32_t storage_log_modulus;
uint32_t lwe_per_glwe;
// Input LWEs are grouped by groups of `lwe_per_glwe`(the last group may be
// smaller)
// Each group is then packed into one GLWE with `lwe_per_glwe` bodies (one for
// each LWE of the group). In the end the total number of bodies is equal to
// the number of input LWE
uint32_t total_lwe_bodies_count;
uint32_t glwe_dimension;
uint32_t polynomial_size;
} CudaPackedGlweCiphertextListFFI;
extern "C" {
uint64_t scratch_cuda_integer_compress_radix_ciphertext_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -10,28 +30,29 @@ uint64_t scratch_cuda_integer_compress_radix_ciphertext_64(
uint32_t compression_polynomial_size, uint32_t lwe_dimension,
uint32_t ks_level, uint32_t ks_base_log, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint32_t lwe_per_glwe, uint32_t storage_log_modulus,
bool allocate_gpu_memory);
uint32_t lwe_per_glwe, bool allocate_gpu_memory);
uint64_t scratch_cuda_integer_decompress_radix_ciphertext_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t encryption_glwe_dimension,
uint32_t encryption_polynomial_size, uint32_t compression_glwe_dimension,
uint32_t compression_polynomial_size, uint32_t lwe_dimension,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t num_radix_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint32_t storage_log_modulus, uint32_t body_count, bool allocate_gpu_memory,
uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t num_blocks_to_decompress, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, bool allocate_gpu_memory,
bool allocate_ms_array);
void cuda_integer_compress_radix_ciphertext_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
void *glwe_array_out, void const *lwe_array_in, void *const *fp_ksk,
uint32_t num_nths, int8_t *mem_ptr);
CudaPackedGlweCiphertextListFFI *glwe_array_out,
CudaLweCiphertextListFFI const *lwe_array_in, void *const *fp_ksk,
int8_t *mem_ptr);
void cuda_integer_decompress_radix_ciphertext_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
void *lwe_array_out, void const *glwe_in, uint32_t const *indexes_array,
uint32_t indexes_array_size, void *const *bsks, int8_t *mem_ptr);
CudaLweCiphertextListFFI *lwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_in,
uint32_t const *indexes_array, void *const *bsks, int8_t *mem_ptr);
void cleanup_cuda_integer_compress_radix_ciphertext_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,

View File

@@ -5,11 +5,6 @@
template <typename Torus> struct int_compression {
int_radix_params compression_params;
uint32_t storage_log_modulus;
uint32_t lwe_per_glwe;
uint32_t body_count;
// Compression
int8_t *fp_ks_buffer;
Torus *tmp_lwe;
@@ -19,16 +14,12 @@ template <typename Torus> struct int_compression {
int_compression(cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, int_radix_params compression_params,
uint32_t num_radix_blocks, uint32_t lwe_per_glwe,
uint32_t storage_log_modulus, bool allocate_gpu_memory,
uint64_t *size_tracker) {
bool allocate_gpu_memory, uint64_t &size_tracker) {
gpu_memory_allocated = allocate_gpu_memory;
this->compression_params = compression_params;
this->lwe_per_glwe = lwe_per_glwe;
this->storage_log_modulus = storage_log_modulus;
this->body_count = num_radix_blocks;
Torus glwe_accumulator_size = (compression_params.glwe_dimension + 1) *
compression_params.polynomial_size;
uint64_t glwe_accumulator_size = (compression_params.glwe_dimension + 1) *
compression_params.polynomial_size;
tmp_lwe = (Torus *)cuda_malloc_with_size_tracking_async(
num_radix_blocks * (compression_params.small_lwe_dimension + 1) *
@@ -38,7 +29,7 @@ template <typename Torus> struct int_compression {
lwe_per_glwe * glwe_accumulator_size * sizeof(Torus), streams[0],
gpu_indexes[0], size_tracker, allocate_gpu_memory);
*size_tracker += scratch_packing_keyswitch_lwe_list_to_glwe_64(
size_tracker += scratch_packing_keyswitch_lwe_list_to_glwe_64(
streams[0], gpu_indexes[0], &fp_ks_buffer,
compression_params.small_lwe_dimension,
compression_params.glwe_dimension, compression_params.polynomial_size,
@@ -58,11 +49,7 @@ template <typename Torus> struct int_compression {
template <typename Torus> struct int_decompression {
int_radix_params encryption_params;
int_radix_params compression_params;
uint32_t storage_log_modulus;
uint32_t num_radix_blocks;
uint32_t body_count;
uint32_t num_blocks_to_decompress;
Torus *tmp_extracted_glwe;
Torus *tmp_extracted_lwe;
@@ -74,34 +61,31 @@ template <typename Torus> struct int_decompression {
int_decompression(cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count, int_radix_params encryption_params,
int_radix_params compression_params,
uint32_t num_radix_blocks, uint32_t body_count,
uint32_t storage_log_modulus, bool allocate_gpu_memory,
uint64_t *size_tracker) {
uint32_t num_blocks_to_decompress, bool allocate_gpu_memory,
uint64_t &size_tracker) {
gpu_memory_allocated = allocate_gpu_memory;
this->encryption_params = encryption_params;
this->compression_params = compression_params;
this->storage_log_modulus = storage_log_modulus;
this->num_radix_blocks = num_radix_blocks;
this->body_count = body_count;
this->num_blocks_to_decompress = num_blocks_to_decompress;
Torus glwe_accumulator_size = (compression_params.glwe_dimension + 1) *
compression_params.polynomial_size;
Torus lwe_accumulator_size = (compression_params.glwe_dimension *
compression_params.polynomial_size +
1);
uint64_t glwe_accumulator_size = (compression_params.glwe_dimension + 1) *
compression_params.polynomial_size;
uint64_t lwe_accumulator_size = (compression_params.glwe_dimension *
compression_params.polynomial_size +
1);
decompression_rescale_lut = new int_radix_lut<Torus>(
streams, gpu_indexes, gpu_count, encryption_params, 1, num_radix_blocks,
allocate_gpu_memory, size_tracker);
streams, gpu_indexes, gpu_count, encryption_params, 1,
num_blocks_to_decompress, allocate_gpu_memory, size_tracker);
tmp_extracted_glwe = (Torus *)cuda_malloc_with_size_tracking_async(
num_radix_blocks * glwe_accumulator_size * sizeof(Torus), streams[0],
gpu_indexes[0], size_tracker, allocate_gpu_memory);
num_blocks_to_decompress * glwe_accumulator_size * sizeof(Torus),
streams[0], gpu_indexes[0], size_tracker, allocate_gpu_memory);
tmp_indexes_array = (uint32_t *)cuda_malloc_with_size_tracking_async(
num_radix_blocks * sizeof(uint32_t), streams[0], gpu_indexes[0],
num_blocks_to_decompress * sizeof(uint32_t), streams[0], gpu_indexes[0],
size_tracker, allocate_gpu_memory);
tmp_extracted_lwe = (Torus *)cuda_malloc_with_size_tracking_async(
num_radix_blocks * lwe_accumulator_size * sizeof(Torus), streams[0],
gpu_indexes[0], size_tracker, allocate_gpu_memory);
num_blocks_to_decompress * lwe_accumulator_size * sizeof(Torus),
streams[0], gpu_indexes[0], size_tracker, allocate_gpu_memory);
// Rescale is done using an identity LUT
// Here we do not divide by message_modulus
@@ -124,7 +108,7 @@ template <typename Torus> struct int_decompression {
encryption_params.carry_modulus, decompression_rescale_f,
gpu_memory_allocated);
decompression_rescale_lut->broadcast_lut(streams, gpu_indexes, 0);
decompression_rescale_lut->broadcast_lut(streams, gpu_indexes);
}
void release(cudaStream_t const *streams, uint32_t const *gpu_indexes,
uint32_t gpu_count) {

View File

@@ -37,6 +37,10 @@ enum SIGNED_OPERATION { ADDITION = 1, SUBTRACTION = -1 };
enum outputFlag { FLAG_NONE = 0, FLAG_OVERFLOW = 1, FLAG_CARRY = 2 };
enum Direction { Trailing = 0, Leading = 1 };
enum BitValue { Zero = 0, One = 1 };
extern "C" {
typedef struct {
@@ -48,6 +52,34 @@ typedef struct {
uint32_t lwe_dimension;
} CudaRadixCiphertextFFI;
typedef struct {
uint64_t const *chosen_multiplier_has_at_least_one_set;
uint64_t const *decomposed_chosen_multiplier;
uint32_t const num_scalars;
uint32_t const active_bits;
uint64_t const shift_pre;
uint32_t const shift_post;
uint32_t const ilog2_chosen_multiplier;
uint32_t const chosen_multiplier_num_bits;
bool const is_chosen_multiplier_zero;
bool const is_abs_chosen_multiplier_one;
bool const is_chosen_multiplier_negative;
bool const is_chosen_multiplier_pow2;
bool const chosen_multiplier_has_more_bits_than_numerator;
// if signed: test if chosen_multiplier >= 2^{num_bits - 1}
bool const is_chosen_multiplier_geq_two_pow_numerator;
uint32_t const ilog2_divisor;
bool const is_divisor_zero;
bool const is_abs_divisor_one;
bool const is_divisor_negative;
bool const is_divisor_pow2;
bool const divisor_has_more_bits_than_numerator;
} CudaScalarDivisorFFI;
uint64_t scratch_cuda_apply_univariate_lut_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, void const *input_lut, uint32_t lwe_dimension,
@@ -395,7 +427,8 @@ uint64_t scratch_cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks_in_radix, uint32_t max_num_radix_in_vec,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, bool allocate_ms_array);
bool reduce_degrees_for_single_carry_propagation, bool allocate_gpu_memory,
bool allocate_ms_array);
void cuda_integer_radix_partial_sum_ciphertexts_vec_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -414,7 +447,8 @@ uint64_t scratch_cuda_integer_scalar_mul_kb_64(
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, bool allocate_gpu_memory, bool allocate_ms_array);
PBS_TYPE pbs_type, uint32_t num_scalar_bits, bool allocate_gpu_memory,
bool allocate_ms_array);
void cuda_scalar_multiplication_integer_radix_ciphertext_64_inplace(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
@@ -538,5 +572,232 @@ void cleanup_cuda_integer_is_at_least_one_comparisons_block_true(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void);
void extend_radix_with_trivial_zero_blocks_msb_64(
CudaRadixCiphertextFFI *output, CudaRadixCiphertextFFI const *input,
void *const *streams, uint32_t const *gpu_indexes);
void trim_radix_blocks_lsb_64(CudaRadixCiphertextFFI *output,
CudaRadixCiphertextFFI const *input,
void *const *streams,
uint32_t const *gpu_indexes);
uint64_t scratch_cuda_apply_noise_squashing_kb(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t input_glwe_dimension,
uint32_t input_polynomial_size, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_radix_blocks, uint32_t num_original_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, bool allocate_ms_array);
void cuda_apply_noise_squashing_kb(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *output_radix_lwe,
CudaRadixCiphertextFFI const *input_radix_lwe, int8_t *mem_ptr,
void *const *ksks,
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
void *const *bsks);
void cleanup_cuda_apply_noise_squashing_kb(void *const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_sub_and_propagate_single_carry_kb_64_inplace(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t big_lwe_dimension, uint32_t small_lwe_dimension, uint32_t ks_level,
uint32_t ks_base_log, uint32_t pbs_level, uint32_t pbs_base_log,
uint32_t grouping_factor, uint32_t num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, uint32_t requested_flag,
bool allocate_gpu_memory, bool allocate_ms_array);
void cuda_sub_and_propagate_single_carry_kb_64_inplace(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *lhs_array, const CudaRadixCiphertextFFI *rhs_array,
CudaRadixCiphertextFFI *carry_out, const CudaRadixCiphertextFFI *carry_in,
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
uint32_t requested_flag, uint32_t uses_carry);
void cleanup_cuda_sub_and_propagate_single_carry(void *const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_unsigned_scalar_div_radix_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, const CudaScalarDivisorFFI *scalar_divisor_ffi,
bool allocate_gpu_memory, bool allocate_ms_array);
void cuda_integer_unsigned_scalar_div_radix_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *numerator_ct, int8_t *mem_ptr, void *const *bsks,
void *const *ksks,
const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key,
const CudaScalarDivisorFFI *scalar_divisor_ffi);
void cleanup_cuda_integer_unsigned_scalar_div_radix_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_extend_radix_with_sign_msb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t num_additional_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, bool allocate_ms_array);
void cuda_extend_radix_with_sign_msb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *output, CudaRadixCiphertextFFI const *input,
int8_t *mem_ptr, uint32_t num_additional_blocks, void *const *bsks,
void *const *ksks,
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
void cleanup_cuda_extend_radix_with_sign_msb_64(void *const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_signed_scalar_div_radix_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, const CudaScalarDivisorFFI *scalar_divisor_ffi,
bool allocate_gpu_memory, bool allocate_ms_array);
void cuda_integer_signed_scalar_div_radix_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *numerator_ct, int8_t *mem_ptr, void *const *bsks,
void *const *ksks,
const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key,
const CudaScalarDivisorFFI *scalar_divisor_ffi, uint32_t numerator_bits);
void cleanup_cuda_integer_signed_scalar_div_radix_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void);
uint64_t scratch_integer_unsigned_scalar_div_rem_radix_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, const CudaScalarDivisorFFI *scalar_divisor_ffi,
uint32_t const active_bits_divisor, bool allocate_gpu_memory,
bool allocate_ms_array);
void cuda_integer_unsigned_scalar_div_rem_radix_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *quotient_ct, CudaRadixCiphertextFFI *remainder_ct,
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key,
const CudaScalarDivisorFFI *scalar_divisor_ffi,
uint64_t const *divisor_has_at_least_one_set,
uint64_t const *decomposed_divisor, uint32_t const num_scalars_divisor,
void const *clear_blocks, void const *h_clear_blocks,
uint32_t num_clear_blocks);
void cleanup_cuda_integer_unsigned_scalar_div_rem_radix_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void);
uint64_t scratch_integer_signed_scalar_div_rem_radix_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t message_modulus, uint32_t carry_modulus,
PBS_TYPE pbs_type, const CudaScalarDivisorFFI *scalar_divisor_ffi,
uint32_t const active_bits_divisor, bool allocate_gpu_memory,
bool allocate_ms_array);
void cuda_integer_signed_scalar_div_rem_radix_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *quotient_ct, CudaRadixCiphertextFFI *remainder_ct,
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
const CudaScalarDivisorFFI *scalar_divisor_ffi,
uint64_t const *divisor_has_at_least_one_set,
uint64_t const *decomposed_divisor, uint32_t const num_scalars_divisor,
uint32_t numerator_bits);
void cleanup_cuda_integer_signed_scalar_div_rem_radix_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void);
uint64_t scratch_integer_count_of_consecutive_bits_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks, uint32_t counter_num_blocks, uint32_t message_modulus,
uint32_t carry_modulus, PBS_TYPE pbs_type, Direction direction,
BitValue bit_value, bool allocate_gpu_memory, bool allocate_ms_array);
void cuda_integer_count_of_consecutive_bits_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *output_ct, CudaRadixCiphertextFFI const *input_ct,
int8_t *mem_ptr, void *const *bsks, void *const *ksks,
const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key);
void cleanup_cuda_integer_count_of_consecutive_bits_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr_void);
uint64_t scratch_cuda_integer_grouped_oprf_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t num_blocks_to_process, uint32_t num_blocks,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
bool allocate_gpu_memory, uint32_t message_bits_per_block,
uint32_t total_random_bits, bool allocate_ms_array);
void cuda_integer_grouped_oprf_async_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *radix_lwe_out, const void *seeded_lwe_input,
uint32_t num_blocks_to_process, int8_t *mem, void *const *bsks,
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key);
void cleanup_cuda_integer_grouped_oprf_64(void *const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count,
int8_t **mem_ptr_void);
uint64_t scratch_integer_ilog2_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
int8_t **mem_ptr, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t lwe_dimension, uint32_t ks_level, uint32_t ks_base_log,
uint32_t pbs_level, uint32_t pbs_base_log, uint32_t grouping_factor,
uint32_t message_modulus, uint32_t carry_modulus, PBS_TYPE pbs_type,
uint32_t input_num_blocks, uint32_t counter_num_blocks,
uint32_t num_bits_in_ciphertext, bool allocate_gpu_memory,
bool allocate_ms_array);
void cuda_integer_ilog2_kb_64(
void *const *streams, uint32_t const *gpu_indexes, uint32_t gpu_count,
CudaRadixCiphertextFFI *output_ct, CudaRadixCiphertextFFI const *input_ct,
CudaRadixCiphertextFFI const *trivial_ct_neg_n,
CudaRadixCiphertextFFI const *trivial_ct_2,
CudaRadixCiphertextFFI const *trivial_ct_m_minus_1_block, int8_t *mem_ptr,
void *const *bsks, void *const *ksks,
const CudaModulusSwitchNoiseReductionKeyFFI *ms_noise_reduction_key);
void cleanup_cuda_integer_ilog2_kb_64(void *const *streams,
uint32_t const *gpu_indexes,
uint32_t gpu_count,
int8_t **mem_ptr_void);
} // extern C
#endif // CUDA_INTEGER_H

View File

@@ -0,0 +1,13 @@
#ifndef CUDA_BOOTSTRAP_128_H
#define CUDA_BOOTSTRAP_128_H
#include "pbs_enums.h"
#include <stdint.h>
uint64_t scratch_cuda_programmable_bootstrap_128_vector_64(
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
uint32_t lwe_dimension, uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
bool allocate_gpu_memory, bool allocate_ms_array);
#endif // CUDA_BOOTSTRAP_128_H

View File

@@ -66,6 +66,9 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector(
uint32_t num_many_lut, uint32_t lut_stride);
template <typename Torus>
uint64_t get_buffer_size_full_sm_multibit_programmable_bootstrap_128_keybundle(
uint32_t polynomial_size);
template <typename Torus>
uint64_t get_buffer_size_full_sm_multibit_programmable_bootstrap_keybundle(
uint32_t polynomial_size);
template <typename Torus>
@@ -95,8 +98,12 @@ uint64_t get_buffer_size_full_sm_tbc_multibit_programmable_bootstrap(
template <typename Torus, class params>
uint32_t get_lwe_chunk_size(uint32_t gpu_index, uint32_t max_num_pbs,
uint32_t polynomial_size);
uint32_t polynomial_size,
uint64_t full_sm_keybundle);
template <typename Torus, class params>
uint32_t get_lwe_chunk_size_128(uint32_t gpu_index, uint32_t max_num_pbs,
uint32_t polynomial_size,
uint64_t full_sm_keybundle);
template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
int8_t *d_mem_keybundle = NULL;
int8_t *d_mem_acc_step_one = NULL;
@@ -115,7 +122,7 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, uint32_t lwe_chunk_size,
PBS_VARIANT pbs_variant, bool allocate_gpu_memory,
uint64_t *size_tracker) {
uint64_t &size_tracker) {
gpu_memory_allocated = allocate_gpu_memory;
cuda_set_device(gpu_index);
@@ -281,4 +288,146 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::MULTI_BIT> {
}
};
template <typename InputTorus>
struct pbs_buffer_128<InputTorus, PBS_TYPE::MULTI_BIT> {
int8_t *d_mem_keybundle = NULL;
int8_t *d_mem_acc_step_one = NULL;
int8_t *d_mem_acc_step_two = NULL;
int8_t *d_mem_acc_cg = NULL;
int8_t *d_mem_acc_tbc = NULL;
uint32_t lwe_chunk_size;
double *keybundle_fft;
__uint128_t *global_accumulator;
double *global_join_buffer;
PBS_VARIANT pbs_variant;
bool gpu_memory_allocated;
pbs_buffer_128(cudaStream_t stream, uint32_t gpu_index,
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
uint32_t lwe_chunk_size, PBS_VARIANT pbs_variant,
bool allocate_gpu_memory, uint64_t &size_tracker) {
gpu_memory_allocated = allocate_gpu_memory;
cuda_set_device(gpu_index);
this->pbs_variant = pbs_variant;
this->lwe_chunk_size = lwe_chunk_size;
auto max_shared_memory = cuda_get_max_shared_memory(gpu_index);
// default
uint64_t full_sm_keybundle =
get_buffer_size_full_sm_multibit_programmable_bootstrap_128_keybundle<
__uint128_t>(polynomial_size);
uint64_t full_sm_accumulate_step_one =
get_buffer_size_full_sm_multibit_programmable_bootstrap_step_one<
__uint128_t>(polynomial_size);
uint64_t full_sm_accumulate_step_two =
get_buffer_size_full_sm_multibit_programmable_bootstrap_step_two<
__uint128_t>(polynomial_size);
uint64_t partial_sm_accumulate_step_one =
get_buffer_size_partial_sm_multibit_programmable_bootstrap_step_one<
__uint128_t>(polynomial_size);
// cg
uint64_t full_sm_cg_accumulate =
get_buffer_size_full_sm_cg_multibit_programmable_bootstrap<__uint128_t>(
polynomial_size);
uint64_t partial_sm_cg_accumulate =
get_buffer_size_partial_sm_cg_multibit_programmable_bootstrap<
__uint128_t>(polynomial_size);
auto num_blocks_keybundle = input_lwe_ciphertext_count * lwe_chunk_size *
(glwe_dimension + 1) * (glwe_dimension + 1) *
level_count;
auto num_blocks_acc_step_one =
level_count * (glwe_dimension + 1) * input_lwe_ciphertext_count;
auto num_blocks_acc_step_two =
input_lwe_ciphertext_count * (glwe_dimension + 1);
auto num_blocks_acc_cg =
level_count * (glwe_dimension + 1) * input_lwe_ciphertext_count;
// Keybundle
if (max_shared_memory < full_sm_keybundle)
d_mem_keybundle = (int8_t *)cuda_malloc_with_size_tracking_async(
num_blocks_keybundle * full_sm_keybundle, stream, gpu_index,
size_tracker, allocate_gpu_memory);
switch (pbs_variant) {
case PBS_VARIANT::CG:
// Accumulator CG
if (max_shared_memory < partial_sm_cg_accumulate)
d_mem_acc_cg = (int8_t *)cuda_malloc_with_size_tracking_async(
num_blocks_acc_cg * full_sm_cg_accumulate, stream, gpu_index,
size_tracker, allocate_gpu_memory);
else if (max_shared_memory < full_sm_cg_accumulate)
d_mem_acc_cg = (int8_t *)cuda_malloc_with_size_tracking_async(
num_blocks_acc_cg * partial_sm_cg_accumulate, stream, gpu_index,
size_tracker, allocate_gpu_memory);
break;
case PBS_VARIANT::DEFAULT:
// Accumulator step one
if (max_shared_memory < partial_sm_accumulate_step_one)
d_mem_acc_step_one = (int8_t *)cuda_malloc_with_size_tracking_async(
num_blocks_acc_step_one * full_sm_accumulate_step_one, stream,
gpu_index, size_tracker, allocate_gpu_memory);
else if (max_shared_memory < full_sm_accumulate_step_one)
d_mem_acc_step_one = (int8_t *)cuda_malloc_with_size_tracking_async(
num_blocks_acc_step_one * partial_sm_accumulate_step_one, stream,
gpu_index, size_tracker, allocate_gpu_memory);
// Accumulator step two
if (max_shared_memory < full_sm_accumulate_step_two)
d_mem_acc_step_two = (int8_t *)cuda_malloc_with_size_tracking_async(
num_blocks_acc_step_two * full_sm_accumulate_step_two, stream,
gpu_index, size_tracker, allocate_gpu_memory);
break;
default:
PANIC("Cuda error (PBS): unsupported implementation variant.")
}
keybundle_fft = (double *)cuda_malloc_with_size_tracking_async(
num_blocks_keybundle * (polynomial_size / 2) * 4 * sizeof(double),
stream, gpu_index, size_tracker, allocate_gpu_memory);
global_accumulator = (__uint128_t *)cuda_malloc_with_size_tracking_async(
input_lwe_ciphertext_count * (glwe_dimension + 1) * polynomial_size *
sizeof(__uint128_t),
stream, gpu_index, size_tracker, allocate_gpu_memory);
global_join_buffer = (double *)cuda_malloc_with_size_tracking_async(
level_count * (glwe_dimension + 1) * input_lwe_ciphertext_count *
(polynomial_size / 2) * 4 * sizeof(double),
stream, gpu_index, size_tracker, allocate_gpu_memory);
}
void release(cudaStream_t stream, uint32_t gpu_index) {
if (d_mem_keybundle)
cuda_drop_with_size_tracking_async(d_mem_keybundle, stream, gpu_index,
gpu_memory_allocated);
switch (pbs_variant) {
case DEFAULT:
if (d_mem_acc_step_one)
cuda_drop_with_size_tracking_async(d_mem_acc_step_one, stream,
gpu_index, gpu_memory_allocated);
if (d_mem_acc_step_two)
cuda_drop_with_size_tracking_async(d_mem_acc_step_two, stream,
gpu_index, gpu_memory_allocated);
break;
case CG:
if (d_mem_acc_cg)
cuda_drop_with_size_tracking_async(d_mem_acc_cg, stream, gpu_index,
gpu_memory_allocated);
break;
default:
PANIC("Cuda error (PBS): unsupported implementation variant.")
}
cuda_drop_with_size_tracking_async(keybundle_fft, stream, gpu_index,
gpu_memory_allocated);
cuda_drop_with_size_tracking_async(global_accumulator, stream, gpu_index,
gpu_memory_allocated);
cuda_drop_with_size_tracking_async(global_join_buffer, stream, gpu_index,
gpu_memory_allocated);
}
};
#endif // CUDA_MULTI_BIT_UTILITIES_H

View File

@@ -90,7 +90,7 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
uint32_t glwe_dimension, uint32_t polynomial_size,
uint32_t level_count, uint32_t input_lwe_ciphertext_count,
PBS_VARIANT pbs_variant, bool allocate_gpu_memory,
bool allocate_ms_array, uint64_t *size_tracker) {
bool allocate_ms_array, uint64_t &size_tracker) {
gpu_memory_allocated = allocate_gpu_memory;
cuda_set_device(gpu_index);
this->uses_noise_reduction = allocate_ms_array;
@@ -240,14 +240,15 @@ template <typename Torus> struct pbs_buffer<Torus, PBS_TYPE::CLASSICAL> {
}
};
template <PBS_TYPE pbs_type> struct pbs_buffer_128;
template <typename Torus, PBS_TYPE pbs_type> struct pbs_buffer_128;
template <> struct pbs_buffer_128<PBS_TYPE::CLASSICAL> {
template <typename InputTorus>
struct pbs_buffer_128<InputTorus, PBS_TYPE::CLASSICAL> {
int8_t *d_mem;
__uint128_t *global_accumulator;
double *global_join_buffer;
__uint128_t *temp_lwe_array_in;
InputTorus *temp_lwe_array_in;
uint64_t *trivial_indexes;
PBS_VARIANT pbs_variant;
@@ -259,17 +260,15 @@ template <> struct pbs_buffer_128<PBS_TYPE::CLASSICAL> {
uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, PBS_VARIANT pbs_variant,
bool allocate_gpu_memory, bool allocate_ms_array,
uint64_t *size_tracker) {
uint64_t &size_tracker) {
gpu_memory_allocated = allocate_gpu_memory;
cuda_set_device(gpu_index);
this->pbs_variant = pbs_variant;
this->uses_noise_reduction = allocate_ms_array;
if (allocate_ms_array) {
this->temp_lwe_array_in =
(__uint128_t *)cuda_malloc_with_size_tracking_async(
(lwe_dimension + 1) * input_lwe_ciphertext_count *
sizeof(__uint128_t),
stream, gpu_index, size_tracker, allocate_ms_array);
this->temp_lwe_array_in = (InputTorus *)cuda_malloc_async(
(lwe_dimension + 1) * input_lwe_ciphertext_count * sizeof(InputTorus),
stream, gpu_index);
this->trivial_indexes = (uint64_t *)cuda_malloc_with_size_tracking_async(
input_lwe_ciphertext_count * sizeof(uint64_t), stream, gpu_index,
size_tracker, allocate_ms_array);
@@ -525,6 +524,10 @@ bool has_support_to_cuda_programmable_bootstrap_tbc(uint32_t num_samples,
uint32_t level_count,
uint32_t max_shared_memory);
bool has_support_to_cuda_programmable_bootstrap_128_cg(
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
uint32_t num_samples, uint32_t max_shared_memory);
#ifdef __CUDACC__
__device__ inline int get_start_ith_ggsw(int i, uint32_t polynomial_size,
int glwe_dimension,
@@ -541,6 +544,11 @@ __device__ T *get_ith_mask_kth_block(T *ptr, int i, int k, int level,
uint32_t polynomial_size,
int glwe_dimension, uint32_t level_count);
template <typename T, uint32_t polynomial_size, uint32_t glwe_dimension,
uint32_t level_count, uint32_t level_id>
__device__ const T *get_ith_mask_kth_block_2_2_params(const T *ptr,
int iteration, int k);
template <typename T>
__device__ T *get_ith_body_kth_block(T *ptr, int i, int k, int level,
uint32_t polynomial_size,

View File

@@ -100,7 +100,7 @@ void cuda_programmable_bootstrap_lwe_ciphertext_vector_128(
void const *lut_vector, void const *lwe_array_in,
void const *bootstrapping_key,
CudaModulusSwitchNoiseReductionKeyFFI const *ms_noise_reduction_key,
void *ms_noise_reduction_ptr, int8_t *buffer, uint32_t lwe_dimension,
void const *ms_noise_reduction_ptr, int8_t *buffer, uint32_t lwe_dimension,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t base_log,
uint32_t level_count, uint32_t num_samples);

View File

@@ -15,6 +15,11 @@ void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_64(
uint32_t input_lwe_dim, uint32_t glwe_dim, uint32_t level_count,
uint32_t polynomial_size, uint32_t grouping_factor);
void cuda_convert_lwe_multi_bit_programmable_bootstrap_key_128(
void *stream, uint32_t gpu_index, void *dest, void const *src,
uint32_t input_lwe_dim, uint32_t glwe_dim, uint32_t level_count,
uint32_t polynomial_size, uint32_t grouping_factor);
uint64_t scratch_cuda_multi_bit_programmable_bootstrap_64(
void *stream, uint32_t gpu_index, int8_t **pbs_buffer,
uint32_t glwe_dimension, uint32_t polynomial_size, uint32_t level_count,
@@ -33,6 +38,25 @@ void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_64(
void cleanup_cuda_multi_bit_programmable_bootstrap(void *stream,
uint32_t gpu_index,
int8_t **pbs_buffer);
uint64_t scratch_cuda_multi_bit_programmable_bootstrap_128_vector_64(
void *stream, uint32_t gpu_index, int8_t **buffer, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t level_count,
uint32_t input_lwe_ciphertext_count, bool allocate_gpu_memory);
void cuda_multi_bit_programmable_bootstrap_lwe_ciphertext_vector_128(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_output_indexes, void const *lut_vector,
void const *lut_vector_indexes, void const *lwe_array_in,
void const *lwe_input_indexes, void const *bootstrapping_key,
int8_t *mem_ptr, uint32_t lwe_dimension, uint32_t glwe_dimension,
uint32_t polynomial_size, uint32_t grouping_factor, uint32_t base_log,
uint32_t level_count, uint32_t num_samples, uint32_t num_many_lut,
uint32_t lut_stride);
void cleanup_cuda_multi_bit_programmable_bootstrap_128(void *stream,
const uint32_t gpu_index,
int8_t **buffer);
}
#endif // CUDA_MULTI_BIT_H

View File

@@ -27,7 +27,7 @@ template <typename Torus> struct zk_expand_mem {
int_radix_params casting_params, KS_TYPE casting_key_type,
const uint32_t *num_lwes_per_compact_list,
const bool *is_boolean_array, uint32_t num_compact_lists,
bool allocate_gpu_memory, uint64_t *size_tracker)
bool allocate_gpu_memory, uint64_t &size_tracker)
: computing_params(computing_params), casting_params(casting_params),
num_compact_lists(num_compact_lists),
casting_key_type(casting_key_type) {
@@ -112,15 +112,15 @@ template <typename Torus> struct zk_expand_mem {
// Hint for future readers: if message_modulus == 4 then
// packed_messages_per_lwe becomes 2
auto packed_messages_per_lwe = log2_int(params.message_modulus);
auto num_packed_msgs = log2_int(params.message_modulus);
// Adjust indexes to permute the output and access the correct LUT
auto h_indexes_in = static_cast<Torus *>(
malloc(packed_messages_per_lwe * num_lwes * sizeof(Torus)));
malloc(num_packed_msgs * num_lwes * sizeof(Torus)));
auto h_indexes_out = static_cast<Torus *>(
malloc(packed_messages_per_lwe * num_lwes * sizeof(Torus)));
malloc(num_packed_msgs * num_lwes * sizeof(Torus)));
auto h_lut_indexes = static_cast<Torus *>(
malloc(packed_messages_per_lwe * num_lwes * sizeof(Torus)));
malloc(num_packed_msgs * num_lwes * sizeof(Torus)));
auto h_body_id_per_compact_list =
static_cast<uint32_t *>(malloc(num_lwes * sizeof(uint32_t)));
auto h_lwe_compact_input_indexes =
@@ -138,6 +138,10 @@ template <typename Torus> struct zk_expand_mem {
auto compact_list_id = 0;
auto idx = 0;
auto count = 0;
// During flattening, all num_lwes LWEs from all compact lists are stored
// sequentially on a Torus array. h_lwe_compact_input_indexes stores the
// index of the first LWE related to the compact list that contains the i-th
// LWE
for (int i = 0; i < num_lwes; i++) {
h_lwe_compact_input_indexes[i] = idx;
count++;
@@ -148,6 +152,8 @@ template <typename Torus> struct zk_expand_mem {
}
}
// Stores the index of the i-th LWE (within each compact list) related to
// the k-th compact list.
auto offset = 0;
for (int k = 0; k < num_compact_lists; k++) {
auto num_lwes_in_kth_compact_list = num_lwes_per_compact_list[k];
@@ -159,46 +165,75 @@ template <typename Torus> struct zk_expand_mem {
offset += num_lwes_in_kth_compact_list;
}
/*
* Each LWE contains encrypted data in both carry and message spaces
* that needs to be extracted.
*
* The loop processes each compact list (k) and for each LWE within that
* list:
* 1. Sets input indexes to read each LWE twice (for carry and message
* extraction)
* 2. Creates output indexes to properly reorder the results
* 3. Selects appropriate LUT index based on whether boolean sanitization is
* needed
*
* We want the output to have always first the content of the message part
* and then the content of the carry part of each LWE.
*
* i.e. msg_extract(LWE_0), carry_extract(LWE_0), msg_extract(LWE_1),
* carry_extract(LWE_1), ...
*
* Aiming that behavior, with 4 LWEs we would have:
*
* // Each LWE is processed twice
* h_indexes_in = {0, 1, 2, 3, 0, 1, 2, 3}
*
* // First 4 use message LUT, last 4 use carry LUT
* h_lut_indexes = {0, 0, 0, 0, 1, 1, 1, 1}
*
* // Reorders output so message and carry for each LWE appear together
* h_indexes_out = {0, 2, 4, 6, 1, 3, 5, 7}
*
* If an LWE contains a boolean value, its LUT index is shifted by
* num_packed_msgs to use the sanitization LUT (which ensures output is
* exactly 0 or 1).
*/
offset = 0;
for (int k = 0; k < num_compact_lists; k++) {
auto num_lwes_in_kth_compact_list = num_lwes_per_compact_list[k];
for (int i = 0;
i < packed_messages_per_lwe * num_lwes_in_kth_compact_list; i++) {
Torus j = i % num_lwes_in_kth_compact_list;
h_indexes_in[i + packed_messages_per_lwe * offset] = j + offset;
h_indexes_out[i + packed_messages_per_lwe * offset] =
packed_messages_per_lwe * (j + offset) +
(i / num_lwes_in_kth_compact_list);
auto num_lwes_in_kth = num_lwes_per_compact_list[k];
for (int i = 0; i < num_packed_msgs * num_lwes_in_kth; i++) {
auto lwe_index = i + num_packed_msgs * offset;
auto lwe_index_in_list = i % num_lwes_in_kth;
h_indexes_in[lwe_index] = lwe_index_in_list + offset;
h_indexes_out[lwe_index] =
num_packed_msgs * h_indexes_in[lwe_index] + i / num_lwes_in_kth;
// If the input relates to a boolean, shift the LUT so the correct one
// with sanitization is used
h_lut_indexes[i + packed_messages_per_lwe * offset] =
(is_boolean_array[h_indexes_out[i +
packed_messages_per_lwe * offset]]
? packed_messages_per_lwe
: 0) +
i / num_lwes_in_kth_compact_list;
auto boolean_offset =
is_boolean_array[h_indexes_out[lwe_index]] ? num_packed_msgs : 0;
h_lut_indexes[lwe_index] = i / num_lwes_in_kth + boolean_offset;
}
offset += num_lwes_in_kth_compact_list;
offset += num_lwes_in_kth;
}
message_and_carry_extract_luts->set_lwe_indexes(
streams[0], gpu_indexes[0], h_indexes_in, h_indexes_out);
auto lut_indexes = message_and_carry_extract_luts->get_lut_indexes(0, 0);
message_and_carry_extract_luts->broadcast_lut(streams, gpu_indexes, 0);
cuda_memcpy_with_size_tracking_async_to_gpu(
d_lwe_compact_input_indexes, h_lwe_compact_input_indexes,
num_lwes * sizeof(uint32_t), streams[0], gpu_indexes[0],
allocate_gpu_memory);
cuda_memcpy_with_size_tracking_async_to_gpu(
lut_indexes, h_lut_indexes,
packed_messages_per_lwe * num_lwes * sizeof(Torus), streams[0],
gpu_indexes[0], allocate_gpu_memory);
lut_indexes, h_lut_indexes, num_packed_msgs * num_lwes * sizeof(Torus),
streams[0], gpu_indexes[0], allocate_gpu_memory);
cuda_memcpy_with_size_tracking_async_to_gpu(
d_body_id_per_compact_list, h_body_id_per_compact_list,
num_lwes * sizeof(uint32_t), streams[0], gpu_indexes[0],
allocate_gpu_memory);
message_and_carry_extract_luts->broadcast_lut(streams, gpu_indexes);
// The expanded LWEs will always be on the casting key format
tmp_expanded_lwes = (Torus *)cuda_malloc_with_size_tracking_async(
num_lwes * (casting_params.big_lwe_dimension + 1) * sizeof(Torus),

View File

@@ -1,5 +1,6 @@
file(GLOB_RECURSE SOURCES "*.cu")
add_library(tfhe_cuda_backend STATIC ${SOURCES})
add_library(tfhe_cuda_backend STATIC ${SOURCES} pbs/programmable_bootstrap_multibit_128.cuh
pbs/programmable_bootstrap_multibit_128.cu)
set_target_properties(tfhe_cuda_backend PROPERTIES CUDA_SEPARABLE_COMPILATION ON CUDA_RESOLVE_DEVICE_SYMBOLS ON)
target_link_libraries(tfhe_cuda_backend PUBLIC cudart OpenMP::OpenMP_CXX)
target_include_directories(tfhe_cuda_backend PRIVATE .)

View File

@@ -84,6 +84,8 @@ void cuda_modulus_switch_inplace_64(void *stream, uint32_t gpu_index,
static_cast<uint64_t *>(lwe_array_out), size, log_modulus);
}
// This end point is used only for testing purposes
// its output always follows trivial ordering
void cuda_improve_noise_modulus_switch_64(
void *stream, uint32_t gpu_index, void *lwe_array_out,
void const *lwe_array_in, void const *lwe_array_indexes,

View File

@@ -1,5 +1,5 @@
#ifndef CNCRT_CRYPTO_CUH
#define CNCRT_CRPYTO_CUH
#define CNCRT_CRYPTO_CUH
#include "crypto/torus.cuh"
#include "device.h"
@@ -137,6 +137,34 @@ public:
}
};
// Performs the decomposition for 2_2 params, assumes level_count = 1
// this specialized version it is needed if we plan to keep everything in regs
template <typename T, class params, uint32_t base_log>
__device__ void decompose_and_compress_level_2_2_params(double2 *result,
T *state) {
constexpr T mask_mod_b = (1ll << base_log) - 1ll;
for (int i = 0; i < params::opt / 2; i++) {
auto input1 = state[i];
auto input2 = state[i + params::opt / 2];
T res_re = input1 & mask_mod_b;
T res_im = input2 & mask_mod_b;
input1 >>= base_log; // Update state
input2 >>= base_log; // Update state
T carry_re = ((res_re - 1ll) | input1) & res_re;
T carry_im = ((res_im - 1ll) | input2) & res_im;
carry_re >>= (base_log - 1);
carry_im >>= (base_log - 1);
res_re -= carry_re << base_log;
res_im -= carry_im << base_log;
typecast_torus_to_double(res_re, result[i].x);
typecast_torus_to_double(res_im, result[i].y);
}
}
template <typename Torus>
__device__ Torus decompose_one(Torus &state, Torus mask_mod_b, int base_log) {
Torus res = state & mask_mod_b;
@@ -148,4 +176,4 @@ __device__ Torus decompose_one(Torus &state, Torus mask_mod_b, int base_log) {
return res;
}
#endif // CNCRT_CRPYTO_H
#endif // CNCRT_CRYPTO_CUH

View File

@@ -38,6 +38,16 @@ __device__ Torus *get_ith_block(Torus *ksk, int i, int level,
// Each thread in x are used to calculate one output.
// threads in y are used to paralelize the lwe_dimension_in loop.
// shared memory is used to store intermediate results of the reduction.
// Note: To reduce register pressure we have slightly changed the algorithm,
// the idea consists in calculating the negate value of the output. So, instead
// of accumulating subtractions using -=, we accumulate additions using += in
// the local_lwe_out. This seems to work better cause profits madd ops and save
// some regs. For this to work, we need to negate the input
// lwe_array_in[lwe_dimension_in], and negate back the output at the end to get
// the correct results. Additionally, we split the calculation of the ksk offset
// in two parts, a constant part is calculated before the loop, and a variable
// part is calculated inside the loop. This seems to help with the register
// pressure as well.
template <typename Torus>
__global__ void
keyswitch(Torus *lwe_array_out, const Torus *__restrict__ lwe_output_indexes,
@@ -60,7 +70,7 @@ keyswitch(Torus *lwe_array_out, const Torus *__restrict__ lwe_output_indexes,
lwe_array_in, lwe_input_indexes[blockIdx.x], lwe_dimension_in + 1);
if (tid == lwe_dimension_out && threadIdx.y == 0) {
local_lwe_out = block_lwe_array_in[lwe_dimension_in];
local_lwe_out = -block_lwe_array_in[lwe_dimension_in];
}
const Torus mask_mod_b = (1ll << base_log) - 1ll;
@@ -73,12 +83,12 @@ keyswitch(Torus *lwe_array_out, const Torus *__restrict__ lwe_output_indexes,
for (int i = start_i; i < end_i; i++) {
Torus state =
init_decomposer_state(block_lwe_array_in[i], base_log, level_count);
uint32_t offset = i * level_count * (lwe_dimension_out + 1);
for (int j = 0; j < level_count; j++) {
auto ksk_block =
get_ith_block(ksk, i, j, lwe_dimension_out, level_count);
Torus decomposed = decompose_one<Torus>(state, mask_mod_b, base_log);
local_lwe_out -= (Torus)ksk_block[tid] * decomposed;
local_lwe_out +=
(Torus)ksk[tid + j * (lwe_dimension_out + 1) + offset] * decomposed;
}
}
@@ -93,7 +103,7 @@ keyswitch(Torus *lwe_array_out, const Torus *__restrict__ lwe_output_indexes,
lwe_acc_out[shmem_index + offset * blockDim.x];
}
if (threadIdx.y == 0)
block_lwe_array_out[tid] = lwe_acc_out[shmem_index];
block_lwe_array_out[tid] = -lwe_acc_out[shmem_index];
}
}
@@ -115,7 +125,7 @@ __host__ void host_keyswitch_lwe_ciphertext_vector(
int shared_mem = sizeof(Torus) * num_threads_y * num_threads_x;
if (num_blocks_per_sample > 65536)
PANIC("Cuda error (Keyswith): number of blocks per sample is too large");
PANIC("Cuda error (Keyswitch): number of blocks per sample is too large");
// In multiplication of large integers (512, 1024, 2048), the number of
// samples can be larger than 65536, so we need to set it in the first
@@ -172,14 +182,14 @@ __host__ uint64_t scratch_packing_keyswitch_lwe_list_to_glwe(
// allocate at least LWE-mask times two: to keep both decomposition state and
// decomposed intermediate value
int memory_unit = glwe_accumulator_size > lwe_dimension * 2
? glwe_accumulator_size
: lwe_dimension * 2;
uint64_t memory_unit = glwe_accumulator_size > lwe_dimension * 2
? glwe_accumulator_size
: lwe_dimension * 2;
uint64_t size_tracker;
uint64_t size_tracker = 0;
uint64_t buffer_size = 2 * num_lwes * memory_unit * sizeof(Torus);
*fp_ks_buffer = (int8_t *)cuda_malloc_with_size_tracking_async(
2 * num_lwes * memory_unit * sizeof(Torus), stream, gpu_index,
&size_tracker, allocate_gpu_memory);
buffer_size, stream, gpu_index, size_tracker, allocate_gpu_memory);
return size_tracker;
}

View File

@@ -55,7 +55,7 @@ __global__ void decompose_vectorize_init(Torus const *lwe_in, Torus *lwe_out,
lwe_out[write_state_idx] = state;
}
// Continue decomposiion of an array of Torus elements in place. Supposes
// Continue decomposition of an array of Torus elements in place. Supposes
// that the array contains already decomposed elements and
// computes the new decomposed level in place.
template <typename Torus>

View File

@@ -66,6 +66,13 @@ __device__ inline void typecast_torus_to_double<uint64_t>(uint64_t x,
r = __ll2double_rn(x);
}
template <>
__device__ inline void typecast_torus_to_double<__uint128_t>(__uint128_t x,
double &r) {
// We truncate x
r = __ll2double_rn(static_cast<uint64_t>(x));
}
template <typename T>
__device__ inline T init_decomposer_state(T input, uint32_t base_log,
uint32_t level_count) {
@@ -84,6 +91,23 @@ __device__ inline T init_decomposer_state(T input, uint32_t base_log,
return res - (need_balance << rep_bit_count);
}
template <typename T, uint32_t base_log, uint32_t level_count>
__device__ inline T init_decomposer_state_2_2_params(T input) {
constexpr T rep_bit_count = level_count * base_log;
constexpr T non_rep_bit_count = sizeof(T) * 8 - rep_bit_count;
T res = input >> (non_rep_bit_count - 1);
T rounding_bit = res & (T)(1);
res++;
res >>= 1;
constexpr T torus_max = scalar_max<T>();
constexpr T mod_mask = torus_max >> non_rep_bit_count;
res &= mod_mask;
T shifted_random = rounding_bit << (rep_bit_count - 1);
T need_balance =
(((res - (T)(1)) | shifted_random) & res) >> (rep_bit_count - 1);
return res - (need_balance << rep_bit_count);
}
template <typename T>
__device__ __forceinline__ void modulus_switch(T input, T &output,
uint32_t log_modulus) {
@@ -116,7 +140,6 @@ __host__ void host_modulus_switch_inplace(cudaStream_t stream,
int num_threads = 0, num_blocks = 0;
getNumBlocksAndThreads(size, 1024, num_blocks, num_threads);
modulus_switch_inplace<Torus>
<<<num_blocks, num_threads, 0, stream>>>(array, size, log_modulus);
check_cuda_error(cudaGetLastError());
@@ -178,10 +201,12 @@ __device__ __forceinline__ double measure_modulus_switch_noise(
// Each thread processes two elements of the lwe array
template <typename Torus>
__global__ void improve_noise_modulus_switch(
Torus *array_out, const Torus *array_in, const uint64_t *indexes,
const Torus *zeros, int lwe_size, int num_zeros, double input_variance,
double r_sigma, double bound, uint32_t log_modulus) {
__global__ void __launch_bounds__(512)
improve_noise_modulus_switch(Torus *array_out, const Torus *array_in,
const uint64_t *indexes, const Torus *zeros,
int lwe_size, int num_zeros,
double input_variance, double r_sigma,
double bound, uint32_t log_modulus) {
// First we will assume size is less than the number of threads per block
// I should switch this to dynamic shared memory
@@ -198,7 +223,8 @@ __global__ void improve_noise_modulus_switch(
sum_mask_errors[threadIdx.x] = 0.f;
sum_squared_mask_errors[threadIdx.x] = 0.f;
auto this_block_lwe_in = array_in + indexes[blockIdx.x] * lwe_size;
auto this_block_lwe_out = array_out + indexes[blockIdx.x] * lwe_size;
// We use modulus switch to gather the output in trivial order
auto this_block_lwe_out = array_out + blockIdx.x * lwe_size;
Torus input_element1 = this_block_lwe_in[threadIdx.x];
Torus input_element2 = threadIdx.x + blockDim.x < lwe_size

View File

@@ -2,6 +2,12 @@
#include <cstdint>
#include <cuda_runtime.h>
uint32_t cuda_get_device() {
int device;
check_cuda_error(cudaGetDevice(&device));
return static_cast<uint32_t>(device);
}
void cuda_set_device(uint32_t gpu_index) {
check_cuda_error(cudaSetDevice(gpu_index));
}
@@ -74,10 +80,9 @@ void *cuda_malloc(uint64_t size, uint32_t gpu_index) {
/// asynchronously.
void *cuda_malloc_with_size_tracking_async(uint64_t size, cudaStream_t stream,
uint32_t gpu_index,
uint64_t *size_tracker,
uint64_t &size_tracker,
bool allocate_gpu_memory) {
if (size_tracker != nullptr)
*size_tracker += size;
size_tracker += size;
void *ptr = nullptr;
if (!allocate_gpu_memory)
return ptr;
@@ -106,8 +111,9 @@ void *cuda_malloc_with_size_tracking_async(uint64_t size, cudaStream_t stream,
/// asynchronously.
void *cuda_malloc_async(uint64_t size, cudaStream_t stream,
uint32_t gpu_index) {
return cuda_malloc_with_size_tracking_async(size, stream, gpu_index, nullptr,
true);
uint64_t size_tracker = 0;
return cuda_malloc_with_size_tracking_async(size, stream, gpu_index,
size_tracker, true);
}
/// Check that allocation is valid
@@ -122,6 +128,13 @@ bool cuda_check_valid_malloc(uint64_t size, uint32_t gpu_index) {
}
}
uint64_t cuda_device_total_memory(uint32_t gpu_index) {
cuda_set_device(gpu_index);
size_t total_mem = 0, free_mem = 0;
check_cuda_error(cudaMemGetInfo(&free_mem, &total_mem));
return total_mem;
}
/// Returns
/// false if Cooperative Groups is not supported.
/// true otherwise

View File

@@ -63,7 +63,7 @@ template <class params> __device__ void NSMFFT_direct(double2 *A) {
}
Index twiddle_shift = 1;
for (Index l = LOG2_DEGREE - 1; l >= 1; --l) {
for (Index l = LOG2_DEGREE - 1; l >= 5; --l) {
Index lane_mask = 1 << (l - 1);
Index thread_mask = (1 << l) - 1;
twiddle_shift <<= 1;
@@ -96,8 +96,43 @@ template <class params> __device__ void NSMFFT_direct(double2 *A) {
tid = tid + STRIDE;
}
}
__syncthreads();
for (Index l = 4; l >= 1; --l) {
Index lane_mask = 1 << (l - 1);
Index thread_mask = (1 << l) - 1;
twiddle_shift <<= 1;
tid = threadIdx.x;
__syncwarp();
double2 reg_A[BUTTERFLY_DEPTH];
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
Index rank = tid & thread_mask;
bool u_stays_in_register = rank < lane_mask;
reg_A[i] = (u_stays_in_register) ? v[i] : u[i];
tid = tid + STRIDE;
}
__syncwarp();
tid = threadIdx.x;
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
Index rank = tid & thread_mask;
bool u_stays_in_register = rank < lane_mask;
w = shfl_xor_double2(reg_A[i], 1 << (l - 1), 0xFFFFFFFF);
u[i] = (u_stays_in_register) ? u[i] : w;
v[i] = (u_stays_in_register) ? w : v[i];
w = negtwiddles[tid / lane_mask + twiddle_shift];
w *= v[i];
v[i] = u[i] - w;
u[i] = u[i] + w;
tid = tid + STRIDE;
}
}
__syncthreads();
// store registers in SM
tid = threadIdx.x;
#pragma unroll
@@ -109,6 +144,119 @@ template <class params> __device__ void NSMFFT_direct(double2 *A) {
__syncthreads();
}
/*
* negacyclic fft optimized for 2_2 params
it uses the twiddles from shared memory for extra performance
this is possible cause we know for 2_2 params will have memory available
the fft is returned in registers to avoid extra synchronizations
*/
template <class params>
__device__ void NSMFFT_direct_2_2_params(double2 *A, double2 *fft_out,
double2 *shared_twiddles) {
/* We don't make bit reverse here, since twiddles are already reversed
* Each thread is always in charge of "opt/2" pairs of coefficients,
* which is why we always loop through N/2 by N/opt strides
* The pragma unroll instruction tells the compiler to unroll the
* full loop, which should increase performance
*/
constexpr Index BUTTERFLY_DEPTH = params::opt >> 1;
constexpr Index LOG2_DEGREE = params::log2_degree;
constexpr Index HALF_DEGREE = params::degree >> 1;
constexpr Index STRIDE = params::degree / params::opt;
Index tid = threadIdx.x;
double2 u[BUTTERFLY_DEPTH], v[BUTTERFLY_DEPTH], w;
// switch register order
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
u[i] = fft_out[i];
v[i] = fft_out[i + params::opt / 2];
}
// level 1
// we don't make actual complex multiplication on level1 since we have only
// one twiddle, it's real and image parts are equal, so we can multiply
// it with simpler operations
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
w = v[i] * (double2){0.707106781186547461715008466854,
0.707106781186547461715008466854};
v[i] = u[i] - w;
u[i] = u[i] + w;
}
Index twiddle_shift = 1;
for (Index l = LOG2_DEGREE - 1; l >= 5; --l) {
Index lane_mask = 1 << (l - 1);
Index thread_mask = (1 << l) - 1;
twiddle_shift <<= 1;
tid = threadIdx.x;
__syncthreads();
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
Index rank = tid & thread_mask;
bool u_stays_in_register = rank < lane_mask;
A[tid] = (u_stays_in_register) ? v[i] : u[i];
tid = tid + STRIDE;
}
__syncthreads();
tid = threadIdx.x;
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
Index rank = tid & thread_mask;
bool u_stays_in_register = rank < lane_mask;
w = A[tid ^ lane_mask];
u[i] = (u_stays_in_register) ? u[i] : w;
v[i] = (u_stays_in_register) ? w : v[i];
w = shared_twiddles[tid / lane_mask + twiddle_shift];
w *= v[i];
v[i] = u[i] - w;
u[i] = u[i] + w;
tid = tid + STRIDE;
}
}
for (Index l = 4; l >= 1; --l) {
Index lane_mask = 1 << (l - 1);
Index thread_mask = (1 << l) - 1;
twiddle_shift <<= 1;
tid = threadIdx.x;
double2 reg_A[BUTTERFLY_DEPTH];
tid = threadIdx.x;
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
Index rank = tid & thread_mask;
bool u_stays_in_register = rank < lane_mask;
reg_A[i] = (u_stays_in_register) ? v[i] : u[i];
w = shfl_xor_double2(reg_A[i], 1 << (l - 1), 0xFFFFFFFF);
u[i] = (u_stays_in_register) ? u[i] : w;
v[i] = (u_stays_in_register) ? w : v[i];
w = shared_twiddles[tid / lane_mask + twiddle_shift];
w *= v[i];
v[i] = u[i] - w;
u[i] = u[i] + w;
tid = tid + STRIDE;
}
}
// Return result in registers, no need to synchronize here
// only with we need to use the same shared memory afterwards
for (Index i = 0; i < BUTTERFLY_DEPTH; i++) {
fft_out[i] = u[i];
fft_out[i + params::opt / 2] = v[i];
}
}
/*
* negacyclic inverse fft
*/
@@ -144,7 +292,46 @@ template <class params> __device__ void NSMFFT_inverse(double2 *A) {
}
Index twiddle_shift = DEGREE;
for (Index l = 1; l <= LOG2_DEGREE - 1; ++l) {
for (Index l = 1; l <= 4; ++l) {
Index lane_mask = 1 << (l - 1);
Index thread_mask = (1 << l) - 1;
tid = threadIdx.x;
twiddle_shift >>= 1;
// at this point registers are ready for the butterfly
tid = threadIdx.x;
__syncwarp();
double2 reg_A[BUTTERFLY_DEPTH];
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
w = (u[i] - v[i]);
u[i] += v[i];
v[i] = w * conjugate(negtwiddles[tid / lane_mask + twiddle_shift]);
// keep one of the register for next iteration and store another one in sm
Index rank = tid & thread_mask;
bool u_stays_in_register = rank < lane_mask;
reg_A[i] = (u_stays_in_register) ? v[i] : u[i];
tid = tid + STRIDE;
}
__syncwarp();
// prepare registers for next butterfly iteration
tid = threadIdx.x;
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
Index rank = tid & thread_mask;
bool u_stays_in_register = rank < lane_mask;
w = shfl_xor_double2(reg_A[i], 1 << (l - 1), 0xFFFFFFFF);
u[i] = (u_stays_in_register) ? u[i] : w;
v[i] = (u_stays_in_register) ? w : v[i];
tid = tid + STRIDE;
}
}
for (Index l = 5; l <= LOG2_DEGREE - 1; ++l) {
Index lane_mask = 1 << (l - 1);
Index thread_mask = (1 << l) - 1;
tid = threadIdx.x;
@@ -201,6 +388,126 @@ template <class params> __device__ void NSMFFT_inverse(double2 *A) {
__syncthreads();
}
/*
* negacyclic inverse fft optimized for 2_2 params
* it uses the twiddles from shared memory for extra performance
* this is possible cause we know for 2_2 params will have memory available
* the input comes from registers to avoid some synchronizations and shared mem
* usage
*/
template <class params>
__device__ void NSMFFT_inverse_2_2_params(double2 *A, double2 *buffer_regs,
double2 *shared_twiddles) {
/* We don't make bit reverse here, since twiddles are already reversed
* Each thread is always in charge of "opt/2" pairs of coefficients,
* which is why we always loop through N/2 by N/opt strides
* The pragma unroll instruction tells the compiler to unroll the
* full loop, which should increase performance
*/
constexpr Index BUTTERFLY_DEPTH = params::opt >> 1;
constexpr Index LOG2_DEGREE = params::log2_degree;
constexpr Index DEGREE = params::degree;
constexpr Index HALF_DEGREE = params::degree >> 1;
constexpr Index STRIDE = params::degree / params::opt;
size_t tid = threadIdx.x;
double2 u[BUTTERFLY_DEPTH], v[BUTTERFLY_DEPTH], w;
// load into registers and divide by compressed polynomial size
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
u[i] = buffer_regs[i];
v[i] = buffer_regs[i + params::opt / 2];
u[i] /= DEGREE;
v[i] /= DEGREE;
}
Index twiddle_shift = DEGREE;
for (Index l = 1; l <= 4; ++l) {
Index lane_mask = 1 << (l - 1);
Index thread_mask = (1 << l) - 1;
tid = threadIdx.x;
twiddle_shift >>= 1;
// at this point registers are ready for the butterfly
tid = threadIdx.x;
double2 reg_A[BUTTERFLY_DEPTH];
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
w = (u[i] - v[i]);
u[i] += v[i];
v[i] = w * conjugate(shared_twiddles[tid / lane_mask + twiddle_shift]);
tid = tid + STRIDE;
}
__syncwarp();
// prepare registers for next butterfly iteration
tid = threadIdx.x;
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
Index rank = tid & thread_mask;
bool u_stays_in_register = rank < lane_mask;
reg_A[i] = (u_stays_in_register) ? v[i] : u[i];
w = shfl_xor_double2(reg_A[i], 1 << (l - 1), 0xFFFFFFFF);
u[i] = (u_stays_in_register) ? u[i] : w;
v[i] = (u_stays_in_register) ? w : v[i];
tid = tid + STRIDE;
}
}
for (Index l = 5; l <= LOG2_DEGREE - 1; ++l) {
Index lane_mask = 1 << (l - 1);
Index thread_mask = (1 << l) - 1;
tid = threadIdx.x;
twiddle_shift >>= 1;
// at this point registers are ready for the butterfly
tid = threadIdx.x;
__syncthreads();
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
w = (u[i] - v[i]);
u[i] += v[i];
v[i] = w * conjugate(shared_twiddles[tid / lane_mask + twiddle_shift]);
// keep one of the register for next iteration and store another one in sm
Index rank = tid & thread_mask;
bool u_stays_in_register = rank < lane_mask;
A[tid] = (u_stays_in_register) ? v[i] : u[i];
tid = tid + STRIDE;
}
__syncthreads();
// prepare registers for next butterfly iteration
tid = threadIdx.x;
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
Index rank = tid & thread_mask;
bool u_stays_in_register = rank < lane_mask;
w = A[tid ^ lane_mask];
u[i] = (u_stays_in_register) ? u[i] : w;
v[i] = (u_stays_in_register) ? w : v[i];
tid = tid + STRIDE;
}
}
// last iteration
#pragma unroll
for (Index i = 0; i < BUTTERFLY_DEPTH; ++i) {
w = (u[i] - v[i]);
buffer_regs[i] = u[i] + v[i];
buffer_regs[i + params::opt / 2] =
w * (double2){0.707106781186547461715008466854,
-0.707106781186547461715008466854};
}
}
/*
* global batch fft
* does fft in half size

View File

@@ -234,6 +234,29 @@ __device__ void convert_u128_to_f128_as_torus(
}
}
// params is expected to be full degree not half degree
// same as convert_u128_to_f128_as_torus() but expects input to be on registers
template <class params>
__device__ void convert_u128_on_regs_to_f128_as_torus(
double *out_re_hi, double *out_re_lo, double *out_im_hi, double *out_im_lo,
const __uint128_t *in_re_on_regs, const __uint128_t *in_im_on_regs) {
const double normalization = pow(2., -128.);
Index tid = threadIdx.x;
// #pragma unroll
for (Index i = 0; i < params::opt / 2; i++) {
auto out_re = u128_to_signed_to_f128(in_re_on_regs[i]);
auto out_im = u128_to_signed_to_f128(in_im_on_regs[i]);
out_re_hi[tid] = out_re.hi * normalization;
out_re_lo[tid] = out_re.lo * normalization;
out_im_hi[tid] = out_im.hi * normalization;
out_im_lo[tid] = out_im.lo * normalization;
tid += params::degree / params::opt;
}
}
template <class params>
__device__ void
convert_f128_to_u128_as_torus(__uint128_t *out_re, __uint128_t *out_im,
@@ -272,7 +295,7 @@ batch_convert_u128_to_f128_as_integer(double *out_re_hi, double *out_re_lo,
}
// params is expected to be full degree not half degree
// converts standqard input into complex<128> represented by 4 double
// converts standard input into complex<128> represented by 4 double
// with following pattern: [re_hi_0, re_hi_1, ... re_hi_n, re_lo_0, re_lo_1,
// ... re_lo_n, im_hi_0, im_hi_1, ..., im_hi_n, im_lo_0, im_lo_1, ..., im_lo_n]
template <class params>
@@ -291,7 +314,7 @@ batch_convert_u128_to_f128_as_torus(double *out_re_hi, double *out_re_lo,
}
// params is expected to be full degree not half degree
// converts standqard input into complex<128> represented by 4 double
// converts standard input into complex<128> represented by 4 double
// with following pattern: [re_hi_0, re_lo_0, im_hi_0, im_lo_0, re_hi_1,
// re_lo_1, im_hi_1, im_lo_1,
// ...,re_hi_n, re_lo_n, im_hi_n, im_lo_n, ]

View File

@@ -26,7 +26,7 @@ __host__ uint64_t scratch_cuda_integer_abs_kb(
if (is_signed) {
*mem_ptr = new int_abs_buffer<Torus>(streams, gpu_indexes, gpu_count,
params, num_blocks,
allocate_gpu_memory, &size_tracker);
allocate_gpu_memory, size_tracker);
}
return size_tracker;
}
@@ -53,7 +53,8 @@ __host__ void host_integer_abs_kb(
streams, gpu_indexes, gpu_count, mask, num_bits_in_ciphertext - 1,
mem_ptr->arithmetic_scalar_shift_mem, bsks, ksks, ms_noise_reduction_key);
host_addition<Torus>(streams[0], gpu_indexes[0], ct, mask, ct,
ct->num_radix_blocks);
ct->num_radix_blocks, mem_ptr->params.message_modulus,
mem_ptr->params.carry_modulus);
uint32_t requested_flag = outputFlag::FLAG_NONE;
uint32_t uses_carry = 0;

Some files were not shown because too many files have changed in this diff Show More