Compare commits


22 Commits

Author SHA1 Message Date
Nicolas Sarlin
1e9c269a1e chore(test-vectors): update README 2026-01-13 15:26:23 +01:00
Nicolas Sarlin
6300a025d9 chore(docs): fix api levels description 2026-01-13 09:43:49 +01:00
David Testé
7222bff5d6 chore(ci): fix artifact naming for hpu benchmarks
Prior to this commit, all generated artifacts would be identified
as integer benchmarks.
2026-01-12 15:42:24 +01:00
Arthur Meyre
cb4d62b40a chore: fix wasm-pack URL and update build output listing
Corrected the URL for 'wasm-pack' and updated the file listing after the build.

co-authored-by: d4wae89d498 <faussurier.marc@icloud.com>
2026-01-12 12:51:04 +01:00
David Testé
7a0c054095 chore(bench): use ks32 parameters set as default only for cpu 2026-01-12 11:00:52 +01:00
Agnes Leroy
ddb7d56f56 chore(gpu): add neg to dedup ops 2026-01-12 11:00:52 +01:00
Guillermo Oyarzun
cbe39c8e98 feat(gpu): create noise and pfail tests pbs128 and packingks 2026-01-12 10:46:41 +01:00
pgardratzama
27364857f1 fix(hpu): prf is not available yet on HPU 2026-01-12 09:55:18 +01:00
Arthur Meyre
7043246c17 chore: update CODEOWNERS file 2026-01-09 16:12:50 +01:00
Theo Souchon
51735fb8ed chore(bench): code refactor and automation for hlapi 2026-01-09 16:09:27 +01:00
pgardratzama
23a348c9ae feat(hpu): new HPU bitstream RTL v2.2 2026-01-09 15:25:35 +01:00
Mayeul@Zama
61b616b784 chore(hlapi): add bench of oprf over any range 2026-01-09 15:19:08 +01:00
Mayeul@Zama
df48e176f3 feat(hlapi): add oprf over any range 2026-01-09 15:19:08 +01:00
Mayeul@Zama
dd2345df6b refactor(integer): use NonZeroU64 for excluded_upper_bound 2026-01-09 15:19:08 +01:00
Mayeul@Zama
933800ea6f doc(hlapi): fix documentation 2026-01-09 15:19:08 +01:00
Mayeul@Zama
3e4cee3a75 refactor(integer): split oprf_almost_uniformity_test 2026-01-09 15:19:08 +01:00
Mayeul@Zama
00ea9b8e07 refactor(shortint): improve error in uniformity_p_value 2026-01-09 15:19:08 +01:00
Mayeul@Zama
23ce85f6a2 fix(core): make sup_diff more permissive 2026-01-09 15:19:08 +01:00
Nicolas Sarlin
126a95e929 fix(js): unsafe coop bench was overwriting mt one 2026-01-08 16:48:18 +01:00
Nicolas Sarlin
23fffb1443 chore(deps): ignore unmaintained bincode cargo audit warning 2026-01-08 15:16:37 +01:00
Agnes Leroy
6d58a54266 chore(gpu): attempt to fix apt in ci 2026-01-08 14:54:03 +01:00
Baptiste Roux
9b8d5f5a43 chore(hpu): bump version of lru
Lru required a version update following cargo audit

Signed-off-by: Baptiste Roux <baptiste.roux@zama.ai>
2026-01-08 14:08:31 +01:00
39 changed files with 2780 additions and 985 deletions

View File

@@ -2,6 +2,8 @@
ignore = [
# Ignoring unmaintained 'paste' advisory as it is a widely used, low-risk build dependency.
"RUSTSEC-2024-0436",
# Ignoring unmaintained 'bincode' crate. Getting rid of it would be too complex in the short term.
"RUSTSEC-2025-0141",
]
[output]

View File

@@ -23,6 +23,8 @@ runs:
echo "${CMAKE_SCRIPT_SHA} cmake-${CMAKE_VERSION}-linux-x86_64.sh" > checksum
sha256sum -c checksum
sudo bash cmake-"${CMAKE_VERSION}"-linux-x86_64.sh --skip-license --prefix=/usr/ --exclude-subdir
sudo apt-get clean
sudo rm -rf /var/lib/apt/lists/*
sudo apt update
sudo apt remove -y unattended-upgrades
sudo apt install -y cmake-format libclang-dev

View File

@@ -187,7 +187,7 @@ jobs:
- name: Upload parsed results artifact
uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f
with:
name: ${{ github.sha }}_${{ matrix.bench_type }}_integer_benchmarks
name: ${{ github.sha }}_${{ matrix.bench_type }}_${{ matrix.command }}_benchmarks
path: ${{ env.RESULTS_FILENAME }}
- name: Checkout Slab repo

.gitignore vendored
View File

@@ -10,6 +10,7 @@ target/
**/*.rmeta
**/Cargo.lock
**/*.bin
**/.DS_Store
# Some of our bench outputs
/tfhe/benchmarks_parameters

View File

@@ -11,7 +11,7 @@
/tfhe/src/core_crypto/gpu @agnesLeroy
/tfhe/src/core_crypto/hpu @zama-ai/hardware
/tfhe/src/shortint/ @mayeul-zama
/tfhe/src/shortint/ @mayeul-zama @nsarlin-zama
/tfhe/src/integer/ @tmontaigu
/tfhe/src/integer/gpu @agnesLeroy
@@ -19,8 +19,12 @@
/tfhe/src/high_level_api/ @tmontaigu
/tfhe-zk-pok/ @nsarlin-zama
/tfhe-benchmark/ @soonum
/utils/ @nsarlin-zama
/Makefile @IceTDrinker @soonum
/mockups/tfhe-hpu-mockup @zama-ai/hardware

View File

@@ -36,6 +36,7 @@ rayon = "1.11"
serde = { version = "1.0", default-features = false }
wasm-bindgen = "0.2.101"
getrandom = "0.2.8"
# The project maintainers consider this to be the last version of the 1.3 branch; any newer version should not be trusted
bincode = "=1.3.3"
[profile.bench]

View File

@@ -1,704 +0,0 @@
# 🔄 TFHE-rs GitHub Workflows Documentation
This document provides a comprehensive overview of all GitHub Actions workflows in the TFHE-rs project, organized by category with visual diagrams showing their triggers and purposes.
## 📊 Workflow Overview
The project contains **71 workflows** organized into the following categories:
- **Testing & Validation** (31 workflows) - AWS CPU (7), GPU (16), HPU (1), M1 (1), special tests (4), cargo tests (2)
- **Benchmarking** (17 workflows) - CPU, GPU, HPU, WASM, specialized benchmarks
- **Building & Compilation** (4 workflows) - Cargo builds
- **Release Management** (9 workflows) - Publishing to crates.io and npm
- **CI/CD & Maintenance** (10 workflows) - Linting, PR management, security
---
## 🔍 Workflow Trigger Types
```mermaid
graph LR
A[Workflow Triggers] --> B[Pull Request]
A --> C[Push to main]
A --> D[Schedule/Cron]
A --> E[Workflow Dispatch]
A --> F[Label Events]
B --> B1[On PR Open]
B --> B2[On PR Approval]
F --> F1[approved label]
F --> F2[m1_test label]
D --> D1[Daily]
D --> D2[Weekly]
D --> D3[Nightly]
```
---
## 🧪 Testing & Validation Workflows
### CPU Testing Workflows
```mermaid
flowchart TB
subgraph "CPU Test Workflows"
AWS[aws_tfhe_tests]
FAST[aws_tfhe_fast_tests]
INT[aws_tfhe_integer_tests]
SIGN[aws_tfhe_signed_integer_tests]
BACK[aws_tfhe_backward_compat_tests]
WASM[aws_tfhe_wasm_tests]
NOISE[aws_tfhe_noise_checks]
M1[m1_tests]
end
subgraph "Triggers"
PR[Pull Request<br/>+ approved label]
SCHED[Schedule<br/>Nightly Mon-Fri]
DISP[Workflow Dispatch]
M1LABEL[m1_test label]
end
PR --> AWS
SCHED --> AWS
DISP --> AWS
PR --> FAST
PR --> INT
PR --> SIGN
PR --> BACK
PR --> WASM
DISP --> M1
M1LABEL --> M1
SCHED --> M1
```
| Workflow | Trigger | Purpose | Runner |
|----------|---------|---------|--------|
| **aws_tfhe_tests** | PR (approved) / Nightly / Manual | Comprehensive CPU tests (csprng, zk-pok, core_crypto, boolean, shortint, strings, high-level API, C API, examples, apps) | AWS cpu-big |
| **aws_tfhe_fast_tests** | PR (approved) / Manual | Fast subset of tests for quick validation | AWS cpu-small |
| **aws_tfhe_integer_tests** | PR (approved) / Manual | Integer operations testing | AWS cpu-big |
| **aws_tfhe_signed_integer_tests** | PR (approved) / Manual | Signed integer operations testing | AWS cpu-big |
| **aws_tfhe_backward_compat_tests** | PR (approved) / Manual | Backward compatibility validation | AWS cpu-small |
| **aws_tfhe_wasm_tests** | PR (approved) / Manual | WebAssembly tests | AWS cpu-small |
| **aws_tfhe_noise_checks** | PR (approved) / Manual | Cryptographic noise validation | AWS cpu-small |
| **m1_tests** | Manual / Schedule (10pm daily) / m1_test label | Tests on Apple M1 architecture | Self-hosted M1 Mac |
---
### GPU Testing Workflows
```mermaid
flowchart TB
subgraph "GPU Test Workflows"
GFAST[gpu_fast_tests]
G4090[gpu_4090_tests]
GH100F[gpu_fast_h100_tests]
GH100[gpu_full_h100_tests]
GMULTI[gpu_full_multi_gpu_tests]
GVAL[gpu_code_validation_tests]
GMEM[gpu_memory_sanitizer]
GMEMH[gpu_memory_sanitizer_h100]
GUINT[gpu_unsigned_integer_tests]
GSINT[gpu_signed_integer_tests]
GUINTC[gpu_unsigned_integer_classic_tests]
GSINTC[gpu_signed_integer_classic_tests]
GUINTH[gpu_unsigned_integer_h100_tests]
GSINTH[gpu_signed_integer_h100_tests]
GLONG[gpu_integer_long_run_tests]
GPCC[gpu_pcc]
end
subgraph "Triggers"
PR[Pull Request]
DISP[Workflow Dispatch]
APPR[PR approved label]
end
PR --> GFAST
DISP --> GFAST
DISP --> G4090
DISP --> GH100F
DISP --> GH100
DISP --> GMULTI
DISP --> GVAL
APPR --> GMEM
APPR --> GMEMH
APPR --> GUINT
APPR --> GSINT
APPR --> GPCC
```
| Workflow | Trigger | Purpose | GPU |
|----------|---------|---------|-----|
| **gpu_fast_tests** | PR / Manual | Quick GPU validation tests | Hyperstack GPU |
| **gpu_4090_tests** | Manual | Tests on RTX 4090 hardware | RTX 4090 |
| **gpu_fast_h100_tests** | Manual | Fast tests on H100 GPU | H100 |
| **gpu_full_h100_tests** | Manual | Comprehensive H100 tests | H100 |
| **gpu_full_multi_gpu_tests** | Manual | Multi-GPU testing | Multiple GPUs |
| **gpu_code_validation_tests** | Manual | GPU code validation | GPU |
| **gpu_memory_sanitizer** | PR (approved) / Manual | Memory leak detection | GPU |
| **gpu_memory_sanitizer_h100** | PR (approved) / Manual | Memory sanitizer on H100 | H100 |
| **gpu_unsigned_integer_tests** | PR (approved) / Manual | Unsigned integer GPU tests | GPU |
| **gpu_signed_integer_tests** | PR (approved) / Manual | Signed integer GPU tests | GPU |
| **gpu_unsigned_integer_classic_tests** | Manual | Classic unsigned integer tests | GPU |
| **gpu_signed_integer_classic_tests** | Manual | Classic signed integer tests | GPU |
| **gpu_unsigned_integer_h100_tests** | Manual | Unsigned integer tests on H100 | H100 |
| **gpu_signed_integer_h100_tests** | Manual | Signed integer tests on H100 | H100 |
| **gpu_integer_long_run_tests** | Manual | Long-running integer tests | GPU |
| **gpu_pcc** | PR (approved) / Manual | GPU PCC checks | GPU |
---
### HPU Testing Workflows
```mermaid
flowchart LR
HPU[hpu_hlapi_tests]
DISP[Workflow Dispatch] --> HPU
HPU --> |Tests on|INTEL[Intel HPU Hardware]
```
| Workflow | Trigger | Purpose | Hardware |
|----------|---------|---------|----------|
| **hpu_hlapi_tests** | Manual | High-level API tests on Intel HPU | Intel HPU |
---
### Special Testing Workflows
```mermaid
flowchart TB
subgraph "Special Tests"
COV[code_coverage]
CSPRNG[csprng_randomness_tests]
LONG[integer_long_run_tests]
PARAMS[parameters_check]
end
subgraph "Cargo Tests"
TESTFFT[cargo_test_fft]
TESTNTT[cargo_test_ntt]
end
DISP[Workflow Dispatch] --> COV
DISP --> CSPRNG
DISP --> LONG
APPR[PR approved label] --> CSPRNG
PUSH[Push to main] --> PARAMS
PR[PR on specific paths] --> PARAMS
DISP --> PARAMS
PR --> TESTFFT
PR --> TESTNTT
```
| Workflow | Trigger | Purpose |
|----------|---------|---------|
| **code_coverage** | Manual | Generate code coverage reports and upload to Codecov |
| **csprng_randomness_tests** | Manual / PR (approved) | Dieharder randomness test suite for CSPRNG |
| **integer_long_run_tests** | Manual | Extended integer testing |
| **parameters_check** | Push to main / PR (specific paths) / Manual | Security check on cryptographic parameters using lattice estimator |
| **cargo_test_fft** | PR | Run tfhe-fft tests |
| **cargo_test_ntt** | PR | Run tfhe-ntt tests |
---
## 🏗️ Building & Compilation Workflows (4 workflows)
```mermaid
flowchart TB
subgraph "Build Workflows"
BUILD[cargo_build]
COMMON[cargo_build_common]
FFT[cargo_build_tfhe_fft]
NTT[cargo_build_tfhe_ntt]
end
subgraph "Build Jobs"
PCC[Parallel PCC CPU]
PCCHPU[PCC HPU]
FULL[Build TFHE Full]
LAYERS[Build Layers]
CAPI[Build C API]
end
PR[Pull Request] --> BUILD
BUILD --> PCC
BUILD --> PCCHPU
BUILD --> FULL
BUILD --> LAYERS
BUILD --> CAPI
PR --> FFT
PR --> NTT
```
| Workflow | Trigger | Purpose |
|----------|---------|---------|
| **cargo_build** | PR | Main build workflow - coordinates all build jobs |
| **cargo_build_common** | Reusable | Shared build logic for different targets |
| **cargo_build_tfhe_fft** | PR | Build and validate tfhe-fft crate |
| **cargo_build_tfhe_ntt** | PR | Build and validate tfhe-ntt crate |
**Build Targets:**
- ✅ Parallel PCC (Program Counter Checks) for CPU
- ✅ PCC for HPU
- ✅ Full TFHE build (Linux, macOS M1, Windows)
- ✅ Layer-by-layer builds
- ✅ C API builds
---
## 📊 Benchmarking Workflows (17 workflows)
All benchmark workflows are **triggered manually** via workflow_dispatch.
```mermaid
flowchart TB
subgraph "CPU Benchmarks - 3 workflows"
BCPU[benchmark_cpu<br/>Main CPU benchmarks]
BCPUW[benchmark_cpu_weekly<br/>Weekly CPU benchmarks]
BCPUC[benchmark_cpu_common<br/>Reusable workflow]
end
subgraph "GPU Benchmarks - 5 workflows"
BGPU[benchmark_gpu<br/>Main GPU benchmarks]
BGPUW[benchmark_gpu_weekly<br/>Weekly GPU benchmarks]
BGPUC[benchmark_gpu_common<br/>Reusable workflow]
BGPU4090[benchmark_gpu_4090<br/>RTX 4090 specific]
BGPUCOP[benchmark_gpu_coprocessor<br/>Coprocessor mode]
end
subgraph "HPU Benchmarks - 2 workflows"
BHPU[benchmark_hpu<br/>Intel HPU benchmarks]
BHPUC[benchmark_hpu_common<br/>Reusable workflow]
end
subgraph "Specialized Benchmarks - 7 workflows"
BWASM[benchmark_wasm_client<br/>WebAssembly client]
BCT[benchmark_ct_key_sizes<br/>Ciphertext & key sizes]
BFFT[benchmark_tfhe_fft<br/>FFT performance]
BNTT[benchmark_tfhe_ntt<br/>NTT performance]
BWHITE[benchmark_whitepaper<br/>Whitepaper params]
BREG[benchmark_perf_regression<br/>Regression detection]
BDOC[benchmark_documentation<br/>Generate docs]
end
DISP[Workflow Dispatch<br/>Manual Trigger] --> BCPU
DISP --> BCPUW
DISP --> BGPU
DISP --> BGPUW
DISP --> BHPU
DISP --> BWASM
DISP --> BCT
DISP --> BFFT
DISP --> BNTT
DISP --> BWHITE
DISP --> BREG
DISP --> BDOC
DISP --> BGPU4090
DISP --> BGPUCOP
```
### CPU Benchmarks (3 workflows)
| Workflow | Purpose | Operations Tested |
|----------|---------|-------------------|
| **benchmark_cpu** | Main CPU performance benchmarks | integer, signed_integer, integer_compression, integer_zk, shortint, shortint_oprf, hlapi, hlapi_erc20, hlapi_dex, hlapi_noise_squash, tfhe_zk_pok, boolean, pbs, pbs128, ks, ks_pbs |
| **benchmark_cpu_weekly** | Weekly scheduled CPU benchmarks | Similar to benchmark_cpu |
| **benchmark_cpu_common** | Reusable workflow for CPU benchmarks | Shared logic |
### GPU Benchmarks (5 workflows)
| Workflow | Purpose | Hardware |
|----------|---------|----------|
| **benchmark_gpu** | Main GPU performance benchmarks | Standard GPU |
| **benchmark_gpu_weekly** | Weekly scheduled GPU benchmarks | Standard GPU |
| **benchmark_gpu_4090** | Benchmarks on RTX 4090 | RTX 4090 |
| **benchmark_gpu_coprocessor** | GPU coprocessor mode benchmarks | GPU |
| **benchmark_gpu_common** | Reusable workflow for GPU benchmarks | Shared logic |
### HPU Benchmarks (2 workflows)
| Workflow | Purpose | Hardware |
|----------|---------|----------|
| **benchmark_hpu** | Intel HPU performance benchmarks | Intel HPU |
| **benchmark_hpu_common** | Reusable workflow for HPU benchmarks | Shared logic |
### Specialized Benchmarks (7 workflows)
| Workflow | Purpose | Focus |
|----------|---------|-------|
| **benchmark_wasm_client** | WebAssembly client performance | WASM execution |
| **benchmark_ct_key_sizes** | Measure ciphertext and key sizes | Memory footprint |
| **benchmark_tfhe_fft** | FFT library performance | tfhe-fft crate |
| **benchmark_tfhe_ntt** | NTT library performance | tfhe-ntt crate |
| **benchmark_whitepaper** | Whitepaper parameter validation | Research params |
| **benchmark_perf_regression** | Detect performance regressions | Regression testing |
| **benchmark_documentation** | Generate benchmark documentation | Documentation |
### Benchmark Configuration Options
**📏 Operation Flavors:**
- `default` - Standard operations
- `fast_default` - Fast variant operations
- `smart` - Smart operations (with automatic PBS)
- `unchecked` - Unchecked operations (no PBS); the two flavors are contrasted in the sketch after these lists
- `misc` - Miscellaneous operations
**🎯 Precision Sets:**
- `fast` - Quick validation subset
- `all` - All supported bit precisions
- `documentation` - Precisions for documentation
**⏱️ Benchmark Types:**
- `latency` - Single operation timing
- `throughput` - Operations per second
- `both` - Both latency and throughput
**🔧 Parameter Types:**
- `classical` - Classical parameters
- `multi_bit` - Multi-bit parameters
- `classical + multi_bit` - Both parameter sets
- `classical_documentation` - Classical for docs
- `multi_bit_documentation` - Multi-bit for docs
- `classical_documentation + multi_bit_documentation` - Both for docs
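To make the `smart` and `unchecked` flavors concrete, here is a minimal sketch using the tfhe-rs shortint API. It is illustrative only; the parameter alias (`PARAM_MESSAGE_2_CARRY_2_KS_PBS` is assumed here) varies across tfhe-rs releases.
```rust
// Minimal sketch of the two operation flavors benchmarked above.
// Assumes the `tfhe` crate with the "shortint" feature enabled; the
// parameter alias name may differ between tfhe-rs versions.
use tfhe::shortint::prelude::*;

fn main() {
    let (cks, sks) = gen_keys(PARAM_MESSAGE_2_CARRY_2_KS_PBS);

    let mut ct_a = cks.encrypt(1);
    let mut ct_b = cks.encrypt(2);

    // `unchecked` flavor: adds directly, assuming the carry space is
    // clean; no PBS is performed.
    let unchecked_sum = sks.unchecked_add(&ct_a, &ct_b);

    // `smart` flavor: bootstraps the operands first if the operation
    // could overflow the carry space, then adds.
    let smart_sum = sks.smart_add(&mut ct_a, &mut ct_b);

    assert_eq!(cks.decrypt(&unchecked_sum), 3);
    assert_eq!(cks.decrypt(&smart_sum), 3);
}
```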
---
## 📦 Release Management Workflows (9 workflows)
```mermaid
flowchart TB
subgraph "Release Workflows"
RTFHE[make_release_tfhe]
RCUDA[make_release_cuda]
RHPU[make_release_hpu]
RFFT[make_release_tfhe_fft]
RNTT[make_release_tfhe_ntt]
RCSPRNG[make_release_tfhe_csprng]
RZK[make_release_zk_pok]
RVER[make_release_tfhe_versionable]
RCOMMON[make_release_common]
end
DISP[Workflow Dispatch<br/>Manual Only] --> RTFHE
DISP --> RCUDA
DISP --> RHPU
DISP --> RFFT
DISP --> RNTT
DISP --> RCSPRNG
DISP --> RZK
DISP --> RVER
RTFHE --> |Publishes to|CRATES[crates.io]
RTFHE --> |Publishes to|NPM[npm registry]
style RTFHE fill:#ff6b6b
style DISP fill:#ffd93d
```
| Workflow | Purpose | Platforms |
|----------|---------|-----------|
| **make_release_tfhe** | Release main TFHE library | crates.io, npm (web & node packages) |
| **make_release_cuda** | Release CUDA backend | crates.io |
| **make_release_hpu** | Release HPU backend | crates.io |
| **make_release_tfhe_fft** | Release FFT library | crates.io |
| **make_release_tfhe_ntt** | Release NTT library | crates.io |
| **make_release_tfhe_csprng** | Release CSPRNG library | crates.io |
| **make_release_zk_pok** | Release Zero-Knowledge Proof of Knowledge library | crates.io |
| **make_release_tfhe_versionable** | Release versionable trait library | crates.io |
| **make_release_common** | Shared release logic | Reusable workflow |
**Release Options:**
- 🧪 Dry-run mode
- 📦 Push to crates.io
- 🌐 Push web JS package
- 📱 Push Node.js package
- 🏷️ Set NPM latest tag
---
## 🛠️ CI/CD & Maintenance Workflows (10 workflows)
```mermaid
flowchart TB
subgraph "Code Quality"
LINT[ci_lint]
COMMIT[check_commit]
AUDIT[cargo_audit]
end
subgraph "PR Management"
APPROVE[approve_label]
UNVER[unverified_prs]
VERIFY[verify_triggering_actor]
end
subgraph "Repository Sync"
SYNC[sync_on_push]
end
subgraph "SVG Generation"
SVG[generate_svgs]
SVGC[generate_svg_common]
end
PR[Pull Request] --> LINT
PR --> COMMIT
PR --> APPROVE
SCHED1[Daily 4am UTC] --> AUDIT
SCHED2[Daily 1:30am UTC] --> UNVER
PUSH[Push to main] --> SYNC
DISP[Workflow Dispatch] --> SVG
DISP --> AUDIT
DISP --> SYNC
```
| Workflow | Trigger | Purpose |
|----------|---------|---------|
| **ci_lint** | PR | Lint workflows with actionlint & check security with zizmor |
| **check_commit** | PR | Validate commit message format, line length, and signatures |
| **approve_label** | PR / PR Review | Auto-manage "approved" label on PRs |
| **cargo_audit** | Daily 4am UTC / Manual | Check dependencies for security vulnerabilities |
| **unverified_prs** | Daily 1:30am UTC | Close PRs without CLA signature after 2 days |
| **verify_triggering_actor** | Various | Verify actor permissions for sensitive workflows |
| **sync_on_push** | Push to main / Manual | Sync repository to internal mirror |
| **generate_svgs** | Manual | Generate parameter curve SVG visualizations |
| **generate_svg_common** | Reusable | Common SVG generation logic |
| **placeholder_workflow** | N/A | Template workflow |
---
## 🔐 Security & Quality Workflows
```mermaid
flowchart LR
subgraph "Security Checks"
A[commit signatures]
B[dependency audit]
C[zizmor security scan]
D[parameters security]
end
subgraph "Quality Checks"
E[commit format]
F[actionlint]
G[code coverage]
H[randomness tests]
end
COMMIT[check_commit] --> A
COMMIT --> E
AUDIT[cargo_audit] --> B
LINT[ci_lint] --> C
LINT --> F
PARAMS[parameters_check] --> D
COV[code_coverage] --> G
CSPRNG[csprng_randomness_tests] --> H
```
---
## 📈 Workflow Statistics
### By Trigger Type
| Trigger | Count | Examples |
|---------|-------|----------|
| **Workflow Dispatch** (Manual) | 65 | All benchmarks, releases, most tests |
| **Pull Request** | 18 | Build, lint, fast tests, GPU tests |
| **Pull Request (approved label)** | 12 | AWS tests, GPU memory tests |
| **Schedule/Cron** | 5 | Nightly tests, audit, unverified PRs |
| **Push to main** | 2 | Sync, parameters check |
| **Label Events** | 3 | M1 tests, approve workflow |
### By Runner Type
| Runner | Count | Purpose |
|--------|-------|---------|
| **AWS CPU** | 15 | Main testing infrastructure |
| **Hyperstack GPU** | 13 | GPU testing and benchmarks |
| **Self-hosted M1 Mac** | 1 | Apple Silicon testing |
| **Intel HPU** | 2 | HPU testing and benchmarks |
| **Ubuntu Latest** | 25 | CI/CD, builds, coordination |
| **Windows** | 1 | Windows builds |
---
## 🎯 Key Workflow Patterns
### 1. Instance Management Pattern
Many workflows follow this pattern for cost optimization:
```mermaid
sequenceDiagram
participant GitHub
participant Setup
participant Runner
participant Tests
participant Teardown
GitHub->>Setup: Trigger workflow
Setup->>Runner: Start AWS/GPU instance
Runner->>Tests: Execute tests
Tests->>Teardown: Complete (success/fail)
Teardown->>Runner: Stop instance
Teardown->>GitHub: Send Slack notification
```
**Workflows using this pattern:**
- All `aws_tfhe_*` workflows
- All `gpu_*` workflows
- `hpu_hlapi_tests`
- `code_coverage`
- `parameters_check`
- `csprng_randomness_tests`
### 2. Branch Protection Rules (BPR)
Workflows marked with `(bpr)` are required for PRs to be merged:
- `cargo_build/cargo-builds (bpr)`
- `ci_lint/lint-check (bpr)`
- `check_commit/check-commit-pr (bpr)`
### 3. File Change Detection
Many workflows use `tj-actions/changed-files` to conditionally run tests based on changed files, optimizing CI time and resources.
---
## 🔄 Workflow Dependencies
```mermaid
graph TD
subgraph "Reusable Workflows"
COMMON[cargo_build_common]
BENCH_CPU_C[benchmark_cpu_common]
BENCH_GPU_C[benchmark_gpu_common]
BENCH_HPU_C[benchmark_hpu_common]
REL_COMMON[make_release_common]
SVG_COMMON[generate_svg_common]
end
subgraph "Parent Workflows"
BUILD[cargo_build]
BENCH_CPU[benchmark_cpu]
BENCH_GPU[benchmark_gpu]
BENCH_HPU[benchmark_hpu]
RELEASES[make_release_*]
SVG[generate_svgs]
end
BUILD --> COMMON
BENCH_CPU --> BENCH_CPU_C
BENCH_GPU --> BENCH_GPU_C
BENCH_HPU --> BENCH_HPU_C
RELEASES --> REL_COMMON
SVG --> SVG_COMMON
```
---
## 📝 Workflow Naming Convention
```
<category>_<component>_<type>
```
Examples:
- `aws_tfhe_tests` - AWS infrastructure, TFHE component, tests type
- `gpu_fast_tests` - GPU infrastructure, fast variant, tests type
- `benchmark_cpu_weekly` - Benchmark category, CPU target, weekly schedule
- `make_release_tfhe` - Make/release action, TFHE component
---
## 🚀 Quick Reference
### Running Tests on PR
1. **Quick validation**: Automatic on PR creation
- `cargo_build` - Build checks
- `ci_lint` - Linting
- `check_commit` - Commit format
- `gpu_fast_tests` - Basic GPU tests
2. **Full test suite**: After PR approval (add "approved" label)
- `aws_tfhe_tests` - Comprehensive CPU tests
- `gpu_memory_sanitizer` - Memory checks
- GPU integer tests
3. **Special hardware**: Manual label addition
- Add `m1_test` label for M1 Mac tests
### Running Benchmarks
All benchmarks are **manual only** via workflow dispatch. Choose:
- Target: CPU, GPU, HPU, or WASM
- Operation flavor: default, smart, unchecked
- Precision set: fast, all, documentation
- Benchmark type: latency, throughput, both
### Creating a Release
1. Run appropriate `make_release_*` workflow
2. Configure options (dry-run, push to crates, npm packages)
3. Workflow handles versioning, building, and publishing
4. Includes provenance and SLSA attestation
---
## 🔔 Notification System
All critical workflows send Slack notifications on:
- ❌ Failure
- 🚫 Cancellation (non-PR events)
- ⚠️ Instance teardown failures
Notifications include:
- Job status
- Pull request link (if applicable)
- Action run URL
---
## 📚 Additional Resources
- **Workflow Files**: `.github/workflows/`
- **Reusable Actions**: `.github/actions/`
- **Configuration**: `ci/slab.toml`
- **Scripts**: `scripts/` directory
---
## ✅ Verification Summary
**Total Workflows: 71**
Count by category:
- Testing & Validation: **31 workflows** (7 AWS CPU + 16 GPU + 1 HPU + 1 M1 + 4 special + 2 cargo tests)
- Benchmarking: **17 workflows** (3 CPU + 5 GPU + 2 HPU + 7 specialized)
- Building & Compilation: **4 workflows**
- Release Management: **9 workflows**
- CI/CD & Maintenance: **10 workflows**
**Verification:** 31 + 17 + 4 + 9 + 10 = **71**
*Last Updated: 2026-01-08*

View File

@@ -1,43 +1,43 @@
# Test vectors for TFHE
These test vectors are generated using [TFHE-rs](https://github.com/zama-ai/tfhe-rs), with the git tag `tfhe-test-vectors-0.2.0`.
They are TFHE-rs objects serialized in the [cbor format](https://cbor.io/). You can deserialize them using any cbor library for the language of your choice. For example, using the [cbor2](https://pypi.org/project/cbor2/) program, run: `cbor2 --pretty toy_params/lwe_a.cbor`.
They are TFHE-rs objects serialized in the [cbor format](https://cbor.io/). These can be deserialized using any cbor library for any programming language. For example, using the [cbor2](https://pypi.org/project/cbor2/) program, the command to run is: `cbor2 --pretty toy_params/lwe_a.cbor`.
You will find 2 folders with test vectors for different parameter sets:
- `valid_params_128`: valid classical PBS parameters using a gaussian noise distribution, providing 128bits of security in the IND-CPA model and a bootstrapping probability of failure of 2^{-64}.
- `toy_params`: insecure parameters that yield smaller values
There are 2 folders with test vectors for different parameter sets:
- `valid_params_128`: valid classical PBS parameters using a Gaussian noise distribution, providing 128 bits of security in the IND-CPA model, with a bootstrapping failure probability smaller than 2^{-64}.
- `toy_params`: insecure parameters that yield smaller values to simplify the bit comparison of the results.
The values are generated for the keyswitch -> bootstrap (KS-PBS) atomic pattern. The cleartext inputs are 2 values, A and B defined below.
The values are generated to compute a keyswitch (KS) followed by a bootstrap (PBS). The cleartext inputs are 2 values, A and B, defined below.
All the random values are generated from a fixed seed, which can be found in the `RAND_SEED` constant below. The PRNG used is based on the AES block cipher in counter mode, from the `tfhe-csprng` crate.
The programmable bootstrap is applied twice, with 2 different lut, the identity lut and a specific one (currently a x2 operation)
The bootstrap is applied twice, with 2 different LUTs: the identity LUT and a specific one computing the double of the input value (i.e., f(x) = 2*x).
## Vectors
The following values are generated:
### Keys
| name | description | TFHE-rs type |
|------------------------|---------------------------------------------------------------------------------------|-----------------------------|
| `large_lwe_secret_key` | Encryption secret key, before the KS and after the PBS | `LweSecretKey<Vec<u64>>` |
| `small_lwe_secret_key` | Secret key encrypting ciphertexts between the KS and the PBS | `LweSecretKey<Vec<u64>>` |
| `ksk` | The keyswitching key to convert a ct from the large key to the small one | `LweKeyswitchKey<Vec<u64>>` |
| name | description | TFHE-rs type |
|------------------------|-----------------------------------------------------------------------------------------|-----------------------------|
| `large_lwe_secret_key` | Encryption secret key, before the KS and after the PBS | `LweSecretKey<Vec<u64>>` |
| `small_lwe_secret_key` | Secret key encrypting ciphertexts between the KS and the PBS | `LweSecretKey<Vec<u64>>` |
| `ksk` | The keyswitching key to convert a ct from the large key to the small one | `LweKeyswitchKey<Vec<u64>>` |
| `bsk` | the bootstrapping key to perform a programmable bootstrap on the keyswitched ciphertext | `LweBootstrapKey<Vec<u64>>` |
### Ciphertexts
| name | description | TFHE-rs type | Cleartext |
|----------------------|--------------------------------------------------------------------------------------------------------------|----------------------------|--------------|
| `lwe_a` | Lwe encryption of A | `LweCiphertext<Vec<u64>>` | `A` |
| `lwe_b` | Lwe encryption of B | `LweCiphertext<Vec<u64>>` | `B` |
| `lwe_sum` | Lwe encryption of A plus lwe encryption of B | `LweCiphertext<Vec<u64>>` | `A+B` |
| `lwe_prod` | Lwe encryption of A times cleartext B | `LweCiphertext<Vec<u64>>` | `A*B` |
| `lwe_ms` | The lwe ciphertext after the modswitch part of the PBS ([note](#non-native-encoding)) | `LweCiphertext<Vec<u64>>` | `A` |
| `lwe_ks` | The lwe ciphertext after the keyswitch | `LweCiphertext<Vec<u64>>` | `A` |
| `glwe_after_id_br` | The glwe returned by the application of the identity blind rotation on the mod switched ciphertexts. | `GlweCiphertext<Vec<u64>>` | rot id LUT |
| `lwe_after_id_pbs` | The lwe returned by the application of the sample extract operation on the output of the id blind rotation | `LweCiphertext<Vec<u64>>` | `A` |
| `glwe_after_spec_br` | The glwe returned by the application of the spec blind rotation on the mod switched ciphertexts. | `GlweCiphertext<Vec<u64>>` | rot spec LUT |
| `lwe_after_spec_pbs` | The lwe returned by the application of the sample extract operation on the output of the spec blind rotation | `LweCiphertext<Vec<u64>>` | `spec(A)` |
| name | description | TFHE-rs type | Cleartext |
|----------------------|-----------------------------------------------------------------------------------------------------|----------------------------|----------------------|
| `lwe_a` | LWE Ciphertext encrypting A | `LweCiphertext<Vec<u64>>` | `A` |
| `lwe_b` | LWE Ciphertext encrypting B | `LweCiphertext<Vec<u64>>` | `B` |
| `lwe_sum` | LWE Ciphertext encrypting A plus lwe encryption of B | `LweCiphertext<Vec<u64>>` | `A+B` |
| `lwe_prod` | LWE Ciphertext encrypting A times cleartext B | `LweCiphertext<Vec<u64>>` | `A*B` |
| `lwe_ms` | LWE Ciphertext encrypting A after a Modulus Switch from q to 2*N ([note](#non-native-encoding)) | `LweCiphertext<Vec<u64>>` | `A` |
| `lwe_ks` | LWE Ciphertext encrypting A after a keyswitch from `large_lwe_secret_key` to `small_lwe_secret_key` | `LweCiphertext<Vec<u64>>` | `A` |
| `glwe_after_id_br` | GLWE Ciphertext encrypting A after the application of the identity blind rotation on `lwe_ms` | `GlweCiphertext<Vec<u64>>` | rotation of id LUT |
| `lwe_after_id_pbs` | LWE Ciphertext encrypting A after the sample extract operation on `glwe_after_id_br` | `LweCiphertext<Vec<u64>>` | `A` |
| `glwe_after_spec_br` | GLWE Ciphertext encrypting spec(A) after the application of the spec blind rotation on `lwe_ms` | `GlweCiphertext<Vec<u64>>` | rotation of spec LUT |
| `lwe_after_spec_pbs` | LWE Ciphertext encrypting spec(A) after the sample extract operation on `glwe_after_spec_br` | `LweCiphertext<Vec<u64>>` | `spec(A)` |
Ciphertexts with the `_karatsuba` suffix are generated using the Karatsuba polynomial multiplication algorithm in the blind rotation, while default ciphertexts are generated using an FFT multiplication.
The Karatsuba variant avoids floating-point arithmetic, making it easier to reproduce bit-exact results.
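For a Rust consumer, deserialization could look like the following minimal sketch; it assumes the `ciborium` crate as the cbor library and a `tfhe` build with serde support, and reuses the `toy_params/lwe_a.cbor` path from the example above.
```rust
// Hedged sketch: read one test vector back into its TFHE-rs type.
// Any serde-compatible cbor library should work; `ciborium` is assumed.
use tfhe::core_crypto::prelude::LweCiphertext;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let file = std::fs::File::open("toy_params/lwe_a.cbor")?;
    // `lwe_a` is listed in the table above as `LweCiphertext<Vec<u64>>`.
    let lwe_a: LweCiphertext<Vec<u64>> = ciborium::de::from_reader(file)?;
    println!("lwe_size = {}", lwe_a.lwe_size().0);
    Ok(())
}
```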

View File

@@ -65,6 +65,16 @@ void cleanup_cuda_integer_compress_radix_ciphertext_128(CudaStreamsFFI streams,
void cleanup_cuda_integer_decompress_radix_ciphertext_128(
CudaStreamsFFI streams, int8_t **mem_ptr_void);
void cuda_integer_extract_glwe_128(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index);
void cuda_integer_extract_glwe_64(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index);
}
#endif

View File

@@ -155,3 +155,24 @@ void cleanup_cuda_integer_decompress_radix_ciphertext_128(
delete mem_ptr;
*mem_ptr_void = nullptr;
}
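// Copies the GLWE ciphertext at `glwe_index` out of a packed GLWE
// ciphertext list into `glwe_array_out` (128-bit variant; the 64-bit
// variant follows).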
void cuda_integer_extract_glwe_128(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index) {
CudaStreams _streams = CudaStreams(streams);
host_extract<__uint128_t>(_streams.stream(0), _streams.gpu_index(0),
(__uint128_t *)glwe_array_out, glwe_list,
glwe_index);
}
void cuda_integer_extract_glwe_64(
CudaStreamsFFI streams, void *glwe_array_out,
CudaPackedGlweCiphertextListFFI const *glwe_list,
uint32_t const glwe_index) {
CudaStreams _streams = CudaStreams(streams);
host_extract<__uint64_t>(_streams.stream(0), _streams.gpu_index(0),
(__uint64_t *)glwe_array_out, glwe_list, glwe_index);
}

View File

@@ -2349,6 +2349,22 @@ unsafe extern "C" {
mem_ptr_void: *mut *mut i8,
);
}
unsafe extern "C" {
pub fn cuda_integer_extract_glwe_128(
streams: CudaStreamsFFI,
glwe_array_out: *mut ffi::c_void,
glwe_list: *const CudaPackedGlweCiphertextListFFI,
glwe_index: u32,
);
}
unsafe extern "C" {
pub fn cuda_integer_extract_glwe_64(
streams: CudaStreamsFFI,
glwe_array_out: *mut ffi::c_void,
glwe_list: *const CudaPackedGlweCiphertextListFFI,
glwe_index: u32,
);
}
unsafe extern "C" {
pub fn scratch_cuda_rerand_64(
streams: CudaStreamsFFI,

View File

@@ -40,7 +40,7 @@ rand = "0.8.5"
regex = "1.10.4"
bitflags = { version = "2.5.0", features = ["serde"] }
itertools = "0.11.0"
lru = "0.12.3"
lru = "0.16.3"
bitfield-struct = "0.10.0"
crossbeam = { version = "0.8.4", features = ["crossbeam-queue"] }
rayon = { workspace = true }

View File

@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:35cc06547a23b862ab9829351d74d944e60ea9dad3ecf593d15f0ce8445d145e
size 81710610
oid sha256:934c8131c12010dc837f6a2af5111b83f8f5d42f10485e9b3b971edb24c467f8
size 82201876

View File

@@ -160,9 +160,9 @@ impl ProgramInner {
.filter(|(_, var)| var.is_none())
.map(|(rid, _)| *rid)
.collect::<Vec<_>>();
demote_order
.into_iter()
.for_each(|rid| self.regs.demote(&rid));
demote_order.into_iter().for_each(|rid| {
self.regs.demote(&rid);
});
}
/// Release register entry
@@ -179,7 +179,7 @@ impl ProgramInner {
/// Notify register access to update LRU state
pub(crate) fn reg_access(&mut self, rid: asm::RegId) {
self.regs.promote(&rid)
self.regs.promote(&rid);
}
/// Retrieved least-recent-used heap entry
@@ -220,9 +220,9 @@ impl ProgramInner {
.filter(|(_mid, var)| var.is_none())
.map(|(mid, _)| *mid)
.collect::<Vec<_>>();
demote_order
.into_iter()
.for_each(|mid| self.heap.demote(&mid));
demote_order.into_iter().for_each(|mid| {
self.heap.demote(&mid);
});
}
_ => { /*Only release Heap slot*/ }
}
@@ -231,7 +231,9 @@ impl ProgramInner {
/// Notify heap access to update LRU state
pub(crate) fn heap_access(&mut self, mid: asm::MemId) {
match mid {
asm::MemId::Heap { .. } => self.heap.promote(&mid),
asm::MemId::Heap { .. } => {
self.heap.promote(&mid);
}
_ => { /* Do nothing, slot does not belong to heap */ }
}
}

tfhe-benchmark/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
benchmarks_parameters/*

View File

@@ -2,7 +2,9 @@ use benchmark::utilities::{
hlapi_throughput_num_ops, write_to_json, BenchmarkType, BitSizesSet, EnvConfig, OperatorType,
};
use criterion::{black_box, Criterion, Throughput};
use oprf::oprf_any_range2;
use rand::prelude::*;
use rayon::prelude::*;
use std::marker::PhantomData;
use std::ops::*;
use tfhe::core_crypto::prelude::Numeric;
@@ -11,34 +13,42 @@ use tfhe::keycache::NamedParam;
use tfhe::named::Named;
use tfhe::prelude::*;
use tfhe::{
ClientKey, CompressedServerKey, FheIntegerType, FheUint10, FheUint12, FheUint128, FheUint14,
FheUint16, FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8, FheUintId, IntegerId,
KVStore,
ClientKey, CompressedServerKey, FheIntegerType, FheUint, FheUint10, FheUint12, FheUint128,
FheUint14, FheUint16, FheUint2, FheUint32, FheUint4, FheUint6, FheUint64, FheUint8, FheUintId,
IntegerId, KVStore,
};
use rayon::prelude::*;
mod oprf;
fn bench_fhe_type<FheType>(
trait BenchWait {
fn wait_bench(&self);
}
impl<Id: FheUintId> BenchWait for FheUint<Id> {
fn wait_bench(&self) {
self.wait()
}
}
impl<T1: FheWait, T2> BenchWait for (T1, T2) {
fn wait_bench(&self) {
self.0.wait()
}
}
fn bench_fhe_type_op<FheType, F, R>(
c: &mut Criterion,
client_key: &ClientKey,
type_name: &str,
bit_size: usize,
display_name: &str,
func_name: &str,
func: F,
) where
F: Fn(&FheType, &FheType) -> R,
R: BenchWait,
FheType: FheEncrypt<u128, ClientKey>,
FheType: FheWait,
for<'a> &'a FheType: Add<&'a FheType, Output = FheType>
+ Sub<&'a FheType, Output = FheType>
+ Mul<&'a FheType, Output = FheType>
+ BitAnd<&'a FheType, Output = FheType>
+ BitOr<&'a FheType, Output = FheType>
+ BitXor<&'a FheType, Output = FheType>
+ Shl<&'a FheType, Output = FheType>
+ Shr<&'a FheType, Output = FheType>
+ RotateLeft<&'a FheType, Output = FheType>
+ RotateRight<&'a FheType, Output = FheType>
+ OverflowingAdd<&'a FheType, Output = FheType>
+ OverflowingSub<&'a FheType, Output = FheType>,
for<'a> FheType: FheMin<&'a FheType, Output = FheType> + FheMax<&'a FheType, Output = FheType>,
{
let mut bench_group = c.benchmark_group(type_name);
let mut bench_prefix = "hlapi".to_string();
@@ -71,170 +81,90 @@ fn bench_fhe_type<FheType>(
let lhs = FheType::encrypt(rng.gen(), client_key);
let rhs = FheType::encrypt(rng.gen(), client_key);
let mut bench_id;
let bench_id = format!("{bench_prefix}::{func_name}::{param_name}::{type_name}");
bench_id = format!("{bench_prefix}::add::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs + &rhs;
res.wait();
let res = func(&lhs, &rhs);
res.wait_bench();
black_box(res)
})
});
write_record(bench_id, "add");
bench_id = format!("{bench_prefix}::overflowing_add::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let (res, flag) = lhs.overflowing_add(&rhs);
res.wait();
black_box((res, flag))
})
});
write_record(bench_id, "overflowing_add");
bench_id = format!("{bench_prefix}::overflowing_sub::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let (res, flag) = lhs.overflowing_sub(&rhs);
res.wait();
black_box((res, flag))
})
});
write_record(bench_id, "overflowing_sub");
bench_id = format!("{bench_prefix}::sub::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs - &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "sub");
bench_id = format!("{bench_prefix}::mul::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs * &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "mul");
bench_id = format!("{bench_prefix}::bitand::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs & &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "bitand");
bench_id = format!("{bench_prefix}::bitor::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs | &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "bitor");
bench_id = format!("{bench_prefix}::bitxor::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs ^ &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "bitxor");
bench_id = format!("{bench_prefix}::left_shift::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs << &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "left_shift");
bench_id = format!("{bench_prefix}::right_shift::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = &lhs >> &rhs;
res.wait();
black_box(res)
})
});
write_record(bench_id, "right_shift");
bench_id = format!("{bench_prefix}::left_rotate::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = (&lhs).rotate_left(&rhs);
res.wait();
black_box(res)
})
});
write_record(bench_id, "left_rotate");
bench_id = format!("{bench_prefix}::right_rotate::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = (&lhs).rotate_right(&rhs);
res.wait();
black_box(res)
})
});
write_record(bench_id, "right_rotate");
bench_id = format!("{bench_prefix}::min::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = lhs.min(&rhs);
res.wait();
black_box(res)
})
});
write_record(bench_id, "min");
bench_id = format!("{bench_prefix}::max::{param_name}::{type_name}");
bench_group.bench_function(&bench_id, |b| {
b.iter(|| {
let res = lhs.max(&rhs);
res.wait();
black_box(res)
})
});
write_record(bench_id, "max");
write_record(bench_id, display_name);
}
macro_rules! bench_type {
($fhe_type:ident) => {
macro_rules! bench_type_op (
(type_name: $fhe_type:ident, display_name: $display_name:literal, operation: $op:ident) => {
::paste::paste! {
fn [<bench_ $fhe_type:snake>](c: &mut Criterion, cks: &ClientKey) {
bench_fhe_type::<$fhe_type>(c, cks, stringify!($fhe_type), $fhe_type::num_bits());
fn [<bench_ $fhe_type:snake _ $op>](c: &mut Criterion, cks: &ClientKey) {
bench_fhe_type_op::<$fhe_type, _, _>(
c,
cks,
stringify!($fhe_type),
$fhe_type::num_bits(),
$display_name,
stringify!($op),
|lhs, rhs| lhs.$op(rhs)
);
}
}
};
);
macro_rules! generate_typed_benches {
($fhe_type:ident) => {
bench_type_op!(type_name: $fhe_type, display_name: "add", operation: add);
bench_type_op!(type_name: $fhe_type, display_name: "overflowing_add", operation: overflowing_add);
bench_type_op!(type_name: $fhe_type, display_name: "sub", operation: sub);
bench_type_op!(type_name: $fhe_type, display_name: "overflowing_sub", operation: overflowing_sub);
bench_type_op!(type_name: $fhe_type, display_name: "mul", operation: mul);
bench_type_op!(type_name: $fhe_type, display_name: "bitand", operation: bitand);
bench_type_op!(type_name: $fhe_type, display_name: "bitor", operation: bitor);
bench_type_op!(type_name: $fhe_type, display_name: "bitxor", operation: bitxor);
bench_type_op!(type_name: $fhe_type, display_name: "left_shift", operation: shl);
bench_type_op!(type_name: $fhe_type, display_name: "right_shift", operation: shr);
bench_type_op!(type_name: $fhe_type, display_name: "left_rotate", operation: rotate_left);
bench_type_op!(type_name: $fhe_type, display_name: "right_rotate", operation: rotate_right);
bench_type_op!(type_name: $fhe_type, display_name: "min", operation: min);
bench_type_op!(type_name: $fhe_type, display_name: "max", operation: max);
};
}
bench_type!(FheUint2);
bench_type!(FheUint4);
bench_type!(FheUint6);
bench_type!(FheUint8);
bench_type!(FheUint10);
bench_type!(FheUint12);
bench_type!(FheUint14);
bench_type!(FheUint16);
bench_type!(FheUint32);
bench_type!(FheUint64);
bench_type!(FheUint128);
// Generate benches for all FheUint types
generate_typed_benches!(FheUint2);
generate_typed_benches!(FheUint4);
generate_typed_benches!(FheUint6);
generate_typed_benches!(FheUint8);
generate_typed_benches!(FheUint10);
generate_typed_benches!(FheUint12);
generate_typed_benches!(FheUint14);
generate_typed_benches!(FheUint16);
generate_typed_benches!(FheUint32);
generate_typed_benches!(FheUint64);
generate_typed_benches!(FheUint128);
macro_rules! run_benches {
($c:expr, $cks:expr, $($fhe_type:ident),+ $(,)?) => {
$(
::paste::paste! {
[<bench_ $fhe_type:snake _add>]($c, $cks);
[<bench_ $fhe_type:snake _overflowing_add>]($c, $cks);
[<bench_ $fhe_type:snake _sub>]($c, $cks);
[<bench_ $fhe_type:snake _overflowing_sub>]($c, $cks);
[<bench_ $fhe_type:snake _mul>]($c, $cks);
[<bench_ $fhe_type:snake _bitand>]($c, $cks);
[<bench_ $fhe_type:snake _bitor>]($c, $cks);
[<bench_ $fhe_type:snake _bitxor>]($c, $cks);
[<bench_ $fhe_type:snake _shl>]($c, $cks);
[<bench_ $fhe_type:snake _shr>]($c, $cks);
[<bench_ $fhe_type:snake _rotate_left>]($c, $cks);
[<bench_ $fhe_type:snake _rotate_right>]($c, $cks);
[<bench_ $fhe_type:snake _min>]($c, $cks);
[<bench_ $fhe_type:snake _max>]($c, $cks);
}
)+
};
}
trait TypeDisplay {
fn fmt(f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
@@ -444,7 +374,7 @@ fn main() {
match env_config.bit_sizes_set {
BitSizesSet::Fast => {
bench_fhe_uint64(&mut c, &cks);
run_benches!(&mut c, &cks, FheUint64);
// KVStore Benches
if benched_device == tfhe::Device::Cpu {
@@ -452,17 +382,11 @@ fn main() {
}
}
_ => {
bench_fhe_uint2(&mut c, &cks);
bench_fhe_uint4(&mut c, &cks);
bench_fhe_uint6(&mut c, &cks);
bench_fhe_uint8(&mut c, &cks);
bench_fhe_uint10(&mut c, &cks);
bench_fhe_uint12(&mut c, &cks);
bench_fhe_uint14(&mut c, &cks);
bench_fhe_uint16(&mut c, &cks);
bench_fhe_uint32(&mut c, &cks);
bench_fhe_uint64(&mut c, &cks);
bench_fhe_uint128(&mut c, &cks);
// Call all benchmarks for all types
run_benches!(
&mut c, &cks, FheUint2, FheUint4, FheUint6, FheUint8, FheUint10, FheUint12,
FheUint14, FheUint16, FheUint32, FheUint64, FheUint128
);
// KVStore Benches
if benched_device == tfhe::Device::Cpu {
@@ -481,5 +405,8 @@ fn main() {
}
}
#[cfg(not(feature = "hpu"))]
oprf_any_range2();
c.final_summary();
}

View File

@@ -0,0 +1,44 @@
use benchmark::params_aliases::BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
use criterion::{black_box, criterion_group, Criterion};
use std::num::NonZeroU64;
use tfhe::{set_server_key, ClientKey, ConfigBuilder, FheUint64, RangeForRandom, Seed, ServerKey};
pub fn oprf_any_range(c: &mut Criterion) {
let bench_name = "hlapi::oprf_any_range";
let mut bench_group = c.benchmark_group(bench_name);
bench_group
.sample_size(15)
.measurement_time(std::time::Duration::from_secs(30));
let param = BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS_TUNIFORM_2M128;
let config = ConfigBuilder::with_custom_parameters(param).build();
let cks = ClientKey::generate(config);
let sks = ServerKey::new(&cks);
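// Install the server key on every rayon worker thread (and on the
// current thread) so benchmark iterations can run in parallel.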
rayon::broadcast(|_| set_server_key(sks.clone()));
set_server_key(sks);
for excluded_upper_bound in [3, 52] {
let range = RangeForRandom::new_from_excluded_upper_bound(
NonZeroU64::new(excluded_upper_bound).unwrap(),
);
let bench_id_oprf = format!("{bench_name}::bound_{excluded_upper_bound}");
bench_group.bench_function(&bench_id_oprf, |b| {
b.iter(|| {
_ = black_box(FheUint64::generate_oblivious_pseudo_random_custom_range(
Seed(0),
&range,
None,
));
})
});
}
bench_group.finish()
}
criterion_group!(oprf_any_range2, oprf_any_range);

View File

@@ -2809,6 +2809,7 @@ mod cuda {
criterion_group!(
default_cuda_dedup_ops,
cuda_add,
cuda_neg,
cuda_mul,
cuda_div_rem,
cuda_bitand,

View File

@@ -629,7 +629,9 @@ mod integer_params {
// operations.
#[cfg(feature = "hpu")]
let params = vec![BENCH_HPU_PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128.into()];
#[cfg(not(feature = "hpu"))]
#[cfg(feature = "gpu")]
let params = vec![BENCH_PARAM_MESSAGE_2_CARRY_2_KS_PBS.into()];
#[cfg(not(any(feature = "gpu", feature = "hpu")))]
let params = vec![BENCH_PARAM_MESSAGE_2_CARRY_2_KS32_PBS.into()];
let params_and_bit_sizes = iproduct!(params, env_config.bit_sizes());

View File

@@ -27,6 +27,7 @@ rand_distr = "0.4.3"
criterion = "0.5.1"
doc-comment = "0.3.3"
serde_json = "1.0.94"
num-bigint = "0.4.6"
# clap has to be pinned as its minimum supported rust version
# changes often between minor releases, which breaks our CI
clap = { version = "=4.5.30", features = ["derive"] }

File diff suppressed because one or more lines are too long

[Image changed: new version, 181 KiB]

View File

@@ -2,14 +2,30 @@
This document explains the mechanism and steps to generate an oblivious encrypted random value using only server keys.
The goal is to give to the server the possibility to generate a random value, which will be obtained in an encrypted format and will remain unknown to the server. The implementation is based on [this article](https://eprint.iacr.org/2024/665).
The goal is to give the server the ability to generate a random value, which will be obtained in an encrypted format and will remain unknown to the server.
This is possible through two methods on `FheUint` and `FheInt`:
The main method for this is `FheUint::generate_oblivious_pseudo_random_custom_range`, which returns an integer in the given range.
Currently the range can only be in the form `[0, excluded_upper_bound[`, with any `excluded_upper_bound` in `[1, 2^64[`.
It follows a distribution close to uniform.
This function guarantees that the norm-1 distance (defined as ∆(P,Q) := 1/2 Sum[ω∈Ω] |P(ω) - Q(ω)|)
between the actual distribution and the target uniform distribution will be below the `max_distance` argument (which must be in ]0, 1[).
The higher the distance, the more dissimilar the actual distribution is from the target uniform distribution.
The default value for `max_distance` is `2^-128` if `None` is provided.
Higher values allow better performance but must be considered carefully in the context of the target application, as they may have serious unintended consequences.
If the range is a power of 2, the distribution is uniform (for any `max_distance`) and the cost is smaller.
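As a sanity check on these guarantees, the bias of reducing a uniform k-bit value modulo the bound has a simple closed form; the derivation below is standard and not quoted from the implementation.
```latex
% Let m be the excluded_upper_bound and x be uniform on [0, 2^k).
% Writing 2^k = qm + r with 0 <= r < m, the value (x mod m) hits r of the
% outcomes with probability (q+1)/2^k and the other m-r outcomes with
% probability q/2^k, so the norm-1 distance to the uniform distribution is
\Delta \;=\; \frac{1}{2}\sum_{\omega=0}^{m-1}\left|P(\omega)-\frac{1}{m}\right|
       \;=\; \frac{r\,(m-r)}{m\,2^{k}},
\qquad r = 2^{k} \bmod m.
% Example: m = 3 gives r in {1, 2}, hence \Delta = 2/(3 * 2^k), and
% \Delta < 2^{-128} requires k >= 128 input random bits.
```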
For powers of 2 specifically, there are two methods on `FheUint` and `FheInt` (based on [this article](https://eprint.iacr.org/2024/665)):
- `generate_oblivious_pseudo_random`, which returns an integer taken uniformly in the full integer range (`[0; 2^N[` for a `FheUintN` and `[-2^(N-1); 2^(N-1)[` for a `FheIntN`).
- `generate_oblivious_pseudo_random_bounded`, which returns an integer taken uniformly in `[0; 2^random_bits_count[`. For a `FheUintN`, we must have `random_bits_count <= N`. For a `FheIntN`, we must have `random_bits_count <= N - 1`.
Both methods functions take a seed `Seed` as input, which could be any `u128` value.
They both rely on the use of the usual server key.
These methods take a seed `Seed` as input, which can be any `u128` value.
They rely on the usual server key.
The output is reproducible, i.e., the function is deterministic in its inputs: given the same hardware, seed, and server key, this function outputs the same encrypted random value.
@@ -18,7 +34,8 @@ Here is an example of the usage:
```rust
use tfhe::prelude::FheDecrypt;
use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, FheInt8, Seed};
use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, FheInt8, RangeForRandom, Seed};
use std::num::NonZeroU64;
pub fn main() {
let config = ConfigBuilder::default().build();
@@ -26,23 +43,30 @@ pub fn main() {
set_server_key(server_key);
let random_bits_count = 3;
let ct_res = FheUint8::generate_oblivious_pseudo_random(Seed(0));
let excluded_upper_bound = NonZeroU64::new(3).unwrap();
let range = RangeForRandom::new_from_excluded_upper_bound(excluded_upper_bound);
// in [0, excluded_upper_bound[ = {0, 1, 2}
let ct_res = FheUint8::generate_oblivious_pseudo_random_custom_range(Seed(0), &range, None);
let dec_result: u8 = ct_res.decrypt(&client_key);
let ct_res = FheUint8::generate_oblivious_pseudo_random_bounded(Seed(0), random_bits_count);
let random_bits_count = 3;
// in [0, 2^8[
let ct_res = FheUint8::generate_oblivious_pseudo_random(Seed(0));
let dec_result: u8 = ct_res.decrypt(&client_key);
// in [0, 2^random_bits_count[ = [0, 8[
let ct_res = FheUint8::generate_oblivious_pseudo_random_bounded(Seed(0), random_bits_count);
let dec_result: u8 = ct_res.decrypt(&client_key);
assert!(dec_result < (1 << random_bits_count));
// in [-2^7, 2^7[
let ct_res = FheInt8::generate_oblivious_pseudo_random(Seed(0));
let dec_result: i8 = ct_res.decrypt(&client_key);
// in [0, 2^random_bits_count[ = [0, 8[
let ct_res = FheInt8::generate_oblivious_pseudo_random_bounded(Seed(0), random_bits_count);
let dec_result: i8 = ct_res.decrypt(&client_key);
assert!(dec_result < (1 << random_bits_count));
}

View File

@@ -141,7 +141,7 @@ Some parameter sets lead to the FHE keys exceeding the 2GB memory limit of WASM,
### Setting up TFHE-rs JS on WASM API for Node.js programs.
To build the JS on WASM bindings for **TFHE-rs**, install [`wasm-pack`](https://rustwasm.github.io/wasm-pack/) and the necessary [`rust toolchain`](https://rustup.rs/). Clone the **TFHE-rs** repository and build using the following commands (this will build using the default branch, you can check out a specific tag depending on your requirements):
To build the JS on WASM bindings for **TFHE-rs**, install [`wasm-pack`](https://drager.github.io/wasm-pack/) and the necessary [`rust toolchain`](https://rustup.rs/). Clone the **TFHE-rs** repository and build using the following commands (this will build using the default branch, you can check out a specific tag depending on your requirements):
```shell
$ git clone https://github.com/zama-ai/tfhe-rs.git
@@ -150,7 +150,7 @@ Cloning into 'tfhe-rs'...
Resolving deltas: 100% (3866/3866), done.
$ cd tfhe-rs
$ cd tfhe
$ rustup run wasm-pack build --release --target=nodejs --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api
$ wasm-pack build --release --target=nodejs --features=boolean-client-js-wasm-api,shortint-client-js-wasm-api
[INFO]: Compiling to Wasm...
...
[INFO]: :-) Your wasm pkg is ready to publish at ...
@@ -164,7 +164,7 @@ After the build, a new directory **pkg** is available in the `tfhe` directory.
```shell
$ ls pkg
LICENSE index.html package.json tfhe.d.ts tfhe.js tfhe_bg.txt tfhe_bg.wasm tfhe_bg.wasm.d.ts
LICENSE README.md package.json tfhe.d.ts tfhe.js tfhe_bg.wasm tfhe_bg.wasm.d.ts
$
```

View File

@@ -19,11 +19,13 @@ The overall process to write a homomorphic program is the same for all types. T
This library has different modules, with different levels of abstraction.
There is the **core\_crypto** module, which is the lowest level API with the primitive functions and types of the TFHE scheme.
There is the [core\_crypto](../core-crypto-api/presentation.md) module, which is the lowest level API with the primitive functions and types of the TFHE scheme.
Above the core\_crypto module, there are the **Boolean**, **shortint**, and **integer** modules, which contain easy to use APIs enabling evaluation of Boolean, short integer, and integer circuits.
Above the core\_crypto module, there are the [Boolean](boolean/README.md), [shortint](shortint/README.md), and [integer](integer/README.md) modules, which contain easy to use APIs enabling evaluation of Boolean, short integer, and integer circuits.
Finally, there is the high-level module built on top of the Boolean, shortint, integer modules. This module is meant to abstract cryptographic complexities: no cryptographical knowledge is required to start developing an FHE application. Another benefit of the high-level module is the drastically simplified development process compared to lower level modules.
Finally, there is the high-level module built on top of the shortint and integer modules. This module is meant to abstract cryptographic complexities: no cryptographic knowledge is required to start developing an FHE application. Another benefit of the high-level module is the drastically simplified development process compared to lower-level modules.
![API levels diagram](../../.gitbook/assets/api-levels.svg)
#### high-level API

View File

@@ -540,10 +540,12 @@ pub fn sup_diff(cumulative_bins: &[u64], theoretical_cdf: &[f64]) -> f64 {
.iter()
.copied()
.zip_eq(theoretical_cdf.iter().copied())
.map(|(x, theoretical_cdf)| {
.enumerate()
.map(|(i, (x, theoretical_cdf))| {
let empirical_cdf = x as f64 / number_of_samples as f64;
if theoretical_cdf == 1.0 {
if i == cumulative_bins.len() - 1 {
assert_eq!(theoretical_cdf, 1.0);
assert_eq!(empirical_cdf, 1.0);
}
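For context on what this hunk adjusts, here is a simplified, self-contained sketch of the statistic; it is not the actual tfhe-rs function (which, as shown above, also asserts that both CDFs reach 1.0 at the final bin).
```rust
// Simplified sup-norm CDF distance: the largest absolute gap between the
// empirical CDF (built from cumulative bin counts) and the theoretical
// CDF. Assumes the last cumulative bin holds the total sample count, so
// both CDFs end at 1.0.
fn sup_diff(cumulative_bins: &[u64], theoretical_cdf: &[f64]) -> f64 {
    assert_eq!(cumulative_bins.len(), theoretical_cdf.len());
    let number_of_samples = *cumulative_bins.last().unwrap() as f64;
    cumulative_bins
        .iter()
        .zip(theoretical_cdf.iter())
        .map(|(&count, &cdf)| (count as f64 / number_of_samples - cdf).abs())
        .fold(0.0_f64, f64::max)
}
```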

View File

@@ -4,7 +4,9 @@ use crate::high_level_api::keys::InternalServerKey;
use crate::high_level_api::re_randomization::ReRandomizationMetadata;
#[cfg(feature = "gpu")]
use crate::integer::gpu::ciphertext::{CudaSignedRadixCiphertext, CudaUnsignedRadixCiphertext};
use crate::shortint::MessageModulus;
use crate::{FheInt, Seed};
use std::num::NonZeroU64;
impl<Id: FheUintId> FheUint<Id> {
/// Generates an encrypted unsigned integer
@@ -92,7 +94,7 @@ impl<Id: FheUintId> FheUint<Id> {
}
})
}
/// Generates an encrypted `num_block` blocks unsigned integer
/// Generates an encrypted unsigned integer
/// taken uniformly in `[0, 2^random_bits_count[` using the given seed.
/// The encrypted value is oblivious to the server.
/// It can be useful to make server random generation deterministic.
@@ -150,6 +152,103 @@ impl<Id: FheUintId> FheUint<Id> {
}
})
}
/// Generates an encrypted unsigned integer
/// taken almost uniformly in the given range using the given seed.
/// Currently the range can only be in the form `[0, excluded_upper_bound[`
/// with any `excluded_upper_bound` in `[1, 2^64[`.
///
/// The encrypted value is oblivious to the server.
/// It can be useful to make server random generation deterministic.
///
/// This function guarantees that the norm-1 distance
/// (defined as ∆(P,Q) := 1/2 Sum[ω∈Ω] |P(ω) - Q(ω)|)
/// between the actual distribution and the target uniform distribution
/// will be below the `max_distance` argument (which must be in ]0, 1[).
/// The higher the distance, the more dissimilar the actual distribution is
/// from the target uniform distribution.
///
/// The default value for `max_distance` is `2^-128` if `None` is provided.
///
/// Higher values allow better performance but must be considered carefully in the context of
/// the target application, as they may have serious unintended consequences.
///
/// If the range is a power of 2, the distribution is uniform (for any `max_distance`) and
/// the cost is smaller.
///
/// ```rust
/// use std::num::NonZeroU64;
/// use tfhe::prelude::FheDecrypt;
/// use tfhe::{generate_keys, set_server_key, ConfigBuilder, FheUint8, RangeForRandom, Seed};
///
/// let config = ConfigBuilder::default().build();
/// let (client_key, server_key) = generate_keys(config);
///
/// set_server_key(server_key);
///
/// let excluded_upper_bound = NonZeroU64::new(3).unwrap();
///
/// let range = RangeForRandom::new_from_excluded_upper_bound(excluded_upper_bound);
///
/// let ct_res = FheUint8::generate_oblivious_pseudo_random_custom_range(Seed(0), &range, None);
///
/// let dec_result: u16 = ct_res.decrypt(&client_key);
/// assert!(dec_result < excluded_upper_bound.get() as u16);
/// ```
pub fn generate_oblivious_pseudo_random_custom_range(
seed: Seed,
range: &RangeForRandom,
max_distance: Option<f64>,
) -> Self {
let excluded_upper_bound = range.excluded_upper_bound;
if excluded_upper_bound.is_power_of_two() {
let random_bits_count = excluded_upper_bound.ilog2() as u64;
Self::generate_oblivious_pseudo_random_bounded(seed, random_bits_count)
} else {
let max_distance = max_distance.unwrap_or_else(|| 2_f64.powi(-128));
assert!(
0_f64 < max_distance && max_distance < 1_f64,
"max_distance (={max_distance}) should be in ]0, 1["
);
global_state::with_internal_keys(|key| match key {
InternalServerKey::Cpu(key) => {
let message_modulus = key.message_modulus();
let num_input_random_bits = num_input_random_bits_for_max_distance(
excluded_upper_bound,
max_distance,
message_modulus,
);
let num_blocks_output = Id::num_blocks(key.message_modulus()) as u64;
let ct = key
.pbs_key()
.par_generate_oblivious_pseudo_random_unsigned_custom_range(
seed,
num_input_random_bits,
excluded_upper_bound,
num_blocks_output,
);
Self::new(ct, key.tag.clone(), ReRandomizationMetadata::default())
}
#[cfg(feature = "gpu")]
InternalServerKey::Cuda(_cuda_key) => {
panic!("Gpu does not support this operation yet.")
}
#[cfg(feature = "hpu")]
InternalServerKey::Hpu(_device) => {
panic!("Hpu does not support this operation yet.")
}
})
}
}
#[cfg(feature = "gpu")]
/// Returns the amount of memory required to execute generate_oblivious_pseudo_random_bounded
///
@@ -273,7 +372,7 @@ impl<Id: FheIntId> FheInt<Id> {
}
})
}
/// Generates an encrypted `num_block` blocks signed integer
/// Generates an encrypted signed integer
/// taken uniformly in `[0, 2^random_bits_count[` using the given seed.
/// The encrypted value is oblivious to the server.
/// This can be useful to make server-side random generation deterministic.
@@ -367,10 +466,350 @@ impl<Id: FheIntId> FheInt<Id> {
}
}
pub struct RangeForRandom {
excluded_upper_bound: NonZeroU64,
}
impl RangeForRandom {
pub fn new_from_excluded_upper_bound(excluded_upper_bound: NonZeroU64) -> Self {
Self {
excluded_upper_bound,
}
}
}
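// Smallest number of input random bits, counted in whole shortint blocks (each
// block contributes log2(message_modulus) bits), such that the norm-1 distance
// between the produced distribution and the uniform distribution on
// [0, excluded_upper_bound[ is below `max_distance`.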
fn num_input_random_bits_for_max_distance(
excluded_upper_bound: NonZeroU64,
max_distance: f64,
message_modulus: MessageModulus,
) -> u64 {
assert!(message_modulus.0.is_power_of_two());
let log_message_modulus = message_modulus.0.ilog2() as u64;
let mut random_block_count = 1;
let random_block_count = loop {
let random_bit_count = random_block_count * log_message_modulus;
let distance = distance(excluded_upper_bound.get(), random_bit_count);
if distance < max_distance {
break random_block_count;
}
random_block_count += 1;
};
random_block_count * log_message_modulus
}
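// Norm-1 distance to uniform of the clear model floor(u * B / 2^k), where u is
// uniform in [0, 2^k[ and B = excluded_upper_bound: writing r = 2^k mod B,
// r of the B output values get ceil(2^k / B) preimages and the remaining B - r
// get floor(2^k / B), which works out to r * (B - r) / (2^k * B).
// E.g. for B = 3, r alternates between 1 and 2, so r * (B - r) = 2 and the
// distance halves with every extra input bit.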
fn distance(excluded_upper_bound: u64, random_bit_count: u64) -> f64 {
let remainder = mod_pow_2(random_bit_count, excluded_upper_bound);
remainder as f64 * (excluded_upper_bound - remainder) as f64
/ (2_f64.powi(random_bit_count as i32) * excluded_upper_bound as f64)
}
// Computes 2^exponent % modulus
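// using square-and-multiply; intermediates live in u128 so `base * base` cannot
// overflow for any u64 modulus.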
fn mod_pow_2(exponent: u64, modulus: u64) -> u64 {
assert_ne!(modulus, 0);
if modulus == 1 {
return 0;
}
let mut result: u128 = 1;
let mut base: u128 = 2; // base holds 2^(2^i) mod modulus after i squarings
// The exponent stays a u64; base and result use u128 so products cannot overflow
let mut exp = exponent;
let mod_val = modulus as u128;
while exp > 0 {
// If exponent is odd, multiply result with base
if exp % 2 == 1 {
result = (result * base) % mod_val;
}
// Square the base
base = (base * base) % mod_val;
// Divide exponent by 2
exp /= 2;
}
result as u64
}
#[cfg(test)]
mod test {
use super::*;
use crate::integer::server_key::radix_parallel::tests_unsigned::test_oprf::{
oprf_density_function, p_value_upper_bound_oprf_almost_uniformity_from_values,
probability_density_function_from_density,
};
use crate::prelude::FheDecrypt;
use crate::shortint::oprf::test::test_uniformity;
use crate::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128;
use crate::{generate_keys, set_server_key, ClientKey, ConfigBuilder, FheUint8, Seed};
use num_bigint::BigUint;
use rand::{thread_rng, Rng};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
// Helper: The "Oracle" implementation using BigInt
// This is slow but mathematically guaranteed to be correct.
fn oracle_mod_pow_2(exponent: u64, modulus: u64) -> u64 {
assert_ne!(modulus, 0);
if modulus == 1 {
return 0;
}
let base = BigUint::from(2u32);
let exp = BigUint::from(exponent);
let modu = BigUint::from(modulus);
let res = base.modpow(&exp, &modu);
res.iter_u64_digits().next().unwrap_or(0)
}
#[test]
fn test_edge_cases() {
// 2^0 % 10 = 1
assert_eq!(mod_pow_2(0, 10), 1, "Failed exponent 0");
// 2^10 % 1 = 0
assert_eq!(mod_pow_2(10, 1), 0, "Failed modulus 1");
// 2^1 % 10 = 2
assert_eq!(mod_pow_2(1, 10), 2, "Failed exponent 1");
// 2^3 % 5 = 8 % 5 = 3
assert_eq!(mod_pow_2(3, 5), 3, "Failed small calc");
}
#[test]
fn test_boundaries_and_overflow() {
assert_eq!(mod_pow_2(2, u64::MAX), 4);
assert_eq!(mod_pow_2(u64::MAX, 3), 2);
assert_eq!(mod_pow_2(5, 32), 0);
}
#[test]
fn test_against_oracle() {
let mut rng = thread_rng();
for _ in 0..1_000_000 {
let exp: u64 = rng.gen();
let mod_val: u64 = rng.gen();
let mod_val = if mod_val == 0 { 1 } else { mod_val };
let expected = oracle_mod_pow_2(exp, mod_val);
let actual = mod_pow_2(exp, mod_val);
assert_eq!(
actual, expected,
"Mismatch! 2^{exp} % {mod_val} => Ours: {actual}, Oracle: {expected}",
);
}
}
#[test]
fn test_distance_with_uniform() {
for excluded_upper_bound in 1..20 {
for num_input_random_bits in 0..20 {
let density = oprf_density_function(excluded_upper_bound, num_input_random_bits);
let theoretical_pdf = probability_density_function_from_density(&density);
let p_uniform = 1. / excluded_upper_bound as f64;
let actual_distance: f64 = 1. / 2.
* theoretical_pdf
.iter()
.map(|p| (*p - p_uniform).abs())
.sum::<f64>();
let theoretical_distance = distance(excluded_upper_bound, num_input_random_bits);
assert!(
(theoretical_distance - actual_distance).abs()
<= theoretical_distance / 1_000_000.,
"{theoretical_distance} != {actual_distance}"
);
}
}
}
#[test]
fn test_uniformity_scalar_mul_shift() {
let max_distance = 2_f64.powi(-20);
let message_modulus = MessageModulus(4);
let excluded_upper_bound = 3;
let num_input_random_bits = num_input_random_bits_for_max_distance(
NonZeroU64::new(excluded_upper_bound).unwrap(),
max_distance,
message_modulus,
);
let sample_count: usize = 10_000_000;
let p_value_limit: f64 = 0.001;
// The distribution is not exactly uniform.
// This check ensures that, with the given low max_distance,
// the distribution is indistinguishable from uniform at the given sample count
test_uniformity(sample_count, p_value_limit, excluded_upper_bound, |_seed| {
oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits)
});
}
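// Clear-value model of the encrypted computation: draw `num_input_random_bits`
// uniform bits u and map them to [0, excluded_upper_bound[ as
// floor(u * excluded_upper_bound / 2^num_input_random_bits).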
fn oprf_clear_equivalent(excluded_upper_bound: u64, num_input_random_bits: u64) -> u64 {
let random_input_upper_bound = 1 << num_input_random_bits;
let random_input = thread_rng().gen_range(0..random_input_upper_bound);
(random_input * excluded_upper_bound) >> num_input_random_bits
}
#[test]
fn test_uniformity_generate_oblivious_pseudo_random_custom_range() {
let base_sample_count: usize = 10_000;
let p_value_limit: f64 = 0.001;
let params = PARAM_MESSAGE_2_CARRY_2_KS32_PBS_TUNIFORM_2M128;
let config = ConfigBuilder::with_custom_parameters(params).build();
let (cks, sks) = generate_keys(config);
rayon::broadcast(|_| set_server_key(sks.clone()));
let message_modulus = params.message_modulus;
// `max_distance` values [0.7, 0.1] are chosen so that `num_input_random_bits`
// is [2, 4] for any of the listed `excluded_upper_bound`s
for (expected_num_input_random_bits, max_distance, excluded_upper_bounds) in
[(2, 0.7, [3, 5, 6, 7]), (4, 0.1, [3, 5, 6, 7])]
{
for excluded_upper_bound in excluded_upper_bounds {
let sample_count = base_sample_count * excluded_upper_bound as usize;
let excluded_upper_bound = NonZeroU64::new(excluded_upper_bound).unwrap();
let num_input_random_bits = num_input_random_bits_for_max_distance(
excluded_upper_bound,
max_distance,
message_modulus,
);
assert_eq!(num_input_random_bits, expected_num_input_random_bits);
test_uniformity_generate_oblivious_pseudo_random_custom_range2(
sample_count,
p_value_limit,
message_modulus,
&cks,
excluded_upper_bound,
max_distance,
);
}
}
}
fn test_uniformity_generate_oblivious_pseudo_random_custom_range2(
sample_count: usize,
p_value_limit: f64,
message_modulus: MessageModulus,
cks: &ClientKey,
excluded_upper_bound: NonZeroU64,
max_distance: f64,
) {
let num_input_random_bits = num_input_random_bits_for_max_distance(
excluded_upper_bound,
max_distance,
message_modulus,
);
let range = RangeForRandom::new_from_excluded_upper_bound(excluded_upper_bound);
let real_values: Vec<u64> = (0..sample_count)
.into_par_iter()
.map(|_| {
let img = FheUint8::generate_oblivious_pseudo_random_custom_range(
Seed(rand::thread_rng().gen::<u128>()),
&range,
Some(max_distance),
);
img.decrypt(cks)
})
.collect();
let excluded_upper_bound = excluded_upper_bound.get();
let uniform_values: Vec<u64> = (0..sample_count)
.into_par_iter()
.map(|_| thread_rng().gen_range(0..excluded_upper_bound))
.collect();
let clear_oprf_value_lower_num_input_random_bits = (0..sample_count)
.into_par_iter()
.map(|_| oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits - 1))
.collect();
let clear_oprf_value_same_num_input_random_bits = (0..sample_count)
.into_par_iter()
.map(|_| oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits))
.collect();
let clear_oprf_value_higher_num_input_random_bits = (0..sample_count)
.into_par_iter()
.map(|_| oprf_clear_equivalent(excluded_upper_bound, num_input_random_bits + 1))
.collect();
for (values, should_have_low_p_value) in [
(&real_values, false),
// to test that the same distribution passes
(&clear_oprf_value_same_num_input_random_bits, false),
// to test that other distributions don't pass
// (makes sure the test is statistically powerful)
(&uniform_values, true),
(&clear_oprf_value_lower_num_input_random_bits, true),
(&clear_oprf_value_higher_num_input_random_bits, true),
] {
let p_value_upper_bound = p_value_upper_bound_oprf_almost_uniformity_from_values(
values,
num_input_random_bits,
excluded_upper_bound,
);
println!("p_value_upper_bound: {p_value_upper_bound}");
if should_have_low_p_value {
assert!(
p_value_upper_bound < p_value_limit,
"p_value_upper_bound (={p_value_upper_bound}) expected to be smaller than {p_value_limit}"
);
} else {
assert!(
p_value_limit < p_value_upper_bound,
"p_value_upper_bound (={p_value_upper_bound}) expected to be greater than {p_value_limit}"
);
}
}
}
}
#[cfg(test)]
#[cfg(feature = "gpu")]
#[allow(unused_imports)]
mod test {
mod test_gpu {
use crate::prelude::*;
use crate::{
generate_keys, set_server_key, ConfigBuilder, FheInt128, FheUint32, FheUint64, GpuIndex,

View File

@@ -48,6 +48,7 @@ macro_rules! export_concrete_array_types {
}
pub use crate::core_crypto::commons::math::random::{Seed, XofSeed};
pub use crate::high_level_api::integers::oprf::RangeForRandom;
pub use crate::integer::server_key::MatchValues;
use crate::{error, Error, Versionize};
use backward_compatibility::compressed_ciphertext_list::SquashedNoiseCiphertextStateVersions;

View File

@@ -1,4 +1,5 @@
use crate::core_crypto::gpu::entities::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::gpu::CudaStreams;
@@ -16,7 +17,8 @@ use crate::integer::gpu::ciphertext::CudaRadixCiphertext;
use crate::integer::gpu::server_key::CudaBootstrappingKey;
use crate::integer::gpu::{
cuda_backend_compress, cuda_backend_decompress, cuda_backend_get_compression_size_on_gpu,
cuda_backend_get_decompression_size_on_gpu, cuda_memcpy_async_gpu_to_gpu, PBSType,
cuda_backend_get_decompression_size_on_gpu, cuda_memcpy_async_gpu_to_gpu, extract_glwe_async,
PBSType,
};
use crate::prelude::CastInto;
use crate::shortint::ciphertext::{
@@ -197,6 +199,30 @@ impl<T: UnsignedInteger> CudaPackedGlweCiphertextList<T> {
meta: self.meta,
}
}
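/// Copies the GLWE ciphertext at `glwe_index` out of the packed list into a
/// fresh single-element `CudaGlweCiphertextList`, synchronizing the streams
/// before returning the result.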
pub fn extract_glwe(
&self,
glwe_index: usize,
streams: &CudaStreams,
) -> CudaGlweCiphertextList<T> {
let meta = self
.meta
.as_ref()
.expect("CudaPackedGlweCiphertextList meta must be set to extract GLWE");
let mut output_cuda_glwe_list = CudaGlweCiphertextList::new(
meta.glwe_dimension,
meta.polynomial_size,
GlweCiphertextCount(1),
meta.ciphertext_modulus,
streams,
);
unsafe {
extract_glwe_async(streams, &mut output_cuda_glwe_list, self, glwe_index as u32);
}
streams.synchronize();
output_cuda_glwe_list
}
}
impl<T: UnsignedInteger> Clone for CudaPackedGlweCiphertextList<T> {

View File

@@ -7,6 +7,7 @@ pub mod server_key;
#[cfg(feature = "zk-pok")]
pub mod zk;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::lwe_compact_ciphertext_list::CudaLweCompactCiphertextList;
@@ -10423,3 +10424,44 @@ pub unsafe fn unchecked_small_scalar_mul_integer_async(
carry_modulus.0 as u32,
);
}
#[allow(clippy::too_many_arguments)]
/// # Safety
///
/// - [CudaStreams::synchronize] __must__ be called after this function as soon as synchronization
/// is required
pub unsafe fn extract_glwe_async<T: UnsignedInteger>(
streams: &CudaStreams,
glwe_array_out: &mut CudaGlweCiphertextList<T>,
glwe_list: &CudaPackedGlweCiphertextList<T>,
glwe_index: u32,
) {
assert_eq!(
streams.gpu_indexes[0],
glwe_array_out.0.d_vec.gpu_index(0),
"GPU error: all data should reside on the same GPU."
);
assert_eq!(
streams.gpu_indexes[0],
glwe_list.data.gpu_index(0),
"GPU error: all data should reside on the same GPU."
);
let packed_glwe_list_ffi = prepare_cuda_packed_glwe_ct_ffi(glwe_list);
if T::BITS == 128 {
cuda_integer_extract_glwe_128(
streams.ffi(),
glwe_array_out.0.d_vec.as_mut_c_ptr(0),
&raw const packed_glwe_list_ffi,
glwe_index,
);
} else if T::BITS == 64 {
cuda_integer_extract_glwe_64(
streams.ffi(),
glwe_array_out.0.d_vec.as_mut_c_ptr(0),
&raw const packed_glwe_list_ffi,
glwe_index,
);
} else {
panic!("Unsupported integer size for CUDA GLWE extraction");
}
}

View File

@@ -0,0 +1,757 @@
use super::utils::noise_simulation::{CudaDynLwe, CudaSideResources};
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{GlweCiphertext, LweCiphertext};
use crate::integer::compression_keys::CompressionPrivateKeys;
use crate::integer::gpu::list_compression::server_keys::CudaCompressionKey;
use crate::integer::gpu::server_key::radix::tests_noise_distribution::utils::noise_simulation::cuda_glwe_list_to_glwe_ciphertext;
use crate::integer::gpu::server_key::radix::tests_unsigned::create_gpu_parameterized_test;
use crate::integer::gpu::server_key::radix::CudaUnsignedRadixCiphertext;
use crate::integer::gpu::CudaServerKey;
use crate::integer::{ClientKey, CompressedServerKey, IntegerCiphertext};
use crate::shortint::ciphertext::{Ciphertext, Degree, NoiseLevel};
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
use crate::shortint::engine::ShortintEngine;
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
use crate::shortint::parameters::{CompressionParameters, MetaParameters, Variance};
use crate::shortint::server_key::tests::noise_distribution::br_dp_packingks_ms::br_dp_packing_ks_ms;
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
NoiseSimulationGlwe, NoiseSimulationLwe, NoiseSimulationLweFourierBsk,
NoiseSimulationLwePackingKeyswitchKey, NoiseSimulationModulus,
};
use crate::shortint::server_key::tests::noise_distribution::utils::{
expected_pfail_for_precision, mean_and_variance_check, normality_check, pfail_check,
precision_with_padding, update_ap_params_msg_and_carry_moduli, DecryptionAndNoiseResult,
NoiseSample, PfailAndPrecision, PfailTestMeta, PfailTestResult,
};
use crate::shortint::server_key::tests::noise_distribution::{
should_run_short_pfail_tests_debug, should_use_single_key_debug,
};
use crate::shortint::{
AtomicPatternParameters, CarryModulus, MessageModulus, ShortintEncoding, ShortintParameterSet,
};
use crate::GpuIndex;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
pub const SAMPLES_PER_MSG_PACKING_KS_NOISE: usize = 1000;
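// Sanity check: feed the PBS results of the br/dp/packing-ks/ms helper circuit
// through the real GPU compression path, extract the first packed GLWE, and
// assert it matches the circuit's after-modulus-switch output (with the unfilled
// bodies of the reference zeroed out).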
fn sanity_check_encrypt_br_dp_packing_ks_ms(meta_params: MetaParameters) {
let (params, comp_params) = (
meta_params.compute_parameters,
meta_params.compression_parameters.unwrap(),
);
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let private_compression_key = cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) =
cks.new_compressed_compression_decompression_keys(&private_compression_key);
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
// The multiplication done in the compression moves the message to the top of the
// carry space; multiplying by the carry modulus achieves that
let dp_scalar = params.carry_modulus().0;
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
.map(|_| {
cks.key
.encrypt_noiseless_pbs_input_dyn_lwe(br_input_modulus_log, 0)
})
.collect();
let d_input_zeros: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), &streams);
CudaDynLwe::U64(d_ct_input)
})
.collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
let (d_before_packing, _after_packing, d_after_ms) = br_dp_packing_ks_ms(
d_input_zeros,
&cuda_sks,
&d_accumulator,
dp_scalar,
&cuda_compression_key.packing_key_switching_key,
storage_modulus_log,
&mut cuda_side_resources,
);
let compression_inputs: Vec<_> = d_before_packing
.into_iter()
.map(|(_input, pbs_result, _dp_result)| {
let pbs_result_list_cpu = pbs_result.as_lwe_64().to_lwe_ciphertext_list(&streams);
let pbs_result_cpu = LweCiphertext::from_container(
pbs_result_list_cpu.clone().into_container(),
pbs_result_list_cpu.ciphertext_modulus(),
);
let cpu_ct = Ciphertext::new(
pbs_result_cpu,
Degree::new(params.message_modulus().0 - 1),
NoiseLevel::NOMINAL,
params.message_modulus(),
params.carry_modulus(),
params.atomic_pattern(),
);
let radix_ct = crate::integer::RadixCiphertext::from_blocks(vec![cpu_ct]);
let d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&radix_ct, &streams);
d_ct.ciphertext
})
.collect();
let gpu_compressed =
cuda_compression_key.compress_ciphertexts_into_list(&compression_inputs, &streams);
let gpu_extracted = gpu_compressed.extract_glwe(0, &streams);
let extracted_list = gpu_extracted.to_glwe_ciphertext_list(&streams);
let extracted_glwe = GlweCiphertext::from_container(
extracted_list.clone().into_container(),
extracted_list.polynomial_size(),
extracted_list.ciphertext_modulus(),
);
let after_ms_list = d_after_ms.to_glwe_ciphertext_list(&streams);
let mut after_ms = GlweCiphertext::from_container(
after_ms_list.clone().into_container(),
after_ms_list.polynomial_size(),
after_ms_list.ciphertext_modulus(),
);
// Bodies that were not filled are discarded
after_ms.get_mut_body().as_mut()[lwe_per_glwe.0..].fill(0);
assert_eq!(after_ms.as_view(), extracted_glwe.as_view());
}
create_gpu_parameterized_test!(sanity_check_encrypt_br_dp_packing_ks_ms {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
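// Runs the br/dp/packing-ks/ms circuit once and returns, per input LWE, the
// decryption-and-noise results for (input, after-PBS, after-dot-product), plus
// the after-packing and after-modulus-switch GLWE results.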
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
fn encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
params: AtomicPatternParameters,
comp_params: CompressionParameters,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
single_compression_private_key: &CompressionPrivateKeys,
single_cuda_compression_key: &CudaCompressionKey,
msg: u64,
streams: &CudaStreams,
) -> (
Vec<(
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
)>,
Vec<DecryptionAndNoiseResult>,
Vec<DecryptionAndNoiseResult>,
) {
let mut engine = ShortintEngine::new();
let thread_cks: crate::integer::ClientKey;
let thread_cuda_sks: CudaServerKey;
let thread_compression_private_key;
let thread_cuda_compression_key;
let (cks, cuda_sks, compression_private_key, cuda_compression_key) =
if should_use_single_key_debug() {
(
single_cks,
single_cuda_sks,
single_compression_private_key,
single_cuda_compression_key,
)
} else {
let block_params: ShortintParameterSet = params.into();
thread_cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key =
CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
thread_cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, streams);
thread_compression_private_key = thread_cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) = thread_cks
.new_compressed_compression_decompression_keys(&thread_compression_private_key);
thread_cuda_compression_key = compressed_compression_key.decompress_to_cuda(streams);
(
&thread_cks,
&thread_cuda_sks,
&thread_compression_private_key,
&thread_cuda_compression_key,
)
};
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
.map(|_| {
cks.key.encrypt_noiseless_pbs_input_dyn_lwe_with_engine(
br_input_modulus_log,
msg,
&mut engine,
)
})
.collect();
let d_input_zeros: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), streams);
CudaDynLwe::U64(d_ct_input)
})
.collect();
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, streams);
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(streams, cuda_block_info))
.collect();
let dp_scalar = params.carry_modulus().0;
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
let (d_before_packing, d_after_packing, d_after_ms) = br_dp_packing_ks_ms(
d_input_zeros,
cuda_sks,
&d_accumulator,
dp_scalar,
&cuda_compression_key.packing_key_switching_key,
storage_modulus_log,
&mut cuda_side_resources,
);
let compute_large_lwe_secret_key = cks.key.encryption_key();
let compression_glwe_secret_key = &compression_private_key.key.post_packing_ks_key;
let compute_encoding = cuda_sks.encoding();
let compression_encoding = ShortintEncoding {
carry_modulus: CarryModulus(1),
..compute_encoding
};
let after_packing = cuda_glwe_list_to_glwe_ciphertext(&d_after_packing, streams);
let after_ms = cuda_glwe_list_to_glwe_ciphertext(&d_after_ms, streams);
(
d_before_packing
.into_iter()
.map(|(d_input, d_pbs_result, d_dp_result)| {
let input = d_input.as_ct_64_cpu(streams);
let pbs_result = d_pbs_result.as_ct_64_cpu(streams);
let dp_result = d_dp_result.as_ct_64_cpu(streams);
(
match &cks.key.atomic_pattern {
AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
DecryptionAndNoiseResult::new_from_lwe(
&input,
&standard_atomic_pattern_client_key.lwe_secret_key,
msg,
&compute_encoding,
)
}
AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
panic!("KS32 Atomic Pattern not supported on GPU tests yet");
}
},
DecryptionAndNoiseResult::new_from_lwe(
&pbs_result,
&compute_large_lwe_secret_key,
msg,
&compute_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&dp_result,
&compute_large_lwe_secret_key,
msg,
&compression_encoding,
),
)
})
.collect(),
DecryptionAndNoiseResult::new_from_glwe(
&after_packing,
compression_glwe_secret_key,
compression_private_key.key.params.lwe_per_glwe(),
msg,
&compression_encoding,
),
DecryptionAndNoiseResult::new_from_glwe(
&after_ms,
compression_glwe_secret_key,
compression_private_key.key.params.lwe_per_glwe(),
msg,
&compression_encoding,
),
)
}
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
fn encrypt_br_dp_packing_ks_ms_noise_helper_gpu(
params: AtomicPatternParameters,
comp_params: CompressionParameters,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
single_compression_private_key: &CompressionPrivateKeys,
single_cuda_compression_key: &CudaCompressionKey,
msg: u64,
streams: &CudaStreams,
) -> (
Vec<(NoiseSample, NoiseSample, NoiseSample)>,
Vec<NoiseSample>,
Vec<NoiseSample>,
) {
let (before_packing, after_packing, after_ms) = encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
params,
comp_params,
single_cks,
single_cuda_sks,
single_compression_private_key,
single_cuda_compression_key,
msg,
streams,
);
(
before_packing
.into_iter()
.map(|(input, after_pbs, after_dp)| {
(
input
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_pbs
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_dp
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
)
})
.collect(),
after_packing
.into_iter()
.map(|x| {
x.get_noise_if_decryption_was_correct()
.expect("Decryption Failed")
})
.collect(),
after_ms
.into_iter()
.map(|x| {
x.get_noise_if_decryption_was_correct()
.expect("Decryption Failed")
})
.collect(),
)
}
#[allow(clippy::type_complexity, clippy::too_many_arguments)]
fn encrypt_br_dp_packing_ks_ms_pfail_helper_gpu(
params: AtomicPatternParameters,
comp_params: CompressionParameters,
single_cks: &ClientKey,
single_cuda_sks: &CudaServerKey,
single_compression_private_key: &CompressionPrivateKeys,
single_cuda_compression_key: &CudaCompressionKey,
msg: u64,
streams: &CudaStreams,
) -> Vec<DecryptionAndNoiseResult> {
let (_before_packing, _after_packing, after_ms) = encrypt_br_dp_packing_ks_ms_inner_helper_gpu(
params,
comp_params,
single_cks,
single_cuda_sks,
single_compression_private_key,
single_cuda_compression_key,
msg,
streams,
);
after_ms
}
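// Noise check: compares the measured noise of the GPU br/dp/packing-ks/ms
// circuit against the analytical simulation (normality of the samples before
// the modulus switch, mean and variance after it).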
fn noise_check_encrypt_br_dp_packing_ks_ms_noise_gpu(meta_params: MetaParameters) {
let (params, comp_params) = (
meta_params.compute_parameters,
meta_params.compression_parameters.unwrap(),
);
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let private_compression_key = cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) =
cks.new_compressed_compression_decompression_keys(&private_compression_key);
let compression_key = compressed_compression_key.decompress();
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
let noise_simulation_bsk =
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(params);
let noise_simulation_packing_key =
NoiseSimulationLwePackingKeyswitchKey::new_from_comp_parameters(params, comp_params);
assert!(noise_simulation_bsk.matches_actual_bsk_gpu(&cuda_sks.bootstrapping_key));
assert!(noise_simulation_packing_key.matches_actual_shortint_comp_key(&compression_key.key));
// The multiplication done in the compression moves the message to the top of the
// carry space; multiplying by the carry modulus achieves that
let dp_scalar = params.carry_modulus().0;
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
noise_simulation_bsk.output_glwe_size().to_glwe_dimension(),
noise_simulation_bsk.output_polynomial_size(),
Variance(0.0),
noise_simulation_bsk.modulus(),
);
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
let storage_modulus_log = cuda_compression_key.storage_log_modulus;
let br_input_modulus_log = cuda_sks.br_input_modulus_log();
let (_before_packing_sim, _after_packing_sim, after_ms_sim) = {
let noise_simulation = NoiseSimulationLwe::new(
cks.parameters().lwe_dimension(),
Variance(0.0),
NoiseSimulationModulus::from_ciphertext_modulus(cks.parameters().ciphertext_modulus()),
);
br_dp_packing_ks_ms(
vec![noise_simulation; lwe_per_glwe.0],
&noise_simulation_bsk,
&noise_simulation_accumulator,
dp_scalar,
&noise_simulation_packing_key,
storage_modulus_log,
&mut vec![(); lwe_per_glwe.0],
)
};
let input_zeros: Vec<_> = (0..lwe_per_glwe.0)
.map(|_| {
cks.key
.encrypt_noiseless_pbs_input_dyn_lwe(br_input_modulus_log, 0)
})
.collect();
let d_input_zeros: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaLweCiphertextList::from_lwe_ciphertext(&ct.as_lwe_64(), &streams);
CudaDynLwe::U64(d_ct_input)
})
.collect();
let id_lut = cuda_sks.generate_lookup_table(|x| x);
let d_accumulator = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut.acc, &streams);
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
// Check that the circuit is correct with respect to the core implementation, i.e. that it
// does not crash on dimension checks
let (expected_glwe_size_out, expected_polynomial_size_out, expected_modulus_f64_out) = {
let (_before_packing_sim, _after_packing, after_ms) = br_dp_packing_ks_ms(
d_input_zeros,
&cuda_sks,
&d_accumulator,
dp_scalar,
&cuda_compression_key.packing_key_switching_key,
storage_modulus_log,
&mut cuda_side_resources,
);
(
after_ms.glwe_dimension().to_glwe_size(),
after_ms.polynomial_size(),
after_ms.ciphertext_modulus().raw_modulus_float(),
)
};
assert_eq!(after_ms_sim.glwe_size(), expected_glwe_size_out);
assert_eq!(after_ms_sim.polynomial_size(), expected_polynomial_size_out);
assert_eq!(after_ms_sim.modulus().as_f64(), expected_modulus_f64_out);
let cleartext_modulus = params.message_modulus().0 * params.carry_modulus().0;
let mut noise_samples_before_ms = vec![];
let mut noise_samples_after_ms = vec![];
let chunk_size = 8;
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
for _ in 0..cleartext_modulus {
let (current_noise_samples_before_ms, current_noise_samples_after_ms): (Vec<_>, Vec<_>) =
(0..SAMPLES_PER_MSG_PACKING_KS_NOISE)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.into_par_iter()
.map(|i| {
let local_stream = &vec_local_streams[*i % chunk_size];
let (_before_packing, after_packing, after_ms) =
encrypt_br_dp_packing_ks_ms_noise_helper_gpu(
params,
comp_params,
&cks,
&cuda_sks,
&private_compression_key,
&cuda_compression_key,
0,
local_stream,
);
(after_packing, after_ms)
})
.collect::<Vec<_>>()
})
.unzip();
noise_samples_before_ms.extend(current_noise_samples_before_ms);
noise_samples_after_ms.extend(current_noise_samples_after_ms);
}
let noise_samples_before_ms_flattened: Vec<_> = noise_samples_before_ms
.into_iter()
.flatten()
.map(|x| x.value)
.collect();
let noise_samples_after_ms_flattened: Vec<_> = noise_samples_after_ms
.into_iter()
.flatten()
.map(|x| x.value)
.collect();
let before_ms_normality =
normality_check(&noise_samples_before_ms_flattened, "before ms", 0.01);
let after_ms_is_ok = mean_and_variance_check(
&noise_samples_after_ms_flattened,
"after_ms",
0.0,
after_ms_sim.variance_per_occupied_slot(),
comp_params.packing_ks_key_noise_distribution(),
after_ms_sim
.glwe_dimension()
.to_equivalent_lwe_dimension(after_ms_sim.polynomial_size()),
after_ms_sim.modulus().as_f64(),
);
assert!(before_ms_normality.null_hypothesis_is_valid && after_ms_is_ok);
}
create_gpu_parameterized_test!(noise_check_encrypt_br_dp_packing_ks_ms_noise_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
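// Pfail check: widens the message modulus so decryption failures become
// observable at testable rates, runs the circuit enough times to observe the
// expected number of failures, and compares the measured failure rate against
// the prediction.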
fn noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu(meta_params: MetaParameters) {
let (pfail_test_meta, params, comp_params) = {
let (mut params, comp_params) = (
meta_params.compute_parameters,
meta_params.compression_parameters.unwrap(),
);
let original_message_modulus = params.message_modulus();
let original_carry_modulus = params.carry_modulus();
// For now only allow 2_2 parameters; heuristics for other parameter sets may come later
assert_eq!(original_message_modulus.0, 4);
assert_eq!(original_carry_modulus.0, 4);
let noise_simulation_bsk =
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(params);
let noise_simulation_packing_key =
NoiseSimulationLwePackingKeyswitchKey::new_from_comp_parameters(params, comp_params);
// The multiplication done in the compression moves the message to the top of
// the carry space; multiplying by the carry modulus achieves that
let dp_scalar = params.carry_modulus().0;
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
noise_simulation_bsk.output_glwe_size().to_glwe_dimension(),
noise_simulation_bsk.output_polynomial_size(),
Variance(0.0),
noise_simulation_bsk.modulus(),
);
let lwe_per_glwe = comp_params.lwe_per_glwe();
let storage_modulus_log = comp_params.storage_log_modulus();
let (_before_packing_sim, _after_packing_sim, after_ms_sim) = {
let noise_simulation = NoiseSimulationLwe::new(
params.lwe_dimension(),
Variance(0.0),
NoiseSimulationModulus::from_ciphertext_modulus(params.ciphertext_modulus()),
);
br_dp_packing_ks_ms(
vec![noise_simulation; lwe_per_glwe.0],
&noise_simulation_bsk,
&noise_simulation_accumulator,
dp_scalar,
&noise_simulation_packing_key,
storage_modulus_log,
&mut vec![(); lwe_per_glwe.0],
)
};
let expected_variance_after_storage = after_ms_sim.variance_per_occupied_slot();
let compression_carry_mod = CarryModulus(1);
let compression_message_mod = original_message_modulus;
let compression_precision_with_padding =
precision_with_padding(compression_message_mod, compression_carry_mod);
let expected_pfail_for_storage = expected_pfail_for_precision(
compression_precision_with_padding,
expected_variance_after_storage,
);
let original_pfail_and_precision = PfailAndPrecision::new(
expected_pfail_for_storage,
compression_message_mod,
compression_carry_mod,
);
// Here we update the message modulus only:
// - because the message modulus matches for the compression encoding and compute encoding
// - so that the carry modulus stays the same and we apply the same dot product as normal
// for 2_2
// - so that the effective encoding after the storage is the one we used to evaluate the
// pfail
let updated_message_mod = MessageModulus(1 << 6);
let updated_carry_mod = compression_carry_mod;
update_ap_params_msg_and_carry_moduli(&mut params, updated_message_mod, updated_carry_mod);
assert!(
(params.message_modulus().0 * params.carry_modulus().0).ilog2()
<= comp_params.storage_log_modulus().0 as u32,
"Compression storage modulus cannot store enough bits for pfail estimation"
);
let updated_precision_with_padding =
precision_with_padding(updated_message_mod, updated_carry_mod);
let new_expected_pfail_for_storage = expected_pfail_for_precision(
updated_precision_with_padding,
expected_variance_after_storage,
);
let new_expected_pfail_and_precision = PfailAndPrecision::new(
new_expected_pfail_for_storage,
updated_message_mod,
updated_carry_mod,
);
let pfail_test_meta = if should_run_short_pfail_tests_debug() {
// To generate the same number of keys as the case where a single run is a
// single sample
let expected_fails = 200 * lwe_per_glwe.0 as u32;
PfailTestMeta::new_with_desired_expected_fails(
original_pfail_and_precision,
new_expected_pfail_and_precision,
expected_fails,
)
} else {
// To guarantee 1_000_000 keysets are generated
let total_runs = 1_000_000 * lwe_per_glwe.0 as u32;
PfailTestMeta::new_with_total_runs(
original_pfail_and_precision,
new_expected_pfail_and_precision,
total_runs,
)
};
(pfail_test_meta, params, comp_params)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let private_compression_key = cks.new_compression_private_key(comp_params);
let (compressed_compression_key, _compressed_decompression_key) =
cks.new_compressed_compression_decompression_keys(&private_compression_key);
let cuda_compression_key = compressed_compression_key.decompress_to_cuda(&streams);
let lwe_per_glwe = cuda_compression_key.lwe_per_glwe;
let total_runs_for_expected_fails = pfail_test_meta
.total_runs_for_expected_fails()
.div_ceil(lwe_per_glwe.0.try_into().unwrap());
let chunk_size = 8;
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
let measured_fails: f64 = (0..total_runs_for_expected_fails)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.into_par_iter()
.map(|i| {
let local_streams = &vec_local_streams[*i as usize % chunk_size];
let after_ms_decryption_result = encrypt_br_dp_packing_ks_ms_pfail_helper_gpu(
params,
comp_params,
&cks,
&cuda_sks,
&private_compression_key,
&cuda_compression_key,
0,
local_streams,
);
after_ms_decryption_result
.into_iter()
.map(|result| result.failure_as_f64())
.sum::<f64>()
})
.collect::<Vec<_>>()
})
.sum();
let test_result = PfailTestResult { measured_fails };
pfail_check(&pfail_test_meta, test_result);
}
create_gpu_parameterized_test!(noise_check_encrypt_br_dp_packing_ks_ms_pfail_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});

View File

@@ -0,0 +1,869 @@
use super::utils::noise_simulation::{CudaDynLwe, CudaSideResources};
use crate::core_crypto::commons::noise_formulas::noise_simulation::{
NoiseSimulationLweFourier128Bsk, NoiseSimulationLwePackingKeyswitchKey,
};
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::CudaStreams;
use crate::core_crypto::prelude::{GlweCiphertext, LweCiphertextCount};
use crate::integer::gpu::CudaServerKey;
use crate::integer::noise_squashing::NoiseSquashingPrivateKey;
use crate::integer::CompressedServerKey;
use crate::core_crypto::commons::parameters::CiphertextModulusLog;
use crate::core_crypto::prelude::generate_programmable_bootstrap_glwe_lut;
use crate::integer::ciphertext::NoiseSquashingCompressionPrivateKey;
use crate::integer::gpu::list_compression::server_keys::CudaNoiseSquashingCompressionKey;
use crate::integer::gpu::server_key::radix::tests_unsigned::create_gpu_parameterized_test;
use crate::integer::gpu::server_key::radix::{CudaNoiseSquashingKey, CudaUnsignedRadixCiphertext};
use crate::integer::gpu::unchecked_small_scalar_mul_integer_async;
use crate::integer::IntegerCiphertext;
use crate::shortint::client_key::atomic_pattern::AtomicPatternClientKey;
use crate::shortint::parameters::noise_squashing::NoiseSquashingParameters;
use crate::shortint::parameters::test_params::TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128;
use crate::shortint::parameters::{
AtomicPatternParameters, MetaParameters, NoiseSquashingCompressionParameters, Variance,
};
use crate::shortint::server_key::tests::noise_distribution::dp_ks_pbs128_packingks::{
dp_ks_any_ms_standard_pbs128, dp_ks_any_ms_standard_pbs128_packing_ks,
};
use crate::shortint::server_key::tests::noise_distribution::should_use_single_key_debug;
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::{
NoiseSimulationGlwe, NoiseSimulationLwe, NoiseSimulationLweFourierBsk,
NoiseSimulationLweKeyswitchKey, NoiseSimulationModulusSwitchConfig,
};
use crate::shortint::server_key::tests::noise_distribution::utils::{
mean_and_variance_check, DecryptionAndNoiseResult, NoiseSample,
};
use crate::shortint::{PaddingBit, ShortintEncoding, ShortintParameterSet};
use crate::GpuIndex;
use rayon::prelude::*;
/// Test function to verify that the noise checking tools match the actual atomic patterns
/// implemented in shortint for GPU
fn sanity_check_encrypt_dp_ks_standard_pbs128_packing_ks_gpu(meta_params: MetaParameters) {
let (atomic_params, noise_squashing_params, noise_squashing_compression_params) = {
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
(
meta_params.compute_parameters,
meta_noise_squashing_params.parameters,
meta_noise_squashing_params.compression_parameters.unwrap(),
)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = atomic_params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let compressed_noise_squashing_compression_key =
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
let cuda_noise_squashing_key =
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
let noise_squashing_compression_private_key =
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
let noise_squashing_compression_key = noise_squashing_private_key
.new_noise_squashing_compression_key(&noise_squashing_compression_private_key);
let cuda_noise_squashing_compression_key =
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
&noise_squashing_compression_key,
&streams,
);
let lwe_per_glwe = cuda_noise_squashing_compression_key.lwe_per_glwe;
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
let u128_encoding = ShortintEncoding {
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
message_modulus: noise_squashing_params.message_modulus(),
carry_modulus: noise_squashing_params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let max_scalar_mul = cuda_sks.max_noise_level.get();
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
noise_squashing_key.key.polynomial_size(),
noise_squashing_key.key.glwe_size(),
u128_encoding
.cleartext_space_without_padding()
.try_into()
.unwrap(),
u128_encoding.ciphertext_modulus,
u128_encoding.delta(),
|x| x,
);
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, &streams);
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(0)).collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(atomic_params.message_modulus().0 - 1),
message_modulus: atomic_params.message_modulus(),
carry_modulus: atomic_params.carry_modulus(),
atomic_pattern: atomic_params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
let input_zero_as_lwe: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
&streams,
);
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
})
.collect();
let (_before_packing, d_after_packing) = dp_ks_any_ms_standard_pbs128_packing_ks(
input_zero_as_lwe,
max_scalar_mul,
&cuda_sks,
modulus_switch_config,
&cuda_noise_squashing_key,
br_input_modulus_log,
&id_lut_gpu,
&cuda_noise_squashing_compression_key.packing_key_switching_key,
&mut cuda_side_resources,
);
let cuda_noise_squashed_cts: Vec<_> = input_zeros
.into_par_iter()
.map(|ct| {
let cloned_ct = ct;
let radix_ct = crate::integer::RadixCiphertext::from_blocks(vec![cloned_ct]);
let mut d_ct = CudaUnsignedRadixCiphertext::from_radix_ciphertext(&radix_ct, &streams);
unsafe {
unchecked_small_scalar_mul_integer_async(
&streams,
&mut d_ct.ciphertext,
max_scalar_mul,
atomic_params.message_modulus(),
atomic_params.carry_modulus(),
);
}
streams.synchronize();
cuda_noise_squashing_key.unchecked_squash_ciphertext_noise(
&d_ct.ciphertext,
&cuda_sks,
&streams,
)
})
.collect();
let gpu_compressed = cuda_noise_squashing_compression_key
.compress_noise_squashed_ciphertexts_into_list(&cuda_noise_squashed_cts, &streams);
let gpu_extracted = gpu_compressed.extract_glwe(0, &streams);
let extracted_list = gpu_extracted.to_glwe_ciphertext_list(&streams);
let extracted_glwe = GlweCiphertext::from_container(
extracted_list.clone().into_container(),
extracted_list.polynomial_size(),
extracted_list.ciphertext_modulus(),
);
let after_packing_list = d_after_packing.to_glwe_ciphertext_list(&streams);
let mut after_packing = GlweCiphertext::from_container(
after_packing_list.clone().into_container(),
after_packing_list.polynomial_size(),
after_packing_list.ciphertext_modulus(),
);
// Bodies that were not filled are discarded
after_packing.get_mut_body().as_mut()[lwe_per_glwe.0..].fill(0);
assert_eq!(after_packing.as_view(), extracted_glwe.as_view());
}
/// Test function to verify that the noise checking tools match the actual atomic patterns
/// implemented in shortint for GPU
fn sanity_check_encrypt_dp_ks_standard_pbs128_gpu(meta_params: MetaParameters) {
let (params, noise_squashing_params) = {
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
(
meta_params.compute_parameters,
meta_noise_squashing_params.parameters,
)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let compressed_noise_squashing_compression_key =
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
let cuda_noise_squashing_key =
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
let u128_encoding = ShortintEncoding {
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
message_modulus: noise_squashing_params.message_modulus(),
carry_modulus: noise_squashing_params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let max_scalar_mul = cuda_sks.max_noise_level.get();
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
noise_squashing_key.key.polynomial_size(),
noise_squashing_key.key.glwe_size(),
u128_encoding
.cleartext_space_without_padding()
.try_into()
.unwrap(),
u128_encoding.ciphertext_modulus,
u128_encoding.delta(),
|x| x,
);
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, &streams);
let lwe_per_glwe = LweCiphertextCount(128);
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(0)).collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(&streams, cuda_block_info))
.collect();
let input_zero_as_lwe: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
&streams,
);
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
})
.collect();
let res: Vec<_> = input_zero_as_lwe
.into_par_iter()
.zip(cuda_side_resources.par_iter_mut())
.map(|(input, side_resources)| {
let (input, after_dp, ks_result, drift_technique_result, ms_result, pbs_result) =
dp_ks_any_ms_standard_pbs128(
input,
max_scalar_mul,
&cuda_sks,
modulus_switch_config,
&cuda_noise_squashing_key,
br_input_modulus_log,
&id_lut_gpu,
side_resources,
);
(
input,
after_dp,
ks_result,
drift_technique_result,
ms_result,
pbs_result,
)
})
.collect();
let input_zeros_non_pattern: Vec<_> = input_zeros
.iter()
.map(|ct| {
CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
&streams,
)
})
.collect();
let vector_non_pattern: Vec<_> = input_zeros_non_pattern
.into_par_iter()
.map(|mut d_ct_input2| {
unsafe {
unchecked_small_scalar_mul_integer_async(
&streams,
&mut d_ct_input2.ciphertext,
max_scalar_mul,
params.message_modulus(),
params.carry_modulus(),
);
}
streams.synchronize();
cuda_noise_squashing_key
.squash_radix_ciphertext_noise(&cuda_sks, &d_ct_input2.ciphertext, &streams)
.unwrap()
})
.collect();
let vector_pattern_cpu: Vec<_> = res
.into_iter()
.map(
|(_input, _after_dp, _ks_result, _drift_technique_result, _ms_result, pbs_result)| {
pbs_result.as_ct_128_cpu(&streams)
},
)
.collect();
let vector_non_pattern_cpu: Vec<_> = vector_non_pattern
.into_par_iter()
.map(|cuda_squashed_radix_ct| {
let squashed_noise_ct_cpu =
cuda_squashed_radix_ct.to_squashed_noise_radix_ciphertext(&streams);
squashed_noise_ct_cpu.packed_blocks()[0]
.lwe_ciphertext()
.clone()
})
.collect();
// Compare that all the results are equivalent
assert_eq!(vector_pattern_cpu, vector_non_pattern_cpu);
}
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu(
params: AtomicPatternParameters,
noise_squashing_params: NoiseSquashingParameters,
noise_squashing_compression_params: NoiseSquashingCompressionParameters,
single_cks: &crate::integer::ClientKey,
single_cuda_sks: &CudaServerKey,
single_noise_squashing_private_key: &NoiseSquashingPrivateKey,
single_noise_squashing_key: &crate::integer::noise_squashing::NoiseSquashingKey,
single_cuda_noise_squashing_key: &CudaNoiseSquashingKey,
single_noise_squashing_compression_private_key: &NoiseSquashingCompressionPrivateKey,
single_cuda_noise_squashing_compression_key: &CudaNoiseSquashingCompressionKey,
msg: u64,
scalar_for_multiplication: u64,
br_input_modulus_log: CiphertextModulusLog,
streams: &CudaStreams,
) -> (
Vec<(
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
DecryptionAndNoiseResult,
)>,
Vec<DecryptionAndNoiseResult>,
) {
let thread_cks: crate::integer::ClientKey;
let thread_cuda_sks: CudaServerKey;
let thread_noise_squashing_private_key: NoiseSquashingPrivateKey;
let thread_noise_squashing_key: crate::integer::noise_squashing::NoiseSquashingKey;
let thread_cuda_noise_squashing_key: CudaNoiseSquashingKey;
let thread_noise_squashing_compression_private_key: NoiseSquashingCompressionPrivateKey;
let thread_cuda_noise_squashing_compression_key: CudaNoiseSquashingCompressionKey;
let (
cks,
cuda_sks,
noise_squashing_private_key,
noise_squashing_key,
cuda_noise_squashing_key,
noise_squashing_compression_private_key,
cuda_noise_squashing_compression_key,
) = if should_use_single_key_debug() {
(
single_cks,
single_cuda_sks,
single_noise_squashing_private_key,
single_noise_squashing_key,
single_cuda_noise_squashing_key,
single_noise_squashing_compression_private_key,
single_cuda_noise_squashing_compression_key,
)
} else {
let block_params: ShortintParameterSet = params.into();
thread_cks = crate::integer::ClientKey::new(block_params);
let thread_compressed_server_key =
CompressedServerKey::new_radix_compressed_server_key(&thread_cks);
thread_cuda_sks =
CudaServerKey::decompress_from_cpu(&thread_compressed_server_key, streams);
thread_noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let thread_compressed_noise_squashing_compression_key =
thread_cks.new_compressed_noise_squashing_key(&thread_noise_squashing_private_key);
thread_noise_squashing_key = thread_compressed_noise_squashing_compression_key.decompress();
thread_cuda_noise_squashing_key =
thread_compressed_noise_squashing_compression_key.decompress_to_cuda(streams);
thread_noise_squashing_compression_private_key =
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
let thread_noise_squashing_compression_key = thread_noise_squashing_private_key
.new_noise_squashing_compression_key(&thread_noise_squashing_compression_private_key);
thread_cuda_noise_squashing_compression_key =
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
&thread_noise_squashing_compression_key,
streams,
);
(
&thread_cks,
&thread_cuda_sks,
&thread_noise_squashing_private_key,
&thread_noise_squashing_key,
&thread_cuda_noise_squashing_key,
&thread_noise_squashing_compression_private_key,
&thread_cuda_noise_squashing_compression_key,
)
};
let modulus_switch_config = cuda_noise_squashing_key.noise_simulation_modulus_switch_config();
let bsk_polynomial_size = noise_squashing_key.key.polynomial_size();
let bsk_glwe_size = noise_squashing_key.key.glwe_size();
let u128_encoding = ShortintEncoding {
ciphertext_modulus: noise_squashing_params.ciphertext_modulus(),
message_modulus: noise_squashing_params.message_modulus(),
carry_modulus: noise_squashing_params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let id_lut_cpu = generate_programmable_bootstrap_glwe_lut(
bsk_polynomial_size,
bsk_glwe_size,
u128_encoding
.cleartext_space_without_padding()
.try_into()
.unwrap(),
u128_encoding.ciphertext_modulus,
u128_encoding.delta(),
|x| x,
);
let id_lut_gpu = CudaGlweCiphertextList::from_glwe_ciphertext(&id_lut_cpu, streams);
let lwe_per_glwe = cuda_noise_squashing_compression_key.lwe_per_glwe;
let input_zeros: Vec<_> = (0..lwe_per_glwe.0).map(|_| cks.key.encrypt(msg)).collect();
let cuda_block_info = crate::integer::gpu::ciphertext::info::CudaBlockInfo {
degree: crate::shortint::ciphertext::Degree::new(params.message_modulus().0 - 1),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
atomic_pattern: params.atomic_pattern(),
noise_level: crate::shortint::parameters::NoiseLevel::NOMINAL,
};
let mut cuda_side_resources: Vec<CudaSideResources> = (0..input_zeros.len())
.map(|_| CudaSideResources::new(streams, cuda_block_info))
.collect();
let input_zero_as_lwe: Vec<_> = input_zeros
.iter()
.map(|ct| {
let d_ct_input = CudaUnsignedRadixCiphertext::from_radix_ciphertext(
&crate::integer::RadixCiphertext::from_blocks(vec![ct.clone()]),
streams,
);
CudaDynLwe::U64(d_ct_input.ciphertext.d_blocks)
})
.collect();
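    // Run the full GPU pipeline on each input: scalar multiplication (dot product),
    // keyswitch, optional drift-technique mod switch, modulus switch, 128-bit PBS, and
    // finally the packing keyswitch.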
let (before_packing_gpu, after_packing_gpu) = dp_ks_any_ms_standard_pbs128_packing_ks(
input_zero_as_lwe,
scalar_for_multiplication,
cuda_sks,
modulus_switch_config,
cuda_noise_squashing_key,
br_input_modulus_log,
&id_lut_gpu,
&cuda_noise_squashing_compression_key.packing_key_switching_key,
&mut cuda_side_resources,
);
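    // Bring every intermediate ciphertext back to the CPU and decrypt it under the
    // matching secret key to recover a noise sample for each step of the pipeline.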
let before_packing: Vec<_> = before_packing_gpu
.into_iter()
.map(
|(
input_gpu,
after_dp_gpu,
after_ks_gpu,
after_drift_gpu,
after_ms_gpu,
after_pbs128_gpu,
)| {
match &cks.key.atomic_pattern {
AtomicPatternClientKey::Standard(standard_atomic_pattern_client_key) => {
let params = standard_atomic_pattern_client_key.parameters;
let u64_encoding = ShortintEncoding {
ciphertext_modulus: params.ciphertext_modulus(),
message_modulus: params.message_modulus(),
carry_modulus: params.carry_modulus(),
padding_bit: PaddingBit::Yes,
};
let large_lwe_secret_key =
standard_atomic_pattern_client_key.large_lwe_secret_key();
let small_lwe_secret_key =
standard_atomic_pattern_client_key.small_lwe_secret_key();
let input_ct = input_gpu.as_ct_64_cpu(streams);
let after_dp_ct = after_dp_gpu.as_ct_64_cpu(streams);
let after_ks_ct = after_ks_gpu.as_ct_64_cpu(streams);
let before_ms_gpu: &CudaDynLwe =
after_drift_gpu.as_ref().unwrap_or(&after_ks_gpu);
let before_ms_ct = before_ms_gpu.as_ct_64_cpu(streams);
let after_ms_ct = after_ms_gpu.as_ct_64_cpu(streams);
let after_pbs128_ct = after_pbs128_gpu.as_ct_128_cpu(streams);
(
DecryptionAndNoiseResult::new_from_lwe(
&input_ct,
&large_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_dp_ct,
&large_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_ks_ct,
&small_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&before_ms_ct,
&small_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_ms_ct,
&small_lwe_secret_key,
msg,
&u64_encoding,
),
DecryptionAndNoiseResult::new_from_lwe(
&after_pbs128_ct,
&noise_squashing_private_key
.key
.post_noise_squashing_lwe_secret_key(),
msg.into(),
&u128_encoding,
),
)
}
AtomicPatternClientKey::KeySwitch32(_ks32_atomic_pattern_client_key) => {
panic!("KS32 atomic pattern not supported for GPU yet");
}
}
},
)
.collect();
let after_packing_list = after_packing_gpu.to_glwe_ciphertext_list(streams);
let after_packing = GlweCiphertext::from_container(
after_packing_list.clone().into_container(),
after_packing_list.polynomial_size(),
after_packing_list.ciphertext_modulus(),
);
let after_packing = DecryptionAndNoiseResult::new_from_glwe(
&after_packing,
noise_squashing_compression_private_key
.key
.post_packing_ks_key(),
lwe_per_glwe,
msg.into(),
&u128_encoding,
);
assert_eq!(after_packing.len(), lwe_per_glwe.0);
(before_packing, after_packing)
}
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn encrypt_dp_ks_standard_pbs128_packing_ks_noise_helper_gpu(
params: AtomicPatternParameters,
noise_squashing_params: NoiseSquashingParameters,
noise_squashing_compression_params: NoiseSquashingCompressionParameters,
single_cks: &crate::integer::ClientKey,
single_cuda_sks: &CudaServerKey,
single_noise_squashing_private_key: &NoiseSquashingPrivateKey,
single_noise_squashing_key: &crate::integer::noise_squashing::NoiseSquashingKey,
single_cuda_noise_squashing_key: &CudaNoiseSquashingKey,
single_noise_squashing_compression_private_key: &NoiseSquashingCompressionPrivateKey,
single_cuda_noise_squashing_compression_key: &CudaNoiseSquashingCompressionKey,
msg: u64,
scalar_for_multiplication: u64,
br_input_modulus_log: CiphertextModulusLog,
streams: &CudaStreams,
) -> (
Vec<(
NoiseSample,
NoiseSample,
NoiseSample,
NoiseSample,
NoiseSample,
NoiseSample,
)>,
Vec<NoiseSample>,
) {
let (before_compression, after_compression) =
encrypt_dp_ks_standard_pbs128_packing_ks_inner_helper_gpu(
params,
noise_squashing_params,
noise_squashing_compression_params,
single_cks,
single_cuda_sks,
single_noise_squashing_private_key,
single_noise_squashing_key,
single_cuda_noise_squashing_key,
single_noise_squashing_compression_private_key,
single_cuda_noise_squashing_compression_key,
msg,
scalar_for_multiplication,
br_input_modulus_log,
streams,
);
(
before_compression
.into_iter()
.map(
|(input, after_dp, after_ks, after_drift, after_ms, after_pbs)| {
(
input
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_dp
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_ks
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_drift
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_ms
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
after_pbs
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed"),
)
},
)
.collect(),
after_compression
.into_iter()
.map(|after_compression| {
after_compression
.get_noise_if_decryption_was_correct()
.expect("Decryption Failed")
})
.collect(),
)
}
fn noise_check_encrypt_dp_ks_standard_pbs128_packing_ks_noise_gpu(meta_params: MetaParameters) {
let (atomic_params, noise_squashing_params, noise_squashing_compression_params) = {
let meta_noise_squashing_params = meta_params.noise_squashing_parameters.unwrap();
(
meta_params.compute_parameters,
meta_noise_squashing_params.parameters,
meta_noise_squashing_params.compression_parameters.unwrap(),
)
};
let gpu_index = 0;
let streams = CudaStreams::new_single_gpu(GpuIndex::new(gpu_index));
let block_params: ShortintParameterSet = atomic_params.into();
let cks = crate::integer::ClientKey::new(block_params);
let compressed_server_key = CompressedServerKey::new_radix_compressed_server_key(&cks);
let cuda_sks = CudaServerKey::decompress_from_cpu(&compressed_server_key, &streams);
let noise_squashing_private_key = NoiseSquashingPrivateKey::new(noise_squashing_params);
let compressed_noise_squashing_compression_key =
cks.new_compressed_noise_squashing_key(&noise_squashing_private_key);
let noise_squashing_key = compressed_noise_squashing_compression_key.decompress();
let cuda_noise_squashing_key =
compressed_noise_squashing_compression_key.decompress_to_cuda(&streams);
let noise_squashing_compression_private_key =
NoiseSquashingCompressionPrivateKey::new(noise_squashing_compression_params);
let noise_squashing_compression_key = noise_squashing_private_key
.new_noise_squashing_compression_key(&noise_squashing_compression_private_key);
let cuda_noise_squashing_compression_key =
CudaNoiseSquashingCompressionKey::from_noise_squashing_compression_key(
&noise_squashing_compression_key,
&streams,
);
let noise_simulation_ksk =
NoiseSimulationLweKeyswitchKey::new_from_atomic_pattern_parameters(atomic_params);
let noise_simulation_bsk =
NoiseSimulationLweFourierBsk::new_from_atomic_pattern_parameters(atomic_params);
let noise_simulation_modulus_switch_config =
NoiseSimulationModulusSwitchConfig::new_from_atomic_pattern_parameters(atomic_params);
let noise_simulation_bsk128 =
NoiseSimulationLweFourier128Bsk::new_from_parameters(atomic_params, noise_squashing_params);
let noise_simulation_packing_key =
NoiseSimulationLwePackingKeyswitchKey::new_from_noise_squashing_parameters(
noise_squashing_params,
noise_squashing_compression_params,
);
assert!(noise_simulation_bsk.matches_actual_bsk_gpu(&cuda_sks.bootstrapping_key));
assert!(noise_simulation_bsk128
.matches_actual_shortint_noise_squashing_key(&noise_squashing_key.key));
assert!(noise_simulation_packing_key.matches_actual_pksk(
noise_squashing_compression_key
.key
.packing_key_switching_key()
));
let br_input_modulus_log = noise_squashing_key.key.br_input_modulus_log();
    let max_scalar_mul = cuda_sks.max_noise_level.get();
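    // Use the maximum scalar allowed by the parameters, exercising the worst-case
    // dot-product noise.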
let noise_simulation_accumulator = NoiseSimulationGlwe::new(
noise_simulation_bsk128
.output_glwe_size()
.to_glwe_dimension(),
noise_simulation_bsk128.output_polynomial_size(),
Variance(0.0),
noise_simulation_bsk128.modulus(),
);
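    // Run the same pipeline on the noise-simulation objects to obtain the predicted noise
    // after packing, against which the measured samples are checked below.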
let (_before_packing_sim, after_packing_sim) = {
let noise_simulation = NoiseSimulationLwe::encrypt(&cks.key, 0);
dp_ks_any_ms_standard_pbs128_packing_ks(
vec![noise_simulation; cuda_noise_squashing_compression_key.lwe_per_glwe.0],
max_scalar_mul,
&noise_simulation_ksk,
noise_simulation_modulus_switch_config.as_ref(),
&noise_simulation_bsk128,
br_input_modulus_log,
&noise_simulation_accumulator,
&noise_simulation_packing_key,
&mut vec![(); cuda_noise_squashing_compression_key.lwe_per_glwe.0],
)
};
let after_packing_sim = after_packing_sim.into_lwe();
    // Check that the circuit is consistent with the core implementation, i.e. that it does
    // not fail any dimension checks
let (expected_lwe_dimension_out, expected_modulus_f64_out) = {
let pksk = noise_squashing_compression_key
.key
.packing_key_switching_key();
let out_glwe_dim = pksk.output_key_glwe_dimension();
let out_poly_size = pksk.output_key_polynomial_size();
(
out_glwe_dim.to_equivalent_lwe_dimension(out_poly_size),
pksk.ciphertext_modulus().raw_modulus_float(),
)
};
assert_eq!(
after_packing_sim.lwe_dimension(),
expected_lwe_dimension_out
);
assert_eq!(
after_packing_sim.modulus().as_f64(),
expected_modulus_f64_out
);
let cleartext_modulus = atomic_params.message_modulus().0 * atomic_params.carry_modulus().0;
let mut noise_samples_after_packing = vec![];
let sample_count_per_msg =
1000usize.div_ceil(cuda_noise_squashing_compression_key.lwe_per_glwe.0);
let chunk_size = 4;
let vec_local_streams = (0..chunk_size)
.map(|_| CudaStreams::new_single_gpu(GpuIndex::new(gpu_index)))
.collect::<Vec<_>>();
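    // Draw samples in chunks of `chunk_size`, each lane of a chunk running on its own CUDA
    // stream so the helper calls can proceed in parallel.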
for _i in 0..cleartext_modulus {
let current_noise_samples_after_packing: Vec<_> = (0..sample_count_per_msg)
.collect::<Vec<_>>()
.chunks(chunk_size)
.flat_map(|chunk| {
chunk
.into_par_iter()
.map(|i| {
let local_stream = &vec_local_streams[*i % chunk_size];
let (_before_packing, after_packing) =
encrypt_dp_ks_standard_pbs128_packing_ks_noise_helper_gpu(
atomic_params,
noise_squashing_params,
noise_squashing_compression_params,
&cks,
&cuda_sks,
&noise_squashing_private_key,
&noise_squashing_key,
&cuda_noise_squashing_key,
&noise_squashing_compression_private_key,
&cuda_noise_squashing_compression_key,
0,
max_scalar_mul,
br_input_modulus_log,
local_stream,
);
after_packing
})
.collect::<Vec<_>>()
})
.collect();
noise_samples_after_packing.extend(current_noise_samples_after_packing);
}
let noise_samples_after_packing_flattened: Vec<_> = noise_samples_after_packing
.into_iter()
.flatten()
.map(|x| x.value)
.collect();
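    // Check the empirical mean and variance of the packed ciphertexts against the
    // simulation's prediction.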
let after_packing_is_ok = mean_and_variance_check(
&noise_samples_after_packing_flattened,
"after_packing",
0.0,
after_packing_sim.variance(),
noise_squashing_compression_params.packing_ks_key_noise_distribution,
after_packing_sim.lwe_dimension(),
after_packing_sim.modulus().as_f64(),
);
assert!(after_packing_is_ok);
}
create_gpu_parameterized_test!(
noise_check_encrypt_dp_ks_standard_pbs128_packing_ks_noise_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
}
);
create_gpu_parameterized_test!(sanity_check_encrypt_dp_ks_standard_pbs128_packing_ks_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});
create_gpu_parameterized_test!(sanity_check_encrypt_dp_ks_standard_pbs128_gpu {
TEST_META_PARAM_CPU_2_2_KS_PBS_PKE_TO_SMALL_ZKV2_TUNIFORM_2M128,
});

View File

@@ -1,3 +1,5 @@
pub mod br_dp_ks_ms;
pub mod br_dp_packingks_ms;
pub mod dp_ks_ms;
pub mod dp_ks_pbs_128_packingks;
pub mod utils;

View File

@@ -1,7 +1,7 @@
use crate::core_crypto::commons::noise_formulas::noise_simulation::traits::{
AllocateCenteredBinaryShiftedStandardModSwitchResult,
AllocateDriftTechniqueStandardModSwitchResult, AllocateLweBootstrapResult,
AllocateLweKeyswitchResult, AllocateStandardModSwitchResult,
AllocateLweKeyswitchResult, AllocateLwePackingKeyswitchResult, AllocateStandardModSwitchResult,
CenteredBinaryShiftedStandardModSwitch, DriftTechniqueStandardModSwitch,
LweClassicFftBootstrap, LweKeyswitch, ScalarMul, StandardModSwitch,
};
@@ -13,6 +13,7 @@ use crate::core_crypto::gpu::cuda_modulus_switch_ciphertext;
use crate::core_crypto::gpu::glwe_ciphertext_list::CudaGlweCiphertextList;
use crate::core_crypto::gpu::lwe_bootstrap_key::CudaModulusSwitchNoiseReductionConfiguration;
use crate::core_crypto::gpu::lwe_ciphertext_list::CudaLweCiphertextList;
use crate::core_crypto::gpu::lwe_packing_keyswitch_key::CudaLwePackingKeyswitchKey;
use crate::core_crypto::gpu::vec::CudaVec;
use crate::core_crypto::prelude::*;
use crate::integer::gpu::ciphertext::info::CudaBlockInfo;
@@ -25,7 +26,7 @@ use crate::integer::gpu::{
cuda_centered_modulus_switch_64, unchecked_small_scalar_mul_integer_async, CudaStreams,
};
use crate::shortint::server_key::tests::noise_distribution::utils::noise_simulation::NoiseSimulationModulusSwitchConfig;
use crate::shortint::server_key::tests::noise_distribution::utils::traits::LwePackingKeyswitch;
/// Side resources for CUDA operations in noise simulation
#[derive(Clone)]
pub struct CudaSideResources {
@@ -128,6 +129,19 @@ impl CudaDynLwe {
}
}
pub fn as_ct_128_cpu(&self, streams: &CudaStreams) -> LweCiphertext<Vec<u128>> {
match self {
Self::U32(_) => panic!("Tried getting a u32 CudaLweCiphertextList as u128."),
Self::U64(_) => panic!("Tried getting a u64 CudaLweCiphertextList as u128."),
Self::U128(_cuda_lwe) => {
let cpu_lwe_list = self.as_lwe_128().to_lwe_ciphertext_list(streams);
LweCiphertext::from_container(
cpu_lwe_list.clone().into_container(),
cpu_lwe_list.ciphertext_modulus(),
)
}
}
}
pub fn from_lwe_32(cuda_lwe: CudaLweCiphertextList<u32>) -> Self {
Self::U32(cuda_lwe)
}
@@ -141,6 +155,19 @@ impl CudaDynLwe {
}
}
/// Converts a `CudaGlweCiphertextList<u64>` to a `GlweCiphertext<Vec<u64>>`
pub fn cuda_glwe_list_to_glwe_ciphertext(
cuda_glwe_list: &CudaGlweCiphertextList<u64>,
streams: &CudaStreams,
) -> GlweCiphertext<Vec<u64>> {
let cpu_glwe_list = cuda_glwe_list.to_glwe_ciphertext_list(streams);
GlweCiphertext::from_container(
cpu_glwe_list.clone().into_container(),
cpu_glwe_list.polynomial_size(),
cpu_glwe_list.ciphertext_modulus(),
)
}
impl ScalarMul<u64> for CudaDynLwe {
type Output = Self;
type SideResources = CudaSideResources;
@@ -313,13 +340,14 @@ impl StandardModSwitch<Self> for CudaDynLwe {
panic!("U32 modulus switch not implemented for CudaDynLwe - only U64 is supported");
}
(Self::U64(input), Self::U64(output_cuda_lwe)) => {
let internal_output = input.duplicate(&side_resources.streams);
let mut internal_output = input.duplicate(&side_resources.streams);
cuda_modulus_switch_ciphertext(
&mut output_cuda_lwe.0.d_vec,
&mut internal_output.0.d_vec,
output_modulus_log.0 as u32,
&side_resources.streams,
);
let mut cpu_lwe = internal_output.to_lwe_ciphertext_list(&side_resources.streams);
let shift_to_map_to_native = u64::BITS - output_modulus_log.0 as u32;
for val in cpu_lwe.as_mut_view().into_container().iter_mut() {
*val <<= shift_to_map_to_native;
@@ -713,3 +741,193 @@ impl AllocateLweBootstrapResult for CudaGlweCiphertextList<u128> {
CudaDynLwe::U128(cuda_lwe)
}
}
// Implement LweClassicFft128Bootstrap for CudaNoiseSquashingKey using the 128-bit PBS CUDA function
impl
crate::core_crypto::commons::noise_formulas::noise_simulation::traits::LweClassicFft128Bootstrap<
CudaDynLwe,
CudaDynLwe,
CudaGlweCiphertextList<u128>,
> for crate::integer::gpu::noise_squashing::keys::CudaNoiseSquashingKey
{
type SideResources = CudaSideResources;
fn lwe_classic_fft_128_pbs(
&self,
input: &CudaDynLwe,
output: &mut CudaDynLwe,
accumulator: &CudaGlweCiphertextList<u128>,
side_resources: &mut Self::SideResources,
) {
use crate::core_crypto::gpu::algorithms::lwe_programmable_bootstrapping::cuda_programmable_bootstrap_128_lwe_ciphertext_async;
use crate::integer::gpu::server_key::CudaBootstrappingKey;
match (input, output) {
(CudaDynLwe::U64(input_cuda_lwe), CudaDynLwe::U128(output_cuda_lwe)) => {
                // Get the bootstrap key from self; it is already the u128 variant
let bsk = match &self.bootstrapping_key {
CudaBootstrappingKey::Classic(d_bsk) => d_bsk,
CudaBootstrappingKey::MultiBit(_) => {
panic!("MultiBit bootstrapping keys are not supported for 128-bit PBS");
}
};
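                // The CUDA PBS entry point is asynchronous; the streams are synchronized
                // before the result is read back.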
unsafe {
cuda_programmable_bootstrap_128_lwe_ciphertext_async(
input_cuda_lwe,
output_cuda_lwe,
accumulator,
bsk,
&side_resources.streams,
);
side_resources.streams.synchronize();
}
}
_ => panic!("128-bit PBS expects U64 input and U128 output for CudaDynLwe"),
}
}
}
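// Allocate the packing keyswitch output: a single GLWE sized from the key's output parameters.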
impl AllocateLwePackingKeyswitchResult for CudaLwePackingKeyswitchKey<u64> {
type Output = CudaGlweCiphertextList<u64>;
type SideResources = CudaSideResources;
fn allocate_lwe_packing_keyswitch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
let glwe_dimension = self.output_glwe_size().to_glwe_dimension();
let polynomial_size = self.output_polynomial_size();
let ciphertext_modulus = self.ciphertext_modulus();
CudaGlweCiphertextList::new(
glwe_dimension,
polynomial_size,
GlweCiphertextCount(1),
ciphertext_modulus,
&side_resources.streams,
)
}
}
impl LwePackingKeyswitch<[&CudaDynLwe], CudaGlweCiphertextList<u64>>
for CudaLwePackingKeyswitchKey<u64>
{
type SideResources = CudaSideResources;
fn keyswitch_lwes_and_pack_in_glwe(
&self,
input: &[&CudaDynLwe],
output: &mut CudaGlweCiphertextList<u64>,
side_resources: &mut CudaSideResources,
) {
use crate::core_crypto::gpu::algorithms::lwe_packing_keyswitch::cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64;
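        // Gather the individual input LWEs into one contiguous GPU list before packing.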
let input_lwe_ciphertext_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list(
input.iter().map(|ciphertext| ciphertext.as_lwe_64()),
&side_resources.streams,
);
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_64(
self,
&input_lwe_ciphertext_list,
output,
&side_resources.streams,
);
}
}
// Implement StandardModSwitch traits for CudaGlweCiphertextList<u64>
impl AllocateStandardModSwitchResult for CudaGlweCiphertextList<u64> {
type Output = Self;
type SideResources = CudaSideResources;
fn allocate_standard_mod_switch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
Self::new(
self.glwe_dimension(),
self.polynomial_size(),
self.glwe_ciphertext_count(),
self.ciphertext_modulus(),
&side_resources.streams,
)
}
}
impl StandardModSwitch<Self> for CudaGlweCiphertextList<u64> {
type SideResources = CudaSideResources;
fn standard_mod_switch(
&self,
storage_log_modulus: CiphertextModulusLog,
output: &mut Self,
side_resources: &mut CudaSideResources,
) {
let mut internal_output = self.duplicate(&side_resources.streams);
cuda_modulus_switch_ciphertext(
&mut internal_output.0.d_vec,
storage_log_modulus.0 as u32,
&side_resources.streams,
);
side_resources.streams.synchronize();
let mut cpu_glwe = internal_output.to_glwe_ciphertext_list(&side_resources.streams);
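        // The switch leaves each coefficient in the low `storage_log_modulus` bits; shift
        // it back up so it is expressed with respect to the native u64 modulus.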
let shift_to_map_to_native = u64::BITS - storage_log_modulus.0 as u32;
for val in cpu_glwe.as_mut_view().into_container().iter_mut() {
*val <<= shift_to_map_to_native;
}
let d_after_ms = Self::from_glwe_ciphertext_list(&cpu_glwe, &side_resources.streams);
*output = d_after_ms;
}
}
impl AllocateLwePackingKeyswitchResult for CudaLwePackingKeyswitchKey<u128> {
type Output = CudaGlweCiphertextList<u128>;
type SideResources = CudaSideResources;
fn allocate_lwe_packing_keyswitch_result(
&self,
side_resources: &mut Self::SideResources,
) -> Self::Output {
let glwe_dimension = self.output_glwe_size().to_glwe_dimension();
let polynomial_size = self.output_polynomial_size();
let ciphertext_modulus = self.ciphertext_modulus();
CudaGlweCiphertextList::new(
glwe_dimension,
polynomial_size,
GlweCiphertextCount(1),
ciphertext_modulus,
&side_resources.streams,
)
}
}
impl LwePackingKeyswitch<[&CudaDynLwe], CudaGlweCiphertextList<u128>>
for CudaLwePackingKeyswitchKey<u128>
{
type SideResources = CudaSideResources;
fn keyswitch_lwes_and_pack_in_glwe(
&self,
input: &[&CudaDynLwe],
output: &mut CudaGlweCiphertextList<u128>,
side_resources: &mut CudaSideResources,
) {
use crate::core_crypto::gpu::algorithms::lwe_packing_keyswitch::cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128;
let input_lwe_ciphertext_list = CudaLweCiphertextList::from_vec_cuda_lwe_ciphertexts_list(
input.iter().map(|ciphertext| ciphertext.as_lwe_128()),
&side_resources.streams,
);
cuda_keyswitch_lwe_ciphertext_list_into_glwe_ciphertext_128(
self,
&input_lwe_ciphertext_list,
output,
&side_resources.streams,
);
}
}

View File

@@ -2,6 +2,7 @@ use super::{RadixCiphertext, ServerKey, SignedRadixCiphertext};
use crate::core_crypto::commons::generators::DeterministicSeeder;
use crate::core_crypto::prelude::DefaultRandomGenerator;
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use std::num::NonZeroU64;
pub use tfhe_csprng::seeders::{Seed, Seeder};
@@ -163,6 +164,7 @@ impl ServerKey {
/// as `num_input_random_bits`
///
/// ```rust
/// use std::num::NonZeroU64;
/// use tfhe::integer::gen_keys_radix;
/// use tfhe::shortint::parameters::PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128;
/// use tfhe::Seed;
@@ -173,7 +175,7 @@ impl ServerKey {
/// let (cks, sks) = gen_keys_radix(PARAM_MESSAGE_2_CARRY_2_KS_PBS_GAUSSIAN_2M128, size);
///
/// let num_input_random_bits = 5;
/// let excluded_upper_bound = 3;
/// let excluded_upper_bound = NonZeroU64::new(3).unwrap();
/// let num_blocks_output = 8;
///
/// let ct_res = sks.par_generate_oblivious_pseudo_random_unsigned_custom_range(
@@ -186,15 +188,17 @@ impl ServerKey {
/// // Decrypt:
/// let dec_result: u64 = cks.decrypt(&ct_res);
///
/// assert!(dec_result < excluded_upper_bound);
/// assert!(dec_result < excluded_upper_bound.get());
/// ```
pub fn par_generate_oblivious_pseudo_random_unsigned_custom_range(
&self,
seed: Seed,
num_input_random_bits: u64,
excluded_upper_bound: u64,
excluded_upper_bound: NonZeroU64,
num_blocks_output: u64,
) -> RadixCiphertext {
let excluded_upper_bound = excluded_upper_bound.get();
assert!(self.message_modulus().0.is_power_of_two());
let message_bits_count = self.message_modulus().0.ilog2() as u64;

View File

@@ -10,6 +10,7 @@ use crate::integer::{BooleanBlock, IntegerKeyKind, RadixCiphertext, RadixClientK
use crate::shortint::parameters::*;
use crate::{ClientKey, CompressedServerKey, MatchValues, Seed, Tag};
use std::cmp::{max, min};
use std::num::NonZeroU64;
use std::sync::Arc;
create_parameterized_test!(random_op_sequence {
@@ -498,7 +499,18 @@ where
&ServerKey::par_generate_oblivious_pseudo_random_unsigned_integer_bounded,
);
let oprf_custom_range_executor = OpSequenceCpuFunctionExecutor::new(
&ServerKey::par_generate_oblivious_pseudo_random_unsigned_custom_range,
&|sk: &ServerKey,
seed: Seed,
num_input_random_bits: u64,
excluded_upper_bound: u64,
num_blocks_output: u64| {
sk.par_generate_oblivious_pseudo_random_unsigned_custom_range(
seed,
num_input_random_bits,
NonZeroU64::new(excluded_upper_bound).unwrap_or(NonZeroU64::new(1).unwrap()),
num_blocks_output,
)
},
);
let mut oprf_ops: Vec<(OprfExecutor, String)> = vec![(

View File

@@ -9,6 +9,7 @@ use crate::integer::{IntegerKeyKind, RadixCiphertext, RadixClientKey, ServerKey}
use crate::shortint::parameters::*;
use statrs::distribution::ContinuousCDF;
use std::collections::HashMap;
use std::num::NonZeroU64;
use std::sync::Arc;
use tfhe_csprng::seeders::Seed;
@@ -36,9 +37,19 @@ fn oprf_any_range_unsigned<P>(param: P)
where
P: Into<TestParameters>,
{
let executor = CpuFunctionExecutor::new(
&ServerKey::par_generate_oblivious_pseudo_random_unsigned_custom_range,
);
let executor =
CpuFunctionExecutor::new(&|sk: &ServerKey,
seed: Seed,
num_input_random_bits: u64,
excluded_upper_bound: u64,
num_blocks_output: u64| {
sk.par_generate_oblivious_pseudo_random_unsigned_custom_range(
seed,
num_input_random_bits,
NonZeroU64::new(excluded_upper_bound).unwrap(),
num_blocks_output,
)
});
oprf_any_range_test(param, executor);
}
@@ -46,9 +57,19 @@ fn oprf_almost_uniformity_unsigned<P>(param: P)
where
P: Into<TestParameters>,
{
let executor = CpuFunctionExecutor::new(
&ServerKey::par_generate_oblivious_pseudo_random_unsigned_custom_range,
);
let executor =
CpuFunctionExecutor::new(&|sk: &ServerKey,
seed: Seed,
num_input_random_bits: u64,
excluded_upper_bound: u64,
num_blocks_output: u64| {
sk.par_generate_oblivious_pseudo_random_unsigned_custom_range(
seed,
num_input_random_bits,
NonZeroU64::new(excluded_upper_bound).unwrap(),
num_blocks_output,
)
});
oprf_almost_uniformity_test(param, executor);
}
@@ -89,7 +110,7 @@ where
);
}
pub fn oprf_uniformity_test<P, E>(param: P, mut executor: E)
pub(crate) fn oprf_uniformity_test<P, E>(param: P, mut executor: E)
where
P: Into<TestParameters>,
E: for<'a> FunctionExecutor<(Seed, u64, u64), RadixCiphertext>,
@@ -113,7 +134,7 @@ where
});
}
pub fn oprf_any_range_test<P, E>(param: P, mut executor: E)
pub(crate) fn oprf_any_range_test<P, E>(param: P, mut executor: E)
where
P: Into<TestParameters>,
E: for<'a> FunctionExecutor<(Seed, u64, u64, u64), RadixCiphertext>,
@@ -149,7 +170,7 @@ where
}
}
pub fn oprf_almost_uniformity_test<P, E>(param: P, mut executor: E)
pub(crate) fn oprf_almost_uniformity_test<P, E>(param: P, mut executor: E)
where
P: Into<TestParameters>,
E: for<'a> FunctionExecutor<(Seed, u64, u64, u64), RadixCiphertext>,
@@ -165,40 +186,70 @@ where
let num_input_random_bits: u64 = 4;
let num_blocks_output = 64;
let excluded_upper_bound = 10;
let random_input_upper_bound = 1 << num_input_random_bits;
let mut density = vec![0_usize; excluded_upper_bound as usize];
for i in 0..random_input_upper_bound {
let index = ((i * excluded_upper_bound) as f64 / random_input_upper_bound as f64) as usize;
density[index] += 1;
}
let theoretical_pdf: Vec<f64> = density
.iter()
.map(|count| *count as f64 / random_input_upper_bound as f64)
.collect();
let values: Vec<u64> = (0..sample_count)
.map(|seed| {
let img = executor.execute((
Seed(seed as u128),
num_input_random_bits,
excluded_upper_bound as u64,
excluded_upper_bound,
num_blocks_output,
));
cks.decrypt(&img)
})
.collect();
let p_value_upper_bound = p_value_upper_bound_oprf_almost_uniformity_from_values(
&values,
num_input_random_bits,
excluded_upper_bound,
);
assert!(p_value_limit < p_value_upper_bound);
}
pub(crate) fn p_value_upper_bound_oprf_almost_uniformity_from_values(
values: &[u64],
num_input_random_bits: u64,
excluded_upper_bound: u64,
) -> f64 {
let density = oprf_density_function(excluded_upper_bound, num_input_random_bits);
let theoretical_pdf = probability_density_function_from_density(&density);
let mut bins = vec![0_u64; excluded_upper_bound as usize];
for value in values {
for value in values.iter().copied() {
bins[value as usize] += 1;
}
let cumulative_bins = cumulate(&bins);
let theoretical_cdf = cumulate(&theoretical_pdf);
let sup_diff = sup_diff(&cumulative_bins, &theoretical_cdf);
let p_value_upper_bound = dkw_alpha_from_epsilon(sample_count as f64, sup_diff);
assert!(p_value_limit < p_value_upper_bound);
dkw_alpha_from_epsilon(values.len() as f64, sup_diff)
}
pub(crate) fn oprf_density_function(
excluded_upper_bound: u64,
num_input_random_bits: u64,
) -> Vec<usize> {
let random_input_upper_bound = 1 << num_input_random_bits;
let mut density = vec![0_usize; excluded_upper_bound as usize];
for i in 0..random_input_upper_bound {
let output = ((i * excluded_upper_bound) >> num_input_random_bits) as usize;
density[output] += 1;
}
density
}
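// Example: with `num_input_random_bits = 4` (16 possible inputs) and
// `excluded_upper_bound = 10`, output `i` maps to `(i * 10) >> 4`, giving the density
// [2, 2, 1, 2, 1, 2, 2, 1, 2, 1]: the distribution is only "almost" uniform.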
pub(crate) fn probability_density_function_from_density(density: &[usize]) -> Vec<f64> {
let total_count: usize = density.iter().copied().sum();
density
.iter()
.map(|count| *count as f64 / total_count as f64)
.collect()
}

View File

@@ -475,8 +475,12 @@ pub(crate) mod test {
}
}
pub fn test_uniformity<F>(sample_count: usize, p_value_limit: f64, distinct_values: u64, f: F)
where
pub(crate) fn test_uniformity<F>(
sample_count: usize,
p_value_limit: f64,
distinct_values: u64,
f: F,
) where
F: Sync + Fn(usize) -> u64,
{
let p_value = uniformity_p_value(f, sample_count, distinct_values);
@@ -487,7 +491,7 @@ pub(crate) mod test {
);
}
fn uniformity_p_value<F>(f: F, sample_count: usize, distinct_values: u64) -> f64
pub(crate) fn uniformity_p_value<F>(f: F, sample_count: usize, distinct_values: u64) -> f64
where
F: Sync + Fn(usize) -> u64,
{
@@ -495,8 +499,11 @@ pub(crate) mod test {
let mut values_count = HashMap::new();
for i in &values {
assert!(*i < distinct_values, "i {} dv{}", *i, distinct_values);
for i in values.iter().copied() {
assert!(
i < distinct_values,
"i (={i}) is supposed to be smaller than distinct_values (={distinct_values})",
);
*values_count.entry(i).or_insert(0) += 1;
}

View File

@@ -27,7 +27,7 @@ use crate::shortint::server_key::ServerKey;
use rayon::prelude::*;
#[allow(clippy::too_many_arguments)]
fn dp_ks_any_ms_standard_pbs128<
pub fn dp_ks_any_ms_standard_pbs128<
InputCt,
ScalarMulResult,
KsResult,
@@ -111,7 +111,7 @@ where
#[allow(clippy::too_many_arguments)]
#[allow(clippy::type_complexity)]
fn dp_ks_any_ms_standard_pbs128_packing_ks<
pub fn dp_ks_any_ms_standard_pbs128_packing_ks<
InputCt,
ScalarMulResult,
KsResult,

View File

@@ -727,8 +727,15 @@ async function compactPublicKeyZeroKnowledgeBench() {
serialized_size = list.safe_serialize(BigInt(10000000)).length;
}
const mean = timing / bench_loops;
let base_bench_str = "compact_fhe_uint_proven_encryption_";
let supportsThreads = await threads();
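    // When worker threads are unavailable, tag the benchmark name so the single-threaded
    // results are reported separately from the multi-threaded ones.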
if (!supportsThreads) {
base_bench_str += "unsafe_coop_";
}
const common_bench_str =
"compact_fhe_uint_proven_encryption_" +
base_bench_str +
params.zk_scheme +
"_" +
bits_to_encrypt +